1 /*--------------------------------------------------------------------------
2 * Copyright 2010 utgenome.org
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *--------------------------------------------------------------------------*/
16 //--------------------------------------
17 // utgb-core Project
18 //
19 // FastqReader.java
20 // Since: Jul 2, 2010
21 //
22 //--------------------------------------
23 package org.utgenome.format.fastq;
24
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;

import org.utgenome.UTGBException;
30
31 /**
32 * FASTQ format file reader.
33 *
34 *
35 * <h1>FASTQ Format Specification: http://maq.sourceforge.net/fastq.shtml</h1>
36 *
37 * <h2>Introduction</h2>
38 *
39 * <p>
 * FASTQ format stores sequences and Phred qualities in a single file. It is concise and compact. FASTQ was first widely
 * used at the Sanger Institute, and therefore we usually take the Sanger specification as the standard FASTQ format, or
 * simply FASTQ format. Although Solexa/Illumina read files look pretty much like FASTQ, they are different in that the
 * qualities are scaled differently. In the quality string, if you can see a character with its ASCII code higher than
 * 90, your file is probably in the Solexa/Illumina format.
45 * </p>
46 *
47 * <pre>
48 * Example
49 *
 * {@literal @}EAS54_6_R1_2_1_413_324
 * CCCTTCTTGTCTTCAGCGTTTCTCC
 * +
 * ;;3;;;;;;;;;;;;7;;;;;;;88
 * {@literal @}EAS54_6_R1_2_1_540_792
 * TTGGCAGGCCAAGGCCGATGGATCA
 * +
 * ;;;;;;;;;;;7;;;;;-;;;3;83
 * {@literal @}EAS54_6_R1_2_1_443_348
 * GTTGCTTCTGGCGTGGGTGGGGGGG
 * +EAS54_6_R1_2_1_443_348
 * ;;;;;;;;;;;9;7;;.7;393333
62 * </pre>
63 *
64 * <h2>FASTQ Format Specification</h2>
65 *
66 * <h3>Notations</h3>
67 *
68 * <pre>
 * &lt;fastq&gt;, &lt;block&gt; and so on represent non-terminal symbols.
 * Characters in red (on the original web page) are regex-like operators.
 * '\n' stands for the Return key.
 *
 * Syntax
 *
 * &lt;fastq&gt; := &lt;block&gt;+
 * &lt;block&gt; := @&lt;seqname&gt;\n&lt;seq&gt;\n"+"[&lt;seqname&gt;]?\n&lt;qual&gt;\n
 * &lt;seqname&gt; := [A-Za-z0-9_.:-]+
 * &lt;seq&gt; := [A-Za-z\n\.~]+
 * &lt;qual&gt; := [!-~\n]+
79 * </pre>
80 *
81 * <h2>Requirements</h2>
82 *
 * The {@code <seqname>} following '+' is optional, but if it appears right after '+', it should be identical to the
 * {@code <seqname>} following '@'. The length of {@code <seq>} is identical to the length of {@code <qual>}. Each
 * character in {@code <qual>} represents the Phred quality of the corresponding nucleotide in {@code <seq>}. If the
 * Phred quality is $Q, which is a non-negative integer, the corresponding quality character can be calculated with
 * the following Perl code: {@code $q = chr(($Q<=93? $Q : 93) + 33);} where chr() is the Perl function to convert an
 * integer to a character based on the ASCII table. Conversely, given a character $q, the corresponding Phred quality
 * can be calculated with: {@code $Q = ord($q) - 33;} where ord() gives the ASCII code of a character.
 *
 * <h2>Solexa/Illumina Read Format</h2>
90 *
 * The syntax of Solexa/Illumina read format is almost identical to the FASTQ format, but the qualities are scaled
 * differently. Given a character $sq, the following Perl code gives the Phred quality $Q:
 *
 * <pre>
 * $Q = 10 * log(1 + 10 ** ((ord($sq) - 64) / 10.0)) / log(10);
 * </pre>
95 *
96 * @author leo
97 *
98 */
99 public class FastqReader {
100
101 BufferedReader reader;
102
103 public FastqReader(Reader input) {
104 if (!BufferedReader.class.isInstance(input))
105 this.reader = new BufferedReader(input);
106 else
107 this.reader = BufferedReader.class.cast(input);
108 }
109
110 /**
111 * Read the next FASTQ read entry.
112 *
113 * @return FastqRead entry or null if the end of stream has reached
114 * @throws UTGBException
115 */
116 public FastqRead next() throws UTGBException {
117 return FastqRead.parse(reader);
118 }
119
120 public void close() throws IOException {
121 reader.close();
122 }
123
124 }