View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2010 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-core Project
18  //
19  // Seq2Fastq.java
20  // Since: Jun 14, 2010
21  //
22  //--------------------------------------
23  package org.utgenome.format.illumina;
24  
25  import java.io.BufferedReader;
26  import java.io.IOException;
27  import java.io.Writer;
28  
29  import org.utgenome.UTGBErrorCode;
30  import org.utgenome.UTGBException;
31  import org.utgenome.format.fastq.FastqRead;
32  import org.xerial.util.log.Logger;
33  
34  /**
35   * For converting Illumina's input into Fastq format
36   * 
37   * @author leo
38   * 
39   */
40  public class Seq2Fastq {
41  
42  	private static Logger _logger = Logger.getLogger(Seq2Fastq.class);
43  
44  	public static FastqRead convertToFastq(String line) throws UTGBException {
45  		if (line == null)
46  			return null;
47  
48  		String[] c = line.split(":");
49  		if (c.length < 7) {
50  			throw new UTGBException(UTGBErrorCode.PARSE_ERROR, "insufficient number of columns: " + line);
51  		}
52  
53  		// name, lane, x, y, pair? 
54  		String readName = String.format("%s:%s:%s:%s:%s", sanitizeReadName(c[0]), c[1], c[2], c[3], c[4]);
55  		String seq = c[5];
56  		String qual = c[6];
57  		StringBuilder phreadQualityString = new StringBuilder();
58  		for (int i = 0; i < qual.length(); ++i) {
59  			int phreadQual = qual.charAt(i) - 64;
60  			char phreadQualChar = (char) (phreadQual + 33);
61  			phreadQualityString.append(phreadQualChar);
62  		}
63  
64  		return new FastqRead(readName, seq, sanitizeQualityValue(phreadQualityString.toString()));
65  	}
66  
67  	public static String sanitizeReadName(String name) {
68  		return name.replaceAll("\\s+", "_");
69  	}
70  
71  	public static String sanitizeQualityValue(String qual) {
72  		return qual.replaceAll("[^!-~\n]+", "$");
73  	}
74  
75  	public static void convert(BufferedReader illuminaSequenceFile, Writer output) throws IOException {
76  
77  		int lineCount = 1;
78  		for (String line; (line = illuminaSequenceFile.readLine()) != null; lineCount++) {
79  			try {
80  				FastqRead r = convertToFastq(line);
81  				output.write(r.toFASTQString());
82  			}
83  			catch (UTGBException e) {
84  				_logger.warn(String.format("line %d: %s", lineCount, e));
85  			}
86  
87  		}
88  
89  	}
90  
91  }