View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2010 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-core Project
18  //
19  // QSeqToFASTQ.java
20  // Since: Jul 20, 2010
21  //
22  //--------------------------------------
23  package org.utgenome.format.illumina;
24  
25  import java.io.BufferedReader;
26  import java.io.IOException;
27  import java.io.Writer;
28  
29  import org.utgenome.UTGBErrorCode;
30  import org.utgenome.UTGBException;
31  import org.utgenome.format.fastq.FastqRead;
32  import org.xerial.util.log.Logger;
33  
34  /**
35   * Converting Illumina's qseq format into FASTQ
36   * 
37   * @author leo
38   * 
39   */
40  public class QSeqToFASTQ {
41  
42  	private static Logger _logger = Logger.getLogger(QSeqToFASTQ.class);
43  
44  	private String readGroup = null;
45  	private int readCount = 0;
46  	private boolean disableQualityFilter = false;
47  
48  	public QSeqToFASTQ(boolean disableQualityFilter) {
49  		this.disableQualityFilter = disableQualityFilter;
50  	}
51  
52  	public QSeqToFASTQ(String readGroup, boolean disableQualityFilter) {
53  		this.readGroup = readGroup;
54  		this.disableQualityFilter = disableQualityFilter;
55  	}
56  
57  	public FastqRead convertToFastq(String line) throws UTGBException {
58  		if (line == null)
59  			return null;
60  
61  		final String[] c = line.split("\t");
62  		if (c.length < 11) {
63  			throw new UTGBException(UTGBErrorCode.PARSE_ERROR, "insufficient number of columns: " + line);
64  		}
65  
66  		final String qfilter = (c[10].equals("1")) ? "Y" : "N";
67  
68  		readCount++;
69  
70  		if (!disableQualityFilter && "N".equals(qfilter)) {
71  			return null;
72  		}
73  
74  		// name, lane, x, y, pair?
75  		String readName;
76  		if (readGroup == null)
77  			readName = String.format("%s:%s:%s:%s:%s", c[2], c[3], c[4], c[5], qfilter);
78  		else
79  			readName = String.format("%s.%d", readGroup, readCount);
80  
81  		String seq = c[8];
82  		String qual = c[9];
83  		StringBuilder phreadQualityString = new StringBuilder();
84  		for (int i = 0; i < qual.length(); ++i) {
85  			int phreadQual = qual.charAt(i) - 64;
86  			char phreadQualChar = (char) (phreadQual + 33);
87  			phreadQualityString.append(phreadQualChar);
88  		}
89  
90  		return new FastqRead(readName, seq, sanitizeQualityValue(phreadQualityString.toString()));
91  	}
92  
93  	public static String sanitizeReadName(String name) {
94  		return name.replaceAll("\\s+", "_");
95  	}
96  
97  	public static String sanitizeQualityValue(String qual) {
98  		return qual.replaceAll("[^!-~\n]+", "$");
99  	}
100 
101 	public void convert(BufferedReader illuminaSequenceFile, Writer output) throws IOException {
102 
103 		int lineCount = 1;
104 		for (String line; (line = illuminaSequenceFile.readLine()) != null; lineCount++) {
105 			try {
106 				FastqRead r = convertToFastq(line);
107 				if (r != null)
108 					output.write(r.toFASTQString());
109 			}
110 			catch (Exception e) {
111 				_logger.warn(String.format("line %d: %s", lineCount, e));
112 			}
113 		}
114 
115 		output.flush();
116 	}
117 }