1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.utgenome.format.illumina;
24
25 import java.io.BufferedReader;
26 import java.io.IOException;
27 import java.io.Writer;
28
29 import org.utgenome.UTGBErrorCode;
30 import org.utgenome.UTGBException;
31 import org.utgenome.format.fastq.FastqRead;
32 import org.xerial.util.log.Logger;
33
34
35
36
37
38
39
40 public class QSeqToFASTQ {
41
42 private static Logger _logger = Logger.getLogger(QSeqToFASTQ.class);
43
44 private String readGroup = null;
45 private int readCount = 0;
46 private boolean disableQualityFilter = false;
47
48 public QSeqToFASTQ(boolean disableQualityFilter) {
49 this.disableQualityFilter = disableQualityFilter;
50 }
51
52 public QSeqToFASTQ(String readGroup, boolean disableQualityFilter) {
53 this.readGroup = readGroup;
54 this.disableQualityFilter = disableQualityFilter;
55 }
56
57 public FastqRead convertToFastq(String line) throws UTGBException {
58 if (line == null)
59 return null;
60
61 final String[] c = line.split("\t");
62 if (c.length < 11) {
63 throw new UTGBException(UTGBErrorCode.PARSE_ERROR, "insufficient number of columns: " + line);
64 }
65
66 final String qfilter = (c[10].equals("1")) ? "Y" : "N";
67
68 readCount++;
69
70 if (!disableQualityFilter && "N".equals(qfilter)) {
71 return null;
72 }
73
74
75 String readName;
76 if (readGroup == null)
77 readName = String.format("%s:%s:%s:%s:%s", c[2], c[3], c[4], c[5], qfilter);
78 else
79 readName = String.format("%s.%d", readGroup, readCount);
80
81 String seq = c[8];
82 String qual = c[9];
83 StringBuilder phreadQualityString = new StringBuilder();
84 for (int i = 0; i < qual.length(); ++i) {
85 int phreadQual = qual.charAt(i) - 64;
86 char phreadQualChar = (char) (phreadQual + 33);
87 phreadQualityString.append(phreadQualChar);
88 }
89
90 return new FastqRead(readName, seq, sanitizeQualityValue(phreadQualityString.toString()));
91 }
92
93 public static String sanitizeReadName(String name) {
94 return name.replaceAll("\\s+", "_");
95 }
96
97 public static String sanitizeQualityValue(String qual) {
98 return qual.replaceAll("[^!-~\n]+", "$");
99 }
100
101 public void convert(BufferedReader illuminaSequenceFile, Writer output) throws IOException {
102
103 int lineCount = 1;
104 for (String line; (line = illuminaSequenceFile.readLine()) != null; lineCount++) {
105 try {
106 FastqRead r = convertToFastq(line);
107 if (r != null)
108 output.write(r.toFASTQString());
109 }
110 catch (Exception e) {
111 _logger.warn(String.format("line %d: %s", lineCount, e));
112 }
113 }
114
115 output.flush();
116 }
117 }