1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.utgenome.format.illumina;
24
25 import java.io.BufferedReader;
26 import java.io.IOException;
27 import java.io.Writer;
28
29 import org.utgenome.UTGBErrorCode;
30 import org.utgenome.UTGBException;
31 import org.utgenome.format.fastq.FastqRead;
32 import org.xerial.util.log.Logger;
33
34
35
36
37
38
39
40 public class Seq2Fastq {
41
42 private static Logger _logger = Logger.getLogger(Seq2Fastq.class);
43
44 public static FastqRead convertToFastq(String line) throws UTGBException {
45 if (line == null)
46 return null;
47
48 String[] c = line.split(":");
49 if (c.length < 7) {
50 throw new UTGBException(UTGBErrorCode.PARSE_ERROR, "insufficient number of columns: " + line);
51 }
52
53
54 String readName = String.format("%s:%s:%s:%s:%s", sanitizeReadName(c[0]), c[1], c[2], c[3], c[4]);
55 String seq = c[5];
56 String qual = c[6];
57 StringBuilder phreadQualityString = new StringBuilder();
58 for (int i = 0; i < qual.length(); ++i) {
59 int phreadQual = qual.charAt(i) - 64;
60 char phreadQualChar = (char) (phreadQual + 33);
61 phreadQualityString.append(phreadQualChar);
62 }
63
64 return new FastqRead(readName, seq, sanitizeQualityValue(phreadQualityString.toString()));
65 }
66
67 public static String sanitizeReadName(String name) {
68 return name.replaceAll("\\s+", "_");
69 }
70
71 public static String sanitizeQualityValue(String qual) {
72 return qual.replaceAll("[^!-~\n]+", "$");
73 }
74
75 public static void convert(BufferedReader illuminaSequenceFile, Writer output) throws IOException {
76
77 int lineCount = 1;
78 for (String line; (line = illuminaSequenceFile.readLine()) != null; lineCount++) {
79 try {
80 FastqRead r = convertToFastq(line);
81 output.write(r.toFASTQString());
82 }
83 catch (UTGBException e) {
84 _logger.warn(String.format("line %d: %s", lineCount, e));
85 }
86
87 }
88
89 }
90
91 }