1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.utgenome.format.fasta;
26
27 import java.io.IOException;
28 import java.io.OutputStream;
29 import java.util.ArrayList;
30 import java.util.Random;
31
32 import org.utgenome.gwt.utgb.client.bio.ACGTEncoder;
33 import org.xerial.util.StringUtil;
34
35
36
37
38
39
40
41
42 public class CompactACGTWriter {
43
44 private final OutputStream seqOut;
45 private final OutputStream nSeqOut;
46 private final int BUFFER_SIZE = 4096;
47 private final byte[] seqBuffer = new byte[BUFFER_SIZE];
48 private final byte[] nSeqBuffer = new byte[BUFFER_SIZE / 2];
49 private int index = 0;
50 private long length = 0;
51 private final Random rand = new Random(17);
52
53 public CompactACGTWriter(OutputStream seqOut, OutputStream nSeqOut) {
54 this.seqOut = seqOut;
55 this.nSeqOut = nSeqOut;
56
57 clearBuffer();
58 }
59
60 private void clearBuffer() {
61 for (int i = 0; i < seqBuffer.length; ++i)
62 seqBuffer[i] = 0;
63
64 for (int i = 0; i < nSeqBuffer.length; ++i)
65 nSeqBuffer[i] = 0;
66 }
67
68 public long getSequenceLength() {
69 return length;
70 }
71
72 public void close() throws IOException {
73 finish();
74 seqOut.close();
75 nSeqOut.close();
76 }
77
78 private void finish() throws IOException {
79 if (index <= 0)
80 return;
81
82 seqOut.write(seqBuffer, 0, index / 4 + ((index % 4 > 0) ? 1 : 0));
83 nSeqOut.write(nSeqBuffer, 0, index / 8 + ((index % 8 > 0) ? 1 : 0));
84 index = 0;
85
86 seqOut.flush();
87 nSeqOut.flush();
88 }
89
90 void append2bit(byte code) throws IOException {
91
92 if (index >= BUFFER_SIZE * 4) {
93
94 seqOut.write(seqBuffer, 0, BUFFER_SIZE);
95 nSeqOut.write(nSeqBuffer, 0, BUFFER_SIZE / 2);
96 clearBuffer();
97 index = 0;
98 }
99
100 int pos = index / 4;
101 int offset = index % 4;
102
103 if (code >= 4) {
104 code = (byte) rand.nextInt(4);
105 nSeqBuffer[index / 8] |= (byte) (0x01 << (7 - (index % 8)));
106 }
107
108 seqBuffer[pos] |= (byte) (code << (6 - offset * 2));
109 index++;
110 length++;
111 }
112
113 public void append(String sequence) throws IOException {
114 String t = sequence.trim();
115 for (int i = 0; i < t.length(); ++i) {
116 append2bit(ACGTEncoder.to2bitCode(t.charAt(i)));
117 }
118 }
119
120 public void append(char ch) throws IOException {
121 append2bit(ACGTEncoder.to2bitCode(ch));
122 }
123
124 public static byte to2bitCode(char acgt) {
125 return ACGTEncoder.to2bitCode(acgt);
126 }
127
128
129
130
131 public static void generateCharTo2BitACGTTable() {
132
133 ArrayList<Byte> buffer = new ArrayList<Byte>();
134 for (char c = 0; c < 256; ++c) {
135 char u = Character.toUpperCase(c);
136 byte code = 0;
137 switch (u) {
138 case 'A':
139 code = 0;
140 break;
141 case 'C':
142 code = 1;
143 break;
144 case 'G':
145 code = 2;
146 break;
147 case 'T':
148 case 'U':
149 code = 3;
150 break;
151 default:
152 code = 4;
153 break;
154 }
155 if (buffer.size() >= 16) {
156 System.out.println(StringUtil.join(buffer, ", ") + ", ");
157 buffer.clear();
158 }
159 buffer.add(code);
160 }
161 if (!buffer.isEmpty()) {
162 System.out.println(StringUtil.join(buffer, ", "));
163 }
164
165 }
166
167 }