1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.utgenome.format.wig;
26
27 import java.io.ByteArrayInputStream;
28 import java.io.File;
29 import java.io.IOException;
30 import java.io.ObjectInputStream;
31 import java.sql.Connection;
32 import java.sql.DriverManager;
33 import java.sql.ResultSet;
34 import java.sql.SQLException;
35 import java.sql.Statement;
36 import java.util.ArrayList;
37 import java.util.HashMap;
38 import java.util.List;
39 import java.util.Map;
40 import java.util.zip.DataFormatException;
41 import java.util.zip.GZIPInputStream;
42
43 import org.utgenome.UTGBErrorCode;
44 import org.utgenome.UTGBException;
45 import org.utgenome.graphics.GenomeWindow;
46 import org.utgenome.gwt.utgb.client.bio.ChrLoc;
47 import org.utgenome.gwt.utgb.client.bio.CompactWIGData;
48 import org.utgenome.gwt.utgb.client.bio.GraphWindow;
49 import org.utgenome.gwt.utgb.client.bio.WigGraphData;
50 import org.xerial.util.StopWatch;
51 import org.xerial.util.log.Logger;
52
53
54
55
56
57
58
59
60
61 public class WIGDatabaseReader {
62 private Connection connection = null;
63 private Statement statement;
64 private static Logger _logger = Logger.getLogger(WIGDatabaseReader.class);
65
66 private static float minValue = Float.MAX_VALUE;
67 private static float maxValue = Float.MIN_VALUE;
68
69 private GraphWindow windowFunc = GraphWindow.MEDIAN;
70
71 public WIGDatabaseReader(File file, GraphWindow windowFunc) throws UTGBException {
72 this(file.toString(), windowFunc);
73 }
74
75 public WIGDatabaseReader(String inputFileURL, GraphWindow windowFunc) throws UTGBException {
76 this.windowFunc = windowFunc;
77 try {
78 Class.forName("org.sqlite.JDBC");
79 connection = DriverManager.getConnection("jdbc:sqlite:" + inputFileURL);
80 statement = connection.createStatement();
81 }
82 catch (Exception e) {
83 throw UTGBException.convert(e);
84 }
85 }
86
87 public void close() throws SQLException {
88 if (connection != null)
89 connection.close();
90 }
91
92 public ArrayList<String> getBrowser() throws SQLException {
93 ArrayList<String> browser = new ArrayList<String>();
94
95 ResultSet rs = statement.executeQuery(String.format("select * from browser"));
96 while (rs.next()) {
97 browser.add(rs.getString("description"));
98 }
99
100 return browser;
101 }
102
103 public ArrayList<Integer> getTrackIdList(String chrom) throws SQLException {
104 ArrayList<Integer> trackIdList = new ArrayList<Integer>();
105
106 ResultSet rs = statement.executeQuery(String.format("select distinct track_id from track where name='chrom' and value='%s'", chrom));
107 while (rs.next()) {
108 trackIdList.add(Integer.valueOf(rs.getInt("track_id")));
109 }
110
111 return trackIdList;
112 }
113
114 public ArrayList<Integer> getTrackIdList() throws SQLException {
115 ArrayList<Integer> trackIdList = new ArrayList<Integer>();
116
117 ResultSet rs = statement.executeQuery("select distinct track_id from track");
118 while (rs.next()) {
119 trackIdList.add(Integer.valueOf(rs.getInt("track_id")));
120 }
121
122 return trackIdList;
123 }
124
125 public ArrayList<String> getChromList() throws SQLException {
126 ArrayList<String> trackIdList = new ArrayList<String>();
127
128 ResultSet rs = statement.executeQuery("select distinct value from track where name='chrom'");
129 while (rs.next()) {
130 trackIdList.add(rs.getString("value"));
131 }
132
133 return trackIdList;
134 }
135
136 public HashMap<String, String> getTrack(int trackId) throws SQLException {
137 HashMap<String, String> track = new HashMap<String, String>();
138
139 ResultSet rs = statement.executeQuery(String.format("select * from track where track_id=%d", trackId));
140 while (rs.next()) {
141 track.put(rs.getString("name"), rs.getString("value"));
142 }
143
144 return track;
145 }
146
147 public HashMap<Integer, Float> getData(int trackId, int start, int end) throws SQLException, IOException, DataFormatException, NumberFormatException,
148 ClassNotFoundException, UTGBException {
149 return (getData((end - start), trackId, start, end));
150 }
151
152 private static interface ValueSelector {
153 public float select(float prev, float max, float min, float avg, float median);
154 }
155
156 public static class MAXSelector implements ValueSelector {
157 public float select(float prev, float max, float min, float avg, float median) {
158 return Math.max(prev, max);
159 }
160 }
161
162 public static class MINSelector implements ValueSelector {
163 public float select(float prev, float max, float min, float avg, float median) {
164 return Math.min(prev, min);
165 }
166 }
167
168 public static class MedianSelector implements ValueSelector {
169 public float select(float prev, float max, float min, float avg, float median) {
170 return Math.max(prev, median);
171 }
172 }
173
174 public static class AvgSelector implements ValueSelector {
175 public float select(float prev, float max, float min, float avg, float median) {
176 return Math.max(prev, avg);
177 }
178 }
179
180 private ValueSelector getSelector() throws UTGBException {
181 ValueSelector selector = null;
182 switch (windowFunc) {
183 case AVG:
184 selector = new AvgSelector();
185 break;
186 case MAX:
187 selector = new MAXSelector();
188 break;
189 case MEDIAN:
190 selector = new MedianSelector();
191 break;
192 case MIN:
193 selector = new MINSelector();
194 break;
195 default:
196 throw new UTGBException(UTGBErrorCode.INVALID_INPUT, "unknown window function: " + windowFunc);
197 }
198 return selector;
199 }
200
201 public CompactWIGData fillPixelsWithMedian(CompactWIGData cwig, int pixelWidth, int trackId, int start, int end) throws SQLException, UTGBException {
202
203 GenomeWindow w = new GenomeWindow(start, end);
204 float[] dataValues = new float[pixelWidth];
205 for (int i = 0; i < dataValues.length; ++i)
206 dataValues[i] = 0.0f;
207
208 StopWatch st1 = new StopWatch();
209 StopWatch st2 = new StopWatch();
210
211 float minInBlock = Float.MAX_VALUE;
212 float maxInBlock = Float.MIN_VALUE;
213
214 ValueSelector selector = getSelector();
215
216 ResultSet rs = null;
217 try {
218 rs = statement.executeQuery(String.format(
219 "select start, end, min_value, max_value, median, avg from data where track_id=%d and start<=%d and end>=%d order by start", trackId, end,
220 start));
221 while (rs.next()) {
222 int s = rs.getInt("start");
223 int e = rs.getInt("end");
224 float max = rs.getFloat("max_value");
225 float min = rs.getFloat("min_value");
226 float median = rs.getFloat("median");
227 float avg = rs.getFloat("avg");
228
229 int pixelStart = w.getXPosOnWindow(s, pixelWidth);
230 if (pixelStart <= 0)
231 pixelStart = 0;
232 int pixelEnd = w.getXPosOnWindow(e + cwig.getSpan(), pixelWidth);
233 for (int x = pixelStart; x < pixelWidth && x < pixelEnd; ++x) {
234 dataValues[x] = selector.select(dataValues[x], max, min, avg, median);
235 }
236
237 minInBlock = Math.min(min, minInBlock);
238 maxInBlock = Math.max(max, maxInBlock);
239 }
240 }
241 finally {
242 if (rs != null)
243 rs.close();
244 }
245
246 cwig.setMinValue(minInBlock);
247 cwig.setMaxValue(maxInBlock);
248 cwig.setData(dataValues);
249
250 if (_logger.isTraceEnabled())
251 _logger.trace("Time(all) : " + st1.getElapsedTime());
252 return cwig;
253
254 }
255
256 public HashMap<Integer, Float> getData(int windowWidth, int trackId, int start, int end) throws SQLException, IOException, DataFormatException,
257 NumberFormatException, ClassNotFoundException, UTGBException {
258 HashMap<Integer, Float> data = new HashMap<Integer, Float>();
259 HashMap<String, String> track = getTrack(trackId);
260
261 int[] chromStarts;
262 float[] dataValues;
263
264 minValue = Float.MAX_VALUE;
265 maxValue = Float.MIN_VALUE;
266
267 if (start > end) {
268 int tmp = start;
269 start = end;
270 end = tmp;
271 }
272
273 int rough = (int) Math.floor((end - start) / windowWidth);
274 if (rough < 1)
275 rough = 1;
276
277 ValueSelector selector = getSelector();
278
279 StopWatch st1 = new StopWatch();
280 StopWatch st2 = new StopWatch();
281
282 ResultSet rs = statement.executeQuery(String.format("select * from data where track_id=%d and start<=%d and end>=%d order by start", trackId, end,
283 start));
284 while (rs.next()) {
285 int i;
286 ByteArrayInputStream bis;
287 GZIPInputStream in;
288 ObjectInputStream ois;
289
290 int nDatas = rs.getInt("data_num");
291
292
293 if (track.get("stepType").equals("variableStep")) {
294 bis = new ByteArrayInputStream(rs.getBytes("chrom_starts"));
295 in = new GZIPInputStream(bis);
296 ois = new ObjectInputStream(in);
297
298 chromStarts = (int[]) ois.readObject();
299
300 ois.close();
301 in.close();
302 bis.close();
303 }
304 else if (track.get("stepType").equals("fixedStep")) {
305 int startPoint = rs.getInt("start");
306 int stepSize = Integer.parseInt(track.get("step"));
307 chromStarts = new int[nDatas];
308
309 for (i = 0; i < nDatas; i++) {
310 chromStarts[i] = startPoint + (stepSize * i);
311 }
312 }
313 else {
314 throw new DataFormatException();
315 }
316
317
318 bis = new ByteArrayInputStream(rs.getBytes("data_values"));
319 in = new GZIPInputStream(bis);
320 ois = new ObjectInputStream(in);
321
322 dataValues = (float[]) ois.readObject();
323
324 ois.close();
325 in.close();
326 bis.close();
327
328 st2.resume();
329 for (i = 0; i < nDatas; i++) {
330 if (start <= chromStarts[i] && chromStarts[i] <= end) {
331 if (dataValues[i] != 0.0f) {
332 int chromStart = chromStarts[i];
333 if (data.containsKey(chromStart)) {
334 float prev = data.get(chromStart);
335 float current = dataValues[i];
336 float newValue = selector.select(prev, maxValue, minValue, current, current);
337 data.put(chromStart, newValue);
338 }
339 else {
340 data.put(chromStart, dataValues[i]);
341 }
342 }
343
344 minValue = Math.min(minValue, dataValues[i]);
345 maxValue = Math.max(maxValue, dataValues[i]);
346 }
347 }
348 st2.stop();
349 }
350
351 rs.close();
352
353 if (_logger.isTraceEnabled()) {
354 _logger.trace("min: " + minValue + ", max: " + maxValue);
355 _logger.trace("Time(all) : " + st1.getElapsedTime());
356 _logger.trace("Time(archive): " + st2.getElapsedTime());
357 }
358 return data;
359 }
360
361 public HashMap<Integer, Float> getData(int trackId) throws NumberFormatException, SQLException, IOException, DataFormatException, ClassNotFoundException,
362 UTGBException {
363 return getData(trackId, 0, Integer.MAX_VALUE);
364 }
365
366 public WigGraphData getWigData(int windowWidth, int trackId, int start, int end) throws SQLException, NumberFormatException, IOException,
367 DataFormatException, ClassNotFoundException, UTGBException {
368 WigGraphData wigData = prepareWigData(trackId);
369 wigData.setData(getData(windowWidth, trackId, start, end));
370 return wigData;
371 }
372
373 public WigGraphData getWigData(int trackId, int start, int end) throws SQLException, NumberFormatException, IOException, DataFormatException,
374 ClassNotFoundException, UTGBException {
375 WigGraphData wigData = prepareWigData(trackId);
376 wigData.setData(getData(trackId, start, end));
377 return wigData;
378 }
379
380 public CompactWIGData getCompactWigData(int trackId, int start, int end, int pixelWidth) throws SQLException, UTGBException {
381 WigGraphData wigData = prepareWigData(trackId);
382 CompactWIGData cWig = prepareCompactWigData(wigData, new ChrLoc(null, start, end));
383 fillPixelsWithMedian(cWig, pixelWidth, trackId, start, end);
384 return cWig;
385 }
386
387 private WigGraphData prepareWigData(int trackId) throws SQLException {
388 WigGraphData wigData = new WigGraphData();
389 wigData.setTrack_id(trackId);
390 wigData.setBrowser(getBrowser());
391 wigData.setTrack(getTrack(trackId));
392 wigData.setMinValue(minValue);
393 wigData.setMaxValue(maxValue);
394 return wigData;
395 }
396
397 public WigGraphData getWigData(int trackId) throws SQLException, NumberFormatException, IOException, DataFormatException, ClassNotFoundException,
398 UTGBException {
399 return getWigData(trackId, 0, Integer.MAX_VALUE);
400 }
401
402 public ArrayList<WigGraphData> getWigDataList(int windowWidth, String chrom, int start, int end) throws SQLException, NumberFormatException, IOException,
403 DataFormatException, ClassNotFoundException, UTGBException {
404 ArrayList<WigGraphData> wigDataList = new ArrayList<WigGraphData>();
405
406 for (int id : getTrackIdList(chrom)) {
407 wigDataList.add(getWigData(windowWidth, id, start, end));
408 }
409
410 return wigDataList;
411 }
412
413 public ArrayList<WigGraphData> getWigDataList(String chrom, int start, int end) throws SQLException, NumberFormatException, IOException,
414 DataFormatException, ClassNotFoundException, UTGBException {
415 ArrayList<WigGraphData> wigDataList = new ArrayList<WigGraphData>();
416
417 for (int id : getTrackIdList(chrom)) {
418 wigDataList.add(getWigData(id, start, end));
419 }
420
421 return wigDataList;
422 }
423
424 public static List<WigGraphData> getWigDataList(File fileName, int windowWidth, ChrLoc location, GraphWindow windowFunc) throws UTGBException, SQLException {
425 ArrayList<WigGraphData> wigDataList = null;
426
427 WIGDatabaseReader reader = new WIGDatabaseReader(fileName, windowFunc);
428 try {
429 wigDataList = reader.getWigDataList(windowWidth, location.chr, location.start, location.end);
430 }
431 catch (Exception e) {
432 _logger.error(e);
433 e.printStackTrace(System.err);
434 }
435 finally {
436 reader.close();
437 }
438
439 return wigDataList;
440 }
441
442 public static List<CompactWIGData> getRoughCompactWigDataList(File path, int pixelWidth, ChrLoc location, GraphWindow windowFunc) throws UTGBException,
443 SQLException {
444
445 ArrayList<CompactWIGData> cWig = new ArrayList<CompactWIGData>();
446
447 WIGDatabaseReader reader = new WIGDatabaseReader(path, windowFunc);
448 try {
449 for (int id : reader.getTrackIdList(location.chr)) {
450 cWig.add(reader.getCompactWigData(id, location.start, location.end, pixelWidth));
451 }
452 }
453 finally {
454 reader.close();
455 }
456 return cWig;
457 }
458
459 public static List<CompactWIGData> getCompactWigDataList(File path, int pixelWidth, ChrLoc location, GraphWindow windowFunc) throws UTGBException,
460 SQLException {
461
462 int numBlocks = location.length() / WIGDatabaseGenerator.DATA_SPLIT_UNIT;
463 if (_logger.isDebugEnabled())
464 _logger.debug(String.format("num blocks: %s in %s, pixel width: %d", numBlocks, location, pixelWidth));
465 if (numBlocks >= 100) {
466
467 if (_logger.isDebugEnabled())
468 _logger.debug(String.format("query wig summary (path:%s, pixel width:%d, loc:%s)", path, pixelWidth, location));
469 return getRoughCompactWigDataList(path, pixelWidth, location, windowFunc);
470 }
471 else {
472 ArrayList<CompactWIGData> cWig = new ArrayList<CompactWIGData>();
473 List<WigGraphData> wig = getWigDataList(path, pixelWidth, location, windowFunc);
474 for (WigGraphData w : wig) {
475 cWig.add(WIGDatabaseReader.convertResolution(w, location, pixelWidth));
476 }
477 return cWig;
478 }
479 }
480
481 public static CompactWIGData prepareCompactWigData(WigGraphData w, ChrLoc location) {
482 CompactWIGData cwig = new CompactWIGData();
483 cwig.setTrack(w.getTrack());
484 cwig.setMaxValue(w.getMaxValue());
485 cwig.setMinValue(w.getMinValue());
486 cwig.setBrowser(w.getBrowser());
487 cwig.setTrack_id(w.getTrack_id());
488 cwig.setStart(location.start < location.end ? location.start : location.end);
489 int span = 1;
490 if (w.getTrack().containsKey("span")) {
491 span = Integer.parseInt(w.getTrack().get("span"));
492 cwig.setSpan(span);
493 }
494 return cwig;
495 }
496
497 public static CompactWIGData convertResolution(WigGraphData w, ChrLoc location, int windowWidth) {
498
499 CompactWIGData cwig = prepareCompactWigData(w, location);
500 final int span = cwig.getSpan();
501
502 if (_logger.isDebugEnabled())
503 _logger.debug(String.format("convert resolution: loc:%s, window width:%d", location, windowWidth));
504
505 GenomeWindow window = new GenomeWindow(location.start, location.end);
506
507 float[] pixelWiseGraphData = new float[windowWidth + span];
508 for (int i = 0; i < pixelWiseGraphData.length; ++i)
509 pixelWiseGraphData[i] = 0;
510
511 Map<Integer, Float> data = w.getData();
512 for (Map.Entry<Integer, Float> each : data.entrySet()) {
513 int xOnGenome = each.getKey();
514 float val = each.getValue();
515
516 int x1 = window.getXPosOnWindow(xOnGenome, windowWidth);
517 int x2 = window.getXPosOnWindow(xOnGenome + span, windowWidth);
518 if (x1 == x2)
519 x2 = x1 + 1;
520
521 if (x1 < 0)
522 x1 = 0;
523
524 for (int i = x1; i < x2 && i < windowWidth + span; ++i) {
525 float current = pixelWiseGraphData[i];
526 float abs = Math.abs(val);
527 if (current < abs) {
528 pixelWiseGraphData[i] = val;
529 }
530 }
531 }
532
533 cwig.setData(pixelWiseGraphData);
534 return cwig;
535 }
536
537 }