View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2008 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-shell Project
18  //
19  // WIGReader.java
20  // Since: 2009/11/30
21  //
22  // $URL: http://svn.utgenome.org/utgb/trunk/utgb/utgb-shell/src/main/java/org/utgenome/shell/db/wig/WIGReader.java $ 
23  // $Author: yoshimura $
24  //--------------------------------------
25  package org.utgenome.format.wig;
26  
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.DataFormatException;
import java.util.zip.GZIPInputStream;

import org.utgenome.UTGBErrorCode;
import org.utgenome.UTGBException;
import org.utgenome.graphics.GenomeWindow;
import org.utgenome.gwt.utgb.client.bio.ChrLoc;
import org.utgenome.gwt.utgb.client.bio.CompactWIGData;
import org.utgenome.gwt.utgb.client.bio.GraphWindow;
import org.utgenome.gwt.utgb.client.bio.WigGraphData;
import org.xerial.util.StopWatch;
import org.xerial.util.log.Logger;
52  
53  /**
54   * Wig database (sqlite) reader
55   * 
56   * 
57   * @author yoshimura
58   * @author leo
59   * 
60   */
61  public class WIGDatabaseReader {
62  	private Connection connection = null;
63  	private Statement statement;
64  	private static Logger _logger = Logger.getLogger(WIGDatabaseReader.class);
65  
66  	private static float minValue = Float.MAX_VALUE;
67  	private static float maxValue = Float.MIN_VALUE;
68  
69  	private GraphWindow windowFunc = GraphWindow.MEDIAN;
70  
71  	public WIGDatabaseReader(File file, GraphWindow windowFunc) throws UTGBException {
72  		this(file.toString(), windowFunc);
73  	}
74  
75  	public WIGDatabaseReader(String inputFileURL, GraphWindow windowFunc) throws UTGBException {
76  		this.windowFunc = windowFunc;
77  		try {
78  			Class.forName("org.sqlite.JDBC");
79  			connection = DriverManager.getConnection("jdbc:sqlite:" + inputFileURL);
80  			statement = connection.createStatement();
81  		}
82  		catch (Exception e) {
83  			throw UTGBException.convert(e);
84  		}
85  	}
86  
87  	public void close() throws SQLException {
88  		if (connection != null)
89  			connection.close();
90  	}
91  
92  	public ArrayList<String> getBrowser() throws SQLException {
93  		ArrayList<String> browser = new ArrayList<String>();
94  
95  		ResultSet rs = statement.executeQuery(String.format("select * from browser"));
96  		while (rs.next()) {
97  			browser.add(rs.getString("description"));
98  		}
99  
100 		return browser;
101 	}
102 
103 	public ArrayList<Integer> getTrackIdList(String chrom) throws SQLException {
104 		ArrayList<Integer> trackIdList = new ArrayList<Integer>();
105 
106 		ResultSet rs = statement.executeQuery(String.format("select distinct track_id from track where name='chrom' and value='%s'", chrom));
107 		while (rs.next()) {
108 			trackIdList.add(Integer.valueOf(rs.getInt("track_id")));
109 		}
110 
111 		return trackIdList;
112 	}
113 
114 	public ArrayList<Integer> getTrackIdList() throws SQLException {
115 		ArrayList<Integer> trackIdList = new ArrayList<Integer>();
116 
117 		ResultSet rs = statement.executeQuery("select distinct track_id from track");
118 		while (rs.next()) {
119 			trackIdList.add(Integer.valueOf(rs.getInt("track_id")));
120 		}
121 
122 		return trackIdList;
123 	}
124 
125 	public ArrayList<String> getChromList() throws SQLException {
126 		ArrayList<String> trackIdList = new ArrayList<String>();
127 
128 		ResultSet rs = statement.executeQuery("select distinct value from track where name='chrom'");
129 		while (rs.next()) {
130 			trackIdList.add(rs.getString("value"));
131 		}
132 
133 		return trackIdList;
134 	}
135 
136 	public HashMap<String, String> getTrack(int trackId) throws SQLException {
137 		HashMap<String, String> track = new HashMap<String, String>();
138 
139 		ResultSet rs = statement.executeQuery(String.format("select * from track where track_id=%d", trackId));
140 		while (rs.next()) {
141 			track.put(rs.getString("name"), rs.getString("value"));
142 		}
143 
144 		return track;
145 	}
146 
147 	public HashMap<Integer, Float> getData(int trackId, int start, int end) throws SQLException, IOException, DataFormatException, NumberFormatException,
148 			ClassNotFoundException, UTGBException {
149 		return (getData((end - start), trackId, start, end));
150 	}
151 
152 	private static interface ValueSelector {
153 		public float select(float prev, float max, float min, float avg, float median);
154 	}
155 
156 	public static class MAXSelector implements ValueSelector {
157 		public float select(float prev, float max, float min, float avg, float median) {
158 			return Math.max(prev, max);
159 		}
160 	}
161 
162 	public static class MINSelector implements ValueSelector {
163 		public float select(float prev, float max, float min, float avg, float median) {
164 			return Math.min(prev, min);
165 		}
166 	}
167 
168 	public static class MedianSelector implements ValueSelector {
169 		public float select(float prev, float max, float min, float avg, float median) {
170 			return Math.max(prev, median);
171 		}
172 	}
173 
174 	public static class AvgSelector implements ValueSelector {
175 		public float select(float prev, float max, float min, float avg, float median) {
176 			return Math.max(prev, avg);
177 		}
178 	}
179 
180 	private ValueSelector getSelector() throws UTGBException {
181 		ValueSelector selector = null;
182 		switch (windowFunc) {
183 		case AVG:
184 			selector = new AvgSelector();
185 			break;
186 		case MAX:
187 			selector = new MAXSelector();
188 			break;
189 		case MEDIAN:
190 			selector = new MedianSelector();
191 			break;
192 		case MIN:
193 			selector = new MINSelector();
194 			break;
195 		default:
196 			throw new UTGBException(UTGBErrorCode.INVALID_INPUT, "unknown window function: " + windowFunc);
197 		}
198 		return selector;
199 	}
200 
201 	public CompactWIGData fillPixelsWithMedian(CompactWIGData cwig, int pixelWidth, int trackId, int start, int end) throws SQLException, UTGBException {
202 
203 		GenomeWindow w = new GenomeWindow(start, end);
204 		float[] dataValues = new float[pixelWidth];
205 		for (int i = 0; i < dataValues.length; ++i)
206 			dataValues[i] = 0.0f;
207 
208 		StopWatch st1 = new StopWatch();
209 		StopWatch st2 = new StopWatch();
210 
211 		float minInBlock = Float.MAX_VALUE;
212 		float maxInBlock = Float.MIN_VALUE;
213 
214 		ValueSelector selector = getSelector();
215 
216 		ResultSet rs = null;
217 		try {
218 			rs = statement.executeQuery(String.format(
219 					"select start, end, min_value, max_value, median, avg from data where track_id=%d and start<=%d and end>=%d order by start", trackId, end,
220 					start));
221 			while (rs.next()) {
222 				int s = rs.getInt("start");
223 				int e = rs.getInt("end");
224 				float max = rs.getFloat("max_value");
225 				float min = rs.getFloat("min_value");
226 				float median = rs.getFloat("median");
227 				float avg = rs.getFloat("avg");
228 
229 				int pixelStart = w.getXPosOnWindow(s, pixelWidth);
230 				if (pixelStart <= 0)
231 					pixelStart = 0;
232 				int pixelEnd = w.getXPosOnWindow(e + cwig.getSpan(), pixelWidth);
233 				for (int x = pixelStart; x < pixelWidth && x < pixelEnd; ++x) {
234 					dataValues[x] = selector.select(dataValues[x], max, min, avg, median);
235 				}
236 
237 				minInBlock = Math.min(min, minInBlock);
238 				maxInBlock = Math.max(max, maxInBlock);
239 			}
240 		}
241 		finally {
242 			if (rs != null)
243 				rs.close();
244 		}
245 
246 		cwig.setMinValue(minInBlock);
247 		cwig.setMaxValue(maxInBlock);
248 		cwig.setData(dataValues);
249 
250 		if (_logger.isTraceEnabled())
251 			_logger.trace("Time(all)    : " + st1.getElapsedTime());
252 		return cwig;
253 
254 	}
255 
256 	public HashMap<Integer, Float> getData(int windowWidth, int trackId, int start, int end) throws SQLException, IOException, DataFormatException,
257 			NumberFormatException, ClassNotFoundException, UTGBException {
258 		HashMap<Integer, Float> data = new HashMap<Integer, Float>();
259 		HashMap<String, String> track = getTrack(trackId);
260 
261 		int[] chromStarts;
262 		float[] dataValues;
263 
264 		minValue = Float.MAX_VALUE;
265 		maxValue = Float.MIN_VALUE;
266 
267 		if (start > end) {
268 			int tmp = start;
269 			start = end;
270 			end = tmp;
271 		}
272 
273 		int rough = (int) Math.floor((end - start) / windowWidth);
274 		if (rough < 1)
275 			rough = 1;
276 
277 		ValueSelector selector = getSelector();
278 
279 		StopWatch st1 = new StopWatch();
280 		StopWatch st2 = new StopWatch();
281 
282 		ResultSet rs = statement.executeQuery(String.format("select * from data where track_id=%d and start<=%d and end>=%d order by start", trackId, end,
283 				start));
284 		while (rs.next()) {
285 			int i;
286 			ByteArrayInputStream bis;
287 			GZIPInputStream in;
288 			ObjectInputStream ois;
289 
290 			int nDatas = rs.getInt("data_num");
291 
292 			// read data point
293 			if (track.get("stepType").equals("variableStep")) {
294 				bis = new ByteArrayInputStream(rs.getBytes("chrom_starts"));
295 				in = new GZIPInputStream(bis);
296 				ois = new ObjectInputStream(in);
297 
298 				chromStarts = (int[]) ois.readObject();
299 
300 				ois.close();
301 				in.close();
302 				bis.close();
303 			}
304 			else if (track.get("stepType").equals("fixedStep")) {
305 				int startPoint = rs.getInt("start");
306 				int stepSize = Integer.parseInt(track.get("step"));
307 				chromStarts = new int[nDatas];
308 
309 				for (i = 0; i < nDatas; i++) {
310 					chromStarts[i] = startPoint + (stepSize * i);
311 				}
312 			}
313 			else {
314 				throw new DataFormatException();
315 			}
316 
317 			// read data value
318 			bis = new ByteArrayInputStream(rs.getBytes("data_values"));
319 			in = new GZIPInputStream(bis);
320 			ois = new ObjectInputStream(in);
321 
322 			dataValues = (float[]) ois.readObject();
323 
324 			ois.close();
325 			in.close();
326 			bis.close();
327 
328 			st2.resume();
329 			for (i = 0; i < nDatas; i++) {
330 				if (start <= chromStarts[i] && chromStarts[i] <= end) {
331 					if (dataValues[i] != 0.0f) {
332 						int chromStart = chromStarts[i]; // - (chromStarts[i] % rough);
333 						if (data.containsKey(chromStart)) {
334 							float prev = data.get(chromStart);
335 							float current = dataValues[i];
336 							float newValue = selector.select(prev, maxValue, minValue, current, current);
337 							data.put(chromStart, newValue);
338 						}
339 						else {
340 							data.put(chromStart, dataValues[i]);
341 						}
342 					}
343 
344 					minValue = Math.min(minValue, dataValues[i]);
345 					maxValue = Math.max(maxValue, dataValues[i]);
346 				}
347 			}
348 			st2.stop();
349 		}
350 
351 		rs.close();
352 
353 		if (_logger.isTraceEnabled()) {
354 			_logger.trace("min: " + minValue + ", max: " + maxValue);
355 			_logger.trace("Time(all)    : " + st1.getElapsedTime());
356 			_logger.trace("Time(archive): " + st2.getElapsedTime());
357 		}
358 		return data;
359 	}
360 
361 	public HashMap<Integer, Float> getData(int trackId) throws NumberFormatException, SQLException, IOException, DataFormatException, ClassNotFoundException,
362 			UTGBException {
363 		return getData(trackId, 0, Integer.MAX_VALUE);
364 	}
365 
366 	public WigGraphData getWigData(int windowWidth, int trackId, int start, int end) throws SQLException, NumberFormatException, IOException,
367 			DataFormatException, ClassNotFoundException, UTGBException {
368 		WigGraphData wigData = prepareWigData(trackId);
369 		wigData.setData(getData(windowWidth, trackId, start, end));
370 		return wigData;
371 	}
372 
373 	public WigGraphData getWigData(int trackId, int start, int end) throws SQLException, NumberFormatException, IOException, DataFormatException,
374 			ClassNotFoundException, UTGBException {
375 		WigGraphData wigData = prepareWigData(trackId);
376 		wigData.setData(getData(trackId, start, end));
377 		return wigData;
378 	}
379 
380 	public CompactWIGData getCompactWigData(int trackId, int start, int end, int pixelWidth) throws SQLException, UTGBException {
381 		WigGraphData wigData = prepareWigData(trackId);
382 		CompactWIGData cWig = prepareCompactWigData(wigData, new ChrLoc(null, start, end));
383 		fillPixelsWithMedian(cWig, pixelWidth, trackId, start, end);
384 		return cWig;
385 	}
386 
387 	private WigGraphData prepareWigData(int trackId) throws SQLException {
388 		WigGraphData wigData = new WigGraphData();
389 		wigData.setTrack_id(trackId);
390 		wigData.setBrowser(getBrowser());
391 		wigData.setTrack(getTrack(trackId));
392 		wigData.setMinValue(minValue);
393 		wigData.setMaxValue(maxValue);
394 		return wigData;
395 	}
396 
397 	public WigGraphData getWigData(int trackId) throws SQLException, NumberFormatException, IOException, DataFormatException, ClassNotFoundException,
398 			UTGBException {
399 		return getWigData(trackId, 0, Integer.MAX_VALUE);
400 	}
401 
402 	public ArrayList<WigGraphData> getWigDataList(int windowWidth, String chrom, int start, int end) throws SQLException, NumberFormatException, IOException,
403 			DataFormatException, ClassNotFoundException, UTGBException {
404 		ArrayList<WigGraphData> wigDataList = new ArrayList<WigGraphData>();
405 
406 		for (int id : getTrackIdList(chrom)) {
407 			wigDataList.add(getWigData(windowWidth, id, start, end));
408 		}
409 
410 		return wigDataList;
411 	}
412 
413 	public ArrayList<WigGraphData> getWigDataList(String chrom, int start, int end) throws SQLException, NumberFormatException, IOException,
414 			DataFormatException, ClassNotFoundException, UTGBException {
415 		ArrayList<WigGraphData> wigDataList = new ArrayList<WigGraphData>();
416 
417 		for (int id : getTrackIdList(chrom)) {
418 			wigDataList.add(getWigData(id, start, end));
419 		}
420 
421 		return wigDataList;
422 	}
423 
424 	public static List<WigGraphData> getWigDataList(File fileName, int windowWidth, ChrLoc location, GraphWindow windowFunc) throws UTGBException, SQLException {
425 		ArrayList<WigGraphData> wigDataList = null;
426 
427 		WIGDatabaseReader reader = new WIGDatabaseReader(fileName, windowFunc);
428 		try {
429 			wigDataList = reader.getWigDataList(windowWidth, location.chr, location.start, location.end);
430 		}
431 		catch (Exception e) {
432 			_logger.error(e);
433 			e.printStackTrace(System.err);
434 		}
435 		finally {
436 			reader.close();
437 		}
438 
439 		return wigDataList;
440 	}
441 
442 	public static List<CompactWIGData> getRoughCompactWigDataList(File path, int pixelWidth, ChrLoc location, GraphWindow windowFunc) throws UTGBException,
443 			SQLException {
444 
445 		ArrayList<CompactWIGData> cWig = new ArrayList<CompactWIGData>();
446 
447 		WIGDatabaseReader reader = new WIGDatabaseReader(path, windowFunc);
448 		try {
449 			for (int id : reader.getTrackIdList(location.chr)) {
450 				cWig.add(reader.getCompactWigData(id, location.start, location.end, pixelWidth));
451 			}
452 		}
453 		finally {
454 			reader.close();
455 		}
456 		return cWig;
457 	}
458 
459 	public static List<CompactWIGData> getCompactWigDataList(File path, int pixelWidth, ChrLoc location, GraphWindow windowFunc) throws UTGBException,
460 			SQLException {
461 
462 		int numBlocks = location.length() / WIGDatabaseGenerator.DATA_SPLIT_UNIT;
463 		if (_logger.isDebugEnabled())
464 			_logger.debug(String.format("num blocks: %s in %s, pixel width: %d", numBlocks, location, pixelWidth));
465 		if (numBlocks >= 100) {
466 			// use max values in the wig data table
467 			if (_logger.isDebugEnabled())
468 				_logger.debug(String.format("query wig summary (path:%s, pixel width:%d, loc:%s)", path, pixelWidth, location));
469 			return getRoughCompactWigDataList(path, pixelWidth, location, windowFunc);
470 		}
471 		else {
472 			ArrayList<CompactWIGData> cWig = new ArrayList<CompactWIGData>();
473 			List<WigGraphData> wig = getWigDataList(path, pixelWidth, location, windowFunc);
474 			for (WigGraphData w : wig) {
475 				cWig.add(WIGDatabaseReader.convertResolution(w, location, pixelWidth));
476 			}
477 			return cWig;
478 		}
479 	}
480 
481 	public static CompactWIGData prepareCompactWigData(WigGraphData w, ChrLoc location) {
482 		CompactWIGData cwig = new CompactWIGData();
483 		cwig.setTrack(w.getTrack());
484 		cwig.setMaxValue(w.getMaxValue());
485 		cwig.setMinValue(w.getMinValue());
486 		cwig.setBrowser(w.getBrowser());
487 		cwig.setTrack_id(w.getTrack_id());
488 		cwig.setStart(location.start < location.end ? location.start : location.end);
489 		int span = 1;
490 		if (w.getTrack().containsKey("span")) {
491 			span = Integer.parseInt(w.getTrack().get("span"));
492 			cwig.setSpan(span);
493 		}
494 		return cwig;
495 	}
496 
497 	public static CompactWIGData convertResolution(WigGraphData w, ChrLoc location, int windowWidth) {
498 
499 		CompactWIGData cwig = prepareCompactWigData(w, location);
500 		final int span = cwig.getSpan();
501 
502 		if (_logger.isDebugEnabled())
503 			_logger.debug(String.format("convert resolution: loc:%s, window width:%d", location, windowWidth));
504 
505 		GenomeWindow window = new GenomeWindow(location.start, location.end);
506 
507 		float[] pixelWiseGraphData = new float[windowWidth + span];
508 		for (int i = 0; i < pixelWiseGraphData.length; ++i)
509 			pixelWiseGraphData[i] = 0;
510 
511 		Map<Integer, Float> data = w.getData();
512 		for (Map.Entry<Integer, Float> each : data.entrySet()) {
513 			int xOnGenome = each.getKey();
514 			float val = each.getValue();
515 
516 			int x1 = window.getXPosOnWindow(xOnGenome, windowWidth);
517 			int x2 = window.getXPosOnWindow(xOnGenome + span, windowWidth);
518 			if (x1 == x2)
519 				x2 = x1 + 1;
520 
521 			if (x1 < 0)
522 				x1 = 0;
523 
524 			for (int i = x1; i < x2 && i < windowWidth + span; ++i) {
525 				float current = pixelWiseGraphData[i];
526 				float abs = Math.abs(val);
527 				if (current < abs) {
528 					pixelWiseGraphData[i] = val; // take the max (or min for negative value)
529 				}
530 			}
531 		}
532 
533 		cwig.setData(pixelWiseGraphData);
534 		return cwig;
535 	}
536 
537 }