View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2008 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-shell Project
18  //
19  // BEDDatabaseGenerator.java
20  // Since: May 26, 2009
21  //
22  // $URL$ 
23  // $Author$
24  //--------------------------------------
25  package org.utgenome.format.bed;
26  
27  import java.io.ByteArrayOutputStream;
28  import java.io.File;
29  import java.io.FileReader;
30  import java.io.IOException;
31  import java.io.ObjectOutputStream;
32  import java.io.Reader;
33  import java.sql.Connection;
34  import java.sql.DriverManager;
35  import java.sql.PreparedStatement;
36  import java.sql.ResultSet;
37  import java.sql.SQLException;
38  import java.sql.Statement;
39  import java.util.ArrayList;
40  import java.util.List;
41  
42  import org.utgenome.UTGBException;
43  import org.utgenome.gwt.utgb.client.bio.BEDGene;
44  import org.utgenome.gwt.utgb.client.bio.ChrLoc;
45  import org.utgenome.gwt.utgb.client.bio.OnGenome;
46  import org.xerial.core.XerialException;
47  import org.xerial.db.sql.ResultSetHandler;
48  import org.xerial.db.sql.SQLExpression;
49  import org.xerial.db.sql.sqlite.SQLiteAccess;
50  import org.xerial.lens.SilkLens;
51  import org.xerial.util.log.Logger;
52  
53  /**
54   * Query/Update interface for BED database stored in SQLite
55   * 
56   * @author leo
57   * 
58   */
59  public class BEDDatabase {
60  
61  	private static Logger _logger = Logger.getLogger(BEDDatabase.class);
62  
63  	public static void toSQLiteDB(Reader bedInput, String dbName) throws IOException, XerialException {
64  		BED2SilkReader reader = new BED2SilkReader(bedInput);
65  
66  		DBBuilder query;
67  		try {
68  
69  			Class.forName("org.sqlite.JDBC");
70  			Connection conn = DriverManager.getConnection("jdbc:sqlite:" + dbName);
71  
72  			query = new DBBuilder(conn);
73  			SilkLens.loadSilk(query, reader);
74  
75  			query.dispose();
76  		}
77  		catch (Exception e) {
78  			_logger.error(e);
79  		}
80  	}
81  
82  	public static class DBBuilder {
83  
84  		int geneCount = 0;
85  
86  		Connection conn;
87  		PreparedStatement p1;
88  		PreparedStatement p2;
89  		Statement stat;
90  
91  		public DBBuilder(Connection conn) throws SQLException {
92  			this.conn = conn;
93  			this.stat = conn.createStatement();
94  
95  			stat.executeUpdate("pragma synchronous = off");
96  
97  			conn.setAutoCommit(false);
98  
99  			stat.executeUpdate("drop table if exists track");
100 			stat.executeUpdate("drop table if exists gene");
101 			stat.executeUpdate("create table track (object blob)");
102 
103 			stat.executeUpdate("create table gene (coordinate text, start integer, end integer, " + "name text, score integer, strand text, cds text, "
104 					+ "exon text, color text)");
105 
106 			p1 = conn.prepareStatement("insert into track values(?)");
107 			p2 = conn.prepareStatement("insert into gene values(?, ?, ?, ?, ?, ?, ?, ?, ?)");
108 
109 		}
110 
111 		public void dispose() throws SQLException {
112 
113 			_logger.info("creating indexes...");
114 			stat.executeUpdate("create index gene_index on gene (coordinate, start)");
115 
116 			conn.commit();
117 
118 			p1.close();
119 			p2.close();
120 			stat.close();
121 			conn.close();
122 		}
123 
124 		public void addTrack(BEDTrack track) {
125 			// store track data to db
126 			try {
127 				ByteArrayOutputStream buf = new ByteArrayOutputStream();
128 				ObjectOutputStream out = new ObjectOutputStream(buf);
129 				out.writeObject(track);
130 				out.flush();
131 
132 				p1.setBytes(1, buf.toByteArray());
133 				p1.execute();
134 			}
135 			catch (Exception e) {
136 				_logger.error(e);
137 			}
138 		}
139 
140 		public void addGene(BEDEntry gene) {
141 			// store gene data to db
142 			try {
143 				p2.setString(1, gene.coordinate);
144 				p2.setLong(2, gene.getStart());
145 				p2.setLong(3, gene.getEnd());
146 				p2.setString(4, gene.getName());
147 				p2.setInt(5, gene.score);
148 				p2.setString(6, Character.toString(gene.getStrand()));
149 				p2.setString(7, gene.getCDS().toString());
150 				p2.setString(8, gene.getExon().toString());
151 				p2.setString(9, gene.getColor());
152 
153 				p2.execute();
154 
155 				geneCount++;
156 				if ((geneCount % 10000) == 0)
157 					_logger.info(String.format("added %d entries.", geneCount));
158 
159 			}
160 			catch (Exception e) {
161 				_logger.error(e);
162 			}
163 		}
164 	}
165 
166 	@SuppressWarnings("unchecked")
167 	public static List<OnGenome> overlapQuery(File bedPath, final ChrLoc location) throws UTGBException {
168 
169 		final ArrayList<OnGenome> geneList = new ArrayList<OnGenome>();
170 		int sqlStart = location.end >= location.start ? location.start : location.end;
171 		int sqlEnd = location.end >= location.start ? location.end : location.start;
172 
173 		try {
174 			File dbInput = new File(bedPath.getAbsolutePath() + ".sqlite");
175 			if (dbInput.exists()) {
176 				// use db
177 				SQLiteAccess dbAccess = new SQLiteAccess(dbInput.getAbsolutePath());
178 
179 				try {
180 					String sql = SQLExpression.fillTemplate("select start, end, name, score, strand, cds, exon, color from gene "
181 							+ "where coordinate = '$1' and ((start between $2 and $3) or (start <= $2 and end >= $3))", location.chr, sqlEnd, sqlStart);
182 
183 					if (_logger.isDebugEnabled())
184 						_logger.debug(sql);
185 
186 					dbAccess.query(sql, new ResultSetHandler() {
187 						@Override
188 						public Object handle(ResultSet rs) throws SQLException {
189 							geneList.add(new BEDGene(BEDEntry.createFromResultSet(location.chr, rs)));
190 							return null;
191 						}
192 					});
193 				}
194 				finally {
195 					dbAccess.dispose();
196 				}
197 			}
198 			else {
199 				// use raw text
200 				BED2SilkReader in = null;
201 				try {
202 					in = new BED2SilkReader(new FileReader(bedPath));
203 					BEDRangeQuery query = new BEDRangeQuery(geneList, location.chr, sqlStart, sqlEnd);
204 					SilkLens.loadSilk(query, in);
205 				}
206 				finally {
207 					if (in != null)
208 						in.close();
209 				}
210 			}
211 		}
212 		catch (Exception e) {
213 			throw UTGBException.convert(e);
214 		}
215 
216 		return geneList;
217 	}
218 
219 	public static class BEDRangeQuery implements BEDQuery {
220 		private String coordinate;
221 		private int start;
222 		private int end;
223 		public List<OnGenome> geneList;
224 
225 		public BEDRangeQuery(List<OnGenome> geneList, String coordinate, int start, int end) {
226 			this.geneList = geneList;
227 			this.coordinate = coordinate;
228 			this.start = end >= start ? start : end;
229 			this.end = end >= start ? end : start;
230 		}
231 
232 		public BEDTrack track;
233 
234 		public void addGene(BEDEntry gene) {
235 			if (coordinate.equals(gene.coordinate) && (start <= gene.getEnd()) && (end >= gene.getStart())) {
236 				geneList.add(new BEDGene(gene));
237 			}
238 		}
239 
240 		public void addTrack(BEDTrack track) {
241 
242 		}
243 
244 		public void reportError(Exception e) {
245 			_logger.error(e);
246 		}
247 	}
248 
249 }