1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.utgenome.format.bed;
26
27 import java.io.ByteArrayOutputStream;
28 import java.io.File;
29 import java.io.FileReader;
30 import java.io.IOException;
31 import java.io.ObjectOutputStream;
32 import java.io.Reader;
33 import java.sql.Connection;
34 import java.sql.DriverManager;
35 import java.sql.PreparedStatement;
36 import java.sql.ResultSet;
37 import java.sql.SQLException;
38 import java.sql.Statement;
39 import java.util.ArrayList;
40 import java.util.List;
41
42 import org.utgenome.UTGBException;
43 import org.utgenome.gwt.utgb.client.bio.BEDGene;
44 import org.utgenome.gwt.utgb.client.bio.ChrLoc;
45 import org.utgenome.gwt.utgb.client.bio.OnGenome;
46 import org.xerial.core.XerialException;
47 import org.xerial.db.sql.ResultSetHandler;
48 import org.xerial.db.sql.SQLExpression;
49 import org.xerial.db.sql.sqlite.SQLiteAccess;
50 import org.xerial.lens.SilkLens;
51 import org.xerial.util.log.Logger;
52
53
54
55
56
57
58
59 public class BEDDatabase {
60
61 private static Logger _logger = Logger.getLogger(BEDDatabase.class);
62
63 public static void toSQLiteDB(Reader bedInput, String dbName) throws IOException, XerialException {
64 BED2SilkReader reader = new BED2SilkReader(bedInput);
65
66 DBBuilder query;
67 try {
68
69 Class.forName("org.sqlite.JDBC");
70 Connection conn = DriverManager.getConnection("jdbc:sqlite:" + dbName);
71
72 query = new DBBuilder(conn);
73 SilkLens.loadSilk(query, reader);
74
75 query.dispose();
76 }
77 catch (Exception e) {
78 _logger.error(e);
79 }
80 }
81
82 public static class DBBuilder {
83
84 int geneCount = 0;
85
86 Connection conn;
87 PreparedStatement p1;
88 PreparedStatement p2;
89 Statement stat;
90
91 public DBBuilder(Connection conn) throws SQLException {
92 this.conn = conn;
93 this.stat = conn.createStatement();
94
95 stat.executeUpdate("pragma synchronous = off");
96
97 conn.setAutoCommit(false);
98
99 stat.executeUpdate("drop table if exists track");
100 stat.executeUpdate("drop table if exists gene");
101 stat.executeUpdate("create table track (object blob)");
102
103 stat.executeUpdate("create table gene (coordinate text, start integer, end integer, " + "name text, score integer, strand text, cds text, "
104 + "exon text, color text)");
105
106 p1 = conn.prepareStatement("insert into track values(?)");
107 p2 = conn.prepareStatement("insert into gene values(?, ?, ?, ?, ?, ?, ?, ?, ?)");
108
109 }
110
111 public void dispose() throws SQLException {
112
113 _logger.info("creating indexes...");
114 stat.executeUpdate("create index gene_index on gene (coordinate, start)");
115
116 conn.commit();
117
118 p1.close();
119 p2.close();
120 stat.close();
121 conn.close();
122 }
123
124 public void addTrack(BEDTrack track) {
125
126 try {
127 ByteArrayOutputStream buf = new ByteArrayOutputStream();
128 ObjectOutputStream out = new ObjectOutputStream(buf);
129 out.writeObject(track);
130 out.flush();
131
132 p1.setBytes(1, buf.toByteArray());
133 p1.execute();
134 }
135 catch (Exception e) {
136 _logger.error(e);
137 }
138 }
139
140 public void addGene(BEDEntry gene) {
141
142 try {
143 p2.setString(1, gene.coordinate);
144 p2.setLong(2, gene.getStart());
145 p2.setLong(3, gene.getEnd());
146 p2.setString(4, gene.getName());
147 p2.setInt(5, gene.score);
148 p2.setString(6, Character.toString(gene.getStrand()));
149 p2.setString(7, gene.getCDS().toString());
150 p2.setString(8, gene.getExon().toString());
151 p2.setString(9, gene.getColor());
152
153 p2.execute();
154
155 geneCount++;
156 if ((geneCount % 10000) == 0)
157 _logger.info(String.format("added %d entries.", geneCount));
158
159 }
160 catch (Exception e) {
161 _logger.error(e);
162 }
163 }
164 }
165
166 @SuppressWarnings("unchecked")
167 public static List<OnGenome> overlapQuery(File bedPath, final ChrLoc location) throws UTGBException {
168
169 final ArrayList<OnGenome> geneList = new ArrayList<OnGenome>();
170 int sqlStart = location.end >= location.start ? location.start : location.end;
171 int sqlEnd = location.end >= location.start ? location.end : location.start;
172
173 try {
174 File dbInput = new File(bedPath.getAbsolutePath() + ".sqlite");
175 if (dbInput.exists()) {
176
177 SQLiteAccess dbAccess = new SQLiteAccess(dbInput.getAbsolutePath());
178
179 try {
180 String sql = SQLExpression.fillTemplate("select start, end, name, score, strand, cds, exon, color from gene "
181 + "where coordinate = '$1' and ((start between $2 and $3) or (start <= $2 and end >= $3))", location.chr, sqlEnd, sqlStart);
182
183 if (_logger.isDebugEnabled())
184 _logger.debug(sql);
185
186 dbAccess.query(sql, new ResultSetHandler() {
187 @Override
188 public Object handle(ResultSet rs) throws SQLException {
189 geneList.add(new BEDGene(BEDEntry.createFromResultSet(location.chr, rs)));
190 return null;
191 }
192 });
193 }
194 finally {
195 dbAccess.dispose();
196 }
197 }
198 else {
199
200 BED2SilkReader in = null;
201 try {
202 in = new BED2SilkReader(new FileReader(bedPath));
203 BEDRangeQuery query = new BEDRangeQuery(geneList, location.chr, sqlStart, sqlEnd);
204 SilkLens.loadSilk(query, in);
205 }
206 finally {
207 if (in != null)
208 in.close();
209 }
210 }
211 }
212 catch (Exception e) {
213 throw UTGBException.convert(e);
214 }
215
216 return geneList;
217 }
218
219 public static class BEDRangeQuery implements BEDQuery {
220 private String coordinate;
221 private int start;
222 private int end;
223 public List<OnGenome> geneList;
224
225 public BEDRangeQuery(List<OnGenome> geneList, String coordinate, int start, int end) {
226 this.geneList = geneList;
227 this.coordinate = coordinate;
228 this.start = end >= start ? start : end;
229 this.end = end >= start ? end : start;
230 }
231
232 public BEDTrack track;
233
234 public void addGene(BEDEntry gene) {
235 if (coordinate.equals(gene.coordinate) && (start <= gene.getEnd()) && (end >= gene.getStart())) {
236 geneList.add(new BEDGene(gene));
237 }
238 }
239
240 public void addTrack(BEDTrack track) {
241
242 }
243
244 public void reportError(Exception e) {
245 _logger.error(e);
246 }
247 }
248
249 }