1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.utgenome.format.egt;
26
27 import java.io.BufferedReader;
28 import java.io.File;
29 import java.io.FileNotFoundException;
30 import java.io.FileReader;
31 import java.io.InputStreamReader;
32 import java.io.Reader;
33
34 import org.utgenome.UTGBException;
35 import org.xerial.core.XerialErrorCode;
36 import org.xerial.core.XerialException;
37 import org.xerial.db.DBException;
38 import org.xerial.db.sql.SQLExpression;
39 import org.xerial.db.sql.SQLUtil;
40 import org.xerial.db.sql.sqlite.SQLiteAccess;
41 import org.xerial.lens.XMLLens;
42 import org.xerial.util.log.Logger;
43 import org.xerial.util.opt.Argument;
44 import org.xerial.util.opt.Option;
45 import org.xerial.util.opt.OptionParser;
46 import org.xerial.util.opt.OptionParserException;
47
48 public class EGTXMLReader {
49
50 static abstract class GeneDBGenerator {
51 final SQLiteAccess sqlite;
52
53 public GeneDBGenerator(String dbFileName) throws DBException {
54 File f;
55 if ((f = new File(dbFileName)).exists()) {
56 f.delete();
57 }
58 sqlite = new SQLiteAccess(dbFileName);
59 }
60
61 public abstract void insertGene(Gene gene) throws DBException;
62
63 public abstract void createIndexes() throws DBException;
64
65 public void beginTransaction() throws DBException {
66 sqlite.update("begin transaction", false);
67 }
68
69 public void commit() throws DBException {
70 sqlite.update("commit", false);
71 }
72 }
73
74 static class CompactDBGenerator extends GeneDBGenerator {
75 int geneCount = 0;
76
77 public CompactDBGenerator(String dbFileName) throws DBException {
78 super(dbFileName);
79
80 sqlite.update("create table gene ( " + "id integer primary key not null, " + "target string, " + "start integer, " + "end integer, "
81 + "strand string, " + "name string)", false);
82
83 sqlite.update("create table gene_info ( " + "id integer primary key not null, " + "url string)", false);
84
85 sqlite.update("create table exon ( " + "gene_id integer, " + "id integer, " + "start integer, " + "end integer, "
86 + "primary key (gene_id, id, start, end))", false);
87
88 }
89
90 @Override
91 public void insertGene(Gene gene) throws DBException {
92 int geneID = geneCount++;
93 sqlite.update(
94 SQLExpression.fillTemplate("insert into gene values($1, $2, $3, $4, $5, $6)", geneID, SQLUtil.singleQuote(gene.getTarget()),
95 gene.getStart(), gene.getEnd(), SQLUtil.singleQuote(gene.getStrand()), SQLUtil.singleQuote(gene.getName())), false);
96
97 sqlite.update(SQLExpression.fillTemplate("insert into gene_info values($1, $2)", geneID, SQLUtil.singleQuote(gene.getUrl())), false);
98
99 int exonID = 1;
100 for (Exon e : gene.getExon()) {
101 sqlite.update(SQLExpression.fillTemplate("insert into exon values($1, $2, $3, $4)", geneID, exonID++, e.getStart(), e.getEnd()), false);
102 }
103
104 }
105
106 @Override
107 public void createIndexes() throws DBException {
108
109 }
110
111 }
112
113 static class MinimalDBGenerator extends GeneDBGenerator {
114 int geneCount = 0;
115
116 public MinimalDBGenerator(String dbFileName) throws DBException {
117 super(dbFileName);
118
119 sqlite.update("create table gene ( " + "id integer primary key not null, " + "target string, " + "start integer, " + "end integer, "
120 + "strand string, " + "name string)", false);
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138 }
139
140 @Override
141 public void insertGene(Gene gene) throws DBException {
142 int geneID = geneCount++;
143 sqlite.update(
144 SQLExpression.fillTemplate("insert into gene values($1, $2, $3, $4, $5, $6)", geneID, SQLUtil.singleQuote(gene.getTarget()),
145 gene.getStart(), gene.getEnd(), SQLUtil.singleQuote(gene.getStrand()), SQLUtil.singleQuote(gene.getName())), false);
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165 }
166
167 @Override
168 public void createIndexes() throws DBException {
169
170 }
171
172 }
173
174 static class WithIndexDBGenerator extends MinimalDBGenerator {
175 public WithIndexDBGenerator(String dbName) throws DBException {
176 super(dbName);
177
178 sqlite.update("create index gene_index on gene (target, start)", false);
179
180 }
181 }
182
183 private static enum TableType {
184 COMPACT, MINIMAL, WITHINDEX, DENORMALIZED, VERBOSE
185 }
186
187 public static enum Opt {
188 HELP, DBTYPE, READ_STDIN, DBFILENAME, USEMEMORYDB
189 }
190
191 private static Logger _logger = Logger.getLogger(EGTXMLReader.class);
192
193 private GeneDBGenerator dbGenerator;
194
195 @Option(symbol = "h", longName = "help", description = "display help message")
196 private boolean displayHelp = false;
197 @Option(symbol = "t", longName = "type", varName = "DBTYPE", description = "(compact|minimal|denomalized|verbose)")
198 private TableType tableType = TableType.COMPACT;
199
200 @Option(symbol = "o", longName = "out", varName = "DBFILE", description = "output db file name")
201 private String dbName = "egt.db";
202
203 @Option(symbol = "c", longName = "stream", description = "read from standard input")
204 private boolean useSTDIN = false;
205
206 @Option(symbol = "m", longName = "memory", description = "use memory db")
207 private boolean useMemoryDB = false;
208
209 @Argument(index = 0)
210 private String xmlFile = null;
211
212 public static void main(String[] args) {
213 try {
214 EGTXMLReader reader = new EGTXMLReader(args);
215 }
216 catch (OptionParserException e) {
217 System.err.println(e);
218 }
219 }
220
/**
 * Parses the command-line arguments, creates the database writer for the
 * requested table type and loads the genes from standard input or from the
 * given XML file.
 *
 * Failures raised while creating the database or while loading are printed
 * as a stack trace and not rethrown.
 *
 * @param args command-line arguments
 * @throws OptionParserException if the arguments cannot be parsed, or if no
 *         XML file is given and standard input is not selected
 */
public EGTXMLReader(String[] args) throws OptionParserException {
    OptionParser parser = new OptionParser(this);
    parser.parse(args);

    if (displayHelp) {
        parser.printUsage();
        return;
    }

    // Validate the input before any database work: the generator constructors
    // delete an existing database file, and an exception thrown inside the try
    // block below would be swallowed by its catch-all handler instead of
    // reaching the caller as declared.
    if (!useSTDIN && xmlFile == null)
        throw new OptionParserException(XerialErrorCode.INVALID_INPUT, "no xml file is given");

    try {

        switch (tableType) {
        case MINIMAL:
            dbGenerator = new MinimalDBGenerator(dbName);
            break;
        case WITHINDEX:
            dbGenerator = new WithIndexDBGenerator(dbName);
            break;
        case COMPACT:
        default:
            // DENORMALIZED and VERBOSE have no generator of their own and end up here
            dbGenerator = new CompactDBGenerator(dbName);
            break;
        }

        if (useSTDIN) {
            load(new InputStreamReader(System.in));
        }
        else {
            Reader xmlReader = null;
            try {
                xmlReader = new BufferedReader(new FileReader(xmlFile));
                load(xmlReader);
            }
            catch (FileNotFoundException e) {
                throw new UTGBException(e);
            }
            finally {
                if (xmlReader != null)
                    xmlReader.close();
            }
        }
    }
    catch (Exception e) {
        e.printStackTrace();
    }

}
271
272 public void load(Reader xmlReader) throws UTGBException, XerialException, DBException {
273 if (dbGenerator == null)
274 throw new UTGBException("db generator is null");
275
276 beginTime = System.currentTimeMillis();
277
278 XMLLens.populateBeanWithXML(this, xmlReader);
279 dbGenerator.commit();
280 }
281
282 private int geneCount = 0;
283 private long beginTime = 0;
284
285 public void addGene(Gene gene) {
286 try {
287 dbGenerator.insertGene(gene);
288 if ((geneCount % 10000) == 0) {
289 _logger.info("inserted: " + geneCount + "\t " + (System.currentTimeMillis() - beginTime) / 1000.0 + " sec.");
290 }
291 geneCount++;
292 }
293 catch (DBException e) {
294 _logger.error(e);
295 }
296 }
297
298 }