View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2007 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-core Project
18  //
19  // EGTXMLReader.java
20  // Since: Dec 21, 2007
21  //
22  // $URL$ 
23  // $Author$
24  //--------------------------------------
25  package org.utgenome.format.egt;
26  
27  import java.io.BufferedReader;
28  import java.io.File;
29  import java.io.FileNotFoundException;
30  import java.io.FileReader;
31  import java.io.InputStreamReader;
32  import java.io.Reader;
33  
34  import org.utgenome.UTGBException;
35  import org.xerial.core.XerialErrorCode;
36  import org.xerial.core.XerialException;
37  import org.xerial.db.DBException;
38  import org.xerial.db.sql.SQLExpression;
39  import org.xerial.db.sql.SQLUtil;
40  import org.xerial.db.sql.sqlite.SQLiteAccess;
41  import org.xerial.lens.XMLLens;
42  import org.xerial.util.log.Logger;
43  import org.xerial.util.opt.Argument;
44  import org.xerial.util.opt.Option;
45  import org.xerial.util.opt.OptionParser;
46  import org.xerial.util.opt.OptionParserException;
47  
48  public class EGTXMLReader {
49  
50  	static abstract class GeneDBGenerator {
51  		final SQLiteAccess sqlite;
52  
53  		public GeneDBGenerator(String dbFileName) throws DBException {
54  			File f;
55  			if ((f = new File(dbFileName)).exists()) {
56  				f.delete();
57  			}
58  			sqlite = new SQLiteAccess(dbFileName);
59  		}
60  
61  		public abstract void insertGene(Gene gene) throws DBException;
62  
63  		public abstract void createIndexes() throws DBException;
64  
65  		public void beginTransaction() throws DBException {
66  			sqlite.update("begin transaction", false);
67  		}
68  
69  		public void commit() throws DBException {
70  			sqlite.update("commit", false);
71  		}
72  	}
73  
74  	static class CompactDBGenerator extends GeneDBGenerator {
75  		int geneCount = 0;
76  
77  		public CompactDBGenerator(String dbFileName) throws DBException {
78  			super(dbFileName);
79  
80  			sqlite.update("create table gene ( " + "id integer primary key not null, " + "target string, " + "start integer, " + "end integer, "
81  					+ "strand string, " + "name string)", false);
82  
83  			sqlite.update("create table gene_info ( " + "id integer primary key not null, " + "url string)", false);
84  
85  			sqlite.update("create table exon ( " + "gene_id integer, " + "id integer, " + "start integer, " + "end integer, "
86  					+ "primary key (gene_id, id, start, end))", false);
87  
88  		}
89  
90  		@Override
91  		public void insertGene(Gene gene) throws DBException {
92  			int geneID = geneCount++;
93  			sqlite.update(
94  					SQLExpression.fillTemplate("insert into gene values($1, $2, $3, $4, $5, $6)", geneID, SQLUtil.singleQuote(gene.getTarget()),
95  							gene.getStart(), gene.getEnd(), SQLUtil.singleQuote(gene.getStrand()), SQLUtil.singleQuote(gene.getName())), false);
96  
97  			sqlite.update(SQLExpression.fillTemplate("insert into gene_info values($1, $2)", geneID, SQLUtil.singleQuote(gene.getUrl())), false);
98  
99  			int exonID = 1;
100 			for (Exon e : gene.getExon()) {
101 				sqlite.update(SQLExpression.fillTemplate("insert into exon values($1, $2, $3, $4)", geneID, exonID++, e.getStart(), e.getEnd()), false);
102 			}
103 
104 		}
105 
106 		@Override
107 		public void createIndexes() throws DBException {
108 
109 		}
110 
111 	}
112 
113 	static class MinimalDBGenerator extends GeneDBGenerator {
114 		int geneCount = 0;
115 
116 		public MinimalDBGenerator(String dbFileName) throws DBException {
117 			super(dbFileName);
118 
119 			sqlite.update("create table gene ( " + "id integer primary key not null, " + "target string, " + "start integer, " + "end integer, "
120 					+ "strand string, " + "name string)", false);
121 
122 			/*
123 			sqlite.update(
124 					"create table gene_info ( " +
125 					"id integer primary key not null, " +
126 					"url string)");
127 			
128 			
129 			sqlite.update(
130 					"create table exon ( " +
131 					"gene_id integer, " +
132 					"id integer, " +
133 					"start integer, " +
134 					"end integer, " +
135 					"primary key (gene_id, id, start, end))");
136 					*/
137 
138 		}
139 
140 		@Override
141 		public void insertGene(Gene gene) throws DBException {
142 			int geneID = geneCount++;
143 			sqlite.update(
144 					SQLExpression.fillTemplate("insert into gene values($1, $2, $3, $4, $5, $6)", geneID, SQLUtil.singleQuote(gene.getTarget()),
145 							gene.getStart(), gene.getEnd(), SQLUtil.singleQuote(gene.getStrand()), SQLUtil.singleQuote(gene.getName())), false);
146 
147 			/*
148 			sqlite.update(SQLExpression.fillTemplate(
149 					"insert into gene_info values($1, $2)", 
150 					geneID,
151 					SQLUtil.singleQuote(gene.getUrl())));
152 			
153 			int exonID = 1;
154 			for(Exon e : gene.getExon())
155 			{
156 				sqlite.update(SQLExpression.fillTemplate(
157 						"insert into exon values($1, $2, $3, $4)",
158 						geneID,
159 						exonID++,
160 						e.getStart(),
161 						e.getEnd()));
162 			}
163 			*/
164 
165 		}
166 
167 		@Override
168 		public void createIndexes() throws DBException {
169 
170 		}
171 
172 	}
173 
174 	static class WithIndexDBGenerator extends MinimalDBGenerator {
175 		public WithIndexDBGenerator(String dbName) throws DBException {
176 			super(dbName);
177 
178 			sqlite.update("create index gene_index on gene (target, start)", false);
179 
180 		}
181 	}
182 
183 	private static enum TableType {
184 		COMPACT, MINIMAL, WITHINDEX, DENORMALIZED, VERBOSE
185 	}
186 
187 	public static enum Opt {
188 		HELP, DBTYPE, READ_STDIN, DBFILENAME, USEMEMORYDB
189 	}
190 
191 	private static Logger _logger = Logger.getLogger(EGTXMLReader.class);
192 
193 	private GeneDBGenerator dbGenerator;
194 
195 	@Option(symbol = "h", longName = "help", description = "display help message")
196 	private boolean displayHelp = false;
197 	@Option(symbol = "t", longName = "type", varName = "DBTYPE", description = "(compact|minimal|denomalized|verbose)")
198 	private TableType tableType = TableType.COMPACT;
199 
200 	@Option(symbol = "o", longName = "out", varName = "DBFILE", description = "output db file name")
201 	private String dbName = "egt.db";
202 
203 	@Option(symbol = "c", longName = "stream", description = "read from standard input")
204 	private boolean useSTDIN = false;
205 
206 	@Option(symbol = "m", longName = "memory", description = "use memory db")
207 	private boolean useMemoryDB = false;
208 
209 	@Argument(index = 0)
210 	private String xmlFile = null;
211 
212 	public static void main(String[] args) {
213 		try {
214 			EGTXMLReader reader = new EGTXMLReader(args);
215 		}
216 		catch (OptionParserException e) {
217 			System.err.println(e);
218 		}
219 	}
220 
221 	public EGTXMLReader(String[] args) throws OptionParserException {
222 		OptionParser parser = new OptionParser(this);
223 		parser.parse(args);
224 
225 		if (displayHelp) {
226 			parser.printUsage();
227 			return;
228 		}
229 
230 		try {
231 
232 			switch (tableType) {
233 			case MINIMAL:
234 				dbGenerator = new MinimalDBGenerator(dbName);
235 				break;
236 			case WITHINDEX:
237 				dbGenerator = new WithIndexDBGenerator(dbName);
238 				break;
239 			case COMPACT:
240 			default:
241 				dbGenerator = new CompactDBGenerator(dbName);
242 				break;
243 			}
244 
245 			if (useSTDIN) {
246 				load(new InputStreamReader(System.in));
247 			}
248 			else {
249 				if (xmlFile == null)
250 					throw new OptionParserException(XerialErrorCode.INVALID_INPUT, "no xml file is given");
251 
252 				Reader xmlReader = null;
253 				try {
254 					xmlReader = new BufferedReader(new FileReader(xmlFile));
255 					load(xmlReader);
256 				}
257 				catch (FileNotFoundException e) {
258 					throw new UTGBException(e);
259 				}
260 				finally {
261 					if (xmlReader != null)
262 						xmlReader.close();
263 				}
264 			}
265 		}
266 		catch (Exception e) {
267 			e.printStackTrace();
268 		}
269 
270 	}
271 
272 	public void load(Reader xmlReader) throws UTGBException, XerialException, DBException {
273 		if (dbGenerator == null)
274 			throw new UTGBException("db generator is null");
275 
276 		beginTime = System.currentTimeMillis();
277 
278 		XMLLens.populateBeanWithXML(this, xmlReader);
279 		dbGenerator.commit();
280 	}
281 
282 	private int geneCount = 0;
283 	private long beginTime = 0;
284 
285 	public void addGene(Gene gene) {
286 		try {
287 			dbGenerator.insertGene(gene);
288 			if ((geneCount % 10000) == 0) {
289 				_logger.info("inserted: " + geneCount + "\t " + (System.currentTimeMillis() - beginTime) / 1000.0 + " sec.");
290 			}
291 			geneCount++;
292 		}
293 		catch (DBException e) {
294 			_logger.error(e);
295 		}
296 	}
297 
298 }