View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2008 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-shell Project
18  //
// ReadDBBuilder.java
20  // Since: Apr 10, 2009
21  //
22  // $URL$ 
23  // $Author$
24  //--------------------------------------
25  package org.utgenome.format.silk.read;
26  
import java.io.Reader;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;

import org.xerial.core.XerialError;
import org.xerial.core.XerialErrorCode;
import org.xerial.lens.Lens;
import org.xerial.silk.SilkEnv;
import org.xerial.silk.SilkParser;
import org.xerial.silk.SilkParserConfig;
import org.xerial.util.log.Logger;
41  
42  /**
43   * Reader
44   * 
45   * @author leo
46   * 
47   */
48  public class ReadDBBuilder {
49  
50  	private static Logger _logger = Logger.getLogger(ReadDBBuilder.class);
51  
52  	public static class ReferenceReader extends Reference {
53  		private static int count = 1;
54  		private final int id;
55  
56  		private StringBuilder sequenceBuilder = new StringBuilder();
57  
58  		public ReferenceReader() {
59  			id = count++;
60  		}
61  
62  		public void addRead(Read read) {
63  			// add read data
64  			if (_logger.isDebugEnabled())
65  				_logger.debug("add read: " + read);
66  
67  			try {
68  				stat.execute(String.format("insert into read values(%d, '%s', %d, %d, %d, %d, '%s', '%s')", this.id, read.name, read.viewstart, read.viewend,
69  						read.start, read.end, read.strand, read.sequence));
70  			}
71  			catch (SQLException e) {
72  				_logger.error(e);
73  			}
74  
75  		}
76  
77  		public void appendSequence(String seq) {
78  			sequenceBuilder.append(seq);
79  		}
80  
81  		@Override
82  		public String toString() {
83  			return String.format("reference: name=%s, start=%d, strand=%s\nsequence=%s", name, start, strand, sequenceBuilder.toString());
84  		}
85  
86  	}
87  
88  	private static Connection conn = null;
89  	private static Statement stat = null;
90  
91  	public ReadDBBuilder() {
92  
93  	}
94  
95  	public ReadDBBuilder(String dbFilePath) throws SQLException {
96  		try {
97  			Class.forName("org.sqlite.JDBC");
98  			conn = DriverManager.getConnection("jdbc:sqlite:" + dbFilePath);
99  
100 			conn.setAutoCommit(true);
101 			stat = conn.createStatement();
102 			stat.execute("pragma synchronous=off");
103 			conn.setAutoCommit(false);
104 
105 			// prepare table
106 			stat.execute("create table coordinate('group' text, species text, revision text)");
107 			stat.execute("create table reference(id integer primary key, name text, start integer, strand text, tag integer, score integer, sequence text)");
108 			stat
109 					.execute("create table read(reference_id integer, name text, view_start integer, view_end integer, start integer, end integer, strand text, sequence text)");
110 		}
111 		catch (ClassNotFoundException e) {
112 			throw new XerialError(XerialErrorCode.INVALID_STATE, "sqlite JDBC not found");
113 		}
114 
115 	}
116 
117 	public void build(Reader input) throws SQLException {
118 		try {
119 			SilkParserConfig config = new SilkParserConfig();
120 			config.bufferSize = 8 * 1024 * 1024;
121 			config.numWorkers = 2;
122 			SilkParser parser = new SilkParser(input, SilkEnv.newEnv(), config);
123 			Lens.load(this, parser);
124 		}
125 		catch (Exception e) {
126 			_logger.error(e);
127 		}
128 		finally {
129 			commit();
130 		}
131 
132 	}
133 
134 	public void build(URL silkFile) throws SQLException {
135 		// TODO remove duplicate code
136 		try {
137 			SilkParserConfig config = new SilkParserConfig();
138 			config.bufferSize = 8 * 1024 * 1024;
139 			config.numWorkers = 2;
140 			SilkParser parser = new SilkParser(silkFile, config);
141 			Lens.load(this, parser);
142 		}
143 		catch (Exception e) {
144 			_logger.error(e);
145 		}
146 		finally {
147 			commit();
148 		}
149 	}
150 
151 	public void setCoordinate(Coordinate coordinate) throws SQLException {
152 		if (_logger.isDebugEnabled())
153 			_logger.debug("set coordinate: " + coordinate);
154 
155 		stat.execute(String.format("insert into coordinate values('%s', '%s', '%s')", coordinate.group, coordinate.species, coordinate.revision));
156 
157 	}
158 
159 	public void addReference(ReferenceReader reference) throws SQLException {
160 		if (_logger.isDebugEnabled())
161 			_logger.debug("add reference: " + reference);
162 
163 		stat.execute(String.format("insert into reference values(%d, '%s', %d, '%s', %d, %d, '%s')", reference.id, reference.name, reference.start,
164 				reference.strand, reference.tag, reference.score, reference.sequenceBuilder.toString()));
165 
166 	}
167 
168 	public void commit() throws SQLException {
169 		conn.commit();
170 		conn.close();
171 		_logger.info("commit done.");
172 
173 		stat = null;
174 		conn = null;
175 	}
176 
177 }