View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2010 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-core Project
18  //
19  // Blast2Silk.java
20  // Since: 2010/09/02
21  //
22  //--------------------------------------
23  package org.utgenome.format.blast;
24  
25  import java.io.BufferedReader;
26  import java.io.IOException;
27  import java.io.Reader;
28  import java.io.Writer;
29  import java.util.ArrayList;
30  import java.util.List;
31  import java.util.regex.Matcher;
32  import java.util.regex.Pattern;
33  
34  import org.utgenome.format.FormatConversionReader;
35  import org.utgenome.gwt.utgb.client.bio.ReadList;
36  import org.xerial.silk.SilkWriter;
37  import org.xerial.util.ArrayDeque;
38  
39  /**
40   * Converts BLAST output (in its default format) into Silk
41   * 
42   * @author leo
43   * 
44   */
45  public class Blast2Silk extends FormatConversionReader {
46  
    	/**
    	 * Creates a conversion reader over the given input, handing the input and a
    	 * new {@link Blast2SilkConverter} to the {@link FormatConversionReader}
    	 * constructor.
    	 * 
    	 * @param reader
    	 *            source of the BLAST text to convert
    	 * @throws IOException
    	 *             declared by the superclass constructor (presumably raised when
    	 *             the conversion pipe cannot be set up -- confirm against
    	 *             FormatConversionReader)
    	 */
47  	public Blast2Silk(Reader reader) throws IOException {
48  		super(reader, new Blast2SilkConverter());
49  	}
50  
51  	public static class BlastResult {
52  		public String header;
53  		public String reference;
54  		public String queryName;
55  		public long queryLetters;
56  		public String databaseName;
57  
58  		public List<BlastAlignment> alignment = new ArrayList<BlastAlignment>();
59  
60  		public static class BlastAlignment {
61  
62  			public String targetName;
63  			public long targetLength;
64  			public int bitScore;
65  			public float eValue;
66  			public int matchLength;
67  			public int alignmentLength;
68  
69  			public static enum Strand {
70  				Plus, Minus
71  			}
72  
73  			public Strand queryStrand = Strand.Plus;
74  			public Strand targetStrand = Strand.Plus;
75  
76  		}
77  
78  	}
79  
80  	public static class Blast2SilkConverter extends FormatConversionReader.PipeConsumer {
81  
82  		Pattern queryLine = Pattern.compile("^Query=\\s*(\\S+)");
83  		Pattern qlenLine = Pattern.compile("^\\s+\\((\\S+)\\s+letters\\)");
84  		Pattern refNameLine = Pattern.compile("^>(\\S+)");
85  		Pattern scoreLine = Pattern.compile("Score\\s+=\\s+(\\S+)\\s+bits.+Expect(\\(\\d+\\))?\\s+=\\s+(\\S+)");
86  		Pattern identitiesLine = Pattern.compile("Identities\\s+=\\s+(\\S)/(\\S)\\s+\\((\\S+%)\\)");
87  		Pattern strandLine = Pattern.compile("Strand\\s+=\\s+(\\S+)\\s+/\\s+(\\S+)");
88  		Pattern queryAlignmentLine = Pattern.compile("(^Query:\\s(\\d+)\\s*)(\\S+)\\s(\\d+)");
89  
90  		private PeekableReader reader;
91  
92  		private void parseBlastEntry() throws IOException {
93  
94  			ReadList block = new ReadList();
95  			int queryLength = -1;
96  			String rname = null;
97  
98  			for (String line; (line = reader.peekNextLine()) != null; reader.readLine()) {
99  				Matcher m;
100 				if ((m = queryLine.matcher(line)).find()) {
101 					String querySequenceName = m.group(1);
102 					block.setName(querySequenceName);
103 					reader.readLine();
104 				}
105 				else if ((m = qlenLine.matcher(line)).find()) {
106 					queryLength = Integer.parseInt(m.group(1).replaceAll(",", ""));
107 				}
108 				else if ((m = refNameLine.matcher(line)).find()) {
109 					rname = m.group(1);
110 				}
111 				else if ((m = scoreLine.matcher(line)).find()) {
112 					//fragment.bitScore = (int) (Float.parseFloat(m.group(1)) + 0.5f);
113 					//fragment.eValue = m.group(2);
114 				}
115 				else if ((m = identitiesLine.matcher(line)).find()) {
116 					//fragment.identity = Integer.parseInt(m.group(1));
117 					//fragment.matchLength = Integer.parseInt(m.group(1));
118 				}
119 				else if ((m = strandLine.matcher(line)).find()) {
120 					//fragment.setStrand("Minus".equals(m.group(1)) ? "-" : "+");
121 				}
122 				else if ((m = queryAlignmentLine.matcher(line)).find()) {
123 					int qStart = Integer.parseInt(m.group(2));
124 					String qSeq = m.group(3);
125 					int qEnd = Integer.parseInt(m.group(4));
126 
127 					int diffStringPos = m.group(1).length();
128 					line = reader.readLine();
129 					if (line == null)
130 						continue;
131 					else {
132 						// read diff string
133 						String diff = line.substring(diffStringPos);
134 						for (int i = 0; i < diff.length(); ++i) {
135 
136 						}
137 					}
138 
139 				}
140 			}
141 
142 		}
143 
144 		@Override
145 		public void consume(Reader in, Writer out) throws Exception {
146 
147 			reader = new PeekableReader(new BufferedReader(in));
148 			SilkWriter silk = new SilkWriter(out);
149 
150 			ReadList block = null;
151 			BlastResult fragment = null;
152 			int queryLength = -1;
153 			String rname;
154 
155 			for (String line; (line = reader.peekNextLine()) != null;) {
156 
157 				if (line.startsWith("BLAST")) {
158 					reader.readLine();
159 					parseBlastEntry();
160 				}
161 
162 			}
163 
164 		}
165 	}
166 
167 	public static class PeekableReader extends BufferedReader {
168 
169 		private ArrayDeque<String> lineBuffer = new ArrayDeque<String>();
170 		private boolean hasNoMoreLine = false;
171 
172 		public PeekableReader(BufferedReader out) {
173 			super(out);
174 		}
175 
176 		private void fillQueue() throws IOException {
177 			if (!lineBuffer.isEmpty())
178 				return;
179 
180 			String line = super.readLine();
181 			if (line == null)
182 				hasNoMoreLine = true;
183 			else
184 				lineBuffer.push(line);
185 		}
186 
187 		public String peekNextLine() throws IOException {
188 			if (!lineBuffer.isEmpty())
189 				return lineBuffer.peekFirst();
190 			else {
191 				if (hasNoMoreLine)
192 					return null;
193 				fillQueue();
194 				return peekNextLine();
195 			}
196 		}
197 
198 		@Override
199 		public String readLine() throws IOException {
200 			if (!lineBuffer.isEmpty())
201 				return lineBuffer.pollFirst();
202 			else {
203 				if (hasNoMoreLine)
204 					return null;
205 				fillQueue();
206 				return readLine();
207 			}
208 		};
209 	}
210 
211 }