View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2010 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-core Project
18  //
19  // Sam2WigConverter.java
20  // Since: 2010/09/28
21  //
22  //--------------------------------------
23  package org.utgenome.format.sam;
24  
25  import java.io.File;
26  import java.io.IOException;
27  import java.io.Writer;
28  import java.util.Iterator;
29  
30  import net.sf.samtools.SAMFileHeader;
31  import net.sf.samtools.SAMFileReader;
32  import net.sf.samtools.SAMFileReader.ValidationStringency;
33  import net.sf.samtools.SAMRecord;
34  import net.sf.samtools.SAMRecordIterator;
35  import net.sf.samtools.SAMSequenceRecord;
36  
37  import org.utgenome.UTGBException;
38  import org.utgenome.gwt.utgb.client.bio.Interval;
39  import org.utgenome.gwt.utgb.client.bio.OnGenome;
40  import org.utgenome.gwt.utgb.client.bio.OnGenomeDataVisitor;
41  import org.utgenome.util.ReadDepth;
42  import org.utgenome.util.ReadDepth.DepthOutput;
43  import org.xerial.util.ArrayDeque;
44  import org.xerial.util.Deque;
45  import org.xerial.util.log.Logger;
46  
47  /**
48   * Converting SAM into WIG (coverage depth)
49   * 
50   * @author leo
51   * 
52   */
53  public class Sam2WigConverter {
54  
55  	private static Logger _logger = Logger.getLogger(Sam2WigConverter.class);
56  
57  	private Deque<Interval> readSetInStartOrder = new ArrayDeque<Interval>();
58  	private String currentChr = null;
59  	private int sweepLine = 1;
60  
61  	/**
62  	 * Convert the input SAM file records into WIG format of read depth
63  	 * 
64  	 * @param samOrBam
65  	 * @param out
66  	 * @throws IOException
67  	 * @throws UTGBException
68  	 */
69  	public void convert(File samOrBam, Writer out) throws UTGBException {
70  		try {
71  			this.reporter = new CoverageWriter(out);
72  			SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
73  			SAMFileReader samReader = new SAMFileReader(samOrBam);
74  
75  			try {
76  				// for each chromosome
77  				SAMFileHeader fileHeader = samReader.getFileHeader();
78  				for (SAMSequenceRecord each : fileHeader.getSequenceDictionary().getSequences()) {
79  					String chr = each.getSequenceName();
80  					_logger.info("processing " + chr);
81  					convert(samOrBam, chr, 1, each.getSequenceLength());
82  				}
83  			}
84  			finally {
85  				samReader.close();
86  				this.reporter.flush();
87  			}
88  		}
89  		catch (IOException e) {
90  			throw UTGBException.convert(e);
91  		}
92  		_logger.info("done.");
93  	}
94  
95  	/**
96  	 * Iterator of SAMRecord
97  	 * 
98  	 * @author leo
99  	 * 
100 	 */
101 	public static class SAMRecordCursor implements Iterator<OnGenome> {
102 
103 		private static class SAMRecordWrap extends Interval {
104 
105 			private static final long serialVersionUID = 1L;
106 			final SAMRecord read;
107 
108 			public SAMRecordWrap(SAMRecord read) {
109 				super(read.getAlignmentStart(), read.getAlignmentEnd() + 1);
110 				this.read = read;
111 			}
112 
113 			@Override
114 			public String getName() {
115 				return read.getReadName();
116 			}
117 
118 			@Override
119 			public void accept(OnGenomeDataVisitor visitor) {
120 				visitor.visitInterval(this);
121 			}
122 
123 		}
124 
125 		private final Deque<SAMRecord> queue = new ArrayDeque<SAMRecord>();
126 		private final SAMRecordIterator cursor;
127 
128 		public SAMRecordCursor(SAMRecordIterator cursor) {
129 			this.cursor = cursor;
130 		}
131 
132 		public boolean hasNext() {
133 			if (!queue.isEmpty())
134 				return true;
135 
136 			for (; cursor.hasNext();) {
137 				SAMRecord next = cursor.next();
138 				if (next.getReadUnmappedFlag()) {
139 					continue;
140 				}
141 				queue.add(next);
142 				return true;
143 			}
144 
145 			return false;
146 		}
147 
148 		public OnGenome next() {
149 			if (hasNext())
150 				return new SAMRecordWrap(queue.pollFirst());
151 
152 			return null;
153 		}
154 
155 		public void remove() {
156 			throw new UnsupportedOperationException("remove");
157 
158 		}
159 
160 		public void close() {
161 			cursor.close();
162 		}
163 
164 	}
165 
166 	public void convert(File samOrBam, String chr, int start, int end) throws UTGBException {
167 
168 		SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
169 		SAMFileReader samReader = new SAMFileReader(samOrBam, SAMReader.getBamIndexFile(samOrBam));
170 		SAMRecordCursor cursor = null;
171 		try {
172 			cursor = new SAMRecordCursor(samReader.queryOverlapping(chr, start, end));
173 			reporter.switchChr();
174 			ReadDepth.compute(chr, cursor, reporter);
175 		}
176 		catch (Exception e) {
177 			throw UTGBException.convert(e);
178 		}
179 		finally {
180 			if (cursor != null)
181 				cursor.close();
182 			samReader.close();
183 		}
184 	}
185 
186 	private CoverageWriter reporter;
187 
188 	public static class CoverageWriter implements DepthOutput {
189 		private enum State {
190 			LEADING_ZEROs, AFTER_HEADER
191 		}
192 
193 		private State state = State.LEADING_ZEROs;
194 		private final Writer out;
195 
196 		public CoverageWriter(Writer out) throws IOException {
197 			this.out = out;
198 
199 			out.write(String.format("track type=wiggle_0\n"));
200 		}
201 
202 		public void switchChr() {
203 			state = State.LEADING_ZEROs;
204 		}
205 
206 		public void outputHeader(String chr, int start) throws IOException {
207 			out.write(String.format("fixedStep chrom=%s start=%d step=1 span=1\n", chr, start));
208 			state = State.AFTER_HEADER;
209 		}
210 
211 		public void flush() throws IOException {
212 			this.out.flush();
213 		}
214 
215 		public void reportDepth(String chr, int start, int end, int depth) throws IOException {
216 
217 			// skip leading zeros
218 			if (state == State.LEADING_ZEROs) {
219 				if (depth == 0)
220 					return;
221 				else
222 					outputHeader(chr, start);
223 			}
224 
225 			// output data entry
226 			for (int i = start; i < end; ++i) {
227 				out.write(Integer.toString(depth));
228 				out.write("\n");
229 			}
230 		}
231 
232 	}
233 
234 }