View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2010 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-core Project
18  //
19  // ReadSweeper.java
20  // Since: 2010/10/12
21  //
22  //--------------------------------------
23  package org.utgenome.format.sam;
24  
25  import java.util.Collection;
26  import java.util.Iterator;
27  
28  import org.utgenome.gwt.utgb.client.bio.OnGenome;
29  import org.utgenome.gwt.utgb.client.canvas.IntervalTree;
30  import org.xerial.util.log.Logger;
31  
32  /**
33   * Sweeping SAM reads in their start order
34   * 
35   * @author leo
36   * 
37   */
38  public class ReadSweeper<T extends OnGenome> {
39  
40  	private static Logger _logger = Logger.getLogger(ReadSweeper.class);
41  
42  	private IntervalTree<T> readSet = new IntervalTree<T>();
43  	private int sweepLine = 1;
44  	private long readCount = 0;
45  
46  	public interface ReadSetHandler<T extends OnGenome> {
47  		public void handle(int sweepLine, Collection<T> readSet);
48  	}
49  
50  	public void sweep(Iterator<T> cursor, ReadSetHandler<T> handler) {
51  
52  		readSet.clear();
53  		sweepLine = 1;
54  		readCount = 0;
55  
56  		// assume that read data are sorted in the start order
57  		for (; cursor.hasNext();) {
58  			readCount++;
59  
60  			if ((readCount % 1000000) == 0) {
61  				_logger.info(String.format("processed %,d reads", readCount));
62  			}
63  
64  			T read = cursor.next();
65  			int readStart = read.getStart();
66  			if (sweepLine < readStart) {
67  				// we can sweep reads up to sweepEnd
68  				sweepUpto(readStart, handler);
69  			}
70  			readSet.add(read);
71  		}
72  
73  		if (!readSet.isEmpty()) {
74  			sweepUpto(maxReadEnd(readSet), handler);
75  		}
76  	}
77  
78  	private int maxReadEnd(Iterable<T> readSet) {
79  		int maxEnd = -1;
80  		for (OnGenome each : readSet) {
81  			if (maxEnd < each.getEnd())
82  				maxEnd = each.getEnd();
83  		}
84  		return maxEnd;
85  	}
86  
87  	private void sweepUpto(int sweepEnd, ReadSetHandler<T> handler) {
88  		for (; sweepLine < sweepEnd; sweepLine++) {
89  			handler.handle(sweepLine, readSet);
90  			readSet.removeBefore(sweepLine);
91  		}
92  	}
93  }