1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.utgenome.format.sam;
24
25 import java.util.Collection;
26 import java.util.Iterator;
27
28 import org.utgenome.gwt.utgb.client.bio.OnGenome;
29 import org.utgenome.gwt.utgb.client.canvas.IntervalTree;
30 import org.xerial.util.log.Logger;
31
32
33
34
35
36
37
38 public class ReadSweeper<T extends OnGenome> {
39
40 private static Logger _logger = Logger.getLogger(ReadSweeper.class);
41
42 private IntervalTree<T> readSet = new IntervalTree<T>();
43 private int sweepLine = 1;
44 private long readCount = 0;
45
46 public interface ReadSetHandler<T extends OnGenome> {
47 public void handle(int sweepLine, Collection<T> readSet);
48 }
49
50 public void sweep(Iterator<T> cursor, ReadSetHandler<T> handler) {
51
52 readSet.clear();
53 sweepLine = 1;
54 readCount = 0;
55
56
57 for (; cursor.hasNext();) {
58 readCount++;
59
60 if ((readCount % 1000000) == 0) {
61 _logger.info(String.format("processed %,d reads", readCount));
62 }
63
64 T read = cursor.next();
65 int readStart = read.getStart();
66 if (sweepLine < readStart) {
67
68 sweepUpto(readStart, handler);
69 }
70 readSet.add(read);
71 }
72
73 if (!readSet.isEmpty()) {
74 sweepUpto(maxReadEnd(readSet), handler);
75 }
76 }
77
78 private int maxReadEnd(Iterable<T> readSet) {
79 int maxEnd = -1;
80 for (OnGenome each : readSet) {
81 if (maxEnd < each.getEnd())
82 maxEnd = each.getEnd();
83 }
84 return maxEnd;
85 }
86
87 private void sweepUpto(int sweepEnd, ReadSetHandler<T> handler) {
88 for (; sweepLine < sweepEnd; sweepLine++) {
89 handler.handle(sweepLine, readSet);
90 readSet.removeBefore(sweepLine);
91 }
92 }
93 }