View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2009 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-core Project
18  //
19  // CIGARString.java
20  // Since: Mar 15, 2010
21  //
22  // $URL$ 
23  // $Author$
24  //--------------------------------------
25  package org.utgenome.gwt.utgb.client.bio;
26  
27  import java.io.Serializable;
28  import java.util.ArrayList;
29  import java.util.Iterator;
30  import java.util.List;
31  
32  import org.utgenome.gwt.utgb.client.UTGBClientErrorCode;
33  import org.utgenome.gwt.utgb.client.UTGBClientException;
34  
35  /**
36   * CIGAR string (in SAM format) management utility
37   * 
38   * @author leo
39   * 
40   */
41  public class CIGAR implements Serializable, Iterable<CIGAR.Element> {
42  
43  	/**
44  	 * 
45  	 */
46  	private static final long serialVersionUID = 1L;
47  
48  	public static enum Type implements Serializable {
49  		Matches("M"), Insertions("I"), Deletions("D"), SkippedRegion("N"), SoftClip("S"), HardClip("H"), Padding("P");
50  		public final String shortName;
51  
52  		private Type(String shortName) {
53  			this.shortName = shortName;
54  		}
55  
56  		public static Type convert(char c) throws UTGBClientException {
57  			switch (c) {
58  			case 'M':
59  				return Type.Matches;
60  			case 'I':
61  				return Type.Insertions;
62  			case 'D':
63  				return Type.Deletions;
64  			case 'N':
65  				return Type.SkippedRegion;
66  			case 'S':
67  				return Type.SoftClip;
68  			case 'H':
69  				return Type.HardClip;
70  			case 'P':
71  				return Type.Padding;
72  			default:
73  				throw new UTGBClientException(UTGBClientErrorCode.PARSE_ERROR, "unknown CIGAR type: " + c);
74  			}
75  		}
76  
77  		@Override
78  		public String toString() {
79  			return shortName;
80  		}
81  
82  	}
83  
84  	public static class Element implements Serializable {
85  		/**
86  		 * 
87  		 */
88  		private static final long serialVersionUID = 1L;
89  		public final Type type;
90  		public final int length;
91  
92  		public Element(Type type, int length) {
93  			this.type = type;
94  			this.length = length;
95  		}
96  
97  		@Override
98  		public String toString() {
99  			return length + ":" + type;
100 		}
101 
102 	}
103 
104 	private final ArrayList<Element> cigar;
105 
106 	/**
107 	 * Creates an empty CIGAR
108 	 */
109 	public CIGAR() {
110 		cigar = new ArrayList<Element>();
111 	}
112 
113 	public CIGAR(String cigarString) throws UTGBClientException {
114 		this.cigar = parse(cigarString);
115 	}
116 
117 	private CIGAR(ArrayList<Element> cigar) {
118 		this.cigar = cigar;
119 	}
120 
121 	public void add(int length, Type type) {
122 		cigar.add(new Element(type, length));
123 	}
124 
125 	/**
126 	 * Return the number of CIGAR elements
127 	 * 
128 	 * @return
129 	 */
130 	public int size() {
131 		return cigar.size();
132 	}
133 
134 	public Element get(int index) {
135 		return cigar.get(index);
136 	}
137 
138 	public List<Element> getElements() {
139 		return cigar;
140 	}
141 
142 	public String toCIGARString() {
143 		StringBuilder buf = new StringBuilder();
144 		for (Element each : cigar) {
145 			buf.append(each.length + each.type.shortName);
146 		}
147 		return buf.toString();
148 	}
149 
150 	@Override
151 	public String toString() {
152 		return toCIGARString();
153 	}
154 
155 	private static CIGAR parseCIGAR(String cigar) throws UTGBClientException {
156 		return new CIGAR(parse(cigar));
157 	}
158 
159 	private static ArrayList<Element> parse(String cigarString) throws UTGBClientException {
160 
161 		ArrayList<Element> result = new ArrayList<Element>();
162 		int startIndexOfNumber = 0;
163 		for (int cursor = 0; cursor < cigarString.length(); cursor++) {
164 			char c = cigarString.charAt(cursor);
165 			if (c >= '0' && c <= '9')
166 				continue;
167 			else {
168 				if (startIndexOfNumber == cursor)
169 					break; // not a CIGAR string, ignoring the error
170 				int len = Integer.parseInt(cigarString.substring(startIndexOfNumber, cursor));
171 				Type t = Type.convert(cigarString.charAt(cursor));
172 				result.add(new Element(t, len));
173 
174 				startIndexOfNumber = cursor + 1;
175 			}
176 		}
177 
178 		return result;
179 	}
180 
181 	public Iterator<Element> iterator() {
182 		return cigar.iterator();
183 	}
184 
185 }