View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2010 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-core Project
18  //
19  // KmerIntegerFactory.java
20  // Since: 2010/10/05
21  //
22  //--------------------------------------
23  package org.utgenome.util.kmer;
24  
25  import org.utgenome.UTGBErrorCode;
26  import org.utgenome.UTGBException;
27  import org.utgenome.format.fasta.CompactACGTWriter;
28  
29  /**
30   * Generator of the bit-integer representation of a k-mer sequence
31   * 
32   * @author leo
33   * 
34   */
35  public class KmerIntegerFactory {
36  
37  	private final int K;
38  
39  	public KmerIntegerFactory(int K) {
40  		if (K >= 16)
41  			throw new IllegalArgumentException("K must be less than 16: K=" + K);
42  
43  		this.K = K;
44  	}
45  
46  	/**
47  	 * Return the XOR value of the double strand reads of a given k-mer sequence. Since XOR is commutative, the result
48  	 * value is the same for both strand reads.
49  	 * 
50  	 * @param K
51  	 * @return
52  	 */
53  	public int doubleStrandXOR(int kmer) {
54  		int rc = reverseComplement(kmer);
55  		return kmer ^ rc;
56  	}
57  
58  	public int reverseComplement(int kmerInt) {
59  		int complement = ~kmerInt;
60  
61  		int reverseComplement = 0;
62  		for (int i = 0; i < K; i++) {
63  			int next = (complement >>> 2 * i) & 0x03;
64  			reverseComplement <<= 2;
65  			reverseComplement |= next;
66  		}
67  
68  		return reverseComplement;
69  	}
70  
71  	private char[] ACGT = { 'A', 'C', 'G', 'T' };
72  
73  	/**
74  	 * @param acgt
75  	 * @return
76  	 * @throws UTGBException
77  	 *             when the input contains a non-ACGT character;
78  	 */
79  	public int parseString(String acgt) throws UTGBException {
80  
81  		int kmer = 0;
82  
83  		for (int i = 0; i < acgt.length(); i++) {
84  			byte b = CompactACGTWriter.to2bitCode(acgt.charAt(i));
85  			if (b >= 4)
86  				throw new UTGBException(UTGBErrorCode.NOT_AN_ACGT);
87  
88  			kmer <<= 2;
89  			kmer |= b;
90  		}
91  
92  		return kmer;
93  	}
94  
95  	public String toString(int kmerInt) {
96  		StringBuilder seq = new StringBuilder();
97  		for (int i = 0; i < K; i++) {
98  			int index = (kmerInt >>> (2 * (K - i - 1))) & 0x03;
99  			seq.append(ACGT[index]);
100 		}
101 		return seq.toString();
102 	}
103 
104 }