1 /*--------------------------------------------------------------------------
2 * Copyright 2010 utgenome.org
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *--------------------------------------------------------------------------*/
16 //--------------------------------------
17 // utgb-core Project
18 //
19 // KmerIntegerFactory.java
20 // Since: 2010/10/05
21 //
22 //--------------------------------------
23 package org.utgenome.util.kmer;
24
25 import org.utgenome.UTGBErrorCode;
26 import org.utgenome.UTGBException;
27 import org.utgenome.format.fasta.CompactACGTWriter;
28
29 /**
30 * Generator of the bit-integer representation of a k-mer sequence
31 *
32 * @author leo
33 *
34 */
35 public class KmerIntegerFactory {
36
37 private final int K;
38
39 public KmerIntegerFactory(int K) {
40 if (K >= 16)
41 throw new IllegalArgumentException("K must be less than 16: K=" + K);
42
43 this.K = K;
44 }
45
46 /**
47 * Return the XOR value of the double strand reads of a given k-mer sequence. Since XOR is commutative, the result
48 * value is the same for both strand reads.
49 *
50 * @param K
51 * @return
52 */
53 public int doubleStrandXOR(int kmer) {
54 int rc = reverseComplement(kmer);
55 return kmer ^ rc;
56 }
57
58 public int reverseComplement(int kmerInt) {
59 int complement = ~kmerInt;
60
61 int reverseComplement = 0;
62 for (int i = 0; i < K; i++) {
63 int next = (complement >>> 2 * i) & 0x03;
64 reverseComplement <<= 2;
65 reverseComplement |= next;
66 }
67
68 return reverseComplement;
69 }
70
71 private char[] ACGT = { 'A', 'C', 'G', 'T' };
72
73 /**
74 * @param acgt
75 * @return
76 * @throws UTGBException
77 * when the input contains a non-ACGT character;
78 */
79 public int parseString(String acgt) throws UTGBException {
80
81 int kmer = 0;
82
83 for (int i = 0; i < acgt.length(); i++) {
84 byte b = CompactACGTWriter.to2bitCode(acgt.charAt(i));
85 if (b >= 4)
86 throw new UTGBException(UTGBErrorCode.NOT_AN_ACGT);
87
88 kmer <<= 2;
89 kmer |= b;
90 }
91
92 return kmer;
93 }
94
95 public String toString(int kmerInt) {
96 StringBuilder seq = new StringBuilder();
97 for (int i = 0; i < K; i++) {
98 int index = (kmerInt >>> (2 * (K - i - 1))) & 0x03;
99 seq.append(ACGT[index]);
100 }
101 return seq.toString();
102 }
103
104 }