1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.utgenome.gwt.utgb.client.bio;
24
25 import org.utgenome.format.fasta.CompactACGTWriter;
26
27
28
29
30
31
32
/**
 * Converts between nucleotide characters and their 2-bit codes
 * (A = 0, C = 1, G = 2, T/U = 3), and between base strings and k-mers packed
 * into an {@code int}.
 */
public class ACGTEncoder {

    /** Code returned for every character that is not A, C, G, T or U (either case). */
    private static final byte UNKNOWN_CODE = 4;

    /** Lookup table indexed by character value (0-255) giving the 2-bit code or {@link #UNKNOWN_CODE}. */
    private static final byte[] charToACGTCodeTable = buildCodeTable();

    /** Bases indexed by their 2-bit code. */
    private static final char[] BASES = { 'A', 'C', 'G', 'T' };

    /**
     * Builds the 256-entry character-to-code table: upper- and lower-case
     * A, C, G, T and U map to their codes, everything else to
     * {@link #UNKNOWN_CODE}.
     */
    private static byte[] buildCodeTable() {
        final byte[] table = new byte[256];
        for (int i = 0; i < table.length; i++) {
            table[i] = UNKNOWN_CODE;
        }
        table['A'] = table['a'] = 0;
        table['C'] = table['c'] = 1;
        table['G'] = table['g'] = 2;
        table['T'] = table['t'] = 3;
        // U shares the code of T
        table['U'] = table['u'] = 3;
        return table;
    }

    /**
     * Returns the 2-bit code of a base character.
     *
     * @param base
     *            the character to encode
     * @return 0, 1, 2 or 3 for A, C, G or T/U (case-insensitive); 4 for any
     *         other character, including characters beyond the table range
     */
    public static byte to2bitCode(char base) {
        // Characters >= 256 are outside the table; report them as unknown
        // instead of failing with an ArrayIndexOutOfBoundsException.
        return base < charToACGTCodeTable.length ? charToACGTCodeTable[base] : UNKNOWN_CODE;
    }

    /**
     * Returns the base character for a 2-bit code.
     *
     * @param code
     *            a code in the range 0-3
     * @return 'A', 'C', 'G' or 'T' for codes 0-3; 'N' for any other value
     */
    public static char toBase(int code) {
        if (code < 0 || code > 3) {
            return 'N';
        }
        return BASES[code];
    }

    /**
     * Packs the first {@code K} bases of a sequence into an integer, two bits
     * per base, with the first base in the most significant position.
     * <p>
     * If the sequence is shorter than {@code K}, only the available bases are
     * packed. An {@code int} holds at most 16 bases; for larger {@code K} the
     * leading bases are shifted out. Note that a 16-mer consisting only of T
     * also encodes to -1, the same value used to report an invalid base.
     *
     * @param K
     *            the k-mer length
     * @param acgt
     *            the sequence to encode
     * @return the packed k-mer, or -1 if one of the inspected characters is
     *         not A, C, G, T or U
     */
    public static int toKmerInt(final int K, String acgt) {
        int kmer = 0;

        final int max = Math.min(K, acgt.length());
        for (int i = 0; i < max; i++) {
            final byte code = to2bitCode(acgt.charAt(i));
            if (code >= UNKNOWN_CODE) {
                return -1;
            }
            kmer = (kmer << 2) | code;
        }

        return kmer;
    }

    /**
     * Decodes a packed k-mer into its base string of length {@code K}.
     *
     * @param kmerInt
     *            the packed k-mer, two bits per base, last base in the lowest bits
     * @param K
     *            the number of bases to produce
     * @return the decoded sequence; positions that lie above the 32 bits of
     *         {@code kmerInt} (possible only when {@code K > 16}) are
     *         rendered as 'A'; an empty string when {@code K <= 0}
     */
    public static String toString(int kmerInt, int K) {
        final StringBuilder seq = new StringBuilder(Math.max(K, 0));
        for (int i = 0; i < K; i++) {
            final int position = K - i - 1;
            // Java masks int shift distances to 5 bits, so shifting by 32 or
            // more would wrap around and re-read low bits. Bits above the int
            // are zero by definition, i.e. 'A'.
            final int index = position < 16 ? (kmerInt >>> (2 * position)) & 0x03 : 0;
            seq.append(BASES[index]);
        }
        return seq.toString();
    }

}