1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.utgenome.shell;
26
27 import java.io.BufferedReader;
28 import java.io.File;
29 import java.io.FileReader;
30 import java.io.InputStreamReader;
31 import java.io.Reader;
32 import java.util.Iterator;
33
34 import net.sf.samtools.SAMFileHeader;
35 import net.sf.samtools.SAMFileHeader.SortOrder;
36 import net.sf.samtools.SAMFileReader;
37 import net.sf.samtools.SAMFileReader.ValidationStringency;
38 import net.sf.samtools.SAMFileWriter;
39 import net.sf.samtools.SAMFileWriterFactory;
40 import net.sf.samtools.SAMRecord;
41
42 import org.apache.tools.ant.util.ReaderInputStream;
43 import org.utgenome.format.bed.BEDDatabase;
44 import org.utgenome.format.fasta.FASTADatabase;
45 import org.utgenome.format.silk.read.ReadDBBuilder;
46 import org.utgenome.format.wig.WIGDatabaseGenerator;
47 import org.xerial.util.log.Logger;
48 import org.xerial.util.opt.Argument;
49 import org.xerial.util.opt.Option;
50
51
52
53
54
55
56
57
58 public class Import extends UTGBShellCommand {
59
60 private static Logger _logger = Logger.getLogger(Import.class);
61
62 public static enum FileType {
63 AUTO, READ, BED, SAM, FASTA, WIG, KTAB, UNKNOWN, BAM
64 }
65
66 @Option(symbol = "t", longName = "type", description = "specify the input file type: (AUTO, FASTA, READ, BED, WIG)")
67 private FileType fileType = FileType.AUTO;
68
69 @Argument(index = 0, required = false)
70 private String inputFilePath = null;
71
72 @Option(symbol = "d", description = "output directory. default = db")
73 private String outDir = "db";
74
75 @Option(symbol = "o", longName = "output", varName = "DB FILE NAME", description = "output SQLite DB file name")
76 private String outputFileName;
77
78 @Option(symbol = "n", description = "do not overwrite existing DB files (default = false)")
79 private boolean doNotOverwriteDB = false;
80
81 @Override
82 public void execute(String[] args) throws Exception {
83
84 File input = null;
85
86 Reader in = null;
87 if (inputFilePath == null) {
88 _logger.info("use STDIN for the input");
89 in = new InputStreamReader(System.in);
90 }
91 else {
92 _logger.info("input file: " + inputFilePath);
93 input = new File(inputFilePath);
94 if (!input.exists())
95 throw new UTGBShellException("file not found: " + inputFilePath);
96
97 in = new BufferedReader(new FileReader(input));
98 }
99
100 if (fileType == FileType.AUTO)
101 fileType = detectFileType(inputFilePath);
102 _logger.info("file type: " + fileType);
103
104 if (outputFileName == null) {
105
106
107 String inputName = inputFilePath == null ? "out" : inputFilePath;
108
109 if (fileType == FileType.SAM) {
110 outputFileName = org.xerial.util.FileType.replaceFileExt(inputName, "bam");
111 }
112 else {
113 outputFileName = String.format("%s.sqlite", inputName);
114 }
115 int count = 1;
116 if (doNotOverwriteDB) {
117 while (new File(outputFileName).exists()) {
118 if (fileType == FileType.SAM) {
119 outputFileName = org.xerial.util.FileType.replaceFileExt(inputName, String.format("%d.bam", count));
120 }
121 else {
122 outputFileName = String.format("%s.%d.sqlite", inputName, count);
123 }
124 count++;
125 }
126 }
127
128 }
129 _logger.info("output file: " + outputFileName);
130
131 switch (fileType) {
132 case READ: {
133 ReadDBBuilder builder = new ReadDBBuilder(outputFileName);
134 builder.build(in);
135 break;
136 }
137 case BED: {
138 BEDDatabase.toSQLiteDB(in, outputFileName);
139 break;
140 }
141 case FASTA:
142 if (input != null)
143 FASTADatabase.main(new String[] { inputFilePath, "-o", outputFileName });
144 else
145 FASTADatabase.main(new String[] { "-o", outputFileName });
146 break;
147 case WIG:
148 WIGDatabaseGenerator.toSQLiteDB(in, outputFileName);
149 break;
150 case SAM: {
151 _logger.info("creating a BAM file from the input SAM.");
152 SAMFileReader reader = new SAMFileReader(new ReaderInputStream(in));
153 reader.setValidationStringency(ValidationStringency.SILENT);
154
155 String bamOut = outputFileName;
156 if (!bamOut.endsWith(".bam"))
157 bamOut += ".bam";
158 _logger.info("output BAM: " + bamOut);
159
160 SAMFileHeader header = reader.getFileHeader();
161 int nRefs = header.getSequenceDictionary().size();
162 SortOrder sortOrder = header.getSortOrder();
163 boolean sorted = false;
164 switch (sortOrder) {
165 case coordinate:
166 sorted = true;
167 break;
168 default:
169 sorted = false;
170 break;
171 }
172
173 SAMFileWriterFactory fac = new SAMFileWriterFactory();
174
175 fac.setCreateIndex(true);
176 header.setSortOrder(SortOrder.coordinate);
177 final SAMFileWriter writer = fac.makeBAMWriter(header, sorted, new File(bamOut));
178 final Iterator<SAMRecord> iterator = reader.iterator();
179 while (iterator.hasNext()) {
180 writer.addAlignment(iterator.next());
181 }
182 reader.close();
183 writer.close();
184
185 _logger.info("done.");
186
187 }
188 break;
189 case UNKNOWN:
190 default: {
191 _logger.warn("specify the input file type with -t option. Type utgb import --help to see the list of the supported file types");
192 break;
193 }
194 }
195
196 }
197
198 public static FileType detectFileType(String fileName) {
199 if (fileName == null)
200 return FileType.UNKNOWN;
201
202 if (fileName.endsWith(".fa") || fileName.endsWith(".fasta"))
203 return FileType.FASTA;
204 else if (fileName.endsWith(".bed"))
205 return FileType.BED;
206 else if (fileName.endsWith(".wig"))
207 return FileType.WIG;
208 else if (fileName.endsWith(".sam"))
209 return FileType.SAM;
210 else if (fileName.endsWith(".bam"))
211 return FileType.BAM;
212 else if (fileName.endsWith(".ktab"))
213 return FileType.KTAB;
214
215 return FileType.AUTO;
216 }
217
218 @Override
219 public String name() {
220 return "import";
221 }
222
223 @Override
224 public String getOneLinerDescription() {
225 return "import a file and create a new database";
226 }
227
228 }