View Javadoc

1   /*--------------------------------------------------------------------------
2    *  Copyright 2008 utgenome.org
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *--------------------------------------------------------------------------*/
16  //--------------------------------------
17  // utgb-shell Project
18  //
19  // Import.java
20  // Since: Jan 20, 2009
21  //
22  // $URL$ 
23  // $Author$
24  //--------------------------------------
25  package org.utgenome.shell;
26  
27  import java.io.BufferedReader;
28  import java.io.File;
29  import java.io.FileReader;
30  import java.io.InputStreamReader;
31  import java.io.Reader;
32  import java.util.Iterator;
33  
34  import net.sf.samtools.SAMFileHeader;
35  import net.sf.samtools.SAMFileHeader.SortOrder;
36  import net.sf.samtools.SAMFileReader;
37  import net.sf.samtools.SAMFileReader.ValidationStringency;
38  import net.sf.samtools.SAMFileWriter;
39  import net.sf.samtools.SAMFileWriterFactory;
40  import net.sf.samtools.SAMRecord;
41  
42  import org.apache.tools.ant.util.ReaderInputStream;
43  import org.utgenome.format.bed.BEDDatabase;
44  import org.utgenome.format.fasta.FASTADatabase;
45  import org.utgenome.format.silk.read.ReadDBBuilder;
46  import org.utgenome.format.wig.WIGDatabaseGenerator;
47  import org.xerial.util.log.Logger;
48  import org.xerial.util.opt.Argument;
49  import org.xerial.util.opt.Option;
50  
51  /**
52   * import command
53   * 
54   * @author leo
55   * 
56   */
57  // @Usage(command = "> utgb import", description = "import command")
58  public class Import extends UTGBShellCommand {
59  
60  	private static Logger _logger = Logger.getLogger(Import.class);
61  
62  	public static enum FileType {
63  		AUTO, READ, BED, SAM, FASTA, WIG, KTAB, UNKNOWN, BAM
64  	}
65  
66  	@Option(symbol = "t", longName = "type", description = "specify the input file type: (AUTO, FASTA, READ, BED, WIG)")
67  	private FileType fileType = FileType.AUTO;
68  
69  	@Argument(index = 0, required = false)
70  	private String inputFilePath = null;
71  
72  	@Option(symbol = "d", description = "output directory. default = db")
73  	private String outDir = "db";
74  
75  	@Option(symbol = "o", longName = "output", varName = "DB FILE NAME", description = "output SQLite DB file name")
76  	private String outputFileName;
77  
78  	@Option(symbol = "n", description = "do not overwrite existing DB files (default = false)")
79  	private boolean doNotOverwriteDB = false;
80  
81  	@Override
82  	public void execute(String[] args) throws Exception {
83  
84  		File input = null;
85  
86  		Reader in = null;
87  		if (inputFilePath == null) {
88  			_logger.info("use STDIN for the input");
89  			in = new InputStreamReader(System.in);
90  		}
91  		else {
92  			_logger.info("input file: " + inputFilePath);
93  			input = new File(inputFilePath);
94  			if (!input.exists())
95  				throw new UTGBShellException("file not found: " + inputFilePath);
96  
97  			in = new BufferedReader(new FileReader(input));
98  		}
99  
100 		if (fileType == FileType.AUTO)
101 			fileType = detectFileType(inputFilePath);
102 		_logger.info("file type: " + fileType);
103 
104 		if (outputFileName == null) {
105 			// new File("db").mkdirs();
106 
107 			String inputName = inputFilePath == null ? "out" : inputFilePath;
108 
109 			if (fileType == FileType.SAM) {
110 				outputFileName = org.xerial.util.FileType.replaceFileExt(inputName, "bam");
111 			}
112 			else {
113 				outputFileName = String.format("%s.sqlite", inputName);
114 			}
115 			int count = 1;
116 			if (doNotOverwriteDB) {
117 				while (new File(outputFileName).exists()) {
118 					if (fileType == FileType.SAM) {
119 						outputFileName = org.xerial.util.FileType.replaceFileExt(inputName, String.format("%d.bam", count));
120 					}
121 					else {
122 						outputFileName = String.format("%s.%d.sqlite", inputName, count);
123 					}
124 					count++;
125 				}
126 			}
127 
128 		}
129 		_logger.info("output file: " + outputFileName);
130 
131 		switch (fileType) {
132 		case READ: {
133 			ReadDBBuilder builder = new ReadDBBuilder(outputFileName);
134 			builder.build(in);
135 			break;
136 		}
137 		case BED: {
138 			BEDDatabase.toSQLiteDB(in, outputFileName);
139 			break;
140 		}
141 		case FASTA:
142 			if (input != null)
143 				FASTADatabase.main(new String[] { inputFilePath, "-o", outputFileName });
144 			else
145 				FASTADatabase.main(new String[] { "-o", outputFileName });
146 			break;
147 		case WIG:
148 			WIGDatabaseGenerator.toSQLiteDB(in, outputFileName);
149 			break;
150 		case SAM: {
151 			_logger.info("creating a BAM file from the input SAM.");
152 			SAMFileReader reader = new SAMFileReader(new ReaderInputStream(in));
153 			reader.setValidationStringency(ValidationStringency.SILENT);
154 
155 			String bamOut = outputFileName;
156 			if (!bamOut.endsWith(".bam"))
157 				bamOut += ".bam";
158 			_logger.info("output BAM: " + bamOut);
159 
160 			SAMFileHeader header = reader.getFileHeader();
161 			int nRefs = header.getSequenceDictionary().size();
162 			SortOrder sortOrder = header.getSortOrder();
163 			boolean sorted = false;
164 			switch (sortOrder) {
165 			case coordinate:
166 				sorted = true;
167 				break;
168 			default:
169 				sorted = false;
170 				break;
171 			}
172 
173 			SAMFileWriterFactory fac = new SAMFileWriterFactory();
174 			// create .bai (BAM index) file
175 			fac.setCreateIndex(true);
176 			header.setSortOrder(SortOrder.coordinate);
177 			final SAMFileWriter writer = fac.makeBAMWriter(header, sorted, new File(bamOut));
178 			final Iterator<SAMRecord> iterator = reader.iterator();
179 			while (iterator.hasNext()) {
180 				writer.addAlignment(iterator.next());
181 			}
182 			reader.close();
183 			writer.close();
184 
185 			_logger.info("done.");
186 
187 		}
188 			break;
189 		case UNKNOWN:
190 		default: {
191 			_logger.warn("specify the input file type with -t option. Type utgb import --help to see the list of the supported file types");
192 			break;
193 		}
194 		}
195 
196 	}
197 
198 	public static FileType detectFileType(String fileName) {
199 		if (fileName == null)
200 			return FileType.UNKNOWN;
201 
202 		if (fileName.endsWith(".fa") || fileName.endsWith(".fasta"))
203 			return FileType.FASTA;
204 		else if (fileName.endsWith(".bed"))
205 			return FileType.BED;
206 		else if (fileName.endsWith(".wig"))
207 			return FileType.WIG;
208 		else if (fileName.endsWith(".sam"))
209 			return FileType.SAM;
210 		else if (fileName.endsWith(".bam"))
211 			return FileType.BAM;
212 		else if (fileName.endsWith(".ktab"))
213 			return FileType.KTAB;
214 
215 		return FileType.AUTO;
216 	}
217 
218 	@Override
219 	public String name() {
220 		return "import";
221 	}
222 
223 	@Override
224 	public String getOneLinerDescription() {
225 		return "import a file and create a new database";
226 	}
227 
228 }