Skip to content

Commit 228639e

Browse files
create VcfFileReader based on RecordReader
1 parent d155b8e commit 228639e

File tree

2 files changed

+189
-0
lines changed

2 files changed

+189
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/**
2+
* © Copyright The University of Queensland 2010-2014.
3+
* © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
4+
*
5+
* This code is released under the terms outlined in the included LICENSE file.
6+
*/
7+
package org.qcmg.qio.vcf;
8+
9+
import java.io.File;
10+
import java.io.IOException;
11+
12+
import org.qcmg.common.util.Constants;
13+
import org.qcmg.common.util.TabTokenizer;
14+
import org.qcmg.common.vcf.VcfRecord;
15+
import org.qcmg.common.vcf.header.VcfHeader;
16+
import org.qcmg.qio.record.RecordReader;
17+
18+
/**
19+
* Contains an `InputStream` so remember to call close() or use in try-with-resources
20+
*/
21+
public final class VcfFileReader extends RecordReader<VcfRecord> {
22+
private static final String HEADER_PREFIX = Constants.HASH_STRING;
23+
24+
private VcfHeader header;
25+
26+
public VcfFileReader(final String file) throws IOException {
27+
this(new File(file));
28+
}
29+
30+
public VcfFileReader(final File file) throws IOException {
31+
super(file, HEADER_PREFIX);
32+
header = new VcfHeader(getHeader());
33+
}
34+
35+
public VcfHeader getVcfHeader() {
36+
return header;
37+
}
38+
39+
@Override
40+
public VcfRecord getRecord(String line) {
41+
final String[] params = TabTokenizer.tokenize(line);
42+
final int arrayLength = params.length;
43+
if (8 > arrayLength) {
44+
throw new IllegalArgumentException("Bad VCF format. Insufficient columns: '" + line + "'");
45+
}
46+
47+
return new VcfRecord(params);
48+
}
49+
50+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
package org.qcmg.qio.vcf;
2+
3+
import static org.junit.Assert.*;
4+
5+
import java.io.File;
6+
import java.io.IOException;
7+
import java.util.Arrays;
8+
import java.util.stream.Collectors;
9+
10+
import org.junit.Assert;
11+
import org.junit.Rule;
12+
import org.junit.Test;
13+
import org.junit.rules.TemporaryFolder;
14+
import org.qcmg.common.util.FileUtils;
15+
import org.qcmg.common.vcf.VcfRecord;
16+
import org.qcmg.common.vcf.header.VcfHeader;
17+
import org.qcmg.common.vcf.header.VcfHeaderUtils;
18+
import org.qcmg.qio.vcf.VcfFileReader;
19+
import org.qcmg.qio.record.RecordWriter;
20+
21+
public class VcfWriterReaderTest {
22+
23+
public static final String[] vcfStrings = new String[] {"##test=test", VcfHeaderUtils.STANDARD_FINAL_HEADER_LINE};
24+
25+
@Rule
26+
public TemporaryFolder testFolder = new TemporaryFolder();
27+
28+
@Test
29+
public void getHeaderFromZippedVcfFileUsingStreams() throws IOException {
30+
File file = testFolder.newFile("header.vcf.gz");
31+
32+
try(RecordWriter<VcfRecord> writer = new RecordWriter<>(file) ){
33+
writer.addHeader(Arrays.stream(vcfStrings).collect(Collectors.joining("\n")));
34+
}
35+
assertEquals(true, FileUtils.isInputGZip(file) );
36+
37+
/*
38+
* Should be able to get the header back out
39+
*/
40+
VcfHeader header = null;
41+
try(VcfFileReader reader = new VcfFileReader(file) ){
42+
header = reader.getVcfHeader();
43+
}
44+
assertEquals(true, null != header);
45+
assertEquals(VcfHeaderUtils.STANDARD_FINAL_HEADER_LINE, header.getChrom().toString());
46+
assertEquals(1, header.getAllMetaRecords().size());
47+
assertEquals("##test=test", header.getAllMetaRecords().get(0).toString());
48+
}
49+
50+
@Test
51+
public void getHeaderFromZippedVcfFile() throws IOException {
52+
File file = testFolder.newFile("header.vcf.gz");
53+
54+
try(RecordWriter<VcfRecord> writer = new RecordWriter<>(file) ){
55+
writer.addHeader(Arrays.stream(vcfStrings).collect(Collectors.joining("\n")));
56+
}
57+
assertEquals(true, FileUtils.isInputGZip(file) );
58+
59+
60+
/*
61+
* Should be able to get the header back out
62+
*/
63+
VcfHeader header = null;
64+
65+
try(VcfFileReader reader = new VcfFileReader(file) ){
66+
header = reader.getVcfHeader();
67+
}
68+
assertEquals(true, null != header);
69+
assertEquals(VcfHeaderUtils.STANDARD_FINAL_HEADER_LINE, header.getChrom().toString());
70+
assertEquals(1, header.getAllMetaRecords().size());
71+
assertEquals("##test=test", header.getAllMetaRecords().get(0).toString());
72+
}
73+
74+
@Test
75+
public void testValidation() throws IOException {
76+
String[] fnames = new String[] { "test.output.gz", "test.output" };
77+
String[] vcfStrings = new String[] {"#testing...", VcfHeaderUtils.STANDARD_FINAL_HEADER_LINE};
78+
for(int i = 0; i < fnames.length; i ++){
79+
File file = testFolder.newFile(fnames[i]);
80+
//create writer
81+
try(RecordWriter<VcfRecord> writer = new RecordWriter<>(file) ){
82+
writer.addHeader(vcfStrings[i]);
83+
} catch (Exception e) { fail(); }
84+
85+
//check output type
86+
try {
87+
if( FileUtils.isFileNameGZip(file) )
88+
Assert.assertTrue( FileUtils.isInputGZip(file) );
89+
else
90+
Assert.assertFalse( FileUtils.isInputGZip(file) );
91+
} catch (IOException e) { fail(); }
92+
93+
//read invaid vcf
94+
try(VcfFileReader reader = new VcfFileReader(file);){
95+
//file can be deleted here even without close but can't be closed if fail()
96+
Assert.assertTrue( file.delete());
97+
if(i == 0 ) fail();
98+
Assert.assertTrue( reader.getVcfHeader() != null);
99+
100+
} catch (Exception e) {
101+
e.printStackTrace();
102+
if(i == 1 ) fail();
103+
//delete testing file which is invalid vcf,
104+
Assert.assertTrue( file.delete());
105+
}
106+
}
107+
}
108+
109+
@Test
110+
public void testCreateAppendVcfWriter() throws IOException {
111+
File file = testFolder.newFile("output.vcf");
112+
113+
//create new file
114+
try(RecordWriter<VcfRecord> writer = new RecordWriter<>(file) ){
115+
writer.addHeader(vcfStrings[0]);
116+
} catch (Exception e) { fail(); }
117+
118+
// read throw exception
119+
try(VcfFileReader reader = new VcfFileReader(file);){
120+
fail();
121+
} catch (Exception e) { }
122+
123+
124+
//append to file
125+
try(RecordWriter<VcfRecord> writer = new RecordWriter<>(file, true) ){
126+
writer.addHeader(vcfStrings[1]);
127+
} catch (Exception e) { fail(); }
128+
129+
//now it become a valid vcf file
130+
try(VcfFileReader reader = new VcfFileReader(file);){
131+
Assert.assertFalse( FileUtils.isInputGZip(file) );
132+
Assert.assertTrue( reader.getVcfHeader().getAllMetaRecords().size() == 1);
133+
Assert.assertTrue(file.delete());
134+
135+
} catch (Exception e) {fail(); }
136+
137+
}
138+
139+
}

0 commit comments

Comments
 (0)