Skip to content

Commit ca65b51

Browse files
Merge pull request #242 from AdamaJava/newVcfReader
create VcfFileReader based on RecordReader
2 parents d155b8e + 9584c25 commit ca65b51

File tree

7 files changed

+216
-0
lines changed

7 files changed

+216
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/**
2+
* © Copyright The University of Queensland 2010-2014.
3+
* © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
4+
*
5+
* This code is released under the terms outlined in the included LICENSE file.
6+
*/
7+
package org.qcmg.qio.vcf;
8+
9+
import java.io.File;
10+
import java.io.IOException;
11+
12+
import org.qcmg.common.util.Constants;
13+
import org.qcmg.common.util.TabTokenizer;
14+
import org.qcmg.common.vcf.VcfRecord;
15+
import org.qcmg.common.vcf.header.VcfHeader;
16+
import org.qcmg.qio.record.RecordReader;
17+
18+
/**
19+
* Contains an `InputStream` so remember to call close() or use in try-with-resources
20+
*/
21+
public final class VcfFileReader extends RecordReader<VcfRecord> {
22+
private static final String HEADER_PREFIX = Constants.HASH_STRING;
23+
24+
private VcfHeader header;
25+
26+
public VcfFileReader(final String file) throws IOException {
27+
this(new File(file));
28+
}
29+
30+
public VcfFileReader(final File file) throws IOException {
31+
super(file, HEADER_PREFIX);
32+
header = new VcfHeader(getHeader());
33+
}
34+
35+
public VcfHeader getVcfHeader() {
36+
return header;
37+
}
38+
39+
@Override
40+
public VcfRecord getRecord(String line) {
41+
final String[] params = TabTokenizer.tokenize(line);
42+
final int arrayLength = params.length;
43+
if (8 > arrayLength) {
44+
throw new IllegalArgumentException("Bad VCF format. Insufficient columns: '" + line + "'");
45+
}
46+
47+
return new VcfRecord(params);
48+
}
49+
50+
}

qio/src/org/qcmg/vcf/VCFFileReader.java

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
/**
2727
* Contains an `InputStream` so remember to call close() or use in try-with-resources
2828
*/
29+
@Deprecated
2930
public final class VCFFileReader implements Closeable, Iterable<VcfRecord> {
3031
private final File file;
3132
private final InputStream inputStream;

qio/src/org/qcmg/vcf/VCFFileWriter.java

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.qcmg.common.util.FileUtils;
1818
import org.qcmg.common.vcf.VcfRecord;
1919

20+
@Deprecated
2021
public final class VCFFileWriter implements Closeable {
2122
private final File file;
2223
private final OutputStream outputStream;

qio/src/org/qcmg/vcf/VCFRecordIterator.java

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import org.qcmg.common.vcf.VcfRecord;
1616

17+
@Deprecated
1718
public final class VCFRecordIterator implements Iterator<VcfRecord> {
1819
private final BufferedReader reader;
1920
private VcfRecord next;

qio/src/org/qcmg/vcf/VCFSerializer.java

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.qcmg.common.vcf.VcfRecord;
1616
import org.qcmg.common.vcf.header.VcfHeader;
1717

18+
@Deprecated
1819
public final class VCFSerializer {
1920
private static final String DEFAULT_HEADER_PREFIX = "#";
2021

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
package org.qcmg.qio.vcf;
2+
3+
import static org.junit.Assert.*;
4+
5+
import java.io.File;
6+
import java.io.IOException;
7+
import java.util.Arrays;
8+
import java.util.stream.Collectors;
9+
10+
import org.junit.Assert;
11+
import org.junit.Rule;
12+
import org.junit.Test;
13+
import org.junit.rules.TemporaryFolder;
14+
import org.qcmg.common.util.FileUtils;
15+
import org.qcmg.common.vcf.VcfRecord;
16+
import org.qcmg.common.vcf.header.VcfHeader;
17+
import org.qcmg.common.vcf.header.VcfHeaderUtils;
18+
import org.qcmg.qio.vcf.VcfFileReader;
19+
import org.qcmg.qio.record.RecordWriter;
20+
21+
public class VcfWriterReaderTest {
22+
23+
public static final String[] vcfStrings = new String[] {"##test=test", VcfHeaderUtils.STANDARD_FINAL_HEADER_LINE, };
24+
private final String[] parms = {"chrY","2675826",".","TG","CA"};
25+
26+
27+
@Rule
28+
public TemporaryFolder testFolder = new TemporaryFolder();
29+
30+
@Test
31+
public void getHeaderFromZippedVcfFile() throws IOException {
32+
File file = testFolder.newFile("header.vcf.gz");
33+
34+
try(RecordWriter<VcfRecord> writer = new RecordWriter<>(file) ){
35+
writer.addHeader(Arrays.stream(vcfStrings).collect(Collectors.joining("\n")));
36+
}
37+
assertEquals(true, FileUtils.isInputGZip(file) );
38+
39+
/*
40+
* Should be able to get the header back out
41+
*/
42+
VcfHeader header = null;
43+
int num = 0;
44+
try(VcfFileReader reader = new VcfFileReader(file) ){
45+
header = reader.getVcfHeader();
46+
for(VcfRecord re: reader) {
47+
num ++;
48+
}
49+
50+
}
51+
assertEquals(true, null != header);
52+
assertEquals(VcfHeaderUtils.STANDARD_FINAL_HEADER_LINE, header.getChrom().toString());
53+
assertEquals(1, header.getAllMetaRecords().size());
54+
assertEquals("##test=test", header.getAllMetaRecords().get(0).toString());
55+
assertEquals(0, num); //no record
56+
}
57+
58+
@Test
59+
public void getHeaderFromInvalidVcfFile() throws IOException {
60+
File file = testFolder.newFile("header.vcf.gz");
61+
62+
try(RecordWriter<VcfRecord> writer = new RecordWriter<>(file) ){
63+
writer.addHeader(Arrays.stream(vcfStrings).collect(Collectors.joining("\n")));
64+
//add two valide record
65+
writer.add(new VcfRecord(parms));
66+
writer.add(new VcfRecord(parms));
67+
68+
writer.addHeader(Arrays.stream(vcfStrings).collect(Collectors.joining("\n")));
69+
}
70+
assertEquals(true, FileUtils.isInputGZip(file) );
71+
72+
73+
/*
74+
* Should be able to get the header back out
75+
*/
76+
VcfHeader header = null;
77+
try(VcfFileReader reader = new VcfFileReader(file) ){
78+
header = reader.getVcfHeader();
79+
}
80+
assertEquals(true, null != header);
81+
assertEquals(1, header.getAllMetaRecords().size());
82+
83+
int num = 0;
84+
try(VcfFileReader reader = new VcfFileReader(file) ){
85+
for(VcfRecord re: reader) {
86+
num ++;
87+
System.out.println(re.toSimpleString());
88+
}
89+
fail("expected exception should throw here! ");
90+
}catch(IllegalArgumentException e) {
91+
//two valid record but exception happed when check next record before create second vcf record
92+
assertEquals(1, num); //one valid record
93+
}
94+
}
95+
96+
@Test
97+
public void testValidation() throws IOException {
98+
String[] fnames = new String[] { "test.output.gz", "test.output" };
99+
String[] vcfStrings = new String[] {"#testing...", VcfHeaderUtils.STANDARD_FINAL_HEADER_LINE};
100+
for(int i = 0; i < fnames.length; i ++){
101+
File file = testFolder.newFile(fnames[i]);
102+
//create writer
103+
try(RecordWriter<VcfRecord> writer = new RecordWriter<>(file) ){
104+
writer.addHeader(vcfStrings[i]);
105+
} catch (Exception e) { fail(); }
106+
107+
//check output type
108+
try {
109+
if( FileUtils.isFileNameGZip(file) )
110+
Assert.assertTrue( FileUtils.isInputGZip(file) );
111+
else
112+
Assert.assertFalse( FileUtils.isInputGZip(file) );
113+
} catch (IOException e) { fail(); }
114+
115+
//read invaid vcf
116+
try(VcfFileReader reader = new VcfFileReader(file);){
117+
//file can be deleted here even without close but can't be closed if fail()
118+
Assert.assertTrue( file.delete());
119+
if(i == 0 ) fail();
120+
Assert.assertTrue( reader.getVcfHeader() != null);
121+
122+
} catch (Exception e) {
123+
e.printStackTrace();
124+
if(i == 1 ) fail();
125+
//delete testing file which is invalid vcf,
126+
Assert.assertTrue( file.delete());
127+
}
128+
}
129+
}
130+
131+
@Test
132+
public void testCreateAppendVcfWriter() throws IOException {
133+
File file = testFolder.newFile("output.vcf");
134+
135+
//create new file
136+
try(RecordWriter<VcfRecord> writer = new RecordWriter<>(file) ){
137+
writer.addHeader(vcfStrings[0]);
138+
} catch (Exception e) { fail(); }
139+
140+
// read throw exception
141+
try(VcfFileReader reader = new VcfFileReader(file);){
142+
fail();
143+
} catch (Exception e) { }
144+
145+
146+
//append to file
147+
try(RecordWriter<VcfRecord> writer = new RecordWriter<>(file, true) ){
148+
writer.addHeader(vcfStrings[1]);
149+
} catch (Exception e) { fail(); }
150+
151+
//now it become a valid vcf file
152+
try(VcfFileReader reader = new VcfFileReader(file);){
153+
Assert.assertFalse( FileUtils.isInputGZip(file) );
154+
Assert.assertTrue( reader.getVcfHeader().getAllMetaRecords().size() == 1);
155+
Assert.assertTrue(file.delete());
156+
157+
} catch (Exception e) {fail(); }
158+
159+
}
160+
161+
}

qio/test/org/qcmg/vcf/VcfWriterReaderTest.java

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.qcmg.common.vcf.header.VcfHeader;
1616
import org.qcmg.common.vcf.header.VcfHeaderUtils;
1717

18+
@Deprecated
1819
public class VcfWriterReaderTest {
1920

2021
public static final String[] vcfStrings = new String[] {"##test=test", VcfHeaderUtils.STANDARD_FINAL_HEADER_LINE};

0 commit comments

Comments
 (0)