From 4778137e61534f4f2507ecc2126a255cd2d51b19 Mon Sep 17 00:00:00 2001 From: jrobinso <933148+jrobinso@users.noreply.github.com> Date: Wed, 9 Oct 2024 10:12:33 -0700 Subject: [PATCH] Extend file format determination code. --- .../org/broad/igv/track/FileFormatUtils.java | 22 +++++++++++++++++++ .../org/broad/igv/track/TrackProperties.java | 12 ++++++++++ .../java/org/broad/igv/util/ParsingUtils.java | 7 ++++-- .../broad/igv/track/FileFormatUtilsTest.java | 4 ++++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broad/igv/track/FileFormatUtils.java b/src/main/java/org/broad/igv/track/FileFormatUtils.java index 4969624c83..6171ff27c9 100644 --- a/src/main/java/org/broad/igv/track/FileFormatUtils.java +++ b/src/main/java/org/broad/igv/track/FileFormatUtils.java @@ -4,6 +4,7 @@ import htsjdk.samtools.util.BlockCompressedInputStream; import org.broad.igv.ucsc.twobit.UnsignedByteBuffer; import org.broad.igv.ucsc.twobit.UnsignedByteBufferImpl; +import org.broad.igv.util.ParsingUtils; import org.broad.igv.util.stream.IGVSeekableStreamFactory; import java.io.*; @@ -89,6 +90,27 @@ public static String determineFormat(String path) throws IOException { if (firstLine.startsWith("##gff-version")) { return "gff"; } + if(firstLine.startsWith("##fileformat=")) { + return firstLine.substring(13); // Non-standard extension of the VCF convention: the text following ##fileformat= is returned as the format name + } + + // Scan at most 100 lines for a format indication (a track line carrying a format field, or a wig fixedStep/variableStep declaration); lines starting with # are skipped but still count toward the limit. 
+ int n = 0; + String nextLine; + while((nextLine = reader.readLine()) != null && n++ < 100) { + if(nextLine.startsWith("#")) continue; + if(nextLine.startsWith("track")) { + TrackProperties properties = new TrackProperties(); + ParsingUtils.parseTrackLine(nextLine, properties); + if(properties.getFormat() != null) { + return properties.getFormat(); + } + } + if(nextLine.startsWith("fixedStep") || nextLine.startsWith("variableStep")) { + return "wig"; + } + } + if (maybeSampleInfo(bytes)) { return "sampleinfo"; } diff --git a/src/main/java/org/broad/igv/track/TrackProperties.java b/src/main/java/org/broad/igv/track/TrackProperties.java index 16ffa2043f..96658053f7 100644 --- a/src/main/java/org/broad/igv/track/TrackProperties.java +++ b/src/main/java/org/broad/igv/track/TrackProperties.java @@ -143,6 +143,11 @@ public enum BaseCoord { private String coverageURL; + /** + * File format named by the non-standard format field of a track line (stored by ParsingUtils.parseTrackLine via setFormat) + */ + private String format; + /** * Track attributes (meta data) */ @@ -153,6 +158,13 @@ public TrackProperties() { } + public String getFormat() { + return format; + } + + public void setFormat(String format) { + this.format = format; + } public void setTrackLine(String trackLine) { this.trackLine = trackLine; diff --git a/src/main/java/org/broad/igv/util/ParsingUtils.java b/src/main/java/org/broad/igv/util/ParsingUtils.java index 06e4685aa3..68d72fe00d 100644 --- a/src/main/java/org/broad/igv/util/ParsingUtils.java +++ b/src/main/java/org/broad/igv/util/ParsingUtils.java @@ -386,7 +386,10 @@ public static boolean parseTrackLine(String nextLine, TrackProperties trackPrope String key = kv.get(0).toLowerCase().trim(); String value = kv.get(1).replaceAll("\"", ""); - if (key.equals("coords")) { + if(key.equals("format")) { + trackProperties.setFormat(value); + } + else if (key.equals("coords")) { if (value.equals("0")) { trackProperties.setBaseCoord(TrackProperties.BaseCoord.ZERO); } else if (value.equals("1")) { @@ -394,7 +397,7 @@ public static 
boolean parseTrackLine(String nextLine, TrackProperties trackPrope } } - if (key.equals("name")) { + else if (key.equals("name")) { trackProperties.setName(value); //dhmay adding name check for TopHat junctions files. graphType is also checked. if (value.equals("junctions")) { diff --git a/src/test/java/org/broad/igv/track/FileFormatUtilsTest.java b/src/test/java/org/broad/igv/track/FileFormatUtilsTest.java index 1010d9537d..eaf23587d4 100644 --- a/src/test/java/org/broad/igv/track/FileFormatUtilsTest.java +++ b/src/test/java/org/broad/igv/track/FileFormatUtilsTest.java @@ -42,5 +42,9 @@ public void testDetermineFormat() throws Exception { String sampleInfoFile = "http://igvdata.broadinstitute.org/data/hg18/tcga/gbm/gbmsubtypes/sampleTable.txt.gz"; format = FileFormatUtils.determineFormat(sampleInfoFile); assertEquals("sampleinfo", format); + + String wigFile = TestUtils.DATA_DIR + "wig/dm3_var_sample.wig"; + format = FileFormatUtils.determineFormat(wigFile); + assertEquals("wig", format); } } \ No newline at end of file