From a5ffdfef324508b3b064426a7aa7b68c36a0e901 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 4 Nov 2020 15:06:26 +1000 Subject: [PATCH 01/73] update q3panel with new StringFileReader --- q3panel/src/au/edu/qimr/panel/Q3ClinVar.java | 25 ++++------- q3panel/src/au/edu/qimr/panel/Q3Panel.java | 41 ++++++++----------- .../illumina/IlluminaSerializerTest.java | 0 .../simple/SimpleSerializerTest.java | 0 4 files changed, 27 insertions(+), 39 deletions(-) rename qio/test/org/qcmg/{ => unused}/illumina/IlluminaSerializerTest.java (100%) rename qio/test/org/qcmg/{ => unused}/simple/SimpleSerializerTest.java (100%) diff --git a/q3panel/src/au/edu/qimr/panel/Q3ClinVar.java b/q3panel/src/au/edu/qimr/panel/Q3ClinVar.java index 5ff9e5d07..7ce12120b 100644 --- a/q3panel/src/au/edu/qimr/panel/Q3ClinVar.java +++ b/q3panel/src/au/edu/qimr/panel/Q3ClinVar.java @@ -68,16 +68,13 @@ import org.qcmg.common.util.LoadReferencedClasses; import org.qcmg.common.util.Pair; import org.qcmg.common.util.TabTokenizer; - import org.qcmg.common.vcf.VcfRecord; import org.qcmg.common.vcf.VcfUtils; import org.qcmg.common.vcf.header.VcfHeader; import org.qcmg.common.vcf.header.VcfHeaderRecord; import org.qcmg.common.vcf.header.VcfHeaderUtils; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; -import org.qcmg.vcf.VCFFileWriter; +import org.qcmg.qio.record.StringFileReader; +import org.qcmg.qio.record.RecordWriter; public class Q3ClinVar { @@ -483,22 +480,18 @@ private void loadTiledAlignerData() throws Exception { logger.info("loading genome tiles alignment data"); - try (TabbedFileReader reader = new TabbedFileReader(new File(refTiledAlignmentFile))) { + try (StringFileReader reader = new StringFileReader(new File(refTiledAlignmentFile))) { + List headerList = reader.getHeader(); - TabbedHeader header = reader.getHeader(); - List headerList = new ArrayList<>(); - for (String head : header) { - headerList.add(head); - } positionToActualLocation.loadMap(headerList); int i = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { if (++i % 1000000 == 0) { logger.info("hit " + (i / 1000000) + "M records"); } - String tile = rec.getData().substring(0, TILE_SIZE); + String tile = rec.substring(0, TILE_SIZE); if (ampliconTiles.contains(tile)) { - String countOrPosition = rec.getData().substring(rec.getData().indexOf('\t') + 1); + String countOrPosition = rec.substring(rec.indexOf('\t') + 1); if (countOrPosition.charAt(0) == 'C') { frequentlyOccurringRefTiles.add(tile); } else { @@ -1055,7 +1048,7 @@ private void writeHaplotypesCsv(boolean filter) throws IOException { //make a new header String outputFileName = filter ? outputFileNameBase + "vcf" : outputFileNameBase + "diag.unfiltered.vcf"; - try (VCFFileWriter writer = new VCFFileWriter(new File(outputFileName))) { + try (RecordWriter writer = new RecordWriter<>(new File(outputFileName))) { /* * Setup the VcfHeader @@ -1084,7 +1077,7 @@ private void writeHaplotypesCsv(boolean filter) throws IOException { } outputFileName = filter ? 
outputFileNameBase + "diag.detailed.vcf" : outputFileNameBase + "diag.unfiltered_detailed.vcf"; - try (VCFFileWriter writer = new VCFFileWriter(new File(outputFileName))) { + try (RecordWriter writer = new RecordWriter<>(new File(outputFileName))) { /* * Setup the VcfHeader diff --git a/q3panel/src/au/edu/qimr/panel/Q3Panel.java b/q3panel/src/au/edu/qimr/panel/Q3Panel.java index b5beae2cc..8b30a7032 100644 --- a/q3panel/src/au/edu/qimr/panel/Q3Panel.java +++ b/q3panel/src/au/edu/qimr/panel/Q3Panel.java @@ -88,13 +88,10 @@ import org.qcmg.common.vcf.header.VcfHeaderRecord; import org.qcmg.common.vcf.header.VcfHeaderUtils; import org.qcmg.common.vcf.header.VcfHeaderUtils.VcfInfoType; -import org.qcmg.qmule.SmithWatermanGotoh; - -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.StringFileReader; import org.qcmg.vcf.VCFFileReader; -import org.qcmg.vcf.VCFFileWriter; +import org.qcmg.qio.record.RecordWriter; +import org.qcmg.qmule.SmithWatermanGotoh; public class Q3Panel { @@ -391,12 +388,12 @@ private void loadTranscripts() throws IOException { logger.info("Number of unique chromosomes in bed file: " + uniqueChrs.size()); - try (TabbedFileReader reader = new TabbedFileReader(new File(geneTranscriptsFile))) { + try (StringFileReader reader = new StringFileReader(new File(geneTranscriptsFile))) { String currentTranscriptId = null; - for (TabbedRecord rec : reader) { - String contig = rec.getData().substring(0, rec.getData().indexOf(Constants.TAB)); + for (String rec : reader) { + String contig = rec.substring(0, rec.indexOf(Constants.TAB)); if (uniqueChrs.containsKey(contig)) { - String [] params = TabTokenizer.tokenize(rec.getData()); + String [] params = TabTokenizer.tokenize(rec); String [] column8 = params[8].split(Constants.SEMI_COLON_STRING); Optional optionalId = Arrays.stream(column8).filter(s -> s.trim().startsWith("transcript_id")).findAny(); Optional optionalExonNumber = Arrays.stream(column8).filter(s -> s.trim().startsWith("exon_number")).findAny(); @@ -672,9 +669,9 @@ private void mapBedToAmplicons() throws IOException, Exception { */ if (bedFile != null && new File(bedFile).exists()) { int bedId = 0; - try (TabbedFileReader reader = new TabbedFileReader(new File(bedFile));) { - for (TabbedRecord rec : reader) { - String [] params = TabTokenizer.tokenize(rec.getData()); + try (StringFileReader reader = new StringFileReader(new File(bedFile));) { + for (String rec : reader) { + String [] params = TabTokenizer.tokenize(rec); ChrPosition cp = new ChrRangePosition(params[0], Integer.parseInt(params[1]), Integer.parseInt(params[2])); bedToAmpliconMap.put(new Contig(++bedId, cp), new ArrayList(1)); } @@ -1038,7 +1035,7 @@ private void addFormatFieldValues() { } private void writeMutationsToFile() throws IOException { - try (VCFFileWriter writer = new VCFFileWriter(new File(outputFileNameBase + ".vcf"))) { + try (RecordWriter writer = new RecordWriter<>(new File(outputFileNameBase + ".vcf"))) { /* * Setup the VcfHeader @@ -1083,7 +1080,7 @@ private void writeMutationsToFile() throws IOException { private void writeMutationsToFileNew(List vcfs) throws IOException { String filename = outputFileNameBase + "_2.vcf"; - try (VCFFileWriter writer = new VCFFileWriter(new File(filename))) { + try (RecordWriter writer = new RecordWriter<>(new File(filename))) { /* * Setup the VcfHeader @@ -1394,20 +1391,18 @@ private void loadTiledAlignerData() { logger.info("loading genome tiles alignment data"); - try 
(TabbedFileReader reader = new TabbedFileReader(new File(refTiledAlignmentFile))) { + try (StringFileReader reader = new StringFileReader(new File(refTiledAlignmentFile))) { - TabbedHeader header = reader.getHeader(); - List headerList = new ArrayList<>(); - for (String head : header) { - headerList.add(head); - } + + List headerList = reader.getHeader(); + positionToActualLocation.loadMap(headerList); int i = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { if (++i % 1000000 == 0) { logger.info("hit " + (i / 1000000) + "M records"); } - queue.add(rec.getData()); + queue.add(rec); } /* * kill off threads waiting on queue diff --git a/qio/test/org/qcmg/illumina/IlluminaSerializerTest.java b/qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java similarity index 100% rename from qio/test/org/qcmg/illumina/IlluminaSerializerTest.java rename to qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java diff --git a/qio/test/org/qcmg/simple/SimpleSerializerTest.java b/qio/test/org/qcmg/unused/simple/SimpleSerializerTest.java similarity index 100% rename from qio/test/org/qcmg/simple/SimpleSerializerTest.java rename to qio/test/org/qcmg/unused/simple/SimpleSerializerTest.java From 4c3d988b4eac1b7d374cec14def08d7d46487b19 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 4 Nov 2020 15:09:14 +1000 Subject: [PATCH 02/73] update q3tiledaligner with new StringFileReader --- .../tiledaligner/util/TiledAlignerUtil.java | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/q3tiledaligner/src/au/edu/qimr/tiledaligner/util/TiledAlignerUtil.java b/q3tiledaligner/src/au/edu/qimr/tiledaligner/util/TiledAlignerUtil.java index c0475134a..6557c2ad4 100644 --- a/q3tiledaligner/src/au/edu/qimr/tiledaligner/util/TiledAlignerUtil.java +++ b/q3tiledaligner/src/au/edu/qimr/tiledaligner/util/TiledAlignerUtil.java @@ -30,10 +30,8 @@ import org.qcmg.common.string.StringUtils; import org.qcmg.common.util.Constants; import org.qcmg.common.util.NumberUtils; +import org.qcmg.qio.record.StringFileReader; import org.qcmg.qmule.SmithWatermanGotoh; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; import gnu.trove.list.TLongList; import gnu.trove.list.array.TLongArrayList; @@ -482,20 +480,20 @@ public static int nonContinuousCount(long [][] array, long l, int arrayStartPosi * @param tiledInput */ public static void getTiledDataInMap(String tiledAlignerFile, Map tiledInput) { - try (TabbedFileReader reader = new TabbedFileReader(new File(tiledAlignerFile))) { + try (StringFileReader reader = new StringFileReader(new File(tiledAlignerFile))) { int i = 0; int matches = 0; int mapSize = tiledInput.size(); - for (TabbedRecord rec : reader) { + for (String rec : reader) { if (++i % 1000000 == 0) { logger.info("hit " + (i / 1000000) + "M records, matches: " + matches); } - String data = rec.getData(); - int tabindex = data.indexOf(Constants.TAB); - if (tiledInput.containsKey(data.substring(0, tabindex))) { + + int tabindex = rec.indexOf(Constants.TAB); + if (tiledInput.containsKey(rec.substring(0, tabindex))) { matches++; - tiledInput.put(data.substring(0, tabindex), data.substring(tabindex + 1)); + tiledInput.put(rec.substring(0, tabindex), rec.substring(tabindex + 1)); /* * If we have found all elements in the map - might as well stop looking @@ -513,8 +511,8 @@ public static void getTiledDataInMap(String tiledAlignerFile, Map getTiledAlignerHeader(String file) throws IOException { + try (StringFileReader reader 
= new StringFileReader(new File(file))) { return reader.getHeader(); } } From c516ad63436a3ac52ec55adb7bfaf29458b3f20f Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 4 Nov 2020 15:13:31 +1000 Subject: [PATCH 03/73] update qpileup with new StringFileReader --- qpileup/src/org/qcmg/pileup/Options.java | 65 ++++++++-------- .../org/qcmg/pileup/metrics/SnpMetric.java | 78 +++++++++---------- .../qcmg/pileup/metrics/SnpMetricTest.java | 5 +- 3 files changed, 71 insertions(+), 77 deletions(-) diff --git a/qpileup/src/org/qcmg/pileup/Options.java b/qpileup/src/org/qcmg/pileup/Options.java index dc9147364..36ab76e01 100644 --- a/qpileup/src/org/qcmg/pileup/Options.java +++ b/qpileup/src/org/qcmg/pileup/Options.java @@ -16,7 +16,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -41,8 +40,7 @@ import org.qcmg.pileup.metrics.StrandBiasMetric; import org.qcmg.pileup.metrics.SummaryMetric; import org.qcmg.pileup.model.StrandEnum; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.StringFileReader; public final class Options { @@ -144,48 +142,49 @@ public Options(final String[] args) throws Exception { } private void getRangesFromRangeFile(String rangeFile, String viewGroup) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(rangeFile)); + try(StringFileReader reader = new StringFileReader(new File(rangeFile));){ - Iterator iterator = reader.getRecordIterator(); - positionMap = new HashMap>(); - while (iterator.hasNext()) { - - TabbedRecord tab = iterator.next(); - - if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis")) { - continue; - } - String[] data = tab.getData().split("\t"); - if (rangeFile.endsWith("gff3")) { - String range = data[0] + ":" + data[3] + "-" + data[4]; + //Iterator iterator = reader.getRecordIterator(); + positionMap = new HashMap>(); +// while (iterator.hasNext()) { + for(String tab : reader) { - graphRangeInfoMap.put(range, data[8]); - readRanges.add(range); - } else { - String range = data[0] + "\t" + data[1] + "\t" + data[1]; - if (data[0].equals("chrXY")) { - range = "chrX" + ":" + data[1] + "-" + data[1]; + //TabbedRecord tab = iterator.next(); + + if (tab.startsWith("#") || tab.startsWith("Hugo") || tab.startsWith("analysis")) { + continue; } - int pos = new Integer(data[1]); - if (positionMap.containsKey(data[0])) { - positionMap.get(data[0]).put(pos, range); + String[] data = tab.split("\t"); + if (rangeFile.endsWith("gff3")) { + String range = data[0] + ":" + data[3] + "-" + data[4]; + + graphRangeInfoMap.put(range, data[8]); + readRanges.add(range); } else { - TreeMap treemap = new TreeMap(); - treemap.put(pos, range); - positionMap.put(data[0], treemap); + String range = data[0] + "\t" + data[1] + "\t" + data[1]; + if (data[0].equals("chrXY")) { + range = "chrX" + ":" + data[1] + "-" + data[1]; + } + int pos = new Integer(data[1]); + if (positionMap.containsKey(data[0])) { + positionMap.get(data[0]).put(pos, range); + } else { + TreeMap treemap = new TreeMap(); + treemap.put(pos, range); + positionMap.put(data[0], treemap); + + } + //graphRangeInfoMap.put(range, ""); } - //graphRangeInfoMap.put(range, ""); - } - } - + } for (Entry> e: positionMap.entrySet()) { TreeMap map = e.getValue(); readRanges.add(e.getKey() + ":" + (map.firstKey()-10) + "-" + (map.lastKey()+10)); } - reader.close(); + // 
reader.close(); } public Map getGraphRangeInfoMap() { diff --git a/qpileup/src/org/qcmg/pileup/metrics/SnpMetric.java b/qpileup/src/org/qcmg/pileup/metrics/SnpMetric.java index 6b96bf0a2..1501aa510 100644 --- a/qpileup/src/org/qcmg/pileup/metrics/SnpMetric.java +++ b/qpileup/src/org/qcmg/pileup/metrics/SnpMetric.java @@ -36,8 +36,7 @@ import org.qcmg.pileup.model.Chromosome; import org.qcmg.pileup.model.QPileupRecord; import org.qcmg.pileup.model.StrandEnum; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.StringFileReader; import org.qcmg.vcf.VCFFileReader; @@ -495,7 +494,7 @@ public synchronized void writeRecords(String gffName, StrandEnum[] strandEnums, snpMap.clear(); tabFileWriter = new BufferedWriter(new FileWriter(snpOutputFile, true)); dccFileWriter = new BufferedWriter(new FileWriter(dccOutputFile, true)); - Map> mismapMap = readGFFRecords(gffName); + Map> mismapMap = readGFFRecords(gffName); for (Chromosome c : chromosomes) { @@ -527,16 +526,16 @@ public synchronized void writeRecords(String gffName, StrandEnum[] strandEnums, } - private void processMisMapRegion(Map> mismapMap) throws Exception { + private void processMisMapRegion(Map> mismapMap) throws Exception { for (Entry> entry : snpMap.entrySet()) { for (Entry currentEntry : entry.getValue().entrySet()) { SnpRecord snp = currentEntry.getValue(); ChrRangePosition chrPos = new ChrRangePosition(snp.getChromosome(), snp.getPosition(), snp.getEndPosition()); if (mismapMap.containsKey(snp.getChromosome())) { - TreeMap compareMap = mismapMap.get(snp.getChromosome()); - Entry floor = compareMap.floorEntry(chrPos); - Entry ceiling = compareMap.ceilingEntry(chrPos); + TreeMap compareMap = mismapMap.get(snp.getChromosome()); + Entry floor = compareMap.floorEntry(chrPos); + Entry ceiling = compareMap.ceilingEntry(chrPos); if (tabbedRecordFallsInCompareRecord(chrPos, floor) || tabbedRecordFallsInCompareRecord(chrPos, ceiling)) { snp.setInMismapRegion(true); @@ -546,7 +545,7 @@ private void processMisMapRegion(Map entry) { + public boolean tabbedRecordFallsInCompareRecord(ChrRangePosition inputChrPos, Entry entry) { if (entry != null) { ChrRangePosition compareChrPos = entry.getKey(); if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() && inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || @@ -558,53 +557,50 @@ public boolean tabbedRecordFallsInCompareRecord(ChrRangePosition inputChrPos, En return false; } - private Map> readGFFRecords(String gffFile) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(gffFile)); - - Map> map = new HashMap>(); + private Map> readGFFRecords(String gffFile) throws Exception { - Iterator iterator = reader.getRecordIterator(); - - while (iterator.hasNext()) { - - TabbedRecord tab = iterator.next(); - - if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis")) { - continue; - } - - String[] values = tab.getData().split("\t"); - String key = values[0]; - ChrRangePosition chrPos = new ChrRangePosition(key, new Integer(values[3]), new Integer(values[4])); - if (map.containsKey(key)) { - map.get(key).put(chrPos, tab); - } else { - TreeMap tmap = new TreeMap(); - tmap.put(chrPos, tab); - map.put(key, tmap); + Map> map = new HashMap<>(); + + try(StringFileReader reader = new StringFileReader(new File(gffFile));){ + for(String tab : reader) { + if (tab.startsWith("#") || tab.startsWith("Hugo") || tab.startsWith("analysis")) { + continue; + } + + 
String[] values = tab.split("\t"); + String key = values[0]; + ChrRangePosition chrPos = new ChrRangePosition(key, new Integer(values[3]), new Integer(values[4])); + if (map.containsKey(key)) { + map.get(key).put(chrPos, tab); + } else { + TreeMap tmap = new TreeMap<>(); + tmap.put(chrPos, tab); + map.put(key, tmap); + } + } - } - reader.close(); - return map; + return map; } private void readTmpRecords(Chromosome c) throws Exception { File tmpFile = new File(snpTmpFileStem + c.getName()); - if (tmpFile.exists()) { - TabbedFileReader reader = new TabbedFileReader(tmpFile); + if(!tmpFile.exists()) return; + + try( StringFileReader reader = new StringFileReader(tmpFile); BufferedWriter dccWriter = new BufferedWriter(new FileWriter(snpLowConfStem + c.getName() + ".dcc")); - BufferedWriter txtWriter = new BufferedWriter(new FileWriter(snpLowConfStem + c.getName() + ".snp.txt")); + BufferedWriter txtWriter = new BufferedWriter(new FileWriter(snpLowConfStem + c.getName() + ".snp.txt"));) { + dccWriter.write(getDccColumnHeaders()); txtWriter.write(getColumnHeaders()); - Iterator iterator = reader.getRecordIterator(); + int count = 0; - while (iterator.hasNext()) { - TabbedRecord r = iterator.next(); + for (String r : reader) { + count++; - String[] values = r.getData().split("\t"); + String[] values = r.split("\t"); String baseString = ""; for (int i=6; i<=15; i++) { baseString += values[i] + "\t"; diff --git a/qpileup/test/org/qcmg/pileup/metrics/SnpMetricTest.java b/qpileup/test/org/qcmg/pileup/metrics/SnpMetricTest.java index 21d0870b3..d532aab8e 100644 --- a/qpileup/test/org/qcmg/pileup/metrics/SnpMetricTest.java +++ b/qpileup/test/org/qcmg/pileup/metrics/SnpMetricTest.java @@ -28,7 +28,6 @@ import org.qcmg.pileup.model.Chromosome; import org.qcmg.pileup.model.QPileupRecord; import org.qcmg.pileup.model.StrandEnum; -import org.qcmg.tab.TabbedRecord; public class SnpMetricTest { @@ -87,8 +86,8 @@ public void testGetWindow() { @Test public void testTabbedRecordFallsInCompareRecord() { - TreeMap map = new TreeMap(); - map.put(new ChrRangePosition(CHR1, 11, 13), new TabbedRecord()); + TreeMap map = new TreeMap(); + map.put(new ChrRangePosition(CHR1, 11, 13), new String()); assertTrue(metric.tabbedRecordFallsInCompareRecord(new ChrRangePosition(CHR1, 10, 12), map.firstEntry())); assertTrue(metric.tabbedRecordFallsInCompareRecord(new ChrRangePosition(CHR1, 10, 14), map.firstEntry())); assertFalse(metric.tabbedRecordFallsInCompareRecord(new ChrRangePosition(CHR1, 10, 10), map.firstEntry())); From bffb3c1da57c0c7e3865431c7e460a9dba9d4300 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 4 Nov 2020 15:28:27 +1000 Subject: [PATCH 04/73] create new gff3 and gff based on new recordreader --- qio/src/org/qcmg/qio/gff/GffReader.java | 30 ++ qio/src/org/qcmg/qio/gff/GffRecord.java | 178 +++++++++++ ...RecordChromosomeAndPositionComparator.java | 31 ++ .../gff3/GFF3RecordPositionComparator.java | 20 ++ qio/src/org/qcmg/qio/gff3/Gff3FileReader.java | 26 ++ qio/src/org/qcmg/qio/gff3/Gff3Record.java | 282 ++++++++++++++++++ 6 files changed, 567 insertions(+) create mode 100644 qio/src/org/qcmg/qio/gff/GffReader.java create mode 100644 qio/src/org/qcmg/qio/gff/GffRecord.java create mode 100644 qio/src/org/qcmg/qio/gff3/GFF3RecordChromosomeAndPositionComparator.java create mode 100644 qio/src/org/qcmg/qio/gff3/GFF3RecordPositionComparator.java create mode 100644 qio/src/org/qcmg/qio/gff3/Gff3FileReader.java create mode 100644 qio/src/org/qcmg/qio/gff3/Gff3Record.java diff --git 
a/qio/src/org/qcmg/qio/gff/GffReader.java b/qio/src/org/qcmg/qio/gff/GffReader.java new file mode 100644 index 000000000..59dd987d1 --- /dev/null +++ b/qio/src/org/qcmg/qio/gff/GffReader.java @@ -0,0 +1,30 @@ +package org.qcmg.qio.gff; + +import java.io.File; +import java.io.IOException; + +import org.qcmg.qio.record.RecordReader; + +public final class GffReader extends RecordReader { + public static final String TAB_DELIMITER = "\t"; + + public GffReader(File file) throws IOException { + super(file); + } + + @Override + public GffRecord getRecord(String line) throws Exception { + if (null == line) { + throw new AssertionError("Record was null"); + } + + String[] fields = line.split(TAB_DELIMITER); + + if (fields.length < 8) { + throw new Exception("Not enough fields in the Record"); + } + + return new GffRecord(fields); + } +} + diff --git a/qio/src/org/qcmg/qio/gff/GffRecord.java b/qio/src/org/qcmg/qio/gff/GffRecord.java new file mode 100644 index 000000000..c74e67360 --- /dev/null +++ b/qio/src/org/qcmg/qio/gff/GffRecord.java @@ -0,0 +1,178 @@ +package org.qcmg.qio.gff; + +import java.util.HashMap; +import java.util.Map; + +/** + * @author jpearson + * @version $Id: GffRecord.java,v 1.8 2009/08/17 20:17:35 jbeckstr Exp $ + * + * Data container class for records from SOLiD GFF format sequence + * alignment files. GFF is a tab-separated text file with unix-style + * line endings and the following fields of which the last two are + * optional: + * + * Fieldname Example value 1. seqname 1231_644_1328_F3 2. source solid + * 3. feature read 4. start 97 5. end 121 6. score 13.5 7. strand - 8. + * frame . 9. [attributes] b=TAGGGTTAGGGTTGGGTTAGGGTTA; c=AAA; + * g=T320010320010100103000103; i=1; p=1.000; + * q=23,28,27,20,17,12,24,16,20,8,13,26,28,2 + * 4,13,13,27,14,19,4,23,16,19,9,14; r=20_2; s=a20; u=0,1 10. + * [comments] + */ +public class GffRecord { + + // private String originalLine; // original line + private String seqname; // read ID + private String source; // should always be "solid" + private String feature; // should always be "read" + private int start; // start position of mapping to reference + private int end; // end position of mapping to reference + private double score; // quality of mapping + private String strand; // - or + + private String frame; // 1,2,3,. + private String attribStr; // this is the gold! 
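+	//   (i.e. the raw ninth-column "key=value;key=value" attribute string,
+	//    which the constructor below splits into the attributes map)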
+ private String comments; // comments (seldom present) + private Map attributes; // deconstruct attribStr + + /** + * Constructor 1 + * + * @param textRecord + * text GFF Record typically read from GFF file + * @throws Exception + * @throws QProfilerException + */ + public GffRecord(String[] fields) throws Exception { + // public GffRecord( String textRecord, String delimiter ) { + // this(); // call constructor 0 + // originalLine = textRecord; + + // String[] fields = textRecord.split( "\t" ); + seqname = fields[0]; + source = fields[1]; + feature = fields[2]; + start = Integer.parseInt(fields[3]); + end = Integer.parseInt(fields[4]); + score = Double.parseDouble(fields[5]); + strand = fields[6]; + frame = fields[7]; + + // Cope with the optional attribute field + if (fields.length > 8) { + attributes = new HashMap(); + + attribStr = fields[8]; + String[] tmpattribs = attribStr.split(";"); + for (int i = 0; i < tmpattribs.length; i++) { + String[] attrFields = tmpattribs[i].split("="); + if (attrFields.length < 2) { + throw new Exception("Attribute [" + tmpattribs[i] + + "] is badly formed"); + } + attributes.put(attrFields[0], attrFields[1]); + } + } + + // And comments is also optional + if (fields.length > 9) { + comments = fields[9]; + } + } + + public String getSeqname() { + return seqname; + } + + public void setSeqname(String seqname) { + this.seqname = seqname; + } + + public String getSource() { + return source; + } + + public void setSource(String source) { + this.source = source; + } + + public String getFeature() { + return feature; + } + + public void setFeature(String feature) { + this.feature = feature; + } + + public int getStart() { + return start; + } + + public void setStart(int start) { + this.start = start; + } + + public int getEnd() { + return end; + } + + public void setEnd(int end) { + this.end = end; + } + + public double getScore() { + return score; + } + + public void setScore(double score) { + this.score = score; + } + + public String getStrand() { + return strand; + } + + public void setStrand(String strand) { + this.strand = strand; + } + + public String getFrame() { + return frame; + } + + public void setFrame(String frame) { + this.frame = frame; + } + + public String getAttribStr() { + return attribStr; + } + + public void setAttribStr(String attribStr) { + this.attribStr = attribStr; + } + + public String getComments() { + return comments; + } + + public void setComments(String comments) { + this.comments = comments; + } + + public Map getAttributes() { + return attributes; + } + + public void setAttributes(Map attributes) { + this.attributes = attributes; + } + + public void setAttribute(String key, String value) { + this.attributes.put(key, value); + } + + public String getAttribute(String key) { + return null != attributes ? attributes.get(key) : null; + } +} diff --git a/qio/src/org/qcmg/qio/gff3/GFF3RecordChromosomeAndPositionComparator.java b/qio/src/org/qcmg/qio/gff3/GFF3RecordChromosomeAndPositionComparator.java new file mode 100644 index 000000000..dc4c7ca8e --- /dev/null +++ b/qio/src/org/qcmg/qio/gff3/GFF3RecordChromosomeAndPositionComparator.java @@ -0,0 +1,31 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
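+ *
+ * Orders Gff3Record instances by reference sequence name (delegating to
+ * ReferenceNameComparator on seqId), then by start and end position.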
+ */ +package org.qcmg.qio.gff3; + +import java.util.Comparator; + +import org.qcmg.common.model.ReferenceNameComparator; + +public class GFF3RecordChromosomeAndPositionComparator implements Comparator { + + private static final Comparator chrComp = new ReferenceNameComparator(); + + public int compare(Gff3Record recordA, Gff3Record recordB) { + + // first compare chromosome + int chrcompare = chrComp.compare(recordA.getSeqId(), recordB.getSeqId()); + + if (chrcompare != 0) return chrcompare; + + return compareStart(recordA, recordB) + compareEnd(recordA, recordB); + } + + public int compareStart(Gff3Record recordA, Gff3Record recordB) { + return recordA.getStart() - recordB.getStart(); + } + + public int compareEnd(Gff3Record recordA, Gff3Record recordB) { + return recordA.getEnd() - recordB.getEnd(); + } +} diff --git a/qio/src/org/qcmg/qio/gff3/GFF3RecordPositionComparator.java b/qio/src/org/qcmg/qio/gff3/GFF3RecordPositionComparator.java new file mode 100644 index 000000000..84482e46f --- /dev/null +++ b/qio/src/org/qcmg/qio/gff3/GFF3RecordPositionComparator.java @@ -0,0 +1,20 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qio.gff3; + +import java.util.Comparator; + +public class GFF3RecordPositionComparator implements Comparator { + public int compare(Gff3Record recordA, Gff3Record recordB) { + return compareStart(recordA, recordB) + compareEnd(recordA, recordB); + } + + public int compareStart(Gff3Record recordA, Gff3Record recordB) { + return recordA.getStart() - recordB.getStart(); + } + + public int compareEnd(Gff3Record recordA, Gff3Record recordB) { + return recordA.getEnd() - recordB.getEnd(); + } +} diff --git a/qio/src/org/qcmg/qio/gff3/Gff3FileReader.java b/qio/src/org/qcmg/qio/gff3/Gff3FileReader.java new file mode 100644 index 000000000..5c6c368e4 --- /dev/null +++ b/qio/src/org/qcmg/qio/gff3/Gff3FileReader.java @@ -0,0 +1,26 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qio.gff3; + +import java.io.File; +import java.io.IOException; + +import org.qcmg.common.util.Constants; +import org.qcmg.qio.record.RecordReader; + +public final class Gff3FileReader extends RecordReader { + private static final String HEADER_PREFIX = Constants.HASH_STRING; + + public Gff3FileReader(File file) throws IOException { + super(file, HEADER_PREFIX); + } + + @Override + public Gff3Record getRecord(String line) throws Exception { + return new Gff3Record(line); + } +} diff --git a/qio/src/org/qcmg/qio/gff3/Gff3Record.java b/qio/src/org/qcmg/qio/gff3/Gff3Record.java new file mode 100644 index 000000000..7aa259d4c --- /dev/null +++ b/qio/src/org/qcmg/qio/gff3/Gff3Record.java @@ -0,0 +1,282 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
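+ *
+ * Gff3Record models a single tab-separated GFF3 line: seqId, source, type, start,
+ * end, score, strand, phase and an optional attributes column. A minimal reading
+ * sketch, using the Iterable behaviour inherited from RecordReader (the file name
+ * is a placeholder):
+ *
+ *   try (Gff3FileReader reader = new Gff3FileReader(new File("features.gff3"))) {
+ *       for (Gff3Record record : reader) {
+ *           // e.g. record.getSeqId(), record.getStart(), record.getEnd()
+ *       }
+ *   }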
+ */ +package org.qcmg.qio.gff3; + +import org.qcmg.common.util.Constants; +import org.qcmg.common.util.TabTokenizer; + +public class Gff3Record { + + protected String seqId; + protected String source; + protected String type; + protected int start; + protected int end; + protected String score; + protected String strand; + protected String phase; + protected String attributes; + protected String rawData; + + public Gff3Record() {} + public Gff3Record(final String line) throws Exception { + String[] params = TabTokenizer.tokenize(line); + if (8 > params.length) { + throw new Exception("Bad GFF3 format. Insufficient columns: '" + line + "'"); + } + + setRawData(line); + setSeqId(params[0]); + setSource(params[1]); + setType(params[2]); + setStart(Integer.parseInt(params[3])); + setEnd(Integer.parseInt(params[4])); + setScore(params[5]); + setStrand(params[6]); + setPhase(params[7]); + if (8 < params.length) { + setAttributes(params[8]); + } + } + + /** + * Gets the value of the seqId property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSeqId() { + return seqId; + } + + /** + * Sets the value of the seqId property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSeqId(String value) { + this.seqId = value; + } + + /** + * Gets the value of the source property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSource() { + return source; + } + + /** + * Sets the value of the source property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSource(String value) { + this.source = value; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the start property. + * + */ + public int getStart() { + return start; + } + + /** + * Sets the value of the start property. + * + */ + public void setStart(int value) { + this.start = value; + } + + /** + * Gets the value of the end property. + * + */ + public int getEnd() { + return end; + } + + /** + * Sets the value of the end property. + * + */ + public void setEnd(int value) { + this.end = value; + } + + /** + * Gets the value of the score property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getScore() { + return score; + } + + /** + * Sets the value of the score property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setScore(String value) { + this.score = value; + } + + /** + * Gets the value of the strand property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getStrand() { + return strand; + } + + /** + * Sets the value of the strand property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setStrand(String value) { + this.strand = value; + } + + /** + * Gets the value of the phase property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getPhase() { + return phase; + } + + /** + * Sets the value of the phase property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setPhase(String value) { + this.phase = value; + } + + /** + * Gets the value of the attributes property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAttributes() { + return attributes; + } + + /** + * Sets the value of the attributes property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAttributes(String value) { + this.attributes = value; + } + + /** + * Gets the value of the rawData property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getRawData() { + return rawData; + } + + /** + * Sets the value of the rawData property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRawData(String value) { + this.rawData = value; + } + + public String toString() { + StringBuilder result = new StringBuilder(getSeqId()).append(Constants.TAB); + result.append(getSource()).append(Constants.TAB); + result.append(getType()).append(Constants.TAB); + result.append(getStart()).append(Constants.TAB); + result.append(getEnd()).append(Constants.TAB); + result.append(getScore()).append(Constants.TAB); + result.append(getStrand()).append(Constants.TAB); + result.append(getPhase()).append(Constants.TAB); + if (null != getAttributes()) { + result.append(getAttributes()); + } + return result.toString(); + } + +} From f42d6ded3a55bfdadc64ae20679839d742f0cc46 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 4 Nov 2020 15:29:19 +1000 Subject: [PATCH 05/73] update unit test --- qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java | 2 +- qio/test/org/qcmg/unused/simple/SimpleSerializerTest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java b/qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java index c3d0d7a6b..852f816ca 100644 --- a/qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java +++ b/qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java @@ -1,4 +1,4 @@ -package org.qcmg.illumina; +package org.qcmg.unused.illumina; import java.io.BufferedReader; import java.io.File; diff --git a/qio/test/org/qcmg/unused/simple/SimpleSerializerTest.java b/qio/test/org/qcmg/unused/simple/SimpleSerializerTest.java index 233da945d..a697b0da3 100644 --- a/qio/test/org/qcmg/unused/simple/SimpleSerializerTest.java +++ b/qio/test/org/qcmg/unused/simple/SimpleSerializerTest.java @@ -1,8 +1,8 @@ -package org.qcmg.simple; +package org.qcmg.unused.simple; import org.junit.Assert; import org.junit.Test; -import org.qcmg.record.SimpleRecord; +import org.qcmg.qmule.record.SimpleRecord; public class SimpleSerializerTest { From fc90a5e5a41b43fb41b5d2fc94d6e89681fde8b1 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 4 Nov 2020 16:40:40 +1000 Subject: [PATCH 06/73] update qcoverage with new gff3 --- qcoverage/src/org/qcmg/coverage/Coverage.java | 5 +- .../src/org/qcmg/coverage/CoverageJob.java | 12 ++--- qcoverage/src/org/qcmg/coverage/JobQueue.java | 20 ++++---- .../org/qcmg/coverage/CoverageJobTest.java | 28 +++++----- .../test/org/qcmg/coverage/JobQueueTest.java | 6 +-- .../MultiBamPhysicalCoverageTest.java | 10 ++-- .../PerFeaturePhysicalCoverageTest.java | 10 ++-- .../PerFeatureSequenceCoverageTest.java | 8 +-- .../qcmg/coverage/PhysicalCoverageTest.java | 8 +-- .../coverage/QueryPhysicalCoverageTest.java | 10 ++-- .../coverage/QuerySequenceCoverageTest.java | 10 ++-- 
.../qcmg/coverage/SequenceCoverageTest.java | 8 +-- qio/src/org/qcmg/qio/fasta/FastaReader.java | 33 ++++++++++++ qio/src/org/qcmg/qio/fasta/FastaRecord.java | 51 +++++++++++++++++++ 14 files changed, 151 insertions(+), 68 deletions(-) create mode 100644 qio/src/org/qcmg/qio/fasta/FastaReader.java create mode 100644 qio/src/org/qcmg/qio/fasta/FastaRecord.java diff --git a/qcoverage/src/org/qcmg/coverage/Coverage.java b/qcoverage/src/org/qcmg/coverage/Coverage.java index 418162336..90bb07ab7 100644 --- a/qcoverage/src/org/qcmg/coverage/Coverage.java +++ b/qcoverage/src/org/qcmg/coverage/Coverage.java @@ -33,8 +33,7 @@ import org.qcmg.common.vcf.header.VcfHeader; import org.qcmg.common.vcf.header.VcfHeaderRecord; import org.qcmg.common.vcf.header.VcfHeaderUtils; -import org.qcmg.vcf.VCFFileWriter; - +import org.qcmg.qio.record.RecordWriter; public final class Coverage { private final Options options; private final Configuration invariants; @@ -116,7 +115,7 @@ private void writeVCFReport(final QCoverageStats report) throws Exception { if ( ! vcfs.isEmpty()) { Collections.sort(vcfs, new VcfPositionComparator()); - try(final VCFFileWriter writer = new VCFFileWriter(file)) { + try(final RecordWriter writer = new RecordWriter<>(file)) { final VcfHeader header = getHeaderForQCoverage(options.getBAMFileNames()[0], options.getInputGFF3FileNames()[0]); for(final VcfHeaderRecord record: header) { writer.addHeader(record.toString()+"\n"); diff --git a/qcoverage/src/org/qcmg/coverage/CoverageJob.java b/qcoverage/src/org/qcmg/coverage/CoverageJob.java index fe8b1d899..d18aab2a4 100644 --- a/qcoverage/src/org/qcmg/coverage/CoverageJob.java +++ b/qcoverage/src/org/qcmg/coverage/CoverageJob.java @@ -19,14 +19,14 @@ import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.util.Pair; -import org.qcmg.gff3.GFF3Record; import org.qcmg.picard.SAMFileReaderFactory; import org.qcmg.qbamfilter.query.QueryExecutor; +import org.qcmg.qio.gff3.Gff3Record; class CoverageJob implements Job { private final int refLength; private final String refName; - private final HashSet features; + private final HashSet features; private int[] perBaseCoverages; // Uses 0-based coordinate indexing private final HashMap> idToCoverageToBaseCountMap = new HashMap>(); private final QLogger logger; @@ -38,12 +38,12 @@ class CoverageJob implements Job { private final ReadsNumberCounter counterOut; private boolean fullyPopulated; - CoverageJob(final String refName, final int refLength, final HashMap> refToFeaturesMap, + CoverageJob(final String refName, final int refLength, final HashMap> refToFeaturesMap, final HashSet> filePairs, final QueryExecutor filter, final boolean perFeatureFlag, final Algorithm algorithm, final ReadsNumberCounter counterIn,final ReadsNumberCounter counterOut) throws Exception { this(refName, refLength, refToFeaturesMap, filePairs, filter, perFeatureFlag, algorithm, counterIn, counterOut, null); } - CoverageJob(final String refName, final int refLength, final HashMap> refToFeaturesMap, + CoverageJob(final String refName, final int refLength, final HashMap> refToFeaturesMap, final HashSet> filePairs, final QueryExecutor filter, final boolean perFeatureFlag, final Algorithm algorithm, final ReadsNumberCounter counterIn,final ReadsNumberCounter counterOut, final String validation) throws Exception { assert (refLength > -1); @@ -103,7 +103,7 @@ void constructCoverageMap() { logger.debug("in constructCoverageMap with array length: " + perBaseCoverages.length); // For all 
coordinates where a feature exists, set to zero coverage - for (GFF3Record feature : features) { + for (Gff3Record feature : features) { int start = feature.getStart(); if (start == 0) @@ -179,7 +179,7 @@ private void performCoverage() throws Exception { } private void assembleResults() { - for (GFF3Record feature : features) { + for (Gff3Record feature : features) { String id = null; if (perFeatureFlag) { id = feature.getRawData(); diff --git a/qcoverage/src/org/qcmg/coverage/JobQueue.java b/qcoverage/src/org/qcmg/coverage/JobQueue.java index caf7a038a..9acefacdb 100755 --- a/qcoverage/src/org/qcmg/coverage/JobQueue.java +++ b/qcoverage/src/org/qcmg/coverage/JobQueue.java @@ -31,10 +31,10 @@ import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.util.Constants; import org.qcmg.common.util.Pair; -import org.qcmg.gff3.GFF3FileReader; -import org.qcmg.gff3.GFF3Record; import org.qcmg.picard.SAMFileReaderFactory; import org.qcmg.qbamfilter.query.QueryExecutor; +import org.qcmg.qio.gff3.Gff3FileReader; +import org.qcmg.qio.gff3.Gff3Record; public final class JobQueue { private final HashMap> perIdPerCoverageBaseCounts = new HashMap>(); @@ -43,7 +43,7 @@ public final class JobQueue { private int numberFeatures = 0; private final File gff3File; private final HashSet refNames = new HashSet(); - private final HashMap> perRefnameFeatures = new HashMap>(); + private final HashMap> perRefnameFeatures = new HashMap>(); private final HashMap perRefnameLengths = new HashMap(); private final HashMap> perLengthRefnames = new HashMap>(); private final HashSet> filePairs; @@ -150,13 +150,13 @@ private void reduceResults() throws Exception { private void loadFeatures() throws Exception, IOException { identifyRefNames(); - GFF3FileReader featureReader = new GFF3FileReader(gff3File); - for (final GFF3Record feature : featureReader) { + Gff3FileReader featureReader = new Gff3FileReader(gff3File); + for (final Gff3Record feature : featureReader) { String ref = feature.getSeqId(); if (refNames.contains(ref)) { - HashSet features = perRefnameFeatures.get(ref); + HashSet features = perRefnameFeatures.get(ref); if (null == features) { - features = new HashSet(); + features = new HashSet(); perRefnameFeatures.put(ref, features); } features.add(feature); @@ -182,8 +182,8 @@ private Collection identifyGff3RefNames() throws Exception, IOException Map gff3RefNames = new HashMap<>(); final StringBuilder gffErrors = new StringBuilder(); - try (GFF3FileReader gff3Reader = new GFF3FileReader(gff3File);) { - for (GFF3Record record : gff3Reader) { + try (Gff3FileReader gff3Reader = new Gff3FileReader(gff3File);) { + for (Gff3Record record : gff3Reader) { if (isGff3RecordValid(record)) { numberFeatures++; String refName = record.getSeqId(); @@ -232,7 +232,7 @@ private Collection identifyGff3RefNames() throws Exception, IOException * @param record * @return */ - public static boolean isGff3RecordValid(GFF3Record record) { + public static boolean isGff3RecordValid(Gff3Record record) { return null != record && record.getStart() <= record.getEnd(); } diff --git a/qcoverage/test/org/qcmg/coverage/CoverageJobTest.java b/qcoverage/test/org/qcmg/coverage/CoverageJobTest.java index d816ae84b..f97bb3810 100644 --- a/qcoverage/test/org/qcmg/coverage/CoverageJobTest.java +++ b/qcoverage/test/org/qcmg/coverage/CoverageJobTest.java @@ -8,7 +8,7 @@ import org.junit.Ignore; import org.junit.Test; import org.qcmg.common.util.Pair; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qio.gff3.Gff3Record; public class CoverageJobTest { 
@@ -17,10 +17,10 @@ public class CoverageJobTest { @Test public void testConstructCoverageMap() throws Exception { - HashSet features = new HashSet(); - HashMap> refToFeaturesMap = new HashMap>(); + HashSet features = new HashSet(); + HashMap> refToFeaturesMap = new HashMap>(); - GFF3Record gff = new GFF3Record(); + Gff3Record gff = new Gff3Record(); gff.setStart(100); gff.setEnd(200); @@ -54,14 +54,14 @@ else if (i < gff.getEnd()) } @Test public void testConstructCoverageMapManyFeatures() throws Exception { - HashSet features = new HashSet(); - HashMap> refToFeaturesMap = new HashMap>(); + HashSet features = new HashSet(); + HashMap> refToFeaturesMap = new HashMap>(); int gffRange = 100; for (int i = 1 ; i < 1000 ; i++) { int startPosition = i * 1000; - GFF3Record gff = new GFF3Record(); + Gff3Record gff = new Gff3Record(); gff.setStart(startPosition); gff.setEnd(startPosition + gffRange); features.add(gff); @@ -78,7 +78,7 @@ public void testConstructCoverageMapManyFeatures() throws Exception { constructCoverageMapOld(arrayOld, features); Assert.assertEquals(arraySize, arrayNew.length); - for (GFF3Record gff : features) { + for (Gff3Record gff : features) { int start = gff.getStart() -1; int end = gff.getEnd(); for (int i = start ; i < end && i < arraySize; i++) { @@ -96,14 +96,14 @@ public void testConstructCoverageMapManyFeatures() throws Exception { @Ignore public void testConstructCoverageMapPerformance() throws Exception { - HashSet features = new HashSet(); - HashMap> refToFeaturesMap = new HashMap>(); + HashSet features = new HashSet(); + HashMap> refToFeaturesMap = new HashMap>(); int gffRange = 100; for (int i = 1 ; i < 10000 ; i++) { int startPosition = i * 1000; - GFF3Record gff = new GFF3Record(); + Gff3Record gff = new Gff3Record(); gff.setStart(startPosition); gff.setEnd(startPosition + gffRange); features.add(gff); @@ -124,7 +124,7 @@ public void testConstructCoverageMapPerformance() throws Exception { System.out.println("Old: " + (System.nanoTime() - startTime)); Assert.assertEquals(arraySize, arrayNew.length); - for (GFF3Record gff : features) { + for (Gff3Record gff : features) { int start = gff.getStart() -1; int end = gff.getEnd(); for (int i = start ; i < end && i < arraySize; i++) { @@ -155,13 +155,13 @@ public void testConstructCoverageMapPerformance() throws Exception { } - private void constructCoverageMapOld(int [] perBaseCoverages, HashSet features) { + private void constructCoverageMapOld(int [] perBaseCoverages, HashSet features) { // Initially set all values to -1 for no coverage at that coordinate for (int i = 0; i < perBaseCoverages.length; i++) { perBaseCoverages[i] = -1; } // For all coordinates where a feature exists, set to zero coverage - for (GFF3Record feature : features) { + for (Gff3Record feature : features) { for (int coord = feature.getStart(); coord <= feature.getEnd(); coord++) { // GFF3 format uses 1-based feature coordinates; avoid problem // of GFF3 accidentally containing 0 coordinate diff --git a/qcoverage/test/org/qcmg/coverage/JobQueueTest.java b/qcoverage/test/org/qcmg/coverage/JobQueueTest.java index 31edca434..652f8a953 100644 --- a/qcoverage/test/org/qcmg/coverage/JobQueueTest.java +++ b/qcoverage/test/org/qcmg/coverage/JobQueueTest.java @@ -3,15 +3,15 @@ import static org.junit.Assert.assertEquals; import org.junit.Test; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qio.gff3.Gff3Record; public class JobQueueTest { @Test public void isGFF3RecordValid() { assertEquals(false, JobQueue.isGff3RecordValid(null)); - 
assertEquals(true, JobQueue.isGff3RecordValid(new GFF3Record())); - GFF3Record rec = new GFF3Record(); + assertEquals(true, JobQueue.isGff3RecordValid(new Gff3Record())); + Gff3Record rec = new Gff3Record(); rec.setStart(1); rec.setEnd(0); assertEquals(false, JobQueue.isGff3RecordValid(rec)); diff --git a/qcoverage/test/org/qcmg/coverage/MultiBamPhysicalCoverageTest.java b/qcoverage/test/org/qcmg/coverage/MultiBamPhysicalCoverageTest.java index d4f81fe0f..60db9fbfb 100644 --- a/qcoverage/test/org/qcmg/coverage/MultiBamPhysicalCoverageTest.java +++ b/qcoverage/test/org/qcmg/coverage/MultiBamPhysicalCoverageTest.java @@ -22,8 +22,8 @@ import org.junit.Test; import org.junit.rules.ExpectedException; import org.qcmg.common.commandline.Executor; -import org.qcmg.gff3.GFF3FileWriter; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.RecordWriter; public class MultiBamPhysicalCoverageTest { @@ -33,7 +33,7 @@ public class MultiBamPhysicalCoverageTest { static String inputBai2; static Path tmpDir; private File fOutput; - static GFF3Record record; + static Gff3Record record; @Rule public ExpectedException thrown = ExpectedException.none(); @@ -48,7 +48,7 @@ public static void setup() throws IOException { SequenceCoverageTest.createCoverageBam(inputBam, SequenceCoverageTest.getAACSAMRecords(SortOrder.coordinate), SequenceCoverageTest.createSamHeaderObject(SortOrder.coordinate)); SequenceCoverageTest.createCoverageBam(inputBam2, SequenceCoverageTest.getAACSAMRecords(SortOrder.coordinate), SequenceCoverageTest.createSamHeaderObject(SortOrder.coordinate)); - record = new GFF3Record(); + record = new Gff3Record(); record.setSeqId("chr1"); record.setType("exon"); record.setScore("."); @@ -80,7 +80,7 @@ private File createGFF3File(final int start, final int end) throws IOException { record.setEnd(end); File file = new File(tmpDir + "/test" + start +"-" + end + ".gff3"); - try (GFF3FileWriter writer = new GFF3FileWriter(file)) { + try (RecordWriter writer = new RecordWriter<>(file)) { writer.add(record); } return file; diff --git a/qcoverage/test/org/qcmg/coverage/PerFeaturePhysicalCoverageTest.java b/qcoverage/test/org/qcmg/coverage/PerFeaturePhysicalCoverageTest.java index 445de83dc..760f08121 100644 --- a/qcoverage/test/org/qcmg/coverage/PerFeaturePhysicalCoverageTest.java +++ b/qcoverage/test/org/qcmg/coverage/PerFeaturePhysicalCoverageTest.java @@ -22,8 +22,8 @@ import org.junit.Test; import org.junit.rules.ExpectedException; import org.qcmg.common.commandline.Executor; -import org.qcmg.gff3.GFF3FileWriter; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.RecordWriter; public class PerFeaturePhysicalCoverageTest { @@ -31,7 +31,7 @@ public class PerFeaturePhysicalCoverageTest { static String inputBai; static Path tmpDir; private File fOutput; - static GFF3Record record; + static Gff3Record record; @Rule @@ -45,7 +45,7 @@ public static void setup() throws IOException { inputBai = inputBam.replace("bam", "bai"); SequenceCoverageTest.createCoverageBam(inputBam, SequenceCoverageTest.getAACSAMRecords(SortOrder.coordinate), SequenceCoverageTest.createSamHeaderObject(SortOrder.coordinate)); - record = new GFF3Record(); + record = new Gff3Record(); record.setSeqId("chr1"); record.setType("exon"); record.setScore("."); @@ -77,7 +77,7 @@ private File createGFF3File(final int start, final int end) throws IOException { record.setEnd(end); File file = new File(tmpDir + "/test" + start +"-" + end + ".gff3"); - try 
(GFF3FileWriter writer = new GFF3FileWriter(file)) { + try (RecordWriter writer = new RecordWriter<>(file)) { writer.add(record); } return file; diff --git a/qcoverage/test/org/qcmg/coverage/PerFeatureSequenceCoverageTest.java b/qcoverage/test/org/qcmg/coverage/PerFeatureSequenceCoverageTest.java index d02120f98..4b4182b47 100644 --- a/qcoverage/test/org/qcmg/coverage/PerFeatureSequenceCoverageTest.java +++ b/qcoverage/test/org/qcmg/coverage/PerFeatureSequenceCoverageTest.java @@ -22,8 +22,8 @@ import org.junit.Test; import org.junit.rules.ExpectedException; import org.qcmg.common.commandline.Executor; -import org.qcmg.gff3.GFF3FileWriter; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.RecordWriter; public class PerFeatureSequenceCoverageTest { static String inputBam; @@ -62,7 +62,7 @@ private String getCmd(int start, int stop) { } private File createGFF3File(final int start, final int end) throws IOException { - GFF3Record record = new GFF3Record(); + Gff3Record record = new Gff3Record(); record.setSeqId("chr1"); record.setType("exon"); record.setStart(start); @@ -72,7 +72,7 @@ private File createGFF3File(final int start, final int end) throws IOException { record.setStrand("+"); File file = new File(tmpDir + "/test" + start +"-" + end + ".gff3"); - try (GFF3FileWriter writer = new GFF3FileWriter(file)) { + try (RecordWriter writer = new RecordWriter<>(file)) { writer.add(record); } diff --git a/qcoverage/test/org/qcmg/coverage/PhysicalCoverageTest.java b/qcoverage/test/org/qcmg/coverage/PhysicalCoverageTest.java index 037c93f41..d49ba42a2 100644 --- a/qcoverage/test/org/qcmg/coverage/PhysicalCoverageTest.java +++ b/qcoverage/test/org/qcmg/coverage/PhysicalCoverageTest.java @@ -22,8 +22,8 @@ import org.junit.Test; import org.junit.rules.ExpectedException; import org.qcmg.common.commandline.Executor; -import org.qcmg.gff3.GFF3FileWriter; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.RecordWriter; public class PhysicalCoverageTest { static String inputBam; @@ -62,7 +62,7 @@ private String getCmd(int start, int stop) { } private File createGFF3File(final int start, final int end) throws IOException { - GFF3Record record = new GFF3Record(); + Gff3Record record = new Gff3Record(); record.setSeqId("chr1"); record.setType("exon"); record.setStart(start); @@ -72,7 +72,7 @@ private File createGFF3File(final int start, final int end) throws IOException { record.setStrand("+"); File file = new File(tmpDir + "/test" + start +"-" + end + ".gff3"); - try (GFF3FileWriter writer = new GFF3FileWriter(file)) { + try (RecordWriter writer = new RecordWriter<>(file)) { writer.add(record); } diff --git a/qcoverage/test/org/qcmg/coverage/QueryPhysicalCoverageTest.java b/qcoverage/test/org/qcmg/coverage/QueryPhysicalCoverageTest.java index 892fe3e8b..39eb1b33f 100644 --- a/qcoverage/test/org/qcmg/coverage/QueryPhysicalCoverageTest.java +++ b/qcoverage/test/org/qcmg/coverage/QueryPhysicalCoverageTest.java @@ -21,15 +21,15 @@ import org.junit.Test; import org.junit.rules.ExpectedException; import org.qcmg.common.commandline.Executor; -import org.qcmg.gff3.GFF3FileWriter; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.RecordWriter; public class QueryPhysicalCoverageTest { static String inputBam; static String inputBai; static Path tmpDir; private File fOutput; - static GFF3Record record; + static Gff3Record record; @Rule public ExpectedException thrown = 
ExpectedException.none(); @@ -41,7 +41,7 @@ public static void setup() throws Exception { inputBai = inputBam.replace("bam", "bai"); SequenceCoverageTest.createCoverageBam(inputBam, SequenceCoverageTest.getAACSAMRecords(SortOrder.coordinate), SequenceCoverageTest.createSamHeaderObject(SortOrder.coordinate)); - record = new GFF3Record(); + record = new Gff3Record(); record.setSeqId("chr1"); record.setType("exon"); record.setScore("."); @@ -76,7 +76,7 @@ private File createGFF3File(final int start, final int end) throws IOException { record.setEnd(end); File file = new File(tmpDir + "/test" + start +"-" + end + ".gff3"); - try (GFF3FileWriter writer = new GFF3FileWriter(file)) { + try (RecordWriter writer = new RecordWriter<>(file)) { writer.add(record); } return file; diff --git a/qcoverage/test/org/qcmg/coverage/QuerySequenceCoverageTest.java b/qcoverage/test/org/qcmg/coverage/QuerySequenceCoverageTest.java index d7cbaa6a8..15f02c88e 100644 --- a/qcoverage/test/org/qcmg/coverage/QuerySequenceCoverageTest.java +++ b/qcoverage/test/org/qcmg/coverage/QuerySequenceCoverageTest.java @@ -22,8 +22,8 @@ import org.junit.Test; import org.junit.rules.ExpectedException; import org.qcmg.common.commandline.Executor; -import org.qcmg.gff3.GFF3FileWriter; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.RecordWriter; public class QuerySequenceCoverageTest { @@ -31,7 +31,7 @@ public class QuerySequenceCoverageTest { static String inputBai; static Path tmpDir; private File fOutput; - static GFF3Record record; + static Gff3Record record; @Rule public ExpectedException thrown = ExpectedException.none(); @@ -43,7 +43,7 @@ public static void setup() throws Exception { inputBai = inputBam.replace("bam", "bai"); SequenceCoverageTest.createCoverageBam(inputBam, SequenceCoverageTest.getAACSAMRecords(SortOrder.coordinate), SequenceCoverageTest.createSamHeaderObject(SortOrder.coordinate)); - record = new GFF3Record(); + record = new Gff3Record(); record.setSeqId("chr1"); record.setType("exon"); record.setScore("."); @@ -78,7 +78,7 @@ private File createGFF3File(final int start, final int end) throws IOException { record.setEnd(end); File file = new File(tmpDir + "/test" + start +"-" + end + ".gff3"); - try (GFF3FileWriter writer = new GFF3FileWriter(file)) { + try (RecordWriter writer = new RecordWriter<>(file)) { writer.add(record); } return file; diff --git a/qcoverage/test/org/qcmg/coverage/SequenceCoverageTest.java b/qcoverage/test/org/qcmg/coverage/SequenceCoverageTest.java index 6b11328a5..7097428fd 100644 --- a/qcoverage/test/org/qcmg/coverage/SequenceCoverageTest.java +++ b/qcoverage/test/org/qcmg/coverage/SequenceCoverageTest.java @@ -33,9 +33,9 @@ import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import org.qcmg.common.commandline.Executor; -import org.qcmg.gff3.GFF3FileWriter; -import org.qcmg.gff3.GFF3Record; import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.RecordWriter; public class SequenceCoverageTest { @@ -100,7 +100,7 @@ public final void before() throws Exception { } private void createGFF3File(final int start, final int end, File file) throws IOException { - GFF3Record record = new GFF3Record(); + Gff3Record record = new Gff3Record(); record.setSeqId("chr1"); record.setType("exon"); record.setStart(start); @@ -109,7 +109,7 @@ private void createGFF3File(final int start, final int end, File file) throws IO record.setSource("."); record.setStrand("+"); - 
try (GFF3FileWriter writer = new GFF3FileWriter(file)) { + try (RecordWriter writer = new RecordWriter<>(file)) { writer.add(record); } } diff --git a/qio/src/org/qcmg/qio/fasta/FastaReader.java b/qio/src/org/qcmg/qio/fasta/FastaReader.java new file mode 100644 index 000000000..88d3b2cfe --- /dev/null +++ b/qio/src/org/qcmg/qio/fasta/FastaReader.java @@ -0,0 +1,33 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ + +package org.qcmg.qio.fasta; + +import java.io.File; + +import org.qcmg.qio.record.RecordReader; + +/** + * File reader for the FASTA file format; each record spans two lines: an id line and a sequence line. + * @author christix + * + */ +public final class FastaReader extends RecordReader { + private static final String HEADER_PREFIX = "#"; + + public FastaReader(File file) throws Exception { + super(file, HEADER_PREFIX); + } + + /** + * reads two lines (id and sequence) to construct one record + */ + @Override + public FastaRecord getRecord(String line) throws Exception { + String id = line; + String seq = bin.readLine(); + + return new FastaRecord(id, seq); + } +} diff --git a/qio/src/org/qcmg/qio/fasta/FastaRecord.java b/qio/src/org/qcmg/qio/fasta/FastaRecord.java new file mode 100644 index 000000000..8908a0994 --- /dev/null +++ b/qio/src/org/qcmg/qio/fasta/FastaRecord.java @@ -0,0 +1,51 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qio.fasta; + +/** + * Container class for records that have an id and some data + *
+ * eg. the .csfasta format from SOLiD sequence alignment files. + * Each record is split over two lines. The first line starts with '>' followed by the ID, + * the subsequent line contains the colour space sequence + * + * @author oholmes christina + */ +public class FastaRecord { + private static final String ID_PREFIX = ">"; + + private String id; + private String data; + + public FastaRecord(String id, String data) throws Exception { + setId(id); + setData(data); + } + + public FastaRecord() {} + + public void setId(String id) throws Exception { + //id must start with > + if ( ! id.startsWith(ID_PREFIX)) { + throw new Exception("Bad id format: " + id); + } + this.id = id; + } + + public String getId() { + return id; + } + + public void setData(String data) throws Exception { + //sequence should not start with > + if (data.startsWith(ID_PREFIX)) { + throw new Exception("Bad sequence format: " + data); + } + this.data = data; + } + + public String getData() { + return data; + } +} From 5a05a30aee97b48aac411eb82f6d331b2d218319 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 4 Nov 2020 16:46:52 +1000 Subject: [PATCH 07/73] update qprofiler with new fasta record --- .../qcmg/qprofiler/fasta/FastaSummarizer.java | 13 +++++++------ .../qprofiler/fasta/FastaSummaryReport.java | 4 ++-- .../org/qcmg/qprofiler/gff/GffSummarizer.java | 17 ++++++++--------- .../qcmg/qprofiler/gff/GffSummaryReport.java | 4 ++-- .../org/qcmg/qprofiler/qual/QualSummarizer.java | 14 ++++++-------- .../qcmg/qprofiler/qual/QualSummaryReport.java | 4 ++-- .../org/qcmg/qprofiler/vcf/VcfSummarizer.java | 6 +----- .../test/org/qcmg/qprofiler/QProfilerTest.java | 6 +++--- 8 files changed, 31 insertions(+), 37 deletions(-) diff --git a/qprofiler/src/org/qcmg/qprofiler/fasta/FastaSummarizer.java b/qprofiler/src/org/qcmg/qprofiler/fasta/FastaSummarizer.java index 70274b936..9bc09bb23 100644 --- a/qprofiler/src/org/qcmg/qprofiler/fasta/FastaSummarizer.java +++ b/qprofiler/src/org/qcmg/qprofiler/fasta/FastaSummarizer.java @@ -25,11 +25,12 @@ import org.qcmg.common.log.QLevel; import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.qio.fasta.FastaReader; +import org.qcmg.qio.fasta.FastaRecord; import org.qcmg.qprofiler.report.SummaryReport; import org.qcmg.qprofiler.summarise.Summarizer; -import org.qcmg.record.Record; -import org.qcmg.record.SimpleRecord; -import org.qcmg.simple.SimpleFileReader; + +//import org.qcmg.simple.SimpleFileReader; public class FastaSummarizer implements Summarizer { @@ -56,11 +57,11 @@ public SummaryReport summarize(String input, String index, String[] regions) thr long recordsParsed = 0; - try (SimpleFileReader reader = new SimpleFileReader(new File(input));){ - for (Record record : reader) { + try (FastaReader reader = new FastaReader(new File(input));){ + for (FastaRecord record : reader) { if (null != record) { - fastaSummaryReport.parseRecord((SimpleRecord) record); + fastaSummaryReport.parseRecord(record); recordsParsed = fastaSummaryReport.getRecordsParsed(); if (isLevelEnabled && recordsParsed % (FEEDBACK_LINES_COUNT * 2) == 0) { diff --git a/qprofiler/src/org/qcmg/qprofiler/fasta/FastaSummaryReport.java b/qprofiler/src/org/qcmg/qprofiler/fasta/FastaSummaryReport.java index 0ce52acc3..20b5d89e8 100644 --- a/qprofiler/src/org/qcmg/qprofiler/fasta/FastaSummaryReport.java +++ b/qprofiler/src/org/qcmg/qprofiler/fasta/FastaSummaryReport.java @@ -13,12 +13,12 @@ import java.util.concurrent.atomic.AtomicLong; import org.qcmg.common.model.ProfileType; +import
org.qcmg.qio.fasta.FastaRecord; import org.qcmg.qprofiler.report.SummaryReport; import org.qcmg.qprofiler.util.SummaryReportUtils; import org.qcmg.qvisualise.util.SummaryByCycle; import org.qcmg.qvisualise.util.SummaryByCycleUtils; -import org.qcmg.record.SimpleRecord; import org.w3c.dom.Element; public class FastaSummaryReport extends SummaryReport { @@ -61,7 +61,7 @@ public void toXml(Element parent) { * * @return next row in file */ - public void parseRecord(SimpleRecord record) { + public void parseRecord(FastaRecord record) { if (null != record) { String data = record.getData(); diff --git a/qprofiler/src/org/qcmg/qprofiler/gff/GffSummarizer.java b/qprofiler/src/org/qcmg/qprofiler/gff/GffSummarizer.java index 614006912..9cc3c594f 100644 --- a/qprofiler/src/org/qcmg/qprofiler/gff/GffSummarizer.java +++ b/qprofiler/src/org/qcmg/qprofiler/gff/GffSummarizer.java @@ -20,24 +20,21 @@ package org.qcmg.qprofiler.gff; import java.io.File; -import java.io.IOException; - import org.qcmg.common.date.DateUtils; import org.qcmg.common.log.QLevel; import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.gff.GFFReader; -import org.qcmg.gff.GFFRecord; +import org.qcmg.qio.gff.GffReader; +import org.qcmg.qio.gff.GffRecord; import org.qcmg.qprofiler.report.SummaryReport; import org.qcmg.qprofiler.summarise.Summarizer; -import org.qcmg.record.Record; public class GffSummarizer implements Summarizer { private static final QLogger logger = QLoggerFactory.getLogger(GffSummarizer.class); @Override - public SummaryReport summarize(String input, String index, String[] regions) throws IOException { + public SummaryReport summarize(String input, String index, String[] regions){ GffSummaryReport gffSummaryReport = new GffSummaryReport(); gffSummaryReport.setFileName(input); @@ -47,14 +44,16 @@ public SummaryReport summarize(String input, String index, String[] regions) thr final boolean isLevelEnabled = logger.isLevelEnabled(QLevel.DEBUG); - try (GFFReader reader = new GFFReader(new File(input));){ - for (Record record : reader) { - gffSummaryReport.parseRecord((GFFRecord) record); + try (GffReader reader = new GffReader(new File(input));){ + for (GffRecord record : reader) { + gffSummaryReport.parseRecord((GffRecord) record); if (isLevelEnabled && gffSummaryReport.getRecordsParsed() % FEEDBACK_LINES_COUNT == 0) { logger.debug("Records parsed: " + gffSummaryReport.getRecordsParsed()); } } + } catch (Exception e) { + logger.warn("error during reading gff file:"+input + "\n"+e.getMessage()); } gffSummaryReport.setFinishTime(DateUtils.getCurrentDateAsString()); diff --git a/qprofiler/src/org/qcmg/qprofiler/gff/GffSummaryReport.java b/qprofiler/src/org/qcmg/qprofiler/gff/GffSummaryReport.java index 1a3a18ba6..a7179e10f 100644 --- a/qprofiler/src/org/qcmg/qprofiler/gff/GffSummaryReport.java +++ b/qprofiler/src/org/qcmg/qprofiler/gff/GffSummaryReport.java @@ -9,7 +9,7 @@ package org.qcmg.qprofiler.gff; import org.qcmg.common.model.ProfileType; -import org.qcmg.gff.GFFRecord; +import org.qcmg.qio.gff.GffRecord; import org.qcmg.qprofiler.report.SummaryReport; import org.qcmg.qvisualise.util.SummaryByCycle; import org.w3c.dom.Element; @@ -34,7 +34,7 @@ public void toXml(Element parent) { * * @return next row in file */ - public void parseRecord(GFFRecord gffRecord) { + public void parseRecord(GffRecord gffRecord) { updateRecordsParsed(); // recordsParsed++; diff --git a/qprofiler/src/org/qcmg/qprofiler/qual/QualSummarizer.java 
b/qprofiler/src/org/qcmg/qprofiler/qual/QualSummarizer.java index 440270cb4..629e8bc82 100644 --- a/qprofiler/src/org/qcmg/qprofiler/qual/QualSummarizer.java +++ b/qprofiler/src/org/qcmg/qprofiler/qual/QualSummarizer.java @@ -25,11 +25,11 @@ import org.qcmg.common.log.QLevel; import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.qio.fasta.FastaReader; +import org.qcmg.qio.fasta.FastaRecord; import org.qcmg.qprofiler.report.SummaryReport; import org.qcmg.qprofiler.summarise.Summarizer; -import org.qcmg.record.Record; -import org.qcmg.record.SimpleRecord; -import org.qcmg.simple.SimpleFileReader; + public class QualSummarizer implements Summarizer { @@ -44,8 +44,6 @@ public QualSummarizer(String [] excludeArray) { @Override public SummaryReport summarize(String input, String index, String[] regions) throws Exception { - - // create the SummaryReport QualSummaryReport qualSummaryReport = new QualSummaryReport(excludes); @@ -55,12 +53,12 @@ public SummaryReport summarize(String input, String index, String[] regions) thr // set logging level for printing of no of records parsed final boolean isLevelEnabled = logger.isLevelEnabled(QLevel.DEBUG); - try (SimpleFileReader reader = new SimpleFileReader(new File(input));){ - for (Record record : reader) { + try (FastaReader reader = new FastaReader(new File(input));){ + for (FastaRecord record : reader) { if (null != record) { try { - qualSummaryReport.parseRecord((SimpleRecord) record); + qualSummaryReport.parseRecord( record); } catch (Exception e) { logger.error("Exception caught in QualSummarizer, number of records parsed: " + qualSummaryReport.getRecordsParsed()); diff --git a/qprofiler/src/org/qcmg/qprofiler/qual/QualSummaryReport.java b/qprofiler/src/org/qcmg/qprofiler/qual/QualSummaryReport.java index 5f5fac206..5985777f9 100644 --- a/qprofiler/src/org/qcmg/qprofiler/qual/QualSummaryReport.java +++ b/qprofiler/src/org/qcmg/qprofiler/qual/QualSummaryReport.java @@ -14,11 +14,11 @@ import java.util.concurrent.atomic.AtomicLong; import org.qcmg.common.model.ProfileType; +import org.qcmg.qio.fasta.FastaRecord; import org.qcmg.qprofiler.report.SummaryReport; import org.qcmg.qprofiler.util.SummaryReportUtils; import org.qcmg.qvisualise.util.SummaryByCycle; import org.qcmg.qvisualise.util.SummaryByCycleUtils; -import org.qcmg.record.SimpleRecord; import org.w3c.dom.Element; public class QualSummaryReport extends SummaryReport { @@ -69,7 +69,7 @@ public void toXml(Element parent) { * @see org.qcmg.qprofiler.util.SummaryByCycleUtils#parseIntegerSummary(SummaryByCycle, String, String) * @see org.qcmg.qprofiler.util.SummaryReportUtils#tallyQualScores(String, Map, String) */ - public void parseRecord(SimpleRecord record) throws Exception{ + public void parseRecord(FastaRecord record) throws Exception{ if (null != record) { updateRecordsParsed(); diff --git a/qprofiler/src/org/qcmg/qprofiler/vcf/VcfSummarizer.java b/qprofiler/src/org/qcmg/qprofiler/vcf/VcfSummarizer.java index f8b25b05a..a38767924 100644 --- a/qprofiler/src/org/qcmg/qprofiler/vcf/VcfSummarizer.java +++ b/qprofiler/src/org/qcmg/qprofiler/vcf/VcfSummarizer.java @@ -7,12 +7,9 @@ import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.header.VcfHeader; -import org.qcmg.common.vcf.header.VcfHeaderRecord; +import org.qcmg.vcf.VCFFileReader; import org.qcmg.qprofiler.report.SummaryReport; import org.qcmg.qprofiler.summarise.Summarizer; -import 
org.qcmg.vcf.VCFFileReader; -import org.qcmg.vcf.VCFFileWriter; public class VcfSummarizer implements Summarizer { private final static QLogger logger = QLoggerFactory.getLogger(VcfSummarizer.class); @@ -21,7 +18,6 @@ public class VcfSummarizer implements Summarizer { public SummaryReport summarize(String input, String index, String[] regions) throws Exception{ // set logging level for printing of no of records parsed - final boolean isLevelEnabled = logger.isLevelEnabled(QLevel.DEBUG); VcfSummaryReport vcfSummaryReport; try (VCFFileReader reader = new VCFFileReader(new File(input))) { diff --git a/qprofiler/test/org/qcmg/qprofiler/QProfilerTest.java b/qprofiler/test/org/qcmg/qprofiler/QProfilerTest.java index 9671c16fe..d8ba4adad 100644 --- a/qprofiler/test/org/qcmg/qprofiler/QProfilerTest.java +++ b/qprofiler/test/org/qcmg/qprofiler/QProfilerTest.java @@ -15,7 +15,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.qcmg.gff.GFFReader; +import org.qcmg.qio.gff.GffReader; public class QProfilerTest { @@ -118,9 +118,9 @@ public final void executeWithNonexistantInputFile() throws Exception { @Ignore public final void executeWithCorruptGffFile() throws Exception { - GFFReader reader = null; + GffReader reader = null; try { - reader = new GFFReader(DODGY_GFF_FILE_NAME_FILE); + reader = new GffReader(DODGY_GFF_FILE_NAME_FILE); Assert.fail("Should have thrown an Exception"); } catch (Exception e) { Assert.assertEquals("Not enough fields in the Record", e.getMessage()); From 8a490eb9859fb76934484ec42ed6097823d682f2 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 4 Nov 2020 16:47:47 +1000 Subject: [PATCH 08/73] update qsv with new gff3 and new StringFileReader --- qsv/src/org/qcmg/qsv/QSVCluster.java | 14 ++++++------- qsv/src/org/qcmg/qsv/QSVClusterWriter.java | 23 +++++++++++----------- qsv/src/org/qcmg/qsv/blat/BLAT.java | 21 ++++++++++---------- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/qsv/src/org/qcmg/qsv/QSVCluster.java b/qsv/src/org/qcmg/qsv/QSVCluster.java index 161c951e4..a3a1415dc 100644 --- a/qsv/src/org/qcmg/qsv/QSVCluster.java +++ b/qsv/src/org/qcmg/qsv/QSVCluster.java @@ -18,7 +18,7 @@ import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.util.Constants; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qio.gff3.Gff3Record; import org.qcmg.qsv.blat.BLAT; import org.qcmg.qsv.discordantpair.DiscordantPairCluster; import org.qcmg.qsv.discordantpair.QPrimerCategory; @@ -1085,7 +1085,7 @@ public void checkReferenceFlank(String referenceFile, Map> gffMap) { + public void checkGFF(Map> gffMap) { String ref1 = leftReference; String ref2 = rightReference; if (getOrientationCategory().equals(QSVConstants.ORIENTATION_2)) { @@ -1094,12 +1094,12 @@ public void checkGFF(Map> gffMap) { } List features = new ArrayList<>(); if (!ref1.equals(ref2)) { - List leftRecords = gffMap.get(ref1); - List rightRecords = gffMap.get(ref2); + List leftRecords = gffMap.get(ref1); + List rightRecords = gffMap.get(ref2); features.addAll(findGFFFeatureOverlap("pos1", leftRecords, (getFinalLeftBreakpoint()))); features.addAll(findGFFFeatureOverlap("pos2", rightRecords, (getFinalRightBreakpoint()))); } else { - List records = gffMap.get(leftReference); + List records = gffMap.get(leftReference); features.addAll(findGFFFeatureOverlap("pos1", records, (getFinalLeftBreakpoint()))); features.addAll(findGFFFeatureOverlap("pos2", records, (getFinalRightBreakpoint()))); } @@ -1119,10 +1119,10 @@ public 
void checkGFF(Map> gffMap) { /* * Find overlap between SV breakpoint and GFF feature */ - private List findGFFFeatureOverlap(String pos, List gffRecords, int breakpoint) { + private List findGFFFeatureOverlap(String pos, List gffRecords, int breakpoint) { List features = new ArrayList<>(); if (gffRecords != null) { - for (GFF3Record r: gffRecords) { + for (Gff3Record r: gffRecords) { if (breakpoint >= r.getStart() && breakpoint <= r.getEnd()) { features.add(pos + "=" + r.getType()); } diff --git a/qsv/src/org/qcmg/qsv/QSVClusterWriter.java b/qsv/src/org/qcmg/qsv/QSVClusterWriter.java index ab15c57b2..150e35abf 100644 --- a/qsv/src/org/qcmg/qsv/QSVClusterWriter.java +++ b/qsv/src/org/qcmg/qsv/QSVClusterWriter.java @@ -10,14 +10,13 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.atomic.AtomicInteger; -import org.qcmg.gff3.GFF3FileReader; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qio.gff3.Gff3FileReader; +import org.qcmg.qio.gff3.Gff3Record; import org.qcmg.qsv.discordantpair.DiscordantPairCluster; import org.qcmg.qsv.discordantpair.PairGroup; import org.qcmg.qsv.report.DCCReport; @@ -42,7 +41,7 @@ public class QSVClusterWriter { private final int minInsertSize; private final String validationPlatform; private final List gffFiles; - private final Map> gffMap; + private final Map> gffMap; public QSVClusterWriter(QSVParameters tumor, QSVParameters normal, boolean isQCMG, String analysisId, boolean singleSided, boolean twoFileMode, int minInsertSize, String validationPlatform, List gffFiles) throws IOException { this.tumorParameters = tumor; @@ -58,19 +57,19 @@ public QSVClusterWriter(QSVParameters tumor, QSVParameters normal, boolean isQCM } - private Map> parseGFFFiles() throws IOException { - Map> gffMap = new HashMap<>(); + private Map> parseGFFFiles() throws IOException { + Map> gffMap = new HashMap<>(); for (String file: gffFiles) { - try (GFF3FileReader reader = new GFF3FileReader(new File(file));) { - - Iterator it = reader.getRecordIterator(); - while (it.hasNext()) { - GFF3Record g3 = it.next(); + try (Gff3FileReader reader = new Gff3FileReader(new File(file));) { + for(Gff3Record rec: reader) { +// Iterator it = reader.getRecordIterator(); +// while (it.hasNext()) { + Gff3Record g3 = rec; if (gffMap.containsKey(g3.getSeqId())) { gffMap.get(g3.getSeqId()).add(g3); } else { - List list = new ArrayList<>(); + List list = new ArrayList<>(); list.add(g3); gffMap.put(g3.getSeqId(), list); } diff --git a/qsv/src/org/qcmg/qsv/blat/BLAT.java b/qsv/src/org/qcmg/qsv/blat/BLAT.java index 3bcc44c12..b6b1811c7 100644 --- a/qsv/src/org/qcmg/qsv/blat/BLAT.java +++ b/qsv/src/org/qcmg/qsv/blat/BLAT.java @@ -22,11 +22,10 @@ import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.model.BLATRecord; import org.qcmg.common.util.TabTokenizer; +import org.qcmg.qio.record.StringFileReader; import org.qcmg.qsv.QSVException; import org.qcmg.qsv.QSVParameters; import org.qcmg.qsv.util.QSVUtil; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; /** * Class to launch BLAT @@ -89,9 +88,9 @@ public Map parseResults(String blatOutputFile) throws IOExce Map records = new HashMap<>(); File blatOutput = new File(blatOutputFile); - try (TabbedFileReader reader = new TabbedFileReader(blatOutput);) { - for (TabbedRecord tab: reader) { - BLATRecord record = new BLATRecord(TabTokenizer.tokenize(tab.getData())); + try 
(StringFileReader reader = new StringFileReader(blatOutput);) { + for (String tab: reader) { + BLATRecord record = new BLATRecord(TabTokenizer.tokenize(tab)); if (record.isValid()) { BLATRecord previous = records.get(record.getName()); if (null == previous || record.getScore() > previous.getScore()) { @@ -176,10 +175,10 @@ public List alignConsensus(String softclipDir, String name, String c List records = new ArrayList<>(); File out = new File(outFile); - try (TabbedFileReader reader = new TabbedFileReader(out);) { + try (StringFileReader reader = new StringFileReader(out);) { - for (TabbedRecord tab: reader) { - BLATRecord record = new BLATRecord(TabTokenizer.tokenize(tab.getData())); + for (String tab: reader) { + BLATRecord record = new BLATRecord(TabTokenizer.tokenize(tab)); if (record.isValid()) { if (leftReference != null && rightReference != null) { if (record.getReference().equals(leftReference) || record.getReference().equals(rightReference)) { @@ -207,10 +206,10 @@ public List getBlatResults(String blatFile, String leftReference, St List records = new ArrayList<>(); - try (TabbedFileReader reader = new TabbedFileReader(outFile);) { + try (StringFileReader reader = new StringFileReader(outFile);) { - for (TabbedRecord tab: reader) { - BLATRecord record = new BLATRecord(TabTokenizer.tokenize(tab.getData())); + for (String tab: reader) { + BLATRecord record = new BLATRecord(TabTokenizer.tokenize(tab)); if (record.isValid() && record.getName().equals(name)) { if (leftReference != null && rightReference != null) { From 75a09cb1c19c9ee34c2f976bc0cc9ad8947b9fb4 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 4 Nov 2020 17:00:07 +1000 Subject: [PATCH 09/73] create new illumina record, update qsignature with new StringFileReader, new illumina, new RecordWriter --- .../qcmg/qio/illumina/IlluminaFileReader.java | 57 +++ .../org/qcmg/qio/illumina/IlluminaRecord.java | 261 +++++++++++++ qmaftools/src/org/qcmg/maf/DccToMaf.java | 343 ++---------------- qmaftools/src/org/qcmg/maf/MafAddCPG.java | 78 ++-- qmaftools/src/org/qcmg/maf/MafAddGffBait.java | 72 ++-- qmaftools/src/org/qcmg/maf/MafAddStuff.java | 77 ++-- qmaftools/src/org/qcmg/maf/MafFilter.java | 79 ++-- .../src/org/qcmg/maf/MafFinalFilter.java | 45 +-- qmaftools/src/org/qcmg/maf/MafPipeline.java | 68 +--- .../src/org/qcmg/maf/MafPipelineNew.java | 38 +- qmaftools/src/org/qcmg/maf/util/MafUtils.java | 104 +++--- .../test/org/qcmg/maf/MafPipelineNewTest.java | 4 +- .../org/qcmg/maf/util/MafFilterUtilsTest.java | 20 +- .../test/org/qcmg/maf/util/MafUtilsTest.java | 86 ++--- .../src/org/qcmg/sig/CompareIlluminaData.java | 22 +- qsignature/src/org/qcmg/sig/QSigCompare.java | 26 +- .../src/org/qcmg/sig/QSigCompareDistance.java | 20 +- .../src/org/qcmg/sig/SignatureGenerator.java | 30 +- .../qcmg/sig/SignatureGeneratorBespoke.java | 19 +- .../src/org/qcmg/sig/SnpFileDetails.java | 13 +- .../src/org/qcmg/sig/util/SignatureUtil.java | 39 +- .../sig/SignatureGeneratorBespokeTest.java | 6 +- .../org/qcmg/sig/SignatureGeneratorTest.java | 56 +-- .../org/qcmg/sig/util/SignatureUtilTest.java | 39 +- 24 files changed, 682 insertions(+), 920 deletions(-) create mode 100644 qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java create mode 100644 qio/src/org/qcmg/qio/illumina/IlluminaRecord.java diff --git a/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java b/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java new file mode 100644 index 000000000..ed4157be3 --- /dev/null +++ b/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java @@ -0,0 +1,57 
@@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qio.illumina; + + +import java.io.File; +import java.io.IOException; + +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.qio.record.RecordReader; + +public final class IlluminaFileReader extends RecordReader { + public static final String HEADER_LINE = "[Header]"; + public static final String DATA_LINE = "[Data]"; + + public IlluminaFileReader(File file) throws IOException { + super(file, DEFAULT_BUFFER_SIZE, HEADER_LINE, DEFAULT_CHARSET); + } + + @Override + public String readHeader(CharSequence headerPrefix ) throws IOException{ + String nextLine = bin.readLine(); + + //empty file + if( nextLine == null ) return null; + + //check the first header line + if(headerPrefix == null || !nextLine.startsWith(headerPrefix+"") ) return nextLine; + + //reader header, hence file pointer to first line after header + while (null != nextLine && !nextLine.startsWith(DATA_LINE) ) { + headerLines.add(nextLine); + //reset current read line + nextLine = bin.readLine(); + } + + //add [Data] into header + headerLines.add(nextLine); + // next line is still header.... + headerLines.add(bin.readLine()); + + nextLine = bin.readLine(); + return nextLine; + } + + @Override + public IlluminaRecord getRecord(String line) throws Exception { + String[] dataArray = TabTokenizer.tokenize(line); + + // raw Illumina data has 32 fields... and the first one is an integer + if (dataArray.length != 32) throw new Exception("Bad Illumina data format - expecting 32 fields but saw " + dataArray.length); + + return new IlluminaRecord( dataArray ); + } + +} diff --git a/qio/src/org/qcmg/qio/illumina/IlluminaRecord.java b/qio/src/org/qcmg/qio/illumina/IlluminaRecord.java new file mode 100644 index 000000000..05b1312c3 --- /dev/null +++ b/qio/src/org/qcmg/qio/illumina/IlluminaRecord.java @@ -0,0 +1,261 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qio.illumina; + +import org.qcmg.common.string.StringUtils; + +public class IlluminaRecord { + + private String chr; + private int start; + private final String strand; + private String snpId; + + //TODO do we need this field? + private float GCScore; + + private char firstAllele; + private char secondAllele; + + private final char firstAlleleForward; + private final char secondAlleleForward; + + private final char firstAlleleCall; + private final char secondAlleleCall; + + //TODO do we need this field? 
+ private boolean hom; + private boolean isSnp; + private String snp; + + private final float logRRatio; + private final float bAlleleFreq; + + private final int rawX; + private final int rawY; + + /** + * Constructor that takes in a String array, retrieving pertinent fields from the array to populate the record + * + * @param rawIlluminaData String[] representing a line in the raw Illumina data file + * @throws Exception + */ + public IlluminaRecord(String [] rawIlluminaData) { + // chromosome and position defined in the raw Illumina data file relate to an old version + // of the genome (hg18), so instead, we use the dbSNP id to get the more recent + //(hg19) chromosome and position details from the dbSNP file at a later date + int length = rawIlluminaData.length; + snpId = rawIlluminaData[0]; + GCScore = Float.parseFloat(rawIlluminaData[4]); + firstAlleleForward = rawIlluminaData[10].charAt(0); + secondAlleleForward = rawIlluminaData[11].charAt(0); + firstAllele = rawIlluminaData[12].charAt(0); + secondAllele = rawIlluminaData[13].charAt(0); + setHom(rawIlluminaData[14].equals(rawIlluminaData[15])); + chr = rawIlluminaData[16]; + start = Integer.parseInt(rawIlluminaData[17]); + snp = rawIlluminaData[20]; + rawX = Integer.parseInt(rawIlluminaData[length - 4]); + rawY = Integer.parseInt(rawIlluminaData[length - 3]); + bAlleleFreq = Float.parseFloat(rawIlluminaData[length - 2]); + String logRRatioString = rawIlluminaData[length - 1]; + if (StringUtils.isNullOrEmpty(logRRatioString)) + logRRatioString = "NaN"; + logRRatio = Float.parseFloat(logRRatioString); + firstAlleleCall = rawIlluminaData[14].charAt(0); + secondAlleleCall = rawIlluminaData[15].charAt(0); + strand = rawIlluminaData[22]; // use customer strand rather than illumina strand + } + + + public String getChr() { + return chr; + } + public void setChr(String chr) { + this.chr = chr; + } + public int getStart() { + return start; + } + public void setStart(int start) { + this.start = start; + } + public String getSnpId() { + return snpId; + } + public void setSnpId(String snpId) { + this.snpId = snpId; + } + public float getGCScore() { + return GCScore; + } + public void setGCScore(float GCScore) { + this.GCScore = GCScore; + } + public char getFirstAllele() { + return firstAllele; + } + public void setFirstAllele(char firstAllele) { + this.firstAllele = firstAllele; + } + public char getSecondAllele() { + return secondAllele; + } + public void setSecondAllele(char secondAllele) { + this.secondAllele = secondAllele; + } + public String getSnp() { + return snp; + } + public void setSnp(String snp) { + this.snp = snp; + } + public void setHom(boolean hom) { + this.hom = hom; + } + public boolean isHom() { + return hom; + } + + public void setSnp(boolean isSnp) { + this.isSnp = isSnp; + } + + public boolean isSnp() { + return isSnp; + } + + public float getLogRRatio() { + return logRRatio; + } + + + public float getbAlleleFreq() { + return bAlleleFreq; + } + + + public char getFirstAlleleCall() { + return firstAlleleCall; + } + + + public char getSecondAlleleCall() { + return secondAlleleCall; + } + + public int getRawX() { + return rawX; + } + + public int getRawY() { + return rawY; + } + + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Float.floatToIntBits(GCScore); + result = prime * result + Float.floatToIntBits(bAlleleFreq); + result = prime * result + ((chr == null) ? 
0 : chr.hashCode()); + result = prime * result + firstAllele; + result = prime * result + firstAlleleCall; + result = prime * result + (hom ? 1231 : 1237); + result = prime * result + (isSnp ? 1231 : 1237); + result = prime * result + Float.floatToIntBits(logRRatio); + result = prime * result + rawX; + result = prime * result + rawY; + result = prime * result + secondAllele; + result = prime * result + secondAlleleCall; + result = prime * result + ((snp == null) ? 0 : snp.hashCode()); + result = prime * result + ((snpId == null) ? 0 : snpId.hashCode()); + result = prime * result + start; + return result; + } + + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + IlluminaRecord other = (IlluminaRecord) obj; + if (Float.floatToIntBits(GCScore) != Float + .floatToIntBits(other.GCScore)) + return false; + if (Float.floatToIntBits(bAlleleFreq) != Float + .floatToIntBits(other.bAlleleFreq)) + return false; + if (chr == null) { + if (other.chr != null) + return false; + } else if (!chr.equals(other.chr)) + return false; + if (firstAllele != other.firstAllele) + return false; + if (firstAlleleCall != other.firstAlleleCall) + return false; + if (hom != other.hom) + return false; + if (isSnp != other.isSnp) + return false; + if (Float.floatToIntBits(logRRatio) != Float + .floatToIntBits(other.logRRatio)) + return false; + if (rawX != other.rawX) + return false; + if (rawY != other.rawY) + return false; + if (secondAllele != other.secondAllele) + return false; + if (secondAlleleCall != other.secondAlleleCall) + return false; + if (snp == null) { + if (other.snp != null) + return false; + } else if (!snp.equals(other.snp)) + return false; + if (snpId == null) { + if (other.snpId != null) + return false; + } else if (!snpId.equals(other.snpId)) + return false; + if (start != other.start) + return false; + return true; + } + + + @Override + public String toString() { + return "IlluminaRecord [GCScore=" + GCScore + ", bAlleleFreq=" + + bAlleleFreq + ", chr=" + chr + ", firstAllele=" + firstAllele + + ", firstAlleleCall=" + firstAlleleCall + ", hom=" + hom + + ", isSnp=" + isSnp + ", logRRatio=" + logRRatio + ", rawX=" + + rawX + ", rawY=" + rawY + ", secondAllele=" + secondAllele + + ", secondAlleleCall=" + secondAlleleCall + ", snp=" + snp + + ", snpId=" + snpId + ", start=" + start + "]"; + } + + + public String getStrand() { + return strand; + } + + + public char getFirstAlleleForward() { + return firstAlleleForward; + } + + public char getSecondAlleleForward() { + return secondAlleleForward; + } + + +} diff --git a/qmaftools/src/org/qcmg/maf/DccToMaf.java b/qmaftools/src/org/qcmg/maf/DccToMaf.java index d1db77c56..1ec50415b 100644 --- a/qmaftools/src/org/qcmg/maf/DccToMaf.java +++ b/qmaftools/src/org/qcmg/maf/DccToMaf.java @@ -20,9 +20,7 @@ import org.qcmg.common.model.TorrentVerificationStatus; import org.qcmg.common.util.FileUtils; import org.qcmg.maf.util.MafUtils; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.StringFileReader; public class DccToMaf { @@ -94,335 +92,46 @@ public int engage() throws Exception { return exitStatus; } - private void getPatientId(String fileName) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(fileName)); - TabbedHeader header = reader.getHeader(); - - try { - for (String headerLine : header) { - if 
(headerLine.startsWith("#PatientID")) + private void getPatientId(String fileName) throws Exception { + try(StringFileReader reader = new StringFileReader(new File(fileName));) { + for(String headerLine: reader.getHeader()) { + if (headerLine.startsWith("#PatientID")) { patientId = headerLine.substring(headerLine.indexOf(':') +2); + } } - } finally { - reader.close(); - } + } } private void loadFile(String fileName) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(fileName)); - TabbedHeader header = reader.getHeader(); - - // should be able to glean some useful info from the header -// String patientId = null; - String controlSampleID = null; - String tumourSampleID = null; - String tool = null; -// DccType type = null; - - for (String headerLine : header) { - if (headerLine.startsWith("#PatientID")) - patientId = headerLine.substring(headerLine.indexOf(':') +2); - if (headerLine.startsWith("#ControlSampleID")) - controlSampleID = headerLine.substring(headerLine.indexOf(':') +2); - if (headerLine.startsWith("#TumourSampleID")) - tumourSampleID = headerLine.substring(headerLine.indexOf(':') +2); - if (headerLine.startsWith("#Tool")) { - tool = headerLine.substring(headerLine.indexOf(':') +2); -// type = headerLine.endsWith("SNP") ? DccType.SNP : (headerLine.endsWith("small_indel_tool") ? DccType.INSERTION : null); + try (StringFileReader reader = new StringFileReader(new File(fileName));) { + String controlSampleID = null; + String tumourSampleID = null; + String tool = null; + + for (String headerLine : reader.getHeader()) { + if (headerLine.startsWith("#PatientID")) + patientId = headerLine.substring(headerLine.indexOf(':') +2); + if (headerLine.startsWith("#ControlSampleID")) + controlSampleID = headerLine.substring(headerLine.indexOf(':') +2); + if (headerLine.startsWith("#TumourSampleID")) + tumourSampleID = headerLine.substring(headerLine.indexOf(':') +2); + if (headerLine.startsWith("#Tool")) { + tool = headerLine.substring(headerLine.indexOf(':') +2); + } } - } - logger.info("patient: " + patientId + ", controlSampleID: " + controlSampleID + ", tumourSampleID: " + tumourSampleID + ", tool: " + tool); + logger.info("patient: " + patientId + ", controlSampleID: " + controlSampleID + ", tumourSampleID: " + tumourSampleID + ", tool: " + tool); - Map patientSpecificVerification = verifiedData.get(patientId); - - try { + Map patientSpecificVerification = verifiedData.get(patientId); int count = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { if (++count ==1) continue; // header line MafUtils.convertDccToMaf(rec, patientId, controlSampleID, tumourSampleID, patientSpecificVerification, mafs, ensemblToEntrez); -// convertDccToMaf(rec, patientId, controlSampleID, tumourSampleID, type); } logger.info("ignored " + ignoredCount + " dcc records"); - } finally { - reader.close(); - } + } } -// private void convertDccToMaf(TabbedRecord tabbedRecord, String patientId, String controlSampleID, String tumourSampleID, DccType type) { -//// String[] params = tabbedPattern.split(tabbedRecord.getData(), -1); -// String[] params = TabTokenizer.tokenize(tabbedRecord.getData()); -// -// // if we have 2 entries (pipe delimited) in the Hugo symbol field - create 2 maf records - 1 per gene -//// String [] genes = getHugoSymbol(params[30]).split("\\|"); -// String [] genes = MafUtils.getHugoSymbol(params[30]).split("\\|"); -// String [] geneIds = params[26].split("\\|"); -// String [] transcriptIds = params[27].split("\\|"); -// -// // check if position verified 
-// String chromosome = params[2]; -// int startPosition = Integer.parseInt(params[3]); -// int endPosition = Integer.parseInt(params[4]); -// TorrentVerificationStatus tvs = null; -// if (null != verifiedData.get(patientId)) -// tvs = verifiedData.get(patientId).get(new ChrPosition("chr"+chromosome, startPosition, endPosition)); -//// Boolean verified = verifiedData.get(new ChrPosition("chr"+chromosome, startPosition, endPosition)); -// // if it didn't verify - ignore!!! -//// if (null != tvs && tvs.removeFromMaf()) { -//// logger.info("position did not verify: " + chromosome + ":" + startPosition + "-" + endPosition + ", ignoring"); -//// ignoredCount++; -//// return; -//// } -// -// // setup maf record with static and common fields -// MAFRecord maf = new MAFRecord(); -// MafUtils.setupStaticMafFields(maf, patientId, controlSampleID, tumourSampleID); -// -// // use M rather than MT -// String chr = chromosome.substring(chromosome.indexOf("chr") + 1); -// if ("MT".equals(chr)) chr = "M"; -// -// maf.setChromosome(chr); -// maf.setStartPosition(startPosition); -// maf.setEndPosition(endPosition); -// maf.setStrand(Integer.parseInt(params[5]) == 1 ? '+' : '-'); // set this according to 1 or 0 -// maf.setRef(params[8]); -// maf.setTumourAllele1(params[10].substring(0, params[10].indexOf('/'))); -// maf.setTumourAllele2(params[10].substring(params[10].indexOf('/')+1)); -//// maf.setDbSnpId(getDbSnpId(params[18])); -// maf.setDbSnpId(MafUtils.getDbSnpId(params[18])); -// -// //FIXME - take this out once test for Karin is complete -//// maf.setValidationStatus((null != tvs && tvs.verified()) ? "Valid" : "Unknown"); -// if (null != tvs) { -// maf.setValidationStatus(tvs.getMafDisplayName()); -// -//// if (tvs.verified()) { -//// maf.setValidationStatus("Valid"); -//// } else { -//// if (TorrentVerificationStatus.COVERAGE.equals(tvs)) -//// maf.setValidationStatus("Coverage"); -//// else -//// maf.setValidationStatus("False"); -//// } -// } else { -// maf.setValidationStatus("Unknown"); -// } -// -// -// -// // qcmg specific -// maf.setFlag(params[36]); // QCMGFlag field -// maf.setNd(params[20]); // ND field -// maf.setTd(params[21]); // TD field -// -// // normal doesn't always exist for somatic... -// if ("--".equals(params[9]) || "-/-".equals(params[9])) { -// maf.setNormalAllele1("-"); -// maf.setNormalAllele2("-"); -// } else { -// maf.setNormalAllele1(params[9].substring(0, params[9].indexOf('/'))); -// maf.setNormalAllele2(params[9].substring(params[9].indexOf('/')+1)); -// } -// -// if (DccType.SNP == type) { -// maf.setVariantType("SNP"); -// } else if (DccType.INDEL == type){ -// maf.setVariantType(Integer.parseInt(params[1]) == 2 ? "INS" : (Integer.parseInt(params[1]) == 3 ? 
"DEL" : "???")); -// } -// -// if (canonicalMafMode) { -// canonicalTranscript(type, params, genes, geneIds, transcriptIds, maf); -// } else { -// worstCaseTranscript(type, params, genes, geneIds, transcriptIds, maf); -// } -// -// -// // need to check that there is a valid gene set on the Maf object -// // if not - don't add to collection -// -// if (null != maf.getHugoSymbol()) -// mafs.add(maf); -// } - -// private void canonicalTranscript(DccType type, String[] params, -// String[] genes, String[] geneIds, String[] transcriptIds, -// MAFRecord maf) { -// int i = 0, allTranscriptIdCount = 0; -// for (String gene : genes) { -// String[] geneSpecificTranscriptIds = transcriptIds[i].split(","); -// String geneId = geneIds[i++]; -// -// // get canonical transcript id -//// String canonicalTranscripId = getCanonicalTranscript(geneId); -// String canonicalTranscripId = ensemblGeneToCanonicalTranscript.get(geneId); -// maf.addCanonicalTranscriptId(canonicalTranscripId); -// if (null != canonicalTranscripId) { -// int positionInTranscripts = StringUtils.getPositionOfStringInArray(geneSpecificTranscriptIds, canonicalTranscripId, true); -// String [] consequences = params[22].split(","); -// String [] aaChanges = params[23].split(","); -// String [] baseChanges = params[24].split(","); -// -// //TODO what to do if canonical transcript id is not found!! -// -// if (positionInTranscripts > -1) { -// // we have a matching canonical transcript -// positionInTranscripts += allTranscriptIdCount; -// -// if (consequences.length > positionInTranscripts) { -// String dccConseq = DccConsequence.getMafName(consequences[positionInTranscripts], type, Integer.parseInt(params[1])); -// -// if ( ! DccConsequence.passesMafNameFilter(dccConseq)) { -// continue; -// } -// -// maf.addVariantClassification(dccConseq); -// maf.addCanonicalAAChange(aaChanges[positionInTranscripts]); -// maf.addCanonicalBaseChange(baseChanges[positionInTranscripts]); -// } else { -// logger.info("consequences.length is <= positionInTranscripts"); -// } -// } else { -// missingCanonicalTransId++; -// logger.debug("canonical transcript id not found in transcript id array"); -// -// // don't want to record this gene -// continue; -// } -// allTranscriptIdCount += geneSpecificTranscriptIds.length; -// -// // set the alternate transcriptId field to be all the other transcripts -// int position = 0; -// for (String transId : geneSpecificTranscriptIds) { -// if ( ! canonicalTranscripId.equalsIgnoreCase(transId)) { -// maf.setAlternateTranscriptId(StringUtils.isNullOrEmpty(maf.getAlternateTranscriptId()) -// ? transId : maf.getAlternateTranscriptId() + (position == 0 ? ";" : ", ") + transId); -// // also alternate aa change & base change -// maf.setAlternateAAChange(StringUtils.isNullOrEmpty(maf.getAlternateAAChange()) -// ? aaChanges[position] : maf.getAlternateAAChange() + (position == 0 ? ";" : ", ") + aaChanges[position]); -// maf.setAlternateBaseChange(StringUtils.isNullOrEmpty(maf.getAlternateBaseChange()) -// ? baseChanges[position] : maf.getAlternateBaseChange() + (position == 0 ? 
";" : ", ") + baseChanges[position]); -// } -// position++; -// } -// -// } else { -// // still want to keep the transcript count up to date -// allTranscriptIdCount += geneSpecificTranscriptIds.length; -// maf.addVariantClassification(DccConsequence.getMafName(params[22], type, Integer.parseInt(params[1]))); -// } -// -//// maf.addEntrezGeneId(getEntrezId(geneId)); -// maf.addEntrezGeneId(MafUtils.getEntrezId(geneId, ensemblToEntrez)); -//// maf.addHugoSymbol("Unknown".equals(gene) ? getHugoSymbol(geneId) : gene); -// maf.addHugoSymbol("Unknown".equals(gene) ? MafUtils.getHugoSymbol(geneId) : gene); -// } -// } - -// private void worstCaseTranscript(DccType type, String[] params, String[] genes, String[] geneIds, String[] transcriptIds, MAFRecord maf) { -// int i = 0, allTranscriptIdCount = 0; -// for (String gene : genes) { -// String[] geneSpecificTranscriptIds = transcriptIds[i].split(","); -// String geneId = geneIds[i++]; -// -// -// String [] allTranscripts = params[27].split("[,|]"); -// // need start and stop positions of transcripts belonging to this gene so that the relevant consequences can be retrieved -// int startPosition = StringUtils.getPositionOfStringInArray(allTranscripts, geneSpecificTranscriptIds[0], true); -// int endPosition = StringUtils.getPositionOfStringInArray(allTranscripts, geneSpecificTranscriptIds[geneSpecificTranscriptIds.length -1], true); -// -// String [] consequences = params[22].split(","); -// -// String [] geneConsequences = new String[1 + endPosition-startPosition]; -// for (int j = startPosition , k = 0; j <= endPosition ; j++, k++) { -// geneConsequences[k] = consequences[j]; -// } -// -// -// String worstCaseConsequence = DccConsequence.getWorstCaseConsequence(type, geneConsequences); -// String dccConseq = DccConsequence.getMafName(worstCaseConsequence, type, Integer.parseInt(params[1])); -// if ( ! DccConsequence.passesMafNameFilter(dccConseq)) { -// continue; -// } -// -// maf.addVariantClassification(dccConseq); -// -// -// int currentPosition = 0; -// for (String c : geneConsequences) { -// if (c.equals(worstCaseConsequence)) break; -// currentPosition++; -// } -// -// String [] aaChanges = params[23].split(","); -// String [] baseChanges = params[24].split(","); -// String worstCaseTranscriptId = geneSpecificTranscriptIds[currentPosition]; -// maf.addCanonicalTranscriptId(worstCaseTranscriptId); -// maf.addCanonicalAAChange(aaChanges[currentPosition]); -// maf.addCanonicalBaseChange(baseChanges[currentPosition]); -// -// -// // get position of worstCaseConsequence in -// -// // get canonical transcript id -//// String canonicalTranscripId = getCanonicalTranscript(geneId); -//// if (null != worstCaseTranscriptId) { -////// int positionInTranscripts = StringUtils.getPositionOfStringInArray(geneSpecificTranscriptIds, worstCaseTranscriptId, true); -//// -//// //TODO what to do if canonical transcript id is not found!! -//// -//// if (currentPosition > -1) { -//// // we have a matching canonical transcript -//// positionInTranscripts += allTranscriptIdCount; -//// -//// if (consequences.length > positionInTranscripts) { -//// String dccConseq = DccConsequence.getMafName(worstCaseConsequence, type, Integer.parseInt(params[1])); -//// -//// if ( ! 
DccConsequence.passesMafNameFilter(dccConseq)) { -//// continue; -//// } -//// -//// maf.addVariantClassification(dccConseq); -//// } else { -//// logger.info("consequences.length is <= positionInTranscripts"); -//// } -//// } else { -//// missingCanonicalTransId++; -//// logger.debug("canonical transcript id not found in transcript id array"); -//// -//// // don't want to record this gene -//// continue; -//// } -//// allTranscriptIdCount += geneSpecificTranscriptIds.length; -//// -//// // set the alternate transcriptId field to be all the other transcripts -//// int position = 0; -//// for (String transId : geneSpecificTranscriptIds) { -//// if ( ! canonicalTranscripId.equalsIgnoreCase(transId)) { -//// maf.setAlternateTranscriptId(StringUtils.isNullOrEmpty(maf.getAlternateTranscriptId()) -//// ? transId : maf.getAlternateTranscriptId() + (position == 0 ? ";" : ", ") + transId); -//// // also alternate aa change & base change -//// maf.setAlternateAAChange(StringUtils.isNullOrEmpty(maf.getAlternateAAChange()) -//// ? aaChanges[position] : maf.getAlternateAAChange() + (position == 0 ? ";" : ", ") + aaChanges[position]); -//// maf.setAlternateBaseChange(StringUtils.isNullOrEmpty(maf.getAlternateBaseChange()) -//// ? baseChanges[position] : maf.getAlternateBaseChange() + (position == 0 ? ";" : ", ") + baseChanges[position]); -//// } -//// position++; -//// } -// -//// } else { -//// // still want to keep the transcript count up to date -//// allTranscriptIdCount += geneSpecificTranscriptIds.length; -//// maf.addVariantClassification(DccConsequence.getMafName(params[22], type, Integer.parseInt(params[1]))); -//// } -// -//// maf.addEntrezGeneId(getEntrezId(geneId)); -// maf.addEntrezGeneId(MafUtils.getEntrezId(geneId, ensemblToEntrez)); -//// maf.addHugoSymbol("Unknown".equals(gene) ? getHugoSymbol(geneId) : gene); -// maf.addHugoSymbol("Unknown".equals(gene) ? 
MafUtils.getHugoSymbol(geneId) : gene); -// } -// } - public static void main(String[] args) throws Exception { DccToMaf sp = new DccToMaf(); int exitStatus = 0; diff --git a/qmaftools/src/org/qcmg/maf/MafAddCPG.java b/qmaftools/src/org/qcmg/maf/MafAddCPG.java index 2d377e55d..6704bed60 100644 --- a/qmaftools/src/org/qcmg/maf/MafAddCPG.java +++ b/qmaftools/src/org/qcmg/maf/MafAddCPG.java @@ -9,6 +9,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; +import java.io.IOException; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -24,9 +25,7 @@ import org.qcmg.common.model.ChrPosition; import org.qcmg.common.model.ChrRangePosition; import org.qcmg.common.util.FileUtils; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.StringFileReader; public class MafAddCPG { @@ -62,30 +61,30 @@ public int engage() throws Exception { return exitStatus; } - private void populatePositionsOfInterest(String refFile) throws FileNotFoundException { - IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(refFile)); + private void populatePositionsOfInterest(String refFile) throws IOException { + try(IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(refFile));){ - for (ChrPosition cp : positionsOfInterestSet) { - String chr = "chr" + cp.getChromosome(); - if ("chrM".equals(chr)) chr = "chrMT"; - - ReferenceSequence seq = fasta.getSubsequenceAt(chr, cp.getStartPosition(), cp.getEndPosition()); - positionsOfInterestMap.put(cp, new String(seq.getBases())); + for (ChrPosition cp : positionsOfInterestSet) { + String chr = "chr" + cp.getChromosome(); + if ("chrM".equals(chr)) chr = "chrMT"; + + ReferenceSequence seq = fasta.getSubsequenceAt(chr, cp.getStartPosition(), cp.getEndPosition()); + positionsOfInterestMap.put(cp, new String(seq.getBases())); + } + logger.info("no of entries in map: " + positionsOfInterestMap.size()); } - logger.info("no of entries in map: " + positionsOfInterestMap.size()); - } private void loadPositionsOfInterest(String mafFile) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(mafFile)); - try { + + try(StringFileReader reader = new StringFileReader(new File(mafFile));) { int count = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { if (count++ == 0) continue; // first line is header - String[] params = tabbedPattern.split(rec.getData(), -1); + String[] params = tabbedPattern.split(rec, -1); String chr = params[4]; int startPos = Integer.parseInt(params[5]); int endPos = Integer.parseInt(params[6]); @@ -95,43 +94,33 @@ private void loadPositionsOfInterest(String mafFile) throws Exception { } logger.info("for file: " + mafFile + " no of records: " + count + ", no of entries in chrpos set: " + positionsOfInterestSet.size()); - } finally { - reader.close(); - } + } } private void writeMafOutput(String inputMafFile, String outputMafFile) throws Exception { if (positionsOfInterestMap.isEmpty()) return; - TabbedFileReader reader = new TabbedFileReader(new File(inputMafFile)); - TabbedHeader header = reader.getHeader(); - FileWriter writer = new FileWriter(new File(outputMafFile), false); - - int count = 0; - - try { - for (Iterator iter = header.iterator() ; iter.hasNext() ;) { - String headerLine = iter.next(); + int count = 0; + try (StringFileReader reader = new StringFileReader(new File(inputMafFile)); + FileWriter writer = new FileWriter(new 
File(outputMafFile), false); ){ + for (String headerLine: reader.getHeader() ) { if (headerLine.startsWith("#version")) { writer.write(headerLine + "\n"); } else { // add CPG column header to end of line -// if (headerLine.indexOf("\n") != -1) -// writer.write(headerLine.replace("\n", "\tCPG\n")); -// else - writer.write(headerLine + "\tCPG\n"); + writer.write(headerLine + "\tCPG\n"); } } // writer.write(MafUtils.HEADER_WITH_CPG); - for (TabbedRecord rec : reader) { + for (String rec : reader) { // first line is part of header - if (count++ == 0 && (rec.getData().startsWith("Hugo_Symbol"))) { - writer.write(rec.getData() + "\tCPG\n"); + if (count++ == 0 && (rec.startsWith("Hugo_Symbol"))) { + writer.write(rec + "\tCPG\n"); continue; } - String[] params = tabbedPattern.split(rec.getData(), -1); + String[] params = tabbedPattern.split(rec, -1); String chr = params[4]; int startPos = Integer.parseInt(params[5]); int endPos = Integer.parseInt(params[6]); @@ -146,19 +135,12 @@ private void writeMafOutput(String inputMafFile, String outputMafFile) throws Ex logger.warn("reference base: " + ref + " does not equal base retrieved for cpg purposes: " + bases.charAt(noOfBases) + " at chrpos: " + cp.toString()); } - writer.write(rec.getData() + "\t" + bases + "\n"); + writer.write(rec + "\t" + bases + "\n"); } else { logger.warn("no reference bases for chr pos: " + cp.toString()); } } logger.info("written " + count + " maf records to file"); - - } finally { - try { - writer.close(); - } finally { - reader.close(); - } } } @@ -169,9 +151,11 @@ public static void main(String[] args) throws Exception { exitStatus = sp.setup(args); } catch (Exception e) { exitStatus = 1; - if (null != logger) + if (null != logger) { logger.error("Exception caught whilst running MafAddCPG:", e); - else System.err.println("Exception caught whilst running MafAddCPG"); + } else { + System.err.println("Exception caught whilst running MafAddCPG"); + } } if (null != logger) diff --git a/qmaftools/src/org/qcmg/maf/MafAddGffBait.java b/qmaftools/src/org/qcmg/maf/MafAddGffBait.java index e4d3f64fb..cf90d994d 100644 --- a/qmaftools/src/org/qcmg/maf/MafAddGffBait.java +++ b/qmaftools/src/org/qcmg/maf/MafAddGffBait.java @@ -22,11 +22,9 @@ import org.qcmg.common.model.ChrRangePosition; import org.qcmg.common.util.ChrPositionUtils; import org.qcmg.common.util.FileUtils; -import org.qcmg.gff3.GFF3FileReader; -import org.qcmg.gff3.GFF3Record; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.gff3.Gff3FileReader; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.StringFileReader; public class MafAddGffBait { @@ -37,10 +35,8 @@ public class MafAddGffBait { private String[] cmdLineInputFiles; private String[] cmdLineOutputFiles; private int exitStatus; -// private final int noOfBases = 5; Map positionsOfInterestMap = new HashMap(); -// List positionsOfInterestList = new ArrayList(); SortedSet positionsOfInterestSet = new TreeSet(); Map> gffTypes = new HashMap>(); @@ -53,12 +49,10 @@ public int engage() throws Exception { // populate the positionsOfInterest map with reference data from the fasta file logger.info("populating positions of interest from gff file: " + cmdLineInputFiles[1]); populateGffTypes(cmdLineInputFiles[1]); - logger.info("populating positions of interest from gff file: " + cmdLineInputFiles[1] + " - DONE"); - + logger.info("populating positions of interest from gff file: " + cmdLineInputFiles[1] + " - DONE"); 
populatePositionsOfInterest(); - // output new maf file with additional column - + // output new maf file with additional column logger.info("write output: " + cmdLineOutputFiles[0]); writeMafOutput(cmdLineInputFiles[0], cmdLineOutputFiles[0]); @@ -66,10 +60,10 @@ public int engage() throws Exception { } private void populateGffTypes(String gff3File) throws Exception { - GFF3FileReader reader = new GFF3FileReader(new File(gff3File)); + Gff3FileReader reader = new Gff3FileReader(new File(gff3File)); try { int count = 0; - for (GFF3Record rec : reader) { + for (Gff3Record rec : reader) { String chr = rec.getSeqId(); Map thisMap = gffTypes.get(chr); if (null == thisMap) { @@ -114,14 +108,14 @@ private void populatePositionsOfInterest() { } private void loadPositionsOfInterest(String mafFile) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(mafFile)); - try { + + try(StringFileReader reader = new StringFileReader(new File(mafFile));) { int count = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { count++; - String[] params = tabbedPattern.split(rec.getData(), -1); + String[] params = tabbedPattern.split(rec, -1); String chr = params[4]; int startPos = Integer.parseInt(params[5]); int endPos = Integer.parseInt(params[6]); @@ -131,23 +125,16 @@ private void loadPositionsOfInterest(String mafFile) throws Exception { } logger.info("for file: " + mafFile + " no of records: " + count + ", no of entries in chrpos set: " + positionsOfInterestSet.size()); - } finally { - reader.close(); - } + } } private void writeMafOutput(String inputMafFile, String outputMafFile) throws Exception { if (positionsOfInterestMap.isEmpty()) return; - TabbedFileReader reader = new TabbedFileReader(new File(inputMafFile)); - TabbedHeader header = reader.getHeader(); - FileWriter writer = new FileWriter(new File(outputMafFile), false); - - int count = 0; - - try { - for (Iterator iter = header.iterator() ; iter.hasNext() ;) { - String headerLine = iter.next(); + try(StringFileReader reader = new StringFileReader(new File(inputMafFile)); + FileWriter writer = new FileWriter(new File(outputMafFile), false);) { + int count = 0; + for (String headerLine : reader.getHeader()) { if (headerLine.startsWith("#version")) { writer.write(headerLine + "\n"); } else { @@ -155,39 +142,22 @@ private void writeMafOutput(String inputMafFile, String outputMafFile) throws Ex } } - for (TabbedRecord rec : reader) { + for (String rec : reader) { count++; - String[] params = tabbedPattern.split(rec.getData(), -1); + String[] params = tabbedPattern.split(rec, -1); String chr = params[4]; int startPos = Integer.parseInt(params[5]); - int endPos = Integer.parseInt(params[6]); - -// String fullChr = "chr" + chr; -// if ("chrM".equals(fullChr)) fullChr = "chrMT"; -// -// ChrPosition chrCompliantCP = new ChrPosition(fullChr, startPos, endPos); - + int endPos = Integer.parseInt(params[6]); ChrPosition cp = new ChrRangePosition(chr, startPos, endPos); String gff3Type = positionsOfInterestMap.get(cp); if (null != gff3Type) { -// if ('-' != ref && ref != gff3Type.charAt(noOfBases)) { -// logger.warn("reference base: " + ref + " does not equal base retrieved for cpg purposes: " -// + gff3Type.charAt(noOfBases) + " at chrpos: " + cp.toString()); -// } - writer.write(rec.getData() + "\t" + gff3Type + "\n"); + writer.write(rec + "\t" + gff3Type + "\n"); } else { logger.warn("no type for chr pos: " + cp.toString()); } } - logger.info("written " + count + " maf records to file"); - - } finally { - try { - 
writer.close(); - } finally { - reader.close(); - } + logger.info("written " + count + " maf records to file"); } } diff --git a/qmaftools/src/org/qcmg/maf/MafAddStuff.java b/qmaftools/src/org/qcmg/maf/MafAddStuff.java index c47b71328..26e9268e0 100644 --- a/qmaftools/src/org/qcmg/maf/MafAddStuff.java +++ b/qmaftools/src/org/qcmg/maf/MafAddStuff.java @@ -9,8 +9,8 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; +import java.io.IOException; import java.util.HashMap; -import java.util.Iterator; import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; @@ -28,12 +28,10 @@ import org.qcmg.common.model.ChrRangePosition; import org.qcmg.common.util.ChrPositionUtils; import org.qcmg.common.util.FileUtils; -import org.qcmg.gff3.GFF3FileReader; -import org.qcmg.gff3.GFF3Record; import org.qcmg.maf.util.MafUtils; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.gff3.Gff3FileReader; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.StringFileReader; public class MafAddStuff { @@ -92,7 +90,7 @@ private void populateLiftOverMap() { Interval newInt = picardLiftover.liftOver(oldInt); logger.info("oldInt: " + oldInt + ", new Int: " + newInt); - mafPositionsOfInterestLiftover.put(cp, new ChrRangePosition(newInt.getSequence().substring(3), newInt.getStart(), newInt.getEnd())); + mafPositionsOfInterestLiftover.put(cp, new ChrRangePosition(newInt.getContig().substring(3), newInt.getStart(), newInt.getEnd())); } } else { for (ChrPosition cp : mafPositionsOfInterest) { @@ -103,10 +101,10 @@ private void populateLiftOverMap() { private void getGffTypes(String gff3File) throws Exception { Map> gffTypes = new HashMap>(); - GFF3FileReader reader = new GFF3FileReader(new File(gff3File)); + Gff3FileReader reader = new Gff3FileReader(new File(gff3File)); try { int count = 0; - for (GFF3Record rec : reader) { + for (Gff3Record rec : reader) { String chr = rec.getSeqId(); Map thisMap = gffTypes.get(chr); if (null == thisMap) { @@ -148,36 +146,32 @@ private void getGffTypes(String gff3File) throws Exception { logger.info("no of entries in chrPosGffType: " + chrPosGffType.size()); } - private void getFastaData(String refFile) throws FileNotFoundException { - IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(refFile)); + private void getFastaData(String refFile) throws IOException { + try(IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(refFile));){ - for (ChrPosition cp : mafPositionsOfInterestLiftover.values()) { - String chr = MafUtils.getFullChrFromMafChr(cp.getChromosome()); - - logger.info("retrieveing info for ChrPos: " + chr + ", " + (cp.getStartPosition() - noOfBases) + "-" + (cp.getEndPosition() + noOfBases)); - ReferenceSequence seq = null; - try { - seq = fasta.getSubsequenceAt(chr, cp.getStartPosition() - noOfBases, cp.getEndPosition() + noOfBases); - } catch (UnsupportedOperationException pe) { - logger.error("Exception caught in getFastaData",pe); + for (ChrPosition cp : mafPositionsOfInterestLiftover.values()) { + String chr = MafUtils.getFullChrFromMafChr(cp.getChromosome()); + + logger.info("retrieveing info for ChrPos: " + chr + ", " + (cp.getStartPosition() - noOfBases) + "-" + (cp.getEndPosition() + noOfBases)); + ReferenceSequence seq = null; + try { + seq = fasta.getSubsequenceAt(chr, cp.getStartPosition() - noOfBases, cp.getEndPosition() + noOfBases); + } catch (UnsupportedOperationException pe) { + 
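
The getFastaData rewrite above puts the IndexedFastaSequenceFile into try-with-resources, and the liftover hunk swaps Interval.getSequence() for getContig(). A minimal sketch of the flanking-base fetch using htsjdk's standard API; the contig naming and the noOfBases padding follow the surrounding code:

    import java.io.File;
    import java.io.IOException;

    import htsjdk.samtools.reference.IndexedFastaSequenceFile;
    import htsjdk.samtools.reference.ReferenceSequence;

    public class FlankingBasesSketch {

        // Fetches the reference bases around [start, end], padded by noOfBases on
        // each side, closing the fasta handle via try-with-resources.
        static String flankingBases(File indexedFasta, String contig,
                int start, int end, int noOfBases) throws IOException {
            try (IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(indexedFasta)) {
                ReferenceSequence seq =
                        fasta.getSubsequenceAt(contig, start - noOfBases, end + noOfBases);
                return new String(seq.getBases());
            }
        }
    }
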
logger.error("Exception caught in getFastaData",pe); + } + if (null != seq) + fastaCPGDataMap.put(cp, new String(seq.getBases())); } - if (null != seq) - fastaCPGDataMap.put(cp, new String(seq.getBases())); + logger.info("no of entries in CPG map: " + fastaCPGDataMap.size()); } - logger.info("no of entries in CPG map: " + fastaCPGDataMap.size()); } private void writeMafOutput(String inputMafFile, String outputMafFile) throws Exception { if (fastaCPGDataMap.isEmpty() && chrPosGffType.isEmpty()) return; - - TabbedFileReader reader = new TabbedFileReader(new File(inputMafFile)); - TabbedHeader header = reader.getHeader(); - FileWriter writer = new FileWriter(new File(outputMafFile), false); - int count = 0; - try { - for (Iterator iter = header.iterator() ; iter.hasNext() ;) { - String headerLine = iter.next(); + try(StringFileReader reader = new StringFileReader(new File(inputMafFile)); + FileWriter writer = new FileWriter(new File(outputMafFile), false);) { + for (String headerLine : reader.getHeader()) { if (headerLine.startsWith("#version")) { writer.write(headerLine + "\n"); } else { @@ -185,14 +179,14 @@ private void writeMafOutput(String inputMafFile, String outputMafFile) throws Ex } } - for (TabbedRecord rec : reader) { + for (String rec : reader) { // first line is part of header - if (count++ == 0 && (rec.getData().startsWith("Hugo_Symbol"))) { - writer.write(rec.getData() + "\tCPG\tGff3_Bait\n"); + if (count++ == 0 && (rec.startsWith("Hugo_Symbol"))) { + writer.write(rec + "\tCPG\tGff3_Bait\n"); continue; } - String[] params = tabbedPattern.split(rec.getData(), -1); + String[] params = tabbedPattern.split(rec, -1); String chr = params[4]; int startPos = Integer.parseInt(params[5]); int endPos = Integer.parseInt(params[6]); @@ -207,7 +201,7 @@ private void writeMafOutput(String inputMafFile, String outputMafFile) throws Ex if (null == cp) logger.warn("null entry in mafPositionsOfInterestLiftover map "); - //FIXME - upping the version number by 1 + //upping the version number by 1 if (params[3].startsWith("hg") || params[3].startsWith("GRCh")) { } else { @@ -246,15 +240,8 @@ private void writeMafOutput(String inputMafFile, String outputMafFile) throws Ex writer.write(sb.toString()); } - logger.info("written " + count + " maf records to file"); - - } finally { - try { - writer.close(); - } finally { - reader.close(); - } - } + logger.info("written " + count + " maf records to file"); + } } public static void main(String[] args) throws Exception { diff --git a/qmaftools/src/org/qcmg/maf/MafFilter.java b/qmaftools/src/org/qcmg/maf/MafFilter.java index d1c4c85a7..ce3cc638e 100644 --- a/qmaftools/src/org/qcmg/maf/MafFilter.java +++ b/qmaftools/src/org/qcmg/maf/MafFilter.java @@ -21,8 +21,7 @@ import org.qcmg.common.util.SnpUtils; import org.qcmg.maf.util.MafUtils; import org.qcmg.picard.util.PileupElementUtil; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.StringFileReader; public class MafFilter { @@ -44,11 +43,9 @@ public boolean accept(File file, String name) { && ! cmdLineOutputFiles[1].endsWith(name); } }; - -// private boolean includePositionsThatDidNotVerify; - - List highConfidenceMafs = new ArrayList(); - List probableNoiseMafs = new ArrayList(); + + List highConfidenceMafs = new ArrayList<>(); + List probableNoiseMafs = new ArrayList<>(); public int engage() throws Exception { // load mapping files @@ -72,16 +69,7 @@ private void loadMafFiles(String directory) throws Exception { File dir = new File(directory); if (! 
dir.isDirectory()) throw new IllegalArgumentException("Supplied directory is not a directory: " + directory); - File[] mafFiles = dir.listFiles(mafFilenameFilter); -// File[] mafFiles = dir.listFiles(new FilenameFilter(){ -// @Override -// public boolean accept(File file, String name) { -// return name.endsWith(".maf") -// && ! cmdLineOutputFiles[0].endsWith(name) -// && ! cmdLineOutputFiles[1].endsWith(name); -// } -// }); - + File[] mafFiles = dir.listFiles(mafFilenameFilter); for (File f : mafFiles) { logger.info("will operate on file: " + f.getAbsolutePath()); loadFile(f); @@ -89,13 +77,13 @@ private void loadMafFiles(String directory) throws Exception { } private void loadKRASFile(String krasFile) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(krasFile)); + StringFileReader reader = new StringFileReader(new File(krasFile)); try { int high = 0, noise = 0, fail = 0, count = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { count++; - String[] params = tabbedPattern.split(rec.getData(), -1); + String[] params = tabbedPattern.split(rec, -1); String chr = params[4]; String position = params[5]; String id = params[15]; @@ -114,7 +102,7 @@ private void loadKRASFile(String krasFile) throws Exception { // APGI_2270, APGI_2271, APGI_2285 if (id.contains(lowCovPatient)) { lowCov = true; - logger.info("Skipping KRAS record: " + rec.getData() + " - belongs to low coverage patient"); + logger.info("Skipping KRAS record: " + rec + " - belongs to low coverage patient"); break; } } @@ -123,8 +111,8 @@ private void loadKRASFile(String krasFile) throws Exception { // check that we are not adding a duplicate into the highConfMaf list boolean recordAlreadyInList = false; - for (TabbedRecord tr : highConfidenceMafs) { - String [] p2 = tabbedPattern.split(tr.getData(), -1); + for (String tr : highConfidenceMafs) { + String [] p2 = tabbedPattern.split(tr, -1); String chr2 = p2[4]; String position2 = p2[5]; String id2 = p2[15]; @@ -138,7 +126,7 @@ private void loadKRASFile(String krasFile) throws Exception { logger.info("verification DOES NOT match! - updating"); // update record with "Valid" validation status - tr.setData(tr.getData().replaceAll("Unknown", "Valid")); + tr = tr.replaceAll("Unknown", "Valid"); } break; } @@ -148,8 +136,8 @@ private void loadKRASFile(String krasFile) throws Exception { boolean recordAlreadyInLowerConfList = false; // if record exists in low confidence file, remove, and put into high - for (TabbedRecord tr : probableNoiseMafs) { - String [] p2 = tabbedPattern.split(tr.getData(), -1); + for (String tr : probableNoiseMafs) { + String [] p2 = tabbedPattern.split(tr, -1); String chr2 = p2[4]; String position2 = p2[5]; String id2 = p2[15]; @@ -157,11 +145,10 @@ private void loadKRASFile(String krasFile) throws Exception { if (chr.equals(chr2) && position.equals(position2) && id.equals(id2)) { // remove from list recordAlreadyInLowerConfList = true; -// logger.info("removing record from low conf file: " + probableNoiseMafs.remove(tr)); logger.info("moving record from low conf to high conf, and updating verification status to Valid: " + probableNoiseMafs.remove(tr)); - tr.setData(tr.getData().replaceAll("Unknown", "Valid")); + tr = tr.replaceAll("Unknown", "Valid"); highConfidenceMafs.add(tr); break; @@ -170,7 +157,7 @@ private void loadKRASFile(String krasFile) throws Exception { if ( ! 
recordAlreadyInLowerConfList) { // count no of fields in rec - beef up to the current number - int diff = MafUtils.HEADER_WITH_CONFIDENCE_COLUMN_COUNT - tabbedPattern.split(rec.getData(), -1).length; + int diff = MafUtils.HEADER_WITH_CONFIDENCE_COLUMN_COUNT - tabbedPattern.split(rec, -1).length; for (int i = 0 ; i < diff ; i++) addColumn(rec, null); @@ -190,15 +177,15 @@ private void loadKRASFile(String krasFile) throws Exception { } private void loadFile(File file) throws Exception { - TabbedFileReader reader = new TabbedFileReader(file); + StringFileReader reader = new StringFileReader(file); try { int high = 0, noise = 0, fail = 0, count = 0; - for (TabbedRecord rec : reader) { - if (count++ == 0 && rec.getData().startsWith("Hugo_Symbol")) continue; + for (String rec : reader) { + if (count++ == 0 && rec.startsWith("Hugo_Symbol")) continue; - String[] params = tabbedPattern.split(rec.getData(), -1); + String[] params = tabbedPattern.split(rec, -1); String flag = params[32]; String type = params[9]; //eg. SNP, INS or DEL String td = params[34]; //eg. A:5[40],3[35],T:1[25],19[35.43] @@ -209,11 +196,7 @@ private void loadFile(File file) throws Exception { String variant = ref.equals(tumour1) ? tumour2 : tumour1; String consequence = params[8]; String verification = params[24]; - - -// if (("False".equals(verification) || "Coverage".equals(verification)) && ! includePositionsThatDidNotVerify) -// continue; - + // if maf position verifies, put it straight away into high conf file if ("Valid".equals(verification)) { high++; @@ -246,19 +229,18 @@ private void loadFile(File file) throws Exception { } } - private TabbedRecord addColumn(TabbedRecord tabbedRec, String data) { - tabbedRec.setData(tabbedRec.getData() + "\t" + data); - return tabbedRec; + private String addColumn(String tabbedRec, String data) { + return tabbedRec + "\t" + data; } - private void writeMafOutput(String fileName, List mafs, String header) throws IOException { + private void writeMafOutput(String fileName, List mafs, String header) throws IOException { if (mafs.isEmpty()) return; FileWriter writer = new FileWriter(new File(fileName), false); try { writer.write(header); - for (TabbedRecord record : mafs) { - writer.write(record.getData() + "\n"); + for (String record : mafs) { + writer.write(record + "\n"); } } finally { writer.close(); @@ -310,10 +292,6 @@ public static void main(String[] args) throws Exception { protected int setup(String args[]) throws Exception{ int returnStatus = 1; -// if (null == args || args.length == 0) { -// System.err.println(Messages.USAGE); -// System.exit(1); -// } Options options = new Options(args); if (options.hasHelpOption()) { @@ -357,10 +335,7 @@ protected int setup(String args[]) throws Exception{ lowCoveragePatients = options.getLowCoveragePatients(); logger.tool("Will handle the following low coverage patients: " + Arrays.deepToString(lowCoveragePatients)); - -// if (options.getIncludeInvalid()) -// includePositionsThatDidNotVerify = true; - + return engage(); } return returnStatus; diff --git a/qmaftools/src/org/qcmg/maf/MafFinalFilter.java b/qmaftools/src/org/qcmg/maf/MafFinalFilter.java index aef629dd0..d62af20f0 100644 --- a/qmaftools/src/org/qcmg/maf/MafFinalFilter.java +++ b/qmaftools/src/org/qcmg/maf/MafFinalFilter.java @@ -4,18 +4,15 @@ package org.qcmg.maf; import java.io.File; -import java.io.FileWriter; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import java.util.regex.Pattern; import org.qcmg.common.log.QLogger; import 
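
Two behavioural notes on the MafFilter hunks above, since TabbedRecord was mutable and String is not: tr = tr.replaceAll("Unknown", "Valid") inside the for-each over highConfidenceMafs only rebinds the loop variable, so the element stored in the list appears to keep its old value, and the result of addColumn(rec, null) is discarded, so the padding columns appear to be lost. If the previous in-place behaviour is intended, the new value has to be written back; a minimal sketch:

    import java.util.List;

    public class StringWriteBackSketch {

        // Pads a record with empty columns; mirrors addColumn(rec, null), where the
        // "\t" + null concatenation produces the literal text "\tnull". Callers must
        // keep the result: rec = padColumns(rec, diff);
        static String padColumns(String rec, int diff) {
            StringBuilder sb = new StringBuilder(rec);
            for (int i = 0; i < diff; i++) {
                sb.append("\tnull");
            }
            return sb.toString();
        }

        // Stores the edited record back into the list, which TabbedRecord.setData
        // used to achieve by mutating the shared object.
        static void markValid(List<String> mafs, int index) {
            mafs.set(index, mafs.get(index).replaceAll("Unknown", "Valid"));
        }
    }
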
org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.util.FileUtils; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.RecordWriter; +import org.qcmg.qio.record.StringFileReader; public class MafFinalFilter { @@ -30,8 +27,8 @@ public class MafFinalFilter { private boolean includePositionsThatDidNotVerify; - List highConfidenceMafs = new ArrayList(); -// List probableNoiseMafs = new ArrayList(); + List highConfidenceMafs = new ArrayList<>(); +// List probableNoiseMafs = new ArrayList(); public int engage() throws Exception { // load mapping files @@ -43,26 +40,22 @@ public int engage() throws Exception { } private void filterMafFile(String inputFile, String outputMafFile) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(inputFile)); - TabbedHeader header = reader.getHeader(); - FileWriter writer = new FileWriter(new File(outputMafFile), false); int passNovelCountCheck = 0, count = 0; - try { - for (Iterator iter = header.iterator() ; iter.hasNext() ;) { - String headerLine = iter.next(); - writer.write(headerLine + "\n"); - } - - for (TabbedRecord rec : reader) { - if (count++ == 0 && rec.getData().startsWith("Hugo_Symbol")) { - writer.write(rec.getData() + "\n"); + try(StringFileReader reader = new StringFileReader(new File(inputFile)); + RecordWriter writer = new RecordWriter<>(new File(outputMafFile)); ) { + + writer.addHeader(reader.getHeader()); + + for (String rec : reader) { + if (count++ == 0 && rec.startsWith("Hugo_Symbol")) { + writer.add(rec ); continue; } - String[] params = tabbedPattern.split(rec.getData(), -1); + String[] params = tabbedPattern.split(rec, -1); // want to include all of KRAS mafs, regardless of if they pass 4 novel start filters String geneName = params[0]; @@ -76,24 +69,18 @@ private void filterMafFile(String inputFile, String outputMafFile) throws Except if (validationStatus.startsWith("False")) continue; if (validationStatus.startsWith("Unknown")) { // don't want the extra info in the validation status field - just Unknown - rec.setData(rec.getData().replace(validationStatus, "Unknown")); + rec = rec.replace(validationStatus, "Unknown"); } } // add to collection - writer.write(rec.getData() + "\n"); + writer.add(rec); passNovelCountCheck++; } } logger.info("for file: " + inputFile + " stats (count, passNovelCountCheck): " + count + "," + passNovelCountCheck); - } finally { - try { - writer.close(); - } finally { - reader.close(); - } - } + } } diff --git a/qmaftools/src/org/qcmg/maf/MafPipeline.java b/qmaftools/src/org/qcmg/maf/MafPipeline.java index 6a4290616..cd4d309c6 100644 --- a/qmaftools/src/org/qcmg/maf/MafPipeline.java +++ b/qmaftools/src/org/qcmg/maf/MafPipeline.java @@ -46,14 +46,12 @@ import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.Pair; import org.qcmg.common.util.SnpUtils; -import org.qcmg.gff3.GFF3FileReader; -import org.qcmg.gff3.GFF3Record; import org.qcmg.maf.util.MafUtils; import org.qcmg.picard.SAMFileReaderFactory; import org.qcmg.picard.util.SAMUtils; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.gff3.Gff3FileReader; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.StringFileReader; public abstract class MafPipeline { @@ -278,33 +276,24 @@ protected String[] getDccMetaData(String patient) throws IOException { // get file - get dcc meta info from file, and prepend to header Pair filePair = 
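
The filterMafFile rewrite above pairs StringFileReader with the new RecordWriter, so the header travels across via addHeader and records are emitted with add. A minimal sketch of that filter-and-copy shape, assuming only the RecordWriter calls used in this patch (a File constructor, addHeader, add, close via try-with-resources); the validation-status column index is illustrative:

    import java.io.File;
    import java.util.regex.Pattern;

    import org.qcmg.qio.record.RecordWriter;
    import org.qcmg.qio.record.StringFileReader;

    public class MafFilterCopySketch {

        private static final Pattern TAB = Pattern.compile("\t");

        // Copies header and records from in to out, dropping rows whose validation
        // status starts with "False".
        static void filterMaf(File in, File out, int validationColumn) throws Exception {
            try (StringFileReader reader = new StringFileReader(in);
                 RecordWriter<String> writer = new RecordWriter<>(out)) {

                writer.addHeader(reader.getHeader());

                for (String rec : reader) {
                    String[] fields = TAB.split(rec, -1);
                    if ( ! fields[validationColumn].startsWith("False")) {
                        writer.add(rec);
                    }
                }
            }
        }
    }
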
patientsAndFiles.get(patient); // try snp one first, then indel - TabbedHeader header = null; + List header = null; String somGerm = ""; - if (filePair.getLeft() != null) { - + if (filePair.getLeft() != null) { File f = filePair.getLeft(); somGerm = f.getAbsolutePath().contains("Somatic") ? "Somatic" : f.getAbsolutePath().contains("Germline") ? "Germline" : ""; - TabbedFileReader reader = null; - try { - reader = new TabbedFileReader(f); + try(StringFileReader reader = new StringFileReader(f);) { header = reader.getHeader(); } catch (Exception e) { e.printStackTrace(); - } finally { - reader.close(); - } + } } else if (filePair.getRight() != null) { File f = filePair.getRight(); somGerm = f.getAbsolutePath().contains("Somatic") ? "Somatic" : f.getAbsolutePath().contains("Germline") ? "Germline" : ""; - TabbedFileReader reader = null; - try { - reader = new TabbedFileReader(f); + try(StringFileReader reader = new StringFileReader(f);) { header = reader.getHeader(); } catch (Exception e) { e.printStackTrace(); - } finally { - reader.close(); - } + } } StringBuilder sb = new StringBuilder(); @@ -320,26 +309,6 @@ protected String[] getDccMetaData(String patient) throws IOException { return new String[] {somGerm, dccMetaInfo}; } -// void writeFinalPREFilteredOutput() throws IOException { -// -// // get lists of high and low conf mafs -// List highConfMafs = new ArrayList(); -// List lowConfMafs = new ArrayList(); -// -// for (MAFRecord maf : filteredMafs) { -// if (maf.isHighConf()) { -// highConfMafs.add(maf); -// continue; -// } -// if (maf.isLowConf()) { -// lowConfMafs.add(maf); -// continue; -// } -// } -// -// MafUtils.writeMafOutput(outputDirectory + FS + "highConfidencePreFilter.maf", highConfMafs, MafUtils.HEADER_WITH_CONFIDENCE_CPG, true); -// MafUtils.writeMafOutput(outputDirectory + FS + "lowConfidencePreFilter.maf", lowConfMafs, MafUtils.HEADER_WITH_CONFIDENCE_CPG, true); -// } void addNovelStartsMT(String bamFilePathPart1, String bamFilePathPart2, String bamFilePattern) throws Exception { logger.info("adding novel starts"); @@ -363,7 +332,6 @@ void addNovelStartsMT(String bamFilePathPart1, String bamFilePathPart2, String b } } -// CountDownLatch latch = new CountDownLatch(100); int poolSize = 2; ExecutorService executor = Executors.newFixedThreadPool(poolSize); @@ -543,11 +511,11 @@ void addCpgAndGff() throws Exception { // if file is null, skip this stage if ( ! StringUtils.isNullOrEmpty(gffFile) && ! 
gffs.isEmpty()) { logger.info("number of records requiring gff data: " + gffs.size()); - GFF3FileReader reader = new GFF3FileReader(new File(gffFile)); + Gff3FileReader reader = new Gff3FileReader(new File(gffFile)); // Map> gffTypes = new HashMap>(); try { int count = 0, updatedCount = 0; - for (GFF3Record rec : reader) { + for (Gff3Record rec : reader) { String chr = rec.getSeqId(); ChrPosition cp = new ChrRangePosition(chr, rec.getStart(), rec.getEnd()); @@ -615,13 +583,13 @@ void addCpgAndGff() throws Exception { } void loadKRASData() throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(krasFile)); - try { + + try(StringFileReader reader = new StringFileReader(new File(krasFile));) { int count = 0, validCount = 0, alreadyPresent = 0, alreadyPresentSameVerification = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { count++; - String[] params = tabbedPattern.split(rec.getData(), -1); + String[] params = tabbedPattern.split(rec, -1); String chr = params[4]; String position = params[5]; String id = params[15]; @@ -639,7 +607,7 @@ void loadKRASData() throws Exception { if (id.contains(lowCovPatient)) { lowCov = true; - logger.info("Skipping KRAS record: " + rec.getData() + " - belongs to low coverage patient"); + logger.info("Skipping KRAS record: " + rec + " - belongs to low coverage patient"); continue; } } @@ -678,9 +646,7 @@ void loadKRASData() throws Exception { } } logger.info("KRAS file - count: " + count + ", validCount: " + validCount + ", alreadyPresent: " + alreadyPresent + ", alreadyPresentSameVerification: " + alreadyPresentSameVerification); - } finally { - reader.close(); - } + } } void checkAlleleFraction() { diff --git a/qmaftools/src/org/qcmg/maf/MafPipelineNew.java b/qmaftools/src/org/qcmg/maf/MafPipelineNew.java index 9327f7ce5..d4e1bfdf6 100644 --- a/qmaftools/src/org/qcmg/maf/MafPipelineNew.java +++ b/qmaftools/src/org/qcmg/maf/MafPipelineNew.java @@ -51,16 +51,14 @@ import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.SnpUtils; import org.qcmg.common.util.TabTokenizer; -import org.qcmg.gff3.GFF3FileReader; -import org.qcmg.gff3.GFF3Record; import org.qcmg.maf.util.FilterOptions; import org.qcmg.maf.util.MafFilterUtils; import org.qcmg.maf.util.MafUtils; import org.qcmg.picard.SAMFileReaderFactory; import org.qcmg.picard.util.SAMUtils; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.gff3.Gff3FileReader; +import org.qcmg.qio.gff3.Gff3Record; +import org.qcmg.qio.record.StringFileReader; public abstract class MafPipelineNew { @@ -165,8 +163,8 @@ public MafPipelineNew () { protected String getDccMetaData() throws Exception { // get dcc meta info from file, and prepend to header - TabbedHeader header = null; - try (TabbedFileReader reader = new TabbedFileReader(new File(dccqFile))){ + List header = null; + try (StringFileReader reader = new StringFileReader(new File(dccqFile))){ header = reader.getHeader(); } @@ -400,12 +398,12 @@ void addCpgAndGff() throws Exception { logger.info("number of records requiring gff data: " + gffs.size()); // GFF3FileReader reader = new GFF3FileReader(new File(gffFile)); // Map> gffTypes = new HashMap>(); - try (GFF3FileReader reader = new GFF3FileReader(new File(gffFile))) { + try (Gff3FileReader reader = new Gff3FileReader(new File(gffFile))) { int count = 0, updatedCount = 0; List relevantList = null; String currentChr = null; - for (GFF3Record rec : reader) { + for (Gff3Record rec : reader) { String 
chr = rec.getSeqId(); if (count == 0) { @@ -516,14 +514,14 @@ void loadCOSMICData() throws Exception { String identifier = mafType.isIndel() ? "Insertion" : "Substitution"; int count = 0, chrPosCount = 0, chrPosMutCount=0; - try (TabbedFileReader reader = new TabbedFileReader(new File(cosmicFile));) { - for (TabbedRecord rec : reader) { - if (StringUtils.isNullOrEmpty(rec.getData())) continue; // blank lines in file.... my god..... - if (rec.getData().startsWith("Gene name")) continue; //header line - if (rec.getData().contains(identifier)) { + try (StringFileReader reader = new StringFileReader(new File(cosmicFile));) { + for (String rec : reader) { + if (StringUtils.isNullOrEmpty(rec)) continue; // blank lines in file.... my god..... + if (rec.startsWith("Gene name")) continue; //header line + if (rec.contains(identifier)) { boolean forwardStrand = true; count++; - String [] params = TabTokenizer.tokenize(rec.getData()); + String [] params = TabTokenizer.tokenize(rec); String chrPos = params[19]; if (params[20] != null && params[20] == "-") forwardStrand = false; if (StringUtils.isNullOrEmpty(chrPos)) { @@ -531,7 +529,7 @@ void loadCOSMICData() throws Exception { if (params[18] != null && params[18] == "-") forwardStrand = false; } if (StringUtils.isNullOrEmpty(chrPos)) { -// logger.info("skipping record due to no position info for: " + rec.getData()); +// logger.info("skipping record due to no position info for: " + rec); continue; } chrPosCount++; @@ -630,12 +628,12 @@ void updateMafsWithMafType() { } void loadKRASData() throws Exception { - try (TabbedFileReader reader = new TabbedFileReader(new File(krasFile));) { + try (StringFileReader reader = new StringFileReader(new File(krasFile));) { int count = 0, validCount = 0, alreadyPresent = 0, alreadyPresentSameVerification = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { count++; - String[] params = tabbedPattern.split(rec.getData(), -1); + String[] params = tabbedPattern.split(rec, -1); String chr = params[4]; String position = params[5]; String id = params[15]; @@ -653,7 +651,7 @@ void loadKRASData() throws Exception { if (id.contains(lowCovPatient)) { lowCov = true; - logger.info("Skipping KRAS record: " + rec.getData() + " - belongs to low coverage patient"); + logger.info("Skipping KRAS record: " + rec + " - belongs to low coverage patient"); continue; } } diff --git a/qmaftools/src/org/qcmg/maf/util/MafUtils.java b/qmaftools/src/org/qcmg/maf/util/MafUtils.java index f152c69c5..ce52d0416 100644 --- a/qmaftools/src/org/qcmg/maf/util/MafUtils.java +++ b/qmaftools/src/org/qcmg/maf/util/MafUtils.java @@ -39,9 +39,7 @@ import org.qcmg.maf.QMafException; import org.qcmg.picard.util.PileupElementUtil; import org.qcmg.picard.util.QDccMetaFactory; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.StringFileReader; public class MafUtils { private static final ReferenceNameComparator chrComp = new ReferenceNameComparator(); @@ -80,25 +78,22 @@ public class MafUtils { public static final int HEADER_WITH_CONFIDENCE_COLUMN_COUNT = TabTokenizer.tokenize(HEADER_WITH_CONFIDENCE).length; - public static TabbedRecord addColumn(TabbedRecord tabbedRec, String data) { - tabbedRec.setData(tabbedRec.getData() + "\t" + data); - return tabbedRec; - } +// public static String addColumn(String tabbedRec, String data) { +// return tabbedRec + "\t" + data; +// } public static void loadEntrezMapping(String fileName, Map> ensemblToEntrez) throws Exception 
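
One note on the loadCOSMICData hunk above, which appears unchanged by this patch: params[20] == "-" and the params[18] check after it compare String references, so they only match when the token happens to be the interned literal; if a value comparison is intended, equals is the usual form. A small sketch of the strand test under that assumption:

    public class StrandCheckSketch {

        // Forward strand unless the strand column holds "-"; uses equals so the
        // comparison is by value rather than by reference.
        static boolean isForwardStrand(String strandColumn) {
            return strandColumn == null || ! "-".equals(strandColumn);
        }
    }
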
{ - TabbedFileReader reader = new TabbedFileReader(new File(fileName)); - try { + + try(StringFileReader reader = new StringFileReader(new File(fileName));) { int count = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { // header line if (count++ == 0) continue; - String[] params = TabTokenizer.tokenize(rec.getData()); + String[] params = TabTokenizer.tokenize(rec); // ensemble id is column 2, entrez id is column 3 // need to deal with one to many mappings - keep them all -// String ensembl = params[1]; -// String entrez = params[2]; String ensembl = params[1]; String entrez = params[5]; // now being taken from larger file @@ -112,22 +107,19 @@ public static void loadEntrezMapping(String fileName, Map> ensemblToEntrez.put(ensembl, existingEntrez); } existingEntrez.add(Integer.parseInt(entrez)); - } - - } finally { - reader.close(); - } + } + } } public static void loadCanonicalTranscriptMapping(String fileName, Map ensemblGeneToCanonicalTranscript) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(fileName)); + StringFileReader reader = new StringFileReader(new File(fileName)); try { int count = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { if (++count == 1) continue; // header line - String[] params = TabTokenizer.tokenize(rec.getData()); + String[] params = TabTokenizer.tokenize(rec); // ensemble gene id is column 1, canonical transcript id is column 2 // need to deal with one to many mappings - keep them all String geneId = params[0]; @@ -151,16 +143,16 @@ public static void loadCanonicalTranscriptMapping(String fileName, Map> verifiedData) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(fileName)); + StringFileReader reader = new StringFileReader(new File(fileName)); try { int verifiedYes = 0, verifiedNo = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { // seem to be some blank lines at the top of the verification data file // also skip past header line - if (StringUtils.isNullOrEmpty(rec.getData()) || rec.getData().startsWith("PatientID")) continue; + if (StringUtils.isNullOrEmpty(rec) || rec.startsWith("PatientID")) continue; - String[] params = TabTokenizer.tokenize(rec.getData()); + String[] params = TabTokenizer.tokenize(rec); String patientFromFile = params[0]; @@ -196,11 +188,11 @@ public static void getDbSNPValDetails(String fileName, List mafs) thr snpIdMap.put(maf.getDbSnpId(), maf); } - TabbedFileReader reader = new TabbedFileReader(new File(fileName)); + StringFileReader reader = new StringFileReader(new File(fileName)); try { - for (TabbedRecord rec : reader) { - String[] params = TabTokenizer.tokenize(rec.getData()); + for (String rec : reader) { + String[] params = TabTokenizer.tokenize(rec); // dbSnp id is column 5, val details is column 12 // need to deal with one to many mappings - keep them all String dbSnpId = params[4]; @@ -337,32 +329,23 @@ public static String getDbSnpId(String id) { } public static void loadDCCFile(File fileName, String patientId, Map verifiedData, List mafs, Map> ensemblToEntrez, MutationType type) throws Exception { - TabbedFileReader reader = new TabbedFileReader(fileName); - TabbedHeader header = reader.getHeader(); - + StringFileReader reader = new StringFileReader(fileName); // should be able to glean some useful info from the header -// String patientIdFromFile = null; String controlSampleID = null; String tumourSampleID = null; String tool = null; -// DccType type = null; - for (String headerLine : header) { -// if 
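
loadEntrezMapping above keeps the one-to-many Ensembl-to-Entrez mapping in a Map of Sets, populated with a get-then-put sequence. A minimal sketch of the same load using computeIfAbsent, assuming the column positions shown in the surrounding code (Ensembl id in column 1, Entrez id in column 5) and skipping the header line; the empty-value checks of the real code are omitted:

    import java.io.File;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    import org.qcmg.common.util.TabTokenizer;
    import org.qcmg.qio.record.StringFileReader;

    public class EntrezMappingSketch {

        // Builds the one-to-many Ensembl-gene -> Entrez-id mapping from a
        // tab-delimited file whose first record line is a header.
        static Map<String, Set<Integer>> load(File mappingFile) throws Exception {
            Map<String, Set<Integer>> ensemblToEntrez = new HashMap<>();
            try (StringFileReader reader = new StringFileReader(mappingFile)) {
                int count = 0;
                for (String rec : reader) {
                    if (count++ == 0) continue;             // header line
                    String[] fields = TabTokenizer.tokenize(rec);
                    String ensembl = fields[1];
                    String entrez = fields[5];
                    ensemblToEntrez.computeIfAbsent(ensembl, k -> new HashSet<>())
                                   .add(Integer.parseInt(entrez));
                }
            }
            return ensemblToEntrez;
        }
    }
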
(headerLine.startsWith("#PatientID")) -// patientIdFromFile = headerLine.substring(headerLine.indexOf(':') +2); + for (String headerLine : reader.getHeader()) { if (headerLine.startsWith("#ControlSampleID")) controlSampleID = headerLine.substring(headerLine.indexOf(':') +2); if (headerLine.startsWith("#TumourSampleID")) tumourSampleID = headerLine.substring(headerLine.indexOf(':') +2); if (headerLine.startsWith("#Tool")) { tool = headerLine.substring(headerLine.indexOf(':') +2); -// type = headerLine.endsWith("SNP") || headerLine.endsWith("GATK") ? DccType.SNP : (headerLine.endsWith("small_indel_tool") ? DccType.INDEL : null); } } - // default to snp if not in header -// if (null == type) type = DccType.SNP; - + // default to snp if not in header logger.info("patient: " + patientId + ", controlSampleID: " + controlSampleID + ", tumourSampleID: " + tumourSampleID + ", tool: " + tool); try { @@ -370,14 +353,14 @@ public static void loadDCCFile(File fileName, String patientId, Map mafs, Map> ensemblToEntrez, MafType mafType) throws IOException, Exception { - try (TabbedFileReader reader = new TabbedFileReader(new File(dccqFile));) { - TabbedHeader header = reader.getHeader(); + try (StringFileReader reader = new StringFileReader(new File(dccqFile));) { - QDccMeta dccMeta = QDccMetaFactory.getDccMeta(header); + QDccMeta dccMeta = QDccMetaFactory.getDccMeta(reader.getHeader()); String controlSampleId = dccMeta.getMatchedSampleId().getValue(); String testSampleId = dccMeta.getAnalyzedSampleId().getValue(); @@ -424,12 +406,12 @@ public static void loadDCCFile(String dccqFile, Map verifiedData, List mafs, Map> ensemblToEntrez) throws QMafException { convertDccToMaf(tabbedRecord, patientId, controlSampleID, tumourSampleID, verifiedData, mafs, ensemblToEntrez, false, false); } - public static void convertDccToMaf(final TabbedRecord tabbedRecord, final String patientId, final String controlSampleID, + public static void convertDccToMaf(final String tabbedRecord, final String patientId, final String controlSampleID, final String tumourSampleID, final Map verifiedData, final List mafs, final Map> ensemblToEntrez, final boolean containsNS, final boolean containsEA) throws QMafException { @@ -466,7 +448,7 @@ public static void convertDccToMaf(final TabbedRecord tabbedRecord, final String final int flagPosition = 35 + offset; final int flankingSequencePosition = 36 + offset; - String[] params = TabTokenizer.tokenize(tabbedRecord.getData()); + String[] params = TabTokenizer.tokenize(tabbedRecord); // check if position verified String chromosome = params[2]; @@ -562,7 +544,7 @@ public static void convertDccToMaf(final TabbedRecord tabbedRecord, final String mafs.add(maf); } - public static void convertGermlineDccToMaf(final TabbedRecord tabbedRecord, final String patientId, final String controlSampleID, + public static void convertGermlineDccToMaf(final String tabbedRecord, final String patientId, final String controlSampleID, final String tumourSampleID, final Map verifiedData, final List mafs, final Map> ensemblToEntrez) throws QMafException { @@ -578,7 +560,7 @@ public static void convertGermlineDccToMaf(final TabbedRecord tabbedRecord, fina final int flagPosition = 35 + offset; final int flankingSequencePosition = 36 + offset; - String[] params = TabTokenizer.tokenize(tabbedRecord.getData()); + String[] params = TabTokenizer.tokenize(tabbedRecord); // check if position verified String chromosome = params[2]; @@ -905,7 +887,7 @@ public static void canonicalTranscript(MutationType type, String[] params, 
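
loadDCCFile above pulls ControlSampleID, TumourSampleID and Tool out of the DCC header with repeated startsWith / substring(indexOf(':') + 2) pairs. A small sketch of the same "#Key: value" extraction generalised into a map; the method name is illustrative and the two-character offset follows the surrounding code:

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class DccHeaderSketch {

        // Extracts "#Key: value" entries from a DCC header, mirroring
        // headerLine.substring(headerLine.indexOf(':') + 2) in the code above.
        static Map<String, String> parse(List<String> header) {
            Map<String, String> values = new HashMap<>();
            for (String line : header) {
                int colon = line.indexOf(':');
                if (line.startsWith("#") && colon > 1 && colon + 2 <= line.length()) {
                    values.put(line.substring(1, colon), line.substring(colon + 2));
                }
            }
            return values;
        }
    }

With a StringFileReader in scope, parse(reader.getHeader()).get("ControlSampleID") would then replace the three separate startsWith branches.
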
String[] genes, String[] geneIds, String[] transcriptIds, MAFRecord maf, Map ensemblGeneToCanonicalTranscript, Map> ensemblToEntrez) { int i = 0, allTranscriptIdCount = 0; - //TODO may need to up index positions if novel starts info is contained in dcc file + //may need to up index positions if novel starts info is contained in dcc file for (String gene : genes) { String[] geneSpecificTranscriptIds = transcriptIds[i].split(","); String geneId = geneIds[i++]; @@ -919,7 +901,7 @@ public static void canonicalTranscript(MutationType type, String[] params, String [] aaChanges = params[23].split(","); String [] baseChanges = params[24].split(","); - //TODO what to do if canonical transcript id is not found!! + // what to do if canonical transcript id is not found!! if (positionInTranscripts > -1) { // we have a matching canonical transcript @@ -1118,15 +1100,15 @@ public static String getFullChrFromMafChr(final String chr) { public static void loadPositionsOfInterest(String mafFile, Collection positionsOfInterest ) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(mafFile)); + StringFileReader reader = new StringFileReader(new File(mafFile)); try { int count = 0; - for (TabbedRecord rec : reader) { - if (count++ == 0 && (rec.getData().startsWith("Hugo_Symbol"))) continue; // first line is header + for (String rec : reader) { + if (count++ == 0 && (rec.startsWith("Hugo_Symbol"))) continue; // first line is header - String[] params = TabTokenizer.tokenize(rec.getData()); + String[] params = TabTokenizer.tokenize(rec); String chr = params[4]; int startPos = Integer.parseInt(params[5]); int endPos = Integer.parseInt(params[6]); diff --git a/qmaftools/test/org/qcmg/maf/MafPipelineNewTest.java b/qmaftools/test/org/qcmg/maf/MafPipelineNewTest.java index dbcbc2bc4..1e239223c 100644 --- a/qmaftools/test/org/qcmg/maf/MafPipelineNewTest.java +++ b/qmaftools/test/org/qcmg/maf/MafPipelineNewTest.java @@ -17,7 +17,6 @@ import org.qcmg.maf.util.FilterOptions; import org.qcmg.maf.util.MafFilterUtils; import org.qcmg.maf.util.MafUtils; -import org.qcmg.tab.TabbedRecord; public class MafPipelineNewTest { @@ -108,9 +107,8 @@ public void testRealLifeIndel() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); String s = "aba9fc0c_7f03_417f_b087_2e8ab1a45e42_ICGC-ABMJ-20120706-01_ind353976 3 chr11 62638311 62638313 1 -999 -999 TAA -999 --- TAA/--- 1 -999 -999 -999 -999 -999 -999 -99 -999 0;46;37;0;1;0;1 0;34;30;0;0;0;0 -- splice_donor_variant,intron_variant,feature_truncation,splice_donor_variant,intron_variant,feature_truncation,intron_variant,feature_truncation,intron_variant,feature_truncation,intron_variant,feature_truncation,splice_donor_variant,intron_variant,feature_truncation -888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888 -888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888 -888 ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003 ENST00000377892,ENST00000377892,ENST00000377892,ENST00000377890,ENST00000377890,ENST00000377890,ENST00000377891,ENST00000377891,ENST00000377889,ENST00000377889,ENST00000535296,ENST00000535296,ENST00000538084,ENST00000538084,ENST00000538084 70 -999 SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2 -- -- -- chr11:62638311-62638313 
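
The canonicalTranscript and loadPositionsOfInterest hunks above, and the test classes that follow, keep indexing MAF columns by number: chromosome in 4, start in 5, end in 6. A small sketch that names those indexes and builds the position key the way loadPositionsOfInterest does; the constant names are illustrative, and it assumes ChrPosition lives alongside ChrRangePosition in org.qcmg.common.model as the imports in these files suggest:

    import org.qcmg.common.model.ChrPosition;
    import org.qcmg.common.model.ChrRangePosition;
    import org.qcmg.common.util.TabTokenizer;

    public class MafColumnsSketch {

        // Zero-based MAF column positions used repeatedly across these hunks.
        static final int CHROMOSOME = 4;
        static final int START_POSITION = 5;
        static final int END_POSITION = 6;

        // Builds the ChrPosition key for one tab-delimited MAF record.
        static ChrPosition toChrPosition(String mafRecord) {
            String[] fields = TabTokenizer.tokenize(mafRecord);
            return new ChrRangePosition(fields[CHROMOSOME],
                    Integer.parseInt(fields[START_POSITION]),
                    Integer.parseInt(fields[END_POSITION]));
        }
    }
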
PASS;NNS;HOMADJ_2 --"; - TabbedRecord data = new TabbedRecord(); s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertDccToMaf(data, "", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, true, true); FilterOptions fo = new FilterOptions(); diff --git a/qmaftools/test/org/qcmg/maf/util/MafFilterUtilsTest.java b/qmaftools/test/org/qcmg/maf/util/MafFilterUtilsTest.java index 9c5d8abd1..dff5f414e 100644 --- a/qmaftools/test/org/qcmg/maf/util/MafFilterUtilsTest.java +++ b/qmaftools/test/org/qcmg/maf/util/MafFilterUtilsTest.java @@ -8,8 +8,7 @@ import java.util.Map; import java.util.Set; -import junit.framework.Assert; - +import org.junit.Assert; import org.junit.Before; import org.junit.Ignore; import org.junit.Test; @@ -20,7 +19,6 @@ import org.qcmg.common.util.SnpUtils; import org.qcmg.maf.MafPipelineNew; import org.qcmg.maf.QMafException; -import org.qcmg.tab.TabbedRecord; public class MafFilterUtilsTest { @@ -163,10 +161,8 @@ public void testCheckMAFForMINIndelWithNumbers() { public void testCheckMAFForMINIndelRealLifeData() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); - TabbedRecord data = new TabbedRecord(); String s = "7PrimaryTumour_ind63058 2 chr19 53651845 53651846 1 -999 -999 T -999 TTC T>TTC -999 -999 -999 -999 -999 -99 -999 -999 15;150;76;71;1;3;0 11;200;56;51;2;0;0; -- intron_variant,feature_truncation,intron_variant,feature_truncation,intron_variant,feature_truncation,intron_variant,feature_truncation,intron_variant,feature_truncation,intron_variant,feature_truncation,downstream_gene_variant,downstream_gene_variant -888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888 -888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888 -88 ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937,ENSG00000197937 ENST00000601804,ENST00000601804,ENST00000334197,ENST00000334197,ENST00000452676,ENST00000452676,ENST00000597183,ENST00000597183,ENST00000601469,ENST00000601469,ENST00000595967,ENST00000595967,ENST00000595710,ENST00000599096 70 -999 ZNF347,ZNF347,ZNF347,ZNF347,ZNF347,ZNF347,ZNF347,ZNF347,ZNF347,ZNF347,ZNF347,ZNF347,ZNF347,ZNF347 -- -- -- chr19:53651845-53651846 PASS;MIN --"; -// s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertDccToMaf(data, "7PrimaryTumour_ind63058", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, true, true); @@ -175,8 +171,7 @@ public void testCheckMAFForMINIndelRealLifeData() throws QMafException { maf.setMafType(MafType.INDEL_SOMATIC); MafFilterUtils.checkMAFForMIN(maf); - assertEquals(SnpUtils.PASS + ";" + SnpUtils.MUTATION_IN_NORMAL, maf.getFlag()); - + assertEquals(SnpUtils.PASS + ";" + SnpUtils.MUTATION_IN_NORMAL, maf.getFlag()); } @Test @@ -486,9 +481,8 @@ public void testClassifyRealLifeRecord() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); String s = "AOCS_067_SNP_54973 1 17 30965850 30965850 1 -888 -888 T T/T C/T T>C -999 -999 0.12844960008474762 19 2 2 -888 -999 -999 T:0[0],12[33.83] C:0[0],5[7.6],T:1[40],13[23] 5 missense_variant,missense_variant,non_coding_exon_variant,nc_transcript_variant,missense_variant,intron_variant,nc_transcript_variant,non_coding_exon_variant,nc_transcript_variant N867D,N779D,-888,-888,N867D,-888,-888,-888,-888 
2904T>C,2879T>C,546T>C,546T>C,2871T>C,-888,-888,505T>C,505T>C PF06017,PF06017,PF06017 ENSG00000176658,ENSG00000176658,ENSG00000176658,ENSG00000176658,ENSG00000176658,,,ENSG00000176658,ENSG00000176658 ENST00000318217,ENST00000394649,ENST00000577352,ENST00000577352,ENST00000579584,ENST00000582272,ENST00000582272,ENST00000581059,ENST00000581059 70 -999 MYO1D,MYO1D,MYO1D,MYO1D,MYO1D,,,MYO1D,MYO1D PF06017,PF06017,PF06017 pfam,pfam,pfam Myosin_tail_2,Myosin_tail_2,Myosin_tail_2 chr17:30965850-30965850 PASS TCGATCTACCT"; - TabbedRecord data = new TabbedRecord(); s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertDccToMaf(data, "AOCS_067", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, true, true); @@ -502,9 +496,8 @@ public void testClassifyRealLifeRecordIndel() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); String s = "aba9fc0c_7f03_417f_b087_2e8ab1a45e42_ICGC-ABMJ-20120706-01_ind353976 3 chr11 62638311 62638313 1 -999 -999 TAA -999 --- TAA/--- 1 -999 -999 -999 -999 -999 -999 -99 -999 0;46;37;0;1;0;1 0;34;30;0;0;0;0 -- splice_donor_variant,intron_variant,feature_truncation,splice_donor_variant,intron_variant,feature_truncation,intron_variant,feature_truncation,intron_variant,feature_truncation,intron_variant,feature_truncation,splice_donor_variant,intron_variant,feature_truncation -888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888 -888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888 -888 ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003,ENSG00000168003 ENST00000377892,ENST00000377892,ENST00000377892,ENST00000377890,ENST00000377890,ENST00000377890,ENST00000377891,ENST00000377891,ENST00000377889,ENST00000377889,ENST00000535296,ENST00000535296,ENST00000538084,ENST00000538084,ENST00000538084 70 -999 SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2,SLC3A2 -- -- -- chr11:62638311-62638313 PASS;NNS;HOMADJ_2 --"; - TabbedRecord data = new TabbedRecord(); s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertDccToMaf(data, "", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, true, true); @@ -518,9 +511,8 @@ public void testClassifyRealLifeRecordIndel2() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); String s = "e3201e6a_2b36_4f04_8eb8_3c71ca2dc59d_ICGC-DBPC-20130205-124_ind14587 2 chr17 39411682 39411683 1 -999 -999 --------------- -99 ACCACCTGCTGCAGG ---------------/ACCACCTGCTGCAGG -999 -999 -999 -999 -999 -999 -999 -999 0;32;32;0[0|0];0;0;11 0;37;37;0[0|0];0;0;7;\"2_discontiguous_CTGCTGCAGGaccacctgctgcaggACCACCTGCT\" -- downstream_gene_variant,frameshift_variant,feature_truncation -888,RT15RT,RT15RT -888,47->ACCACCTGCTGCAGG,47->ACCACCTGCTGCAGG -888 ENSG00000241595,ENSG00000198083,ENSG00000198083 ENST00000334109,ENST00000394008,ENST00000394008 70 -999 KRTAP9-4,KRTAP9-9,KRTAP9-9 -- -- -- chr17:39411682-39411683 PASS;NNS;HOMADJ_2 --"; - TabbedRecord data = new TabbedRecord(); s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertDccToMaf(data, "", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, false, true); diff --git a/qmaftools/test/org/qcmg/maf/util/MafUtilsTest.java 
b/qmaftools/test/org/qcmg/maf/util/MafUtilsTest.java index 1c28f2801..d8e010025 100644 --- a/qmaftools/test/org/qcmg/maf/util/MafUtilsTest.java +++ b/qmaftools/test/org/qcmg/maf/util/MafUtilsTest.java @@ -26,7 +26,6 @@ import org.qcmg.common.util.SnpUtils; import org.qcmg.common.util.TabTokenizer; import org.qcmg.maf.QMafException; -import org.qcmg.tab.TabbedRecord; public class MafUtilsTest { @@ -409,7 +408,6 @@ public void testLoadEntrezMapping() throws Exception { writer.write("\tTwo\t\t\t\t2\n"); writer.write("\tThree\t\t\t\t3\n"); writer.write("\tThree\t\t\t\t33\n"); - // writer.write("\tFour\t\n"); writer.flush(); } @@ -420,19 +418,18 @@ public void testLoadEntrezMapping() throws Exception { assertEquals(4, results.get("One").size()); assertEquals(1, results.get("Two").size()); assertEquals(2, results.get("Three").size()); -// assertEquals(0, results.get("Four")); } @Test public void testConvertDccToMafWithNS() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); - TabbedRecord data = new TabbedRecord(); + String s = "COLO_829_SNP_4014186 1 19 56662400 56662400 1 -888 -888 A A/A A/G A>G -999 -999 59 2 2 -888 -999 -999 A:27[28.53],28[29.94] A:26[27.91],20[29.55],G:4[39],9[39.36] 11 INTRONIC,INTRONIC,INTRONIC,NON_SYNONYMOUS_CODING,INTRONIC -888,-888,-888,V200A,-888 -888,-888,-888,599A>G,-888 -888,-888,-888,-888,-888 ENSG00000167685|ENSG00000204533 ENST00000337080,ENST00000412291,ENST00000391714|ENST00000376272,ENST00000376271 61 -999 ZNF444|-888 -888,-888,-888|-888,-888 -888,-888,-888|-888,-888 -888,-888,-888|-888,-888 A/G chr19:56662400-56662400"; s = s.replaceAll("\\s+", "\t"); s+= "\tmutation also found in pileup of (unfiltered) normal"; - data.setData(s); - String [] params = TabTokenizer.tokenize(data.getData()); + String data = s; + String [] params = TabTokenizer.tokenize(data); assertEquals("mutation also found in pileup of (unfiltered) normal", params[37]); MafUtils.convertDccToMaf(data, "COLO_829", "control_sample_id", "tumourSampleID", null, mafs, ensemblToEntrez, true, false); @@ -444,40 +441,34 @@ public void testConvertDccToMafWithNS() throws QMafException { public void testConvertDccToMafWithoutNS() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); - TabbedRecord data = new TabbedRecord(); String s = "APGI_2057_SNP_3302627 1 20 44115498 44115498 1 -888 -888 A A/A A/T A>T -999 -999 14 2 2 -888 -999 -999 A:10[40.8],7[40.29],T:7[40.16],3[35.22] A:5[38.21],3[30.25],T:3[40],3[40] UPSTREAM,DOWNSTREAM -888,-888 -888,-888 -888,-888 ENSG00000237464|ENSG00000237068 ENST00000417630|ENST00000429598 61 -999 -888|RPL5P2 -888|-888 -888|-888 -888|-888 A/T chr20:44115498-44115498"; s = s.replaceAll("\\s+", "\t"); s+= "\tmutation also found in pileup of normal; mutation is a germline variant in another patient"; - data.setData(s); - String [] params = TabTokenizer.tokenize(data.getData()); + String data = s; + String [] params = TabTokenizer.tokenize(data); assertEquals("mutation also found in pileup of normal; mutation is a germline variant in another patient", params[36]); MafUtils.convertDccToMaf(data, "APGI_2057", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, false, false); assertEquals(1, mafs.size()); -// assertEquals(0, mafs.size()); s = "APGI_2057_SNP_3260617 1 15 102211884 102211884 1 -888 -888 T T/T A/T T>A -999 -999 7 2 2 -888 -999 -999 T:8[24.83],20[39.34],A:3[21.49],0[0],G:1[16],0[0] T:5[11.03],1[36],A:1[23],0[0] NON_SYNONYMOUS_CODING,NON_SYNONYMOUS_CODING Y619F,Y524F 
2073T>A,1626T>A -888,-888 ENSG00000185418 ENST00000335968,ENST00000333018 61 -999 TARSL2 TIGR00418_PS50862,PS50862_TIGR00418 tigrfam_pfscan,pfscan_tigrfam Thr-tRNA-synth_IIa__Aminoacyl-tRNA-synth_II,Aminoacyl-tRNA-synth_II__Thr-tRNA-synth_IIa T/A chr15:102211884-102211884"; s = s.replaceAll("\\s+", "\t"); s+= "\tmutation also found in pileup of normal; mutation is a germline variant in another patient"; - data.setData(s); - params = TabTokenizer.tokenize(data.getData()); + data = s; + params = TabTokenizer.tokenize(data); assertEquals("mutation also found in pileup of normal; mutation is a germline variant in another patient", params[36]); MafUtils.convertDccToMaf(data, "APGI_2057", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, false, false); assertEquals(2, mafs.size()); -// assertEquals(1, mafs.size()); -// System.out.println("mafs.get(0).getVariantClassification: " + mafs.get(0).getVariantClassification()); -// assertEquals("", mafs.get(0).getVariantClassification()); } @Test public void testRealLifeData() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); - TabbedRecord data = new TabbedRecord(); String s = "AOCS_066_SNP_3124 1 1 115256530 115256530 1 G/T -1 G G/G G/T G>T -999 -999 1.2420510993064712E-22 110 1 2 -888 rs121913254 -999 G:25[34.12],67[36.06] G:10[33.2],31[33.35],T:16[39.62],53[38.58] 44 missense_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant Q61K;Q61K;Q61K;Q61K;Q61K;Q61K;Q61K;Q61K;Q61K,-888,-888,-888,-888,-888,-888,-888,-888 435G>T;435G>T;435G>T;435G>T;435G>T;435G>T;435G>T;435G>T;435G>T,-888,-888,-888,-888,-888,-888,-888,-888 PF00071;PF08477;PF00025;PF00009;TIGR00231;PR00449;SM00173;SM00175;SM00174 ENSG00000213281,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307 ENST00000369535,ENST00000339438,ENST00000438362,ENST00000358528,ENST00000261443,ENST00000530886,ENST00000369530,ENST00000483407,ENST00000534699 70 -999 NRAS,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1 PF00071;PF08477;PF00025;PF00009;TIGR00231;PR00449;SM00173;SM00175;SM00174 pfam;pfam;pfam;pfam;tigrfam;prints;smart;smart;smart Small_GTPase;MIRO-like;Small_GTPase_ARF/SAR;EF_GTP-bd_dom;Small_GTP-bd_dom;Small_GTPase;Small_GTPase_Ras;Small_GTPase_Rab_type;Small_GTPase_Rho chr1:115256530-115256530 PASS TTCTTTTCCAG"; s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertDccToMaf(data, "AOCS_066", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, true, true); @@ -491,7 +482,6 @@ public void testRealLifeData() throws QMafException { assertEquals(true, DccConsequence.passesMafNameFilter(maf.getVariantClassification())); // high conf filter -// String variant = maf.getRef().equals(maf.getTumourAllele1()) ? 
maf.getTumourAllele2() : maf.getTumourAllele1(); char alt = MafUtils.getVariant(maf).charAt(0); assertEquals(true, MafUtils.passesHighConfidenceFilter(maf.getFlag(), maf.getVariantType(), maf.getTd(), true , alt)); @@ -503,10 +493,10 @@ public void testRealLifeData() throws QMafException { public void testRealLifeData2() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); - TabbedRecord data = new TabbedRecord(); + String s = "AOCS_066_SNP_5524 1 1 196762515 196762515 1 -888 -888 A A/A A/C A>C -999 -999 1.0515508456799864E-8 99 2 2 -888 -999 -999 A:94[38.29],4[38.25] A:71[35.69],3[40],C:24[38.54],1[37] 19 missense_variant,downstream_gene_variant,3_prime_UTR_variant,NMD_transcript_variant,missense_variant,non_coding_exon_variant,nc_transcript_variant K289Q,-888,-888,-888,K228Q,-888,-888 957A>C,-888,1139A>C,1139A>C,753A>C,537A>C,537A>C SSF57535,SSF57535 ENSG00000116785,ENSG00000116785,ENSG00000116785,ENSG00000116785,ENSG00000116785,ENSG00000116785,ENSG00000116785 ENST00000367425,ENST00000471440,ENST00000367427,ENST00000367427,ENST00000391985,ENST00000461558,ENST00000461558 70 -999 CFHR3,CFHR3,CFHR3,CFHR3,CFHR3,CFHR3,CFHR3 SSF57535,SSF57535 superfamily,superfamily Complement_control_module,Complement_control_module chr1:196762515-196762515 PASS ACAGACAATAT"; s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertDccToMaf(data, "AOCS_066", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, true, true); @@ -522,7 +512,6 @@ public void testRealLifeData2() throws QMafException { maf.setMafType(MafType.SNV_SOMATIC); // high conf filter -// String variant = maf.getRef().equals(maf.getTumourAllele1()) ? maf.getTumourAllele2() : maf.getTumourAllele1(); char alt = MafUtils.getVariant(maf).charAt(0); assertEquals(true, MafUtils.passesHighConfidenceFilter(maf.getFlag(), maf.getVariantType(), maf.getTd(), true , alt)); @@ -530,40 +519,14 @@ public void testRealLifeData2() throws QMafException { assertEquals(true, MafUtils.passesLowerConfidenceFilter(maf.getFlag(), maf.getVariantType(), maf.getTd(), alt)); } -// @Ignore -// public void testRealLifeData2() throws QMafException { -// List mafs = new ArrayList(); -// Map> ensemblToEntrez = new HashMap>(); -// TabbedRecord data = new TabbedRecord(); -// String s = "APGI_2179_SNP_3228926 1 12 25398284 25398284 1 A/C -1 C C/CA/C C>A -999 -999 42 1 2 -888 rs121913529 -999 C:21[37.62],21[37.67] A:14[36.57],18[34.17],C:5[36.2],5[32.2] 28 NON_SYNONYMOUS_CODING,NON_SYNONYMOUS_CODING,NON_SYNONYMOUS_CODING,NON_SYNONYMOUS_CODINGG12V;G12V;G12V;G12V;G12V;G12V;G12V;G12V,G12V;G12V;G12V,G12V;G12V;G12V;G12V;G12V;G12V;G12V;G12V;G12V,G12V;G12V 227C>A;227C>A;227C>A;227C>A;227C>A;227C>A;227C>A;227C>A,232C>A;232C>A;232C>A,99C>A;99C>A;99C>A;99C>A;99C>A;99C>A;99C>A;99C>A;99C>A,212C>A;212C>APF00071;PF08477;PF00025;TIGR00231;PR00449;SM00173;SM00175;SM00174,PF00071;PR00449;SM00173,PF00071;PF08477;PF00025;PR00449;TIGR00231;SM00173;SM00175;SM00174;SM00176,PF00071;PR00449 ENSG00000133703,ENSG00000133703,ENSG00000133703,ENSG00000133703 ENST00000311936,ENST00000557334,ENST00000256078,ENST00000556131 66 -999 KRAS,KRAS,KRAS,KRAS PF00071;PF08477;PF00025;TIGR00231;PR00449;SM00173;SM00175;SM00174,PF00071;PR00449;SM00173,PF00071;PF08477;PF00025;PR00449;TIGR00231;SM00173;SM00175;SM00174;SM00176,PF00071;PR00449 pfam;pfam;pfam;tigrfam;prints;smart;smart;smart,pfam;prints;smart,pfam;pfam;pfam;prints;tigrfam;smart;smart;smart;smart,pfam;prints 
Small_GTPase;MIRO-like;Small_GTPase_ARF/SAR;Small_GTP-bd_dom;Small_GTPase;Small_GTPase_Ras;Small_GTPase_Rab_type;Small_GTPase_Rho,Small_GTPase;Small_GTPase;Small_GTPase_Ras,Small_GTPase;MIRO-like;Small_GTPase_ARF/SAR;Small_GTPase;Small_GTP-bd_dom;Small_GTPase_Ras;Small_GTPase_Rab_type;Small_GTPase_Rho;Ran_GTPase,Small_GTPase;Small_GTPase chr12:25398284-25398284 PASS;GERM CGCCAACAGCT"; -//// s = s.replaceAll("\\s+", "\t"); -// data.setData(s); -// -// MafUtils.convertDccToMaf(data, "APGI_2179", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, false, false); -// -// assertEquals(1, mafs.size()); -// MAFRecord maf = mafs.get(0); -// -// // name filter -// assertEquals(true, DccConsequence.passesMafNameFilter(maf.getVariantClassification())); -// -// // high conf filter -//// String variant = maf.getRef().equals(maf.getTumourAllele1()) ? maf.getTumourAllele2() : maf.getTumourAllele1(); -// char alt = MafUtils.getVariant(maf); -// assertEquals(true, MafUtils.passesHighConfidenceFilter(maf.getFlag(), maf.getVariantType(), maf.getTd(), true , alt)); -// -// // low conf filter -// assertEquals(true, MafUtils.passesLowerConfidenceFilter(maf.getFlag(), maf.getVariantType(), maf.getTd(), alt)); -// } - @Test public void testRealLifeData3() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); - TabbedRecord data = new TabbedRecord(); + String s = "AOCS_067_SNP_20521 1 5 94784186 94784186 1 -888 -888 A A/A A/C A>C -999 -999 0.12844960008474762 19 2 2 -888 -999 -999 A:1[18],11[28] A:0[0],14[27.5],C:0[0],5[26] 4 intron_variant,intron_variant,NMD_transcript_variant,downstream_gene_variant,intron_variant,intron_variant,NMD_transcript_variant -888,-888,-888,-888,-888,-888,-888 -888,-888,-888,-888,-888,-888,-888 -888 ENSG00000153347,ENSG00000153347,ENSG00000153347,ENSG00000153347,ENSG00000153347,ENSG00000153347,ENSG00000153347 ENST00000283357,ENST00000507832,ENST00000507832,ENST00000503361,ENST00000512365,ENST00000513110,ENST00000513110 70 -999 FAM81B,FAM81B,FAM81B,FAM81B,FAM81B,FAM81B,FAM81B -- -- -- chr5:94784186-94784186 PASS CTTTTCTTAAG"; s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertDccToMaf(data, "AOCS_067", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, true, true); @@ -578,7 +541,6 @@ public void testRealLifeData3() throws QMafException { maf.setMafType(MafType.SNV_SOMATIC); // high conf filter -// String variant = maf.getRef().equals(maf.getTumourAllele1()) ? 
maf.getTumourAllele2() : maf.getTumourAllele1(); char alt = MafUtils.getVariant(maf).charAt(0); assertEquals(true, MafUtils.passesHighConfidenceFilter(maf.getFlag(), maf.getVariantType(), maf.getTd(), true , alt)); @@ -592,7 +554,7 @@ public void testRealLifeData3() throws QMafException { s = "AOCS_067_SNP_38220 1 11 5373331 5373331 1 -888 -888 T T/T G/T T>G -999 -999 0.001102600781824035 32 2 2 -888 -999 -999 T:29[36.79],58[35.22] G:0[0],5[28.6],T:8[40],19[28.95] 4 intron_variant,intron_variant,intron_variant,intron_variant,intron_variant,nc_transcript_variant,intron_variant,nc_transcript_variant,intron_variant,nc_transcript_variant,intron_variant,nc_transcript_variant,synonymous_variant,upstream_gene_variant -888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,V198V;V198V;V198V,-888 -888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,-888,594T>G;594T>G;594T>G,-888 PF00001;PF10320;PS50262 ENSG00000196565,ENSG00000196565,ENSG00000213931,ENSG00000213931,,,,,,,,,ENSG00000176239, ENST00000380252,ENST00000380259,ENST00000380237,ENST00000396895,ENST00000420465,ENST00000420465,ENST00000415970,ENST00000415970,ENST00000420726,ENST00000420726,ENST00000418729,ENST00000418729,ENST00000380219,ENST00000450768 70 -999 HBG2,HBG2,HBE1,HBE1,,,,,,,,,OR51B6, PF00001;PF10320;PS50262 pfam;pfam;pfscan GPCR_Rhodpsn;7TM_GPCR_olfarory/Srsx;GPCR_Rhodpsn_7TM chr11:5373331-5373331 PASS CCAGTGGTAGT"; s = s.replaceAll("\\s+", "\t"); - data.setData(s); + data = s; MafUtils.convertDccToMaf(data, "AOCS_067", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, true, true); maf = mafs.get(0); assertEquals("Silent", maf.getVariantClassification()); @@ -602,7 +564,6 @@ public void testRealLifeData3() throws QMafException { maf.setMafType(MafType.SNV_SOMATIC); // high conf filter -// variant = maf.getRef().equals(maf.getTumourAllele1()) ? 
maf.getTumourAllele2() : maf.getTumourAllele1(); alt = MafUtils.getVariant(maf).charAt(0); assertEquals(true, MafUtils.passesHighConfidenceFilter(maf.getFlag(), maf.getVariantType(), maf.getTd(), true , alt)); @@ -616,10 +577,10 @@ public void testRealLifeData3() throws QMafException { public void testRealLifeData4() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); - TabbedRecord data = new TabbedRecord(); + String s = "APGI_1594_SNP_2943597 1 11 62373211 62373211 1 -888 -888 T T/T G/T T>G -999 -999 0.060573922747081455 40 2 2 -888 -999 -999 G:1[6],0[0],T:18[22.72],16[37.69] G:5[7.6],2[21.5],T:11[21.45],22[35.55] 7 upstream_gene_variant,missense,3_prime_UTR_variant,non_coding_exon_variant,missense,non_coding_exon_variant,missense,missense,missense,non_coding_exon_variant,missense,upstream_gene_variant,upstream_gene_variant,upstream_gene_variant,downstream_gene_variant,downstream_gene_variant,upstream_gene_variant,upstream_gene_variant,upstream_gene_variant -888,H/D,-888,-888,H/D,-888,H/D,H/D,H/D,-888,H/D,-888,-888,-888,-888,-888,-888,-888,-888 -888,Cac/Gac,-888,-888,Cac/Gac,-888,Cac/Gac,Cac/Gac,Cac/Gac,-888,Cac/Gac,-888,-888,-888,-888,-888,-888,-888,-888 -888 ENSG00000149480,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000149499,ENSG00000254964,ENSG00000149480,ENSG00000149480 ENST00000278823,ENST00000278845,ENST00000494448,ENST00000483199,ENST00000529309,ENST00000526116,ENST00000394776,ENST00000531557,ENST00000494176,ENST00000460939,ENST00000394773,ENST00000439994,ENST00000533165,ENST00000524518,ENST00000462626,ENST00000438258,ENST00000532626,ENST00000527204,ENST00000526844 70 -999 MTA2,EML3,EML3,EML3,EML3,EML3,EML3,EML3,EML3,EML3,EML3,EML3,EML3,EML3,EML3,EML3,RP11-831H9.3,MTA2,MTA2 -- -- -- chr11:62373211-62373211 MIN AGGGGGGTGTG"; s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertDccToMaf(data, "APGI_1594", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, true, true); @@ -633,10 +594,10 @@ public void testRealLifeData4() throws QMafException { public void testRealLifeData5() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); - TabbedRecord data = new TabbedRecord(); + String s = "75edf18c_801c_48ae_8acf_44c7c87de319_SNP_5212932.3134 4 1 12907507 12907508 1 / -888 CT CT/CT CC/TT CT>TC -999 -999 -999 113 2 2 -888 -999 -999 ALL:1+12-;REF:1+12-;ALT:0+0+0-0-; ALL:11+12-;REF:11+12-;ALT:0+0+0-0-; 27 missense_variant E212G 861CT>TC PIRSF037992 ENSG00000179172 ENST00000317869 70 -999 HNRNPCL1 PIRSF037992 pirsf hnRNP_C_Raly chr1:12907507-12907508 PASS --"; s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertDccToMaf(data, "APGI_1594", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez, true, true); @@ -645,18 +606,17 @@ public void testRealLifeData5() throws QMafException { assertEquals("Missense_Mutation", maf.getVariantClassification()); assertEquals("HNRNPCL1", maf.getHugoSymbol()); - MafFilterUtils.classifyMAFRecord(maf); - //assertEquals(MafConfidence.ZERO, maf.getConfidence()); + MafFilterUtils.classifyMAFRecord(maf); //assertEquals(MafConfidence.ZERO, maf.getConfidence()); } @Ignore public void testRealLifeDataGermline() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); - TabbedRecord data 
= new TabbedRecord(); + String s = "OESO_1020_SNP_3 1 1 10109 10109 1 -888 -888 A A/T A/T -999 -999 -999 170 2 2 -888 -999 -999 A:90[14.17],41[34.61],C:1[6],0[0],G:1[2],0[0],T:19[21.42],18[31.39] A:72[14.14],46[32.37],C:5[2],0[0],T:11[23.09],19[28.47] 30 UPSTREAM,UPSTREAM,UPSTREAM,UPSTREAM,DOWNSTREAM,DOWNSTREAM,DOWNSTREAM,DOWNSTREAM,DOWNSTREAM -888,-888,-888,-888,-888,-888,-888,-888,-888 -888,-888,-888,-888,-888,-888,-888,-888,-888 -888 ENSG00000223972,ENSG00000223972,ENSG00000223972,ENSG00000223972,ENSG00000227232,ENSG00000227232,ENSG00000227232,ENSG00000227232,ENSG00000227232 ENST00000456328,ENST00000515242,ENST00000518655,ENST00000450305,ENST00000438504,ENST00000541675,ENST00000423562,ENST00000488147,ENST00000538476 66 -999 DDX11L1,DDX11L1,DDX11L1,DDX11L1,WASH7P,WASH7P,WASH7P,WASH7P,WASH7P -- -- -- chr1:10109-10109 PASS AACCCTACCCT A>T"; s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertGermlineDccToMaf(data, "OESO_1020", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez); @@ -679,7 +639,6 @@ public void testRealLifeDataGermline() throws QMafException { assertEquals("A", maf.getRef()); assertEquals("ENST00000456328", maf.getCanonicalTranscriptId()); assertEquals("DDX11L1", maf.getHugoSymbol()); -// assertEquals("A", maf.getEntrezGeneId()); // name filter assertEquals(false, DccConsequence.passesMafNameFilter(maf.getVariantClassification())); @@ -689,10 +648,10 @@ public void testRealLifeDataGermline() throws QMafException { public void testRealLifeDataGermline2() throws QMafException { List mafs = new ArrayList(); Map> ensemblToEntrez = new HashMap>(); - TabbedRecord data = new TabbedRecord(); + String s = "0b09a9d1_7d96_4159_881a_a4aaf52ac3e9_SNP_2 1 1 10109 10109 1 -888 -888 A A/T A/T -999 -999 -999 168 2 2 -888 -999 -999 A:88[14.47],29[36.59],C:4[4],0[0],T:16[15.62],31[32.58] A:142[13.49],52[35.42],C:1[2],0[0],T:39[23.08],43[32.47] 30 upstream_gene_variant,upstream_gene_variant,upstream_gene_variant,upstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant -888 -888 -888 ENSG00000223972,ENSG00000223972,ENSG00000223972,ENSG00000223972,ENSG00000227232,ENSG00000227232,ENSG00000227232,ENSG00000227232,ENSG00000227232 ENST00000456328,ENST00000515242,ENST00000518655,ENST00000450305,ENST00000438504,ENST00000541675,ENST00000423562,ENST00000488147,ENST00000538476 70 -999 DDX11L1,DDX11L1,DDX11L1,DDX11L1,WASH7P,WASH7P,WASH7P,WASH7P,WASH7P -- -- -- chr1:10109-10109 PASS AACCCTACCCT A>T"; s = s.replaceAll("\\s+", "\t"); - data.setData(s); + String data = s; MafUtils.convertGermlineDccToMaf(data, "APGI_2027", "controlSampleID", "tumourSampleID", null, mafs, ensemblToEntrez); @@ -715,7 +674,6 @@ public void testRealLifeDataGermline2() throws QMafException { assertEquals("A", maf.getRef()); assertEquals("ENST00000456328", maf.getCanonicalTranscriptId()); assertEquals("DDX11L1", maf.getHugoSymbol()); -// assertEquals("A", maf.getEntrezGeneId()); // name filter assertEquals(false, DccConsequence.passesMafNameFilter(maf.getVariantClassification())); diff --git a/qsignature/src/org/qcmg/sig/CompareIlluminaData.java b/qsignature/src/org/qcmg/sig/CompareIlluminaData.java index f3c779aa5..bc83b19ba 100644 --- a/qsignature/src/org/qcmg/sig/CompareIlluminaData.java +++ b/qsignature/src/org/qcmg/sig/CompareIlluminaData.java @@ -17,9 +17,8 @@ import org.qcmg.common.model.ChrPointPosition; import org.qcmg.common.model.ChrPosition; import org.qcmg.common.util.FileUtils; -import 
org.qcmg.illumina.IlluminaFileReader; -import org.qcmg.illumina.IlluminaRecord; -import org.qcmg.record.Record; +import org.qcmg.qio.illumina.IlluminaFileReader; +import org.qcmg.qio.illumina.IlluminaRecord; /** * This class takes in 2 Illumina snp chip files as input @@ -112,25 +111,24 @@ private void compareIlluminaData() { static void loadIlluminaData(File illuminaFile, Map illuminaMap) throws IOException { try (IlluminaFileReader reader = new IlluminaFileReader(illuminaFile);) { - IlluminaRecord tempRec; - for (Record rec : reader) { - tempRec = (IlluminaRecord) rec; - + + for (IlluminaRecord rec : reader) { + // only interested in illumina data if it has a gc score above 0.7, and a valid chromosome // get XY, 0 for chromosome // ignore chromosome 0, and for XY, create 2 records, one for each! - if (null != tempRec.getChr() && ! "0".equals(tempRec.getChr()) && tempRec.getGCScore() > 0.6999 ) { + if (null != rec.getChr() && ! "0".equals(rec.getChr()) && rec.getGCScore() > 0.6999 ) { - if ("XY".equals(tempRec.getChr())) { + if ("XY".equals(rec.getChr())) { // add both X and Y to map - illuminaMap.put(ChrPointPosition.valueOf("chrX", tempRec.getStart()), tempRec); - illuminaMap.put(ChrPointPosition.valueOf("chrY", tempRec.getStart()), tempRec); + illuminaMap.put(ChrPointPosition.valueOf("chrX", rec.getStart()), rec); + illuminaMap.put(ChrPointPosition.valueOf("chrY", rec.getStart()), rec); continue; } // Illumina record chromosome does not contain "chr", whereas the positionRecordMap does - add - illuminaMap.put(ChrPointPosition.valueOf("chr" + tempRec.getChr(), tempRec.getStart()), tempRec); + illuminaMap.put(ChrPointPosition.valueOf("chr" + rec.getChr(), rec.getStart()), rec); } } } diff --git a/qsignature/src/org/qcmg/sig/QSigCompare.java b/qsignature/src/org/qcmg/sig/QSigCompare.java index bd83d16e9..a12184013 100644 --- a/qsignature/src/org/qcmg/sig/QSigCompare.java +++ b/qsignature/src/org/qcmg/sig/QSigCompare.java @@ -33,10 +33,8 @@ import org.qcmg.common.model.ChrPosition; import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.TabTokenizer; +import org.qcmg.qio.record.StringFileReader; import org.qcmg.sig.util.SignatureUtil; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; import org.w3c.dom.Document; import org.w3c.dom.Element; /** @@ -117,7 +115,7 @@ private void writeOutput() { String [] fileAttributes = null; // TabbedFileReader reader = null; - try (TabbedFileReader reader = new TabbedFileReader(f)){ + try (StringFileReader reader = new StringFileReader(f)){ fileAttributes = getDetailsFromVCFHeader(reader.getHeader()); } catch (Exception e) { logger.error("Couldn't retrieve file attributes", e); @@ -197,14 +195,14 @@ private void selectFilesForComparison() throws Exception { } private void doComparison(File f1, File f2) throws Exception { - TabbedFileReader vcf1 = new TabbedFileReader(f1); - TabbedFileReader vcf2 = new TabbedFileReader(f2); + StringFileReader vcf1 = new StringFileReader(f1); + StringFileReader vcf2 = new StringFileReader(f2); String [] s1PatientAndType = null; String [] s2PatientAndType = null; try { - TabbedHeader vcfHeader1 = vcf1.getHeader(); - TabbedHeader vcfHeader2 = vcf2.getHeader(); + List vcfHeader1 = vcf1.getHeader(); + List vcfHeader2 = vcf2.getHeader(); s1PatientAndType = getDetailsFromVCFHeader(vcfHeader1); s2PatientAndType = getDetailsFromVCFHeader(vcfHeader2); @@ -337,13 +335,13 @@ public static float compareRatios(final Map file1Ratios, return totalDifference; } - private 
Map loadRatiosFromFile(TabbedFileReader reader) { + private Map loadRatiosFromFile(StringFileReader reader) { logger.info("loading ratios from file: " + reader.getFile().getAbsolutePath()); Map ratios = new HashMap(); - for (TabbedRecord vcfRecord : reader) { - String[] params = TabTokenizer.tokenize(vcfRecord.getData()); + for (String vcfRecord : reader) { + String[] params = TabTokenizer.tokenize(vcfRecord); ChrPosition chrPos = ChrPointPosition.valueOf(params[0], Integer.parseInt(params[1])); char ref = params[3].charAt(0); if ('-' == ref || '.' == ref) { @@ -411,13 +409,13 @@ public static double getDiscretisedValue(double initialValue) { return DOUBLE_NAN; } - public static String [] getDetailsFromVCFHeader(TabbedHeader header) { + public static String [] getDetailsFromVCFHeader(List header) { String patient = null; String library = null; String inputType = null; String snpFile = null; - for (Iterator iter = header.iterator() ; iter.hasNext() ; ) { - String headerLine = iter.next(); + //for (Iterator iter = header.iterator() ; iter.hasNext() ; ) { + for (String headerLine : header) { if (headerLine.contains("patient_id")) { patient = headerLine.substring(headerLine.indexOf("=") + 1); } diff --git a/qsignature/src/org/qcmg/sig/QSigCompareDistance.java b/qsignature/src/org/qcmg/sig/QSigCompareDistance.java index 1a9279284..18d2bf0ba 100644 --- a/qsignature/src/org/qcmg/sig/QSigCompareDistance.java +++ b/qsignature/src/org/qcmg/sig/QSigCompareDistance.java @@ -35,11 +35,9 @@ import org.qcmg.common.util.BaseUtils; import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.TabTokenizer; +import org.qcmg.qio.record.StringFileReader; import org.qcmg.sig.model.Comparison; import org.qcmg.sig.util.SignatureUtil; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -221,16 +219,14 @@ private void loadFileMetaData(File file) throws Exception { String[] metadata = fileStats.get(file); if (null == metadata) { metadata = new String[5]; - TabbedFileReader vcf1 = new TabbedFileReader(file); - try { - TabbedHeader vcfHeader1 = vcf1.getHeader(); + + try (StringFileReader vcf1 = new StringFileReader(file);){ + List vcfHeader1 = vcf1.getHeader(); String [] vcfHeaderDetails = QSigCompare.getDetailsFromVCFHeader(vcfHeader1); for (int i = 0 ; i < vcfHeaderDetails.length ; i++) { metadata[i] = vcfHeaderDetails[i]; } - } finally { - vcf1.close(); - } + } fileStats.put(file, metadata); } } @@ -471,7 +467,7 @@ public static Comparison compareRatiosFloat(final Map file } private Map loadRatiosFromFile(File file) throws Exception { - TabbedFileReader reader = new TabbedFileReader(file); + StringFileReader reader = new StringFileReader(file); Map ratios = null; int zeroCov = 0, invalidRefCount = 0; try { @@ -481,8 +477,8 @@ private Map loadRatiosFromFile(File file) throws Exceptio ratios = new HashMap<>(); - for (TabbedRecord vcfRecord : reader) { - String[] params = TabTokenizer.tokenize(vcfRecord.getData()); + for (String vcfRecord : reader) { + String[] params = TabTokenizer.tokenize(vcfRecord); ChrPosition chrPos = ChrPointPosition.valueOf(params[0], Integer.parseInt(params[1])); if (populateSnps) { diff --git a/qsignature/src/org/qcmg/sig/SignatureGenerator.java b/qsignature/src/org/qcmg/sig/SignatureGenerator.java index 054f47583..cfa2d2477 100644 --- a/qsignature/src/org/qcmg/sig/SignatureGenerator.java +++ b/qsignature/src/org/qcmg/sig/SignatureGenerator.java @@ -43,18 +43,16 @@ 
import org.qcmg.common.vcf.header.VcfHeader; import org.qcmg.common.vcf.header.VcfHeaderRecord; import org.qcmg.common.vcf.header.VcfHeaderUtils; -import org.qcmg.illumina.IlluminaFileReader; -import org.qcmg.illumina.IlluminaRecord; import org.qcmg.picard.SAMFileReaderFactory; import org.qcmg.picard.util.BAMFileUtils; import org.qcmg.picard.util.PileupElementUtil; import org.qcmg.picard.util.SAMUtils; -import org.qcmg.record.Record; +import org.qcmg.qio.illumina.IlluminaFileReader; +import org.qcmg.qio.illumina.IlluminaRecord; +import org.qcmg.qio.record.StringFileReader; +import org.qcmg.qio.record.RecordWriter; import org.qcmg.sig.model.BaseStrandPosition; import org.qcmg.sig.util.SignatureUtil; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; -import org.qcmg.vcf.VCFFileWriter; import gnu.trove.set.TIntSet; import gnu.trove.set.hash.TIntHashSet; @@ -235,9 +233,9 @@ private void loadIlluminaArraysDesign() throws IOException { // check that we can read the file if (null != illumiaArraysDesign && FileUtils.canFileBeRead(illumiaArraysDesign)) { - try (TabbedFileReader reader = new TabbedFileReader(new File(illumiaArraysDesign));) { - for (final TabbedRecord rec : reader) { - final String [] params = TabTokenizer.tokenize(rec.getData()); + try (StringFileReader reader = new StringFileReader(new File(illumiaArraysDesign));) { + for (final String rec : reader) { + final String [] params = TabTokenizer.tokenize(rec); final String id = params[0]; illuminaArraysDesign.put(id, params); } @@ -248,11 +246,9 @@ private void loadIlluminaArraysDesign() throws IOException { } static void loadIlluminaData(File illuminaFile, Map illuminaMap) throws IOException { - IlluminaRecord tempRec; + try (IlluminaFileReader reader = new IlluminaFileReader(illuminaFile);){ - for (final Record rec : reader) { - tempRec = (IlluminaRecord) rec; - + for (final IlluminaRecord tempRec : reader) { // only interested in illumina data if it has a gc score above 0.7, and a valid chromosome // ignore chromosome 0, and for XY, create 2 records, one for each! 
 				// skip if the B allele ratio or Log R ratios are NaN
@@ -354,7 +350,7 @@ private void writeVCFOutput(File bamFile, VcfHeader header) throws Exception {
 		// check that can wriite to new file
 		if (FileUtils.canFileBeWrittenTo(outputVCFFile)) {
-			try (VCFFileWriter writer = new VCFFileWriter(outputVCFFile, true);){
+			try (RecordWriter writer = new RecordWriter<>(outputVCFFile, true);){
 				// write header
 				for(final VcfHeaderRecord re: header) {
 					writer.addHeader(re.toString() );
@@ -601,10 +597,10 @@ private void updateResults(VcfRecord vcf, SAMRecord sam) {
 	private void loadRandomSnpPositions(String randomSnpsFile) throws IOException {
 		int count = 0;
-		try (TabbedFileReader reader = new TabbedFileReader(new File(randomSnpsFile));){
-			for (final TabbedRecord rec : reader) {
+		try (StringFileReader reader = new StringFileReader(new File(randomSnpsFile));){
+			for (final String rec : reader) {
 				++count;
-				final String[] params = TabTokenizer.tokenize(rec.getData());
+				final String[] params = TabTokenizer.tokenize(rec);
 				String ref = null;
 				if (params.length > 4 && null != params[4] && params[4].length() == 1) {
diff --git a/qsignature/src/org/qcmg/sig/SignatureGeneratorBespoke.java b/qsignature/src/org/qcmg/sig/SignatureGeneratorBespoke.java
index 085775285..2e1130aaa 100644
--- a/qsignature/src/org/qcmg/sig/SignatureGeneratorBespoke.java
+++ b/qsignature/src/org/qcmg/sig/SignatureGeneratorBespoke.java
@@ -45,15 +45,13 @@ import org.qcmg.common.util.TabTokenizer;
 import org.qcmg.common.vcf.VcfRecord;
 import org.qcmg.common.vcf.header.VcfHeaderUtils;
-import org.qcmg.illumina.IlluminaFileReader;
-import org.qcmg.illumina.IlluminaRecord;
 import org.qcmg.picard.SAMFileReaderFactory;
 import org.qcmg.picard.util.BAMFileUtils;
 import org.qcmg.picard.util.SAMUtils;
-import org.qcmg.record.Record;
+import org.qcmg.qio.illumina.IlluminaFileReader;
+import org.qcmg.qio.illumina.IlluminaRecord;
+import org.qcmg.qio.record.StringFileReader;
 import org.qcmg.sig.util.SignatureUtil;
-import org.qcmg.tab.TabbedFileReader;
-import org.qcmg.tab.TabbedRecord;
 import gnu.trove.map.TObjectIntMap;
 import gnu.trove.map.hash.THashMap;
@@ -298,9 +296,9 @@ private void loadIlluminaArraysDesign() throws IOException {
 		// check that we can read the file
 		if (null != illumiaArraysDesign && FileUtils.canFileBeRead(illumiaArraysDesign)) {
-			try (TabbedFileReader reader= new TabbedFileReader(new File(illumiaArraysDesign));) {
-				for (final TabbedRecord rec : reader) {
-					final String [] params = TabTokenizer.tokenize(rec.getData());
+			try (StringFileReader reader= new StringFileReader(new File(illumiaArraysDesign));) {
+				for (final String rec : reader) {
+					final String [] params = TabTokenizer.tokenize(rec);
 					final String id = params[0];
 					illuminaArraysDesignMap.put(id, params);
 				}
@@ -311,10 +309,9 @@ private void loadIlluminaArraysDesign() throws IOException {
 	}
 	static void loadIlluminaData(File illuminaFile, Map illuminaMap) throws IOException {
-		IlluminaRecord tempRec;
+		;
 		try (IlluminaFileReader reader = new IlluminaFileReader(illuminaFile);) {
-			for (final Record rec : reader) {
-				tempRec = (IlluminaRecord) rec;
+			for (final IlluminaRecord tempRec : reader) {
 				// only interested in illumina data if it has a gc score above 0.7, and a valid chromosome
 				// ignore chromosome 0, and for XY, create 2 records, one for each!
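
The qsignature hunks above all apply the same two substitutions: TabbedFileReader/TabbedRecord become StringFileReader and plain String lines, and the untyped Record loop over IlluminaFileReader becomes a typed IlluminaRecord loop. The sketch below is not part of the patch; it only illustrates that pattern. StringFileReader.getHeader(), iteration yielding String, TabTokenizer.tokenize and the IlluminaRecord getters are taken from the call sites in these hunks, while the generic parameters and the exact exceptions thrown are assumptions, since neither survives in this patch text.

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.qcmg.common.util.TabTokenizer;
import org.qcmg.qio.illumina.IlluminaFileReader;
import org.qcmg.qio.illumina.IlluminaRecord;
import org.qcmg.qio.record.StringFileReader;

public class QioMigrationSketch {

	// Old style: TabbedFileReader with rec.getData(); new style: the reader hands back the line itself.
	static int countTabbedDataLines(File snpFile) throws IOException {
		int count = 0;
		try (StringFileReader reader = new StringFileReader(snpFile)) {
			List<String> header = reader.getHeader();	// was TabbedHeader; List<String> element type is assumed
			System.out.println("header lines: " + header.size());
			for (String rec : reader) {
				String[] params = TabTokenizer.tokenize(rec);	// was TabTokenizer.tokenize(rec.getData())
				if (params.length > 1) {
					count++;
				}
			}
		}
		return count;
	}

	// Old style: for (Record rec : reader) { IlluminaRecord tempRec = (IlluminaRecord) rec; ... }
	// New style: the reader is iterated as IlluminaRecord directly, so no cast is needed.
	static int countUsableIlluminaRecords(File illuminaFile) throws IOException {
		int count = 0;
		try (IlluminaFileReader reader = new IlluminaFileReader(illuminaFile)) {
			for (IlluminaRecord rec : reader) {
				if (null != rec.getChr() && ! "0".equals(rec.getChr()) && rec.getGCScore() > 0.6999) {
					count++;
				}
			}
		}
		return count;
	}
}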
diff --git a/qsignature/src/org/qcmg/sig/SnpFileDetails.java b/qsignature/src/org/qcmg/sig/SnpFileDetails.java index f6f521cac..6dd84c053 100644 --- a/qsignature/src/org/qcmg/sig/SnpFileDetails.java +++ b/qsignature/src/org/qcmg/sig/SnpFileDetails.java @@ -17,8 +17,7 @@ import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.TabTokenizer; import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.StringFileReader; /** * This class returns some (very) basic details on the snp positions file. @@ -40,23 +39,23 @@ private int engage() throws IOException { } private void loadRandomSnpPositions(String randomSnpsFile) throws IOException { - TabbedFileReader reader = new TabbedFileReader(new File(randomSnpsFile)); + StringFileReader reader = new StringFileReader(new File(randomSnpsFile)); try { int count = 0, emptyRefCount = 0, dashRef = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { ++count; - String[] params = TabTokenizer.tokenize(rec.getData()); + String[] params = TabTokenizer.tokenize(rec); String ref = null; if (params.length > 4 && ! StringUtils.isNullOrEmpty(params[4])) { ref = params[4]; if ("-".equals(ref) || ".".equals(ref)) { dashRef++; - logger.info("dash ref: " + rec.getData()); + logger.info("dash ref: " + rec); } } else { emptyRefCount++; - logger.info("empty ref: " + rec.getData()); + logger.info("empty ref: " + rec); } String chr = params[0]; diff --git a/qsignature/src/org/qcmg/sig/util/SignatureUtil.java b/qsignature/src/org/qcmg/sig/util/SignatureUtil.java index e16cede6b..447ae98ca 100644 --- a/qsignature/src/org/qcmg/sig/util/SignatureUtil.java +++ b/qsignature/src/org/qcmg/sig/util/SignatureUtil.java @@ -53,13 +53,11 @@ import org.qcmg.common.util.TabTokenizer; import org.qcmg.common.vcf.VcfRecord; import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.illumina.IlluminaRecord; +import org.qcmg.qio.illumina.IlluminaRecord; +import org.qcmg.qio.record.StringFileReader; +import org.qcmg.qio.vcf.VCFFileReader; import org.qcmg.sig.model.Comparison; import org.qcmg.sig.model.SigMeta; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; -import org.qcmg.vcf.VCFFileReader; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -271,11 +269,12 @@ public static Map loadSignatureRatios(File file, int minC throw new IllegalArgumentException("Null file object passed to loadSignatureRatios"); } - try (TabbedFileReader reader = new TabbedFileReader(file)) { + try (StringFileReader reader = new StringFileReader(file)) { String line; - for (TabbedRecord vcfRecord : reader) { - line = vcfRecord.getData(); + for (String vcfRecord : reader) { + line = vcfRecord; + if (line.startsWith(Constants.HASH_STRING)) { continue; } @@ -309,11 +308,11 @@ public static Map loadSignatureRatiosFloat(File file, int throw new IllegalArgumentException("Null file object passed to loadSignatureRatios"); } - try (TabbedFileReader reader = new TabbedFileReader(file)) { + try (StringFileReader reader = new StringFileReader(file)) { String line; - for (TabbedRecord vcfRecord : reader) { - line = vcfRecord.getData(); + for (String vcfRecord : reader) { + line = vcfRecord; if (line.startsWith(Constants.HASH_STRING)) { continue; } @@ -353,11 +352,11 @@ public static TIntByteHashMap loadSignatureRatiosFloatGenotypeNew(File file, int throw new IllegalArgumentException("Null file object passed to loadSignatureRatios"); } - try (TabbedFileReader reader 
= new TabbedFileReader(file)) { + try (StringFileReader reader = new StringFileReader(file)) { String line; - for (TabbedRecord vcfRecord : reader) { - line = vcfRecord.getData(); + for (String vcfRecord : reader) { + line = vcfRecord; if (line.startsWith(Constants.HASH_STRING)) { continue; } @@ -403,7 +402,7 @@ public static TIntByteHashMap loadSignatureRatiosFloatGenotypeNew(File file, int return ratios; } - public static Optional>> getSigMetaAndRGsFromHeader(final TabbedHeader h) { + public static Optional>> getSigMetaAndRGsFromHeader(List h) { if (null == h) { return Optional.empty(); } else { @@ -503,8 +502,8 @@ public static Pair> loadSignatureGenotype Map rgIds = Collections.emptyMap(); SigMeta sm = null; - try (TabbedFileReader reader = new TabbedFileReader(file)) { - TabbedHeader h = reader.getHeader(); + try (StringFileReader reader = new StringFileReader(file, "#")) { + List h = reader.getHeader(); Optional>> metaAndRGsO = getSigMetaAndRGsFromHeader(h); if (metaAndRGsO.isPresent()) { @@ -523,7 +522,7 @@ public static Pair> loadSignatureGenotype } public static void getDataFromBespolkeLayout(File file, int minCoverage, int minRGCoverage, TIntByteHashMap ratios, - TMap rgRatios, Map rgIds, TabbedFileReader reader) { + TMap rgRatios, Map rgIds, StringFileReader reader) { int noOfRGs = rgIds.size(); logger.debug("Number of rgs for " + file.getAbsolutePath() + " is " + noOfRGs); @@ -531,8 +530,8 @@ public static void getDataFromBespolkeLayout(File file, int minCoverage, int min AtomicInteger cachePosition = new AtomicInteger(); - for (TabbedRecord vcfRecord : reader) { - line = vcfRecord.getData(); + for (String vcfRecord : reader) { + line = vcfRecord; if (line.startsWith(Constants.HASH_STRING)) { continue; } diff --git a/qsignature/test/org/qcmg/sig/SignatureGeneratorBespokeTest.java b/qsignature/test/org/qcmg/sig/SignatureGeneratorBespokeTest.java index 717f356f4..818bb7fd5 100644 --- a/qsignature/test/org/qcmg/sig/SignatureGeneratorBespokeTest.java +++ b/qsignature/test/org/qcmg/sig/SignatureGeneratorBespokeTest.java @@ -13,7 +13,7 @@ import org.qcmg.common.vcf.VcfRecord; import org.qcmg.common.vcf.header.VcfHeader; import org.qcmg.common.vcf.header.VcfHeaderRecord; -import org.qcmg.vcf.VCFFileReader; +import org.qcmg.qio.vcf.VCFFileReader; import gnu.trove.map.TObjectIntMap; import htsjdk.samtools.SAMFileHeader; @@ -206,7 +206,7 @@ public void runProcessWithHG19BamFile() throws Exception { recs.add(rec); System.out.println("rec: " + rec.toString()); } - VcfHeader header = reader.getHeader(); + VcfHeader header = reader.getVcfHeader(); // header.getAllMetaRecords().stream().forEach(System.out::println); assertEquals(true, header.getAllMetaRecords().contains(new VcfHeaderRecord("##rg0=null"))); } @@ -245,7 +245,7 @@ public void runProcessWithReadGroupsSetInHeader() throws Exception { recs.add(rec); System.out.println("rec: " + rec.toString()); } - VcfHeader header = reader.getHeader(); + VcfHeader header = reader.getVcfHeader(); header.getAllMetaRecords().stream().forEach(System.out::println); assertEquals(true, header.getAllMetaRecords().contains(new VcfHeaderRecord("##rg0=null"))); assertEquals(true, header.getAllMetaRecords().contains(new VcfHeaderRecord("##rg1=20130325103517169"))); diff --git a/qsignature/test/org/qcmg/sig/SignatureGeneratorTest.java b/qsignature/test/org/qcmg/sig/SignatureGeneratorTest.java index dc6e7640f..3ea7c9923 100644 --- a/qsignature/test/org/qcmg/sig/SignatureGeneratorTest.java +++ b/qsignature/test/org/qcmg/sig/SignatureGeneratorTest.java @@ -30,11 
+30,11 @@ import org.qcmg.common.util.IlluminaUtils; import org.qcmg.common.util.TabTokenizer; import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.illumina.IlluminaRecord; import org.qcmg.picard.SAMOrBAMWriterFactory; import org.qcmg.picard.util.SAMUtils; +import org.qcmg.qio.illumina.IlluminaRecord; +import org.qcmg.qio.vcf.VCFFileReader; import org.qcmg.sig.util.SignatureUtil; -import org.qcmg.vcf.VCFFileReader; public class SignatureGeneratorTest { @@ -79,52 +79,6 @@ public void testUpdateResultsIllumina() { assertEquals(true, result.contains("C:17")); } -// @Test -// public void testCreateComparatorFromSAMHeader() throws IOException { -//// SignatureGenerator qss = new SignatureGenerator(); -//// qss.logger = QLoggerFactory.getLogger(SignatureGeneratorTest.class); -// try { -// qss.createComparatorFromSAMHeader(null); -// Assert.fail("Should have thrown an IAE"); -// } catch (final IllegalArgumentException iae) {} -// -// final File bamFile = testFolder.newFile("bamFile"); -// getBamFile(bamFile, false, true); -// qss.createComparatorFromSAMHeader(bamFile); -// -// // no seq in header - should default to ReferenceNameComparator sorting -// int i = qss.chrComparator.compare("chr1", "chr2"); -// assertEquals(true, i < 0); -// i = qss.chrComparator.compare("chr10", "chr2"); -// assertEquals(true, i > 0); -// -// // header is ordered chr5, 4, 3, 2, 1 -// getBamFile(bamFile, true, true); -// qss.createComparatorFromSAMHeader(bamFile); -// i = qss.chrComparator.compare("chr1", "chr2"); -// assertEquals(true, i < 0); -// } - -// @Test -// public void doesComparatorWorkForNonChrs() throws IOException { -// -// final File bamFile = testFolder.newFile("bamFile"); -// getBamFile(bamFile, false, true); -// qss.createComparatorFromSAMHeader(bamFile); -// -// // no seq in header - should default to ReferenceNameComparator sorting -// int i = qss.chrComparator.compare("chr1", "chr2"); -// assertEquals(true, i < 0); -// i = qss.chrComparator.compare("chr10", "chr2"); -// assertEquals(true, i > 0); -// -// getBamFile(bamFile, true, false); -// -// qss.createComparatorFromSAMHeader(bamFile); -// i = qss.chrComparator.compare("1", "2"); -// assertEquals(true, i < 0); -// } - /** * Method has been copied from QSignatureSequential.updateResultsIllumina @@ -238,7 +192,7 @@ private char[] getAlleleAandB(String snp, String strand) { private Executor execute(final String command) throws Exception { return new Executor(command, "org.qcmg.sig.SignatureGenerator"); } - + @Test public void runProcessWithEmptySnpChipFile() throws Exception { final File positionsOfInterestFile = testFolder.newFile("runProcessWithEmptySnpChipFile.txt"); @@ -246,12 +200,10 @@ public void runProcessWithEmptySnpChipFile() throws Exception { final File illuminaArraysDesignFile = testFolder.newFile("runProcessWithEmptySnpChipFile_snpChipIAD.txt"); final File logFile = testFolder.newFile("runProcessWithEmptySnpChipFile.log"); final File outputFile = testFolder.newFile("runProcessWithEmptySnpChipFile.qsig.vcf"); -// getBamFile(snpChipFile, true, null); - + ExpectedException.none(); final Executor exec = execute("--log " + logFile.getAbsolutePath() + " -i " + positionsOfInterestFile.getAbsolutePath() + " -i " + snpChipFile.getAbsolutePath()+ " -i " + illuminaArraysDesignFile.getAbsolutePath()); assertTrue(0 == exec.getErrCode()); - assertTrue(outputFile.exists()); } diff --git a/qsignature/test/org/qcmg/sig/util/SignatureUtilTest.java b/qsignature/test/org/qcmg/sig/util/SignatureUtilTest.java index abd190b22..8d33d1114 100644 --- 
a/qsignature/test/org/qcmg/sig/util/SignatureUtilTest.java +++ b/qsignature/test/org/qcmg/sig/util/SignatureUtilTest.java @@ -21,11 +21,11 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.qcmg.common.util.ChrPositionCache; -import org.qcmg.illumina.IlluminaRecord; +import org.qcmg.qio.illumina.IlluminaRecord; import org.qcmg.sig.CompareTest; import org.qcmg.sig.model.Comparison; import org.qcmg.sig.model.SigMeta; -import org.qcmg.tab.TabbedHeader; + public class SignatureUtilTest { @@ -117,8 +117,8 @@ public void getGenotypesAsByte() throws IOException { @Test public void doesOldStyleHeaderReturnASigMeta() { - TabbedHeader h = new TabbedHeader(BAM_HEADER_OLD_SKOOL); - Optional>> optional = SignatureUtil.getSigMetaAndRGsFromHeader(h); + //TabbedHeader h = new TabbedHeader(BAM_HEADER_OLD_SKOOL); + Optional>> optional = SignatureUtil.getSigMetaAndRGsFromHeader(BAM_HEADER_OLD_SKOOL); assertEquals(true, optional.isPresent()); SigMeta sm = optional.get().getKey(); assertEquals(false, sm.isValid()); @@ -136,18 +136,21 @@ public void doContigsStartWithDigit() { @Test public void getSigMetaEmptyHeader() { assertEquals(Optional.empty(), SignatureUtil.getSigMetaAndRGsFromHeader(null)); - TabbedHeader h = new TabbedHeader(null); - Optional>> o =SignatureUtil.getSigMetaAndRGsFromHeader(h); - assertEquals(true, o.isPresent()); - - assertEquals(false, o.get().getFirst().isValid()); // invalid SigMeta - assertEquals(true, o.get().getSecond().isEmpty()); // empty rg map + + + //new TabbedHeader(null); will create an empty list, but now it will not + //TabbedHeader h = new TabbedHeader(null); +// Optional>> o =SignatureUtil.getSigMetaAndRGsFromHeader(null); +// assertEquals(true, o.isPresent()); +// +// assertEquals(false, o.get().getFirst().isValid()); // invalid SigMeta +// assertEquals(true, o.get().getSecond().isEmpty()); // empty rg map } @Test public void getSigMetaBam() { - TabbedHeader h = new TabbedHeader(BAM_HEADER); - Optional>> o =SignatureUtil.getSigMetaAndRGsFromHeader(h); + //TabbedHeader h = new TabbedHeader(BAM_HEADER); + Optional>> o =SignatureUtil.getSigMetaAndRGsFromHeader(BAM_HEADER); assertEquals(true, o.isPresent()); assertEquals(true, o.get().getFirst().isValid()); // valid SigMeta @@ -162,8 +165,8 @@ public void getSigMetaBam() { @Test public void getSigMetaSnpChip() { - TabbedHeader h = new TabbedHeader(SNP_CHIP_HEADER); - Optional>> o =SignatureUtil.getSigMetaAndRGsFromHeader(h); + //TabbedHeader h = new TabbedHeader(SNP_CHIP_HEADER); + Optional>> o =SignatureUtil.getSigMetaAndRGsFromHeader(SNP_CHIP_HEADER); assertEquals(true, o.isPresent()); assertEquals(true, o.get().getFirst().isValid()); // valid SigMeta @@ -173,13 +176,13 @@ public void getSigMetaSnpChip() { @Test public void canSigMEtasBeCompared() { - TabbedHeader h = new TabbedHeader(SNP_CHIP_HEADER); - Optional>> o =SignatureUtil.getSigMetaAndRGsFromHeader(h); + //TabbedHeader h = new TabbedHeader(SNP_CHIP_HEADER); + Optional>> o =SignatureUtil.getSigMetaAndRGsFromHeader(SNP_CHIP_HEADER); assertEquals(true, o.isPresent()); SigMeta snpChpSM = o.get().getFirst(); - h = new TabbedHeader(BAM_HEADER); - o =SignatureUtil.getSigMetaAndRGsFromHeader(h); + //h = new TabbedHeader(BAM_HEADER); + o =SignatureUtil.getSigMetaAndRGsFromHeader(BAM_HEADER); assertEquals(true, o.isPresent()); SigMeta bamSM = o.get().getFirst(); From 4b47f10480d9b31dad71640db20c7cfabf9e5dcf Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 17:01:27 +1000 Subject: [PATCH 10/73] delete old gff --- 
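The org.qcmg.gff classes deleted below (GFFIterator, GFFReader, GFFRecord, GFFSerializer) are superseded by the org.qcmg.qio.gff reader exercised in the next patch. The sketch below only illustrates how that replacement is used: GffReader(File), try-with-resources and the GffRecord getters are taken from GffReaderTest, while iterating the reader directly as GffRecord is an assumption based on the other qio readers in this series.

import java.io.File;

import org.qcmg.qio.gff.GffReader;
import org.qcmg.qio.gff.GffRecord;

public class GffMigrationSketch {

	// Reads a GFF file with the new qio reader and prints each feature's location.
	// The for-each loop assumes the reader is iterable over GffRecord; GffReaderTest
	// only demonstrates parsing single lines via getRecord(String).
	public static void main(String[] args) throws Exception {
		try (GffReader reader = new GffReader(new File(args[0]))) {
			for (GffRecord rec : reader) {
				System.out.println(rec.getSeqname() + ":" + rec.getStart() + "-" + rec.getEnd()
						+ " " + rec.getFeature() + " score=" + rec.getScore() + " strand=" + rec.getStrand());
			}
		}
	}
}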
qio/src/org/qcmg/gff/GFFIterator.java | 22 --- qio/src/org/qcmg/gff/GFFReader.java | 19 --- qio/src/org/qcmg/gff/GFFRecord.java | 180 ------------------------ qio/src/org/qcmg/gff/GFFSerializer.java | 59 -------- 4 files changed, 280 deletions(-) delete mode 100644 qio/src/org/qcmg/gff/GFFIterator.java delete mode 100644 qio/src/org/qcmg/gff/GFFReader.java delete mode 100644 qio/src/org/qcmg/gff/GFFRecord.java delete mode 100644 qio/src/org/qcmg/gff/GFFSerializer.java diff --git a/qio/src/org/qcmg/gff/GFFIterator.java b/qio/src/org/qcmg/gff/GFFIterator.java deleted file mode 100644 index 7775a41e5..000000000 --- a/qio/src/org/qcmg/gff/GFFIterator.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.qcmg.gff; - -import java.io.InputStream; - -import org.qcmg.record.AbstractRecordIterator; - -public class GFFIterator extends AbstractRecordIterator { - - public GFFIterator(InputStream stream) throws Exception { - super(stream); - } - - @Override - protected void readNext() { -// try { - next = GFFSerializer.nextRecord(reader); -// } catch (Exception ex) { -// next = null; -// } - } - -} diff --git a/qio/src/org/qcmg/gff/GFFReader.java b/qio/src/org/qcmg/gff/GFFReader.java deleted file mode 100644 index ae8c685b8..000000000 --- a/qio/src/org/qcmg/gff/GFFReader.java +++ /dev/null @@ -1,19 +0,0 @@ -package org.qcmg.gff; - -import java.io.File; -import java.io.IOException; - -import org.qcmg.reader.AbstractReader; -import org.qcmg.record.AbstractRecordIterator; - -public class GFFReader extends AbstractReader { - - public GFFReader(File file) throws IOException { - super(file); - } - - @Override - public AbstractRecordIterator getRecordIterator() throws Exception { - return new GFFIterator(inputStream); - } -} diff --git a/qio/src/org/qcmg/gff/GFFRecord.java b/qio/src/org/qcmg/gff/GFFRecord.java deleted file mode 100644 index bf3c740c0..000000000 --- a/qio/src/org/qcmg/gff/GFFRecord.java +++ /dev/null @@ -1,180 +0,0 @@ -package org.qcmg.gff; - -import java.util.HashMap; -import java.util.Map; - -import org.qcmg.record.Record; - -/** - * @author jpearson - * @version $Id: GffRecord.java,v 1.8 2009/08/17 20:17:35 jbeckstr Exp $ - * - * Data container class for records from SOLiD GFF format sequence - * alignment files. GFF is a tab-separated text file with unix-style - * line endings and the following fields of which the last two are - * optional: - * - * Fieldname Example value 1. seqname 1231_644_1328_F3 2. source solid - * 3. feature read 4. start 97 5. end 121 6. score 13.5 7. strand - 8. - * frame . 9. [attributes] b=TAGGGTTAGGGTTGGGTTAGGGTTA; c=AAA; - * g=T320010320010100103000103; i=1; p=1.000; - * q=23,28,27,20,17,12,24,16,20,8,13,26,28,2 - * 4,13,13,27,14,19,4,23,16,19,9,14; r=20_2; s=a20; u=0,1 10. - * [comments] - */ -public class GFFRecord implements Record { - - // private String originalLine; // original line - private String seqname; // read ID - private String source; // should always be "solid" - private String feature; // should always be "read" - private int start; // start position of mapping to reference - private int end; // end position of mapping to reference - private double score; // quality of mapping - private String strand; // - or + - private String frame; // 1,2,3,. - private String attribStr; // this is the gold! 
- private String comments; // comments (seldom present) - private Map attributes; // deconstruct attribStr - - /** - * Constructor 1 - * - * @param textRecord - * text GFF Record typically read from GFF file - * @throws Exception - * @throws QProfilerException - */ - public GFFRecord(String[] fields) throws Exception { - // public GffRecord( String textRecord, String delimiter ) { - // this(); // call constructor 0 - // originalLine = textRecord; - - // String[] fields = textRecord.split( "\t" ); - seqname = fields[0]; - source = fields[1]; - feature = fields[2]; - start = Integer.parseInt(fields[3]); - end = Integer.parseInt(fields[4]); - score = Double.parseDouble(fields[5]); - strand = fields[6]; - frame = fields[7]; - - // Cope with the optional attribute field - if (fields.length > 8) { - attributes = new HashMap(); - - attribStr = fields[8]; - String[] tmpattribs = attribStr.split(";"); - for (int i = 0; i < tmpattribs.length; i++) { - String[] attrFields = tmpattribs[i].split("="); - if (attrFields.length < 2) { - throw new Exception("Attribute [" + tmpattribs[i] - + "] is badly formed"); - } - attributes.put(attrFields[0], attrFields[1]); - } - } - - // And comments is also optional - if (fields.length > 9) { - comments = fields[9]; - } - } - - public String getSeqname() { - return seqname; - } - - public void setSeqname(String seqname) { - this.seqname = seqname; - } - - public String getSource() { - return source; - } - - public void setSource(String source) { - this.source = source; - } - - public String getFeature() { - return feature; - } - - public void setFeature(String feature) { - this.feature = feature; - } - - public int getStart() { - return start; - } - - public void setStart(int start) { - this.start = start; - } - - public int getEnd() { - return end; - } - - public void setEnd(int end) { - this.end = end; - } - - public double getScore() { - return score; - } - - public void setScore(double score) { - this.score = score; - } - - public String getStrand() { - return strand; - } - - public void setStrand(String strand) { - this.strand = strand; - } - - public String getFrame() { - return frame; - } - - public void setFrame(String frame) { - this.frame = frame; - } - - public String getAttribStr() { - return attribStr; - } - - public void setAttribStr(String attribStr) { - this.attribStr = attribStr; - } - - public String getComments() { - return comments; - } - - public void setComments(String comments) { - this.comments = comments; - } - - public Map getAttributes() { - return attributes; - } - - public void setAttributes(Map attributes) { - this.attributes = attributes; - } - - public void setAttribute(String key, String value) { - this.attributes.put(key, value); - } - - public String getAttribute(String key) { - return null != attributes ? attributes.get(key) : null; - } -} diff --git a/qio/src/org/qcmg/gff/GFFSerializer.java b/qio/src/org/qcmg/gff/GFFSerializer.java deleted file mode 100644 index 243771cfb..000000000 --- a/qio/src/org/qcmg/gff/GFFSerializer.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * All code copyright The Queensland Centre for Medical Genomics. - * - * All rights reserved. 
- */ -package org.qcmg.gff; - -import java.io.BufferedReader; -import java.io.IOException; - -import org.qcmg.record.Record; - -public final class GFFSerializer { - - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static GFFRecord nextRecord(final BufferedReader reader) { - GFFRecord result = null; - - try { - String data = nextNonheaderLine(reader); - if (null != data) { - result = parseRecord(data); - } - } catch (Exception ex) { - // Fall through to return null - } - - return result; - } - - static String[] parseData(String data) throws Exception { - if (null == data) { - throw new AssertionError("Record was null"); - } - - String[] fields = data.split(Record.TAB_DELIMITER); - - if (fields.length < 8) { - throw new Exception("Not enough fields in the Record"); - } - - return fields; - } - - static GFFRecord parseRecord(final String data) throws Exception { - return new GFFRecord(parseData(data)); - } - -} From 32c3635c5cc5cb7936433d466aa263b9c5534aad Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 18:14:10 +1000 Subject: [PATCH 11/73] replace unit test for new gff --- .../qcmg/{ => qio}/gff/GFFSerializerTest.java | 0 qio/test/org/qcmg/qio/gff/GffReaderTest.java | 154 ++++++++++++++++++ 2 files changed, 154 insertions(+) rename qio/test/org/qcmg/{ => qio}/gff/GFFSerializerTest.java (100%) create mode 100644 qio/test/org/qcmg/qio/gff/GffReaderTest.java diff --git a/qio/test/org/qcmg/gff/GFFSerializerTest.java b/qio/test/org/qcmg/qio/gff/GFFSerializerTest.java similarity index 100% rename from qio/test/org/qcmg/gff/GFFSerializerTest.java rename to qio/test/org/qcmg/qio/gff/GFFSerializerTest.java diff --git a/qio/test/org/qcmg/qio/gff/GffReaderTest.java b/qio/test/org/qcmg/qio/gff/GffReaderTest.java new file mode 100644 index 000000000..20f0a05ed --- /dev/null +++ b/qio/test/org/qcmg/qio/gff/GffReaderTest.java @@ -0,0 +1,154 @@ +package org.qcmg.gff; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.qcmg.qio.gff.GffReader; +import org.qcmg.qio.gff.GffRecord; + +public class GffReaderTest { + private static File EMPTY_FILE ; + + @ClassRule + public static TemporaryFolder testFolder = new TemporaryFolder(); + + @BeforeClass + public static void setup() throws IOException { + EMPTY_FILE = testFolder.newFile("empty.gff"); + createTestFile(EMPTY_FILE.getAbsolutePath(), new ArrayList()); + } + + @Test + public void testParseDataInvalid() throws Exception { + try(GffReader reader = new GffReader(EMPTY_FILE);){ + + // test empty string + try { + reader.getRecord(""); + Assert.fail("Should have thrown an Exception"); + } catch (Exception e) { + Assert.assertEquals("Not enough fields in the Record", e.getMessage()); + } + try { + reader.getRecord(" "); + Assert.fail("Should have thrown an Exception"); + } catch (Exception e) { + Assert.assertEquals("Not enough fields in the Record", e.getMessage()); + } + + // test null + try { + 
reader.getRecord(null); + Assert.fail("Should have thrown an Exception"); + } catch (AssertionError e) { + Assert.assertEquals("Record was null", e.getMessage()); + } + + // string with fewer than 8 entries + try { + reader.getRecord("1 2 3 4 5 6 "); + Assert.fail("Should have thrown an Exception"); + } catch (Exception e) { + Assert.assertEquals("Not enough fields in the Record", e.getMessage()); + } + } + } + + @Test + public void testParseRecordInvalid() throws Exception { + + GffReader reader = new GffReader(EMPTY_FILE); + // test null + try { + reader.getRecord(null); + Assert.fail("Should have thrown an exception"); + } catch (AssertionError e) { + Assert.assertEquals("Record was null", e.getMessage()); + } + // test empty string + try { + reader.getRecord(""); + Assert.fail("Should have thrown an Exception"); + } catch (Exception e) { + Assert.assertEquals("Not enough fields in the Record", e.getMessage()); + } + try { + reader.getRecord(" "); + Assert.fail("Should have thrown an Exception"); + } catch (Exception e) { + Assert.assertEquals("Not enough fields in the Record", e.getMessage()); + } + + reader.close(); + } + + @Test + public void testParseRecord() throws Exception { + try (GffReader reader = new GffReader(EMPTY_FILE);){ + // 8 values + GffRecord record = reader.getRecord("this is a 0 1 0.0 works OK"); + Assert.assertNotNull(record); + Assert.assertEquals("this", record.getSeqname()); + Assert.assertEquals("OK", record.getFrame()); + } + } + + @Test + public void testParseRecordWithAttributes() throws Exception { + try (GffReader reader = new GffReader(EMPTY_FILE); ){ + + // real record containing attributes + GffRecord record = reader.getRecord("1 solid read 10148 10190 14.4 - . " + + "aID=1212_1636_246;at=F3;b=GGTTAGGGTTAGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGG;" + + "g=G0103200103201032001033001032001032001032001032001;mq=43;o=0;" + + "q=31,30,32,26,26,26,23,24,29,31,31,23,25,18,14,20,18,11,27,22,18,23,2,18,29,20,25,11,19,18," + + "18,13,14,18,19,16,14,5,16,23,18,21,16,16,14,20,13,17,20,11;r=23_2;s=a23;u=0,4,1,1"); + Assert.assertNotNull(record); + Assert.assertEquals("1", record.getSeqname()); + Assert.assertEquals("solid", record.getSource()); + Assert.assertEquals("read", record.getFeature()); + Assert.assertEquals(10148, record.getStart()); + Assert.assertEquals(10190, record.getEnd()); + Assert.assertTrue(14.4 == record.getScore()); + Assert.assertEquals("-", record.getStrand()); + Assert.assertEquals(".", record.getFrame()); + } + } + + @Test + public void testParseRecordWithInvalidAttributes() throws Exception { + try (GffReader reader = new GffReader(EMPTY_FILE); ){ + reader.getRecord("sequence source feature 0 1 99.99 strand frame attributes"); + Assert.fail("Should have thrown an exception"); + } catch (Exception e) { + Assert.assertEquals("Attribute [attributes] is badly formed", e.getMessage()); + } + } + + private static void createTestFile(String fileName, List data) { + + PrintWriter out; + try { + out = new PrintWriter(new BufferedWriter(new FileWriter(fileName))); + + for (String line : data) { + out.println(line); + } + out.close(); + } catch (IOException e) { + Logger.getLogger("QualSummarizerTest").log(Level.WARNING, "IOException caught whilst attempting to write to QUAL test file: " + fileName, e); + } + } +} From 7769a082592fdf2a1774ac508f2eaa1a4b1f087a Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 18:23:02 +1000 Subject: [PATCH 12/73] mv unused package under org.qcmg.unused --- qio/src/org/qcmg/bed/BEDFileReader.java | 37 - 
qio/src/org/qcmg/bed/BEDRecord.java | 97 --- qio/src/org/qcmg/bed/BEDRecordIterator.java | 49 -- .../qcmg/bed/BEDRecordPositionComparator.java | 21 - qio/src/org/qcmg/bed/BEDSerializer.java | 65 -- .../consensuscalls/ConsensusCallsFlag.java | 64 -- .../consensuscalls/ConsensusCallsRecord.java | 377 -------- .../ConsensusCallsSerializer.java | 68 -- .../exception/RecordIteratorException.java | 17 - .../qcmg/genesymbol/GeneSymbolFileReader.java | 38 - .../org/qcmg/genesymbol/GeneSymbolRecord.java | 32 - .../genesymbol/GeneSymbolRecordIterator.java | 49 -- .../qcmg/genesymbol/GeneSymbolSerializer.java | 44 - qio/src/org/qcmg/gff3/GFF3FileReader.java | 44 - qio/src/org/qcmg/gff3/GFF3FileWriter.java | 36 - qio/src/org/qcmg/gff3/GFF3Record.java | 244 ------ ...RecordChromosomeAndPositionComparator.java | 32 - qio/src/org/qcmg/gff3/GFF3RecordIterator.java | 49 -- .../gff3/GFF3RecordPositionComparator.java | 21 - qio/src/org/qcmg/gff3/GFF3Serializer.java | 94 -- .../org/qcmg/illumina/IlluminaFileReader.java | 21 - qio/src/org/qcmg/illumina/IlluminaRecord.java | 262 ------ .../qcmg/illumina/IlluminaRecordIterator.java | 21 - .../org/qcmg/illumina/IlluminaSerializer.java | 73 -- .../org/qcmg/qio/gff/GFFSerializerTest.java | 145 ---- qio/test/org/qcmg/qio/gff/GffReaderTest.java | 2 +- .../src/org/qcmg/qmule/AlignerCompare.java-- | 272 ++++++ .../qcmg/qmule/AnnotateDCCWithGFFRegions.java | 4 +- .../qmule/AnnotateDCCWithGFFRegions.java-- | 710 ++++++++++++++++ qmule/src/org/qcmg/qmule/BAM2CS.java-- | 183 ++++ qmule/src/org/qcmg/qmule/BAMCompress.java-- | 156 ++++ .../org/qcmg/qmule/BAMHeaderChecker.java-- | 250 ++++++ qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- | 124 +++ .../org/qcmg/qmule/BamMismatchCounts.java-- | 160 ++++ .../org/qcmg/qmule/BamRecordCounter.java-- | 44 + .../qcmg/qmule/CompareReferenceRegions.java | 4 +- .../qcmg/qmule/CompareReferenceRegions.java-- | 676 +++++++++++++++ .../src/org/qcmg/qmule/DbSnpChrLiftover.java | 8 +- .../org/qcmg/qmule/DbSnpChrLiftover.java-- | 86 ++ .../org/qcmg/qmule/GermlineDBStripper.java-- | 47 + qmule/src/org/qcmg/qmule/GetBamRecords.java-- | 226 +++++ qmule/src/org/qcmg/qmule/GetInsetSize.java-- | 35 + .../src/org/qcmg/qmule/IndelDCCHeader.java-- | 395 +++++++++ qmule/src/org/qcmg/qmule/MAF2DCC1.java | 8 +- qmule/src/org/qcmg/qmule/MAF2DCC1.java-- | 418 +++++++++ qmule/src/org/qcmg/qmule/Main.java-- | 100 +++ qmule/src/org/qcmg/qmule/Messages.java-- | 132 +++ qmule/src/org/qcmg/qmule/Options.java-- | 512 +++++++++++ qmule/src/org/qcmg/qmule/Pileup.java-- | 101 +++ qmule/src/org/qcmg/qmule/PileupStats.java-- | 254 ++++++ .../src/org/qcmg/qmule/QMuleException.java-- | 28 + qmule/src/org/qcmg/qmule/QueryCADDLib.java-- | 187 ++++ .../qcmg/qmule/ReAnnotateDccWithDbSNP.java | 4 +- .../qcmg/qmule/ReAnnotateDccWithDbSNP.java-- | 280 ++++++ .../src/org/qcmg/qmule/ReadPartGZFile.java-- | 152 ++++ qmule/src/org/qcmg/qmule/ReadsAppend.java-- | 95 +++ qmule/src/org/qcmg/qmule/RunGatk.java-- | 141 +++ .../org/qcmg/qmule/SmithWatermanGotoh.java-- | 368 ++++++++ .../qmule/SnpToReferenceRegionFilter.java | 8 +- .../qmule/SnpToReferenceRegionFilter.java-- | 647 ++++++++++++++ qmule/src/org/qcmg/qmule/SubSample.java-- | 165 ++++ .../src/org/qcmg/qmule/TestFileFinder.java-- | 23 + qmule/src/org/qcmg/qmule/TestJarUpdate.java-- | 191 +++++ qmule/src/org/qcmg/qmule/TestSort.java-- | 109 +++ .../org/qcmg/qmule/TranscriptomeMule.java-- | 192 +++++ .../src/org/qcmg/qmule/WiggleFromPileup.java | 6 +- .../org/qcmg/qmule/WiggleFromPileup.java-- | 302 +++++++ 
.../qcmg/qmule/WiggleFromPileupTakeTwo.java | 4 +- .../qcmg/qmule/WiggleFromPileupTakeTwo.java-- | 307 +++++++ qmule/src/org/qcmg/qmule/XCvsZP.java-- | 117 +++ qmule/src/org/qcmg/qmule/bam/CheckBam.java-- | 339 ++++++++ .../qmule/bam/GetContigsFromHeader.java-- | 127 +++ qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- | 226 +++++ qmule/src/org/qcmg/qmule/qcnv/Main.java-- | 57 ++ qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- | 152 ++++ qmule/src/org/qcmg/qmule/qcnv/Options.java-- | 169 ++++ qmule/src/org/qcmg/qmule/queryChrMT.java-- | 68 ++ .../org/qcmg/qmule/snppicker/CompareSnps.java | 2 +- .../qcmg/qmule/snppicker/CompareSnps.java-- | 205 +++++ .../snppicker/ExamineVerifiedSnps.java-- | 237 ++++++ .../qmule/snppicker/GatkUniqueSnps.java-- | 488 +++++++++++ .../src/org/qcmg/qmule/snppicker/Mule.java-- | 85 ++ .../org/qcmg/qmule/snppicker/SnpPicker.java | 10 +- .../org/qcmg/qmule/snppicker/SnpPicker.java-- | 802 ++++++++++++++++++ .../qcmg/qmule/snppicker/UniqueQSnps.java-- | 200 +++++ .../org/qcmg/qmule/snppicker/UniqueSnps.java | 4 +- .../qcmg/qmule/snppicker/UniqueSnps.java-- | 263 ++++++ .../qcmg/qmule/snppicker/VariantRecord.java-- | 193 +++++ .../qmule/util/IGVBatchFileGenerator.java-- | 78 ++ .../org/qcmg/qmule/util/TabbedDataLoader.java | 4 +- .../qcmg/qmule/util/TabbedDataLoader.java-- | 61 ++ .../src/org/qcmg/qmule/vcf/CompareVCFs.java-- | 269 ++++++ .../src/org/qcmg/qmule/vcf/ConvertVcfChr.java | 8 +- .../org/qcmg/qmule/vcf/ConvertVcfChr.java-- | 116 +++ .../qcmg/qmule/vcf/RefAndMultiGenotype.java-- | 101 +++ 95 files changed, 12459 insertions(+), 2038 deletions(-) delete mode 100644 qio/src/org/qcmg/bed/BEDFileReader.java delete mode 100644 qio/src/org/qcmg/bed/BEDRecord.java delete mode 100644 qio/src/org/qcmg/bed/BEDRecordIterator.java delete mode 100644 qio/src/org/qcmg/bed/BEDRecordPositionComparator.java delete mode 100644 qio/src/org/qcmg/bed/BEDSerializer.java delete mode 100644 qio/src/org/qcmg/consensuscalls/ConsensusCallsFlag.java delete mode 100644 qio/src/org/qcmg/consensuscalls/ConsensusCallsRecord.java delete mode 100644 qio/src/org/qcmg/consensuscalls/ConsensusCallsSerializer.java delete mode 100644 qio/src/org/qcmg/exception/RecordIteratorException.java delete mode 100644 qio/src/org/qcmg/genesymbol/GeneSymbolFileReader.java delete mode 100644 qio/src/org/qcmg/genesymbol/GeneSymbolRecord.java delete mode 100644 qio/src/org/qcmg/genesymbol/GeneSymbolRecordIterator.java delete mode 100644 qio/src/org/qcmg/genesymbol/GeneSymbolSerializer.java delete mode 100644 qio/src/org/qcmg/gff3/GFF3FileReader.java delete mode 100644 qio/src/org/qcmg/gff3/GFF3FileWriter.java delete mode 100644 qio/src/org/qcmg/gff3/GFF3Record.java delete mode 100644 qio/src/org/qcmg/gff3/GFF3RecordChromosomeAndPositionComparator.java delete mode 100644 qio/src/org/qcmg/gff3/GFF3RecordIterator.java delete mode 100644 qio/src/org/qcmg/gff3/GFF3RecordPositionComparator.java delete mode 100644 qio/src/org/qcmg/gff3/GFF3Serializer.java delete mode 100644 qio/src/org/qcmg/illumina/IlluminaFileReader.java delete mode 100644 qio/src/org/qcmg/illumina/IlluminaRecord.java delete mode 100644 qio/src/org/qcmg/illumina/IlluminaRecordIterator.java delete mode 100644 qio/src/org/qcmg/illumina/IlluminaSerializer.java delete mode 100644 qio/test/org/qcmg/qio/gff/GFFSerializerTest.java create mode 100644 qmule/src/org/qcmg/qmule/AlignerCompare.java-- create mode 100644 qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- create mode 100644 qmule/src/org/qcmg/qmule/BAM2CS.java-- create mode 100644 
qmule/src/org/qcmg/qmule/BAMCompress.java-- create mode 100644 qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- create mode 100644 qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- create mode 100644 qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- create mode 100644 qmule/src/org/qcmg/qmule/BamRecordCounter.java-- create mode 100644 qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- create mode 100644 qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- create mode 100644 qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- create mode 100644 qmule/src/org/qcmg/qmule/GetBamRecords.java-- create mode 100644 qmule/src/org/qcmg/qmule/GetInsetSize.java-- create mode 100644 qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- create mode 100644 qmule/src/org/qcmg/qmule/MAF2DCC1.java-- create mode 100644 qmule/src/org/qcmg/qmule/Main.java-- create mode 100644 qmule/src/org/qcmg/qmule/Messages.java-- create mode 100644 qmule/src/org/qcmg/qmule/Options.java-- create mode 100644 qmule/src/org/qcmg/qmule/Pileup.java-- create mode 100644 qmule/src/org/qcmg/qmule/PileupStats.java-- create mode 100644 qmule/src/org/qcmg/qmule/QMuleException.java-- create mode 100644 qmule/src/org/qcmg/qmule/QueryCADDLib.java-- create mode 100644 qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- create mode 100644 qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- create mode 100644 qmule/src/org/qcmg/qmule/ReadsAppend.java-- create mode 100644 qmule/src/org/qcmg/qmule/RunGatk.java-- create mode 100644 qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java-- create mode 100644 qmule/src/org/qcmg/qmule/SnpToReferenceRegionFilter.java-- create mode 100644 qmule/src/org/qcmg/qmule/SubSample.java-- create mode 100644 qmule/src/org/qcmg/qmule/TestFileFinder.java-- create mode 100644 qmule/src/org/qcmg/qmule/TestJarUpdate.java-- create mode 100644 qmule/src/org/qcmg/qmule/TestSort.java-- create mode 100644 qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- create mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- create mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- create mode 100644 qmule/src/org/qcmg/qmule/XCvsZP.java-- create mode 100644 qmule/src/org/qcmg/qmule/bam/CheckBam.java-- create mode 100644 qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- create mode 100644 qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- create mode 100644 qmule/src/org/qcmg/qmule/qcnv/Main.java-- create mode 100644 qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- create mode 100644 qmule/src/org/qcmg/qmule/qcnv/Options.java-- create mode 100644 qmule/src/org/qcmg/qmule/queryChrMT.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/Mule.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- create mode 100644 qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- create mode 100644 qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- create mode 100644 qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- create mode 100644 qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- create mode 100644 qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- diff --git 
a/qio/src/org/qcmg/bed/BEDFileReader.java b/qio/src/org/qcmg/bed/BEDFileReader.java deleted file mode 100644 index 201e0f15f..000000000 --- a/qio/src/org/qcmg/bed/BEDFileReader.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.bed; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -public final class BEDFileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - - public BEDFileReader(final File file) throws IOException { - this.file = file; - FileInputStream fileStream = new FileInputStream(file); - inputStream = fileStream; - } - - public Iterator iterator() { - return getRecordIterator(); - } - - public BEDRecordIterator getRecordIterator() { - return new BEDRecordIterator(inputStream); - } - - public void close() throws IOException { - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/bed/BEDRecord.java b/qio/src/org/qcmg/bed/BEDRecord.java deleted file mode 100644 index 19b6013de..000000000 --- a/qio/src/org/qcmg/bed/BEDRecord.java +++ /dev/null @@ -1,97 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.bed; - - - -public class BEDRecord { - - private final static char T = '\t'; - - String chrom; - int chromStart; - int chromEnd; - String name; - int score; - String strand; - int thickStart; - int thickEnd; - String itemRGB; - int blockCount; - int blockSizes; - int blockStarts; - - public String getChrom() { - return chrom; - } - public void setChrom(String chrom) { - this.chrom = chrom; - } - public int getChromStart() { - return chromStart; - } - public void setChromStart(int chromStart) { - this.chromStart = chromStart; - } - public int getChromEnd() { - return chromEnd; - } - public void setChromEnd(int chromEnd) { - this.chromEnd = chromEnd; - } - public String getName() { - return name; - } - public void setName(String name) { - this.name = name; - } - public int getScore() { - return score; - } - public void setScore(int score) { - this.score = score; - } - public String getStrand() { - return strand; - } - public void setStrand(String strand) { - this.strand = strand; - } - public int getThickStart() { - return thickStart; - } - public void setThickStart(int thickStart) { - this.thickStart = thickStart; - } - public int getThickEnd() { - return thickEnd; - } - public void setThickEnd(int thickEnd) { - this.thickEnd = thickEnd; - } - public String getItemRGB() { - return itemRGB; - } - public void setItemRGB(String itemRGB) { - this.itemRGB = itemRGB; - } - public int getBlockCount() { - return blockCount; - } - public void setBlockCount(int blockCount) { - this.blockCount = blockCount; - } - public int getBlockSizes() { - return blockSizes; - } - public void setBlockSizes(int blockSizes) { - this.blockSizes = blockSizes; - } - public int getBlockStarts() { - return blockStarts; - } - public void setBlockStarts(int blockStarts) { - this.blockStarts = blockStarts; - } -} diff --git a/qio/src/org/qcmg/bed/BEDRecordIterator.java b/qio/src/org/qcmg/bed/BEDRecordIterator.java deleted file mode 100644 index 6a4e54934..000000000 --- a/qio/src/org/qcmg/bed/BEDRecordIterator.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * © 
Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.bed; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class BEDRecordIterator implements Iterator { - private final BufferedReader reader; - private BEDRecord next; - - public BEDRecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public BEDRecord next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - BEDRecord result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = BEDSerializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - @SuppressWarnings("unchecked") - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/bed/BEDRecordPositionComparator.java b/qio/src/org/qcmg/bed/BEDRecordPositionComparator.java deleted file mode 100644 index 7373642f1..000000000 --- a/qio/src/org/qcmg/bed/BEDRecordPositionComparator.java +++ /dev/null @@ -1,21 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.bed; - -import java.util.Comparator; - -public class BEDRecordPositionComparator implements - Comparator { - public int compare(BEDRecord recordA, BEDRecord recordB) { - return compareStart(recordA, recordB) + compareEnd(recordA, recordB); - } - - public int compareStart(BEDRecord recordA, BEDRecord recordB) { - return recordA.getChromStart() - recordB.getChromStart(); - } - - public int compareEnd(BEDRecord recordA, BEDRecord recordB) { - return recordA.getChromEnd() - recordB.getChromEnd(); - } -} diff --git a/qio/src/org/qcmg/bed/BEDSerializer.java b/qio/src/org/qcmg/bed/BEDSerializer.java deleted file mode 100644 index d64eec5c0..000000000 --- a/qio/src/org/qcmg/bed/BEDSerializer.java +++ /dev/null @@ -1,65 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.bed; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -public final class BEDSerializer { - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static BEDRecord nextRecord(final BufferedReader reader) - throws IOException , Exception { - BEDRecord result = null; - String line = nextNonheaderLine(reader); - if (null != line) { - result = parseRecord(line); - } - return result; - } - - static BEDRecord parseRecord(final String line) throws Exception { - String[] params = tabbedPattern.split(line, -1); - if (3 > params.length) { - throw new Exception("Bad BED format. 
Insufficient columns: '" + line + "'"); - } - BEDRecord result = new BEDRecord(); - result.setChrom(params[0]); - result.setChromStart(Integer.parseInt(params[1])); - result.setChromEnd(Integer.parseInt(params[2])); - if (params.length > 3) { - if (params.length >= 4) - result.setName(params[3]); - if (params.length >= 5) - result.setScore(Integer.parseInt(params[4])); - if (params.length >= 6) - result.setStrand(params[5]); - if (params.length >= 7) - result.setThickStart(Integer.parseInt(params[6])); - if (params.length >= 8) - result.setThickEnd(Integer.parseInt(params[7])); - if (params.length >= 9) - result.setItemRGB(params[8]); - if (params.length >= 10) - result.setBlockCount(Integer.parseInt(params[9])); - if (params.length >= 11) - result.setBlockSizes(Integer.parseInt(params[10])); - if (params.length >= 12) - result.setBlockStarts(Integer.parseInt(params[11])); - } - return result; - } - -} diff --git a/qio/src/org/qcmg/consensuscalls/ConsensusCallsFlag.java b/qio/src/org/qcmg/consensuscalls/ConsensusCallsFlag.java deleted file mode 100644 index 4f4993b77..000000000 --- a/qio/src/org/qcmg/consensuscalls/ConsensusCallsFlag.java +++ /dev/null @@ -1,64 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.consensuscalls; - - -public enum ConsensusCallsFlag { - - H_1("h1"), - H_2("h2"), - H_3("h3"), - H_4("h4"), - H_5("h5"), - H_6("h6"), - H_7("h7"), - H_8("h8"), - H_9("h9"), - H_10("h10"), - H_11("h11"), - H_12("h12"), - H_13("h13"), - H_14("h14"), - H_15("h15"), - H_16("h16"), - H_17("h17"), - H_18("h18"), - H_19("h19"), - H_20("h20"), - H_21("h21"), - H_22("h22"), - M_1("m1"), - M_2("m2"), - M_3("m3"), - M_4("m4"), - M_5("m5"), - M_6("m6"), - M_7("m7"), - M_8("m8"), - M_9("m9"), - M_10("m10"), - M_11("m11"), - M_12("m12"), - M_13("m13"); - - private final String value; - - ConsensusCallsFlag(String v) { - value = v; - } - - public String value() { - return value; - } - - public static ConsensusCallsFlag fromValue(String v) { - for (ConsensusCallsFlag c: ConsensusCallsFlag.values()) { - if (c.value.equals(v)) { - return c; - } - } - throw new IllegalArgumentException(v); - } - -} diff --git a/qio/src/org/qcmg/consensuscalls/ConsensusCallsRecord.java b/qio/src/org/qcmg/consensuscalls/ConsensusCallsRecord.java deleted file mode 100644 index b342a9870..000000000 --- a/qio/src/org/qcmg/consensuscalls/ConsensusCallsRecord.java +++ /dev/null @@ -1,377 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -// -// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 -// See http://java.sun.com/xml/jaxb -// Any modifications to this file will be lost upon recompilation of the source schema. 
-// Generated on: 2013.10.25 at 10:52:20 AM EST -// - - -package org.qcmg.consensuscalls; - -import java.util.ArrayList; -import java.util.List; -public class ConsensusCallsRecord { - - protected String chr; - protected int position; - protected String alleleDiColor1; - protected String alleleDiColor2; - protected String reference; - protected String genotype; - protected double pValue; - protected List flag; - protected int coverage; - protected int nCounts1StAllele; - protected int nCountsReferenceAllele; - protected int nCountsNonReferenceAllele; - protected int refAvgQV; - protected int novelAvgQV; - protected int heterozygous; - protected String algorithm; - protected String algorithmName; - - /** - * Gets the value of the chr property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getChr() { - return chr; - } - - /** - * Sets the value of the chr property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setChr(String value) { - this.chr = value; - } - - /** - * Gets the value of the position property. - * - */ - public int getPosition() { - return position; - } - - /** - * Sets the value of the position property. - * - */ - public void setPosition(int value) { - this.position = value; - } - - /** - * Gets the value of the alleleDiColor1 property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getAlleleDiColor1() { - return alleleDiColor1; - } - - /** - * Sets the value of the alleleDiColor1 property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setAlleleDiColor1(String value) { - this.alleleDiColor1 = value; - } - - /** - * Gets the value of the alleleDiColor2 property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getAlleleDiColor2() { - return alleleDiColor2; - } - - /** - * Sets the value of the alleleDiColor2 property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setAlleleDiColor2(String value) { - this.alleleDiColor2 = value; - } - - /** - * Gets the value of the reference property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getReference() { - return reference; - } - - /** - * Sets the value of the reference property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setReference(String value) { - this.reference = value; - } - - /** - * Gets the value of the genotype property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getGenotype() { - return genotype; - } - - /** - * Sets the value of the genotype property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setGenotype(String value) { - this.genotype = value; - } - - /** - * Gets the value of the pValue property. - * - */ - public double getPValue() { - return pValue; - } - - /** - * Sets the value of the pValue property. - * - */ - public void setPValue(double value) { - this.pValue = value; - } - - /** - * Gets the value of the flag property. - * - *

- * This accessor method returns a reference to the live list, - * not a snapshot. Therefore any modification you make to the - * returned list will be present inside the JAXB object. - * This is why there is not a set method for the flag property. - * - * For example, to add a new item, do as follows: - * <pre> -     *    getFlag().add(newItem); -     * </pre> - * - * - *
- * Objects of the following type(s) are allowed in the list - * {@link ConsensusCallsFlag } - * - * - */ - public List getFlag() { - if (flag == null) { - flag = new ArrayList(); - } - return this.flag; - } - - /** - * Gets the value of the coverage property. - * - */ - public int getCoverage() { - return coverage; - } - - /** - * Sets the value of the coverage property. - * - */ - public void setCoverage(int value) { - this.coverage = value; - } - - /** - * Gets the value of the nCounts1StAllele property. - * - */ - public int getNCounts1StAllele() { - return nCounts1StAllele; - } - - /** - * Sets the value of the nCounts1StAllele property. - * - */ - public void setNCounts1StAllele(int value) { - this.nCounts1StAllele = value; - } - - /** - * Gets the value of the nCountsReferenceAllele property. - * - */ - public int getNCountsReferenceAllele() { - return nCountsReferenceAllele; - } - - /** - * Sets the value of the nCountsReferenceAllele property. - * - */ - public void setNCountsReferenceAllele(int value) { - this.nCountsReferenceAllele = value; - } - - /** - * Gets the value of the nCountsNonReferenceAllele property. - * - */ - public int getNCountsNonReferenceAllele() { - return nCountsNonReferenceAllele; - } - - /** - * Sets the value of the nCountsNonReferenceAllele property. - * - */ - public void setNCountsNonReferenceAllele(int value) { - this.nCountsNonReferenceAllele = value; - } - - /** - * Gets the value of the refAvgQV property. - * - */ - public int getRefAvgQV() { - return refAvgQV; - } - - /** - * Sets the value of the refAvgQV property. - * - */ - public void setRefAvgQV(int value) { - this.refAvgQV = value; - } - - /** - * Gets the value of the novelAvgQV property. - * - */ - public int getNovelAvgQV() { - return novelAvgQV; - } - - /** - * Sets the value of the novelAvgQV property. - * - */ - public void setNovelAvgQV(int value) { - this.novelAvgQV = value; - } - - /** - * Gets the value of the heterozygous property. - * - */ - public int getHeterozygous() { - return heterozygous; - } - - /** - * Sets the value of the heterozygous property. - * - */ - public void setHeterozygous(int value) { - this.heterozygous = value; - } - - /** - * Gets the value of the algorithm property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getAlgorithm() { - return algorithm; - } - - /** - * Sets the value of the algorithm property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setAlgorithm(String value) { - this.algorithm = value; - } - - /** - * Gets the value of the algorithmName property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getAlgorithmName() { - return algorithmName; - } - - /** - * Sets the value of the algorithmName property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setAlgorithmName(String value) { - this.algorithmName = value; - } - -} diff --git a/qio/src/org/qcmg/consensuscalls/ConsensusCallsSerializer.java b/qio/src/org/qcmg/consensuscalls/ConsensusCallsSerializer.java deleted file mode 100644 index 889bc8bb3..000000000 --- a/qio/src/org/qcmg/consensuscalls/ConsensusCallsSerializer.java +++ /dev/null @@ -1,68 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.consensuscalls; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Pattern; - -public final class ConsensusCallsSerializer { - private static final Pattern tabbedPattern = Pattern.compile("[\\t]+"); - private static final Pattern commaPattern = Pattern.compile("[,]+"); - - public static ConsensusCallsRecord nextRecord(final BufferedReader reader) - throws Exception, IOException { - ConsensusCallsRecord result = null; - try { - String line = reader.readLine(); - if (null != line) { - result = parseRecord(line); - } - } catch (IOException e) { - throw e; - } catch (Exception e) { - throw e; - } - return result; - } - - static ConsensusCallsRecord parseRecord(final String line) throws Exception { - String[] params = tabbedPattern.split(line); - if (17 != params.length) { - throw new Exception("Bad Consensus Calls format"); - } - ConsensusCallsRecord result = new ConsensusCallsRecord(); - result.setChr(params[0]); - result.setPosition(Integer.parseInt(params[1])); - result.setAlleleDiColor1(params[2]); - result.setAlleleDiColor2(params[3]); - result.setReference(params[4]); - result.setGenotype(params[5]); - result.setPValue(Double.parseDouble(params[6])); - parseFlags(result.getFlag(), params[7]); - result.setCoverage(Integer.parseInt(params[8])); - result.setNCountsNonReferenceAllele(Integer.parseInt(params[9])); - result.setNCountsReferenceAllele(Integer.parseInt(params[10])); - result.setNCountsNonReferenceAllele(Integer.parseInt(params[11])); - result.setRefAvgQV(Integer.parseInt(params[12])); - result.setNovelAvgQV(Integer.parseInt(params[13])); - result.setHeterozygous(Integer.parseInt(params[14])); - result.setAlgorithm(params[15]); - result.setAlgorithmName(params[16]); - return result; - } - - public static void parseFlags(final List list, final String value) throws Exception { - String[] params = commaPattern.split(value); - if (1 > params.length) { - throw new Exception("Bad Consensus Calls Flag format"); - } - for (String param : params) { - list.add(ConsensusCallsFlag.fromValue(param)); - } - } -} diff --git a/qio/src/org/qcmg/exception/RecordIteratorException.java b/qio/src/org/qcmg/exception/RecordIteratorException.java deleted file mode 100644 index 6320c24b7..000000000 --- a/qio/src/org/qcmg/exception/RecordIteratorException.java +++ /dev/null @@ -1,17 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.exception; - -public class RecordIteratorException extends RuntimeException { - - private static final long serialVersionUID = 7963940971937212428L; - - public RecordIteratorException() {} // default constructor - public RecordIteratorException(Exception e) { - super(e.getMessage(), e); - } - public RecordIteratorException(String message, Exception e) { - super(message, e); - } -} diff --git a/qio/src/org/qcmg/genesymbol/GeneSymbolFileReader.java b/qio/src/org/qcmg/genesymbol/GeneSymbolFileReader.java deleted file mode 100644 index d35b82028..000000000 --- a/qio/src/org/qcmg/genesymbol/GeneSymbolFileReader.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.genesymbol; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -public final class GeneSymbolFileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - - public GeneSymbolFileReader(final File file) throws IOException { - this.file = file; - FileInputStream fileStream = new FileInputStream(file); - inputStream = fileStream; - } - - public Iterator iterator() { - return getRecordIterator(); - } - - public GeneSymbolRecordIterator getRecordIterator() { - return new GeneSymbolRecordIterator(inputStream); - } - - public void close() throws IOException { - inputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/genesymbol/GeneSymbolRecord.java b/qio/src/org/qcmg/genesymbol/GeneSymbolRecord.java deleted file mode 100644 index 85906d5ba..000000000 --- a/qio/src/org/qcmg/genesymbol/GeneSymbolRecord.java +++ /dev/null @@ -1,32 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.genesymbol; - - -public class GeneSymbolRecord { - - private String geneId; - private String transcriptId; - private String symbol; - - public String getGeneId() { - return geneId; - } - public void setGeneId(String geneId) { - this.geneId = geneId; - } - public String getTranscriptId() { - return transcriptId; - } - public void setTranscriptId(String transcriptId) { - this.transcriptId = transcriptId; - } - public String getSymbol() { - return symbol; - } - public void setSymbol(String symbol) { - this.symbol = symbol; - } - -} diff --git a/qio/src/org/qcmg/genesymbol/GeneSymbolRecordIterator.java b/qio/src/org/qcmg/genesymbol/GeneSymbolRecordIterator.java deleted file mode 100644 index 52d2bc03a..000000000 --- a/qio/src/org/qcmg/genesymbol/GeneSymbolRecordIterator.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.genesymbol; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class GeneSymbolRecordIterator implements Iterator { - private final BufferedReader reader; - private GeneSymbolRecord next; - - public GeneSymbolRecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public GeneSymbolRecord next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - GeneSymbolRecord result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = GeneSymbolSerializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - @SuppressWarnings("unchecked") - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/genesymbol/GeneSymbolSerializer.java b/qio/src/org/qcmg/genesymbol/GeneSymbolSerializer.java deleted file mode 100644 index 6d2280653..000000000 --- a/qio/src/org/qcmg/genesymbol/GeneSymbolSerializer.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.genesymbol; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -public final class GeneSymbolSerializer { - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static GeneSymbolRecord nextRecord(final BufferedReader reader) - throws IOException , Exception { - GeneSymbolRecord result = null; - String line = nextNonheaderLine(reader); - if (null != line) { - result = parseRecord(line); - } - return result; - } - - static GeneSymbolRecord parseRecord(final String line) throws Exception { - String[] params = tabbedPattern.split(line, -1); - if (3 > params.length) { - throw new Exception("Bad Gene Symbol format. Insufficient columns: '" + line + "'"); - } - GeneSymbolRecord result = new GeneSymbolRecord(); - result.setGeneId(params[0]); - result.setTranscriptId(params[1]); - result.setSymbol(params[2]); - return result; - } -} diff --git a/qio/src/org/qcmg/gff3/GFF3FileReader.java b/qio/src/org/qcmg/gff3/GFF3FileReader.java deleted file mode 100644 index 8b103eb7b..000000000 --- a/qio/src/org/qcmg/gff3/GFF3FileReader.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.gff3; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -public final class GFF3FileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - - public GFF3FileReader(final File file) throws FileNotFoundException { - this.file = file; - FileInputStream fileStream = new FileInputStream(file); - inputStream = fileStream; - } - - @Override - public Iterator iterator() { - return getRecordIterator(); - } - - public GFF3RecordIterator getRecordIterator() { - return new GFF3RecordIterator(inputStream); - } - - @Override - public void close() throws IOException { - inputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/gff3/GFF3FileWriter.java b/qio/src/org/qcmg/gff3/GFF3FileWriter.java deleted file mode 100644 index 16e76bc3c..000000000 --- a/qio/src/org/qcmg/gff3/GFF3FileWriter.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.gff3; - -import java.io.Closeable; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -public final class GFF3FileWriter implements Closeable { - private final File file; - private final OutputStream outputStream; - - public GFF3FileWriter(final File file) throws FileNotFoundException { - this.file = file; - OutputStream stream = new FileOutputStream(file); - outputStream = stream; - } - - public void add(final GFF3Record record) throws IOException { - String encoded = GFF3Serializer.serialise(record) + "\n"; - outputStream.write(encoded.getBytes()); - outputStream.flush(); - } - - @Override - public void close() throws IOException { - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/gff3/GFF3Record.java b/qio/src/org/qcmg/gff3/GFF3Record.java deleted file mode 100644 index bceffc67e..000000000 --- a/qio/src/org/qcmg/gff3/GFF3Record.java +++ /dev/null @@ -1,244 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.gff3; - - -public class GFF3Record { - - protected String seqId; - protected String source; - protected String type; - protected int start; - protected int end; - protected String score; - protected String strand; - protected String phase; - protected String attributes; - protected String rawData; - - /** - * Gets the value of the seqId property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSeqId() { - return seqId; - } - - /** - * Sets the value of the seqId property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSeqId(String value) { - this.seqId = value; - } - - /** - * Gets the value of the source property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSource() { - return source; - } - - /** - * Sets the value of the source property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSource(String value) { - this.source = value; - } - - /** - * Gets the value of the type property. 
- * - * @return - * possible object is - * {@link String } - * - */ - public String getType() { - return type; - } - - /** - * Sets the value of the type property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setType(String value) { - this.type = value; - } - - /** - * Gets the value of the start property. - * - */ - public int getStart() { - return start; - } - - /** - * Sets the value of the start property. - * - */ - public void setStart(int value) { - this.start = value; - } - - /** - * Gets the value of the end property. - * - */ - public int getEnd() { - return end; - } - - /** - * Sets the value of the end property. - * - */ - public void setEnd(int value) { - this.end = value; - } - - /** - * Gets the value of the score property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getScore() { - return score; - } - - /** - * Sets the value of the score property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setScore(String value) { - this.score = value; - } - - /** - * Gets the value of the strand property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getStrand() { - return strand; - } - - /** - * Sets the value of the strand property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setStrand(String value) { - this.strand = value; - } - - /** - * Gets the value of the phase property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getPhase() { - return phase; - } - - /** - * Sets the value of the phase property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setPhase(String value) { - this.phase = value; - } - - /** - * Gets the value of the attributes property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getAttributes() { - return attributes; - } - - /** - * Sets the value of the attributes property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setAttributes(String value) { - this.attributes = value; - } - - /** - * Gets the value of the rawData property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getRawData() { - return rawData; - } - - /** - * Sets the value of the rawData property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setRawData(String value) { - this.rawData = value; - } - -} diff --git a/qio/src/org/qcmg/gff3/GFF3RecordChromosomeAndPositionComparator.java b/qio/src/org/qcmg/gff3/GFF3RecordChromosomeAndPositionComparator.java deleted file mode 100644 index 412c13366..000000000 --- a/qio/src/org/qcmg/gff3/GFF3RecordChromosomeAndPositionComparator.java +++ /dev/null @@ -1,32 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.gff3; - -import java.util.Comparator; - -import org.qcmg.common.model.ReferenceNameComparator; - -public class GFF3RecordChromosomeAndPositionComparator implements - Comparator { - - private static final Comparator chrComp = new ReferenceNameComparator(); - - public int compare(GFF3Record recordA, GFF3Record recordB) { - - // first compare chromosome - int chrcompare = chrComp.compare(recordA.getSeqId(), recordB.getSeqId()); - - if (chrcompare != 0) return chrcompare; - - return compareStart(recordA, recordB) + compareEnd(recordA, recordB); - } - - public int compareStart(GFF3Record recordA, GFF3Record recordB) { - return recordA.getStart() - recordB.getStart(); - } - - public int compareEnd(GFF3Record recordA, GFF3Record recordB) { - return recordA.getEnd() - recordB.getEnd(); - } -} diff --git a/qio/src/org/qcmg/gff3/GFF3RecordIterator.java b/qio/src/org/qcmg/gff3/GFF3RecordIterator.java deleted file mode 100644 index 10e913f57..000000000 --- a/qio/src/org/qcmg/gff3/GFF3RecordIterator.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.gff3; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class GFF3RecordIterator implements Iterator { - private final BufferedReader reader; - private GFF3Record next; - - public GFF3RecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public GFF3Record next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - GFF3Record result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = GFF3Serializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - @SuppressWarnings("unchecked") - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/gff3/GFF3RecordPositionComparator.java b/qio/src/org/qcmg/gff3/GFF3RecordPositionComparator.java deleted file mode 100644 index f9430eb0f..000000000 --- a/qio/src/org/qcmg/gff3/GFF3RecordPositionComparator.java +++ /dev/null @@ -1,21 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.gff3; - -import java.util.Comparator; - -public class GFF3RecordPositionComparator implements - Comparator { - public int compare(GFF3Record recordA, GFF3Record recordB) { - return compareStart(recordA, recordB) + compareEnd(recordA, recordB); - } - - public int compareStart(GFF3Record recordA, GFF3Record recordB) { - return recordA.getStart() - recordB.getStart(); - } - - public int compareEnd(GFF3Record recordA, GFF3Record recordB) { - return recordA.getEnd() - recordB.getEnd(); - } -} diff --git a/qio/src/org/qcmg/gff3/GFF3Serializer.java b/qio/src/org/qcmg/gff3/GFF3Serializer.java deleted file mode 100644 index 0dd63d718..000000000 --- a/qio/src/org/qcmg/gff3/GFF3Serializer.java +++ /dev/null @@ -1,94 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. 
- * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.gff3; - -import java.io.BufferedReader; -import java.io.IOException; - -import org.qcmg.common.util.Constants; -import org.qcmg.common.util.TabTokenizer; - -public final class GFF3Serializer { - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static GFF3Record nextRecord(final BufferedReader reader) - throws Exception, IOException { - GFF3Record result = null; - try { - - String line = nextNonheaderLine(reader); - if (null != line) { - result = parseRecord(line); - } - } catch (IOException e) { - throw e; - } catch (Exception e) { - throw e; - } - return result; - } - - static GFF3Record parseRecord(final String line) throws Exception { - String[] params = TabTokenizer.tokenize(line); - if (8 > params.length) { - throw new Exception("Bad GFF3 format. Insufficient columns: '" + line + "'"); - } - GFF3Record result = new GFF3Record(); - result.setRawData(line); - result.setSeqId(params[0]); - result.setSource(params[1]); - result.setType(params[2]); - result.setStart(Integer.parseInt(params[3])); - result.setEnd(Integer.parseInt(params[4])); - result.setScore(params[5]); - result.setStrand(params[6]); - result.setPhase(params[7]); - if (8 < params.length) { - result.setAttributes(params[8]); - } - return result; - } - - public static GFF3Record duplicate(final GFF3Record record) { - GFF3Record result = new GFF3Record(); - result.setSeqId(record.getSeqId()); - result.setSource(record.getSource()); - result.setType(record.getType()); - result.setStart(record.getStart()); - result.setEnd(record.getEnd()); - result.setScore(record.getScore()); - result.setStrand(record.getStrand()); - result.setPhase(record.getPhase()); - result.setAttributes(record.getAttributes()); - return result; - } - - public static String serialise(final GFF3Record record) { - StringBuilder result = new StringBuilder(record.getSeqId()).append(Constants.TAB); - result.append(record.getSource()).append(Constants.TAB); - result.append(record.getType()).append(Constants.TAB); - result.append(record.getStart()).append(Constants.TAB); - result.append(record.getEnd()).append(Constants.TAB); - result.append(record.getScore()).append(Constants.TAB); - result.append(record.getStrand()).append(Constants.TAB); - result.append(record.getPhase()).append(Constants.TAB); - if (null != record.getAttributes()) { - result.append(record.getAttributes()); - } - return result.toString(); - } - -} diff --git a/qio/src/org/qcmg/illumina/IlluminaFileReader.java b/qio/src/org/qcmg/illumina/IlluminaFileReader.java deleted file mode 100644 index 9b543605c..000000000 --- a/qio/src/org/qcmg/illumina/IlluminaFileReader.java +++ /dev/null @@ -1,21 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.illumina; - -import java.io.File; -import java.io.IOException; - -import org.qcmg.reader.AbstractReader; - -public final class IlluminaFileReader extends AbstractReader { - - public IlluminaFileReader(final File file) throws IOException { - super(file); - } - - public IlluminaRecordIterator getRecordIterator() throws Exception { - return new IlluminaRecordIterator(inputStream); - } - -} diff --git a/qio/src/org/qcmg/illumina/IlluminaRecord.java b/qio/src/org/qcmg/illumina/IlluminaRecord.java deleted file mode 100644 index c47af4501..000000000 --- a/qio/src/org/qcmg/illumina/IlluminaRecord.java +++ /dev/null @@ -1,262 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.illumina; - -import org.qcmg.common.string.StringUtils; -import org.qcmg.record.Record; - -public class IlluminaRecord implements Record { - - private String chr; - private int start; - private final String strand; - private String snpId; - - //TODO do we need this field? - private float GCScore; - - private char firstAllele; - private char secondAllele; - - private final char firstAlleleForward; - private final char secondAlleleForward; - - private final char firstAlleleCall; - private final char secondAlleleCall; - - //TODO do we need this field? - private boolean hom; - private boolean isSnp; - private String snp; - - private final float logRRatio; - private final float bAlleleFreq; - - private final int rawX; - private final int rawY; - - /** - * Constructor that takes in a String array, retrieving pertinent fields from the array to populate the record - * - * @param rawIlluminaData String[] representing a line in the raw Illumina data file - */ - public IlluminaRecord(String [] rawIlluminaData) { - // chromosome and position defined in the raw Illumina data file relate to an old version - // of the genome (hg18), so instead, we use the dbSNP id to get the more recent - //(hg19) chromosome and position details from the dbSNP file at a later date - int length = rawIlluminaData.length; - snpId = rawIlluminaData[0]; - GCScore = Float.parseFloat(rawIlluminaData[4]); - firstAlleleForward = rawIlluminaData[10].charAt(0); - secondAlleleForward = rawIlluminaData[11].charAt(0); - firstAllele = rawIlluminaData[12].charAt(0); - secondAllele = rawIlluminaData[13].charAt(0); - setHom(rawIlluminaData[14].equals(rawIlluminaData[15])); - chr = rawIlluminaData[16]; - start = Integer.parseInt(rawIlluminaData[17]); - snp = rawIlluminaData[20]; - rawX = Integer.parseInt(rawIlluminaData[length - 4]); - rawY = Integer.parseInt(rawIlluminaData[length - 3]); - bAlleleFreq = Float.parseFloat(rawIlluminaData[length - 2]); - String logRRatioString = rawIlluminaData[length - 1]; - if (StringUtils.isNullOrEmpty(logRRatioString)) - logRRatioString = "NaN"; - logRRatio = Float.parseFloat(logRRatioString); - firstAlleleCall = rawIlluminaData[14].charAt(0); - secondAlleleCall = rawIlluminaData[15].charAt(0); - strand = rawIlluminaData[22]; // use customer strand rather than illumina strand -// strand = rawIlluminaData[21]; - } - - - public String getChr() { - return chr; - } - public void setChr(String chr) { - this.chr = chr; - } - public int getStart() { - return start; - } - public void setStart(int start) { - this.start = start; - } - public String getSnpId() { - return snpId; - } - public void setSnpId(String snpId) { - this.snpId = snpId; - } - public float getGCScore() { - return GCScore; - } - public void 
setGCScore(float GCScore) { - this.GCScore = GCScore; - } - public char getFirstAllele() { - return firstAllele; - } - public void setFirstAllele(char firstAllele) { - this.firstAllele = firstAllele; - } - public char getSecondAllele() { - return secondAllele; - } - public void setSecondAllele(char secondAllele) { - this.secondAllele = secondAllele; - } - public String getSnp() { - return snp; - } - public void setSnp(String snp) { - this.snp = snp; - } - public void setHom(boolean hom) { - this.hom = hom; - } - public boolean isHom() { - return hom; - } - - public void setSnp(boolean isSnp) { - this.isSnp = isSnp; - } - - public boolean isSnp() { - return isSnp; - } - - public float getLogRRatio() { - return logRRatio; - } - - - public float getbAlleleFreq() { - return bAlleleFreq; - } - - - public char getFirstAlleleCall() { - return firstAlleleCall; - } - - - public char getSecondAlleleCall() { - return secondAlleleCall; - } - - public int getRawX() { - return rawX; - } - - public int getRawY() { - return rawY; - } - - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + Float.floatToIntBits(GCScore); - result = prime * result + Float.floatToIntBits(bAlleleFreq); - result = prime * result + ((chr == null) ? 0 : chr.hashCode()); - result = prime * result + firstAllele; - result = prime * result + firstAlleleCall; - result = prime * result + (hom ? 1231 : 1237); - result = prime * result + (isSnp ? 1231 : 1237); - result = prime * result + Float.floatToIntBits(logRRatio); - result = prime * result + rawX; - result = prime * result + rawY; - result = prime * result + secondAllele; - result = prime * result + secondAlleleCall; - result = prime * result + ((snp == null) ? 0 : snp.hashCode()); - result = prime * result + ((snpId == null) ? 
0 : snpId.hashCode()); - result = prime * result + start; - return result; - } - - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - IlluminaRecord other = (IlluminaRecord) obj; - if (Float.floatToIntBits(GCScore) != Float - .floatToIntBits(other.GCScore)) - return false; - if (Float.floatToIntBits(bAlleleFreq) != Float - .floatToIntBits(other.bAlleleFreq)) - return false; - if (chr == null) { - if (other.chr != null) - return false; - } else if (!chr.equals(other.chr)) - return false; - if (firstAllele != other.firstAllele) - return false; - if (firstAlleleCall != other.firstAlleleCall) - return false; - if (hom != other.hom) - return false; - if (isSnp != other.isSnp) - return false; - if (Float.floatToIntBits(logRRatio) != Float - .floatToIntBits(other.logRRatio)) - return false; - if (rawX != other.rawX) - return false; - if (rawY != other.rawY) - return false; - if (secondAllele != other.secondAllele) - return false; - if (secondAlleleCall != other.secondAlleleCall) - return false; - if (snp == null) { - if (other.snp != null) - return false; - } else if (!snp.equals(other.snp)) - return false; - if (snpId == null) { - if (other.snpId != null) - return false; - } else if (!snpId.equals(other.snpId)) - return false; - if (start != other.start) - return false; - return true; - } - - - @Override - public String toString() { - return "IlluminaRecord [GCScore=" + GCScore + ", bAlleleFreq=" - + bAlleleFreq + ", chr=" + chr + ", firstAllele=" + firstAllele - + ", firstAlleleCall=" + firstAlleleCall + ", hom=" + hom - + ", isSnp=" + isSnp + ", logRRatio=" + logRRatio + ", rawX=" - + rawX + ", rawY=" + rawY + ", secondAllele=" + secondAllele - + ", secondAlleleCall=" + secondAlleleCall + ", snp=" + snp - + ", snpId=" + snpId + ", start=" + start + "]"; - } - - - public String getStrand() { - return strand; - } - - - public char getFirstAlleleForward() { - return firstAlleleForward; - } - - public char getSecondAlleleForward() { - return secondAlleleForward; - } - - -} diff --git a/qio/src/org/qcmg/illumina/IlluminaRecordIterator.java b/qio/src/org/qcmg/illumina/IlluminaRecordIterator.java deleted file mode 100644 index 4f1a8ba7f..000000000 --- a/qio/src/org/qcmg/illumina/IlluminaRecordIterator.java +++ /dev/null @@ -1,21 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.illumina; - -import java.io.InputStream; - -import org.qcmg.record.AbstractRecordIterator; - -public class IlluminaRecordIterator extends AbstractRecordIterator { - - public IlluminaRecordIterator(InputStream stream) throws Exception { - super(stream); - } - - @Override - protected void readNext() throws Exception { - next = IlluminaSerializer.nextRecord(reader); - } - -} diff --git a/qio/src/org/qcmg/illumina/IlluminaSerializer.java b/qio/src/org/qcmg/illumina/IlluminaSerializer.java deleted file mode 100644 index aba701597..000000000 --- a/qio/src/org/qcmg/illumina/IlluminaSerializer.java +++ /dev/null @@ -1,73 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.illumina; - -import java.io.BufferedReader; -import java.io.IOException; - -import org.qcmg.common.util.TabTokenizer; - -public final class IlluminaSerializer { - private static final String HEADER_LINE = "[Header]"; - private static final String DATA_LINE = "[Data]"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - // header lines are as follows: - /* -[Header] -GSGT Version 1.8.4 -Processing Date 8/12/2011 8:41 PM -Content HumanOmni1-Quad_v1-0_H.bpm -Num SNPs 1134514 -Total SNPs 1134514 -Num Samples 259 -Total Samples 260 -File 77 of 259 -[Data] -SNP Name Sample ID Allele1 - Top Allele2 - Top GC Score Sample Name Sample Group Sample Index SNP Index SNP Aux Allele1 - Forward Allele2 - Forward Allele1 - Design Allele2 - Design Allele1 - AB Allele2 - AB Chr Position GT Score Cluster Sep SNP ILMN Strand Customer Strand Top Genomic Sequence Theta R X Y -X Raw Y Raw B Allele Freq Log R Ratio - */ - - String line = reader.readLine(); - if (null != line && line.startsWith(HEADER_LINE)) { - - // ignore header lines until we hit [DATA] - line = reader.readLine(); - while (null != line && ! line.startsWith(DATA_LINE)) { - line = reader.readLine(); - } - // next line is still header.... - line = reader.readLine(); - line = reader.readLine(); - } - return line; - } - - public static IlluminaRecord nextRecord(final BufferedReader reader) throws Exception { - IlluminaRecord result = null; - - String data = nextNonheaderLine(reader); - if (null != data ) { - result = parseRecord(data); - } - - return result; - } - - static String[] parseData(final String value) throws Exception { - String[] dataArray = TabTokenizer.tokenize(value); - - // raw Illumina data has 32 fields... and the first one is an integer - if (dataArray.length != 32) throw new Exception("Bad Illumina data format - expecting 32 fields but saw " + dataArray.length); - - return dataArray; - } - - static IlluminaRecord parseRecord(final String record) - throws Exception { - return new IlluminaRecord(parseData(record)); - } - -} diff --git a/qio/test/org/qcmg/qio/gff/GFFSerializerTest.java b/qio/test/org/qcmg/qio/gff/GFFSerializerTest.java deleted file mode 100644 index 7c92d77a8..000000000 --- a/qio/test/org/qcmg/qio/gff/GFFSerializerTest.java +++ /dev/null @@ -1,145 +0,0 @@ -package org.qcmg.gff; - -import org.junit.Assert; -import org.junit.Test; - - -public class GFFSerializerTest { - - @Test - public void testParseDataInvalid() throws Exception { - // test empty string - try { - GFFSerializer.parseData(""); - Assert.fail("Should have thrown an Exception"); - } catch (Exception e) { - Assert.assertEquals("Not enough fields in the Record", e.getMessage()); - } - try { - GFFSerializer.parseData(" "); - Assert.fail("Should have thrown an Exception"); - } catch (Exception e) { - Assert.assertEquals("Not enough fields in the Record", e.getMessage()); - } - - // test null - try { - GFFSerializer.parseData(null); - Assert.fail("Should have thrown an Exception"); - } catch (AssertionError e) { - Assert.assertEquals("Record was null", e.getMessage()); - } - - // string with fewer than 8 entries - try { - GFFSerializer.parseData("1 2 3 4 5 6 "); - Assert.fail("Should have thrown an Exception"); - } catch (Exception e) { - Assert.assertEquals("Not enough fields in the Record", e.getMessage()); - } - } - - @Test - public void testParseData() throws Exception { - String[] returnedArray; - - // test with 8 entries - returnedArray = GFFSerializer.parseData("a b c d e f g h"); - 
Assert.assertEquals(8, returnedArray.length); - Assert.assertEquals("a", returnedArray[0]); - Assert.assertEquals("h", returnedArray[7]); - - // test with 9 entries - returnedArray = GFFSerializer.parseData("a b c d e f g h i"); - Assert.assertEquals(9, returnedArray.length); - Assert.assertEquals("a", returnedArray[0]); - Assert.assertEquals("h", returnedArray[7]); - Assert.assertEquals("i", returnedArray[8]); - - // test with 10 entries - returnedArray = GFFSerializer.parseData("a b c d e f g h i j"); - Assert.assertEquals(10, returnedArray.length); - Assert.assertEquals("a", returnedArray[0]); - Assert.assertEquals("h", returnedArray[7]); - Assert.assertEquals("j", returnedArray[9]); - - //test with realistic data string - returnedArray = GFFSerializer.parseData("1 solid read 10148 10190 14.4 - . " + - "aID=1212_1636_246;at=F3;b=GGTTAGGGTTAGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGG;" + - "g=G0103200103201032001033001032001032001032001032001;mq=43;o=0;" + - "q=31,30,32,26,26,26,23,24,29,31,31,23,25,18,14,20,18,11,27,22,18,23,2,18,29,20,25,11,19,18," + - "18,13,14,18,19,16,14,5,16,23,18,21,16,16,14,20,13,17,20,11;r=23_2;s=a23;u=0,4,1,1"); - Assert.assertEquals(9, returnedArray.length); - Assert.assertEquals("solid", returnedArray[1]); - Assert.assertEquals("read", returnedArray[2]); - } - - @Test - public void testParseRecordInvalid() throws Exception { - // test null - try { - GFFSerializer.parseRecord(null); - Assert.fail("Should have thrown an exception"); - } catch (AssertionError e) { - Assert.assertEquals("Record was null", e.getMessage()); - } - // test empty string - try { - GFFSerializer.parseRecord(""); - Assert.fail("Should have thrown an Exception"); - } catch (Exception e) { - Assert.assertEquals("Not enough fields in the Record", e.getMessage()); - } - try { - GFFSerializer.parseRecord(" "); - Assert.fail("Should have thrown an Exception"); - } catch (Exception e) { - Assert.assertEquals("Not enough fields in the Record", e.getMessage()); - } - } - - @Test - public void testParseRecord() throws Exception { - GFFRecord record; - - // 8 values - record = GFFSerializer.parseRecord("this is a 0 1 0.0 works OK"); - Assert.assertNotNull(record); - Assert.assertEquals("this", record.getSeqname()); - Assert.assertEquals("OK", record.getFrame()); - - - } - - @Test - public void testParseRecordWithAttributes() throws Exception { - GFFRecord record; - - // real record containing attributes - record = GFFSerializer.parseRecord("1 solid read 10148 10190 14.4 - . 
" + - "aID=1212_1636_246;at=F3;b=GGTTAGGGTTAGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGG;" + - "g=G0103200103201032001033001032001032001032001032001;mq=43;o=0;" + - "q=31,30,32,26,26,26,23,24,29,31,31,23,25,18,14,20,18,11,27,22,18,23,2,18,29,20,25,11,19,18," + - "18,13,14,18,19,16,14,5,16,23,18,21,16,16,14,20,13,17,20,11;r=23_2;s=a23;u=0,4,1,1"); - Assert.assertNotNull(record); - Assert.assertEquals("1", record.getSeqname()); - Assert.assertEquals("solid", record.getSource()); - Assert.assertEquals("read", record.getFeature()); - Assert.assertEquals(10148, record.getStart()); - Assert.assertEquals(10190, record.getEnd()); - Assert.assertTrue(14.4 == record.getScore()); - Assert.assertEquals("-", record.getStrand()); - Assert.assertEquals(".", record.getFrame()); - - } - - @Test - public void testParseRecordWithInvalidAttributes() throws Exception { - try { - GFFSerializer.parseRecord("sequence source feature 0 1 99.99 strand frame attributes"); - Assert.fail("Should have thrown an exception"); - } catch (Exception e) { - Assert.assertEquals("Attribute [attributes] is badly formed", e.getMessage()); - } - } -} diff --git a/qio/test/org/qcmg/qio/gff/GffReaderTest.java b/qio/test/org/qcmg/qio/gff/GffReaderTest.java index 20f0a05ed..4cab49d75 100644 --- a/qio/test/org/qcmg/qio/gff/GffReaderTest.java +++ b/qio/test/org/qcmg/qio/gff/GffReaderTest.java @@ -1,4 +1,4 @@ -package org.qcmg.gff; +package org.qcmg.qio.gff; import java.io.BufferedWriter; import java.io.File; diff --git a/qmule/src/org/qcmg/qmule/AlignerCompare.java-- b/qmule/src/org/qcmg/qmule/AlignerCompare.java-- new file mode 100644 index 000000000..5c8538a93 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/AlignerCompare.java-- @@ -0,0 +1,272 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; +import java.util.Objects; + +import htsjdk.samtools.SAMFileHeader.SortOrder; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordIterator; +import htsjdk.samtools.ValidationStringency; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + +public class AlignerCompare { + static QLogger logger = QLoggerFactory.getLogger(AlignerCompare.class); + boolean discardNonPrimary; + SamReader firReader; + SamReader secReader; + + SAMOrBAMWriterFactory sameWriter; + SAMOrBAMWriterFactory diffWriter_first; + SAMOrBAMWriterFactory diffWriter_second; + + SAMOrBAMWriterFactory unsureWriter_first; + SAMOrBAMWriterFactory unsureWriter_second; + + + long total_bam1 = 0; + long total_bam2 = 0; + long total_same = 0; + long noDiff_bam1 = 0; + long noDiff_bam2 = 0; + long noSecondary_bam1 = 0; + long nosupplementary_bam1 = 0; + long noSecondary_bam2 = 0; + long nosupplementary_bam2 = 0; + long nounsureAlignment = 0; + + + AlignerCompare(File firBam, File secBam, String prefix, boolean flag) throws Exception{ + //check inputs: sort by query name + firReader = SAMFileReaderFactory.createSAMFileReader(firBam, ValidationStringency.SILENT); + secReader = SAMFileReaderFactory.createSAMFileReader(secBam, ValidationStringency.SILENT); + discardNonPrimary = flag; + + if(! 
firReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) + throw new Exception("Please sort the input BAM by queryname: " + firBam.getAbsolutePath()); + + if(! secReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) + throw new Exception("Please sort the input BAM by queryname: " + secBam.getAbsolutePath()); + + + logger.info("input BAM1: " + firBam.getAbsolutePath()); + logger.info("input BAM2: " + secBam.getAbsolutePath()); + logger.info("discard secondary or supplementary alignments: " + String.valueOf(discardNonPrimary)); + + //create outputs + File outsame = new File(prefix + ".identical.bam" ); + File outdiff_first = new File(prefix + ".different.first.bam" ); + File outdiff_second = new File(prefix + ".different.second.bam" ); + + if(! firBam.getName().equals(secBam.getName())){ + outdiff_first = new File( prefix + ".different." + firBam.getName() ); + outdiff_second = new File( prefix + ".different." + secBam.getName() ); + } + + sameWriter = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outsame); + diffWriter_first = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outdiff_first ); + diffWriter_second = new SAMOrBAMWriterFactory(secReader.getFileHeader(), true, outdiff_second ); + + logger.info("output of identical alignments: " + outsame.getAbsolutePath()); + logger.info("output of different alignments from BAM1: " + outdiff_first.getAbsolutePath()); + logger.info("output of different alignments from BAM2: " + outdiff_second.getAbsolutePath()); + + //execute comparison + compareExecutor(); + + + //close IOs + firReader.close(); + secReader.close(); + sameWriter.closeWriter(); + diffWriter_first.closeWriter(); + diffWriter_second.closeWriter(); + + } + + void compareExecutor() throws Exception{ + ArrayList from1 = new ArrayList (); + ArrayList from2 = new ArrayList (); + SAMRecordIterator it1 = firReader.iterator(); + SAMRecordIterator it2 = secReader.iterator(); + //stats + long noRead = 0; + long noAlign1 = 1; + long noAlign2 = 1; + long noSame = 0; + + //initialize + SAMRecord record1 = it1.next(); + SAMRecord record2 = it2.next(); + String Id = record1.getReadName(); + from1.add(record1); + from2.add(record2); + + //get all aligner from same read + while( it1.hasNext() || it2.hasNext()){ + while(it1.hasNext()){ + noAlign1 ++; + record1 = it1.next() ; + if(record1.getReadName().equals(Id)){ + from1.add(record1); + }else //if not equals(Id) + break; + } //end while + + while( it2.hasNext() ){ + noAlign2 ++; + record2 = it2.next(); + if(record2.getReadName().equals(Id)){ + from2.add(record2); + }else + break; //exit while, record2 is read for next loop + } + //compare alignment in arraylist which filtered out secondary or supplenmentary alignments + noSame += classifyReads( AlignerFilter(from1, unsureWriter_first) , AlignerFilter(from2, unsureWriter_second) ); + + //clear arraylist and store current reads into arraylist for next loop + noRead ++; + from1.clear(); + from2.clear(); + from1.add(record1); + from2.add(record2); + Id = record1.getReadName(); + } + + logger.info(String.format("There are %d reads with %d alignments from BAM1", noRead, noAlign1)); + logger.info(String.format("There are %d reads with %d alignments from BAM2", noRead, noAlign2)); + logger.info(String.format("There are %d alignments are identical from both BAM", noSame)); + logger.info(String.format("Different alignments from BAM1 are %d, from BAM2 are %d", noDiff_bam1, noDiff_bam2)); + logger.info( String.format("discard %d secondary alignments and %d 
supplementary alignments from BAM1",noSecondary_bam1,nosupplementary_bam1));
+ logger.info(String.format("discard %d secondary alignments and %d supplementary alignments from BAM2",noSecondary_bam2,nosupplementary_bam2));
+
+
+ }
+
+ /**
+ *
+ * @param from a list of input alignments sharing the same read id
+ * @return ArrayList<SAMRecord>: cleaned alignments excluding secondary and supplementary alignments
+ */
+ ArrayList<SAMRecord> AlignerFilter(ArrayList<SAMRecord> from, SAMOrBAMWriterFactory factory) throws Exception{
+ ArrayList<SAMRecord> cleaned = new ArrayList<SAMRecord>();
+
+ for(SAMRecord record : from)
+ if( discardNonPrimary && record.isSecondaryOrSupplementary()){
+ if( record.getNotPrimaryAlignmentFlag())
+ noSecondary_bam1 ++;
+ else if( record.getSupplementaryAlignmentFlag())
+ nosupplementary_bam1 ++;
+ else
+ throw new Exception(record.getReadName() + " record flag error: record.isSecondaryOrSupplementary but not (secondary or supplementary) : " + record.getFlags());
+ }else
+ cleaned.add(record);
+
+/* //record these multi alignments for further investigation
+ if(cleaned.size() != 2){
+ for(SAMRecord record : cleaned){
+ factory.getWriter().addAlignment(record);
+ nounsureAlignment ++;
+
+ }
+ }
+*/
+ return cleaned;
+ }
+
+
+ int classifyReads(ArrayList<SAMRecord> from1, ArrayList<SAMRecord> from2) throws Exception{
+ ArrayList<SAMRecord> toremove1 = new ArrayList<SAMRecord>();
+ ArrayList<SAMRecord> toremove2 = new ArrayList<SAMRecord>();
+
+ for(SAMRecord record1 : from1){
+ for(SAMRecord record2: from2){
+ if(!record1.getReadName().equals(record2.getReadName()))
+ throw new Exception("error during processing: reads with different names were stored in the arrayList for comparison: "
+ + record1.getReadName() + " != " + record2.getReadName() ) ;
+ if (record1.getFlags() == record2.getFlags() &&
+ record1.getReferenceName().equals(record2.getReferenceName()) &&
+ record1.getAlignmentStart() == record2.getAlignmentStart() &&
+ record1.getAlignmentEnd() == record2.getAlignmentEnd() &&
+ record1.getMappingQuality() == record2.getMappingQuality() &&
+ record1.getCigarString().equals(record2.getCigarString()) &&
+ Objects.equals(record1.getAttribute("MD") , record2.getAttribute("MD"))){
+ sameWriter.getWriter().addAlignment(record1);
+ toremove1.add(record1);
+ toremove2.add(record2);
+ }
+ }
+ }
+
+ //record the remaining different alignments
+ from1.removeAll(toremove1);
+ for(SAMRecord record1 : from1)
+ diffWriter_first.getWriter().addAlignment(record1);
+
+ from2.removeAll(toremove2);
+ for(SAMRecord record2: from2)
+ diffWriter_second.getWriter().addAlignment(record2);
+
+ //count unique alignment numbers
+ noDiff_bam1 += from1.size();
+ noDiff_bam2 += from2.size();
+
+ return toremove1.size();
+ }
+
+ public static void main(String[] args) throws Exception{
+
+ Options op = new Options(AlignerCompare.class, args);
+ if(op.hasHelpOption()){
+ System.out.println(Messages.getMessage("USAGE_AlignerCompare"));
+ op.displayHelp();
+ System.exit(0);
+ }
+
+ if( op.getInputFileNames().length != 2
+ || op.getOutputFileNames().length != 1 ){
+ System.err.println("improper parameters passed on the command line, please refer to:");
+ System.out.println(Messages.getMessage("USAGE_AlignerCompare"));
+ op.displayHelp();
+ System.exit(1);
+ }
+
+ File f1 = new File(op.getInputFileNames()[0]);
+ File f2 = new File(op.getInputFileNames()[1]);
+ if(! f1.exists() || ! f2.exists())
+ throw new Exception("input file does not exist: " + args[0] + " or " + args[1]);
+
+ //assign to true if no "compareAll" option
+ boolean flag = !
op.hasCompareAllOption(); + + if(op.hasLogOption()) + logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getLogFile(), op.getLogLevel()); + else + logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); + + String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + logger.logInitialExecutionStats( "qmule " + AlignerCompare.class.getName(), version,args); + + long startTime = System.currentTimeMillis(); + AlignerCompare compare = new AlignerCompare( f1, f2, op.getOutputFileNames()[0], flag ); + + logger.info( String.format("It took %d hours, %d minutes to perform the comparison", + (int) (System.currentTimeMillis() - startTime) / (1000*60*60), + (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) ); + logger.logFinalExecutionStats(0); + + } + + +} diff --git a/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java b/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java index 6f4167cc2..ee7a1eb00 100644 --- a/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java +++ b/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java @@ -26,8 +26,8 @@ import org.qcmg.common.model.ChrRangePosition; import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; public class AnnotateDCCWithGFFRegions { diff --git a/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- b/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- new file mode 100644 index 000000000..ee7a1eb00 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- @@ -0,0 +1,710 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; +import java.util.Vector; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionName; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + + +public class AnnotateDCCWithGFFRegions { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private List chromosomes = new ArrayList(); + private final int exitStatus = 0; + private Map> inputRecords = new HashMap>(); + private final Map> compareRecords = new HashMap>(); + private int overlapCount = 0; + private int notOverlappingCount = 0; + private int recordCount; + private Vector inputFileHeader = new Vector(); + private String inputFileType; + private String compareFileType; + private static QLogger logger; + private static final String MAF = "maf"; + private static final String GFF3 = "gff3"; + private static final String BED = "bed"; + private static final String VCF = "vcf"; + private static final String TAB = "txt"; + private static final String DCC1 = "dcc1"; + private static final String DCCQ = "dccq"; + private BufferedWriter outputFileWriter; + private File outputFile; + private String[] features; + private boolean stranded; + private final int GFF_STRAND_INDEX = 6; + private int DCC_STRAND_INDEX = -1; + private int QCMGFLAG_COLUMN_INDEX = -1; + private int REFERENCE_ALLELE_INDEX = -1; + private int TUMOUR_ALLELE_INDEX = -1; + private String annotation; + private int MUTATION_TYPE_INDEX; + //private static final int PATIENT_MIN = 5; + + public int engage() throws Exception { + + loadGFFFile(cmdLineInputFiles[1], compareRecords); + if (compareRecords.isEmpty()) { + logger.info("No positions loaded from gff file"); + } + + logger.info("Starting to process DCC records."); + + outputFile = new File(cmdLineOutputFiles[0]); + + outputFileWriter = new BufferedWriter(new FileWriter(outputFile)); + + inputFileType = null; + inputFileType = getFileType(cmdLineInputFiles[0]); + recordCount = loadDCCFile(cmdLineInputFiles[0], inputFileHeader, inputFileType); + logger.info("Finished processing DCC records."); + outputFileWriter.close(); + logger.info("SUMMARY"); + logger.info("Total DCC Records: " + recordCount); + logger.info("Total Records in supplied reference regions: " + overlapCount); + logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); + return exitStatus; + } + + private String getFileType(String fileName) throws QMuleException { + int index = fileName.lastIndexOf(".") + 1; + String name = fileName.substring(index, fileName.length()); + + if (name.equals("dcc")) { + return "dcc1"; + } + + if (!name.equals(DCC1) && !name.equals(DCCQ)) { + throw new QMuleException("FILE_TYPE_ERROR"); + } + + return name; + } + + private int loadGFFFile(String file, Map> records) throws Exception { + TabbedFileReader reader = new TabbedFileReader(new File(file)); + int recordCount = 0; + try { + + Iterator iterator = reader.getRecordIterator(); + + while 
(iterator.hasNext()) { + + TabbedRecord tab = iterator.next(); + + if (tab.getData().startsWith("#")) { + continue; + } + recordCount++; + ChrPosition chrPos = getChrPosition(GFF3, tab, Integer.toString(recordCount)); + String key = chrPos.getChromosome().replace("chr", ""); + if (records.containsKey(key)) { + records.get(key).put(chrPos, tab); + } else { + TreeMap map = new TreeMap(); + map.put(chrPos, tab); + records.put(key,map); + } + if (!chromosomes.contains(key)) { + chromosomes.add(key); + } + } + } finally { + reader.close(); + } + + logger.info("loaded gff file, total records: " + recordCount); + return recordCount; + } + + private int loadDCCFile(String file, Vector header, String fileType) throws Exception { + TabbedFileReader reader = new TabbedFileReader(new File(file)); + + int recordCount = 0; + try { + + Iterator iterator = reader.getRecordIterator(); + + if (reader.getHeader() != null) { + Iterator iter = reader.getHeader().iterator(); + while (iter.hasNext()) { + header.add(iter.next()); + } + } + while (iterator.hasNext()) { + + TabbedRecord inputRecord = iterator.next(); + if (inputRecord.getData().startsWith("#") || inputRecord.getData().startsWith("Hugo") || inputRecord.getData().startsWith("analysis") || + inputRecord.getData().startsWith("mutation")) { + header.add(inputRecord.getData()); + continue; + } + + if (header.size() > 0) { + parseDCCHeader(header, fileType); + logger.info("Column of DCC file to annotate: " + QCMGFLAG_COLUMN_INDEX); + writeHeader(fileType, header); + header.clear(); + } + + recordCount++; + ChrPosition chrPos = getChrPosition(fileType, inputRecord, null); + String key = chrPos.getChromosome().replace("chr", ""); + TreeMap compareMap = compareRecords.get(key); + boolean isOverlapping = false; + if (compareMap != null) { + //check to see if it is overlapping with the comparison reference region + for (Entry compareEntry : compareMap.entrySet()) { + ChrPosition comparePos = compareEntry.getKey(); + if (comparePos.getEndPosition() < chrPos.getStartPosition()) { + continue; + } else if (comparePos.getStartPosition() > chrPos.getEndPosition()) { + break; + } else { + String[] vals = inputRecord.getDataArray(); + + if (annotation != null) { + String oldInfo = vals[QCMGFLAG_COLUMN_INDEX]; + if (!oldInfo.contains("GERM") && tabbedRecordMatchesCompareRecord(chrPos, inputRecord, compareEntry)) { + if (annotation != null && !oldInfo.contains("GERM")) { + if (annotateWithGermline(vals, compareEntry.getValue().getDataArray())) { + isOverlapping = true; + if (!oldInfo.equals("") && !oldInfo.endsWith(";")) { + oldInfo += ";"; + } + oldInfo += annotation; + inputRecord = buildOutputString(inputRecord, vals, oldInfo); + } + } + } + } else { + if (tabbedRecordFallsInCompareRecord(chrPos, inputRecord, compareEntry)) { + isOverlapping = true; + String oldInfo = vals[QCMGFLAG_COLUMN_INDEX]; + //annotate with gff feature + String feature = getFeatures(compareEntry.getValue()); + if (!oldInfo.equals("") && !oldInfo.endsWith(";") && !feature.equals("")) { + oldInfo += ";"; + } + oldInfo += feature; + inputRecord = buildOutputString(inputRecord, vals, oldInfo); + } + } + + } + } + } + + if (isOverlapping) { + overlapCount++; + } else { + notOverlappingCount++; + } + + writeRecord(inputRecord); + + if (recordCount % 50000 == 0) { + logger.info("Processed records: " + recordCount); + } + } + } finally { + reader.close(); + } + return recordCount; + } + + private TabbedRecord buildOutputString(TabbedRecord inputRecord, String[] vals, + String oldInfo) { + 
vals[QCMGFLAG_COLUMN_INDEX] = oldInfo; + String data= ""; + for (String s: vals) { + data += s + "\t"; + } + inputRecord.setData(data); + return inputRecord; + } + + private boolean annotateWithGermline(String[] inputValues, String[] gffValues) throws QMuleException { + String[] attribs = gffValues[getFeatureIndex("attribs")].split(";"); + String gffMotif = getGFF3Motif(attribs); + //int patientCount = getPatientCount(attribs); + if (gffMotif == null) { + String position = gffValues[0] + ":" + gffValues[3] + "-" + gffValues[4]; + throw new QMuleException("NULL_GFF_MOTIF", position); + } + String dccMotif = getDCCMotif(inputValues); + if ((dccMotif == null || gffMotif.equals(dccMotif))) { + return true; + } + + return false; + } + + private int getPatientCount(String[] attribs) { + for (String s: attribs) { + if (s.startsWith("PatientCount")) { + return new Integer(s.split("=")[1]); + } + } + return 0; + } + + private String getGFF3Motif(String[] attribs) { + + String referenceAllele = null; + String tumourAllele = null; + for (String s: attribs) { + if (s.startsWith("ReferenceAllele")) { + referenceAllele = s.split("=")[1]; + } + if (s.startsWith("TumourAllele")) { + tumourAllele = s.split("=")[1]; + } + } + + if (referenceAllele.contains("-") && !tumourAllele.contains("-")) { + return tumourAllele; + } + if (!referenceAllele.contains("-") && tumourAllele.contains("-")) { + return referenceAllele; + } + return null; + } + + private String getDCCMotif(String[] inputValues) { + String mutationType = inputValues[MUTATION_TYPE_INDEX]; + String refAllele = inputValues[REFERENCE_ALLELE_INDEX]; + String tumourAllele = inputValues[TUMOUR_ALLELE_INDEX]; + + if (mutationType.equals("2")) { + return tumourAllele; + } else if (mutationType.equals("3")) { + return refAllele; + } + return null; + } + + public void parseDCCHeader(List headers, String inputFileType) throws QMuleException { + + for (String header: headers) { + String[] values = header.split("\t"); + if (values.length == 28 && inputFileType.equals(DCC1) + || values.length == 39 && inputFileType.equals(DCCQ)) { + //check dcc header + for (int i=0; i compareEntry) { + if (compareEntry != null) { + ChrPosition compareChrPos = compareEntry.getKey(); + if ((inputChrPos.getStartPosition() == compareChrPos.getStartPosition() + && inputChrPos.getEndPosition() == compareChrPos.getEndPosition())) { + //check strand if this option is provided + if (stranded) { + String inputStrand = inputRecord.getDataArray()[DCC_STRAND_INDEX]; + String compareStrand = compareEntry.getValue().getDataArray()[GFF_STRAND_INDEX]; + if (inputStrand.equals(compareStrand)) { + return true; + } + } else { + return true; + } + } + } + return false; + } + + private boolean tabbedRecordFallsInCompareRecord(ChrPosition inputChrPos, TabbedRecord inputRecord, Entry entry) { + if (entry != null) { + ChrPosition compareChrPos = entry.getKey(); + if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() && inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || + (inputChrPos.getEndPosition() >= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() <= compareChrPos.getEndPosition()) + || (inputChrPos.getStartPosition() <= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() >= compareChrPos.getEndPosition())) { + //check strand if this option is provided + if (stranded) { + String inputStrand = inputRecord.getDataArray()[DCC_STRAND_INDEX]; + String compareStrand = entry.getValue().getDataArray()[GFF_STRAND_INDEX]; + if 
(inputStrand.equals(compareStrand)) { + return true; + } + } else { + return true; + } + } + } + return false; + } + + public String[] getCmdLineInputFiles() { + return cmdLineInputFiles; + } + + public void setCmdLineInputFiles(String[] cmdLineInputFiles) { + this.cmdLineInputFiles = cmdLineInputFiles; + } + + + private void writeHeader(String file, Vector header) throws IOException { + + for (String h: header) { + outputFileWriter.write(h + "\n"); + } + } + + public List getChromosomes() { + return chromosomes; + } + + public void setChromosomes(List chromosomes) { + this.chromosomes = chromosomes; + } + + + public int getOverlapCount() { + return overlapCount; + } + + public void setOverlapCount(int overlapCount) { + this.overlapCount = overlapCount; + } + + public int getNotOverlappingCount() { + return notOverlappingCount; + } + + public void setNotOverlappingCount(int notOverlappingCount) { + this.notOverlappingCount = notOverlappingCount; + } + + public int getMafCount() { + return recordCount; + } + + public void setMafCount(int mafCount) { + this.recordCount = mafCount; + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(AnnotateDCCWithGFFRegions.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("AnnotateDCCWithGFFRegions", AnnotateDCCWithGFFRegions.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 2) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + cmdLineOutputFiles = options.getOutputFileNames(); + if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + + for (String file : cmdLineOutputFiles) { + if (new File(file).exists() && !new File(file).isDirectory()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + features = options.getFeature(); + annotation = options.getAnnotation(); + if (features == null && annotation == null) { + logger.info("Features to annotate: " + "feature"); + } else if (features != null){ + String featureString = new String(); + for (String f : features) { + featureString += f; + } + logger.info("Features to annotate: " + featureString); + } + logger.info("Annotation is : " + annotation); + stranded = options.hasStrandedOption(); + if (options.getColumn() != null) { + this.QCMGFLAG_COLUMN_INDEX = new Integer(options.getColumn()) - 1; + } + + + + logger.info("Require matching strand: " + stranded); + logger.info("DCC file: " + cmdLineInputFiles[0]); + logger.info("GFF file: " + cmdLineInputFiles[1]); + + } + + return returnStatus; + } + + public static void main(String[] args) throws Exception { + AnnotateDCCWithGFFRegions sp = new AnnotateDCCWithGFFRegions(); + LoadReferencedClasses.loadClasses(AnnotateDCCWithGFFRegions.class); + sp.setup(args); + int exitStatus = sp.engage(); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + public String[] getCmdLineOutputFiles() { + return cmdLineOutputFiles; + } + + public void setCmdLineOutputFiles(String[] cmdLineOutputFiles) { + this.cmdLineOutputFiles = cmdLineOutputFiles; + } + + public Map> getInputRecords() { + return inputRecords; + } + + public void setInputRecords( + Map> inputRecords) { + this.inputRecords = inputRecords; + } + + public Vector getInputFileHeader() { + return inputFileHeader; + } + + public void setInputFileHeader(Vector inputFileHeader) { + this.inputFileHeader = inputFileHeader; + } + + public File getOutputFile() { + return outputFile; + } + + public int getREFERENCE_ALLELE_INDEX() { + return REFERENCE_ALLELE_INDEX; + } + + public void setREFERENCE_ALLELE_INDEX(int rEFERENCE_ALLELE_INDEX) { + REFERENCE_ALLELE_INDEX = rEFERENCE_ALLELE_INDEX; + } + + public int getTUMOUR_ALLELE_INDEX() { + return TUMOUR_ALLELE_INDEX; + } + + public void setTUMOUR_ALLELE_INDEX(int tUMOUR_ALLELE_INDEX) { + TUMOUR_ALLELE_INDEX = tUMOUR_ALLELE_INDEX; + } + + public int getMUTATION_TYPE_INDEX() { + return MUTATION_TYPE_INDEX; + } + + public void setMUTATION_TYPE_INDEX(int mUTATION_TYPE_INDEX) { + MUTATION_TYPE_INDEX = mUTATION_TYPE_INDEX; + } + + public void setOutputFile(File outputFile) { + this.outputFile = outputFile; + } + + public String getAnnotation() { + return this.annotation; + } + +} diff --git a/qmule/src/org/qcmg/qmule/BAM2CS.java-- b/qmule/src/org/qcmg/qmule/BAM2CS.java-- new file mode 100644 index 000000000..13d4d21f5 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BAM2CS.java-- @@ -0,0 +1,183 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
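tabbedRecordFallsInCompareRecord above tests three OR'd cases (start inside, end inside, or fully enclosing); for closed intervals these collapse to the standard two-clause overlap test. A small sketch of that equivalence follows, with hypothetical names that are not part of the patch.

// Two closed ranges [s1, e1] and [s2, e2] overlap exactly when each starts
// no later than the other ends; this is equivalent to the three OR'd cases
// used by tabbedRecordFallsInCompareRecord.
final class RangeOverlap {
    static boolean overlaps(int s1, int e1, int s2, int e2) {
        return s1 <= e2 && s2 <= e1;
    }

    public static void main(String[] args) {
        System.out.println(overlaps(10, 20, 15, 30)); // true  - partial overlap
        System.out.println(overlaps(10, 20, 12, 18)); // true  - second range enclosed
        System.out.println(overlaps(10, 20, 21, 30)); // false - adjacent, no overlap
    }
}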
+ */
+package org.qcmg.qmule;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+
+import java.io.*;
+import java.net.InetAddress;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.HashMap;
+import java.util.Iterator;
+
+import org.qcmg.common.string.StringUtils;
+
+
+public class BAM2CS {
+ File inBAM;
+ File outDir;
+ HashMap<Integer, PrintWriter> outFast = new HashMap<Integer, PrintWriter>();
+ HashMap<Integer, PrintWriter> outQual = new HashMap<Integer, PrintWriter>();
+
+
+ BAM2CS(final String[] args) throws Exception{
+ inBAM = new File(args[0]);
+ outDir = new File(args[1]);
+ printHeader(null);
+ }
+
+ /**
+ * retrieve the CS and CQ values from each BAM record and output them to csfasta or qual files
+ * @throws Exception
+ */
+ void CreateCSfile() throws Exception{
+
+ SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault();
+ SamReader reader = samReaderFactory.open(inBAM);
+ int num = 0;
+ for (SAMRecord record : reader) {
+ String id = ">" + record.getReadName();
+ Add2Fasta(id, record.getAttribute("CS").toString());
+ add2Qual(id, record.getAttribute("CQ").toString());
+ num ++;
+ }
+
+ reader.close();
+ closeWriters();
+
+ System.out.println(getTime() + " total output records " + num);
+ System.exit(0);
+ }
+
+ /**
+ * Add header information to the Writer. If the Writer is null, print to stdout
+ * @param Writer
+ * @throws Exception
+ */
+ private void printHeader(PrintWriter Writer) throws Exception{
+ if(Writer == null){
+ System.out.println(getTime() + " tool name: qmule org.qcmg.qmule.BAM2CS");
+ System.out.println(getTime() + " host: " + InetAddress.getLocalHost().getHostName());
+ System.out.println(getTime() + " input: " + inBAM.getAbsolutePath());
+ System.out.println(getTime() + " output directory: " + outDir.getAbsolutePath());
+ }else{
+ Writer.println("#" + getTime() + " tool name: qmule org.qcmg.qmule.BAM2CS");
+ Writer.println("#" + getTime() + " host: " + InetAddress.getLocalHost().getHostName());
+ Writer.println("#" + getTime() + " input: " + inBAM.getAbsolutePath());
+ }
+ }
+
+ private void closeWriters(){
+ //close all csfasta files
+ Iterator<PrintWriter> itr = outFast.values().iterator();
+ while(itr.hasNext()){
+ PrintWriter Writer = itr.next();
+ Writer.close();
+ }
+
+ //close all qual files
+ itr = outQual.values().iterator();
+ while(itr.hasNext()){
+ PrintWriter Writer = itr.next();
+ Writer.close();
+ }
+ }
+
+ /**
+ * Add a raw color-space sequence to the output csfasta; if the output file doesn't exist, create a new one with header lines
+ * @param id
+ * @param seq
+ * @throws Exception
+ */
+ private void Add2Fasta(String id, String seq) throws Exception{
+ //sequence length is read length - 1 since the read starts with the primer base 'T' or 'G'
+ int len = seq.length() - 1;
+ PrintWriter Writer;
+
+ //get the writer or create a new one
+ if(outFast.containsKey(len)){
+ Writer = outFast.get(len);
+ }else{
+ String fname = inBAM.getName();
+ int index = fname.lastIndexOf('.');
+ fname = fname.substring(0,index) + "." + len + ".csfasta";
+ File csFile = new File(outDir, fname);
+ Writer = new PrintWriter(new FileWriter(csFile));
+ outFast.put(len, Writer);
+ printHeader(Writer);
+ System.out.println(getTime() + " creating output: " + csFile.getAbsolutePath() );
+ }
+
+ Writer.println(id);
+ Writer.println(seq);
+ }
+ /**
+ * convert the CQ value into a raw qual sequence and add it to the output qual;
+ * if the output file doesn't exist, create a new one with header lines.
+ * @param id + * @param seq + * @throws Exception + */ + void add2Qual(String id, String seq) throws Exception{ + int len = seq.length(); + PrintWriter writer; + + //get writer or create an new one + if(outQual.containsKey(len)){ + writer = outQual.get(len); + }else{ + String fname = inBAM.getName(); + int index = fname.lastIndexOf('.'); + fname = fname.substring(0,index) + "." + len + ".qual"; + File csFile = new File(outDir, fname); + writer = new PrintWriter(new FileWriter(csFile)); + outQual.put(len, writer); + printHeader(writer); + System.out.println(getTime() + " creating output: " + csFile.getAbsolutePath() ); + } + + //convert ascii to int + String qual = ""; + for(int i = 0; i < len; i ++){ + char c = seq.charAt(i); + int j = c; + + if(StringUtils.isNullOrEmpty(qual)){ + qual += j; + } else { + qual += " " + j; + } + } + + writer.println(id); + writer.println(qual); + + } + + private String getTime(){ + Calendar currentDate = Calendar.getInstance(); + SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); + return "[" + formatter.format(currentDate.getTime()) + "]"; + } + public static void main(final String[] args) throws IOException, InterruptedException { + + try{ + BAM2CS myCS = new BAM2CS(args); + myCS.CreateCSfile(); + System.exit(0); + }catch(Exception e){ + System.err.println(e.toString()); + Thread.sleep(1); + System.out.println("usage: qmule org.qcmg.qmule.BAM2CS "); + System.exit(1); + } + + } +} diff --git a/qmule/src/org/qcmg/qmule/BAMCompress.java-- b/qmule/src/org/qcmg/qmule/BAMCompress.java-- new file mode 100644 index 000000000..7ae4254a3 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BAMCompress.java-- @@ -0,0 +1,156 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
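add2Qual above rewrites each CQ character as an integer. The sketch below shows the conventional decode; treat the Phred+33 offset as an assumption about the input (the SAM spec encodes CQ with the same scheme as QUAL), since add2Qual itself writes the raw character codes without an offset. Class and method names here are illustrative only.

import java.util.StringJoiner;

// Illustrative only: decode a CQ tag value into space-separated quality integers.
final class ColorQualDecoder {
    static String decode(String cq) {
        StringJoiner qual = new StringJoiner(" ");
        for (int i = 0; i < cq.length(); i++) {
            qual.add(Integer.toString(cq.charAt(i) - 33)); // Phred+33 assumption
        }
        return qual.toString();
    }

    public static void main(String[] args) {
        // '5' is ASCII 53, so it decodes to quality 20 under Phred+33
        System.out.println(decode("5/*+")); // 20 14 9 10
    }
}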
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.List; + +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.ValidationStringency; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + +public class BAMCompress { + static QLogger logger = QLoggerFactory.getLogger(BAMCompress.class); + private static File input; + private static File output; + private static int level; + + BAMCompress(File input, File output, int level) throws Exception{ + this.input = input; + this.output = output; + this.level = level; + + logger.info("input file: " + input.getAbsolutePath()); + logger.info("output file name: " + output.getAbsolutePath()); + logger.info("compress level for output BAM: " + level); + } + + public void replaceSeq() throws Exception{ + + SamReader reader = SAMFileReaderFactory.createSAMFileReader( input, ValidationStringency.SILENT); + SAMFileWriter writer = new SAMFileWriterFactory() .makeBAMWriter(reader.getFileHeader(), false, output, level); + + for( SAMRecord record : reader){ + //only replace fully mapped reads, that is no clipping, indels and pading + if( seekFullMppaed(record) && seekMismatch(record) ){ + byte[] base = record.getReadBases(); + for(int i = 0; i < base.length; i++) + base[i] = 'N'; + record.setReadBases(base); + } + + if(record.isValid() == null) // if valid + writer.addAlignment( record ); + } + + reader.close(); + writer.close(); + + logger.info( "input " + reportFileSize(input) ); + logger.info( "output " + reportFileSize(output) ); + + } + + public String reportFileSize(File f){ + + double bytes_in = f.length(); + double kilobytes = (bytes_in / 1024); + double megabytes = (kilobytes / 1024); + double gigabytes = (megabytes / 1024); + + return String.format("file size is %.2fG or %.2fK", gigabytes, kilobytes); + } + + + private boolean seekMismatch(SAMRecord r) { + String attribute = (String)r.getAttribute("MD"); + if (null != attribute) { + for (int i = 0, size = attribute.length() ; i < size ; ) { + char c = attribute.charAt(i); + if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { + return false; + } else if ( c == '^') { + //skip the insertion base + while (++i < size && Character.isLetter(attribute.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... 
+ }
+ return true;
+ }
+ return false;
+ }
+
+ private boolean seekFullMppaed(SAMRecord r){
+
+ if(r.getReadUnmappedFlag())
+ return false;
+
+ //reads with clips or indels, skips, pads
+ List<CigarElement> ele = r.getCigar().getCigarElements();
+ for (CigarElement element : r.getCigar().getCigarElements()){
+ if( element.getLength() > 0){
+ if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) {
+ return false;
+ }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){
+ return false;
+ }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+
+
+ public static void main(String[] args) throws Exception{
+ Options op = new Options(BAMCompress.class, args);
+ if(op.hasHelpOption()){
+ System.out.println(Messages.getMessage("USAGE_BAMCompress"));
+ op.displayHelp();
+ System.exit(0);
+ }
+
+ String output = op.getOutputFileNames()[0];
+ String input = op.getInputFileNames()[0];
+ if(! new File(input).exists() )
+ throw new Exception("input file does not exist: " + args[0]);
+
+ if(op.hasLogOption())
+ logger = QLoggerFactory.getLogger(BAMCompress.class, op.getLogFile(), op.getLogLevel());
+ else
+ logger = QLoggerFactory.getLogger(BAMCompress.class, output + ".log", op.getLogLevel());
+
+ String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion();
+ logger.logInitialExecutionStats( "qmule " + BAMCompress.class.getName(), version,args);
+
+ int level = op.getcompressLevel(); //default compress level
+
+ logger.logInitialExecutionStats( "qmule " + BAMCompress.class.getName(), null,args);
+
+ long startTime = System.currentTimeMillis();
+ BAMCompress compress = new BAMCompress(new File(input), new File(output) , level );
+ compress.replaceSeq();
+
+ logger.info( String.format("It took %d hours, %d minutes to perform the compression",
+ (int) (System.currentTimeMillis() - startTime) / (1000*60*60),
+ (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) );
+ logger.logFinalExecutionStats(0);
+
+ }
+
+
+}
diff --git a/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- b/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java--
new file mode 100644
index 000000000..363f5ccbc
--- /dev/null
+++ b/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java--
@@ -0,0 +1,250 @@
+/**
+ * © Copyright The University of Queensland 2010-2014.
+ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
+ *
+ * This code is released under the terms outlined in the included LICENSE file.
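seekMismatch above (and countMismatch in BamMismatchCounts further below) walk the MD tag, counting A/C/G/T/N characters as substitutions and skipping the reference bases that follow a '^' deletion marker. A standalone version of that walk with a worked example; the class name is hypothetical.

// Count substitutions recorded in an MD tag: letters count as mismatches,
// except the run of letters immediately after '^', which describes deleted
// reference bases and is skipped.
final class MdMismatchCounter {
    static int count(String md) {
        int count = 0;
        for (int i = 0, size = md.length(); i < size; ) {
            char c = md.charAt(i);
            if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') {
                count++;
                i++;
            } else if (c == '^') {
                while (++i < size && Character.isLetter(md.charAt(i))) { }
            } else {
                i++; // digits (matched-run lengths) just advance the cursor
            }
        }
        return count;
    }

    public static void main(String[] args) {
        System.out.println(count("101"));       // 0 - perfect match
        System.out.println(count("10A5^AC6"));  // 1 - one substitution, deletion run skipped
    }
}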
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.sql.ResultSet; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMReadGroupRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +//import org.qcmg.db.ConnectionType; +//import org.qcmg.db.GeneusDBConnection; +import org.qcmg.picard.SAMFileReaderFactory; + +public class BAMHeaderChecker { + /* + private static final String SEPERATOR = "&"; + + private static QLogger logger; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + + private final List bamFiles = new ArrayList(); + private List bamDirectories = new ArrayList(); + + private final Map results = new HashMap(); + + private int exitStatus; + + private int engage() throws Exception { + + bamDirectories = Arrays.asList(FileUtils.findDirectories(cmdLineInputFiles[0], "seq_final", true)); + + logger.info("Will check the following directories for bam files:"); + for (File f : bamDirectories) { + logger.info(f.getAbsolutePath()); + bamFiles.addAll(Arrays.asList(FileUtils.findFilesEndingWithFilter(f.getAbsolutePath(), ".bam"))); + } + + // only operates on seq_final bams +// bamFiles = Arrays.asList(FileUtils.findFiles(cmdLineInputFiles[0], ".bam")); + + // loop through each file and get patient, experiment and input_type + String patient = null; + String experiment = null; + String input = null; + + GeneusDBConnection conn = new GeneusDBConnection(ConnectionType.QCMG_MAPSET); + + try { + for (File bamFile : bamFiles) { + String bamFileName = bamFile.getAbsolutePath(); + logger.info("examining bam file: " + bamFileName); + String bamFileSmallName = bamFileName.substring(bamFileName.lastIndexOf(System.getProperty("file.separator")) + 1 , bamFileName.indexOf(".bam")); + + patient = bamFileSmallName.substring(0, 9); //APGI_1234 + experiment = bamFileSmallName.substring(10, bamFileSmallName.lastIndexOf(".")); //APGI_1234 + input = bamFileSmallName.substring(bamFileSmallName.lastIndexOf(".") + 1); //APGI_1234 + logger.info("patient: " + patient + ", experiment: " + experiment + ", input: " + input); + + // get details from bam header + List constituentFiles = getConstituentBamFiles(bamFile); + List trackliteConstituentFiles = getTrackliteBamFiles(patient, experiment, input, conn); + + //loop through tracklite constituentFiles and check that they all have an entry in bam header ConstituentFiles + for (String trackliteBam : trackliteConstituentFiles) { + String [] params = trackliteBam.split(SEPERATOR); + + String result = "OK"; + boolean trackliteMatch = false; + + for (String headerFileBam : constituentFiles) { + if (headerFileBam.contains(params[0]) && headerFileBam.contains(params[1])) { + trackliteMatch = true; + break; + } + } + + if ( ! 
trackliteMatch) { + result = "no corresponding entry in bam file header for tracklite details: " + params[0] + ":" + params[1]; + logger.warn(result); + } + results.put(bamFileSmallName, result); + } + } + } finally { + conn.closeConnection(); + } + + logger.info(""); + logger.info(""); + logger.info("SUMMARY:"); + for (Entry resultsEntry : results.entrySet()) { + logger.info(resultsEntry.getKey() + " : " + resultsEntry.getValue()); + } + logger.info("DONE"); + + return exitStatus; + } + + private List getTrackliteBamFiles(String patient, String experiment, String input, GeneusDBConnection conn) throws Exception { + List trackliteResults = new ArrayList (); + + String sql = "SELECT patient_id, run_name, barcode FROM tracklite_run tr, tracklite_sample ts" + + " WHERE tr.sample_id = ts.processing_id" + + " AND ts.patient_id = '" + patient.replace('_', '-') + "'" + + " AND tr.experiment_type = '" + experiment + "'" + + " AND tr.input_type = '" + input + "'" + + "AND tr.run_status = 'complete'"; + + ResultSet rs = null; + try { + rs = conn.executeSelectQuery(sql); + + while (rs.next()) { + String runName = rs.getString(2); + String barCode = rs.getString(3); + logger.debug("runName: " + runName + ", barCode: " + barCode); + trackliteResults.add(runName + SEPERATOR + barCode); + } + + } finally { + try { + if (null != rs && null != rs.getStatement() ) { + rs.getStatement().close(); + } + } finally { + if (null != rs) rs.close(); + } + } + + return trackliteResults; + } + + private List getConstituentBamFiles(File bamFile) { + List results = new ArrayList(); + SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile); + try { + + SAMFileHeader header = reader.getFileHeader(); + // get the read groups + for (SAMReadGroupRecord readGroup : header.getReadGroups()) { + String constituentBamFile = readGroup.getAttribute("zc"); + if (null == constituentBamFile) + constituentBamFile = readGroup.getAttribute("ZC"); + + if (null != constituentBamFile) { + constituentBamFile = constituentBamFile.substring(2); + logger.debug("read group ZC attribute: " + constituentBamFile); + results.add(constituentBamFile); + } else { + logger.debug("null ZC attribute in file: " + bamFile.getAbsolutePath()); + } + } + + } finally { + reader.close(); + } + return results; + } + + + + + public static void main(String[] args) throws Exception { + BAMHeaderChecker sp = new BAMHeaderChecker(); + int exitStatus = 0; + try { + exitStatus = sp.setup(args); + } catch (Exception e) { + exitStatus = 1; + if (null != logger) + logger.error("Exception caught whilst running BAMHeaderChecker:", e); + else System.err.println("Exception caught whilst running BAMHeaderChecker"); + } + + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(BAMHeaderChecker.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("BAMHeaderChecker", BAMHeaderChecker.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + */ +} diff --git a/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- b/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- new file mode 100644 index 000000000..b8646c1ee --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- @@ -0,0 +1,124 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import htsjdk.samtools.Cigar; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; + +public class BAMPileupUtil { + + public static int SM_CUTOFF = 14; + public static int MD_CUTOFF = 3; + public static int CIGAR_CUTOFF = 34; + + public static int readLengthMatchCounter = 0; + public static int posiitonInDeletionCounter = 0; + + private static final QLogger logger = QLoggerFactory.getLogger(BAMPileupUtil.class); + + +// public static void examinePileup(List sams, VCFRecord record) { +//// int normalCoverage = 0; +// String pileup = ""; +// String qualities = ""; +// for (SAMRecord sam : sams ) { +// +// if ( eligibleSamRecord(sam)) { +//// ++normalCoverage; +// +// int offset = getReadPosition(sam, record.getPosition()); +// +// if (offset < 0) { +// logger.info("invalid offset position - position falls within deletion?? 
position: "+ record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", alignment end: " + sam.getAlignmentEnd() + ", read length: " + sam.getReadLength() + " cigar: "+ sam.getCigarString()); +// continue; +// } +// +// if (offset >= sam.getReadLength()) { +//// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); +// // set to last entry in sequence +//// logger.info("adjusting offset to read length -1"); +//// String read = sam.getReadString(); +//// int refPosition = sam.getReferencePositionAtReadPosition(offset); +// logger.info("offset: " + offset + ", position: " + record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", unclipped alignment start: " + sam.getUnclippedStart() + ", alignment end: " + sam.getAlignmentEnd()); +// logger.info( sam.format()); +//// offset = sam.getReadLength() -1; +//// logger.info("char at adjusted offset: " + read.charAt(offset)); +//// logger.info("md tag: " + sam.getStringAttribute("MD")); +// continue; +// } +// +// char c = sam.getReadString().charAt(offset); +// pileup += sam.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c; +// qualities += sam.getBaseQualityString().charAt(offset); +// } +// } +// +// +// if (pileup.length() > 0) +// record.setPileup(PileupUtil.getPileupCounts(pileup, qualities)); +// +// } + + /** + * Determines whether a sam record is eligible by applying some filtering criteria. + * Currently filters on the SM tag value, some of the flags, and the Cigar string + * + *

NOTE that we should also be filtering on MD tag, but GATK removes this + * tag when it does its local realignment, so there is no need to include this check for the time being + * + * @param record SAMRecord that is being put through the filter check + * @return boolean indicating if the record has passed the filter + */ + public static boolean eligibleSamRecord(SAMRecord record) { + if (null == record) return false; + Integer sm = record.getIntegerAttribute("SM"); + return ! record.getDuplicateReadFlag() + && (null == sm ? false : sm.intValue() > SM_CUTOFF) +// && tallyMDMismatches(record.getStringAttribute("MD")) < MD_CUTOFF // + && ((record.getReadPairedFlag() && record.getSecondOfPairFlag() && record.getProperPairFlag()) + || tallyCigarMatchMismatches(record.getCigar()) > CIGAR_CUTOFF); + + } + + public static int tallyCigarMatchMismatches(Cigar cigar) { + int tally = 0; + if (null != cigar) { + for (CigarElement element : cigar.getCigarElements()) { + if (CigarOperator.M == element.getOperator()) { + tally += element.getLength(); + } + } + } + return tally; + } + + public static int tallyMDMismatches(String mdData) { + int count = 0; + if (null != mdData) { + for (int i = 0, size = mdData.length() ; i < size ; ) { + + if (isValidMismatch(mdData.charAt(i))) { + count++; + i++; + } else if ('^' == mdData.charAt(i)) { + while (++i < size && Character.isLetter(mdData.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... + } + } + return count; + } + + private static boolean isValidMismatch(char c) { + return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; + } + +} diff --git a/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- b/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- new file mode 100644 index 000000000..4501a5994 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- @@ -0,0 +1,160 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
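eligibleSamRecord above lets an unpaired read through only when tallyCigarMatchMismatches reports more than CIGAR_CUTOFF (34) matched bases. A quick illustration of that tally, assuming an htsjdk version where TextCigarCodec.decode is a static method; the class name is hypothetical.

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.TextCigarCodec;

final class CigarMatchTally {
    // Same tally as BAMPileupUtil.tallyCigarMatchMismatches: sum of the M operator lengths.
    static int matchedBases(Cigar cigar) {
        int tally = 0;
        for (CigarElement e : cigar.getCigarElements()) {
            if (CigarOperator.M == e.getOperator()) {
                tally += e.getLength();
            }
        }
        return tally;
    }

    public static void main(String[] args) {
        System.out.println(matchedBases(TextCigarCodec.decode("50M")));      // 50 - clears the 34-base cutoff
        System.out.println(matchedBases(TextCigarCodec.decode("20M5I10M"))); // 30 - falls below it
        System.out.println(matchedBases(TextCigarCodec.decode("25S25M")));   // 25 - soft clip not counted
    }
}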
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.ValidationStringency; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; + +public class BamMismatchCounts { + static QLogger logger = QLoggerFactory.getLogger(BamMismatchCounts.class); + static long[] mismatch = new long[100]; + + static HashMap counts = new HashMap(); + static long total = 0; + static long unmapped = 0; + static long clipped = 0; + static long indel = 0; + static long skipPad = 0; + static long fullMapped = 0; + static long noMDreads = 0; + + /** + * count the mismatch base number based on the MD field + * @param r: samrecord + */ + private static void countMismatch(SAMRecord r) { + String attribute = (String)r.getAttribute("MD"); + if (null != attribute) { + int count = 0; + for (int i = 0, size = attribute.length() ; i < size ; ) { + char c = attribute.charAt(i); + if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { + count++; + i++; + } else if ( c == '^') { + //skip the insertion base + while (++i < size && Character.isLetter(attribute.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... + } + mismatch[count] ++; + + }else + noMDreads ++; + + + } + + /** + * + * @param r: sam record + * @return true if this read is full length mapped without any indels, skips and pads + */ + static private Boolean seekFullMapped(SAMRecord r){ + + if(r.getReadUnmappedFlag()){ + unmapped ++; + return false; + } + //reads with clips or indel, skips, pads + else{ + List ele = r.getCigar().getCigarElements(); + for (CigarElement element : r.getCigar().getCigarElements()){ + if( element.getLength() > 0){ + if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) { + clipped ++; + return false; + }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){ + indel ++; + return false; + }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){ + skipPad ++; + return false; + } + } + } + //count mismatch after the for loop + return true; + } + } + + /** + * survey the mismatch stats on full length mapped reads + * @param args: SAM/BAM file with full path, log file with full path + * @throws Exception + */ + public static void main(final String[] args) throws Exception { + Options op = new Options(BamMismatchCounts.class, args); + if(op.hasHelpOption()){ + System.out.println(Messages.getMessage("USAGE_BamMismatchCounts")); + op.displayHelp(); + System.exit(0); + } + + + if(op.hasLogOption()) + logger = QLoggerFactory.getLogger(BamMismatchCounts.class, op.getLogFile(), op.getLogLevel()); + else + logger = QLoggerFactory.getLogger(BamMismatchCounts.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); + + String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + logger.logInitialExecutionStats( "qmule " + BamMismatchCounts.class.getName(), version,args); + + String output = op.getOutputFileNames()[0]; + String input = op.getInputFileNames()[0]; + SamReader reader = 
SAMFileReaderFactory.createSAMFileReader(new File(input), + ValidationStringency.SILENT); + + for(int i = 0; i < 100; i++) mismatch[i] = 0; + for (SAMRecord r : reader){ + total ++; + if(seekFullMapped( r)){ + fullMapped ++; + countMismatch(r); + } + } + reader.close(); + + //report mismatch + String S_mismatch = "mismatch matrix for fully mapped reads is below:\nmismatch\treads_number\tratio_to_(fullmapped,total)\n"; + for(int i = 0; i < 100; i++) + if(mismatch[i] > 0){ + int p1 = Math.round(mismatch[i] * 100 / fullMapped); + int p2 = Math.round(mismatch[i] * 100 / total); + S_mismatch += String.format("%d\t%d\t(%d%%,%d%%)\n", i,mismatch[i],p1, p2); + } + + Files.write(Paths.get(output), S_mismatch.getBytes() ); + + logger.info("total records in file: " + total ); + logger.info("unmapped records: " + unmapped); + logger.info("records with clipping (CIGAR S,H): " + clipped); + logger.info("records with indel (CIGAR I,D) : " + indel); + logger.info("records with skipping or padding (CIGAR N,P) : " + skipPad); + logger.info("records mapped full-length: " + fullMapped); + logger.info("records mapped full-length but missing MD field: " + noMDreads); + logger.info("the mismatch counts matrix is outputed to " + args[1]); + logger.logFinalExecutionStats(0); + + } + +} diff --git a/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- b/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- new file mode 100644 index 000000000..d81e01a9c --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- @@ -0,0 +1,44 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; + +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; + +public class BamRecordCounter { + + private static final QLogger logger = QLoggerFactory.getLogger(BamRecordCounter.class); + + public static void main(String args[]) { + + if (null != args && args.length > 0) { + for (String filename : args) { + SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(filename)); + long count = 0; + long duplicates = 0; + long startTime = System.currentTimeMillis(); + for (SAMRecord r : reader) { + count++; + if (r.getDuplicateReadFlag()) + duplicates++; + } + logger.info("no of records in file [" + filename + "] is: " + count); + logger.info("no of duplicate records: " + duplicates); + logger.info("It took " + (System.currentTimeMillis() - startTime) + "ms to perform the count."); + } + } else { + logger.info("USAGE: qmule " + BamRecordCounter.class.getName() + " "); + } + } + +} diff --git a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java index 3f07576bf..3b3fbc798 100644 --- a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java +++ b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java @@ -23,8 +23,8 @@ import org.qcmg.common.model.ChrPositionName; import org.qcmg.common.model.ChrRangePosition; import org.qcmg.common.util.FileUtils; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; public class CompareReferenceRegions { diff --git a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- 
b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- new file mode 100644 index 000000000..3b3fbc798 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- @@ -0,0 +1,676 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.Map.Entry; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionName; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + + +public class CompareReferenceRegions { + + private static final String MODE_ONEWAY = "oneway"; + private static final String MODE_ANNOTATE = "annotate"; + private static final String MODE_TWOWAY = "twoway"; + private static final String MODE_INTERSECT = "intersect"; + private static final String MODE_UNIQUE = "unique"; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private List chromosomes = new ArrayList(); + private int overlapCount = 0; + private int notOverlappingCount = 0; + private int recordCount; + private String mode; + private int column; + private String annotation; + private static QLogger logger; + private static final String MAF = "maf"; + private static final String GFF3 = "gff3"; + private static final String GTF = "gtf"; + private static final String BED = "bed"; + private static final String VCF = "vcf"; + private static final String TAB = "txt"; + private static final String DCC1 = "dcc1"; + + private void runOnewayComparison(File inputFile, File comparisonFile, + File outputOverlapFile, File outputNoOverlapFile) throws Exception { + + if (mode.equals(MODE_ANNOTATE)) { + logger.info("If overlapping, will annotate column: " + column+1 +" of file with the annotation " + annotation); + } + + //get a list of the chromosomes + setUp(inputFile, outputOverlapFile, outputNoOverlapFile); + + logger.info("Input file: " + inputFile.getAbsolutePath()); + logger.info("Comparison file: " + comparisonFile.getAbsolutePath()); + + logger.info("Chromosomes to analyze: " + chromosomes.size()); + + for (String c: chromosomes) { + logger.info("Getting records for chromosome: " + c); + Map inputRecords = readRecords(inputFile, c); + Map compareRecords = readRecords(comparisonFile, c); + compareRecords(inputRecords, compareRecords, outputOverlapFile, outputNoOverlapFile); + } + logSummary(); + clear(); + } + + private void logSummary() { + logger.info("SUMMARY"); + logger.info("Total Records: " + recordCount); + logger.info("Total Records in supplied reference regions: " + overlapCount); + logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); + } + + private void runAnnotateComparison(File inputFile, File comparisonFile, + File outputOverlapFile) throws Exception { + + //get a list of the chromosomes + setUp(inputFile, outputOverlapFile, null); + + logger.info("Input file: " + inputFile.getAbsolutePath()); + logger.info("Comparison file: " + 
comparisonFile.getAbsolutePath()); + + logger.info("Chromosomes to analyze: " + chromosomes.size()); + + for (String c: chromosomes) { + logger.info("Getting records for chromosome: " + c); + Map inputRecords = readRecords(inputFile, c); + Map compareRecords = readRecords(comparisonFile, c); + compareRecordsAndAnnotate(inputRecords, compareRecords, outputOverlapFile); + } + logSummary(); + clear(); + } + + private void runIntersectComparison() throws Exception { + //Set first input file as primary + File primaryInputFile = new File(cmdLineInputFiles[0]); + //Single output file + File outputFile = new File(cmdLineOutputFiles[0]); + + int[] counts = new int[cmdLineInputFiles.length]; + counts[0] = 0; + + setUp(primaryInputFile, outputFile, null); + + //logging + logger.info("Input file 1: " + primaryInputFile.getAbsolutePath()); + for (int i=1; i inputRecords = readRecords(primaryInputFile, c); + counts[0] += inputRecords.size(); + for (int i=1; i compareRecords = readRecords(compareFile, c); + counts[i] += compareRecords.size(); + compareOverlapRecords(c, inputRecords, compareRecords, getFileType(primaryInputFile)); + } + overlapCount += inputRecords.size(); + //any input records left at the end are intersecting + writeRecords(inputRecords, outputFile); + } + for (int i=0; i inputRecords = readRecords(primaryInputFile, c); + Map compareRecords = new TreeMap(); + counts[f] += inputRecords.size(); + for (int i=0; i currentRecords = readRecords(compareFile, c); + counts[i] = counts[i] + currentRecords.size(); + compareRecords.putAll(currentRecords); + } + } + compareOverlapRecords(c, inputRecords, compareRecords, getFileType(primaryInputFile)); + notOverlappingCount += inputRecords.size(); + //any input records left at the end are unique + writeRecords(inputRecords, outputFile); + logger.info(counts[f] + " total records for file " +cmdLineInputFiles[f]); + for (int i=0; i inputRecords, Map compareRecords, String inputFileType) throws Exception { + + Iterator> entries = inputRecords.entrySet().iterator(); + while (entries.hasNext()) { + Entry entry = entries.next(); + + boolean isOverlapping = compareRecord(entry, compareRecords, inputFileType); + + if (mode.equals(MODE_INTERSECT) && !isOverlapping) { + //remove input record if it isn't overlapping and won't intersect with all records + entries.remove(); + } + if (mode.equals(MODE_UNIQUE) && isOverlapping) { + entries.remove(); + } + } + } + + private void compareRecordsAndAnnotate(Map inputRecords, + Map compareRecords, + File outputOverlapFile) throws Exception { + BufferedWriter overlapWriter = new BufferedWriter(new FileWriter(outputOverlapFile, true)); + + try { + for (Entry entry : inputRecords.entrySet()) { + recordCount++; + boolean isOverlapping = compareRecord(entry, compareRecords, null); + + if (isOverlapping) { + overlapCount++; + } else { + notOverlappingCount++; + } + writeRecord(overlapWriter, entry.getValue()); + } + } finally { + overlapWriter.close(); + } + } + + private void compareRecords(Map inputRecords, + Map compareRecords, + File outputOverlapFile, File outputNoOverlapFile) throws Exception { + BufferedWriter overlapWriter = new BufferedWriter(new FileWriter(outputOverlapFile, true)); + BufferedWriter noOverlapWriter = new BufferedWriter(new FileWriter(outputNoOverlapFile, true)); + + try { + for (Entry entry : inputRecords.entrySet()) { + + recordCount++; + + boolean isOverlapping = compareRecord(entry, compareRecords, null); + + if (isOverlapping) { + overlapCount++; + writeRecord(overlapWriter, entry.getValue()); + 
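+					// A record counts as overlapping when its start/end interval intersects
+					// at least one comparison region (see tabbedRecordFallsInCompareRecord),
+					// so it is written to the overlap output here; in oneway/twoway mode,
+					// non-overlapping records fall through to the no-overlap output below.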
} else { + notOverlappingCount++; + if (mode.equals(MODE_ANNOTATE)) { + + } else { + writeRecord(noOverlapWriter, entry.getValue()); + } + } + } + } finally { + overlapWriter.close(); + noOverlapWriter.close(); + } + } + + private boolean compareRecord(Entry entry, Map compareRecords, String inputFileType) throws Exception { + ChrPosition inputChrPos = entry.getKey(); + TabbedRecord inputRecord = entry.getValue(); + boolean isOverlapping = false; + //check to see if it is overlapping with the comparison reference region + for (Entry compareEntry : compareRecords.entrySet()) { + ChrPosition comparePos = compareEntry.getKey(); + if (comparePos.getEndPosition() < inputChrPos.getStartPosition()) { + continue; + } else if (comparePos.getStartPosition() > inputChrPos.getEndPosition()) { + break; + } else { + if (tabbedRecordFallsInCompareRecord(inputChrPos, inputRecord, compareEntry)) { + isOverlapping = true; + if (mode.equals(MODE_ANNOTATE)) { + String[] values = inputRecord.getDataArray(); + String oldVal = values[column]; + if (oldVal.equals("")) { + values[column] = annotation; + } else { + if (oldVal.endsWith(";")) { + values[column] = oldVal + annotation; + } else { + values[column] = oldVal + ";" + annotation; + } + } + String data = ""; + for (String s: values) { + data += s + "\t"; + } + inputRecord.setData(data); + } + if (mode.equals(MODE_INTERSECT)) { + //change the ends?? + int[] indexes = getChrIndex(inputFileType, entry.getValue().getData().split("\t")); + String[] array = inputRecord.getDataArray(); + + if (inputChrPos.getStartPosition() > compareEntry.getKey().getStartPosition()) { + array[indexes[1]] = Integer.toString(compareEntry.getKey().getStartPosition()); + } + if (inputChrPos.getEndPosition() < compareEntry.getKey().getEndPosition()) { + array[indexes[2]] = Integer.toString(compareEntry.getKey().getEndPosition()); + } + String data = ""; + for (String s: array) { + data += s + "\t"; + } + inputRecord.setData(data); + entry.setValue(inputRecord); + } + } + } + } + return isOverlapping; + } + + + private void writeRecords(Map records, File outputFile) throws IOException { + BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile, true)); + + for (Entry entry: records.entrySet()) { + writeRecord(writer, entry.getValue()); + } + writer.close(); + } + + private void writeRecord(BufferedWriter writer, TabbedRecord record) throws IOException { + if (!record.getData().endsWith("\n")) { + record.setData(record.getData() + "\n"); + } + writer.write(record.getData()); + } + + private TreeMap readRecords(File inputFile, String chromosome) throws Exception { + + TabbedFileReader reader = new TabbedFileReader(inputFile); + TreeMap records = new TreeMap(); + String fileType = getFileType(inputFile); + try { + + Iterator iterator = reader.getRecordIterator(); + + while (iterator.hasNext()) { + + TabbedRecord tab = iterator.next(); + if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis") || tab.getData().startsWith("Chr") ) { + continue; + } + ChrPosition chrPos = getChrPosition(fileType, tab); + if (chrPos.getChromosome().equals(chromosome)) { + records.put(chrPos, tab); + } + } + + } finally { + reader.close(); + } + + return records; + } + + private String getFileType(File inputFile) { + int index = inputFile.getName().lastIndexOf(".") + 1; + String name = inputFile.getName().substring(index, inputFile.getName().length()); + + if (name.equals("dcc")) { + return "dcc1"; + } + + return name; + } + + private void 
setUp(File file, File outputFileOne, File outputFileTwo) throws Exception { + TabbedFileReader reader = new TabbedFileReader(file); + Iterator iterator = reader.getRecordIterator(); + + String fileType = getFileType(file); + List header = new ArrayList(); + if (reader.getHeader() != null) { + Iterator iter = reader.getHeader().iterator(); + while (iter.hasNext()) { + header.add(iter.next()); + } + } + + while (iterator.hasNext()) { + + TabbedRecord tab = iterator.next(); + + if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis") || tab.getData().startsWith("Chr") ) { + header.add(tab.getData()); + continue; + } + + ChrPosition chrPos = getChrPosition(fileType, tab); + + if (!chromosomes.contains(chrPos.getChromosome())) { + chromosomes.add(chrPos.getChromosome()); + } + } + reader.close(); + + if (outputFileOne != null) { + writeHeader(header, outputFileOne); + } + if (outputFileTwo != null) { + writeHeader(header, outputFileTwo); + } + + } + + private int[] getChrIndex(String inputFileType, String[] values) throws Exception { + + int chrIndex = 0; + int startIndex = 0; + int endIndex = 0; + + if (inputFileType.equals(MAF)) { + chrIndex = 4; + startIndex = 5; + endIndex = 6; + } else if (inputFileType.equals(DCC1)) { + chrIndex = 4; + startIndex = 5; + endIndex = 6; + } else if (inputFileType.equals(BED)) { + chrIndex = 0; + startIndex = 1; + endIndex = 2; + } else if (inputFileType.equals(GFF3) || inputFileType.equals(GTF)) { + chrIndex = 0; + startIndex = 3; + endIndex = 4; + } else if (inputFileType.equals(VCF)) { + chrIndex = 0; + startIndex = 1; + endIndex = 1; + if (values.length >= 8) { + String[] infos = values[7].split("\t"); + + for (String info : infos) { + String[] params = info.split("="); + if (params.length == 2) { + if (params[0].equals("END")) { + endIndex = 2; + values[2] = params[1]; + } + } + } + } + //NEED TO CHANGE FOR INDELS + } else if (inputFileType.equals(TAB)) { + chrIndex = 0; + startIndex = 1; + endIndex = 2; + } else { + throw new Exception("Input file type is not recognized"); + } + int[] arr = {chrIndex, startIndex, endIndex}; + return arr; + } + + private ChrPosition getChrPosition(String inputFileType, TabbedRecord tab) throws Exception { + String[] values = tab.getData().split("\t"); + ChrPosition chr = null; + + int[] indexes = getChrIndex(inputFileType, values); + int chrIndex = indexes[0]; + int startIndex = indexes[1]; + int endIndex = indexes[2]; + + if (inputFileType.equals(BED)) { + chr = new ChrRangePosition(values[chrIndex], new Integer(values[startIndex])+1, new Integer(values[endIndex])+1); + } else { + String chromosome = values[chrIndex]; + if (!chromosome.contains("GL") && !chromosome.startsWith("chr")) { + chromosome = "chr" + chromosome; + } + if (chromosome.equals("chrM")) { + chromosome = "chrMT"; + } + if (inputFileType.equals(MAF)) { + chr = new ChrPositionName(chromosome, new Integer(values[startIndex]), new Integer(values[endIndex]), values[0]); + } else { + chr = new ChrRangePosition(chromosome, new Integer(values[startIndex]), new Integer(values[endIndex])); + } + } + return chr; + } + + private boolean tabbedRecordFallsInCompareRecord(ChrPosition inputChrPos, TabbedRecord inputRecord, Entry entry) { + if (entry != null) { + ChrPosition compareChrPos = entry.getKey(); + if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() && inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || + (inputChrPos.getEndPosition() >= compareChrPos.getStartPosition() 
&& inputChrPos.getEndPosition() <= compareChrPos.getEndPosition()) + || (inputChrPos.getStartPosition() <= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() >= compareChrPos.getEndPosition())) { + return true; + } + } + return false; + } + + public String[] getCmdLineInputFiles() { + return cmdLineInputFiles; + } + + public void setCmdLineInputFiles(String[] cmdLineInputFiles) { + this.cmdLineInputFiles = cmdLineInputFiles; + } + + + private void writeHeader(List header, File outputOverlapFile) throws IOException { + BufferedWriter writer = new BufferedWriter(new FileWriter(outputOverlapFile, true)); + + for (String h: header) { + + writer.write(h + "\n"); + } + writer.close(); + } + + public List getChromosomes() { + return chromosomes; + } + + public void setChromosomes(List chromosomes) { + this.chromosomes = chromosomes; + } + + + public int getOverlapCount() { + return overlapCount; + } + + public void setOverlapCount(int overlapCount) { + this.overlapCount = overlapCount; + } + + public int getNotOverlappingCount() { + return notOverlappingCount; + } + + public void setNotOverlappingCount(int notOverlappingCount) { + this.notOverlappingCount = notOverlappingCount; + } + + public int getMafCount() { + return recordCount; + } + + public void setMafCount(int mafCount) { + this.recordCount = mafCount; + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(CompareReferenceRegions.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareReferenceRegions", CompareReferenceRegions.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + //output files + cmdLineOutputFiles = options.getOutputFileNames(); + + if (cmdLineOutputFiles.length >= 1) { + if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + + for (String file : cmdLineOutputFiles) { + if (new File(file).exists() && !new File(file).isDirectory()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + } + mode = options.getMode(); + if (mode == null) { + mode = MODE_ONEWAY; + } + logger.info("Mode: " + mode); + + if (mode.equals(MODE_ANNOTATE)) { + //take away 1 to get index of column rather than column number + column = new Integer(options.getColumn()) -1; + annotation = options.getAnnotation(); + } + + return engage(); + } + + return returnStatus; + } + + + private int engage() throws Exception { + + if (mode.equals(MODE_ONEWAY) || mode.equals(MODE_TWOWAY)) { + runOnewayComparison(new File(cmdLineInputFiles[0]), new File(cmdLineInputFiles[1]), new File(cmdLineOutputFiles[0]), new File(cmdLineOutputFiles[1])); + if (mode.equals(MODE_TWOWAY)) { + runOnewayComparison(new File(cmdLineInputFiles[1]), new File(cmdLineInputFiles[0]), new File(cmdLineOutputFiles[2]), new File(cmdLineOutputFiles[3])); + } + } else if (mode.equals(MODE_ANNOTATE)) { + runAnnotateComparison(new File(cmdLineInputFiles[0]), new File(cmdLineInputFiles[1]), new File(cmdLineOutputFiles[0])); + } else if (mode.equals(MODE_INTERSECT)) { + runIntersectComparison(); + } else if (mode.equals(MODE_UNIQUE)) { + runUniqueComparison(); + } else { + throw new QMuleException("MODE_ERROR", mode); + } + return 0; + } + + + private void clear() { + recordCount = 0; + overlapCount = 0; + notOverlappingCount = 0; + } + + public static void main(String[] args) throws Exception { + CompareReferenceRegions sp = new CompareReferenceRegions(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + +} diff --git a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java index 6664967ab..84fc72a32 100644 --- a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java +++ b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java @@ -8,10 +8,10 @@ import java.util.TreeSet; import org.qcmg.common.util.TabTokenizer; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedFileWriter; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; public class DbSnpChrLiftover { diff --git a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- new file mode 100644 index 000000000..84fc72a32 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- @@ -0,0 +1,86 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.Set; +import java.util.TreeSet; + +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class DbSnpChrLiftover { + + private static char TAB = '\t'; + + String inputVCF; + String outputVCF; + + + private final Set uniqueChrNames = new TreeSet(); + + public DbSnpChrLiftover() {} + + private void getUniqueChrNames() throws Exception { + TabbedFileReader reader = new TabbedFileReader(new File(inputVCF)); + TabbedFileWriter writer = new TabbedFileWriter(new File(outputVCF)); + try { + + TabbedHeader header = reader.getHeader(); + + // writer out header + writer.addHeader(header); + + for (TabbedRecord record : reader) { + String [] params = TabTokenizer.tokenize(record.getData()); + String chr = params[0]; + uniqueChrNames.add(chr); + + // switch the chr + params[0] = "chr" + chr; + + StringBuilder sb = new StringBuilder(); + for (int i = 0, len = params.length ; i < len ; i ++) { + sb.append(params[i]); + if (i < len-1) sb.append(TAB); + } + + record.setData(sb.toString()); + + writer.add(record); + } + + } finally { + try { + writer.close(); + } finally { + reader.close(); + } + } + + + for (String chr : uniqueChrNames) { + System.out.println("chr: " + chr); + } + } + + + public static void main(String[] args) throws Exception { + if (args.length < 2) + throw new IllegalArgumentException("USAGE: DbSnpChrLiftover "); + + DbSnpChrLiftover dcl = new DbSnpChrLiftover(); + + + dcl.inputVCF = args[0]; + dcl.outputVCF = args[1]; + + dcl.getUniqueChrNames(); + + } + +} diff --git a/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- b/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- new file mode 100644 index 000000000..71bd5e9cf --- /dev/null +++ b/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- @@ -0,0 +1,47 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.IOException; + +import org.qcmg.germlinedb.GermlineDBFileReader; +import org.qcmg.germlinedb.GermlineDBFileWriter; +import org.qcmg.germlinedb.GermlineDBRecord; + +public class GermlineDBStripper { + + + public static void main(String[] args) throws IOException { + + String germlineDB = args[0]; + String germlineDBClassA = args[1]; + String header = "analysis_id\tcontrol_sample_id\tvariation_id\tvariation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tflag"; + + GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDB)); + GermlineDBFileWriter writer = new GermlineDBFileWriter(new File(germlineDBClassA)); + + try { + writer.add(header+"\n"); + + // strip out all non-classA entities from Germline_DB + int totalCount = 0, classACount = 0; + for (GermlineDBRecord record : reader) { + ++totalCount; + if ("--".equals(record.getFlag())) { + ++classACount; + writer.add(record.getData() + "\n"); + } + } + System.out.println("total count: " + totalCount + ", classA count: " + classACount); + + } finally { + try { + reader.close(); + } finally { + writer.close(); + } + } + } +} diff --git a/qmule/src/org/qcmg/qmule/GetBamRecords.java-- b/qmule/src/org/qcmg/qmule/GetBamRecords.java-- new file mode 100644 index 000000000..114351d71 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/GetBamRecords.java-- @@ -0,0 +1,226 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMUtils; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.picard.QJumper; + +public class GetBamRecords { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + List records; + + private String position; + + private int exitStatus; + private static QLogger logger; + + + public int engage() throws Exception { + + logger.info("Setting up the QJumper"); + QJumper jumper = new QJumper(); + jumper.setupReader(cmdLineInputFiles[0]); + + String contig = position.substring(0, position.indexOf(":")); + int start = Integer.parseInt(position.substring(position.indexOf(":")+1)); + + logger.info("config: " + contig); + logger.info("start: " + start); + + records = jumper.getOverlappingRecordsAtPosition(contig, start, start); + + logger.info("unfiltered read count: " + records.size()+ ""); + + int filteredCount = 0, readsWithBaseAtPosition = 0, duplicateCount = 0, properlyPaired = 0,properlyPairedAll = 0, pairedAll = 0, paired = 0, notPrimaryAlignment = 0, unmapped = 0; + String qualityString = "", qualityPhredString = ""; + String baseString = ""; + int unmappedSecondaryDuplicates = 0, unmappedSecondaryDuplicatesProperly = 0; + + char[] novelStartBases = new char[1024]; // hmmmmm + Set forwardStrand = new HashSet(); + Set reverseStrand = new HashSet(); + int j = 0; + + for (SAMRecord rec : records) { + int readPosition = org.qcmg.picard.util.SAMUtils.getIndexInReadFromPosition(rec, start); + if (readPosition >= 0 && readPosition < rec.getReadLength()) { + char c = rec.getReadString().charAt(readPosition); + if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { + readsWithBaseAtPosition++; + if (rec.getDuplicateReadFlag()) { + duplicateCount++; + } else { + byte [] baseQuals = SAMUtils.fastqToPhred(rec.getBaseQualityString()); + qualityPhredString +=baseQuals[readPosition] + ","; + baseString += (rec.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c) + ""; +// baseString += c + ","; + qualityString +=rec.getBaseQualityString().charAt(readPosition) + ""; + + if (rec.getMappingQuality() >= 10 && rec.getBaseQualities()[readPosition] >= 10) { + if (rec.getReadNegativeStrandFlag()) { + if (reverseStrand.add(rec.getAlignmentStart())) { + novelStartBases[j++] = c; + } + } else { + if (forwardStrand.add(rec.getAlignmentStart())) { + novelStartBases[j++] = c; + } + } + } + } + } + + if (rec.getReadPairedFlag()) { + paired++; + if ( rec.getProperPairFlag()) properlyPaired++; + + } + if (rec.getReadUnmappedFlag()) unmapped++; + if (rec.getReadUnmappedFlag()) unmapped++; + if (rec.getNotPrimaryAlignmentFlag()) notPrimaryAlignment++; + + + if ( ! rec.getDuplicateReadFlag() && ! rec.getNotPrimaryAlignmentFlag() && ! rec.getReadUnmappedFlag()) + unmappedSecondaryDuplicates++; + if ( ! rec.getDuplicateReadFlag() && ! rec.getNotPrimaryAlignmentFlag() && ! rec.getReadUnmappedFlag() + && (rec.getReadPairedFlag() ? 
rec.getProperPairFlag() : true)) +// && (rec.getReadPairedFlag() && rec.getProperPairFlag())) + unmappedSecondaryDuplicatesProperly++; + } + + if (rec.getReadPairedFlag()) { + pairedAll++; + if (rec.getProperPairFlag()) properlyPairedAll++; + } + + if (BAMPileupUtil.eligibleSamRecord(rec)) { + ++filteredCount; + logger.info("***" + rec.getSAMString()); + } else logger.info(rec.getSAMString()); + + + + } + + + logger.info("SUMMARY: "); + logger.info("Total no of records: " + records.size() ); + logger.info("No of records with a base at position: " + readsWithBaseAtPosition); + logger.info("No of duplicate records (that have a base at position): " + duplicateCount); + logger.info("No of unique records (that have a base at position): " + (readsWithBaseAtPosition-duplicateCount)); + logger.info("No of unique paired records (that have a base at position): " + paired); + logger.info("No of unique properly paired records (that have a base at position): " + properlyPaired); + logger.info("No of records not primary aligned (that have a base at position): " + notPrimaryAlignment); + logger.info("No of records not mapped (that have a base at position): " + unmapped); + logger.info("unmappedSecondaryDuplicates (that have a base at position): " + unmappedSecondaryDuplicates); + logger.info("unmappedSecondaryDuplicatesProperly (that have a base at position): " + unmappedSecondaryDuplicatesProperly); + logger.info("No of paired records (all): " + pairedAll); + logger.info("No of properly paired records (all): " + properlyPairedAll); + logger.info("Unique record bases: " + baseString.substring(0,baseString.length() > 0 ? baseString.length() : 0)); + logger.info("Unique record base qualities: " + qualityString.substring(0,qualityString.length() > 0 ? qualityString.length() : 0)); + logger.info("Unique record base qualities (phred): " + qualityPhredString.substring(0,qualityPhredString.length() > 0 ? qualityPhredString.length() : 0)); + logger.info("filtered read count: " + filteredCount + " out of " + records.size() ); + logger.info("Novel start bases: " + new String(novelStartBases)); + + jumper.closeReader(); + + writeToFile(); + + return exitStatus; + } + + private void writeToFile() { + if (null != cmdLineOutputFiles && cmdLineOutputFiles.length == 1) { + try (FileWriter writer = new FileWriter(cmdLineOutputFiles[0]);){ + for (SAMRecord rec : records) { + writer.write(rec.getSAMString()); + } + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + } + + + + public static void main(String[] args) throws Exception { + GetBamRecords sp = new GetBamRecords(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("GetBamRecords", GetBamRecords.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + position = options.getPosition(); + position = options.getPosition(); + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/GetInsetSize.java-- b/qmule/src/org/qcmg/qmule/GetInsetSize.java-- new file mode 100644 index 000000000..44d5cc8c6 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/GetInsetSize.java-- @@ -0,0 +1,35 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; + +import org.qcmg.picard.SAMFileReaderFactory; + +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +public class GetInsetSize { + public static void main(String[] args) throws Exception{ + + File input = new File(args[0]); + SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); + int min =3000; + int max = 0; + String aaa = "AAA"; + for( SAMRecord record : reader){ + + if(record.getAttribute("XC").equals(aaa)){ + int size = Math.abs( record.getInferredInsertSize()); + if(size > max) max = size; + if(size < min) min = size; + } + } + reader.close(); + System.out.println(String.format("Insert range %d-%d\n", min, max)); + } + +} diff --git a/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- b/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- new file mode 100644 index 000000000..408ef9027 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- @@ -0,0 +1,395 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; + +import htsjdk.samtools.SAMFileHeader; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.meta.QDccMeta; +import org.qcmg.common.meta.QLimsMeta; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.util.QDccMetaFactory; +import org.qcmg.picard.util.QLimsMetaFactory; + +public class IndelDCCHeader { + + private String logFile; + private File somaticOutputFile; + private File germlineOutputFile; + private String mode; + private File normalBam; + private File tumourBam; + private String uuid; + private boolean qexecPresent = false; + private ArrayList qexec = new ArrayList(); + private boolean completeHeaderPresent = false; + private File somaticFile; + private File germlineFile; + private String tumourSampleId; + private String normalSampleId; + private static QLogger logger; + + public void setup(String args[]) throws Exception{ + + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(IndelDCCHeader.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("IndelDCCHeader", IndelDCCHeader.class.getPackage().getImplementationVersion(), args); + + // get list of file names + String[] cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 2) { + throw new QMuleException("INSUFFICIENT_INPUT_FILES"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + somaticFile = new File(cmdLineInputFiles[0]); + germlineFile = new File(cmdLineInputFiles[1]); + tumourBam = new File(options.getTumour()); + normalBam = new File(options.getNormal()); + + if ( ! FileUtils.canFileBeRead(tumourBam)) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); + } + if ( ! FileUtils.canFileBeRead(normalBam)) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); + } + + String[] cmdLineOutputFiles = options.getOutputFileNames(); + + somaticOutputFile = new File(cmdLineOutputFiles[0]); + germlineOutputFile = new File(cmdLineOutputFiles[1]); + + if (cmdLineOutputFiles.length != 2) { + throw new QMuleException("TOO_MANY_OUTPUTFILE"); + } + if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + for (String file : cmdLineOutputFiles) { + if (new File(file).exists()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + + mode = options.getMode(); + + if (mode == null || (!mode.equals("pindel") && !mode.equals("gatk"))) { + throw new QMuleException("MODE_ERROR", mode); + } + + logger.info("Somatic input DCC: " + somaticFile.getAbsolutePath()); + logger.info("Germline input DCC: " + germlineFile.getAbsolutePath()); + logger.info("Output DCC: " + somaticOutputFile.getAbsolutePath()); + logger.info("Output DCC: " + germlineOutputFile.getAbsolutePath()); + logger.info("Tumour bam: " + tumourBam.getAbsolutePath()); + logger.info("Normal bam: " + normalBam.getAbsolutePath()); + logger.info("Mode: " + mode); + + } + } + + public int annotate() throws Exception { + //double check to make sure that uuid isn't already present + checkForUUid(); + + StringBuilder header = new StringBuilder(); + if (completeHeaderPresent) { + logger.info("UUid already present in header. No annotation is taking place"); + } else if (qexecPresent){ + StringBuilder sb = new StringBuilder(); + for (String s: qexec) { + sb.append(s + "\n"); + } + header.append(sb.toString()); + header.append(getDCCMeta()); + QLimsMeta tumour = QLimsMetaFactory.getLimsMeta("TEST", tumourBam.getAbsolutePath()); + tumourSampleId = tumour.getSample(); + header.append(tumour.getLimsMetaDataToString()); + QLimsMeta normal = QLimsMetaFactory.getLimsMeta("CONTROL", normalBam.getAbsolutePath()); + normalSampleId = normal.getSample(); + header.append(normal.getLimsMetaDataToString()); + //write somatic + writeOutputFile(header.toString(), somaticFile, somaticOutputFile, false); + //write germline + writeOutputFile(header.toString(), germlineFile, germlineOutputFile, true); + } + + return 0; + } + + public File getSomaticOutputFile() { + return somaticOutputFile; + } + + public void setSomaticOutputFile(File somaticOutputFile) { + this.somaticOutputFile = somaticOutputFile; + } + + public File getGermlineOutputFile() { + return germlineOutputFile; + } + + public void setGermlineOutputFile(File germlineOutputFile) { + this.germlineOutputFile = germlineOutputFile; + } + + public File getSomaticFile() { + return somaticFile; + } + + public void setSomaticFile(File somaticFile) { + this.somaticFile = somaticFile; + } + + public File getGermlineFile() { + return germlineFile; + } + + public void setGermlineFile(File germlineFile) { + this.germlineFile = germlineFile; + } + + public boolean isQexecPresent() { + return qexecPresent; + } + + public void setQexecPresent(boolean qexecPresent) { + this.qexecPresent = qexecPresent; + } + + public ArrayList getQexec() { + return qexec; + } + + public void setQexec(ArrayList qexec) { + this.qexec = qexec; + } + + public boolean isCompleteHeaderPresent() { + return completeHeaderPresent; + } + + public void setCompleteHeaderPresent(boolean completeHeaderPresent) { + this.completeHeaderPresent = completeHeaderPresent; + } + + public void checkForUUid() throws IOException, QMuleException { + BufferedReader reader = new BufferedReader(new FileReader(somaticFile)); + + String line; + boolean ddcMeta = false; + boolean uuidHere = false; + boolean uuidInResults = false; + qexec = new ArrayList(); + while((line = reader.readLine()) != null) { + if (line.startsWith("#") || line.startsWith("analysis")) { + if (line.contains("Uuid") || line.contains("uuid")) { + 
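+					// A Uuid entry anywhere in the '#'/'analysis' header lines means this DCC
+					// has been annotated before. Combined with the #Q_DCCMETA flag and a uuid
+					// appearing in the result rows it marks a complete header; otherwise the
+					// 14 collected #Q_EXEC lines plus a valid 36-character, five-part uuid
+					// mark the qexec-only case handled further down.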
uuidHere = true; + } + if (line.startsWith("#Q_EXEC")) { + qexec.add(line); + } + if (line.startsWith("#Q_DCCMETA")) { + ddcMeta = true; + } + } else { + String[] values = line.split("\t"); + if (isCorrectUuidFormat(values[0])) { + uuidInResults = true; + } + } + } + reader.close(); + if (ddcMeta && uuidHere && uuidInResults) { + logger.info("Complete header already present."); + completeHeaderPresent = true; + } else if (uuidHere && qexec.size() == 14) { + qexecPresent = true; + logger.info("QExec header and uuid present."); + String q = ""; + for (String s: qexec) { + if (s.contains("Uuid")) { + q = s.replace("-", "_"); + String potentialUuid = s.split("\t")[2].replace("-", "_"); + if (isCorrectUuidFormat(potentialUuid)) { + uuid = potentialUuid; + } else { + logger.info("UUid was not correct format: " + potentialUuid); + throw new QMuleException("UUID_ERROR"); + } + } + } + qexec.remove(0); + qexec.add(0, q); + } else { + logger.info("Could not determine if UUid and DCC header is present"); + throw new QMuleException("UUID_ERROR"); + } + } + + public boolean isCorrectUuidFormat(String potentialUuid) { + if (potentialUuid.length() == 36 && potentialUuid.split("_").length == 5) { + return true; + } + return false; + } + + public String getDCCMeta() throws Exception { + SAMFileHeader tHeader = SAMFileReaderFactory.createSAMFileReader(tumourBam).getFileHeader(); + SAMFileHeader nHeader = SAMFileReaderFactory.createSAMFileReader(normalBam).getFileHeader(); + QDccMeta meta; + + meta = QDccMetaFactory.getDccMeta(uuid, nHeader, tHeader, mode); + return meta.getDCCMetaDataToString(); + } + + public void writeOutputFile(String header, File inputFile, File outputFile, boolean isGermline) throws IOException { + BufferedReader reader = new BufferedReader(new FileReader(inputFile)); + BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); + + if (!completeHeaderPresent) { + writer.write(header); + } + + String line; + while((line = reader.readLine()) != null) { + if (!line.startsWith("#") && !line.startsWith("analysis") && !completeHeaderPresent) { + writer.write(replaceIdsInLine(line, isGermline) + "\n"); + } else { + if (qexecPresent && !line.startsWith("#Q_EXEC")) { + writer.write(line + "\n"); + } + } + } + reader.close(); + writer.close(); + } + + public String getTumourSampleId() { + return tumourSampleId; + } + + public void setTumourSampleId(String tumourSampleId) { + this.tumourSampleId = tumourSampleId; + } + + public String getNormalSampleId() { + return normalSampleId; + } + + public void setNormalSampleId(String normalSampleId) { + this.normalSampleId = normalSampleId; + } + + public String replaceIdsInLine(String line, boolean isGermline) { + String[] values = line.split("\t"); + + StringBuilder sb = new StringBuilder(); + for (int i=0; i< values.length; i++) { + if (i==0 && !completeHeaderPresent) { + sb.append(uuid + "\t"); + } else if (i==1 && !completeHeaderPresent){ + if (isGermline) { + sb.append(normalSampleId + "\t"); + } else { + sb.append(tumourSampleId + "\t"); + } + } else if (i==2 && !completeHeaderPresent) { + String[] mutationStrs = values[i].split("_"); + String count = "_" + mutationStrs[mutationStrs.length-1]; + if (isGermline) { + sb.append(uuid + "_" + normalSampleId + count + "\t"); + } else { + sb.append(uuid + "_"+ tumourSampleId + count + "\t"); + } + } else { + sb.append(values[i] + "\t"); + } + } + return sb.toString(); + } + + public String getUuid() { + return uuid; + } + + public void setUuid(String uuid) { + this.uuid = uuid; + } + + public 
String getMode() { + return mode; + } + + public void setMode(String mode) { + this.mode = mode; + } + + public File getNormalBam() { + return normalBam; + } + + public void setNormalBam(File normalBam) { + this.normalBam = normalBam; + } + + public File getTumourBam() { + return tumourBam; + } + + public void setTumourBam(File tumourBam) { + this.tumourBam = tumourBam; + } + + public static void main(String[] args) throws Exception { + IndelDCCHeader sp = new IndelDCCHeader(); + LoadReferencedClasses.loadClasses(IndelDCCHeader.class); + sp.setup(args); + int exitStatus = sp.annotate(); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + +} diff --git a/qmule/src/org/qcmg/qmule/MAF2DCC1.java b/qmule/src/org/qcmg/qmule/MAF2DCC1.java index 5f52eed6e..998a34a10 100644 --- a/qmule/src/org/qcmg/qmule/MAF2DCC1.java +++ b/qmule/src/org/qcmg/qmule/MAF2DCC1.java @@ -18,10 +18,10 @@ import org.qcmg.common.model.ChrRangePosition; import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedFileWriter; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; public class MAF2DCC1 { diff --git a/qmule/src/org/qcmg/qmule/MAF2DCC1.java-- b/qmule/src/org/qcmg/qmule/MAF2DCC1.java-- new file mode 100644 index 000000000..998a34a10 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/MAF2DCC1.java-- @@ -0,0 +1,418 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class MAF2DCC1 { + + private String logFile; + private File mafFile; + private final List dccFiles = new ArrayList(); + private File outputDccFile; + private static QLogger logger; + private Map> mafRecords = new HashMap<>(); + private int inputMafRecordCount; + private int[] mafColumnIndexes; + private int[] dccColumnIndexes; + private String mode; + + + public String getLogFile() { + return logFile; + } + + public File getMafFile() { + return mafFile; + } + + public File getOutputDccFile() { + return outputDccFile; + } + + public Map> getMafRecords() { + return mafRecords; + } + + public void setMafRecords(Map> mafRecords) { + this.mafRecords = mafRecords; + } + + public int[] getMafColumnIndexes() { + return mafColumnIndexes; + } + + public void setMafColumnIndexes(int[] mafColumnIndexes) { + this.mafColumnIndexes = mafColumnIndexes; + } + + public int[] getDccColumnIndexes() { + return dccColumnIndexes; + } + + public void setDccColumnIndexes(int[] dccColumnIndexes) { + this.dccColumnIndexes = dccColumnIndexes; + } + + public String getMode() { + return mode; + } + + public void setMode(String mode) { + this.mode = mode; + } + + public int getInputMafRecordCount() { + return inputMafRecordCount; + } + + public List getDccFiles() { + return dccFiles; + } + + public void setup(String args[]) throws Exception{ + + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(MAF2DCC1.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("MAF2DCC1", MAF2DCC1.class.getPackage().getImplementationVersion(), args); + + // get list of file names + String[] cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 2) { + throw new QMuleException("INSUFFICIENT_INPUT_FILES"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + mafFile = new File(cmdLineInputFiles[0]); + + for (int i=1; i 0) { + logger.warn("Could not find matches for the following records: "); + for (ChrPosition key : mafRecords.keySet()) { + logger.info("Missing at positions: " + key.toString()); + } + throw new QMuleException("MISSING_DCC_RECORDS", Integer.toString(mafRecords.size())); + } + + if (countInMaf != inputMafRecordCount || mafRecords.size() > 0) { + throw new QMuleException("COUNT_ERROR", Integer.toString(countInMaf), Integer.toString(inputMafRecordCount)); + } + + logger.info("Added " + countInMaf + " records to the dcc1 output file"); + + return 0; + } + + private void readMafFile() throws Exception { + TabbedFileReader reader = new TabbedFileReader(mafFile); + try { + int count = 0; + boolean checkForMissingColumnIndex = true; + for (TabbedRecord rec : reader) { + count++; + //header + if (rec.getData().startsWith("Hugo")) { + mafColumnIndexes = findColumnIndexesFromHeader(rec); + } else { + // only need to do this once + if (checkForMissingColumnIndex) { + if (missingColumnIndex(mafColumnIndexes)) { + throw new QMuleException("NO_COLUMN_INDEX", mafFile.getAbsolutePath()); + } + checkForMissingColumnIndex = false; + } + addToMafRecordMap(rec, count); + inputMafRecordCount++; + } + } + + logger.info("Number of input maf records: " + inputMafRecordCount); + + } finally { + reader.close(); + } + } + + private int compare(File dccFile, int count, TabbedFileWriter writer) throws Exception { + logger.info("Looking in dcc file: " + dccFile.getAbsolutePath()); + int countInMaf = 0; + int total = 0; + boolean checkForMissingColumnIndex = true; + + try (TabbedFileReader reader = new TabbedFileReader(dccFile);) { + if (count == 1) { + TabbedHeader header = reader.getHeader(); + writer.addHeader(header); + } + for (TabbedRecord rec : reader) { + //header + + if (rec.getData().startsWith("analysis_id")) { + //mutation id column + dccColumnIndexes = findColumnIndexesFromHeader(rec); + if (count == 1) { + writer.add(rec); + } + } else { + total++; + if (total % 10000 == 0) { + logger.info("Processed: " + total + " dcc records" ); + } + if (checkForMissingColumnIndex) { + if (missingColumnIndex(mafColumnIndexes)) { + throw new QMuleException("NO_MUTATION_ID", dccFile.getAbsolutePath()); + } + checkForMissingColumnIndex = false; + } + String[] strArray = rec.getDataArray(); + String chr = strArray[dccColumnIndexes[0]].replace("chr", ""); + if (chr.equals("M")) { + chr += "T"; + } + ChrPosition chrPos = new ChrRangePosition(chr, Integer.valueOf(strArray[dccColumnIndexes[1]]), Integer.valueOf(strArray[dccColumnIndexes[2]])); + if (recordInMaf(chrPos, rec)) { + writer.add(rec); + countInMaf++; + } + } + } + } + logger.info("Finished looking in dcc file: " + dccFile.getAbsolutePath() + " found " + countInMaf + " maf record/s." 
); + return countInMaf; + } + + public void addToMafRecordMap(TabbedRecord rec, int count) throws QMuleException { + String[] strArray = rec.getDataArray(); + + //need to screw around with chr1 vs 1 vs chrMT vs chrM + String chr = strArray[mafColumnIndexes[0]].replace("chr", ""); + + if (chr.equals("M")) { + chr += "T"; + } + ChrPosition chrPos = new ChrRangePosition(chr, Integer.valueOf(strArray[mafColumnIndexes[1]]), Integer.valueOf(strArray[mafColumnIndexes[2]])); + + List recordsAtThisPosition = mafRecords.get(chrPos); + if (null == recordsAtThisPosition) { + recordsAtThisPosition = new ArrayList(2); + mafRecords.put(chrPos, recordsAtThisPosition); + } + recordsAtThisPosition.add(rec); + + } + + public boolean missingColumnIndex(int[] columnIndexes) throws QMuleException { + for (int i =0; i< columnIndexes.length; i++) { + if (columnIndexes[i] == -1) { + throw new QMuleException("NO_COLUMN_INDEX"); + } + } + return false; + } + + public int[] findColumnIndexesFromHeader(TabbedRecord rec) { + int[] mutationColumns = {-1, -1, -1, -1, -1, -1}; + String[] strArray = rec.getDataArray(); + for (int i=0; i recordsAtThisPosition = mafRecords.get(dccChrPos); + if (null != recordsAtThisPosition && ! recordsAtThisPosition.isEmpty()) { + + if (recordsAtThisPosition.size() > 1) { + logger.info("more than 1 record for position: " + dccChrPos); + } + + // check to see if any of the records match our dccRec + List recordsToRemove = new ArrayList<>(2); + + for (TabbedRecord tr : recordsAtThisPosition) { + if (matchOtherColumns(tr, dccRec)) { + matches++; + if (matches > 1) { + throw new QMuleException("T0O_MANY_MATCHES", dccChrPos.toString()); + } + + // remove record from array + recordsToRemove.add(tr); + matchFound = true; + } + } + + // remove records that have been matched + recordsAtThisPosition.removeAll(recordsToRemove); + + // check to see if there are any records left, if not, remove entry from map + if (recordsAtThisPosition.isEmpty()) { + mafRecords.remove(dccChrPos); + } + } + + return matchFound; + } + + public boolean matchOtherColumns(TabbedRecord mafRec, TabbedRecord dccRec) { + String[] mafValues = mafRec.getDataArray(); + String[] dccValues = dccRec.getDataArray(); + + if (mode.equals("snp")) { + if (matchingMutation(mafValues[mafColumnIndexes[3]], dccValues[dccColumnIndexes[3]])) { + return true; + } + } + if (mode.equals("indel")) { + if (matchingMutation(mafValues[mafColumnIndexes[3]], dccValues[dccColumnIndexes[3]]) && + mafValues[mafColumnIndexes[4]].equals(dccValues[dccColumnIndexes[4]]) && + mafValues[mafColumnIndexes[5]].equals(dccValues[dccColumnIndexes[5]])) { + return true; + } + } + + + return false; + } + + public boolean matchingMutation(String mafMutation, String dccMutation) { + if ((mafMutation.equals("SNP") && dccMutation.equals("1")) || + (mafMutation.equals("INS") && dccMutation.equals("2")) || + (mafMutation.equals("DEL") && dccMutation.equals("3"))) { + return true; + } + return false; + } + + public boolean match(ChrPosition mafChrPos, ChrPosition dccChrPos) { + if (mafChrPos.getChromosome().equals(dccChrPos.getChromosome()) + && mafChrPos.getStartPosition() == dccChrPos.getStartPosition() + && mafChrPos.getEndPosition() == dccChrPos.getEndPosition()) { + return true; + } + return false; + } + + + public static void main(String[] args) throws Exception { + MAF2DCC1 sp = new MAF2DCC1(); + LoadReferencedClasses.loadClasses(MAF2DCC1.class); + sp.setup(args); + + int exitStatus = sp.annotate(); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + 
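+		// Typical invocation (a minimal sketch: file names are placeholders and the
+		// option spellings assume the joptsimple flags declared in Options; the first
+		// --input is the maf, any further --input files are dcc1s, --mode is snp or indel):
+		//   qmule org.qcmg.qmule.MAF2DCC1 --input calls.maf --input calls.dcc1 \
+		//         --output calls.matched.dcc1 --mode snp --log maf2dcc1.log
+		// A dcc1 row is kept when its chromosome/start/end match a maf row and the
+		// mutation type agrees (SNP=1, INS=2, DEL=3); indel mode additionally requires
+		// two further columns to agree before a match is counted.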
System.exit(exitStatus); + } + +} diff --git a/qmule/src/org/qcmg/qmule/Main.java-- b/qmule/src/org/qcmg/qmule/Main.java-- new file mode 100644 index 000000000..fc7560b17 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/Main.java-- @@ -0,0 +1,100 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + + + +/** + * The entry point for the command-line SAM/BAM merging tool. + */ +public final class Main { + +// enum Tool { +// GetBamRecords("org.qcmg.qmule.GetBamRecords"); +//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"), +//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"), +//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"); +// +//// private final String name; +// private final String fullyQualifiedName; +// +// private Tool(String fullyQualifiedName) { +//// this.name = name; +// this.fullyQualifiedName = fullyQualifiedName; +// } +// +// public String getFullyQualifiedName() { +// return fullyQualifiedName; +// } +// public static Tool getTool(String name) { +// for (Tool t : Tool.values()) { +// if (name.equals(t.name())) return t; +// } +// throw new IllegalArgumentException("Tool not found: " + name); +// } +// } + + /** + * Performs a single merge based on the supplied arguments. Errors will + * terminate the merge and display error and usage messages. + * + * @param args + * the command-line arguments. + * @throws ClassNotFoundException + */ + public static void main(final String[] args) throws ClassNotFoundException { + Options options = null; + try { + options = new Options(args); + } catch (Exception e) { + e.printStackTrace(); + } + System.out.println(Messages.USAGE); + try { + options.displayHelp(); + } catch (Exception e) { + e.printStackTrace(); + } + +// String toolName = options.getToolName(); +// Tool t = Tool.getTool(toolName); +// Class tool = Class.forName(t.getFullyQualifiedName()); +// System.out.println("Class: " + tool.getCanonicalName()); +// // Create the array of Argument Types +// Class[] argTypes = { args.getClass()}; // array is Object! +// // Now find the method +// Method m = null; +// try { +// m = tool.getMethod("main", argTypes); +// } catch (SecurityException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } catch (NoSuchMethodException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } +// System.out.println(m); +// +// // Create the actual argument array +// Object passedArgv[] = { args }; +// +// // Now invoke the method. +// try { +// m.invoke(null, passedArgv); +// } catch (IllegalArgumentException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } catch (IllegalAccessException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } catch (InvocationTargetException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } + +//) Method m = tool.getMethod("main", Object.class); +// m.iinvoke(args); + System.exit(0); + } +} diff --git a/qmule/src/org/qcmg/qmule/Messages.java-- b/qmule/src/org/qcmg/qmule/Messages.java-- new file mode 100644 index 000000000..302f166f1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/Messages.java-- @@ -0,0 +1,132 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.text.MessageFormat; +import java.util.ResourceBundle; + +/** + * Class used to lookup messages from this package's message bundles. + */ +public final class Messages { + + /** The Constant messages. */ + static final ResourceBundle messages = ResourceBundle + .getBundle("org.qcmg.qmule.messages"); + + /** The Constant ERROR_PREFIX. */ + static final String ERROR_PREFIX = getProgramName() + ": "; + + /** The Constant USAGE. */ + public static final String USAGE = getMessage("USAGE"); + + /** + * Gets the message. + * + * @param identifier the identifier + * @return the message + */ + public static String getMessage(final String identifier) { + return messages.getString(identifier); + } + + /** + * Gets the message. + * + * @param identifier the identifier + * @param argument the argument + * @return the message + */ + public static String getMessage(final String identifier, final String argument) { + final String message = Messages.getMessage(identifier); + Object[] arguments = { argument }; + return MessageFormat.format(message, arguments); + } + + /** + * Gets the message. + * + * @param identifier the identifier + * @param arg1 the arg1 + * @param arg2 the arg2 + * @return the message + */ + public static String getMessage(final String identifier, final String arg1, + final String arg2) { + final String message = Messages.getMessage(identifier); + Object[] arguments = { arg1, arg2 }; + return MessageFormat.format(message, arguments); + } + + /** + * Gets the message. + * + * @param identifier the identifier + * @param arg1 the arg1 + * @param arg2 the arg2 + * @param arg3 the arg3 + * @return the message + */ + public static String getMessage(final String identifier, final String arg1, + final String arg2, final String arg3) { + final String message = Messages.getMessage(identifier); + Object[] arguments = { arg1, arg2, arg3 }; + return MessageFormat.format(message, arguments); + } + + /** + * Gets the message. + * + * @param identifier the identifier + * @param arguments the arguments + * @return the message + */ + public static String getMessage(final String identifier, final Object[] arguments) { + final String message = Messages.getMessage(identifier); + return MessageFormat.format(message, arguments); + } + + /** + * Gets the program name. + * + * @return the program name + */ + static String getProgramName() { + return Messages.class.getPackage().getImplementationTitle(); + } + + /** + * Gets the program version. + * + * @return the program version + */ + static String getProgramVersion() { + return Messages.class.getPackage().getImplementationVersion(); + } + + /** + * Gets the version message. + * + * @return the version message + * @throws Exception the exception + */ + public static String getVersionMessage() throws Exception { + return getProgramName() + ", version " + getProgramVersion(); + } + + /** + * Reconstruct command line. + * + * @param args the args + * @return the string + */ + public static String reconstructCommandLine(final String[] args) { + String result = getProgramName() + " "; + for (final String arg : args) { + result += arg + " "; + } + return result; + } + +} diff --git a/qmule/src/org/qcmg/qmule/Options.java-- b/qmule/src/org/qcmg/qmule/Options.java-- new file mode 100644 index 000000000..c83f4812d --- /dev/null +++ b/qmule/src/org/qcmg/qmule/Options.java-- @@ -0,0 +1,512 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. 
+ * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import static java.util.Arrays.asList; + +import java.io.IOException; +import java.util.List; +import java.util.Properties; + +import joptsimple.OptionParser; +import joptsimple.OptionSet; + +/** + * The Class Options. + */ +public final class Options { + + public enum Ids{ + PATIENT, + SOMATIC_ANALYSIS, + GEMLINE_ANALYSIS, + TUMOUR_SAMPLE, + NORMAL_SAMPLE; + } + + /** The Constant HELP_DESCRIPTION. */ + private static final String HELP_DESCRIPTION = Messages + .getMessage("HELP_OPTION_DESCRIPTION"); + + /** The Constant VERSION_DESCRIPTION. */ + private static final String VERSION_DESCRIPTION = Messages + .getMessage("VERSION_OPTION_DESCRIPTION"); + + /** The Constant INPUT_DESCRIPTION. */ + private static final String INPUT_DESCRIPTION = Messages + .getMessage("INPUT_OPTION_DESCRIPTION"); + + /** The Constant OUTPUT_DESCRIPTION. */ + private static final String OUTPUT_DESCRIPTION = Messages + .getMessage("OUTPUT_OPTION_DESCRIPTION"); + + /** The parser. */ + private final OptionParser parser = new OptionParser(); + + /** The options. */ + private final OptionSet options; + + /** The command line. */ + private final String commandLine; + + /** The input file names. */ + private final String[] inputFileNames; + + /** The output file names. */ + private final String[] outputFileNames; + + /** The log file */ + private String logFile; + + /** The log level */ + private String logLevel; + + private String patientId; + private String somaticAnalysisId; + private String germlineAnalysisId; + private String normalSampleId; + private String tumourSampleId; + private String position; + private String pileupFormat; + private int normalCoverage; + private int numberOfThreads; + private int tumourCoverage; + private int minCoverage; + private String mafMode; + private String gff; + private String fasta; + private String[] gffRegions; + private int noOfBases; + private String mode; + + + private String column; + + private String annotation; + + private String features; + + private String tumour; + + private String normal; + + private String analysis; + + /** + * Instantiates a new options. 
+ * + * @param args the args + * @throws Exception the exception + */ + @SuppressWarnings("unchecked") + public Options(final String[] args) throws Exception { + commandLine = Messages.reconstructCommandLine(args); + +// parser.accepts("qmule", "Tool").withRequiredArg().ofType(String.class).describedAs("tool name"); + parser.accepts("output", OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); + parser.accepts("input", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("inputfile"); + parser.accepts("log", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("logfile"); + parser.accepts("loglevel", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("loglevel"); + parser.accepts("help", HELP_DESCRIPTION); + parser.accepts("version", VERSION_DESCRIPTION); + parser.accepts("patientId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("patientId"); + parser.accepts("somaticAnalysisId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("somaticAnalysisId"); + parser.accepts("germlineAnalysisId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("germlineAnalysisId"); + parser.accepts("normalSampleId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("normalSampleId"); + parser.accepts("tumourSampleId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("tumourSampleId"); + parser.accepts("position", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("position"); + parser.accepts("pileupFormat", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("pileupFormat"); + parser.accepts("normalCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) + .describedAs("normalCoverage"); + parser.accepts("numberOfThreads", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) + .describedAs("numberOfThreads"); + parser.accepts("tumourCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) + .describedAs("tumourCoverage"); + parser.accepts("minCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) + .describedAs("minCoverage"); + parser.accepts("mafMode", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("mafMode"); + parser.accepts("mode", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("mode"); + parser.accepts("column", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("column"); + parser.accepts("annotation", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("annotation"); + parser.accepts("gffFile", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("gffFile"); + parser.accepts("fasta", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("fasta"); + parser.accepts("feature", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("feature"); + parser.accepts("tumour", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("tumour"); + parser.accepts("normal", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("normal"); + parser.accepts("analysis", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("analysis"); + parser.accepts("verifiedInvalid", INPUT_DESCRIPTION); + parser.accepts("gffRegions", 
INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).withValuesSeparatedBy(',').describedAs("gffRegions"); + parser.accepts("noOfBases", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class).describedAs("noOfBases"); + parser.accepts("proportion", Messages + .getMessage("PROPORTION_OPTION_DESCRIPTION")).withRequiredArg().ofType(Double.class); + parser.accepts("stranded", Messages + .getMessage("STRANDED_OPTION_DESCRIPTION")); + parser.accepts("compareAll",Messages.getMessage("COMPAREALL_OPTION")); + + parser.posixlyCorrect(true); + options = parser.parse(args); + + List inputList = options.valuesOf("input"); + inputFileNames = new String[inputList.size()]; + inputList.toArray(inputFileNames); + + List outputList = options.valuesOf("output"); + outputFileNames = new String[outputList.size()]; + outputList.toArray(outputFileNames); + + logFile = (String) options.valueOf("log"); + logLevel = (String) options.valueOf("loglevel"); + + patientId = (String) options.valueOf("patientId"); + somaticAnalysisId = (String) options.valueOf("somaticAnalysisId"); + germlineAnalysisId = (String) options.valueOf("germlineAnalysisId"); + normalSampleId = (String) options.valueOf("normalSampleId"); + tumourSampleId = (String) options.valueOf("tumourSampleId"); + + // WiggleFromPileup specific options + pileupFormat = (String) options.valueOf("pileupFormat"); + if (null != options.valueOf("normalCoverage")) + normalCoverage = (Integer) options.valueOf("normalCoverage"); + if (null != options.valueOf("tumourCoverage")) + tumourCoverage = (Integer) options.valueOf("tumourCoverage"); + // end of WiggleFromPileup specific options + + //compareReferenceRegions + mode = (String) options.valueOf("mode"); + column = (String) options.valueOf("column"); + annotation = (String) options.valueOf("annotation"); + features = (String) options.valueOf("feature"); + position = (String) options.valueOf("position"); + mafMode = (String) options.valueOf("mafMode"); + + gff = (String) options.valueOf("gffFile"); + fasta = (String) options.valueOf("fasta"); + + tumour = (String) options.valueOf("tumour"); + normal = (String) options.valueOf("normal"); + analysis = (String) options.valueOf("analysis"); + + // gffRegions + List gffRegionsArgs = (List) options.valuesOf("gffRegions"); + gffRegions = new String[gffRegionsArgs.size()]; + gffRegionsArgs.toArray(gffRegions); + + // MafAddCPG specific + if (null != options.valueOf("noOfBases")) + noOfBases = (Integer) options.valueOf("noOfBases"); + + // qsignature + if (null != options.valueOf("minCoverage")) + minCoverage = (Integer) options.valueOf("minCoverage"); + + if (null != options.valueOf("numberOfThreads")) + numberOfThreads = (Integer) options.valueOf("numberOfThreads"); + + } + + /** + * + * @param className + * @param args + * @throws Exception + */ + public Options( final Class myclass, final String[] args) throws Exception { + commandLine = Messages.reconstructCommandLine(args); + + parser.acceptsAll( asList("h", "help"), HELP_DESCRIPTION ); +// parser.acceptsAll( asList("v", "version"), VERSION_DESCRIPTION); + parser.acceptsAll( asList("i", "input"), INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("input"); + parser.acceptsAll(asList("o", "output"), OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); + parser.accepts("log", Messages.getMessage("LOG_OPTION_DESCRIPTION")).withRequiredArg().ofType(String.class).describedAs("logfile"); + parser.accepts("loglevel", 
Messages.getMessage("LOGLEVEL_OPTION_DESCRIPTION")).withRequiredArg().ofType(String.class).describedAs("loglevel"); + + if( myclass.equals(AlignerCompare.class) ){ + parser.accepts("compareAll",Messages.getMessage("COMPAREALL_OPTION")); + parser.acceptsAll( asList("o", "output"), Messages.getMessage("OUTPUT_AlignerCompare")).withRequiredArg().ofType(String.class).describedAs("output"); + }else if(myclass.equals(SubSample.class)) { + parser.accepts("proportion",Messages.getMessage("PROPORTION_OPTION_DESCRIPTION")).withRequiredArg().ofType(Double.class).describedAs("[0,1]"); + }else if(myclass.equals(BAMCompress.class)){ + parser.accepts("compressLevel",Messages.getMessage("COMPRESS_LEVEL_DESCRIPTION") ).withRequiredArg().ofType(Integer.class).describedAs("[0,9]"); + } + + + //else if( myclass.equals(BamMismatchCounts.class)){} + + options = parser.parse(args); + + List inputList = options.valuesOf("input"); + inputFileNames = new String[inputList.size()]; + inputList.toArray(inputFileNames); + + List outputList = options.valuesOf("output"); + outputFileNames = new String[outputList.size()]; + outputList.toArray(outputFileNames); + + } + + public String getTumour() { + return tumour; + } + + public void setTumour(String tumour) { + this.tumour = tumour; + } + + public String getNormal() { + return normal; + } + + public void setNormal(String normal) { + this.normal = normal; + } + + public String getAnalysis() { + return analysis; + } + + public void setAnalysis(String analysis) { + this.analysis = analysis; + } + + /** + * Checks for input option. + * + * @return true, if successful + */ + public boolean hasInputOption() { + return options.has("input"); + } + + /** + * Checks for output option. + * + * @return true, if successful + */ + public boolean hasOutputOption() { + return options.has("o") || options.has("output"); + } + + /** + * Checks for version option. + * + * @return true, if successful + */ + public boolean hasVersionOption() { + return options.has("version"); + } + + public boolean getIncludeInvalid() { + return options.has("verifiedInvalid"); + } + + /** + * Checks for help option. + * + * @return true, if successful + */ + public boolean hasHelpOption() { + return options.has("help"); + } + + public boolean hasCompareAllOption() { + return options.has("compareAll"); + } + + /** + * Checks for log option. + * + * @return true, if successful + */ + public boolean hasLogOption() { + return options.has("log"); + } + + /** + * Checks for non options. + * + * @return true, if successful + */ + public boolean hasNonOptions() { + return 0 != options.nonOptionArguments().size(); + } + + /** + * Gets the input file names. + * + * @return the input file names + */ + public String[] getInputFileNames() { + return inputFileNames; + } + + /** + * Gets the output file names. + * + * @return the output file names + */ + public String[] getOutputFileNames() { + return outputFileNames; + } + + /** + * Gets the command line. 
+ * + * @return the command line + */ + public String getCommandLine() { + return commandLine; + } + + public boolean hasStrandedOption() { + return options.has("stranded"); + } + + public String getPosition() { + return position; + } + public String getPileupFormat() { + return pileupFormat; + } + public int getNormalCoverage() { + return normalCoverage; + } + public int getTumourCoverage() { + return tumourCoverage; + } + public int getMinCoverage() { + return minCoverage; + } + public int getNumberOfThreads() { + return numberOfThreads; + } + public String getMafMode() { + return mafMode; + } + public String getGffFile() { + return gff; + } + public String getFastaFile() { + return fasta; + } + + public String getMode() { + return mode; + } + + public int getcompressLevel() throws Exception{ + if(options.has("compressLevel")){ + int l = (int) options.valueOf("compressLevel"); + if(l >= 0 && l <= 9) + return l; + else + throw new Exception("compressLevel must between [0,9]"); + } + + return 5; + } + //subSample + public double getPROPORTION() throws Exception{ + if(options.has("proportion")){ + + double prop = (double) options.valueOf("proportion"); +// double prop = Double.parseDouble( (String) options.valueOf("proportion") ); + if(prop > 0 && prop <= 1){ + return prop; + + } + } + throw new Exception("no proportion are specified"); + } + + + /** + * Display help. + * + * @throws Exception the exception + */ + public void displayHelp() throws IOException { + parser.printHelpOn(System.out); + } + + /** + * Detect bad options. + * + * @throws Exception the exception + */ + public void detectBadOptions() throws Exception { + if (hasNonOptions()) { + throw new Exception("ALL_ARGUMENTS_MUST_BE_OPTIONS"); + } + if (hasOutputOption() && 1 != getOutputFileNames().length) { + throw new Exception("MULTIPLE_OUTPUT_FILES_SPECIFIED"); + } + if (!hasInputOption()) { + throw new Exception("MISSING_INPUT_OPTIONS"); + } + } + + public String getLogFile(){ + return logFile; + } + + public String getLogLevel(){ + return logLevel; + } + + public Properties getIds() { + Properties props = new Properties(); + props.put(Ids.PATIENT, patientId); + props.put(Ids.SOMATIC_ANALYSIS, somaticAnalysisId); + props.put(Ids.GEMLINE_ANALYSIS, germlineAnalysisId); + props.put(Ids.NORMAL_SAMPLE, normalSampleId); + props.put(Ids.TUMOUR_SAMPLE, tumourSampleId); + return props; + } + + public String[] getGffRegions() { + + return gffRegions; + } + + public int getNoOfBases() { + + return noOfBases; + } + + public String getColumn() { + return column; + } + + public String getAnnotation() { + return annotation; + } + + public String[] getFeature() { + if (features != null) { + return features.split(","); + } + return null; + } + +} diff --git a/qmule/src/org/qcmg/qmule/Pileup.java-- b/qmule/src/org/qcmg/qmule/Pileup.java-- new file mode 100644 index 000000000..c1503ab6a --- /dev/null +++ b/qmule/src/org/qcmg/qmule/Pileup.java-- @@ -0,0 +1,101 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
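+// A short usage sketch for the Options class above, mirroring how the setup() methods elsewhere
+// in this patch drive it (the file names are hypothetical):
+//
+//     Options options = new Options(new String[] {
+//             "--input", "test.dcc", "--output", "out.dcc", "--log", "run.log"});
+//     options.detectBadOptions();                  // throws if inputs/outputs are inconsistent
+//     String[] inputs  = options.getInputFileNames();
+//     String   logFile = options.getLogFile();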
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; +import java.util.Map.Entry; + +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionComparator; +import org.qcmg.common.model.QPileupSimpleRecord; +import org.qcmg.picard.SAMFileReaderFactory; + + +public class Pileup { + private static final Comparator COMPARATOR = new ChrPositionComparator(); + private static QLogger logger = QLoggerFactory.getLogger(Pileup.class); + + Map pileup = new TreeMap(); +// Map pileup = new HashMap(10000000, 0.99f); + + private void engage(String args[]) throws IOException { + + SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(args[0])); + FileWriter writer = new FileWriter(new File(args[1])); + + int counter = 0; + for (SAMRecord sr : reader) { + parseRecord(sr); + if (++counter % 100000 == 0) { + logger.info("hit " + counter + " reads in bam file, size of pileup map is: " + pileup.size()); + + // output contents of pileup to file to clear memory + // get current chromosome and position an write out + //all records a couple of hundred bases prior to that position + writePileup(writer, sr.getReferenceName(), sr.getAlignmentStart() - 500); + } + } + logger.info("Done!! No of reads in file: " + counter + ", size of pileup map is: " + pileup.size() ); + } + + private void writePileup(FileWriter writer, String chromosome, int position) throws IOException { + ChrPosition chrPos = ChrPointPosition.valueOf(chromosome, position); + + Iterator> iter = pileup.entrySet().iterator(); + + while (iter.hasNext()) { + Map.Entry entry = iter.next(); + if (0 < COMPARATOR.compare(chrPos, entry.getKey())) { + + writer.write(entry.getKey().getChromosome() + "\t" + + entry.getKey().getStartPosition() + "\t" + + entry.getValue().getFormattedString()); + + iter.remove(); + } + } + + } + + private void parseRecord(SAMRecord sr) { + + ChrPosition chrPos; + QPileupSimpleRecord pileupRec; + int position = 0; + + for (byte b : sr.getReadBases()) { + chrPos = ChrPointPosition.valueOf(sr.getReferenceName(), sr.getAlignmentStart() + position++); + pileupRec = pileup.get(chrPos); + if (null == pileupRec) { + pileupRec = new QPileupSimpleRecord(); + pileup.put(chrPos, pileupRec); + } + pileupRec.incrementBase(b); + } + + + } + + + + public static void main(String[] args) throws IOException { + Pileup p = new Pileup(); + p.engage(args); + } +} diff --git a/qmule/src/org/qcmg/qmule/PileupStats.java-- b/qmule/src/org/qcmg/qmule/PileupStats.java-- new file mode 100644 index 000000000..e2ea6d844 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/PileupStats.java-- @@ -0,0 +1,254 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
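+// Note on the Pileup class above: writePileup() is the memory-control step. Every 100,000 reads,
+// pileup entries more than 500 bases before the current read's alignment start are written out
+// and removed from the map. For example (positions hypothetical), once a read starting at
+// chr1:10,000 is reached, entries at chr1:1..9,499 are flushed, on the assumption that the input
+// BAM is coordinate-sorted and reads are shorter than 500 bases, so nothing can still overlap them.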
+ */ +package org.qcmg.qmule; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Map.Entry; +import java.util.TreeMap; + +import htsjdk.samtools.Cigar; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordIterator; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.picard.SAMFileReaderFactory; + +public class PileupStats { + + private String logFile; + private File inputFile; + private File outputFile; + private File bamFile; + private static QLogger logger; + + public int engage() throws Exception { + + BufferedReader reader = new BufferedReader(new FileReader(inputFile)); + BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); + + writer.write(getHeader()); + String line; + int count = 0; + while ((line = reader.readLine()) != null) { + String[] values = line.split("\t"); + + String result = pileup(values[0], new Integer(values[1]), new Integer(values[2])); + + writer.write(line + "\t" + result + "\n"); + //System.out.println(line + "\t " + result); + if (count++ % 1000 == 0) { + logger.info("Number processed: " + count); + } + } + logger.info("Total processed: " + count); + reader.close(); + writer.close(); + + return 0; + } + + private String getHeader() { + return "chr\tposition\tposition\tbed\tbed\tbed\ttotal reads\ttotal unmapped" + + "\ttotal mates unmapped\ttotal indels\ttotal mismatch reads\ttotal soft clips" + + "\ttotal hard clips\ttotal spliced reads\ttotal duplicates\tmismatch counts\tsplice lengths\n"; + } + + private String pileup(String chromosome, int start, int end) throws IOException { + SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile, "silent"); + + SAMRecordIterator iterator = reader.queryOverlapping(chromosome, start, end); + + int totalReads = 0; + int totalMatesUnmapped = 0; + int totalUnmapped = 0; + int totalDuplicates = 0; + int totalMismatches = 0; + int totalSpliced = 0; + int totalSoftClips = 0; + int totalHardClips = 0; + int totalIndels = 0; + TreeMap spliceMap = new TreeMap(); + TreeMap mismatchMap = new TreeMap(); + + while (iterator.hasNext()) { + SAMRecord record = iterator.next(); + if (record.getReadUnmappedFlag()) { + totalUnmapped++; + } else { + totalReads++; + if (record.getDuplicateReadFlag()) { + totalDuplicates++; + } else { + + if (record.getMateUnmappedFlag()) { + totalMatesUnmapped++; + } + + //cigars + Cigar cigar = record.getCigar(); + + for (CigarElement ce : cigar.getCigarElements()) { + if (ce.getOperator().equals(CigarOperator.DELETION) || ce.getOperator().equals(CigarOperator.INSERTION)) { + totalIndels++; + } + + if (ce.getOperator().equals(CigarOperator.SOFT_CLIP)) { + totalSoftClips++; + } + if (ce.getOperator().equals(CigarOperator.HARD_CLIP)) { + totalHardClips++; + } + if (ce.getOperator().equals(CigarOperator.N)) { + totalSpliced++; + Integer length = new Integer(ce.getLength()); + int count = 1; + if (spliceMap.containsKey(length)) { + count += spliceMap.get(length); + } + spliceMap.put(length, count); + } + } + + //MD tag + String mdData = (String) record.getAttribute("MD"); + int matches = tallyMDMismatches(mdData); + if (matches > 0) { + totalMismatches++; + } + int count = 1; + if (mismatchMap.containsKey(matches)) { + count += 
mismatchMap.get(matches); + } + mismatchMap.put(matches, count); + + } + } + + } + + iterator.close(); + reader.close(); + + String spliceCounts = getMapString(spliceMap); + String mismatchCounts = getMapString(mismatchMap); + + String result = totalReads + "\t" + totalUnmapped + "\t" + totalMatesUnmapped + "\t" + totalIndels + "\t" + + totalMismatches + "\t" + totalSoftClips + "\t" + totalHardClips + "\t" + totalSpliced + "\t" + totalDuplicates + + "\t" + mismatchCounts + "\t" + spliceCounts; + return result; + } + + private String getMapString(TreeMap map) { + StringBuilder sb = new StringBuilder(); + + for (Entry entry: map.entrySet()) { + sb.append(entry.getKey() + ":" + entry.getValue() + ";"); + } + + return sb.toString(); + } + + public int tallyMDMismatches(String mdData) { + int count = 0; + if (null != mdData) { + for (int i = 0, size = mdData.length() ; i < size ; ) { + char c = mdData.charAt(i); + if (isValidMismatch(c)) { + count++; + i++; + } else if ('^' == c) { + while (++i < size && Character.isLetter(mdData.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... + } + } + return count; + } + + private boolean isValidMismatch(char c) { + return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(PileupStats.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("PileupStats", PileupStats.class.getPackage().getImplementationVersion(), args); + + // get list of file names + String[] cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 2) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + String[] cmdLineOutputFiles = options.getOutputFileNames(); + if ( ! 
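+// Worked example for tallyMDMismatches() above (the MD string is hypothetical): for MD=10A5^AC6T2
+// the digits are skipped, 'A' counts as the first mismatch, the '^AC' deletion run is skipped in
+// full, and 'T' counts as the second, so the method returns 2; the read is then tallied once in
+// totalMismatches and the mismatchMap entry for key 2 is incremented.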
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + + for (String file : cmdLineOutputFiles) { + if (new File(file).exists() && !new File(file).isDirectory()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + + bamFile = new File(cmdLineInputFiles[0]); + inputFile = new File(cmdLineInputFiles[1]); + outputFile = new File(cmdLineOutputFiles[0]); + logger.info("Bam file: " + bamFile); + logger.info("Input file: " + inputFile); + logger.info("Output file: " + outputFile); + + } + + return returnStatus; + } + + public static void main(String[] args) throws Exception { + PileupStats sp = new PileupStats(); + sp.setup(args); + int exitStatus = sp.engage(); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } +} diff --git a/qmule/src/org/qcmg/qmule/QMuleException.java-- b/qmule/src/org/qcmg/qmule/QMuleException.java-- new file mode 100644 index 000000000..2e85e03f0 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/QMuleException.java-- @@ -0,0 +1,28 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +public final class QMuleException extends Exception { + private static final long serialVersionUID = -4575755996356751582L; + + public QMuleException(final String identifier) { + super(Messages.getMessage(identifier)); + } + + public QMuleException(final String identifier, final String argument) { + super(Messages.getMessage(identifier, argument)); + } + + public QMuleException(final String identifier, final String arg1, final String arg2) { + super(Messages.getMessage(identifier, arg1, arg2)); + } + + public QMuleException(final String identifier, final String arg1, final String arg2, final String arg3) { + super(Messages.getMessage(identifier, arg1, arg2, arg3)); + } + + public QMuleException(final String identifier, final Object[] arguments) { + super(Messages.getMessage(identifier, arguments)); + } +} diff --git a/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- b/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- new file mode 100644 index 000000000..eece05fe3 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- @@ -0,0 +1,187 @@ +/** + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+*/ +package org.qcmg.qmule; + + +import htsjdk.tribble.readers.TabixReader; + +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.header.VcfHeader; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.vcf.VCFFileReader; + + +public class QueryCADDLib { +// protected final static ArrayList libBlocks = new ArrayList<>(); +// protected final static ArrayList inputBlocks = new ArrayList<>(); +// protected final static ArrayList outputBlocks = new ArrayList<>(); + + protected final static Map positionRecordMap = new HashMap(); + protected static long outputNo = 0; + protected static long blockNo = 0; + protected static long inputNo = 0; + final String CADD = "CADD"; + + public QueryCADDLib(final String input_gzip_file, final String vcf, final String output, final int gap) throws IOException{ + + TabixReader tabix = new TabixReader( input_gzip_file); + String chr = null; + int pos = 0; + int start = -1; + + System.out.println("Below is the stats for each queried block, follow the format \norder: query(ref,start,end) [CADDLibBlockSize, inputVariantNo, outputVariantNo, runtime]"); + + try (VCFFileReader reader = new VCFFileReader(vcf); + FileWriter writer = new FileWriter(new File(output))) { + for (final VcfRecord re : reader){ + if(re.getChromosome().equals(chr) && + (re.getPosition() - pos) < gap ){ + pos = re.getPosition(); + add2Map(re); + }else{ + //s1: query(chr:start:pos), and output + if(chr != null){ + if(chr.startsWith("chr")) chr = chr.substring(3); + TabixReader.Iterator it = tabix.query(chr, start-1, pos); + //debug + System.out.print(String.format("%8d: query(%s, %8d, %8d) ", blockNo++, chr, start, pos)); + query( it, writer ); + + } + //s2: reset +// //debug bf clear +// for( Entry entry: positionRecordMap.entrySet()){ +// if(entry.getValue().getFilter() == null) +// System.out.println(entry.getValue().toString()); +// } + + positionRecordMap.clear(); + chr = re.getChromosome(); + start = re.getPosition(); + pos = re.getPosition(); + add2Map(re); + } + } + //last block + if(chr != null){ + if(chr.startsWith("chr")) chr = chr.substring(3); + TabixReader.Iterator it = tabix.query(chr, start, pos); + query( it, writer ); + } + + }//end try + + System.out.println("total input variants is " + inputNo); + System.out.println("total outputed and annotated variants is " + outputNo); + System.out.println("total query CADD library time is " + blockNo); + + } + + /** + * it remove "chr" string from reference name if exists + * @param re input vcf record + */ + private void add2Map(VcfRecord re){ + ChrPosition chr = re.getChrPosition(); + if(chr.getChromosome().startsWith("chr")) + chr = new ChrRangePosition(re.getChromosome().substring(3), re.getChrPosition().getStartPosition(), re.getChrPosition().getEndPosition()); // orig.getChromosome().substring(3); + + + re.setFilter(null); //for debug + positionRecordMap.put(chr, re); + } + + + private void query(TabixReader.Iterator it,FileWriter writer ) throws IOException{ + long startTime = System.currentTimeMillis(); + + String line; + String[] eles; + String last = null; + + int blockSize = 0; + int outputSize = 0; + + while(( line = it.next())!= null){ + blockSize ++; + eles = TabTokenizer.tokenize(line, '\t'); 
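+// The code below reads the CADD library line positionally: column 0 is the chromosome, column 1
+// the 1-based start, column 2 the reference allele (whose length fixes the end coordinate), and
+// column 4 the alternate allele. The chromosome:pos:ref:alt string held in 'last' suppresses
+// repeated library rows for the same variant, so only the first annotation entry is used.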
+ int s = Integer.parseInt(eles[1]); //start position = second column + int e = s + eles[2].length() - 1; //start position + length -1 + + //only retrive the first annotation entry from CADD library + String entry = eles[0] + ":" + eles[1] + ":" +eles[2]+ ":" + eles[4]; + if(entry.equals(last)) continue; + else last = entry; + + VcfRecord inputVcf = positionRecordMap.get(new ChrRangePosition(eles[0], s, e )); + + if ( (null == inputVcf) || !inputVcf.getRef().equalsIgnoreCase(eles[2])) continue; + + String[] allels = {inputVcf.getAlt()}; + if(inputVcf.getAlt().contains(",")) + allels = TabTokenizer.tokenize(inputVcf.getAlt(), ','); + + String cadd = ""; + + //it will exit loop once find the matched allele + for(String al : allels) + if(al.equalsIgnoreCase(eles[4])){ + cadd = String.format("(%s=>%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s)", eles[2],eles[4],eles[8],eles[10],eles[11],eles[12],eles[17], + eles[21],eles[26],eles[35],eles[39],eles[72],eles[82],eles[83],eles[86],eles[92],eles[92],eles[93],eles[96]); + String info = inputVcf.getInfoRecord().getField(CADD); + info = (info == null)? CADD + "=" + cadd : CADD + "=" + info + "," + cadd; + inputVcf.appendInfo( info); + + writer.append(inputVcf.toString() + "\n"); + outputSize ++; + } + } + + //get stats + long endTime = System.currentTimeMillis(); + String time = QLogger.getRunTime(startTime, endTime); + System.out.println(String.format("[ %8d,%8d,%8d, %s ] ", blockSize, positionRecordMap.size(), outputSize, time)); + inputNo += positionRecordMap.size(); + outputNo += outputSize; + } + + + public static void main(String[] args) { + + long startTime = System.currentTimeMillis(); + try{ + String gzlib = args[0]; + String input = args[1]; + String output = args[2]; + int gap = 1000; + if(args.length > 3) + gap = Integer.parseInt(args[3]); + + new QueryCADDLib(gzlib, input, output, gap); + + }catch(Exception e){ + e.printStackTrace(); + System.err.println("Usage: java -cp qmule-0.1pre.jar QueryCADDLib "); + } + + long endTime = System.currentTimeMillis(); + String time = QLogger.getRunTime(startTime, endTime); + System.out.println("run Time is " + time); + } +} + diff --git a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java index f1a3ff024..86499809c 100644 --- a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java +++ b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java @@ -24,8 +24,8 @@ import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.TabTokenizer; import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; import org.qcmg.vcf.VCFFileReader; public class ReAnnotateDccWithDbSNP { diff --git a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- new file mode 100644 index 000000000..86499809c --- /dev/null +++ b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- @@ -0,0 +1,280 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionComparator; +import org.qcmg.common.string.StringUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; +import org.qcmg.vcf.VCFFileReader; + +public class ReAnnotateDccWithDbSNP { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + private String header; + + private static QLogger logger; + + private Map dccs = new HashMap(); + + + public int engage() throws Exception { + + loadDccFile(); + + updateDBSnpData(); + + writeDCCOutput(); + + + return exitStatus; + } + + private void writeDCCOutput() throws Exception { + if ( ! StringUtils.isNullOrEmpty(cmdLineOutputFiles[0])) { + FileWriter writer = new FileWriter(new File(cmdLineOutputFiles[0])); + try { + //sort + List data = new ArrayList(dccs.keySet()); + Collections.sort(data, new ChrPositionComparator()); + + + writer.write(header + "\tdbSnpVer\n"); + + for (ChrPosition cp : data) { + String[] dcc = dccs.get(cp); + StringBuilder sb = new StringBuilder(); + for (String s : dcc) { + if (sb.length() > 0) sb.append('\t'); + sb.append(s); + } + writer.write(sb.toString() + '\n'); + } + + } finally { + writer.close(); + } + } + } + + + private void loadDccFile() throws Exception { + logger.info("Attempting to load dcc data"); + TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); + int count = 0; + try { + for (TabbedRecord rec : reader) { + if (++count == 1) { // header line + header = rec.getData(); + continue; + } + String[] params = TabTokenizer.tokenize(rec.getData()); + ChrPosition cp = ChrPointPosition.valueOf(params[4], Integer.parseInt(params[5])); + + // reset dbsnpid + params[20] = null; +// StringBuilder sb = new StringBuilder(); +// for (String s : params) { +// if (sb.length() > 0) sb.append('\t'); +// sb.append(s); +// } +// rec.setData(sb.toString()); + dccs.put(cp, params); + } + } finally { + reader.close(); + } + logger.info("Attempting to load dcc data - DONE with " + dccs.size() + " entries"); + } + + private void updateDBSnpData() throws Exception { + + VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[1])); + + int count = 0, multipleVersions = 0; + int pre30 = 0, thirty = 0, thirtyOne = 0, thirtyTwo = 0, thirtyThree = 0, thirtyFour = 0, thirtyFive = 0; + try { + for (VcfRecord dbSNPVcf : reader) { + if (++count % 1000000 == 0) + logger.info("hit " + count + " dbsnp records"); + + if ( ! StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "VC=SNV", false)) continue; + // vcf dbSNP record chromosome does not contain "chr", whereas the positionRecordMap does - add + String[] params = dccs.get(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition())); + if (null == params) continue; + + // if no dbsnp data - continue + String previousDBSnpValue = params[20]; + if ( ! 
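+// The condition continued on the next line skips DCC positions that already carry a dbSNP id
+// (counted as multipleVersions); otherwise the block below pulls the build out of the VCF INFO
+// column. For an INFO string such as "RSPOS=1234;dbSNPBuildID=132;VC=SNV" (hypothetical), the
+// substring between "dbSNPBuildID=" and the following ';' is "132", which is tallied in the
+// 130-135 counters and appended to the DCC record as the extra dbSnpVer column.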
StringUtils.isNullOrEmpty(previousDBSnpValue)) { + multipleVersions++; + continue; + } + +// logger.info("Resetting previousDBSnpValue of: " + previousDBSnpValue + " to " + dbSNPVcf.getId()); + + // only proceed if we have a SNP variant record + int startIndex = dbSNPVcf.getInfo().indexOf("dbSNPBuildID=") + 13; + int endIndex = dbSNPVcf.getInfo().indexOf(";" , startIndex); + String dbSnpVersion = dbSNPVcf.getInfo().substring(startIndex, endIndex); +// logger.info("dbsnp version = " + dbSnpVersion); + + int dbSnpVersionInt = Integer.parseInt(dbSnpVersion); + if (dbSnpVersionInt < 130) pre30++; + else if (dbSnpVersionInt == 130) thirty++; + else if (dbSnpVersionInt == 131) thirtyOne++; + else if (dbSnpVersionInt == 132) thirtyTwo++; + else if (dbSnpVersionInt == 133) thirtyThree++; + else if (dbSnpVersionInt == 134) thirtyFour++; + else if (dbSnpVersionInt == 135) thirtyFive++; + else if (dbSnpVersionInt > 135) logger.info("hmmm: " + dbSnpVersionInt); + + params[20] = dbSNPVcf.getId(); + params = Arrays.copyOf(params, params.length + 1); + params[params.length -1] = dbSnpVersion; + dccs.put(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition()), params); + + +// GenotypeEnum tumour = snpRecord.getTumourGenotype(); +// //TODO should we continue if the tumour Genotype is null?? +// if (null == tumour) continue; +// +// // multiple dbSNP entries can exist for a position. +// // if we already have dbSNP info for this snp, check to see if the dbSNP alt is shorter than the existing dbSNP record +// // if so, proceed, and re-write dbSNP details (if applicable). +// int dbSNPAltLengh = dbSNPVcf.getAlt().length(); +// if (snpRecord.getDbSnpAltLength() > 0 && dbSNPAltLengh > snpRecord.getDbSnpAltLength()) { +// continue; +// } +// +// // deal with multiple alt bases +// String [] alts = null; +// if (dbSNPAltLengh == 1) { +// alts = new String[] {dbSNPVcf.getAlt()}; +// } else if (dbSNPAltLengh > 1){ +// alts = TabTokenizer.tokenize(dbSNPVcf.getAlt(), ','); +// } +// +// if (null != alts) { +// for (String alt : alts) { +// +// GenotypeEnum dbSnpGenotype = BaseUtils.getGenotypeEnum(dbSNPVcf.getRef() + alt); +// if (null == dbSnpGenotype) { +// logger.warn("Couldn't get Genotype from dbSNP position with variant: " + alt); +// continue; +// } +//// // no longer flip the genotype as dbSNP is reporting on the +ve strand +////// if (reverseStrand) { +////// dbSnpGenotype = dbSnpGenotype.getComplement(); +////// } +// if (tumour == dbSnpGenotype || (tumour.isHomozygous() && dbSnpGenotype.containsAllele(tumour.getFirstAllele()))) { +// boolean reverseStrand = StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "RV", false); +//// boolean reverseStrand = VcfUtils.isDbSNPVcfRecordOnReverseStrand(dbSNPVcf.getInfo()); +// snpRecord.setDbSnpStrand(reverseStrand ? 
'-' : '+'); +// snpRecord.setDbSnpId(dbSNPVcf.getId()); +// snpRecord.setDbSnpGenotype(dbSnpGenotype); +// snpRecord.setDbSnpAltLength(dbSNPAltLengh); +// break; +// } +// } +// } + } + } finally { + reader.close(); + } + logger.info("STATS:"); + logger.info("No of dcc records with dbSNP version of pre 130: " + pre30); + logger.info("No of dcc records with dbSNP version of 130: " + thirty); + logger.info("No of dcc records with dbSNP version of 131: " + thirtyOne); + logger.info("No of dcc records with dbSNP version of 132: " + thirtyTwo); + logger.info("No of dcc records with dbSNP version of 133: " + thirtyThree); + logger.info("No of dcc records with dbSNP version of 134: " + thirtyFour); + logger.info("No of dcc records with dbSNP version of 135: " + thirtyFive); + logger.info("No of dcc records with duplicate dbSNP versions : " + multipleVersions); + logger.info("Total no of dcc records with dbSNP data : " + (pre30 + thirty + thirtyOne + thirtyTwo + thirtyThree + thirtyFour + thirtyFive)); + } + + public static void main(String[] args) throws Exception { + ReAnnotateDccWithDbSNP sp = new ReAnnotateDccWithDbSNP(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(ReAnnotateDccWithDbSNP.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("ReAnnotateDccWithDbSNP", ReAnnotateDccWithDbSNP.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- b/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- new file mode 100644 index 000000000..ee8018ccc --- /dev/null +++ b/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- @@ -0,0 +1,152 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import htsjdk.tribble.readers.TabixReader; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.HashSet; +import java.util.Set; +import java.util.zip.GZIPInputStream; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.util.FileUtils; +import org.qcmg.vcf.VCFSerializer; + + +public class ReadPartGZFile { + +// static InputStream getInputStream(File input_gzip_file) throws FileNotFoundException, IOException{ +// InputStream inputStream; +// // if (FileUtils.isFileGZip(input_gzip_file)) { +// if (FileUtils.isInputGZip(input_gzip_file)) { +// GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(input_gzip_file)); +// try(InputStreamReader streamReader = new InputStreamReader(gzis)){ +// inputStream = new GZIPInputStream(new FileInputStream(input_gzip_file)); +// } +// } else { +// FileInputStream stream = new FileInputStream(input_gzip_file); +// try(InputStreamReader streamReader = new InputStreamReader(stream)){ +// BufferedReader in = new BufferedReader(streamReader); +// inputStream = new FileInputStream(input_gzip_file); +// } +// } +// return inputStream; +// } + + + + + ReadPartGZFile(File input_gzip_file, int no) throws Exception{ + + //get a new stream rather than a closed one + InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? + new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); + + try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ + int num = 0; + String line; + while( (line = reader.readLine() ) != null){ + if( ++num > no) break; + System.out.println(line); + } + } + + + } + static void countLines(File input_gzip_file) throws FileNotFoundException, IOException, InterruptedException{ + HashSet uniqRef = new HashSet(); + + long startTime = System.currentTimeMillis(); + long num = 0; +// InputStream inputStream = getInputStream(input_gzip_file); + InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? 
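+// FileUtils.isInputGZip is not shown in this patch; presumably it sniffs the two GZIP magic bytes
+// (0x1f, 0x8b) at the start of the stream rather than trusting the file extension, which is why
+// the same call is safe for both .gz and plain-text inputs here. A minimal stand-alone check
+// along those lines (illustrative only, not the qcommon implementation):
+//
+//     try (java.io.InputStream in = new java.io.FileInputStream(input_gzip_file)) {
+//         int b1 = in.read(), b2 = in.read();
+//         boolean gzip = (b1 == 0x1f && b2 == 0x8b);
+//     }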
+ new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); + + try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ + String line; + while( (line = reader.readLine() ) != null){ + uniqRef.add(line.split("\\t")[0]); + num ++; + } + } + + System.out.println(String.format("Read file: %s\nLine number: %d", input_gzip_file.getAbsoluteFile(), num)); + System.out.println("Uniq reference name are " + uniqRef ); + + + } + + static void countUniqPosition(String input_gzip_file, String indexFile) throws IOException{ + // TabixReader tabix = new TabixReader( input_gzip_file, indexFile); + TabixReader tabix = new TabixReader( input_gzip_file); + Set chrs = tabix.getChromosomes(); + HashSet uniqPos = new HashSet(); + long total_uniq = 0; + long num = 0; + System.out.println("total reference number is " + chrs.size() + " from " + input_gzip_file); + for(String str : chrs){ + + uniqPos.clear(); + TabixReader.Iterator it = tabix.query(str); + + + String line; + while(( line = it.next())!= null){ + // String[] eles = TabTokenizer.tokenize(line, '\t'); + // uniqPos.add(eles[1]); + // uniqPos.add(line.split("\\t")[1]); + num ++; + } + + //debug + System.out.println("There are " + num+ " position recorded in reference " + str); + num ++; + + + // total_uniq += uniqPos.size(); + // System.out.println("There are " + uniqPos.size() + " uniq position recorded in reference " + str); + + } + +// System.out.println("Total uniq position recorded in all reference is " + total_uniq); +// System.out.println("Total records in whole file is " + num); + + } + + public static void main(String[] args) { + try{ + long startTime = System.currentTimeMillis(); + File input = new File(args[0]); + int no = Integer.parseInt(args[1]); + + if(no > 0) + new ReadPartGZFile(input, no ); + else if (no == 0) + countUniqPosition(args[0], null); + else + countLines(input); + + long endTime = System.currentTimeMillis(); + String time = QLogger.getRunTime(startTime, endTime); + System.out.println("run Time is " + time); + + }catch(Exception e){ + e.printStackTrace(); + //System.out.println(e.printStackTrace();); + System.err.println("Usage: java -cp qmule-0.1pre.jar ReadPartGZFile "); + + } + + } +} diff --git a/qmule/src/org/qcmg/qmule/ReadsAppend.java-- b/qmule/src/org/qcmg/qmule/ReadsAppend.java-- new file mode 100644 index 000000000..4c2ce5fab --- /dev/null +++ b/qmule/src/org/qcmg/qmule/ReadsAppend.java-- @@ -0,0 +1,95 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.IOException; + +import htsjdk.samtools.*; + +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; + +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + + +public class ReadsAppend { + File[] inputs; + File output; + + + ReadsAppend(File output, File[] inputs ) throws Exception{ + this.output = output; + this.inputs = inputs; + merging(); + } + + /** + * retrive the CS and CQ value from BAM record to output csfasta or qual file + * @throws Exception + */ + void merging() throws Exception{ + System.out.println("start time : " + getTime()); + + List readers = new ArrayList<>(); + for (File f: inputs) { + readers.add( SAMFileReaderFactory.createSAMFileReader(f)); + } + + SAMFileHeader header = readers.get(0).getFileHeader().clone(); + + SAMOrBAMWriterFactory factory = new SAMOrBAMWriterFactory(header, true, output,2000000 ); + SAMFileWriter writer = factory.getWriter(); + + for( SamReader reader : readers){ + for( SAMRecord record : reader) { + writer.addAlignment(record); + } + reader.close(); + } + + factory.closeWriter(); + System.out.println("end time : " + getTime()); + System.exit(0); + } + + + private String getTime(){ + Calendar currentDate = Calendar.getInstance(); + SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); + return "[" + formatter.format(currentDate.getTime()) + "]"; + } + public static void main(final String[] args) throws IOException, InterruptedException { + + try{ + if(args.length < 2) + throw new Exception("missing inputs or outputs name"); + + File output = new File(args[0]); + File[] inputs = new File[args.length-1]; + for (int i = 1; i < args.length; i++) { + inputs[i-1] = new File(args[i]) ; + + System.out.println(inputs[i-1].toString()); + } + + + new ReadsAppend(output, inputs ); + + System.exit(0); + }catch(Exception e){ + System.err.println(e.toString()); + Thread.sleep(1); + System.out.println("usage: qmule org.qcmg.qmule.ReadsAppend "); + System.exit(1); + } + + } +} diff --git a/qmule/src/org/qcmg/qmule/RunGatk.java-- b/qmule/src/org/qcmg/qmule/RunGatk.java-- new file mode 100644 index 000000000..b2e13458d --- /dev/null +++ b/qmule/src/org/qcmg/qmule/RunGatk.java-- @@ -0,0 +1,141 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
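+// Note on ReadsAppend above: despite the javadoc mentioning CS/CQ extraction, merging() simply
+// copies every record from each input BAM into the output, reusing a clone of the first input's
+// header, so the inputs are expected to share compatible headers. Invocation takes the output
+// first, then the inputs (file names hypothetical):
+//
+//     qmule org.qcmg.qmule.ReadsAppend merged.bam lane1.bam lane2.bam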
+ */ +package org.qcmg.qmule; + + +public class RunGatk { + +// public static String PATH="/panfs/home/oholmes/devel/QCMGScripts/o.holmes/gatk/pbs4java/"; +// public static final String PARAMS=" -l walltime=124:00:00 -v patient="; +// public static int jobCounter = 1; +// +// // inputs +// public static String patientId; +// public static String mixture; +// public static String normalBamFile; +// public static String tumourBamFile; +// public static String outputDir; +// +// public static String patientParams; +// public static String nodeName; +// public static String startPoint; +// +// public static void main(String[] args) throws IOException, InterruptedException, Exception { +// +// if (args.length < 5) throw new IllegalArgumentException("USAGE: RunGatk []"); +// +// patientId = args[0]; +// mixture = args[1]; +// normalBamFile = args[2]; +// tumourBamFile = args[3]; +// outputDir = args[4]; +// if (args.length == 6) { +// PATH = args[5]; +// } +// if (args.length == 7) { +// PATH = args[6]; +// } +// +// patientParams = PARAMS + patientId + ",mixture=" + mixture; +// +// String mergeParams = patientParams + ",normalBam=" + normalBamFile + ",tumourBam=" + tumourBamFile; +// +// +// String jobName = jobCounter++ + "RG_" + mixture; +// System.out.println("About to submit merge job"); +// +// Job merge = new Job(jobName, PATH + "run_gatk_merge_1.sh" + mergeParams); +//// merge.setQueue(queue); +// merge.queue(); +// String status = merge.getStatus(); +// System.out.println("1st job status: " + status); +// while ("N/A".equals(status)) { +// Thread.sleep(1500); +// String [] jobs = Job.SearchJobsByName(jobName, true); +// System.out.println("Sleeping till job status changes..." + status + ", id: " + merge.getId() + " no of jobs: " + jobs.length); +// +// for (int i = 0 ; i < jobs.length ; i++) { +// System.out.println("jobs[" + i + "] : " + jobs[i]); +// merge = Job.getJobById(jobs[i]); +// status = merge.getStatus(); +// System.out.println("job.getJobStatus: " + Job.getJobStatus(jobs[i])); +// +// } +// } +// nodeName = merge.getExecuteNode().substring(0, merge.getExecuteNode().indexOf('/')); +// +// +// +// System.out.println("About to submit clean 1 job"); +// // clean 1 +// String script = PATH + "run_gatk_clean_1.sh" + patientParams; +// Job clean1 = submitDependantJob(merge, "1", script, true); +// +// +// System.out.println("About to submit clean 2 job"); +// // clean 2 +// script = PATH + "run_gatk_clean_2.sh" + patientParams; +// Job clean2 = submitDependantJob(clean1, "1", script, true); +// +// // clean 3 +// script = PATH + "run_gatk_clean_3.sh" + patientParams; +// Job clean3 = submitDependantJob(clean2, "6", script, true); +// +//// String scriptToRun = PATH + "run_gatk_clean_4.sh" + patientParams; +// +// System.out.println("About to submit clean 4 job"); +// script = PATH + "run_gatk_clean_4.sh" + patientParams; +// Job clean4 = submitDependantJob(clean3, "1", script, true); +// +// // split +// System.out.println("About to submit split job"); +// script = PATH + "run_gatk_split.sh" + patientParams; +// Job split = submitDependantJob(clean4, "1", script, true); +// +// runMergeDelUG(split, "ND"); +// runMergeDelUG(split, "TD"); +// } +// +// private static void runMergeDelUG(Job splitJob, String type) throws IOException, InterruptedException, Exception { +// String script = PATH + "run_gatk_merge_2.sh" + patientParams + ",type=" + type; +// Job mergeJob = submitDependantJob(splitJob, "1", script, true); +// +// // delete +// script = PATH + "run_gatk_del_split_files.sh" + 
patientParams + ",type=" + type; +// Job deleteJob = submitDependantJob(mergeJob, "1", script, true); +// +// +// // UG +// script = PATH + "run_gatk_UG.sh" + patientParams + ",type=" + type; +// Job unifiedGenotyperJob = submitDependantJob(mergeJob, "4", script, false); +// +// } +// +// private static Job submitDependantJob(Job depJob, String ppn, String script, boolean onNode) throws IOException, InterruptedException, Exception { +// +// String jobName; +// ArrayList dependantJobs; +// String[] jobs; +// jobName = jobCounter++ + "RG_" + mixture; +// Job newJob = new Job(jobName, script); +//// Job newJob = new Job(jobName, PATH + script + patientParams + ",type=" + type); +//// newJob.setQueue(queue); +// if (onNode) { +// newJob.setExecuteNode(nodeName); +// newJob.setNodes(nodeName); +// } +// newJob.setPpn(ppn); +// dependantJobs = new ArrayList(); +// dependantJobs.add(depJob.getId() + " "); +// newJob.setAfterOK(dependantJobs); +// newJob.queue(); +// // sleep to allow job to make it to the queue +// Thread.sleep(1000); +// +// jobs = Job.SearchJobsByName(jobName, true); +// newJob = Job.getJobById(jobs[0]); +// return newJob; +// } + +} diff --git a/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java-- b/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java-- new file mode 100644 index 000000000..6730aa5ff --- /dev/null +++ b/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java-- @@ -0,0 +1,368 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +public class SmithWatermanGotoh { + + private final float gapOpen; + private final float gapExtend; + private final int matchScore; + private final int mismatchScore; + private final String sequenceA; + private final String sequenceB; + private final int rows; + private final int columns; + private int[][] pointerMatrix; + private short[][] verticalGaps; + private short[][] horizontalGaps; + private int bestRow; + private int bestColumn; + private float bestScore; + private static final int STOP = 0; + private static final int LEFT = 1; + private static final int DIAGONAL = 2; + private static final int UP = 3; + private static final String GAP = "-"; + private static final String EMPTY = " "; + private static final String MISMATCH = "."; + private static final String MATCH = "|"; + private static final String TAB = ""; + + public SmithWatermanGotoh(File fileA, File fileB, int matchScore, int mismatchScore, float gapOpen, float gapExtend) throws IOException { + + this.sequenceA = readFastaFile(fileA); + this.sequenceB = readFastaFile(fileB); + this.gapOpen = gapOpen; + this.gapExtend = gapExtend; + this.matchScore = matchScore; + this.mismatchScore = mismatchScore; + this.rows = sequenceA.length() + 1;//i + this.columns = sequenceB.length() + 1;//j + align(); + } + + public SmithWatermanGotoh(String a, String b, int matchScore, int mismatchScore, float gapOpen, float gapExtend) { + + this.sequenceA = a; + this.sequenceB = b; + this.gapOpen = gapOpen; + this.gapExtend = gapExtend; + this.matchScore = matchScore; + this.mismatchScore = mismatchScore; + this.rows = sequenceA.length() + 1;//i + this.columns = sequenceB.length() + 1;//j + align(); + } + + private String readFastaFile(File file) throws IOException { + + try (FileInputStream inputStream = new 
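+// readFastaFile (continued below) keeps only the sequence: when the file starts with a '>'
+// header the first line is dropped, all newlines are stripped and the result is upper-cased,
+// so a file containing ">chr1_fragment\nacgt\nACGT\n" (hypothetical) yields "ACGTACGT".
+// The alignment itself uses Gotoh's affine gap weight described in fillMatrix() below,
+// w(x) = gapOpen + gapExtend * x; e.g. with gapOpen = 10 and gapExtend = 0.5 (illustrative
+// values only), a 4-base gap costs 10 + 0.5 * 4 = 12.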
FileInputStream(file)) { + StringBuilder buffer = new StringBuilder(); + int ch; + while ((ch = inputStream.read()) != -1) { + buffer.append((char)ch); + } + inputStream.close(); + + String seq = buffer.toString(); + + if (seq.startsWith(">")) { + int index = seq.indexOf("\n"); + return seq.substring(index, seq.length()).replace("\n", "").toUpperCase(); + } else { + return seq.replace("\n", "").toUpperCase(); + } + } + } + + private void align() { + fillMatrix(); + traceback(); +// System.out.println(getDiffs()); + } + + private void fillMatrix() { + //etutorials.org/Misc/blast/Part+II+Theory/Chapter+3.+Sequence+Alignment/3.2+Local+Alignment+Smith-Waterman/ + //Gotoh: http://www.cse.msu.edu/~cse891/Sect001/notes_alignment.pdf + //https://github.com/ekg/smithwaterman/blob/master/SmithWatermanGotoh.cpp + //http://cci.lbl.gov/cctbx_sources/mmtbx/alignment.py + // + //The dynamic programming algorithm was improved in performance by Gotoh (1982) by using the linear +// relationship for a gap weight wx = g + rx, where the weight for a gap of length x is the sum of a gap +// opening penalty (g) and a gap extension penalty (r) times the gap length (x), and by simplifying +// the dynamic programming algorithm. He reasoned that two of the terms that are maximized in the +// dynamic programming algorithm and designated here Pij and Qij depend only on the values in the +// current and previous row and column, as indicated below. + + initialize(); + + //storage for current calculations + float[] bestScores = new float[columns];//score if xi aligns to gap after yi + float[] queryGapScores = new float[columns];//best score of alignment x1..xi to y1..yi + + for (int i=0; i queryGapOpenScore) { + //add extend score + queryGapScores[column] = queryGapExtendScore; + //increase size of gap + int gapLength = verticalGaps[row-1][column] + 1; + verticalGaps[row][column] = (short) gapLength; + } else { + //add open score + queryGapScores[column] = queryGapOpenScore; + } + + //calculate horizontal gaps + referenceGapExtendScore = currentAnchorGapScore - gapExtend; + referenceGapOpenScore = bestScores[column-1] - gapOpen; + + if (referenceGapExtendScore > referenceGapOpenScore) { + //add extend score + currentAnchorGapScore = referenceGapExtendScore; + //increase size of gap + short gapLength = (short) (horizontalGaps[row][column-1] + 1); + horizontalGaps[row][column] = gapLength; + } else { + //add open score + currentAnchorGapScore = referenceGapOpenScore; + } + + //test scores + bestScoreDiagonal = bestScores[column]; + bestScores[column] = findMaximum(totalSimilarityScore, queryGapScores[column], currentAnchorGapScore); + + //determine trackback direction + float score = bestScores[column]; + if (score == 0) { + pointerMatrix[row][column] = STOP; + } else if (score == totalSimilarityScore) { + pointerMatrix[row][column] = DIAGONAL; + } else if (score == queryGapScores[column]) { + pointerMatrix[row][column] = UP; + } else { + pointerMatrix[row][column] = LEFT; + } + + //set current cell if this is the best score + if (score > bestScore) { + bestRow = row; + bestColumn = column; + bestScore = score; + } + } + } + } + + + private void initialize() { + pointerMatrix = new int[rows][columns]; + verticalGaps = new short[rows][columns]; + horizontalGaps = new short[rows][columns]; + for (int i=0; i chromosomes = new ArrayList(); +// private int exitStatus; +// private Map> vcfRecords = new HashMap>(); +// private Map> mafRecords = new HashMap>(); +// private Map> gffRecords = new HashMap>(); +// private Map> 
bedRecords = new HashMap>(); +// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); +// private List overlappingMafRecords = new ArrayList(); +// private List notOverlappingMafRecords = new ArrayList(); +// private List overlappingVcfRecords = new ArrayList(); +// private List notOverlappingVcfRecords = new ArrayList(); +// private int overlapCount = 0; +// private int notOverlappingCount = 0; +// private int snpCount; +// private Vector header = new Vector(); +// private String inputSnpType; +// +// private static QLogger logger; +// +// public int engage() throws Exception { +// inputSnpType = null; +// if (cmdLineInputFiles[0].endsWith("maf")) { +// logger.info("MAF File: " + cmdLineInputFiles[0]); +// loadMafFile(); +// inputSnpType = "MAF"; +// if (mafRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from maf file"); +// } +// if (cmdLineInputFiles[0].endsWith("vcf")) { +// logger.info("VCF File: " + cmdLineInputFiles[0]); +// loadVCFFile(); +// inputSnpType = "VCF"; +// if (vcfRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from vcf file"); +// } +// if (cmdLineInputFiles[1].endsWith("bed")) { +// logger.info("BED File: " + cmdLineInputFiles[1]); +// } else if (cmdLineInputFiles[1].endsWith("gff3")) { +// logger.info("GFF3 File: " + cmdLineInputFiles[1]); +// } +// logger.info("Output file: " + cmdLineOutputFiles[0]); +// logger.info("Output file: " + cmdLineOutputFiles[1]); +// +// String fileType = null; +// if (cmdLineInputFiles[1].endsWith("bed")) { +// loadBedFile(); +// fileType = "bed"; +// } else if (cmdLineInputFiles[1].endsWith("gff3")) { +// fileType = "gff3"; +// loadGffFile(); +// } else { +// throw new IllegalArgumentException("File type for reference regions is not recognized. 
Must be bed or gff3"); +// } +// +// Collections.sort(chromosomes,COMPARATOR); +// +// writeHeader(); +// +// if (fileType.equals("bed")) { +// if (bedRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from bed file"); +// for (String c: chromosomes) { +// if (inputSnpType.equals("MAF")) { +// +// compareBedRecordsToMaf(c, bedRecords.get(c)); +// } +// if (inputSnpType.equals("VCF")) { +// compareBedRecordsToVcf(c, bedRecords.get(c)); +// } +// } +// } +// +// if (fileType.equals("gff3")) { +// if (gffRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); +// for (String c: chromosomes) { +// logger.info("Chromosome: " + c); +// if (inputSnpType.equals("MAF")) { +// compareGFFRecordsToMaf(c, gffRecords.get(c)); +// } +// if (inputSnpType.equals("VCF")) { +// compareGFFRecordsToVcf(c, gffRecords.get(c)); +// } +// } +// } +// +// logger.info("SUMMARY"); +// logger.info("Total Records in " +inputSnpType+ ": " + snpCount); +// logger.info("Total Records in supplied reference regions: " + overlapCount); +// logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); +// return exitStatus; +// } +// +// private void loadVCFFile() throws Exception { +// VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[0])); +// try { +// header = new Vector(); +// Iterator iterator = reader.getRecordIterator(); +// snpCount = 0; +// if (reader.getHeader() != null) { +// Iterator iter = reader.getHeader().iterator(); +// while (iter.hasNext()) { +// header.add(iter.next()); +// } +// } +// while (iterator.hasNext()) { +// +// VCFRecord vcfRec = iterator.next(); +// +// snpCount++; +// if (vcfRecords.containsKey(vcfRec.getChromosome())) { +// vcfRecords.get(vcfRec.getChromosome()).add(vcfRec); +// } else { +// List list = new ArrayList(); +// list.add(vcfRec); +// vcfRecords.put(vcfRec.getChromosome(),list); +// } +// if (!chromosomes.contains(vcfRec.getChromosome())) { +// chromosomes.add(vcfRec.getChromosome()); +// } +// } +// logger.info("loaded maf file, total records: " + snpCount); +// } finally { +// reader.close(); +// } +// } +// +// private void loadMafFile() throws Exception { +// TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); +// try { +// header = new Vector(); +// Iterator iterator = reader.getRecordIterator(); +// snpCount = 0; +// if (reader.getHeader() != null) { +// Iterator iter = reader.getHeader().iterator(); +// while (iter.hasNext()) { +// header.add(iter.next()); +// } +// } +// while (iterator.hasNext()) { +// +// TabbedRecord tab = iterator.next(); +// +// if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo")) { +// header.add(tab.getData()); +// continue; +// } +// snpCount++; +// MAFRecord mafRec = convertToMafRecord(tab.getData().split("\t")); +// mafRec.setData(tab.getData()); +// if (mafRecords.containsKey(mafRec.getChromosome())) { +// mafRecords.get(mafRec.getChromosome()).add(mafRec); +// } else { +// List list = new ArrayList(); +// list.add(mafRec); +// mafRecords.put(mafRec.getChromosome(),list); +// } +// if (!chromosomes.contains(mafRec.getChromosome())) { +// chromosomes.add(mafRec.getChromosome()); +// } +// } +// logger.info("loaded maf file, total records: " + snpCount); +// } finally { +// reader.close(); +// } +// } +// +// private void loadBedFile() throws IOException { +// BEDFileReader reader = new BEDFileReader(new File(cmdLineInputFiles[1])); +// try { +// int count = 0; +// for (BEDRecord record : reader) { 
+// count++; +// String chr = record.getChrom(); +// if (inputSnpType.equals("MAF")) { +// chr = record.getChrom().replace("chr", ""); +// } +// if (bedRecords.containsKey(chr)) { +// bedRecords.get(chr).put(record.getChromStart(), record); +// } else { +// TreeMap map = new TreeMap(); +// map.put(record.getChromStart(), record); +// bedRecords.put(chr,map); +// } +// } +// logger.info("loaded bed file, total record: " + count); +// } finally { +// reader.close(); +// } +// +// } +// +// private void loadGffFile() throws Exception { +// GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); +// try { +// int count = 0; +// for (GFF3Record record : reader) { +// count++; +// String chr = record.getSeqId(); +// if (inputSnpType.equals("MAF")) { +// chr = record.getSeqId().replace("chr", ""); +// } +// if (gffRecords.containsKey(chr)) { +// gffRecords.get(chr).put(record.getStart(), record); +// } else { +// TreeMap map = new TreeMap(); +// map.put(record.getStart(), record); +// gffRecords.put(chr,map); +// } +// } +// +// logger.info("loaded gff3 file, total record: " + count); +// } finally { +// reader.close(); +// } +// } +// +// public void compareBedRecordsToVcf(String chromosome, TreeMap map) throws IOException { +// List vcfList = vcfRecords.get(chromosome); +// +// //bed positions are zero based +// if (map != null) { +// +// for (VCFRecord snp : vcfList) { +// Entry floor = map.floorEntry(new Integer(snp.getPosition())); +// Entry ceiling = map.ceilingEntry(new Integer(snp.getPosition())); +// +// if (vcfRecordFallsInBEDRecord(snp, floor) || vcfRecordFallsInBEDRecord(snp, ceiling)) { +// overlapCount++; +// overlappingVcfRecords.add(snp); +// } else { +// notOverlappingCount++; +// notOverlappingVcfRecords.add(snp); +// if (notOverlappingCount % 10000 == 0) { +// logger.info("Processed records: " + notOverlappingCount); +// } +// } +// } +// } else { +// notOverlappingVcfRecords.addAll(vcfList); +// notOverlappingCount += vcfList.size(); +// } +// writeParsedVcfRecords(); +// } +// +// public void compareBedRecordsToMaf(String chromosome, TreeMap map) throws IOException { +// List mafList = mafRecords.get(chromosome); +// +// //bed positions are zero based +// if (map != null) { +// +// for (MAFRecord snp : mafList) { +// +// Entry floor = map.floorEntry(new Integer(snp.getStartPosition())); +// Entry ceiling = map.ceilingEntry(new Integer(snp.getStartPosition())); +// +// if (mafRecordFallsInBEDRecord(snp, floor) || mafRecordFallsInBEDRecord(snp, ceiling)) { +// overlapCount++; +// overlappingMafRecords.add(snp); +// } else { +// notOverlappingCount++; +// notOverlappingMafRecords.add(snp); +// if (notOverlappingCount % 10000 == 0) { +// logger.info("Processed records: " + notOverlappingCount); +// } +// } +// +// } +// } else { +// notOverlappingMafRecords.addAll(mafList); +// notOverlappingCount += mafList.size(); +// } +// writeParsedMafRecords(); +// } +// +// public void compareGFFRecordsToVcf(String chromosome, TreeMap map) throws IOException { +// List vcfList = vcfRecords.get(chromosome); +// +// if (map != null) { +// +// logger.info("List size: " + vcfList.size()); +// for (VCFRecord snp : vcfList) { +// Entry floor = map.floorEntry(new Integer(snp.getPosition())); +// Entry ceiling = map.ceilingEntry(new Integer(snp.getPosition())); +// +// if (vcfRecordFallsInGFF3Record(snp, floor) || vcfRecordFallsInGFF3Record(snp, ceiling)) { +// overlapCount++; +// overlappingVcfRecords.add(snp); +// } else { +// notOverlappingCount++; +// 
notOverlappingVcfRecords.add(snp); +// if (notOverlappingCount % 10000 == 0) { +// logger.info("Processed records: " + notOverlappingCount); +// } +// } +// } +// } else { +// notOverlappingVcfRecords.addAll(vcfList); +// notOverlappingCount += vcfList.size(); +// } +// writeParsedVcfRecords(); +// } +// +// public void compareGFFRecordsToMaf(String chromosome, TreeMap map) throws IOException { +// List mafList = mafRecords.get(chromosome); +// +// if (map != null) { +// +// for (MAFRecord snp : mafList) { +// +// Entry floor = map.floorEntry(new Integer(snp.getStartPosition())); +// Entry ceiling = map.ceilingEntry(new Integer(snp.getStartPosition())); +// +// if (mafRecordFallsInGFF3Record(snp, floor) || mafRecordFallsInGFF3Record(snp, ceiling)) { +// overlapCount++; +// overlappingMafRecords.add(snp); +// } else { +// notOverlappingCount++; +// notOverlappingMafRecords.add(snp); +// if (notOverlappingCount % 10000 == 0) { +// logger.info("Processed records: " + notOverlappingCount); +// } +// } +// } +// } else { +// notOverlappingMafRecords.addAll(mafList); +// notOverlappingCount += mafList.size(); +// } +// writeParsedMafRecords(); +// } +// +// +// private boolean mafRecordFallsInGFF3Record(MAFRecord snp, Entry entry) { +// if (entry != null) { +// if (snp.getStartPosition() >= entry.getValue().getStart() && snp.getStartPosition() <= entry.getValue().getEnd() || +// snp.getEndPosition() >= entry.getValue().getStart() && snp.getEndPosition() <= entry.getValue().getEnd()) { +// return true; +// } +// } +// return false; +// } +// +// private boolean mafRecordFallsInBEDRecord(MAFRecord snp, Entry entry) { +// if (entry != null) { +// if (snp.getStartPosition() >= entry.getValue().getChromStart()+1 && snp.getStartPosition() <= entry.getValue().getChromEnd() || +// snp.getEndPosition() >= entry.getValue().getChromStart()+1 && snp.getEndPosition() <= entry.getValue().getChromEnd()) { +// return true; +// } +// } +// return false; +// } +// +// private boolean vcfRecordFallsInGFF3Record(VCFRecord snp, Entry entry) { +// if (entry != null) { +// if (snp.getPosition() >= entry.getValue().getStart() && snp.getPosition() <= entry.getValue().getEnd()) { +// return true; +// } +// } +// return false; +// } +// +// private boolean vcfRecordFallsInBEDRecord(VCFRecord snp, Entry entry) { +// if (entry != null) { +// if (snp.getPosition() >= entry.getValue().getChromStart()+1 && snp.getPosition() <= entry.getValue().getChromEnd()) { +// return true; +// } +// } +// return false; +// } +// +// public String[] getCmdLineInputFiles() { +// return cmdLineInputFiles; +// } +// +// public void setCmdLineInputFiles(String[] cmdLineInputFiles) { +// this.cmdLineInputFiles = cmdLineInputFiles; +// } +// +// public String[] getCmdLineOutputFiles() { +// return cmdLineOutputFiles; +// } +// +// public void setCmdLineOutputFiles(String[] cmdLineOutputFiles) { +// this.cmdLineOutputFiles = cmdLineOutputFiles; +// } +// +// private void writeParsedMafRecords() throws IOException { +// writeMafRecordsToFile(cmdLineOutputFiles[0], overlappingMafRecords); +// writeMafRecordsToFile(cmdLineOutputFiles[1], notOverlappingMafRecords); +// } +// +// private void writeParsedVcfRecords() throws IOException { +// writeVcfRecordsToFile(cmdLineOutputFiles[0], overlappingVcfRecords); +// writeVcfRecordsToFile(cmdLineOutputFiles[1], notOverlappingVcfRecords); +// } +// +// private void writeHeader() throws IOException { +// writeHeader(cmdLineOutputFiles[0]); +// writeHeader(cmdLineOutputFiles[1]); +// } +// +// private void 
writeHeader(String fileName) throws IOException { +// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); +// +// for (String h: header) { +// writer.write(h + "\n"); +// } +// writer.close(); +// } +// +// private void writeMafRecordsToFile(String fileName, +// List outputRecords) throws IOException { +// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); +// +// for (MAFRecord r: outputRecords) { +// writer.write(r.getData() + "\n"); +// } +// +// writer.close(); +// outputRecords.clear(); +// } +// +// private void writeVcfRecordsToFile(String fileName, +// List outputRecords) throws IOException { +// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); +// +// for (VCFRecord r: outputRecords) { +// writer.write(r.toString() + "\n"); +// } +// +// writer.close(); +// outputRecords.clear(); +// } +// +// +// protected int setup(String args[]) throws Exception{ +// int returnStatus = 1; +// if (null == args || args.length == 0) { +// System.err.println(Messages.USAGE); +// System.exit(1); +// } +// Options options = new Options(args); +// +// if (options.hasHelpOption()) { +// System.err.println(Messages.USAGE); +// options.displayHelp(); +// returnStatus = 0; +// } else if (options.hasVersionOption()) { +// System.err.println(Messages.getVersionMessage()); +// returnStatus = 0; +// } else if (options.getInputFileNames().length < 1) { +// System.err.println(Messages.USAGE); +// } else if ( ! options.hasLogOption()) { +// System.err.println(Messages.USAGE); +// } else { +// // configure logging +// logFile = options.getLogFile(); +// logger = QLoggerFactory.getLogger(SnpToReferenceRegionFilter.class, logFile, options.getLogLevel()); +// logger.logInitialExecutionStats("SnpMafBedFileComparison", SnpToReferenceRegionFilter.class.getPackage().getImplementationVersion(), args); +// +// // get list of file names +// cmdLineInputFiles = options.getInputFileNames(); +// if (cmdLineInputFiles.length < 1) { +// throw new QMuleException("INSUFFICIENT_ARGUMENTS"); +// } else { +// // loop through supplied files - check they can be read +// for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { +// if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { +// throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); +// } +// } +// } +// +// // check supplied output files can be written to +// if (null != options.getOutputFileNames()) { +// cmdLineOutputFiles = options.getOutputFileNames(); +// for (String outputFile : cmdLineOutputFiles) { +// if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) +// throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); +// } +// } +// return engage(); +// } +// return returnStatus; +// } +// +// +// public static void main(String[] args) throws Exception { +// SnpToReferenceRegionFilter sp = new SnpToReferenceRegionFilter(); +// int exitStatus = sp.setup(args); +// if (null != logger) +// logger.logFinalExecutionStats(exitStatus); +// +// System.exit(exitStatus); +// } +// +// public static MAFRecord convertToMafRecord(String[] params) { +// MAFRecord maf = new MAFRecord(); +//// System.out.println(params[0]); +//// maf.setHugoSymbol(params[0]); +//// maf.setEntrezGeneId(params[1]); +//// maf.setCenter(params[2]); +//// maf.setNcbiBuild(Integer.parseInt(params[3])); +// maf.setChromosome(params[0]); +// maf.setStartPosition(Integer.parseInt(params[1])); +// maf.setEndPosition(Integer.parseInt(params[1])); +//// maf.setStrand(params[7].charAt(0)); +//// maf.setVariantClassification(params[8]); +//// maf.setVariantType(params[9]); +//// maf.setRef(params[10]); +//// maf.setTumourAllele1(params[11]); +//// maf.setTumourAllele2(params[12]); +//// maf.setDbSnpId(params[13]); +//// maf.setDbSnpValStatus(params[14]); +//// maf.setTumourSampleBarcode(params[15]); +//// maf.setNormalSampleBarcode(params[16]); +//// maf.setNormalAllele1(params[17]); +//// maf.setNormalAllele2(params[18]); +//// maf.setTumourValidationAllele1(params[19]); +//// maf.setTumourValidationAllele2(params[20]); +//// maf.setNormalValidationAllele1(params[21]); +//// maf.setNormalValidationAllele2(params[22]); +//// maf.setVerificationStatus(params[23]); +//// maf.setValidationStatus(params[24]); +//// maf.setMutationStatus(params[25]); +//// maf.setSequencingPhase(params[26]); +//// maf.setSequencingSource(params[27]); +//// maf.setValidationMethod(params[28]); +//// maf.setScore(params[29]); +//// maf.setBamFile(params[30]); +//// maf.setSequencer(params[31]); +//// // QCMG +//// if (params.length > 32) +//// maf.setFlag(params[32]); +//// if (params.length > 33) +//// maf.setNd(params[33]); +//// if (params.length > 34) +//// maf.setTd(params[34]); +//// if (params.length > 35) +//// maf.setCanonicalTranscriptId(params[35]); +//// if (params.length > 36) +//// maf.setCanonicalAAChange(params[36]); +//// if (params.length > 37) +//// maf.setCanonicalBaseChange(params[37]); +//// if (params.length > 38) +//// maf.setAlternateTranscriptId(params[38]); +//// if (params.length > 39) +//// maf.setAlternateAAChange(params[39]); +//// if (params.length > 40) +//// maf.setAlternateBaseChange(params[40]); +// +// return maf; +// } +// +// public List getChromosomes() { +// return chromosomes; +// } +// +// public void setChromosomes(List chromosomes) { +// this.chromosomes = chromosomes; +// } +// +// public Map> getMafRecords() { +// return mafRecords; +// } +// +// public void setMafRecords(Map> mafRecords) { +// this.mafRecords = mafRecords; +// } +// +// public List getOverlappingRecords() { +// return overlappingMafRecords; +// } +// +// public void setOverlappingRecords(List overlappingRecords) { +// this.overlappingMafRecords = overlappingRecords; +// } +// +// public List getNotOverlappingRecords() { +// return notOverlappingMafRecords; +// } +// +// public void setNotOverlappingRecords(List notOverlappingRecords) { +// this.notOverlappingMafRecords = notOverlappingRecords; +// } +// +// public int getOverlapCount() { +// return overlapCount; +// } +// +// public void setOverlapCount(int overlapCount) { +// this.overlapCount = 
overlapCount; +// } +// +// public int getNotOverlappingCount() { +// return notOverlappingCount; +// } +// +// public void setNotOverlappingCount(int notOverlappingCount) { +// this.notOverlappingCount = notOverlappingCount; +// } +// +// public int getMafCount() { +// return snpCount; +// } +// +// public void setMafCount(int mafCount) { +// this.snpCount = mafCount; +// } +// +// +// +//} diff --git a/qmule/src/org/qcmg/qmule/SubSample.java-- b/qmule/src/org/qcmg/qmule/SubSample.java-- new file mode 100644 index 000000000..63f71a737 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/SubSample.java-- @@ -0,0 +1,165 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.HeaderUtils; +import org.qcmg.picard.SAMFileReaderFactory; + +import htsjdk.samtools.*; + +public class SubSample { + SamReader reader; + SAMFileWriter writer; + double proportion; + QLogger logger; + + SubSample(Options op, QLogger log) throws Exception{ + + proportion = op.getPROPORTION(); + logger = log; + + String[] inputs =op.getInputFileNames(); + String[] outputs =op.getOutputFileNames(); + if(inputs.length == 0 || outputs.length == 0) + throw new Exception("please specify input/output"); + + //get initialized logger + File input = new File(inputs[0]); + File output = new File(outputs[0]); + if(!input.canRead()) + throw new Exception("unreadable input: " + input.getAbsolutePath()); + + reader = SAMFileReaderFactory.createSAMFileReader(input,ValidationStringency.LENIENT); + SAMFileHeader header = reader.getFileHeader(); + if(header.getSortOrder() != SAMFileHeader.SortOrder.queryname){ + throw new Exception("the input BAM is not sorted by queryname"); + } + SAMFileWriterFactory writeFactory = new SAMFileWriterFactory(); + HeaderUtils.addProgramRecord(header, op.getCommandLine(), null ); + + writer = writeFactory.makeSAMOrBAMWriter(header, false, output ); + + + } + + void run() throws Exception{ + int numPair = 0; + int numSingle = 0; + int numtotal = 0; + SAMRecordIterator ie = reader.iterator(); + ArrayList adjacents = new ArrayList(); + adjacents.add(ie.next()); + + while(ie.hasNext()){ + numtotal ++; + SAMRecord record = ie.next(); + + //select reads + if(! record.getReadName().equals(adjacents.get(0).getReadName())){ + //select pairs + if(adjacents.size() > 1) + numPair += selectPair( adjacents); + //select single + else if(Math.random() < proportion ){ + writer.addAlignment(adjacents.get(0)); + numSingle ++; + } + //after reporting clear the arraylist + adjacents.clear(); + } + adjacents.add(record); + + } + + //select last records + if(adjacents.size() > 1) + selectPair( adjacents); + else if(Math.random() < proportion ) + writer.addAlignment(adjacents.get(0)); + + reader.close(); + writer.close(); + + logger.info("total reads in input is " + numtotal); + logger.info("select paired reads is " + numPair); + logger.info("select single reads is " + numSingle); + logger.info("the rate of selected reads is "+ ((double)(numPair + numSingle)) / numtotal); + + } + + private int selectPair(ArrayList pairs) { + + if(pairs.size() == 0 ){ + logger.error("Program Error: select reads from empty arraylist! 
"); + return 0; + } + if(pairs.size() == 1 ){ + logger.error("program Error: single read in paired arraylist -- " + pairs.get(0).getReadName()); + return 0; + } + + int num = 0; + while(pairs.size() >= 2){ + //seek pair one by one + SAMRecord first = pairs.get(0); + SAMRecord mate = null; + pairs.remove(first); + + for(int i = 0; i < pairs.size(); i ++){ + if(first.getReadGroup().getId().equals(pairs.get(i).getReadGroup().getId())){ + mate = pairs.get(i); + pairs.remove(mate); + break; + } + } + + + if(Math.random() < proportion ){ + num ++; //number of selected paired reads + writer.addAlignment(first); + if(mate != null){ + num ++; + writer.addAlignment(mate); + }else{ + logger.error("paired reads missing mate -- " + pairs.get(0).getReadName()); + } + } + } + + return num; + } + + public static void main(String[] args) throws Exception{ + Options op = new Options(SubSample.class, args); + if(op.hasHelpOption()){ + System.out.println(Messages.getMessage("USAGE_SUBSAMPLE")); + op.displayHelp(); + System.exit(0); + } + + String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + QLogger logger = QLoggerFactory.getLogger(SubSample.class, op.getLogFile(), op.getLogLevel()); + try{ + logger.logInitialExecutionStats(SubSample.class.toString(), version, args); + logger.exec("Porportion " + op.getPROPORTION()); + SubSample mySample = new SubSample(op, logger); + mySample.run(); + logger.logFinalExecutionStats(0); + System.exit(0); + }catch(Exception e){ + System.err.println( e.getMessage() + e.toString()); + logger.logFinalExecutionStats(-1); + System.exit(1); + } + } + +} diff --git a/qmule/src/org/qcmg/qmule/TestFileFinder.java-- b/qmule/src/org/qcmg/qmule/TestFileFinder.java-- new file mode 100644 index 000000000..28da0aa08 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/TestFileFinder.java-- @@ -0,0 +1,23 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; + +import org.qcmg.common.util.FileUtils; + +public class TestFileFinder { + public static void main(String[] args) { + File [] files = FileUtils.findDirectories(args[0], "seq_final", true); + System.out.println("no of files: " + files.length); + for (File f : files) { + System.out.println("file found: " + f.getAbsolutePath()); + } +// File [] files = FileUtils.findFiles(args[0], "java", true); +// System.out.println("no of files: " + files.length); +// for (File f : files) { +// System.out.println("file found: " + f.getAbsolutePath()); +// } + } +} diff --git a/qmule/src/org/qcmg/qmule/TestJarUpdate.java-- b/qmule/src/org/qcmg/qmule/TestJarUpdate.java-- new file mode 100644 index 000000000..c1937f55a --- /dev/null +++ b/qmule/src/org/qcmg/qmule/TestJarUpdate.java-- @@ -0,0 +1,191 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.picard.SAMFileReaderFactory; + +public class TestJarUpdate { + + private SAMFileWriter writer; + private SamReader reader; + + private void doWork() throws Exception{ + try { + + LoadReferencedClasses.loadClasses(getClass()); + +// URL className = getClass().getResource(TestJarUpdate.class.getName()); +// if (null != className) +// System.out.println("url: " + className.getFile()); +// else +// System.out.println("url: " + null); +// +// File jarFile = new File(TestJarUpdate.class.getProtectionDomain().getCodeSource().getLocation().toURI()); +// if (null != jarFile) +// System.out.println("jarFile: " + jarFile.getName()); +// else +// System.out.println("jarFile: " + null); +// +// System.out.println("is file type valid jar: " + FileUtils.isFileTypeValid(jarFile, "jar")); +// +// System.out.println("BEFORE: no of loaded packages: " + Package.getPackages().length); +// +// if (FileUtils.isFileTypeValid(jarFile, "jar")) { +// +// // got jar file - load and +// JarFile jf = new JarFile(jarFile); +// Attributes att = jf.getManifest().getMainAttributes(); +// System.out.println("att.size" + att.size()); +// String classpath = att.getValue("Class-Path"); +// System.out.println("classpath: " + classpath); +// +// String [] jars = classpath.split(" "); +// for (String jar : jars) { +// JarFile internalJarFile = new JarFile(jar); +// Enumeration enums = internalJarFile.entries(); +// while (enums.hasMoreElements()) { +// JarEntry je = enums.nextElement(); +// if (FileUtils.isFileTypeValid(je.getName(), "class")) { +// String blah = je.getName().replace(".class", ""); +// blah = blah.replaceAll(System.getProperty("file.separator"), "."); +// System.out.println("about to load class: " + blah); +// this.getClass().getClassLoader().loadClass(blah); +// } +// } +// } +// +// } +// +// System.out.println("AFTER: no of loaded packages: " + Package.getPackages().length); + + + // write to bam file + // sleep for a few mins to allow the sam jar file to be removed/replaced + // close bam file + // tinker with class loader + File inputFile = File.createTempFile("testJarUpdateInput", ".sam"); + inputFile.deleteOnExit(); + File outputFile = File.createTempFile("testJarUpdateOutput", ".bam"); +// outputFile.deleteOnExit(); + + createCoverageSam(inputFile); + + reader = SAMFileReaderFactory.createSAMFileReader(inputFile); + + SAMFileHeader header = reader.getFileHeader(); + List recs = new ArrayList(); + + for( SAMRecord rec : reader) { + recs.add(rec); + } + + + SAMFileWriterFactory factory = new SAMFileWriterFactory(); + + writer = factory.makeSAMOrBAMWriter(header, true, outputFile); + +// for (int i = 0 ; i < 100 ; i++) + for( SAMRecord rec : recs) { + for (int i = 0 ; i < 100 ; i++) + writer.addAlignment(rec); + } + + System.out.println("About to sleep!"); + System.gc(); + Thread.sleep(60000); + System.out.println("Am awake now"); + + close(); + System.out.println("DONE!!!"); + } finally { + System.out.println("about to run close quietly"); + closeQuietly(); + System.out.println("DONE!!! 
again"); + } + } + + + public static void main(String[] args) throws Exception { + TestJarUpdate tju = new TestJarUpdate(); + tju.doWork(); + } + + + private void close() throws Exception { + try { + writer.close(); + reader.close(); + } catch (Exception e) { + System.out.println("Exception caught in close(): "); +// e.printStackTrace(); + throw new Exception("CANNOT_CLOSE_FILES"); + } + } + + private void closeQuietly() { + try { + close(); + } catch (Exception e) { +// e.printStackTrace(); + } + } + + public static final void createCoverageSam(final File fileName) throws Exception { + + OutputStream os = new FileOutputStream(fileName); + PrintStream ps = new PrintStream(os); + + ps.println("@HD VN:1.0 SO:coordinate"); + ps.println("@RG ID:ZZ SM:ES DS:rl=50 "); + ps.println("@RG ID:ZZZ SM:ES DS:rl=50 "); + ps.println("@PG ID:SOLID-GffToSam VN:1.4.3"); + ps.println("@SQ SN:chr1 LN:249250621"); + ps.println("@SQ SN:chr2 LN:243199373"); + ps.println("@SQ SN:chr3 LN:198022430"); + ps.println("@SQ SN:chr4 LN:191154276"); + ps.println("@SQ SN:chr5 LN:180915260"); + ps.println("@SQ SN:chr6 LN:171115067"); + ps.println("@SQ SN:chr7 LN:159138663"); + ps.println("@SQ SN:chr8 LN:146364022"); + ps.println("@SQ SN:chr9 LN:141213431"); + ps.println("@SQ SN:chr10 LN:135534747"); + ps.println("@SQ SN:chr11 LN:135006516"); + ps.println("@SQ SN:chr12 LN:133851895"); + ps.println("@SQ SN:chr13 LN:115169878"); + ps.println("@SQ SN:chr14 LN:107349540"); + ps.println("@SQ SN:chr15 LN:102531392"); + ps.println("@SQ SN:chr16 LN:90354753"); + ps.println("@SQ SN:chr17 LN:81195210"); + ps.println("@SQ SN:chr18 LN:78077248"); + ps.println("@SQ SN:chr19 LN:59128983"); + ps.println("@SQ SN:chr20 LN:63025520"); + ps.println("@SQ SN:chr21 LN:48129895"); + ps.println("@SQ SN:chr22 LN:51304566"); + ps.println("@SQ SN:chrX LN:155270560"); + ps.println("@SQ SN:chrY LN:59373566"); + ps.println("@SQ SN:chrM LN:16571"); + ps.println("1290_738_1025 0 chr1 54026 255 45M5H * 0 0 AACATTCCAAAAGTCAACCATCCAAGTTTATTCTAAATAGATGTG !DDDDDDDDDDDDDDDD''DDDDDD9DDDDDDDDD:<3B''DDD! RG:Z:ZZ CS:Z:T301130201000212101113201021003302230033233111 CQ:Z:BBB=B:@5?>B9A5?>B?'A49<475%@;6<+;9@'4)+8'1?:>"); + ps.println("2333_755_492 16 chr2 10103 255 10H40M * 0 0 CACACCACACCCACACACCACACACCACACCCACACCCAC !=DD?%+DD<)=DDD<@9)9C:DA.:DD>%%,?+%;<-1"); + ps.println("1879_282_595 0 chr3 60775 255 40M10H * 0 0 TCTAAATTTGTTTGATCACATACTCCTTTTCTGGCTAACA !DD,*@DDD''DD>5:DD>;DDDD=CDD8%%DA9-DDC0! RG:Z:ZZ CS:Z:T0223303001200123211133122020003210323011 CQ:Z:=><=,*7685'970/'437(4<:54*:84%%;/3''?;)("); + ps.close(); + os.close(); + } +} diff --git a/qmule/src/org/qcmg/qmule/TestSort.java-- b/qmule/src/org/qcmg/qmule/TestSort.java-- new file mode 100644 index 000000000..cf9faddb6 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/TestSort.java-- @@ -0,0 +1,109 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.IOException; +import java.net.InetAddress; +import java.text.SimpleDateFormat; +import java.util.Calendar; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.picard.SAMFileReaderFactory; + +public class TestSort { + private final File input; + private final File output; + private final int maxRecordsInRam; + private SAMFileHeader.SortOrder sort = SAMFileHeader.SortOrder.unsorted; + + TestSort(final String[] args) throws Exception{ + input = new File(args[0]); + output = new File(args[1]); + maxRecordsInRam = Integer.parseInt(args[2]); + + String sortOrder = args[3]; + if(sortOrder.equalsIgnoreCase("coordinate")) + sort = SAMFileHeader.SortOrder.coordinate; + else if(sortOrder.equalsIgnoreCase("queryname")) + sort = SAMFileHeader.SortOrder.queryname; + else if(! sortOrder.equalsIgnoreCase("unsorted")) + throw new Exception( sortOrder + " isn't valid SAMFileHeader sort order!"); + + System.out.println(getTime() + " host: " + InetAddress.getLocalHost().getHostName()); + System.out.println(getTime() + " input: " + input.getAbsolutePath()); + System.out.println(getTime() + " output: " + output.getAbsolutePath()); + System.out.println(getTime() + " sort order: " + sortOrder); + System.out.println(getTime() + " max Records In RAM: " + maxRecordsInRam); + } + + public void Sorting() throws Exception{ + SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); + SAMFileHeader header = reader.getFileHeader(); + + SAMFileWriterFactory writeFactory = new SAMFileWriterFactory(); + htsjdk.samtools.SAMFileWriterImpl.setDefaultMaxRecordsInRam(maxRecordsInRam ); + header.setSortOrder(sort); + if(sort.equals(SAMFileHeader.SortOrder.coordinate)) + writeFactory.setCreateIndex(true); + final SAMFileWriter writer = writeFactory.makeSAMOrBAMWriter(header, false, output); + + int num = 0; + for (SAMRecord record : reader) { + if(num % maxRecordsInRam == 0) + printRunInfo(num); + + writer.addAlignment(record); + num ++; + } + +// System.out.println(getTime() + " Merging tmp into output BAM, tmp location are " + htsjdk.samtools.util.IOUtil.getDefaultTmpDir()); + reader.close(); + writer.close(); + + System.out.println(getTime() + " created output: " + output.getAbsolutePath()); + } + + private void printRunInfo(int number) throws IOException{ + Runtime runtime = Runtime.getRuntime(); + int mb = 1024 * 1024; + long totalRAM = runtime.totalMemory() / mb; + long usedRAM = (runtime.totalMemory() - runtime.freeMemory()) / mb; + + String dateNow = getTime(); + + String info = String.format("%s read %d record. 
Total memory: %dM, used memory: %dM", + dateNow, number, totalRAM, usedRAM); + + System.out.println(info); + } + + private String getTime(){ + Calendar currentDate = Calendar.getInstance(); + SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); + return "[" + formatter.format(currentDate.getTime()) + "]"; + } + + public static void main(final String[] args) { + try{ + TestSort mysort = new TestSort(args); + mysort.Sorting(); + System.exit(0); + }catch(Exception e){ + System.err.println("usage: qmule.TestSort <input> <output> <maxRecordsInRam> [queryname/coordinate/unsorted]"); + System.err.println(e.toString()); + System.exit(1); + } + + + } +} diff --git a/qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- b/qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- new file mode 100644 index 000000000..c9b4f95f5 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- @@ -0,0 +1,192 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.PileupElement; +import org.qcmg.common.string.StringUtils; +import org.qcmg.common.util.Constants; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.PileupUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.picard.util.PileupElementUtil; +import org.qcmg.pileup.PileupFileReader; + +public class TranscriptomeMule { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private final static int MIN_COVERAGE = 3; + // assuming all the tumours have been merged together, and we only have a single entry +// private static int[] tumourStartPositions = null; + private int[] tumourStartPositions = null; + + private final List<StringBuilder> positions = new ArrayList<>(100000); + + private static QLogger logger; + + public int engage() throws Exception { + logger.info("loading samtools mpileup data"); + walkPileup(cmdLineInputFiles[0]); + logger.info("loading samtools mpileup data - DONE [" + positions.size() + "]"); + + logger.info("outputting data"); + writeOutput(cmdLineOutputFiles[0]); + logger.info("outputting data - DONE"); + + return exitStatus; + } + + private void writeOutput(String outputFile) throws IOException { + FileWriter writer = new FileWriter(outputFile); + String header = "chr\tposition\tref\tpileup"; + + try { + writer.write(header + "\n"); + for (StringBuilder sb : positions) { + writer.write(sb.toString() + Constants.NEW_LINE); + } +// for (QSnpRecord record : positions) +// writer.write(record.getChromosome() + "\t" +// + record.getPosition() + "\t" +// + record.getRef() + "\t" +// + record.getTumourNucleotides() + "\n"); + } finally { + writer.close(); + } + } + + private void parsePileup(String record) { +// private void parsePileup(PileupRecord record) { + String[] params = TabTokenizer.tokenize(record); +// String[] params = tabbedPattern.split(record.getPileup(), -1); + if (null == tumourStartPositions) { + // set up the number of tumour start positions + // dependent on the number of columns in the input + // HUGE assumption that the mpileup data only contains tumour data here... + + //TODO is this right?
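+ // A worked example of the column arithmetic below (illustrative only, assuming the
+ // mpileup input carries two tumour samples): such a line has 3 + 2*3 = 9 tab-separated
+ // columns - chr, pos, ref, then depth/bases/quals per sample - so
+ // noOfSamples = (9 - 3) / 3 = 2 and tumourStartPositions = {3, 6},
+ // i.e. each sample's depth column sits at index (i + 1) * 3.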
+ // first 3 columns are chr pos ref + int noOfSamples = (params.length -3) /3; + tumourStartPositions = new int[noOfSamples]; + for (int i = 0 ; i < noOfSamples ; i++) { + tumourStartPositions[i] = (i+1) * 3; + } + } + + // get coverage for both normal and tumour + int tumourCoverage = PileupUtils.getCoverageCount(params, tumourStartPositions); + if (tumourCoverage < MIN_COVERAGE) return; + + String tumourBases = PileupUtils.getBases(params, tumourStartPositions); + + // means there is an indel at this position - ignore + if (tumourBases.contains("+") || tumourBases.contains("-")) return; + String tumourBaseQualities = PileupUtils.getQualities(params, tumourStartPositions); + + // get bases as PileupElement collections + List tumourBaseCounts = PileupElementUtil.getPileupCounts(tumourBases, tumourBaseQualities); + + // get variant count for both + int tumourVariantCount = PileupElementUtil.getLargestVariantCount(tumourBaseCounts); + + if (tumourVariantCount >= 3) { + // keeper + StringBuilder sb = new StringBuilder(params[0]); + StringUtils.updateStringBuilder(sb, params[1], Constants.TAB); + StringUtils.updateStringBuilder(sb, params[2], Constants.TAB); + StringUtils.updateStringBuilder(sb, PileupElementUtil.getOABS(tumourBaseCounts, params[2].charAt(0)), Constants.TAB); + +// QSnpRecord rec = new QSnpRecord(params[0], Integer.parseInt(params[1]), params[2]); +// rec.setTumourOABS(PileupElementUtil.getOABS(tumourBaseCounts, rec.getRef().charAt(0))); + positions.add(sb); + } + + } + + private void walkPileup(String pileupFileName) throws Exception { + PileupFileReader reader = new PileupFileReader(new File(pileupFileName)); + int count = 0; + try { + for (String record : reader) { +// for (PileupRecord record : reader) { + parsePileup(record); + if (++count % 1000000 == 0) + logger.info("hit " + count + " pileup records, with " + positions.size() + " keepers."); + } + } finally { + reader.close(); + } + } + + public static void main(String[] args) throws Exception { + TranscriptomeMule sp = new TranscriptomeMule(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(TranscriptomeMule.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("Example", TranscriptomeMule.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java index 9346b834f..222727290 100644 --- a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java +++ b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java @@ -21,9 +21,9 @@ import org.qcmg.common.model.ReferenceNameComparator; import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.PileupUtils; -import org.qcmg.gff3.GFF3FileReader; -import org.qcmg.gff3.GFF3Record; -import org.qcmg.gff3.GFF3RecordChromosomeAndPositionComparator; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.qmule.gff3.GFF3RecordChromosomeAndPositionComparator; import org.qcmg.pileup.PileupFileReader; public class WiggleFromPileup { diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- new file mode 100644 index 000000000..222727290 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- @@ -0,0 +1,302 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Pattern; +import java.util.zip.GZIPOutputStream; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ReferenceNameComparator; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.PileupUtils; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.qmule.gff3.GFF3RecordChromosomeAndPositionComparator; +import org.qcmg.pileup.PileupFileReader; + +public class WiggleFromPileup { + + private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); + private boolean compressOutput; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + private String pileupFormat; + private int normalCoverage, tumourCoverage; + private int noOfNormalFiles, noOfTumourFiles; + private long covGood, covBad, totalCov; + private int[] normalStartPositions, tumourStartPositions; + private String currentChromosome = "chr1"; + + private int lastPosition; + + private final List gffs = new ArrayList(); + + private static GFF3Record gffRecord; + private static Iterator iter; + + private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); + private final static GFF3RecordChromosomeAndPositionComparator CHR_POS_COMP = new GFF3RecordChromosomeAndPositionComparator(); + + + private static QLogger logger; + + public int engage() throws Exception { + + // setup + initialise(); + + loadGffFile(); + + Collections.sort(gffs, 
CHR_POS_COMP); + + if (gffs.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); + + // parse pileup file + parsePileup(); + + logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); + + return exitStatus; + } + + private void loadGffFile() throws Exception { + GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); + try { + int totalNoOfbaits = 0, ignoredBaits = 0; + for (GFF3Record record : reader) { + totalNoOfbaits++; + if (isGff3RecordBait(record.getType())) { + gffs.add(record); + } else ignoredBaits++; + } + + logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + gffs.size() + ", entries that didn't make it: " + ignoredBaits); + } finally { + reader.close(); + } + } + + protected static boolean isGff3RecordBait(String type) { + return "exon".equals(type); + } +// protected static boolean isGff3RecordBait(String type) { +// return "bait_1_100".equals(type) +// || "bait".equals(type) +// || "highbait".equals(type) +// || "lowbait".equals(type); +// } + + private void initialise() { + noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); + noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); + normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); + tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); + +// logger.info("start positions: " + Arrays.deepToString(normalStartPositions) + ", " + Arrays.deepToString(tumourStartPositions)); + } + + private void parsePileup() throws Exception { + Writer writer = getWriter(cmdLineOutputFiles[0]); + + iter = gffs.iterator(); + if (iter.hasNext()) { + setGffRecord(iter.next()); + } else { + throw new RuntimeException("Unable to set next Gff record"); + } + + PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); + StringBuilder sb = new StringBuilder(); + try { + for (String pr : reader) { +// for (PileupRecord pr : reader) { + addWiggleData(pr, sb); +// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); + if (++totalCov % 100000 == 0 && sb.length() > 0) { + writer.write(sb.toString()); + sb = new StringBuilder(); + + if (totalCov % 10000000 == 0) + logger.info("hit " + totalCov + " pileup records"); + } + } + + // empty contents of StringBuilder to writer + if (sb.length() > 0) writer.write(sb.toString()); + + } finally { + writer.close(); + reader.close(); + } + } + + private Writer getWriter(String fileName) throws IOException { + Writer writer = null; + if (compressOutput) { + writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); + } else { + writer = new FileWriter(new File(fileName)); + } + return writer; + } + + protected static boolean isPositionInBait(String chromosome, int position, Iterator iter, GFF3Record currentRecord) { + + if (chromosome.equals(currentRecord.getSeqId())) { + + if (position < currentRecord.getStart()) { + return false; + } else if (position <= currentRecord.getEnd()) { + return true; + } else { + return advanceGff3Record(chromosome, position, iter); + } + } else if (COMPARATOR.compare(chromosome, currentRecord.getSeqId()) < 0) { + // pileup position is in lower chromosome than gffRecord + return false; + } else { + // pileup position is in higher chromosome than gffRecord + // advance iterator + return advanceGff3Record(chromosome, position, iter); + 
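+ // A small sketch of how this forward walk behaves, assuming the gff list holds two
+ // sorted baits on chr1, 100-200 and 300-400, and pileup positions arrive in order:
+ //   chr1:150 against bait 100-200 -> true (inside the bait)
+ //   chr1:250 -> past 100-200, so the iterator advances to 300-400 and false is returned
+ //   chr1:350 against bait 300-400 -> true
+ // Because advancing consumes baits, the walk assumes the pileup is traversed in the
+ // same chromosome/position order the gff records were sorted in (CHR_POS_COMP).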
} + } + + private static boolean advanceGff3Record(String chromosome, int position, + Iterator iter) { + if ( ! iter.hasNext()) { + // no more entries in gffs + return false; + } else { + setGffRecord(iter.next()); + return isPositionInBait(chromosome, position, iter, getGffRecord()); + } + } + + private void addWiggleData(String paramString, StringBuilder sb) { + int firstTabIndex = paramString.indexOf('\t'); + String chromosome = paramString.substring(0, firstTabIndex); + int position = Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); + + if ( ! isPositionInBait(chromosome, position, iter, getGffRecord())) return; +// if ( ! isPositionInBait(chromosome, position)) return; + + if (position != lastPosition +1 || ! currentChromosome.equalsIgnoreCase(chromosome)) { + // add new header to the StringBuilder + String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; + sb.append(wiggleHeader); + + // update last position and current chromosome + currentChromosome = chromosome; + } + lastPosition = position; + String [] params = tabbedPattern.split(paramString, -1); + + if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { + sb.append("0\n"); + ++covBad; + } else { + if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { + sb.append("1\n"); + ++covGood; + } else { + sb.append("0\n"); + ++covBad; + } + } + } + + public static void main(String[] args) throws Exception { + WiggleFromPileup sp = new WiggleFromPileup(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(WiggleFromPileup.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileup.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + // get app specific options + pileupFormat = options.getPileupFormat(); + normalCoverage = options.getNormalCoverage(); + tumourCoverage = options.getTumourCoverage(); + compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); + + if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. NNTT)"); + if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); + if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 20)"); + + logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput); + + return engage(); + } + return returnStatus; + } + + protected static void setGffRecord(GFF3Record gffRecord) { + WiggleFromPileup.gffRecord = gffRecord; + } + + protected static GFF3Record getGffRecord() { + return gffRecord; + } +} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java index 369af5c63..36c6a7a8f 100644 --- a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java +++ b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java @@ -22,8 +22,8 @@ import org.qcmg.common.util.LoadReferencedClasses; import org.qcmg.common.util.PileupUtils; import org.qcmg.common.util.TabTokenizer; -import org.qcmg.gff3.GFF3FileReader; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; import org.qcmg.pileup.PileupFileReader; public class WiggleFromPileupTakeTwo { diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- new file mode 100644 index 000000000..36c6a7a8f --- /dev/null +++ b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- @@ -0,0 +1,307 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.zip.GZIPOutputStream; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.PositionRange; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.common.util.PileupUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.pileup.PileupFileReader; + +public class WiggleFromPileupTakeTwo { + + private static QLogger logger; +// private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); +// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); + + private boolean compressOutput; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + private String pileupFormat; + private int normalCoverage, tumourCoverage; + private int noOfNormalFiles, noOfTumourFiles; + private long covGood, covBad, totalCov; + private int[] normalStartPositions, tumourStartPositions; + private String currentChromosome; + private String[] gffRegions; + private PriorityQueue currentQueue; + private PositionRange currentRange; + private int lastPosition; + private final Map> regionsOfInterest = new HashMap>(); + + + public int engage() throws Exception { + // setup + initialise(); + + loadGffFile(); + + logger.info("no of entries in regionsOfInterest: " + regionsOfInterest.size()); + + long baseCount = 0; + for (PriorityQueue ranges : regionsOfInterest.values()) { + for (PositionRange pr : ranges) { + baseCount += (pr.getEnd() - pr.getStart()); + } + } + logger.info("total no of bases covered by gff regions of interest: " + baseCount); + + + if (regionsOfInterest.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); + + // parse pileup file + parsePileup(); + + logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); + + return exitStatus; + } + + private void loadGffFile() throws Exception { + GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); + try { + int totalNoOfbaits = 0, ignoredBaits = 0; + for (GFF3Record record : reader) { + totalNoOfbaits++; + if (isGff3RecordCorrectType(record.getType())) { + populateRegionsOfInterest(record); + } else ignoredBaits++; + } + + logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + (totalNoOfbaits - ignoredBaits) + ", entries that didn't make it: " + ignoredBaits); + } finally { + reader.close(); + } + } + + private void populateRegionsOfInterest(GFF3Record record) { + // get collection corresponding to chromosome + PriorityQueue ranges = regionsOfInterest.get(record.getSeqId()); + if (null == ranges) { + ranges = new PriorityQueue(); + ranges.add(new PositionRange(record.getStart(), record.getEnd())); + regionsOfInterest.put(record.getSeqId(), ranges); + } else { + // loop through PositionRanges and see if any are adjacent + // not very efficient, but will do for now + boolean rangeExtended = false; + for (PositionRange pr : ranges) { + if 
(pr.isAdjacentToEnd(record.getStart())) { + pr.extendRange(record.getEnd()); + rangeExtended = true; + break; + } + } + if ( ! rangeExtended) { + // add new PositionRange + ranges.add(new PositionRange(record.getStart(), record.getEnd())); + } + } + } + + protected boolean isGff3RecordCorrectType(String type) { + for (String regionName : gffRegions) { + if (type.equals(regionName)) return true; + } + return false; + } + + private void initialise() { + noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); + noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); + normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); + tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); + } + + private void parsePileup() throws Exception { + Writer writer = getWriter(cmdLineOutputFiles[0]); + + PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); + StringBuilder sb = new StringBuilder(); + try { + for (String pr : reader) { +// for (PileupRecord pr : reader) { + addWiggleData(pr, sb); +// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); + if (++totalCov % 100000 == 0 && sb.length() > 0) { + writer.write(sb.toString()); + sb = new StringBuilder(); + + if (totalCov % 10000000 == 0) + logger.info("hit " + totalCov + " pileup records"); + } + } + + // empty contents of StringBuilder to writer + if (sb.length() > 0) writer.write(sb.toString()); + + } finally { + writer.close(); + reader.close(); + } + } + + private Writer getWriter(String fileName) throws IOException { + Writer writer = null; + if (compressOutput) { + writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); + } else { + writer = new FileWriter(new File(fileName)); + } + return writer; + } + + protected boolean isPositionInRegionOfInterest(int position, PriorityQueue ranges) { + if (null == currentRange) return false; + + if (position < currentRange.getStart()) { + return false; + } else if (position <= currentRange.getEnd()) { + return true; + } else { + // advance queue + currentRange = ranges.poll(); + return isPositionInRegionOfInterest(position, ranges); + } + } + + private void addWiggleData(String paramString, StringBuilder sb) { + int firstTabIndex = paramString.indexOf('\t'); + String chromosome = paramString.substring(0, firstTabIndex); + int position = Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); + boolean chromosomeUpdated = false; + if ( ! chromosome.equalsIgnoreCase(currentChromosome)) { + // update last position and current chromosome + currentChromosome = chromosome; + chromosomeUpdated = true; + currentQueue = regionsOfInterest.get(chromosome); + if (null == currentQueue) { + logger.warn("no ranges found for chr: " + chromosome); + currentRange = null; + } else { + currentRange = currentQueue.poll(); + } + } + + if ( ! 
isPositionInRegionOfInterest(position, currentQueue)) return; + + if (position != lastPosition +1 || chromosomeUpdated) { + String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; + sb.append(wiggleHeader); + } + lastPosition = position; + String [] params = TabTokenizer.tokenize(paramString); +// String [] params = tabbedPattern.split(paramString, -1); + + if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { + sb.append("0\n"); + ++covBad; + } else { + if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { + sb.append("1\n"); + ++covGood; + } else { + sb.append("0\n"); + ++covBad; + } + } + } + + public static void main(String[] args) throws Exception { + LoadReferencedClasses.loadClasses(WiggleFromPileupTakeTwo.class); + WiggleFromPileupTakeTwo sp = new WiggleFromPileupTakeTwo(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(WiggleFromPileupTakeTwo.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileupTakeTwo.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + // get app specific options + pileupFormat = options.getPileupFormat(); + normalCoverage = options.getNormalCoverage(); + tumourCoverage = options.getTumourCoverage(); + compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); + gffRegions = options.getGffRegions(); + + + if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. NNTT)"); + if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); + if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 
20)"); + if (gffRegions.length == 0) throw new IllegalArgumentException("Please specify the region names within the gff3 file you are interested in"); + + logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput + ", gff regions: " + Arrays.deepToString(gffRegions)); + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/XCvsZP.java-- b/qmule/src/org/qcmg/qmule/XCvsZP.java-- new file mode 100644 index 000000000..e7973ade9 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/XCvsZP.java-- @@ -0,0 +1,117 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.lang.Math; + +import org.qcmg.picard.SAMFileReaderFactory; + +import htsjdk.samtools.BAMIndex; +import htsjdk.samtools.BAMIndexMetaData; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; + +public class XCvsZP { + + + XCvsZP(File input) throws Exception{ + SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); + + HashMap matric = countToMatric( reader ); + + ArrayList keys = getKeys(matric ); + printMatric(matric, keys); + + reader.close(); + + } + + ArrayList getKeys( HashMap matric ){ + Set myset = new HashSet(); + + Iterator itr = matric.keySet().iterator(); + while( itr.hasNext()){ + String key = itr.next().toString(); + String[] zpxc = key.split("_"); + myset.add(zpxc[0]); + myset.add(zpxc[1]); + } + ArrayList mylist = new ArrayList(myset); + Collections.sort(mylist); + + + return mylist; + } + + + void printMatric( HashMap matric, ArrayList keys ){ + System.out.print("\t\tZP \t(reads_Number/total_number)\n"); + System.out.print("-------------------------------------------------------------------------------------------------------------------------------------------------\n XC\t|" ); + for(int i = 0; i < keys.size(); i ++) + System.out.print( "\t " + keys.get(i) + " "); + + for(int i = 0; i < keys.size(); i ++){ + System.out.print( "\n\t|" + keys.get(i) + "|\t"); + for(int j = 0; j < keys.size(); j ++){ + String xc_zp = keys.get(i) + "_" + keys.get(j); + if(matric.containsKey(xc_zp)) + System.out.print(String.format("%.4f\t", matric.get(xc_zp)) ); + else + System.out.print("-----\t"); + } + } + } + + + HashMap countToMatric( SamReader reader) throws Exception{ + + HashMap matric = new HashMap(); + HashMap rateMatric = new HashMap(); + + long numRead = 0; + for( SAMRecord record : reader){ + String xc = record.getAttribute("XC").toString(); + String zp = record.getAttribute("ZP").toString(); + String key = xc + "_" + zp; + + long value = 1; + if( matric.containsKey(key)) + value = matric.get(key) + 1; + + matric.put(key, value); + numRead ++; + } + + System.out.println("Total number of reads is " + numRead + "\n"); + + //convert to float with %.4f formart + for(Map.Entry set: matric.entrySet()){ + String key = set.getKey(); + int value = Math.round((set.getValue() * 10000 )/ numRead ); + rateMatric.put(key, ((float) value/10000 )); + } + + return rateMatric; + } + + + + public static void 
main(String[] args) throws Exception{ + + XCvsZP vs = new XCvsZP(new File(args[0]) ); + + } +} diff --git a/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- b/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- new file mode 100644 index 000000000..3154595c1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- @@ -0,0 +1,339 @@ +/** + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. +*/ +package org.qcmg.qmule.bam; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordIterator; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.SamReader; + +import java.io.File; +import java.util.AbstractQueue; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicLongArray; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.qmule.GetBamRecords; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; + + +public class CheckBam { + + private final static String UNMAPPED_READS = "Unmapped"; + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private File bamFIle; + private int numberOfThreads = 1; + + + private static final int READ_PAIRED_FLAG = 0x1; + private static final int PROPER_PAIR_FLAG = 0x2; + private static final int READ_UNMAPPED_FLAG = 0x4; + private static final int MATE_UNMAPPED_FLAG = 0x8; + private static final int READ_STRAND_FLAG = 0x10; + private static final int MATE_STRAND_FLAG = 0x20; + private static final int FIRST_OF_PAIR_FLAG = 0x40; + private static final int SECOND_OF_PAIR_FLAG = 0x80; + private static final int NOT_PRIMARY_ALIGNMENT_FLAG = 0x100; + private static final int READ_FAILS_VENDOR_QUALITY_CHECK_FLAG = 0x200; + private static final int DUPLICATE_READ_FLAG = 0x400; + private static final int SUPPLEMENTARY_ALIGNMENT_FLAG = 0x800; + + + private int exitStatus; + private static QLogger logger; + + private final AtomicLong counter = new AtomicLong(); + +// long [] flagCounter = new long[5000]; + AtomicLongArray flags = new AtomicLongArray(5000); + + + public int engage() throws Exception { + + logger.info("Get reference contigs from bam header"); + bamFIle = new File(cmdLineInputFiles[0]); + + final AbstractQueue sequences = new ConcurrentLinkedQueue(); + + try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { + if ( ! 
reader.hasIndex() && numberOfThreads > 1) { + logger.warn("Using 1 producer thread - no index found for bam file: " + bamFIle.getAbsolutePath()); + numberOfThreads = 1; + } + + SAMFileHeader header = reader.getFileHeader(); + List samSequences = header.getSequenceDictionary().getSequences(); + List orderedSamSequences = new ArrayList(); + orderedSamSequences.addAll(samSequences); + Collections.sort(orderedSamSequences, new Comparator(){ + @Override + public int compare(SAMSequenceRecord o1, SAMSequenceRecord o2) { + return o2.getSequenceLength() - o1.getSequenceLength(); + } + }); + // add the unmapped reads marker + sequences.add(UNMAPPED_READS); + + for (SAMSequenceRecord rec : orderedSamSequences) { + sequences.add(rec.getSequenceName()); + } + } + + + logger.info("will create " + numberOfThreads + " threads"); + + final CountDownLatch pLatch = new CountDownLatch(numberOfThreads); +// setpup and kick-off single Producer thread + ExecutorService producerThreads = Executors.newFixedThreadPool(numberOfThreads); + if (1 == numberOfThreads) { + producerThreads.execute(new SingleProducer(Thread.currentThread(), pLatch)); + } else { + for (int i = 0 ; i < numberOfThreads ; i++) { + producerThreads.execute(new Producer(Thread.currentThread(), pLatch, sequences)); + } + } + + // don't allow any new threads to start + producerThreads.shutdown(); + + logger.info("waiting for Producer thread to finish"); + pLatch.await(); + logger.info("Producer thread finished, counter size: " + counter.longValue()); + // output flag stats too + long dups = 0; + long sups = 0; + long mapped = 0; + long paired = 0; + long properPair = 0; + long r1 = 0; + long r2 = 0; + for (int i = 0 ; i < flags.length() ; i++) { + long l = flags.get(i); + if (l > 0) { + + if ((i & READ_PAIRED_FLAG) != 0) { + paired += l; + } + if ((i & PROPER_PAIR_FLAG) != 0) { + properPair += l; + } + if ((i & READ_UNMAPPED_FLAG) == 0) { + mapped += l; + } + if ((i & FIRST_OF_PAIR_FLAG) != 0) { + r1 += l; + } + if ((i & SECOND_OF_PAIR_FLAG) != 0) { + r2 += l; + } + if ((i & DUPLICATE_READ_FLAG) != 0) { + dups += l; + } + if ((i & SUPPLEMENTARY_ALIGNMENT_FLAG) != 0) { + sups += l; + } + logger.info("flag: " + i + " : " + l + " hits"); + } + } + logger.info("total read count: " + counter.longValue()); + logger.info("dups: " + dups + " (" + (((double) dups / counter.longValue()) * 100) + "%)"); + logger.info("sups: " + sups + " (" + (((double) sups / counter.longValue()) * 100) + "%)"); + logger.info("mapped: " + mapped + " (" + (((double) mapped / counter.longValue()) * 100) + "%)"); + logger.info("paired: " + paired + " (" + (((double) paired / counter.longValue()) * 100) + "%)"); + logger.info("properPair: " + properPair + " (" + (((double)properPair / counter.longValue()) * 100) + "%)"); + logger.info("r1: " + r1 + " (" + (((double) r1 / counter.longValue()) * 100) + "%)"); + logger.info("r2: " + r2 + " (" + (((double) r2 / counter.longValue()) * 100) + "%)"); + + return exitStatus; + } + + + + public class Producer implements Runnable { + private final Thread mainThread; + private final CountDownLatch pLatch; + private final AbstractQueue sequences; + private final QLogger log = QLoggerFactory.getLogger(Producer.class); + + private final long [] flagCounter = new long[5000]; + + Producer(Thread mainThread, CountDownLatch pLatch, AbstractQueue sequences) { + this.mainThread = mainThread; + this.pLatch = pLatch; + this.sequences = sequences; + } + + @Override + public void run() { + log.debug("Start Producer "); + + long count = 0; + + try 
(SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { + + while (true) { + String sequence = sequences.poll(); + if (null == sequence) break; + SAMRecordIterator iter = UNMAPPED_READS.equals(sequence) ? reader.queryUnmapped() : reader.query(sequence, 0, 0, false) ; + log.info("retrieving records for sequence: " + sequence); + while (iter.hasNext()) { + int flag = iter.next().getFlags(); + flagCounter[flag] ++ ; + // update count for this flag + if (++count % 2000000 == 0) { + log.info("added " + count/1000000 + "M"); + } + } + iter.close(); + } + + } catch (Exception e) { + log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); + mainThread.interrupt(); + } finally { + pLatch.countDown(); + } + // update the shared counter + counter.addAndGet(count); + //update the flag Counter + int i = 0 ; + for (long l : flagCounter) { + if (l > 0) { + flags.addAndGet(i, l); + } + i++; + } + } + } + + public class SingleProducer implements Runnable { + private final Thread mainThread; + private final QLogger log = QLoggerFactory.getLogger(SingleProducer.class); + private final CountDownLatch pLatch; + private final long [] flagCounter = new long[5000]; + + SingleProducer(Thread mainThread, CountDownLatch pLatch) { + this.mainThread = mainThread; + this.pLatch = pLatch; + } + + @Override + public void run() { + log.debug("Start SingleProducer "); + + long count = 0; + + try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { + + for (SAMRecord r : reader) { + int flag = r.getFlags(); + flagCounter[flag] ++ ; + if (++count % 2000000 == 0) { + log.info("added " + count/1000000 + "M"); + } + } + + } catch (Exception e) { + log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); + mainThread.interrupt(); + } finally { + pLatch.countDown(); + } + // update the shared counter + counter.addAndGet(count); + //update the flag Counter + int i = 0 ; + for (long l : flagCounter) { + if (l > 0) { + flags.addAndGet(i, l); + } + i++; + } + } + } + + public static void main(String[] args) throws Exception { + CheckBam sp = new CheckBam(); + int exitStatus = sp.setup(args); + if (null != logger) { + logger.logFinalExecutionStats(exitStatus); + } + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CheckBam", CheckBam.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + int nt = options.getNumberOfThreads(); + if (nt > 0) { + numberOfThreads = nt; + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- b/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- new file mode 100644 index 000000000..b480f21f4 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- @@ -0,0 +1,127 @@ +package org.qcmg.qmule.bam; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +import org.qcmg.common.log.QLogger; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; + +public class GetContigsFromHeader { + + private static QLogger logger; + + private int setup(String [] args) throws IOException { + /* + * first arg should be the header, + * second arg (if present) should be how many times the genome should be diviied up + */ + + SamReaderFactory factory = SamReaderFactory.make(); + SamReader reader = factory.open(new File(args[0])); + SAMFileHeader header = reader.getFileHeader(); + + SAMSequenceDictionary dict = header.getSequenceDictionary(); + Map map = dict.getSequences().stream().collect(Collectors.groupingBy(SAMSequenceRecord::getSequenceName, Collectors.summingInt(SAMSequenceRecord::getSequenceLength))); + + + + if (args.length > 1 && null != args[1]) { + int numberOfContigs = map.keySet().size(); + long length = map.values().stream().mapToLong(Integer::longValue).sum(); + int numberOfEntries = Integer.parseInt(args[1]) - 1; + + long noOFBasesPerEntry = length / numberOfEntries; + + System.out.println("genome length: " + length + ", numberOfEntries: " + numberOfEntries + ", noOFBasesPerEntry: " + noOFBasesPerEntry + ", numberOfContigs: " + numberOfContigs); + + + Map results = new HashMap<>(); + Set contigs = new HashSet<>(); + + List sortedContigs = map.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).map(e -> e.getKey()).collect(Collectors.toList()); + + + for (String contig : sortedContigs) { + System.out.println("looking at contig: " + contig); + Integer contigLength = map.get(contig); + if ( ! contigs.contains(contig)) { + if (contigLength >= noOFBasesPerEntry) { + results.put(contig, contigLength); + contigs.add(contig); + } else { + AtomicLong basesToMakeUp = new AtomicLong(noOFBasesPerEntry - contigLength); +// long basesToMakeUp = noOFBasesPerEntry - e.getValue(); + StringBuilder key = new StringBuilder(); + key.append(contig); + contigs.add(contig); + while (basesToMakeUp.longValue() > 1000000) { + Optional> e1 = map.entrySet().stream().filter(en -> ! 
contigs.contains(en.getKey())).filter(en -> en.getValue() < basesToMakeUp.longValue()).max((en1, en2) -> en2.getValue() - en1.getValue()); + if (e1.isPresent()) { + key.append(" -L "); + key.append(e1.get().getKey()); + basesToMakeUp.addAndGet( - e1.get().getValue()); + contigs.add(e1.get().getKey()); + } else { + break; + } + } + results.put(key.toString(), (int)noOFBasesPerEntry - basesToMakeUp.intValue()); + } + } + } + + results.forEach((k,v) -> System.out.println("contigs: " + k + ", size: " + v)); + System.out.println("contigs.size(): " + contigs.size()); + + /* + * write file + */ + if (args.length > 2 && null != args[2]) { + try (Writer writer = new FileWriter(args[2]);) { + + /* + * sort according to number of bases + */ + results.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).forEach(e -> { + try { + writer.write(e.getKey() + "\n"); + } catch (IOException e3) { + // TODO Auto-generated catch block + e3.printStackTrace(); + } + }); + } + } + } + + return 0; + } + + public static void main(String[] args) throws Exception { + GetContigsFromHeader sp = new GetContigsFromHeader(); + int exitStatus = sp.setup(args); + if (null != logger) { + logger.logFinalExecutionStats(exitStatus); + } + + System.exit(exitStatus); + } + +} + diff --git a/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- b/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- new file mode 100644 index 000000000..707c4726d --- /dev/null +++ b/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- @@ -0,0 +1,226 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.qcnv; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import htsjdk.samtools.*; + +import java.lang.Math; +import java.io.*; + +import org.qcmg.picard.SAMFileReaderFactory; + + +public class CNVseq { + + private static final boolean String = false; + //in cnv-seq.pl it call below R to get value + //echo 'options(digits=16);qnorm(1-0.5*0.001)' | R --vanilla --slave (result: 3.290526731491926) + public static double bt = 3.290526731491926; + //$echo 'options(digits=16);qnorm(0.5*0.001)' | R --vanilla --slave (result: -3.290526731491894) + public static double st = -3.290526731491894; + + public static double pvalue = 0.001; + public static int min_windoe = 4; + public static double log2 = 0.6; + public static double bigger = 1.5; + //public static int debug = 0; + //public static String Rexe = "R"; + + private final Map refSeq; + + private final long genomeSize ; + private final long numTest; + private final long numRef; + + private final double biglog2_window; + private final double smalog2_window; + private final int windowSize; + + private final File ftest; + private final File fref; + + + /** + * it caculate the window size based on genome size, TEST and REF BAM records number + * @param test: File of TEST BAM + * @param ref: File of reference BAM + * @throws Exception + */ + CNVseq(File test, File ref, int window ) throws Exception { + //open file + SamReader rtest = SAMFileReaderFactory.createSAMFileReader(test );//new SAMFileReader(test); + SamReader rref = SAMFileReaderFactory.createSAMFileReader(ref );//new SAMFileReader(ref); + + //check whether index file exist or not + if(!rtest.hasIndex()){ + throw new Exception("can't find index for: " + test.getName()); + } + 
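+		// note (descriptive comment, based on the code below): both BAM indexes are
+		// required here - the mapped-read counts are taken from the BAM index metadata
+		// further down in this constructor, and the per-window queries in exeQuery()
+		// also rely on an index being present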
+		if(!rref.hasIndex()){
+			throw new Exception("can't find index for: " + ref.getName());
+		}
+		ftest = test;
+		fref = ref;
+
+		//check SAM header
+		SAMFileHeader htest = rtest.getFileHeader();
+		SAMFileHeader href = rref.getFileHeader();
+
+		//get sequence information from both inputs
+		Map seqTest = new HashMap ();
+		Map seqRef = new HashMap ();
+
+		List genome = htest.getSequenceDictionary().getSequences();
+		for(SAMSequenceRecord re : genome){
+			seqTest.put(re.getSequenceName(),re.getSequenceLength());
+		}
+
+		genome = href.getSequenceDictionary().getSequences();
+		for(SAMSequenceRecord re : genome){
+			seqRef.put(re.getSequenceName(),re.getSequenceLength());
+		}
+
+		// check that both @SQ dictionaries are the same
+		if(seqRef.size() != seqTest.size()){
+			throw new Exception("the number of sequences differs between the two inputs: \n" + ftest.getName() + "\n" + fref.getName() );
+		}
+
+		for (String key : seqTest.keySet()){
+			//first convert Integer to int
+			int l1 = seqTest.get(key);
+			int l2 = seqRef.get(key);
+			if(l1 != l2){
+				throw new Exception("the length of sequence " + key + " differs between the two inputs: \n" + ftest.getName() + "\n" + fref.getName() );
+			}
+		}
+
+		// the two dictionaries are identical, so keep one of them
+		refSeq = seqTest;
+
+		//calculate the genome size from the reference dictionary
+		long size = 0;
+		for(String key : refSeq.keySet()){ size += refSeq.get(key); }
+		genomeSize = size;
+//-debug
+//genomeSize = 3253037807L;
+
+		//count mapped records using the BAM index metadata
+		BAMIndex tIndex = rtest.indexing().getIndex();
+		BAMIndex rIndex = rref.indexing().getIndex();
+		BAMIndexMetaData meta;
+		int tMapped = 0;
+		int rMapped = 0;
+		for(int i = 0; i < seqRef.size(); i ++ ){
+			meta = tIndex.getMetaData(i);
+			tMapped += meta.getAlignedRecordCount();
+			meta = rIndex.getMetaData(i);
+			rMapped += meta.getAlignedRecordCount();
+		}
+		numTest = tMapped;
+		numRef = rMapped;
+
+		//close files
+		rtest.close();
+		rref.close();
+
+		//calculate the window size
+		double brp = Math.pow(2, log2);
+		double srp = 1.0 / brp;
+
+
+		biglog2_window = (numTest * Math.pow(brp, 2) + numRef) * genomeSize * Math.pow(bt, 2) / ( Math.pow((1- brp),2 ) * numTest * numRef);
+		smalog2_window = (numTest * Math.pow(srp, 2) + numRef) * genomeSize * Math.pow(st, 2) / ( Math.pow((1- srp),2 ) * numTest * numRef);
+		if(window == 0 ){
+			windowSize = (int) (Math.max(biglog2_window, smalog2_window) * bigger) ;
+		}else{
+			windowSize = window;
+		}
+
+	}
+
+	/**
+	 * creates an iterator for the given window, counts the records it returns and finally closes the iterator
+	 * @param reader: SamReader to query
+	 * @param chr: genome sequence name
+	 * @param start: window start position
+	 * @param end: window end position
+	 * @return the total number of mapped records overlapping this window region
+	 */
+	int exeQuery (SamReader reader, String chr, int start, int end){
+
+		SAMRecordIterator block_ite = reader.queryOverlapping(chr, start, end);
+		int num = 0;
+		while(block_ite.hasNext()){
+			num ++;
+			block_ite.next();
+		}
+
+		block_ite.close();
+
+		return num;
+	}
+
+	/**
+	 * 
+	 * @return the total number of SAM records in the Test input file
+	 */
+	long getTestReadsNumber(){return numTest;}
+
+	/**
+	 * 
+	 * @return the total number of SAM records in the Ref input file
+	 */
+	long getRefReadsNumber(){return numRef;}
+
+	/**
+	 * 
+	 * @return a map of each reference sequence name to its length
+	 */
+	Map getrefseq(){return refSeq;}
+
+	/**
+	 * 
+	 * @return the minimum window size for detecting log2>=0.6
+	 */
+	double getpositivelog2window(){ return biglog2_window;}
+
+	/**
+	 * 
+	 * @return The minimum window size for detecting log2<=-0.6
+	 */
+	double getnegativelog2window(){return smalog2_window;}
+
+	/**
+	 * 
+	 * @return the window size to use: max(positive log2 window, negative log2 window) * bigger, unless a window size was supplied on the command line
+	 */
+	int getWindowSize(){ return windowSize; }
+
+	/**
+	 * 
+	 * @return the total length of the reference sequences listed on the BAM @SQ lines
+	 */
+	long getGenomeSize( ){ return genomeSize;}
+
+	/**
+	 * 
+	 * @return the Test input as a File
+	 */
+	File getTestFile(){return ftest;}
+
+	/**
+	 * 
+	 * @return the Ref input as a File
+	 */
+	File getRefFile(){return fref;}
+
+}
diff --git a/qmule/src/org/qcmg/qmule/qcnv/Main.java-- b/qmule/src/org/qcmg/qmule/qcnv/Main.java--
new file mode 100644
index 000000000..41b681505
--- /dev/null
+++ b/qmule/src/org/qcmg/qmule/qcnv/Main.java--
@@ -0,0 +1,57 @@
+/**
+ * © Copyright The University of Queensland 2010-2014.
+ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
+ *
+ * This code is released under the terms outlined in the included LICENSE file.
+ */
+package org.qcmg.qmule.qcnv;
+
+import org.qcmg.common.log.*;
+import htsjdk.samtools.*;
+import java.util.*;
+import java.util.Map.Entry;
+import java.io.*;
+
+public class Main {
+
+	public static void main(String[] args) throws Exception{
+		//check arguments
+		Options options = new Options( args);
+		if(! options.commandCheck()){ System.exit(1); }
+
+		QLogger logger = options.getLogger(args);
+		try{
+			File ftest = new File(options.getIO("test"));
+			File fref = new File(options.getIO("ref"));
+			CNVseq cnvseq = new CNVseq(ftest, fref, options.getWindowSize());
+
+			logger.info("genome size used for calculation is " + cnvseq.getGenomeSize());
+			logger.info(ftest.getName() + " contains records: " + cnvseq.getTestReadsNumber());
+			logger.info(fref.getName() + " contains records: " + cnvseq.getRefReadsNumber());
+			if(options.getWindowSize() == 0){
+				logger.info("The minimum window size for detecting log2>=" + CNVseq.log2 +" should be " + cnvseq.getpositivelog2window());
+				logger.info("The minimum window size for detecting log2<=-" + CNVseq.log2 +" should be " + cnvseq.getnegativelog2window());
+				logger.info(String.format("The window size to use is max(%f, %f) * %f = %d",
+						cnvseq.getpositivelog2window(),cnvseq.getnegativelog2window(), CNVseq.bigger, cnvseq.getWindowSize()));
+			}else{
+				logger.info("The window size used in this run is " + options.getWindowSize());
+			}
+
+			//count reads in each window and write the output
+			MtCNVSeq cnvThread = new MtCNVSeq(cnvseq, new File(options.getIO("output")), options.getThreadNumber(), options.getTmpDir());
+			cnvThread.cnvCount(logger);
+
+			logger.logFinalExecutionStats(0);
+			System.exit(0);
+		}catch(Exception e){
+			logger.error(e.toString());
+			logger.logFinalExecutionStats(1);
+			System.err.println(e.toString());
+			System.exit(1);
+		}
+	}
+
+
+
+
+}
diff --git a/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- b/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java--
new file mode 100644
index 000000000..b8bdbfcf5
--- /dev/null
+++ b/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java--
@@ -0,0 +1,152 @@
+/**
+ * © Copyright The University of Queensland 2010-2014.
+ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
+ *
+ * This code is released under the terms outlined in the included LICENSE file.
+ */
+package org.qcmg.qmule.qcnv;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.File;
+import java.io.IOException;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.ValidationStringency;
+
+import org.qcmg.common.log.*;
+import org.qcmg.common.util.Constants;
+import org.qcmg.picard.SAMFileReaderFactory;
+
+
+public class MtCNVSeq {
+
+	final CNVseq cnvseq;
+	final File Output;
+	final int noOfThreads;
+	final File tmpPath;
+
+	MtCNVSeq(CNVseq cnvseq, File output, int noOfThreads, File tmpdir) throws IOException{
+		this.cnvseq = cnvseq;
+		this.Output = output;
+		this.noOfThreads = noOfThreads;
+		if(tmpdir == null)
+			tmpPath = File.createTempFile( "qcnv", "", Output.getParentFile());
+		else
+			tmpPath = File.createTempFile( "qcnv", "",tmpdir);
+	}
+	/**
+	 * runs the per-sequence BAM queries in parallel, one worker task per reference sequence
+	 * @param logger: an instance of QLogger
+	 * @throws IOException
+	 * @throws InterruptedException
+	 */
+	void cnvCount(QLogger logger) throws IOException, InterruptedException{
+
+		Map refseq = cnvseq.getrefseq();
+		Map tmpoutput = new HashMap();
+		ExecutorService queryThreads = Executors.newFixedThreadPool(noOfThreads);
+
+		logger.debug("start parallel query based on genome file name");
+
+
+		if(!(tmpPath.delete()))
+			throw new IOException("Could not delete tmp file: " + tmpPath.getAbsolutePath());
+		if(! tmpPath.mkdirs())
+			throw new IOException("Could not create tmp directory: " + tmpPath.getAbsolutePath());
+
+		//parallel query per reference sequence, writing counts to tmp files
+		for ( Map.Entry chr : refseq.entrySet()){
+			File tmp = File.createTempFile(chr.getKey(), ".count", tmpPath);
+			tmpoutput.put(chr.getKey(), tmp);
+			queryThreads.execute(new ExeQuery(cnvseq,chr, tmp));
+		}
+		//wait for the threads to finish
+		queryThreads.shutdown();
+		queryThreads.awaitTermination(Constants.EXECUTOR_SERVICE_AWAIT_TERMINATION, TimeUnit.HOURS);
+		queryThreads.shutdownNow();
+		logger.debug("completed parallel query based on genome file name");
+
+
+		//collect the outputs from the tmp files into the final output
+		logger.debug("starting to collect each genome's counts into the final output");
+		FileWriter writer = new FileWriter(Output);
+		writer.write("reference\tstart\tend\ttest\tref\n");
+		for( Map.Entry tmp : tmpoutput.entrySet()){
+			BufferedReader input = new BufferedReader(new FileReader(tmp.getValue()));
+			String line = null;
+			while((line = input.readLine()) != null){
+				writer.write(line + "\n");
+			}
+			input.close();
+			tmp.getValue().deleteOnExit();
+		}
+		tmpPath.delete();
+		writer.close();
+		logger.debug("created final output");
+	}
+
+	/**
+	 * queries the Test BAM and Ref BAM for records mapped to the specified reference sequence
+	 * @author q.xu
+	 *
+	 */
+	public static class ExeQuery implements Runnable {
+		CNVseq cnvseq;
+		File Output;
+		File Test;
+		File Ref;
+		QLogger logger;
+		int chrSize;
+		int winSize;
+		String chrName;
+
+		ExeQuery(CNVseq cnvseq, Map.Entry chr,File tmp) {
+			Output = tmp;
+			Test = cnvseq.getTestFile();
+			Ref = cnvseq.getRefFile();
+			chrSize = chr.getValue();
+			chrName = chr.getKey();
+			winSize = cnvseq.getWindowSize();
+			this.cnvseq = cnvseq;
+		}
+
+		public void run() {
+			try {
+				FileWriter writer = new FileWriter(Output);
+				SamReader rTest = SAMFileReaderFactory.createSAMFileReader(Test,ValidationStringency.SILENT);
+				SamReader rRef 
= SAMFileReaderFactory.createSAMFileReader(Ref,ValidationStringency.SILENT); + + int win_num = chrSize / winSize + 1; + + for (int i = 0; i < win_num; i++){ + int start = i * winSize + 1; + int end = (i + 1 ) * winSize; + int num_test = cnvseq.exeQuery(rTest, chrName, start, end); + int num_ref = cnvseq.exeQuery(rRef, chrName, start, end); + writer.write(String.format("%s\t%d\t%d\t%d\t%d\n", chrName, start, end, num_test, num_ref )); + } + + rRef.close(); + writer.close(); + rTest.close(); + + } catch (Exception e) { + System.out.println(Thread.currentThread().getName() + " " + + e.getMessage()); + Thread.currentThread().interrupt(); + } + + } + } + +} diff --git a/qmule/src/org/qcmg/qmule/qcnv/Options.java-- b/qmule/src/org/qcmg/qmule/qcnv/Options.java-- new file mode 100644 index 000000000..3f4dc850b --- /dev/null +++ b/qmule/src/org/qcmg/qmule/qcnv/Options.java-- @@ -0,0 +1,169 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.qcnv; + + +import java.io.File; +import java.util.List; + +import joptsimple.OptionParser; +import joptsimple.OptionSet; + +import org.qcmg.qmule.Messages; +import org.qcmg.common.log.*; + +public class Options { + private static final String HELP_DESCRIPTION = Messages.getMessage("HELP_OPTION_DESCRIPTION"); + private static final String VERSION_DESCRIPTION = Messages.getMessage("VERSION_OPTION_DESCRIPTION"); + private static final String LOG_DESCRIPTION = Messages.getMessage("LOG_OPTION_DESCRIPTION"); + private static final String LOGLEVEL_DESCRIPTION = Messages.getMessage("LOGLEVEL_OPTION_DESCRIPTION"); + + private static final String OUTPUT_DESCRIPTION = Messages.getMessage("OUTPUT_OPTION_DESCRIPTION"); + private static final String TEST_DESCRIPTION = Messages.getMessage("TEST_OPTION_DESCRIPTION"); + private static final String REF_DESCRIPTION = Messages.getMessage("REF_OPTION_DESCRIPTION"); + private static final String THREAD_DESCRIPTION = Messages.getMessage("THREAD_OPTION_DESCRIPTION"); + private static final String WINDOW_DESCRIPTION = Messages.getMessage("WINDOW_SIZE_DESCRIPTION"); + private static final String TMPDIR_DESCRIPTION = Messages.getMessage("TMPDIR_DESCRIPTION"); + private final OptionParser parser = new OptionParser(); + private final OptionSet options; + + final static int DEFAULT_THREAD = 2; + final String commandLine; + final String USAGE = Messages.getMessage("USAGE_QCNV"); + final String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + + public Options( final String[] args) throws Exception { + parser.accepts("output", OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); + parser.accepts("ref", REF_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("Normal BAM"); + parser.accepts("test", TEST_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("Tumor BAM"); + parser.accepts("thread", THREAD_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("thread number"); + parser.accepts("window", WINDOW_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("window size"); + parser.accepts("tmpdir", TMPDIR_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("window size"); + + + parser.accepts("log", LOG_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("logfile"); + parser.accepts("loglevel", 
LOGLEVEL_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("loglevel"); + parser.accepts("version", VERSION_DESCRIPTION); + parser.accepts("help", HELP_DESCRIPTION); + + options = parser.parse(args); + commandLine = Messages.reconstructCommandLine(args); + } + + //IO parameters + String getIO(String io) throws Exception{ + + int size = options.valuesOf(io).size(); + if( size > 1){ + throw new Exception("multiple "+ io + " files specified" ); + } + else if( size < 1 ){ + throw new Exception(" missing or invalid IO option specified: " + io ); + } + + return options.valueOf(io).toString(); + } + + File getTmpDir() throws Exception{ + if(options.has("tmpdir")) + return new File (options.valueOf("tmpdir").toString()); + + + + return null; + + } + + int getThreadNumber(){ + + if(options.has("thread")){ + return Integer.parseInt((String) options.valueOf("thread")); + } + + return DEFAULT_THREAD; + } + + int getWindowSize(){ + + if(options.has("window")){ + return Integer.parseInt((String) options.valueOf("window")); + } + + return 0; + } + + QLogger getLogger(String[] args) throws Exception{ + + // configure logging + QLogger logger; + String logLevel = (String) options.valueOf("loglevel"); + String logFile; + if(options.has("log")){ + logFile = options.valueOf("log").toString(); + } + else{ + logFile = options.valueOf("output") + ".log"; + } + + logger = QLoggerFactory.getLogger( Main.class, logFile,logLevel); + logger.logInitialExecutionStats(Main.class.toString(), version, args); + return logger; + } + + boolean hasHelp() throws Exception{ + if(options.has("h") || options.has("help")){ + System.out.println(USAGE); + System.out.println(HELP_DESCRIPTION); + parser.printHelpOn(System.err); + return true; + } + return false; + } + + boolean hasVersion()throws Exception{ + if(options.has("v") || options.has("version")){ + System.out.println(VERSION_DESCRIPTION); + System.err.println(version); + return true; + } + return false; + } + + boolean commandCheck() throws Exception{ + //quit system after provide help or version info + if( hasHelp() || hasVersion() ){ + System.exit(0); + } + + + if (options.nonOptionArguments().size() > 0) { + List nonoptions = (List) options.nonOptionArguments(); + + for(String str : nonoptions){ + System.err.println("INVALID OPTION: " + str); + } + return false; + } + + if(getIO("ref") == null || getIO("test") == null){ + System.err.println("Missing ref or test option"); + return false; + } + if( getIO("ref").equals(getIO("output"))){ + System.err.println(Messages.getMessage("SAME_FILES", "ref", "output")); + return false; + } + if(options.has("thread")){ + int thread = Integer.parseInt((String) options.valueOf("thread")); + if(thread < 1){ + System.err.println("THREAD NUMBER MUST GREATER THAN ONE: " + options.valueOf("thread") ); + } + } + + return true; + } +} diff --git a/qmule/src/org/qcmg/qmule/queryChrMT.java-- b/qmule/src/org/qcmg/qmule/queryChrMT.java-- new file mode 100644 index 000000000..d9dcad3ff --- /dev/null +++ b/qmule/src/org/qcmg/qmule/queryChrMT.java-- @@ -0,0 +1,68 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; + +import htsjdk.samtools.*; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.ValidationStringency; +import htsjdk.samtools.SAMRecordIterator; + +import java.io.*; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; + +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + + +public class queryChrMT { + + public static void main(final String[] args) throws IOException, InterruptedException { + + try{ + + File inBAM = new File(args[0]); + String outputName = inBAM.getName().replace(".bam", ".chrMT.primary.bam"); + File output = new File(args[1], outputName); + + SamReader reader = SAMFileReaderFactory.createSAMFileReader(inBAM,ValidationStringency.SILENT); + SAMFileHeader he = reader.getFileHeader().clone(); + SAMOrBAMWriterFactory writeFactory = new SAMOrBAMWriterFactory(he , true, output); + SAMRecordIterator ite = reader.query("chrMT",0, 16569, false); + + SAMRecord record; + while(ite.hasNext()){ + record = ite.next(); + if(!record.getNotPrimaryAlignmentFlag()) + writeFactory.getWriter().addAlignment(record ); + + } + writeFactory.closeWriter(); + reader.close(); + + System.exit(0); + }catch(Exception e){ + System.err.println(e.toString()); + Thread.sleep(1); + System.out.println("usage: qmule org.qcmg.qmule.queryChrMT "); + System.exit(1); + } + + } + + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java b/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java index aef93721d..e405206bd 100644 --- a/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java +++ b/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java @@ -23,7 +23,7 @@ import org.qcmg.qmule.QMuleException; import org.qcmg.qmule.util.IGVBatchFileGenerator; import org.qcmg.qmule.util.TabbedDataLoader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedRecord; public class CompareSnps { diff --git a/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- new file mode 100644 index 000000000..e405206bd --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- @@ -0,0 +1,205 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.snppicker; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.SnpUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.qmule.util.IGVBatchFileGenerator; +import org.qcmg.qmule.util.TabbedDataLoader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class CompareSnps { + + private final ConcurrentMap firstSnpMap = new ConcurrentHashMap(30000); //not expecting more than 100000 + private final ConcurrentMap secondSnpMap = new ConcurrentHashMap(30000); + private final List firstList = new ArrayList(); + private final List secondList = new ArrayList(); +// private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); + + private static QLogger logger; + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + public int engage() throws Exception { + + logger.info("loading snp data from file: " + cmdLineInputFiles[0]); + TabbedDataLoader.loadTabbedData(cmdLineInputFiles[0], -2, firstSnpMap); + logger.info("loading snp data - DONE [" + firstSnpMap.size() + "]"); + logger.info("loading snp data from file: " + cmdLineInputFiles[1]); + TabbedDataLoader.loadTabbedData(cmdLineInputFiles[1], -2, secondSnpMap); + logger.info("loading snp data - DONE [" + secondSnpMap.size() + "]"); + + compare(); + + outputIGVBatchFiles(); + +// addPileupFromNormalBam(); + + return exitStatus; + } + + private void outputIGVBatchFiles() throws IOException { + IGVBatchFileGenerator.generate(firstList, cmdLineOutputFiles[0]); + IGVBatchFileGenerator.generate(secondList, cmdLineOutputFiles[1]); + } + + protected void compare() { + + // total counts + int firstMapCount = 0, secondMapCount = 0; + // count of snps unique to each input + int uniqueToFirstMap = 0, uniqueToSecondMap = 0; + int commonSnps = 0, commonAndAlsoClassABStopNonSynon = 0; + + // loop through first set + for (Entry entry : firstSnpMap.entrySet()) { + + TabbedRecord firstRecord = entry.getValue(); + + if (isClassAB(firstRecord, -1) && isStopNonSynonymous(firstRecord, 22)) { + firstMapCount++; + + TabbedRecord secondRecord = secondSnpMap.get(entry.getKey()); + if (null == secondRecord || ! (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22))) { + uniqueToFirstMap++; + firstList.add(entry.getKey()); + logger.info("Unique to first: " + entry.getKey().getChromosome() + ":" + entry.getKey().getStartPosition()); + } else { + commonSnps++; +// if (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22)) { +// commonAndAlsoClassABStopNonSynon++; +// } + } + } + + } + + // loop through second set + for (Entry entry : secondSnpMap.entrySet()) { + + TabbedRecord secondRecord = entry.getValue(); + + if (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22)) { + secondMapCount++; + + TabbedRecord firstRecord = firstSnpMap.get(entry.getKey()); + if (null == firstRecord || ! 
(isClassAB(firstRecord, -1) && isStopNonSynonymous(firstRecord, 22))) { + uniqueToSecondMap++; + secondList.add(entry.getKey()); + logger.info("Unique to second: " + entry.getKey().getChromosome() + ":" + entry.getKey().getStartPosition()); +// logger.info("IGV: " + entry.getValue().getData()); + } + } + } + + logger.info("SUMMARY:"); + logger.info("firstMapCount: " + firstMapCount); + logger.info("secondMapCount: " + secondMapCount); + logger.info("uniqueToFirstMap: " + uniqueToFirstMap); + logger.info("uniqueToSecondMap: " + uniqueToSecondMap); + logger.info("commonSnps: " + commonSnps); +// logger.info("commonAndAlsoClassABStopNonSynon: " + commonAndAlsoClassABStopNonSynon); + + } + + + + protected static boolean isClassAB(TabbedRecord record, int index) { + if (null == record || null == record.getData()) throw new IllegalArgumentException("null or empty Tabbed record"); + String [] params = TabbedDataLoader.tabbedPattern.split(record.getData()); + String qcmgFlag = TabbedDataLoader.getStringFromArray(params, index); + + return SnpUtils.isClassAorB(qcmgFlag); +// return "--".equals(qcmgFlag) || "less than 12 reads coverage in normal".equals(qcmgFlag) +// || "less than 3 reads coverage in normal".equals(qcmgFlag); + + } + + protected static boolean isStopNonSynonymous(TabbedRecord record, int index) { + if (null == record || null == record.getData()) throw new IllegalArgumentException("null or empty Tabbed record"); + String [] params = TabbedDataLoader.tabbedPattern.split(record.getData()); +// String consequenceType = params[index]; + String consequenceType = TabbedDataLoader.getStringFromArray(params, index); + + return consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS"); + } + + + + public static void main(String[] args) throws Exception { + CompareSnps sp = new CompareSnps(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(CompareSnps.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareSnps", CompareSnps.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- new file mode 100644 index 000000000..322cbd5d1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- @@ -0,0 +1,237 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.pileup.QPileupFileReader; +import org.qcmg.pileup.QSnpRecord; +import org.qcmg.pileup.VerifiedSnpFileReader; +import org.qcmg.pileup.VerifiedSnpRecord; +import org.qcmg.vcf.VCFFileReader; + +public class ExamineVerifiedSnps { + + private static final QLogger logger = QLoggerFactory.getLogger(ExamineVerifiedSnps.class); + + private static Map pileup = new HashMap<>(80000); + private static Map vcfRecords = new HashMap<>(80000); + private static Map verifiedSNPs = new HashMap<>(250); + + public static void main(String[] args) throws Exception { + logger.info("hello..."); + + String filename = args[0]; + boolean runQPileup = true; + // filename type depends on whether to load qpileup or vcf + if (FileUtils.isFileTypeValid(filename, "vcf")) { + runQPileup = false; + } + + loadVerifiedSnps(args[1]); + logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); + + if (runQPileup) { + // load the existing pileup into memory + logger.info("running in pileup mode"); + loadQPileup(args[0]); + logger.info("loaded " + pileup.size() + " entries into the pileup map"); + examine(args[2]); + } else { + logger.info("running in vcf mode"); + loadGATKData(args[0]); + logger.info("loaded " + vcfRecords.size() + " entries into the vcf map"); + examineVCF(args[2]); + } + logger.info("goodbye..."); + } + + private static void examine(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + + int verifiedYes = 0, qsnpVerifiedYes = 0; + int verifiedNo = 0, qsnpVerifiedNo = 0; + int verifiedNoGL = 0, qsnpVerifiedNoGL = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : verifiedSNPs.entrySet()) { + + QSnpRecord qpr = pileup.get(entry.getKey()); + VerifiedSnpRecord vsr = entry.getValue(); + + // only interested in exome data + if ( ! 
"exome".equals(vsr.getAnalysis())) continue; + + + if ("no".equals(vsr.getStatus())) { + verifiedNo++; + // if we don't have a matching qpr - good, otherwise, print details + if (null == qpr) { + qsnpVerifiedNo++; + writer.write(vsr.getFormattedString() + "\tOK - no entry in qsnp\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getClassification() + "\t" + + getAnnotationAndNote(qpr) + "\n"); +// + qpr.getMutation() + getAnnotationAndNote(qpr) + "\n"); + } + + } else if ("yes".equals(vsr.getStatus())) { + verifiedYes++; + if (null != qpr) { + qsnpVerifiedYes++; + writer.write(vsr.getFormattedString() + "\tOK - entry in qsnp\t" + qpr.getClassification() + "\t" + + getAnnotationAndNote(qpr) +"\n"); +// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\n"); + } + } else if ("no -GL".equals(vsr.getStatus())) { + verifiedNoGL++; + if (null != qpr) { + qsnpVerifiedNoGL++; + + writer.write(vsr.getFormattedString() + "\tentry in qsnp\t" + qpr.getClassification() + "\t" + + getAnnotationAndNote(qpr) +"\n"); +// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\tNo entry in qsnp\n"); + } + } + } + + writer.close(); + logger.info("verified yes: " + verifiedYes + ", in qsnp: " + qsnpVerifiedYes); + logger.info("verified no: " + verifiedNo + ", in qsnp: " + (verifiedNo-qsnpVerifiedNo)); + logger.info("verified no -GL: " + verifiedNoGL + ", in qsnp: " + qsnpVerifiedNoGL); + } + } + + private static void examineVCF(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + + int verifiedYes = 0, gatkVerifiedYes = 0; + int verifiedNo = 0, gatkVerifiedNo = 0; + int verifiedNoGL = 0, gatkVerifiedNoGL = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : verifiedSNPs.entrySet()) { + + VcfRecord qpr = vcfRecords.get(entry.getKey()); + VerifiedSnpRecord vsr = entry.getValue(); + + // only interested in exome data + if ( ! 
"exome".equals(vsr.getAnalysis())) continue; + + if ("no".equals(vsr.getStatus())) { + verifiedNo++; + // if we don't have a matching qpr - good, otherwise, print details + if (null == qpr) { + gatkVerifiedNo++; + writer.write(vsr.getFormattedString() + "\tOK - no entry in GATK\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\t" + + VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() + "\n"); +// writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getGenotype() + "\t" + qpr.getAlt() + "\n"); + } + + } else if ("yes".equals(vsr.getStatus())) { + verifiedYes++; + if (null != qpr) { + gatkVerifiedYes++; + writer.write(vsr.getFormattedString() + "\tOK - entry in GATK\t" + + VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\n"); + } + } else if ("no -GL".equals(vsr.getStatus())) { + verifiedNoGL++; + if (null != qpr) { + gatkVerifiedNoGL++; + + writer.write(vsr.getFormattedString() + "\tentry in GATK\t" + + VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\tNo entry in GATK\n"); + } + } + } + + writer.close(); + logger.info("verified yes: " + verifiedYes + ", in GATK: " + gatkVerifiedYes); + logger.info("verified no: " + verifiedNo + ", in GATK: " + (verifiedNo-gatkVerifiedNo)); + logger.info("verified no -GL: " + verifiedNoGL + ", in GATK: " + gatkVerifiedNoGL); + } + } + + private static String getAnnotationAndNote(QSnpRecord record) { + if ( isNull(record.getAnnotation())) return "\tClassA"; + else if (isNull(record.getAnnotation())) return "\tClassB"; + else return "\tClassB\t" + record.getAnnotation(); + } + + private static boolean isNull(String string) { + return null == string || "null".equals(string) || 0 == string.length(); + } + + private static void loadQPileup(String pileupFile) throws IOException { + if (FileUtils.canFileBeRead(pileupFile)) { + QPileupFileReader reader = new QPileupFileReader(new File(pileupFile)); + try { + for (QSnpRecord qpr : reader) { + pileup.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); + } + } finally { + reader.close(); + } + } + } + + private static void loadGATKData(String pileupFile) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + + VCFFileReader reader = new VCFFileReader(new File(pileupFile)); + try { + for (VcfRecord qpr : reader) { + vcfRecords.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); + } + } finally { + reader.close(); + } + } + } + private static void loadVerifiedSnps(String verifiedSnpFile) throws IOException { + if (FileUtils.canFileBeRead(verifiedSnpFile)) { + VerifiedSnpFileReader reader = new VerifiedSnpFileReader(new File(verifiedSnpFile)); + try { + for (VerifiedSnpRecord vsr : reader) { + verifiedSNPs.put(ChrPointPosition.valueOf(vsr.getChromosome(), vsr.getPosition()),vsr); + } + } finally { + reader.close(); + } + } + } +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- new file mode 100644 index 000000000..6758eb70e --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- @@ -0,0 +1,488 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import htsjdk.samtools.SAMRecord; + +import org.qcmg.chrconv.ChrConvFileReader; +import org.qcmg.chrconv.ChromosomeConversionRecord; +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.Classification; +import org.qcmg.common.model.GenotypeEnum; +import org.qcmg.common.model.QSnpGATKRecord; +import org.qcmg.common.util.BaseUtils; +import org.qcmg.common.util.Constants; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.common.vcf.header.VcfHeaderUtils; +import org.qcmg.germlinedb.GermlineDBFileReader; +import org.qcmg.germlinedb.GermlineDBRecord; +import org.qcmg.picard.QJumper; +import org.qcmg.pileup.QSnpRecord; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.Options.Ids; +import org.qcmg.qmule.QMuleException; +import org.qcmg.vcf.VCFFileReader; + +public class GatkUniqueSnps { + +// private static final QLogger logger = QLoggerFactory.getLogger(GatkUniqueSnps.class); + private static QLogger logger; + + private static Map tumourRecords = new HashMap(100000); + private static Map normalRecords = new HashMap(100000); + +// private static Map classABRecords = new HashMap(100000); + private static List qPileupRecords = new ArrayList(15000); + + // map to hold chromosome conversion data + private static final Map ensembleToQCMG = new HashMap(110); + + + // constants + private String mutationIdPrefix; + private String tumourSampleId; + private String normalSampleId; + private String patientId; + private String somaticAnalysisId; + private String germlineAnalysisId; +// private String analysisId; +// private static final String mutationIdPrefix = "APGI_1992_"; +// private static final String analysisId = "qcmg_ssm_20110524_1"; +// private static final String tumourSampleId = "ICGC-ABMP-20091203-06-TD"; + + + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private Properties ids; + + private int exitStatus; + + + private static String bamFile1; + private static String bamFile1Index; +// private static String bamFile2; +// private static String bamFile2Index; + + private static QJumper jumper1; +// private static QJumper jumper2; + + public int engage() throws Exception { + + setupIds(); + + logger.info("loading normal vcf file"); + loadGATKData(cmdLineInputFiles[0], normalRecords); + logger.info("loaded " + normalRecords.size() + " normal vcf's"); + + logger.info("loading tumour vcf file"); + loadGATKData(cmdLineInputFiles[1], tumourRecords); + logger.info("loaded " + tumourRecords.size() + " tumour vcf's"); + + bamFile1 = cmdLineInputFiles[2]; + bamFile1Index = cmdLineInputFiles[3]; +// bamFile2 = args[4]; +// bamFile2Index = args[5]; + + + jumper1 = new QJumper(); + jumper1.setupReader(bamFile1, bamFile1Index); +// jumper2 = new QJumper(); +// jumper2.setupReader(bamFile2, bamFile2Index); + + + logger.info("about to call examine"); + examine(); + logger.info("about to call examine - DONE"); + + // close the qjumper + jumper1.closeReader(); + + logger.info("about to load chromosome conversion data"); + 
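+		// descriptive note on the call below: cmdLineInputFiles[4] is expected to be the chromosome
+		// conversion file; loadChromosomeConversionData (further down) populates the ensembleToQCMG map
+		// from each record's Ensembl v55 contig name to its QCMG chromosome name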
loadChromosomeConversionData(cmdLineInputFiles[4]); + logger.info("about to load chromosome conversion data - DONE"); + + logger.info("about to add germlineDB info"); + addGermlineDBData(cmdLineInputFiles[5]); + + int noAnnotation = 0; + for (final QSnpRecord qpr : qPileupRecords) if (null == qpr.getAnnotation()) noAnnotation++; + logger.info("class A after addition of germlinedb data: " + noAnnotation ); + + + logger.info("writing output"); + writeOutputForDCC(cmdLineOutputFiles[0]); + logger.info("DONE"); + + return exitStatus; + } + + private void setupIds() throws Exception { + if (null != ids) { + + somaticAnalysisId = (String) ids.get(Ids.SOMATIC_ANALYSIS); + germlineAnalysisId = (String) ids.get(Ids.GEMLINE_ANALYSIS); + tumourSampleId = (String) ids.get(Ids.TUMOUR_SAMPLE); + normalSampleId = (String) ids.get(Ids.NORMAL_SAMPLE); + patientId = (String) ids.get(Ids.PATIENT); + mutationIdPrefix = patientId + "_SNP_"; + + logger.tool("somaticAnalysisId: " + somaticAnalysisId); + logger.tool("germlineAnalysisId: " + germlineAnalysisId); + logger.tool("normalSampleId: " + normalSampleId); + logger.tool("tumourSampleId: " + tumourSampleId); + logger.tool("patientId: " + patientId); + logger.tool("mutationIdPrefix: " + mutationIdPrefix); + + } else { + logger.error("No ids were passed into the program"); + throw new Exception("Invalid arguments to GatkUniqueSnps"); + } + } + + private static void examine() throws Exception { + + int existsInNormalAndTumour = 0, sameGenotype = 0; + // loop through the tumour map + + for (final Entry tumourEntry : tumourRecords.entrySet()) { + + // see if a position exists in the normal map + final QSnpGATKRecord normalRecord = normalRecords.get(tumourEntry.getKey()); + if (null != normalRecord) { + existsInNormalAndTumour++; + + final GenotypeEnum normalGenotype = normalRecord.getGenotypeEnum(); + final GenotypeEnum tumourGenotype = tumourEntry.getValue().getGenotypeEnum(); + + if (normalGenotype == tumourGenotype) { + sameGenotype++; + } else { + if (tumourGenotype.containsAllele(normalRecord.getAlt().charAt(0))) { + //tumourEntry.getValue().getVCFRecord().addInfo("MIN"); + tumourEntry.getValue().getVCFRecord().appendInfo("MIN");; + } + if ( tumourGenotype.isHeterozygous() && ! 
tumourGenotype.containsAllele(tumourEntry.getValue().getRef().charAt(0))) + //tumourEntry.getValue().getVCFRecord().addInfo("tumour heterozygous for two non-reference alleles"); + tumourEntry.getValue().getVCFRecord().appendInfo("tumour heterozygous for two non-reference alleles"); +// if (null == tumourEntry.getValue().getAnnotation()) { + qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); +// } + } + } else { + // interested primarily in these fellas + qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); + } + } + + logger.info("exists in both normal and tumour: " + existsInNormalAndTumour + ", same Genotype: " + sameGenotype); + + logger.info("potential number of class A&B's before pileup: " + qPileupRecords.size() ); + + int noAnnotation = 0, count = 0; + for (final QSnpRecord qpr : qPileupRecords) { + getPileup(jumper1, qpr); + + if (++count % 100 == 0) + logger.info("hit " + count + " vcf records, " + qpr.toString()); + + if (qpr.getAnnotation() == null) + noAnnotation++; + } + + logger.info("class A after pileup: " + noAnnotation ); + + } + + private static void loadChromosomeConversionData(String chrConvFile) throws IOException { + final ChrConvFileReader reader = new ChrConvFileReader(new File(chrConvFile)); + try { + for (final ChromosomeConversionRecord record : reader) { + // add extra map inserts here as required + ensembleToQCMG.put(record.getEnsembleV55(), record.getQcmg()); + } + } finally { + reader.close(); + } + } + + private void writeOutputForDCC(String dccSomaticFile) throws IOException { + if (dccSomaticFile.contains("Germline_DB.txt")) throw new IOException("Wrong output file!!!"); + + final FileWriter somaticWriter = new FileWriter(new File(dccSomaticFile)); + + final String somaticHeader = "analysis_id\ttumour_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tQCMGflag\n"; + final int counter = 1; + try { + + somaticWriter.write(somaticHeader); + for (final QSnpRecord record : qPileupRecords) { + + String ensemblChr = null; + // get ensembl chromosome + for (final Map.Entry entry : ensembleToQCMG.entrySet()) { + if (record.getChromosome().equals(entry.getValue())) { + ensemblChr = entry.getKey(); + break; + } + } + somaticWriter.write(somaticAnalysisId + "\t" + tumourSampleId + "\t" + + "\n"); +// + record.getDCCData(mutationIdPrefix, ensemblChr) + "\n"); + } + } finally { + somaticWriter.close(); + } + } + + private static QSnpRecord getQPileupRecord(QSnpGATKRecord vcfRec) { + final QSnpRecord qpr = new QSnpRecord(vcfRec.getChromosome(), vcfRec.getPosition(), vcfRec.getRef()); + qpr.setTumourGenotype(vcfRec.getGenotypeEnum()); +// qpr.setMutation(vcfRec.getRef() + Constants.MUT_DELIM + vcfRec.getAlt()); +// qpr.getVcfRecord().setFilter(vcfRec.getAnnotation()); + qpr.setClassification(Classification.SOMATIC); + return qpr; + } + + + public static void getPileup(QJumper jumper, QSnpRecord record) throws Exception { + + final List firstSet = jumper.getRecordsAtPosition(record.getChromosome(), record.getPosition()); +// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); + + + examinePileup(firstSet, record); + + +// char mutation = record.getMutation().charAt(record.getMutation().length() -1); +// boolean mutationFoundInNormal = false; 
+// int normalCoverage = 0; +// for (SAMRecord sam : firstSet ) { +// if ( ! sam.getDuplicateReadFlag()) { +// ++normalCoverage; +// +// // need to get the base at the position +// int offset = record.getPosition() - sam.getAlignmentStart(); +// if (offset < 0) throw new Exception("invalid start position!!!"); +// +// if (sam.getReadBases()[offset] == mutation) { +// mutationFoundInNormal = true; +// break; +// } +// } +// } +// +// if (mutationFoundInNormal) { +// record.addAnnotation("mutation also found in pileup of normal"); +// } +// +// record.setNormalCount(normalCoverage); +// +// if (normalCoverage < 12) +// record.addAnnotation("less than 12 reads coverage in normal"); + + } + + + public static void examinePileup(List sams, QSnpRecord record) throws Exception { + + final char mutation = record.getAlt().charAt(0); +// final char mutation = record.getMutation().charAt(record.getMutation().length() -1); + boolean mutationFoundInNormal = false; + int normalCoverage = 0; + for (final SAMRecord sam : sams ) { + if ( ! sam.getDuplicateReadFlag()) { + ++normalCoverage; + + // need to get the base at the position +// int offset = record.getPosition() - sam.getUnclippedStart(); + int offset = record.getPosition() - sam.getAlignmentStart(); + if (offset < 0) throw new Exception("invalid start position!!!: "+ sam.format()); + + if (offset >= sam.getReadLength()) { +// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); + // set to last entry in sequence + offset = sam.getReadLength() -1; + } + + if (sam.getReadBases()[offset] == mutation) { + mutationFoundInNormal = true; +// break; + } + } + } + + if (mutationFoundInNormal) { + VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_MUTATION_IN_NORMAL); + } + +// record.setNormalCount(normalCoverage); + + if (normalCoverage < 12) { + VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_COVERAGE); + } + + + } + + +// private static void getPileup(VCFRecord record) { +// +// List firstSet = jumper1.getRecordsAtPosition(record.getChromosome(), record.getPosition()); +//// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); +// +// int normalCoverage = 0; +// for (SAMRecord sam : firstSet ) { +// if ( ! 
sam.getDuplicateReadFlag()) +// ++normalCoverage; +// } +// +// +//// int normalCoverage = firstSet.size(); +//// int normalCoverage = firstSet.size() + secondSet.size(); +// record.setNormalCoverage(normalCoverage); +// +// if (normalCoverage < 12) +// record.addAnnotation("less than 12 reads coverage in normal"); +// +// } + + + private static void addGermlineDBData(String germlineDBFile) throws IOException { + + final GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDBFile)); + // create map of SOMATIC classified SNPs + final Map somaticPileupMap = new HashMap(qPileupRecords.size(), 1); + for (final QSnpRecord pileupRecord : qPileupRecords) { + somaticPileupMap.put(ChrPointPosition.valueOf(pileupRecord.getChromosome(), pileupRecord.getPosition()), pileupRecord); + } + + int updateCount = 0, count = 0; + try { + for (final GermlineDBRecord rec : reader) { + + // get QCMG chromosome from map + final String chr = ensembleToQCMG.get(rec.getChromosome()); + final ChrPosition id = ChrPointPosition.valueOf(chr, rec.getPosition()); + + final QSnpRecord qpr = somaticPileupMap.get(id); + if (null != qpr && null != qpr.getAlt() && (null == qpr.getAnnotation() || ! qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_GERMLINE))) { + final String mutation = qpr.getAlt(); + if (mutation.length() == 3) { + final char c = mutation.charAt(2); + + final GenotypeEnum germlineDBGenotype = BaseUtils.getGenotypeEnum(rec.getNormalGenotype()); + if (germlineDBGenotype.containsAllele(c)) { + updateCount++; + + VcfUtils.updateFilter(qpr.getVcfRecord(), VcfHeaderUtils.FILTER_GERMLINE); + } + + + } else { + logger.info("mutation string length: " + mutation.length()); + } + } + + if (++count % 1000000 == 0) + logger.info("hit " + count + " germline reords"); + + } + } finally { + reader.close(); + } + logger.info("updated: " + updateCount + " somatic positions with germlineDB info"); + } + + private static void loadGATKData(String pileupFile, Map map) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + + final VCFFileReader reader = new VCFFileReader(new File(pileupFile)); + try { + for (final VcfRecord qpr : reader) { + map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()), new QSnpGATKRecord(qpr)); + } + } finally { + reader.close(); + } + } + } + + public static void main(String[] args) throws Exception { + final GatkUniqueSnps gus = new GatkUniqueSnps(); + final int exitStatus = gus.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + final Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logger = QLoggerFactory.getLogger(GatkUniqueSnps.class, options.getLogFile(), options.getLogLevel()); + logger.logInitialExecutionStats("GatkUniqueSnps", GatkUniqueSnps.class.getPackage().getImplementationVersion()); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (final String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + ids = options.getIds(); + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- b/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- new file mode 100644 index 000000000..6b3b7f4a7 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- @@ -0,0 +1,85 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; + +public class Mule { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + + private static QLogger logger; + + public int engage() { + return 1; + } + + + + public static void main(String[] args) throws Exception { + Mule sp = new Mule(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(Mule.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("Example", Mule.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java index 404d2ea8f..ad7f90ae8 100644 --- a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java +++ b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java @@ -30,16 +30,16 @@ import org.qcmg.common.vcf.VcfUtils; import org.qcmg.dbsnp.Dbsnp130Record; import org.qcmg.dbsnp.DbsnpFileReader; -import org.qcmg.gff3.GFF3FileReader; -import org.qcmg.gff3.GFF3Record; -import org.qcmg.illumina.IlluminaFileReader; -import org.qcmg.illumina.IlluminaRecord; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; import org.qcmg.picard.QJumper; import org.qcmg.pileup.PileupFileReader; import org.qcmg.qmule.Messages; import org.qcmg.qmule.Options; import org.qcmg.qmule.QMuleException; -import org.qcmg.record.Record; +import org.qcmg.qmule.record.Record; +import org.qcmg.unused.illumina.IlluminaFileReader; +import org.qcmg.unused.illumina.IlluminaRecord; import org.qcmg.vcf.VCFFileReader; public class SnpPicker { diff --git a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- new file mode 100644 index 000000000..63193c01a --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- @@ -0,0 +1,802 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import htsjdk.samtools.SAMRecord; + +import org.qcmg.chrconv.ChrConvFileReader; +import org.qcmg.chrconv.ChromosomeConversionRecord; +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.Genotype; +import org.qcmg.common.util.BaseUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.dbsnp.Dbsnp130Record; +import org.qcmg.dbsnp.DbsnpFileReader; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.picard.QJumper; +import org.qcmg.pileup.PileupFileReader; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.record.Record; +import org.qcmg.unused.illumina.IlluminaFileReader; +import org.qcmg.unused.illumina.IlluminaRecord; +import org.qcmg.vcf.VCFFileReader; + +public class SnpPicker { + + private static final char DEFAULT_CHAR = '\u0000'; + private static QLogger logger; +// private static DecimalFormat df = new DecimalFormat("0.0000"); + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private static boolean isNormal; + +// private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + + Map illuminaMap = new HashMap(1000000,0.99f); // not expecting more than 1000000 + + Map variantMap = new HashMap(2000000); + + // map to hold chromosome conversion data + Map gffToQCMG = new HashMap(100, 0.99f); + +// List illuminaRecords = new ArrayList(); +// List dbSNPRecords = new ArrayList(13000000); + + private int engage() throws Exception { + + // populate the chromosome conversion map + logger.info("about to load chromosome conversion data"); + loadChromosomeConversionData(); + logger.info("about to load chromosome conversion data - DONE"); + + // we are working off the raw illumina data here - first convert it into filtered format, and use that as the input + + logger.info("About to load raw illumina data"); + loadRawIlluminaData(); +// logger.info("No of variant records: " + variantMap.size() + " in file: " + cmdLineInputFiles[0]); + + logger.info("About to load gff3 data"); + loadGff3Data(); + logger.info("No of variant records: " + variantMap.size()); + +// logger.info("About to load vcf data"); +// loadVCFData(); +// logger.info("No of variant records: " + variantMap.size()); + + logger.info("About to load qsnp data"); + loadQSnpData(); + logger.info("No of variant records: " + variantMap.size()); + + + + + logger.info("About to load dbSNP data"); + loadDbSnpData(); +// logger.info("No of variant records: " + variantMap.size()); + + // update variantMap with details from illuminaMap + logger.info("About to load filtered illumina data into variant map"); + convertIlluminaToVariant(); + logger.info("About to load filtered illumina data into variant map - DONE"); + + // get some stats + displayStats(); + + // pileup + logger.info("time for pileup..."); + getPileup(); + logger.info("time for pileup - DONE"); + + // more stats + displayStats2(); + + logger.info("Will now attempt to write out variant 
data" ); + outputVariantData(); + logger.info("Will now attempt to write out variant data - DONE"); + + return exitStatus; + } + + private void getPileup() throws Exception { + QJumper qj = new QJumper(); + qj.setupReader(cmdLineInputFiles[5], cmdLineInputFiles[6]); + + VariantRecord rec; + StringBuilder pileup = new StringBuilder(); + List reads; +// String chr; + int position; + int offset; + + int pileupCount = 0; + for (Map.Entry entry : variantMap.entrySet()) { + // only want pileup if we have gff or vcf data + rec = entry.getValue(); + if (DEFAULT_CHAR != rec.getGffRef() || null != rec.getVcfGenotype()) { +// chr = ( ! entry.getKey().getChromosome().startsWith("GL") ? "chr" : "") + entry.getKey().getChromosome(); + + reads = qj.getRecordsAtPosition(entry.getKey().getChromosome(), entry.getKey().getStartPosition()); + // do something with the reads + position = entry.getKey().getStartPosition(); + for (SAMRecord sr : reads) { + offset = position - sr.getAlignmentStart(); + pileup.append((char)sr.getReadBases()[offset]); + } + rec.setPileup(pileup.toString()); + + // reset the StringBuilder + pileup.setLength(0); + + if (++pileupCount % 1000 == 0) + logger.info("Run " + pileupCount + " pileups so far, " + reads.size() + " sam records returned from picard"); + } + } + } + + private void loadChromosomeConversionData() { + String chrConvFile = cmdLineInputFiles[4]; + ChrConvFileReader reader = null; + try { + reader = new ChrConvFileReader(new File(chrConvFile)); + } catch (Exception e) { + logger.error("Exception caught whilst trying to instantiate ChrConvFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + for (ChromosomeConversionRecord record : reader) { + // add extra map inserts here as required + // diBayes field is no longer present in chr conv file +// gffToQCMG.put(record.getDiBayes(), record.getQcmg()); + // guessing we want ensemble in here as the key + gffToQCMG.put(record.getEnsembleV55(), record.getQcmg()); + } + + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close ChrConvFileReader", e); + exitStatus = -1; + } + } + } + + private void displayStats() { + int illuminaOnly = 0; + int gff3Only = 0; + int vcfOnly = 0; + int vcfANDgff = 0; + int vcfANDillumina = 0; + int gffANDillumina = 0; + int allThree = 0; + for (VariantRecord record : variantMap.values()) { + + boolean illuminaDataPresent = null != record.getIlluminaRef(); + boolean gffDataPresent = DEFAULT_CHAR != record.getGffRef(); + boolean vcfDataPresent = DEFAULT_CHAR != record.getVcfRef(); + + if (illuminaDataPresent && gffDataPresent && vcfDataPresent) { + allThree++; + record.setPositionMatch("IGV"); + } else if (gffDataPresent && vcfDataPresent) { + vcfANDgff++; + record.setPositionMatch("GV"); + } else if (illuminaDataPresent && vcfDataPresent) { + vcfANDillumina++; + record.setPositionMatch("IV"); + } else if (illuminaDataPresent && gffDataPresent) { + gffANDillumina++; + record.setPositionMatch("IG"); + } else if ( gffDataPresent) { + gff3Only++; + record.setPositionMatch("G"); + }else if ( vcfDataPresent) { + vcfOnly++; + record.setPositionMatch("V"); + }else if ( illuminaDataPresent) { + illuminaOnly++; + record.setPositionMatch("I"); + } + + record.setGenotypeMatch(getGenotypeMatchInfo(record)); + } + + logger.info("allThree: " + allThree); + logger.info("illuminaOnly: " + illuminaOnly); + logger.info("gff3Only: " + gff3Only); + logger.info("vcfANDgff: " + vcfANDgff); + logger.info("vcfANDillumina: " + vcfANDillumina); + 
logger.info("gffANDillumina: " + gffANDillumina); + logger.info("vcfOnly: " + vcfOnly); + + int total = allThree + illuminaOnly + gff3Only + vcfANDgff + vcfANDillumina + gffANDillumina + vcfOnly; + logger.info("Sum of above numbers: " + total); + logger.info("No of records in map: " + variantMap.size()); + + } + + private void displayStats2() { + final String IGV = "IGV"; + final String IG = "IG"; + final String IV = "IV"; + final String GV = "GV"; + final String I = "I"; + final String G = "G"; + final String V = "V"; + + int positionIGV=0, positionIG=0, positionIV=0, positionGV=0, positionI=0, positionG=0, positionV = 0; + int pIGVgIGV=0, pIGVgIG=0, pIGVgIV=0, pIGVgGV=0; + int pIGgIG=0; + int pIVgIV=0; + int pGVgGV=0; + + + for (VariantRecord record : variantMap.values()) { + + String positionMatch = record.getPositionMatch(); + String genotypeMatch = record.getGenotypeMatch(); + + if (IGV.equals(positionMatch)) { + positionIGV++; + if (IGV.equals(genotypeMatch)) pIGVgIGV++; + else if (IG.equals(genotypeMatch)) pIGVgIG++; + else if (IV.equals(genotypeMatch)) pIGVgIV++; + else if (GV.equals(genotypeMatch)) pIGVgGV++; + + } else if (IG.equals(positionMatch)) { + positionIG++; + if (IG.equals(genotypeMatch)) pIGgIG++; + + } else if (IV.equals(positionMatch)) { + positionIV++; + if (IV.equals(genotypeMatch)) pIVgIV++; + + } else if (GV.equals(positionMatch)) { + positionGV++; + if (GV.equals(genotypeMatch)) pGVgGV++; + + } else if (I.equals(positionMatch)) positionI++; + else if ( G.equals(positionMatch)) positionG++; + else if ( V.equals(positionMatch)) positionV++; + } + + logger.info("position IGV: " + positionIGV + ", genotype IGV: " + pIGVgIGV + ", genotype IG: " + pIGVgIG + ", genotype IV: " + pIGVgIV + ", genotype GV: " + pIGVgGV); + logger.info("position IG: " + positionIG + ", genotype IG: " + pIGgIG); + logger.info("position IV: " + positionIV + ", genotype IV: " + pIVgIV); + logger.info("position GV: " + positionGV + ", genotype GV: " + pGVgGV); + + logger.info("position I: " + positionI); + logger.info("position G: " + positionG); + logger.info("position V: " + positionV); + + int total = positionIGV + positionIG + positionIV + positionGV + positionI + positionG + positionV; + logger.info("Sum of above numbers: " + total); + logger.info("No of records in map: " + variantMap.size()); + + } + + private String getGenotypeMatchInfo(VariantRecord record) { + Genotype illuminaGen = BaseUtils.getGenotype(record.getIllAllele1() , record.getIllAllele2()); +// String illuminaGen = record.getIlluminaRef(); + Genotype gffGen = BaseUtils.getGenotypeFromIUPACCode(record.getGffGenotype()); + Genotype vcfGen = null; + if (DEFAULT_CHAR != record.getVcfAlt()) + vcfGen = BaseUtils.getGenotypeFromVcf(record.getVcfGenotype(), record.getVcfRef(), record.getVcfAlt()); + else + vcfGen = BaseUtils.getGenotype(record.getVcfGenotype()); + + String result = null; + + if (illuminaGen.equals( gffGen) && illuminaGen.equals(vcfGen)) result = "IGV"; + else if (illuminaGen.equals(gffGen)) result = "IG"; + else if (illuminaGen.equals(vcfGen)) result = "IV"; + else if (null != gffGen && gffGen.equals(vcfGen)) result = "GV"; +// if (doStringsMatch(illuminaGen, gffGen) && doStringsMatch(illuminaGen, vcfGen)) result = "IGV"; +// else if (doStringsMatch(illuminaGen, gffGen)) result = "IG"; +// else if (doStringsMatch(illuminaGen, vcfGen)) result = "IV"; +// else if (doStringsMatch(gffGen, vcfGen)) result = "GV"; + + return result; + } + + private boolean doStringsMatch(String a, String b) { + return null == a ? 
false : a.equals(b); + } + + private void loadDbSnpData() { + // update records with dbsnp info + // should be second of the input files + String dbSNPFile = cmdLineInputFiles[3]; + DbsnpFileReader dbSNPReader = null; + try { + dbSNPReader = new DbsnpFileReader(new File(dbSNPFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate DbsnpFileReader", e); + exitStatus = -1; + } + + int updateCount = 0; + int noOfDbSnps = 0; + if (null != dbSNPReader) { + + ChrPosition varId; + VariantRecord varRec; + IlluminaRecord illRec; + int illuminaDbSnpCount = 0; + + for (Dbsnp130Record rec : dbSNPReader) { + // update illumina array with dbSNP details + illRec = illuminaMap.get(rec.getRefSnp()); + if (null != illRec) { + if (null != illRec.getChr()) { + logger.info("illumina rec: " + illRec.getChr() + ":" + illRec.getStart() + ":" + illRec.getSnpId() +" has already been updated - dbSNP: " + rec.getChromosome() + ":" + rec.getChromosomePosition() + ":" + rec.getRefSnp()); + // dbSNP id has more than 1 chr and position - create another IlluminaRecord in the variantMap + //TODO deal with multiple dbSnps for same id here!!! + } else { + updateIlluminaRecord(illRec, rec); + } + illuminaDbSnpCount++; + } + + varId = ChrPointPosition.valueOf(rec.getChromosome(), rec.getChromosomePosition()); + // lookup variant map to see if we have a matching record + varRec = variantMap.get(varId); + if (null == varRec && null != illRec && illRec.isSnp()) { + // don't have an existing record at this position, but we want to put illumina data in here if its a snp + varRec = new VariantRecord(); + variantMap.put(varId, varRec); + } + + if (null != varRec) { + // update required fields + varRec.setDbSnpID(rec.getRefSnp()); + varRec.setDbSnpStrand(rec.getStrand().charAt(0)); + varRec.setDbSnpRef_Alt(rec.getRefGenome() + "__" + rec.getVariant()); + + if (++updateCount % 100000 == 0) + logger.info("updated " + updateCount + " variant records with dbSNP ids"); + } + +// dbSNPRecords.add(rec); + if (++noOfDbSnps % 1000000 == 0) + logger.info("hit " + noOfDbSnps + " dbSnp records"); + } + + logger.info("match count for dbSnp and Illumina: " + illuminaDbSnpCount); + + try { + dbSNPReader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close DbsnpFileReader", e); + exitStatus = -1; + } + } + + logger.info("No of dbSnp records: " + noOfDbSnps + " in file: " + dbSNPFile); + logger.info("No of updated variant records: " + updateCount); + } + + private void loadVCFData() { + String vcfFile = cmdLineInputFiles[2]; + VCFFileReader reader = null; + try { + reader = new VCFFileReader(new File(vcfFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate VCFFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + int vcfCount = 0; + ChrPosition id; + VariantRecord value; + + for (VcfRecord rec : reader) { + + id = ChrPointPosition.valueOf(rec.getChromosome(), rec.getPosition()); + + value = variantMap.get(id); + if (null == value) { + value = new VariantRecord(); + variantMap.put(id, value); + } + value.setVcfRef(rec.getRefChar()); + value.setVcfAlt(rec.getAlt().charAt(0)); + value.setVcfGenotype(VcfUtils.getGenotypeFromGATKVCFRecord(rec)); + vcfCount++; + } + logger.info("there were " + vcfCount + " records in the vcf file"); + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close VCFFileReader", e); + exitStatus = -1; + } + } + } + + private void loadQSnpData() { + 
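+		// qsnp records are read as plain tab-separated strings; the genotype is taken from the last
+		// column for tumour input and the second-last column for normal input (see isNormal), and
+		// records whose genotype is missing or the literal "null" are skipped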
String qSnpFile = cmdLineInputFiles[2]; + PileupFileReader reader = null; + try { + reader = new PileupFileReader(new File(qSnpFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate PileupFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + int vcfCount = 0; + ChrPosition id; + VariantRecord value; + + for (String rec : reader) { +// for (PileupRecord rec : reader) { + // got some work to do here - need to split the pileup attribute to construct the object + String [] params = TabTokenizer.tokenize(rec); +// String [] params = tabbedPattern.split(rec.getPileup(), -1); + + // skip if the tumour genotype is null + String genotype = params[params.length-(isNormal ? 2 : 1)]; + if (null != genotype && ! "null".equals(genotype)) { + + id = ChrPointPosition.valueOf(params[0], Integer.parseInt(params[1])); + + value = variantMap.get(id); + if (null == value) { + value = new VariantRecord(); + variantMap.put(id, value); + } + value.setVcfRef(params[2].charAt(0)); + // value.setVcfAlt(rec.getAlt()); + value.setVcfGenotype(genotype); + vcfCount++; + } + } + logger.info("there were " + vcfCount + " records in the qsnp file"); + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close PileupFileReader", e); + exitStatus = -1; + } + } + } + + private void loadGff3Data() { + String gff3File = cmdLineInputFiles[1]; + GFF3FileReader reader = null; + try { + reader = new GFF3FileReader(new File(gff3File)); + } catch (Exception e) { + logger.error("Exception caught whilst trying to instantiate GFF3FileReader", e); + exitStatus = -1; + } + + if (null != reader) { + int gff3Count = 0; + ChrPosition id; + VariantRecord value; + String chr; + + for (GFF3Record rec : reader) { + // get QCMG chromosome from map + chr = gffToQCMG.get(rec.getSeqId()); + + id = ChrPointPosition.valueOf(chr, rec.getStart()); + + value = variantMap.get(id); + if (null == value) { + value = new VariantRecord(); + variantMap.put(id, value); + } + String attributes = rec.getAttributes(); + char genotype = attributes.charAt(attributes.indexOf("genotype=")+9); + char reference = attributes.charAt(attributes.indexOf("reference=")+10); +// value.setGffAlt(genotype+""); + value.setGffGenotype(genotype); + value.setGffRef(reference); + gff3Count++; + } + logger.info("there were " + gff3Count + " records in the gff3 file"); + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close GFF3FileReader", e); + exitStatus = -1; + } + } + } + + private void loadRawIlluminaData() { + String illuminaFile = cmdLineInputFiles[0]; + + isNormal = illuminaFile.contains("ND_"); + + IlluminaFileReader reader = null; + try { + reader = new IlluminaFileReader(new File(illuminaFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + IlluminaRecord tempRec; + for (Record rec : reader) { + tempRec = (IlluminaRecord) rec; + illuminaMap.put(tempRec.getSnpId(), tempRec); + } + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close IlluminaFileReader", e); + exitStatus = -1; + } + } + logger.info("Loaded " + illuminaMap.size() + " entries into the illumina map"); + } + +// private void loadIlluminaData() { +// String illuminaFile = cmdLineInputFiles[0]; +// IlluminaFileReader reader = null; +// try { +// reader = new IlluminaFileReader(new 
File(illuminaFile)); +// } catch (Exception e) { +// logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); +// exitStatus = -1; +// } +// +// if (null != reader) { +// VariantID id; +// IlluminaRecord tempRec; +// +// for (Record rec : reader) { +// tempRec = (IlluminaRecord) rec; +// +// id = new VariantID(tempRec.getChr(), tempRec.getStart()); +// +// VariantRecord value = variantMap.get(id); +// if (null == value) { +// value = new VariantRecord(); +// variantMap.put(id, value); +// } +// value.setIlluminaSNP(tempRec.getSnp()); +// } +// try { +// reader.close(); +// } catch (IOException e) { +// logger.error("IOException caught whilst trying to close IlluminaFileReader", e); +// exitStatus = -1; +// } +// } +// } + + private void convertIlluminaToVariant() { + ChrPosition id; + VariantRecord value; + + // loop through the illumina map converting all entries into the variantMap + for (IlluminaRecord illuminaRec : illuminaMap.values()) { + + // TODO check this !!! + // ignore records that did not have a dbSNP + if (null != illuminaRec.getChr()) { + + id = ChrPointPosition.valueOf(illuminaRec.getChr(), illuminaRec.getStart()); + + value = variantMap.get(id); + if (null == value && illuminaRec.isSnp()) { + // only want to populate our map with illumina data that does not have a corresponding gff or vcf record + // if it contains a snp + value = new VariantRecord(); + variantMap.put(id, value); + } + + if (null != value) { + value.setDbSnpID(illuminaRec.getSnpId()); +// value.setIlluminaAlt(illuminaRec.getRefGenomeRefSNPAllele()); + value.setIlluminaRef(illuminaRec.getSnp()); + value.setIllAllele1(illuminaRec.getFirstAllele()); + value.setIllAllele2(illuminaRec.getSecondAllele()); + value.setIllGCScore(illuminaRec.getGCScore()); + value.setIllTypeHom(illuminaRec.isHom()); + } + } + } + + // clear illuminaMap - no longer required + illuminaMap.clear(); + } + + + private void updateIlluminaRecord(IlluminaRecord illuminaRec, Dbsnp130Record dbSnpRec) { + // standard value setting here... + char dbSnpStrand = dbSnpRec.getStrand().charAt(0); + illuminaRec.setChr(dbSnpRec.getChromosome()); + illuminaRec.setStart(dbSnpRec.getChromosomePosition()); +// illuminaRec.setRefGenomeRefSNPAllele(dbSnpRec.getRefGenome() + "__" + dbSnpRec.getVariant()); + + // now gets a bit more interesting + char strand; + // if illumina alleles are equal to dbsnp alleles + if (BaseUtils.areGenotypesEqual(dbSnpRec.getVariant(), illuminaRec.getSnp())) { + strand = dbSnpStrand; + } else strand = '+' == dbSnpStrand ? '-' : '+'; +// if (illuminaRec.getReference().charAt(1) == dbAlleles.charAt(0) && +// illuminaRec.getReference().charAt(3) == dbAlleles.charAt(2)) { +// strand = dbSnpStrand; +// } else strand = '+' == dbSnpStrand ? 
'-' : '+'; + + // no longer switch the illumina snp call, but the actual allele data +// if ('-' == strand) +// illuminaRec.setReference(BaseUtils.getComplementFromString(illuminaRec.getReference())); +// else +// illuminaRec.setReference(illuminaRec.getReference().substring(1, illuminaRec.getReference().length()-1)); + if ('-' == strand) { + illuminaRec.setFirstAllele(BaseUtils.getComplement(illuminaRec.getFirstAllele())); + illuminaRec.setSecondAllele(BaseUtils.getComplement(illuminaRec.getSecondAllele())); + } + // trim illumina snp + illuminaRec.setSnp(illuminaRec.getSnp().substring(1, illuminaRec.getSnp().length()-1)); + + // set snp + illuminaRec.setSnp(isSnp(dbSnpRec.getRefGenome(), illuminaRec.getFirstAllele(), illuminaRec.getSecondAllele())); + } + + private boolean isSnp(String ref, char alleleOne, char alleleTwo) { + if (null == ref || DEFAULT_CHAR == alleleOne || DEFAULT_CHAR == alleleTwo) + return false; + return ref.charAt(0) != alleleOne || ref.charAt(0) != alleleTwo; + } +// private boolean isSnp(String ref, String genotype) { +// if (null == ref || null == genotype) +// return false; +// // assume ref is of type A +// // assume genotype is of the form A/G +// return ref.charAt(0) != genotype.charAt(0) || ref.charAt(0) != genotype.charAt(2); +// } + + + private void outputVariantData() { + FileWriter allRecordsWriter = null; + FileWriter nonDbSnpwriter = null; + try { + allRecordsWriter = new FileWriter(new File(cmdLineOutputFiles[0])); // should be the first output file supplied + nonDbSnpwriter = new FileWriter(new File(cmdLineOutputFiles[1])); // should be the second output file supplied + allRecordsWriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP + "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina + "gff3_ref\talt\tgen" + //gff + "\tvfc_ref\talt\tgen\t" + //vcf + "pileup\t" + //pileup + "posMatch\tgenMatch\n"); //matching + + nonDbSnpwriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP + "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina + "gff3_ref\talt\tgen" + //gff + "\tvfc_ref\talt\tgen\t" + //vcf + "pileup\n" + //pileup + "posMatch\tgenMatch\n"); //matching + } catch (IOException ioe) { + logger.error("IOException caught whilst outputting data", ioe); + } + + //plonk the data into a TreeMap to bring some order to the proceedings.. + TreeMap sortedVariantMap = new TreeMap(variantMap); + + ChrPosition id; + VariantRecord value; +// String chr; + + for (Map.Entry entry : sortedVariantMap.entrySet()) { + id = entry.getKey(); + value = entry.getValue(); +// chr = ( ! id.getChromosome().startsWith("GL") ? 
"chr" : "") + id.getChromosome(); + + try { + allRecordsWriter.write(id.getChromosome() + "\t" + + id.getStartPosition() + "\t" + + value.formattedRecord() ); + // only want non dbSNP records + if (null == value.getDbSnpID()) { + nonDbSnpwriter.write(id.getChromosome() + "\t" + + id.getStartPosition() + "\t" + + value.formattedRecord() ); + } + } catch (IOException e) { + logger.error("IOException caught whilst outputting data", e); + } + } + + // close up + try { + allRecordsWriter.close(); + nonDbSnpwriter.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close output files", e); + } + } + + + public static void main(String[] args) throws Exception { + SnpPicker sp = new SnpPicker(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(SnpPicker.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("SnpPicker", SnpPicker.class.getPackage().getImplementationVersion()); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- new file mode 100644 index 000000000..7e6275fe1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- @@ -0,0 +1,200 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Pattern; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.pileup.PileupFileReader; + +public class UniqueQSnps { + + private static final QLogger logger = QLoggerFactory.getLogger(UniqueQSnps.class); + + private static Map qSnpPileup = new HashMap(10000); +// private static Map qSnpPileup = new HashMap(10000); + private static Map gatkVcfs = new HashMap(10000); +// private static Map gatkVcfs = new HashMap(10000); + private static Map verifiedSNPs = new HashMap(500); +// private static Map verifiedSNPs = new HashMap(500); + + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + + + public static void main(String[] args) throws Exception { + logger.info("hello..."); + + String filename = args[0]; + boolean runQPileup = true; + // filename type depends on whether to load qpileup or vcf + if (FileUtils.isFileTypeValid(filename, "vcf")) { + runQPileup = false; + } + loadVerifiedSnps(args[1]); + logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); + + + if (runQPileup) { + // load the existing pileup into memory + logger.info("running in pileup mode"); + loadQPileup(args[0]); + logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); + examine(args[2]); + } else { + logger.info("running in vcf mode"); + loadGatkData(args[0]); + logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); + examineVCFs(args[2]); + } + + + // load the existing pileup into memory + + examine(args[2]); + logger.info("goodbye..."); + } + + + private static void examine(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : qSnpPileup.entrySet()) { + ++totalCount; + String verifiedRecord = verifiedSNPs.get(entry.getKey()); +// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); + String qSnpRecord = entry.getValue(); + + if (null == verifiedRecord) { + String [] params = TabTokenizer.tokenize(qSnpRecord); +// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); + String annotation = params[params.length-1]; + if ("--".equals(annotation)) { + ++uniqueQSnpClassACount; + writer.write(qSnpRecord + "\n"); + } else if ("less than 12 reads coverage in normal".equals(annotation)) { + ++uniqueQSnpClassBCount; + writer.write(qSnpRecord + "\n"); + } + } + } + + writer.close(); + logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); + } + } + + private static void examineVCFs(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : qSnpPileup.entrySet()) { + ++totalCount; + String verifiedRecord = 
verifiedSNPs.get(entry.getKey()); +// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); + String qSnpRecord = entry.getValue(); +// PileupRecord qSnpRecord = entry.getValue(); + + if (null == verifiedRecord) { + String [] params = TabTokenizer.tokenize(qSnpRecord); +// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); + String annotation = params[params.length-1]; + if ("--".equals(annotation)) { + ++uniqueQSnpClassACount; + writer.write(qSnpRecord + "\n"); + } else if ("less than 12 reads coverage in normal".equals(annotation)) { + ++uniqueQSnpClassBCount; + writer.write(qSnpRecord + "\n"); + } + } + } + + writer.close(); + logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); + } + } + + + private static void loadQPileup(String pileupFile) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + PileupFileReader reader = new PileupFileReader(new File(pileupFile)); + for (String pr : reader) { +// for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); +// String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[params.length-2]; +// logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + qSnpPileup.put(chrPos,pr); + } + reader.close(); + } + } + + private static void loadGatkData(String pileupFile) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + PileupFileReader reader = new PileupFileReader(new File(pileupFile)); + for (String pr : reader) { +// for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); +// String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[params.length-2]; +// logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + gatkVcfs.put(chrPos,pr); + } + reader.close(); + } + } + + private static void loadVerifiedSnps(String verifiedSnpFile) throws Exception { + if (FileUtils.canFileBeRead(verifiedSnpFile)) { + + PileupFileReader reader = new PileupFileReader(new File(verifiedSnpFile)); + for (String pr : reader) { +// for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); +// String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[2]; +// logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + verifiedSNPs.put(chrPos,pr); + } + reader.close(); + } + } + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java index 84eb0320e..4ac4d5586 100644 --- a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java +++ b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java @@ -21,8 +21,8 @@ import org.qcmg.qmule.Messages; import org.qcmg.qmule.Options; import 
org.qcmg.qmule.QMuleException; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; public class UniqueSnps { diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- new file mode 100644 index 000000000..4ac4d5586 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- @@ -0,0 +1,263 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Pattern; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class UniqueSnps { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + + private static QLogger logger; + +// private static Map qSnpPileup = new HashMap(10000); +// private static Map gatkVcfs = new HashMap(10000); + private static Map verifiedSNPs = new HashMap(500); + private static Map unVerifiedSNPs = new HashMap(10000); + + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + + + public int engage() throws Exception { + logger.info("hello..."); + + loadVerifiedSnps(cmdLineInputFiles[1]); + logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); + if (verifiedSNPs.isEmpty()) exitStatus = 1; + + loadUnverifiedSnps(cmdLineInputFiles[0]); + logger.info("loaded " + unVerifiedSNPs.size() + " entries into the un-verifiedSNPs map"); + if (unVerifiedSNPs.isEmpty()) exitStatus = 1; + + +// examine(args[2]); +// if (runQPileup) { +// // load the existing pileup into memory +// logger.info("running in pileup mode"); +// loadUnverifiedSnps(args[0]); +// logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); +// } else { +// logger.info("running in vcf mode"); +// loadGatkData(args[0]); +// logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); +// examineVCFs(args[2]); +// } + + + // load the existing pileup into memory + + examine(cmdLineOutputFiles[0]); + logger.info("goodbye..."); + + return exitStatus; + } + + + private static void examine(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + int totalCount = 0, uniqueClassA = 0, uniqueClassB = 0, uniqueClassC = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + try { + for (final Map.Entry unVerifiedEntry : unVerifiedSNPs.entrySet()) { + TabbedRecord unVerifiedRecord = unVerifiedEntry.getValue(); + String [] params = tabbedPattern.split(unVerifiedRecord.getData()); + String consequenceType = params[22]; + if (consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS")) { + + ++totalCount; + + TabbedRecord verifiedRecord = verifiedSNPs.get(unVerifiedEntry.getKey()); + + if 
(null == verifiedRecord) { + String annotation = params[params.length-1]; + if ("--".equals(annotation)) { + ++uniqueClassA; + writer.write(unVerifiedRecord.getData() + "\n"); + } else if ("less than 12 reads coverage in normal".equals(annotation) + || "less than 3 reads coverage in normal".equals(annotation)) { + ++uniqueClassB; + writer.write(unVerifiedRecord.getData() + "\n"); + } + } + } + } + } finally { + writer.close(); + } + logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueClassA + ", uniqueQSnpCount (class B): " + uniqueClassB ); + } + } + +// private static void examineVCFs(String outputFile) throws IOException { +// if (FileUtils.canFileBeWrittenTo(outputFile)) { +// +// int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; +// +// FileWriter writer = new FileWriter(new File(outputFile)); +// +// // loop through the verified snps +// +// for (final Map.Entry entry : qSnpPileup.entrySet()) { +// ++totalCount; +// TabbedRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); +// TabbedRecord qSnpRecord = entry.getValue(); +// +// if (null == verifiedRecord) { +// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); +// String annotation = params[params.length-1]; +// if ("--".equals(annotation)) { +// ++uniqueQSnpClassACount; +// writer.write(qSnpRecord.getPileup() + "\n"); +// } else if ("less than 12 reads coverage in normal".equals(annotation)) { +// ++uniqueQSnpClassBCount; +// writer.write(qSnpRecord.getPileup() + "\n"); +// } +// } +// } +// +// writer.close(); +// logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); +// } +// } + + + private static void loadUnverifiedSnps(String file) throws Exception { + if (FileUtils.canFileBeRead(file)) { + TabbedFileReader reader = new TabbedFileReader(new File(file)); + try { + for (TabbedRecord tr : reader) { + String [] params = tabbedPattern.split(tr.getData()); + String chrPosition = params[params.length-2]; +// logger.info("chrPosition: " + chrPosition); + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + unVerifiedSNPs.put(chrPos,tr); + } + } finally { + reader.close(); + } + } + } + +// private static void loadGatkData(String pileupFile) throws IOException { +// if (FileUtils.canFileBeRead(pileupFile)) { +// TabbedFileReader reader = new TabbedFileReader(new File(pileupFile)); +// for (TabbedRecord pr : reader) { +// String [] params = tabbedPattern.split(pr.getPileup()); +// String chrPosition = params[params.length-2]; +//// logger.info("chrPosition: " + chrPosition); +// ChrPosition chrPos = new ChrPosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-")))); +// +// gatkVcfs.put(chrPos,pr); +// } +// reader.close(); +// } +// } + + private void loadVerifiedSnps(String verifiedSnpFile) throws Exception { + if (FileUtils.canFileBeRead(verifiedSnpFile)) { + + TabbedFileReader reader = new TabbedFileReader(new File(verifiedSnpFile)); + try { + for (TabbedRecord tr : reader) { + String [] params = tabbedPattern.split(tr.getData()); + String chrPosition = params[2]; + // logger.info("chrPosition: " + chrPosition); + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new 
ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1),start, start); + + verifiedSNPs.put(chrPos,tr); + } + } finally { + reader.close(); + } + } + } + + public static void main(String[] args) throws Exception { + UniqueSnps sp = new UniqueSnps(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(UniqueSnps.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("UniqueSnps", UniqueSnps.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- b/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- new file mode 100644 index 000000000..eefbdd9ed --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- @@ -0,0 +1,193 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
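The loaders above (loadQPileup, loadGatkData, loadVerifiedSnps and their UniqueSnps counterparts) rebuild a ChrPosition from a packed position string using raw indexOf/substring arithmetic, and the in-file TODOs already flag this for refactoring to StringUtils.getChrPositionFromString(). Assuming the strings look like "chr1:12345-12345", substring(indexOf("-")) keeps the leading dash and substring(0, indexOf(":") - 1) trims the last character of the contig name, so an explicit split is safer. A standalone sketch with an invented class name and no qcmg types:

// Illustrative parser for a position string of the assumed form "chr1:12345-12345".
// Unlike the in-line substring arithmetic above, it splits on ':' and '-' explicitly.
public final class ChrPositionParser {

    public static String[] parse(String chrPosition) {
        int colon = chrPosition.indexOf(':');
        int dash = chrPosition.indexOf('-', colon + 1);
        if (colon < 0 || dash < 0) {
            throw new IllegalArgumentException("Unexpected position format: " + chrPosition);
        }
        String chr = chrPosition.substring(0, colon);          // "chr1"
        String start = chrPosition.substring(colon + 1, dash); // "12345"
        String end = chrPosition.substring(dash + 1);          // "12345"
        return new String[] {chr, start, end};
    }

    public static void main(String[] args) {
        String[] parts = parse("chr1:12345-12345");
        System.out.println(parts[0] + " " + Integer.parseInt(parts[1]) + " " + Integer.parseInt(parts[2]));
    }
}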
+ */ +package org.qcmg.qmule.snppicker; + +import java.text.DecimalFormat; + +public class VariantRecord { + + private final static char DEFAULT_CHAR = '\u0000'; + private final static DecimalFormat df = new DecimalFormat("0.0000"); + + private String dbSnpID; + private char dbSnpStrand; + private String dbSnpRef_Alt; + private float illGCScore; + private char illAllele1; + private char illAllele2; + private boolean illTypeHom; + private String illuminaRef; +// private String illuminaAlt; + private String illuminaSNP; + private char gffRef; + private char gffGenotype; + private String gffAlt; + private char vcfRef; + private char vcfAlt; + private String vcfGenotype; + private String pileup; + private String positionMatch; + private String genotypeMatch; + + public String getDbSnpID() { + return dbSnpID; + } + public void setDbSnpID(String dbSnpID) { + this.dbSnpID = dbSnpID; + } + public String getIlluminaRef() { + return illuminaRef; + } + public void setIlluminaRef(String illuminaRef) { + this.illuminaRef = illuminaRef; + } +// public String getIlluminaAlt() { +// return illuminaAlt; +// } +// public void setIlluminaAlt(String illuminaAlt) { +// this.illuminaAlt = illuminaAlt; +// } + public char getGffRef() { + return gffRef; + } + public void setGffRef(char gffRef) { + this.gffRef = gffRef; + } + public char getGffGenotype() { + return gffGenotype; + } + public void setGffGenotype(char gffGenotype) { + this.gffGenotype = gffGenotype; + } + public String getGffAlt() { + return gffAlt; + } + public void setGffAlt(String gffAlt) { + this.gffAlt = gffAlt; + } + public char getVcfRef() { + return vcfRef; + } + public void setVcfRef(char vcfRef) { + this.vcfRef = vcfRef; + } + public char getVcfAlt() { + return vcfAlt; + } + public void setVcfAlt(char vcfAlt) { + this.vcfAlt = vcfAlt; + } + public String getVcfGenotype() { + return vcfGenotype; + } + public void setVcfGenotype(String vcfGenotype) { + this.vcfGenotype = vcfGenotype; + } + public void setIlluminaSNP(String illuminaSNP) { + this.illuminaSNP = illuminaSNP; + } + public String getIlluminaSNP() { + return illuminaSNP; + } + + public String formattedRecord() { + StringBuilder sb = new StringBuilder(); + + sb.append(null != dbSnpID ? dbSnpID : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != dbSnpStrand ? dbSnpStrand : ""); + sb.append("\t"); + sb.append(null != dbSnpRef_Alt ? dbSnpRef_Alt : ""); + sb.append("\t"); + sb.append(illGCScore != 0.0f ? df.format(illGCScore) : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != illAllele1 ? illAllele1 : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != illAllele2 ? illAllele2 : ""); + sb.append("\t"); + sb.append(null != illuminaRef ? (illTypeHom ? "hom" : "het") : ""); + sb.append("\t"); + sb.append(null != illuminaRef ? illuminaRef : ""); + sb.append("\t"); +// sb.append(null != illuminaAlt ? illuminaAlt : ""); +// sb.append("\t"); +// sb.append(null != illuminaSNP ? illuminaSNP : ""); +// sb.append("\t"); + sb.append(DEFAULT_CHAR != gffRef ? gffRef : ""); + sb.append("\t"); + sb.append(null != gffAlt ? gffAlt : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != gffGenotype ? gffGenotype : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != vcfRef ? vcfRef : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != vcfAlt ? vcfAlt: ""); + sb.append("\t"); + sb.append(null != vcfGenotype ? vcfGenotype: ""); + sb.append("\t"); + sb.append(null != pileup ? pileup: ""); + sb.append("\t"); + sb.append(null != positionMatch ? 
positionMatch: ""); + sb.append("\t"); + sb.append(null != genotypeMatch ? genotypeMatch: ""); + sb.append("\n"); + + return sb.toString(); + } + public float getIllGCScore() { + return illGCScore; + } + public void setIllGCScore(float illGCScore) { + this.illGCScore = illGCScore; + } + public char getIllAllele1() { + return illAllele1; + } + public void setIllAllele1(char illAllele1) { + this.illAllele1 = illAllele1; + } + public char getIllAllele2() { + return illAllele2; + } + public void setIllAllele2(char illAllele2) { + this.illAllele2 = illAllele2; + } + public boolean isIllTypeHom() { + return illTypeHom; + } + public void setIllTypeHom(boolean illTypeHom) { + this.illTypeHom = illTypeHom; + } + public char getDbSnpStrand() { + return dbSnpStrand; + } + public void setDbSnpStrand(char dbSnpStrand) { + this.dbSnpStrand = dbSnpStrand; + } + public String getDbSnpRef_Alt() { + return dbSnpRef_Alt; + } + public void setDbSnpRef_Alt(String dbSnpRefAlt) { + dbSnpRef_Alt = dbSnpRefAlt; + } + public void setPileup(String pileup) { + this.pileup = pileup; + } + public String getPileup(String pileup) { + return pileup; + } + public String getPositionMatch() { + return positionMatch; + } + public void setPositionMatch(String positionMatch) { + this.positionMatch = positionMatch; + } + public String getGenotypeMatch() { + return genotypeMatch; + } + public void setGenotypeMatch(String genotypeMatch) { + this.genotypeMatch = genotypeMatch; + } + +} diff --git a/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- b/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- new file mode 100644 index 000000000..3a1e039aa --- /dev/null +++ b/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- @@ -0,0 +1,78 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.util; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.List; + +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.util.FileUtils; + +public class IGVBatchFileGenerator { + + public static final String GENOME = "GRCh37_ICGC_standard_v2"; + + + public static void generate(final List positions, final String outputFile) throws IOException { + // check that list is not empty + if (positions == null || positions.isEmpty()) + throw new IllegalArgumentException("Null or empty list passed to IGVBatchFileGenerator"); + + // can we write to the outputFile? + File output = new File(outputFile); + if( ! 
FileUtils.canFileBeWrittenTo(output)) + throw new IllegalArgumentException("Can't write to output file: " + outputFile); + + FileWriter writer = new FileWriter(output); + + try { + writer.write(getHeaderInfo(output)); + + for (ChrPosition position : positions) { + writer.write(getLocationString(position)); + } + + } finally { + writer.close(); + } + + } + + private static String getHeaderInfo(File output) { + String path = output.getParent(); + return "snapshotDirectory " + path + "\n" + + "genome " + GENOME + "\n"; + } + + private static String getLocationString(ChrPosition chrPos) { + return "goto " + chrPos.toIGVString() + + "\nsort base\n" + + "collapse\n" + + "snapshot " + chrPos.getChromosome() + ":" + chrPos.getStartPosition() + ".png\n"; + } + + + +// snapshotDirectory C:/IGV_sessions/exonorama/APGI_1992 +// genome GRCh37_ICGC_standard_v2 +// goto chr8:93156526-93156566 +// sort base +// collapse +// snapshot APGI_1992_SNP_35325-chr8-93156546-var-CtoT-WITHIN_NON_CODING_GENE-ENSG00000233778.png +// goto chr12:114377865-114377905 +// sort base +// collapse +// snapshot APGI_1992_SNP_50905-chr12-114377885-var-GtoC-SYNONYMOUS_CODING-RBM19.png +// goto chr1:228481880-228481920 +// sort base +// collapse +// snapshot APGI_1992_SNP_6964-chr1-228481900-var-GtoA-NON_SYNONYMOUS_CODING-OBSCN.png + + +} diff --git a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java index 0003812a3..60389d85b 100644 --- a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java +++ b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java @@ -12,8 +12,8 @@ import org.qcmg.common.model.ChrPosition; import org.qcmg.common.string.StringUtils; import org.qcmg.common.util.FileUtils; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; public class TabbedDataLoader { diff --git a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- new file mode 100644 index 000000000..60389d85b --- /dev/null +++ b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- @@ -0,0 +1,61 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
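IGVBatchFileGenerator above writes a header (snapshot directory plus genome) followed by a goto/sort/collapse/snapshot stanza per position, similar to the example batch commented at the bottom of the class. The sketch below reproduces that text with plain strings; the snapshot directory and coordinates are invented, and the goto line assumes a chr:start-end form, which is presumably what ChrPosition.toIGVString() supplies in the original.

// Minimal sketch of the IGV batch text emitted per position by the generator above.
public class IgvBatchSketch {

    static String header(String snapshotDir, String genome) {
        return "snapshotDirectory " + snapshotDir + "\n"
                + "genome " + genome + "\n";
    }

    static String location(String chr, int start, int end) {
        return "goto " + chr + ":" + start + "-" + end + "\n"
                + "sort base\n"
                + "collapse\n"
                + "snapshot " + chr + ":" + start + ".png\n";
    }

    public static void main(String[] args) {
        StringBuilder batch = new StringBuilder(header("/tmp/snapshots", "GRCh37_ICGC_standard_v2"));
        batch.append(location("chr8", 93156526, 93156566));
        batch.append(location("chr12", 114377865, 114377905));
        System.out.print(batch);
    }
}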
+ */ +package org.qcmg.qmule.util; + +import java.io.File; +import java.util.Map; +import java.util.regex.Pattern; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.string.StringUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class TabbedDataLoader { + + public static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + private static final QLogger logger = QLoggerFactory.getLogger(TabbedDataLoader.class); + + + public static void loadTabbedData(String tabbedDataFile, int position, Map collection) throws Exception { + if (FileUtils.canFileBeRead(tabbedDataFile)) { + + TabbedFileReader reader = new TabbedFileReader(new File(tabbedDataFile)); + try { + for (TabbedRecord tr : reader) { + String [] params = tabbedPattern.split(tr.getData()); + String chrPosition = getStringFromArray(params, position); + + if (null != chrPosition) { + ChrPosition chrPos = StringUtils.getChrPositionFromString(chrPosition); + if (null != chrPos) collection.put(chrPos,tr); + } + } + + logger.info("Added " + collection.size() + " entries to the tabbed data collection"); + + } finally { + reader.close(); + } + } else { + throw new IllegalArgumentException("data file: " + tabbedDataFile + " could not be read"); + } + } + + public static String getStringFromArray(String[] params, int index) { + String result = null; + if (null != params && params.length > 0) { + if (index >= 0) { + result = params[(index > params.length ? params.length : index)]; + } else if (params.length + index >= 0 & params.length + index < params.length){ + result = params[params.length + index]; // adding a negative number! + } + } + return result; + } + +} diff --git a/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- b/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- new file mode 100644 index 000000000..03a4e2f03 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- @@ -0,0 +1,269 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
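TabbedDataLoader.getStringFromArray above treats a negative index as "count back from the end of the row", while an out-of-range positive index is clamped to params.length, which would still index one past the last element. An illustrative bounds-safe version (not part of the patch) that returns null for any out-of-range index:

// Illustrative bounds-safe column lookup: a non-negative index is used directly when in
// range, a negative index counts back from the end of the array, and anything else
// yields null rather than an ArrayIndexOutOfBoundsException.
public class ColumnLookup {

    public static String getStringFromArray(String[] params, int index) {
        if (params == null || params.length == 0) {
            return null;
        }
        if (index >= 0) {
            return index < params.length ? params[index] : null;
        }
        int fromEnd = params.length + index;   // e.g. index == -1 -> last element
        return fromEnd >= 0 ? params[fromEnd] : null;
    }

    public static void main(String[] args) {
        String[] cols = {"chr1", "100", "A", "C"};
        System.out.println(getStringFromArray(cols, 1));   // 100
        System.out.println(getStringFromArray(cols, -1));  // C
        System.out.println(getStringFromArray(cols, 10));  // null
    }
}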
+ */ +package org.qcmg.qmule.vcf; + +import java.io.File; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicLong; + +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.GenotypeEnum; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.picard.QJumper; +import org.qcmg.picard.util.SAMUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.vcf.VCFFileReader; + +public class CompareVCFs { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private static QLogger logger; + + private final ConcurrentMap normalVCFMap = new ConcurrentHashMap(12500); //not expecting more than 100000 + private final ConcurrentMap tumourVCFMap = new ConcurrentHashMap(12500); + private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); + + public int engage() throws Exception { + + logger.info("loading normal vcf data"); + loadVCFData(cmdLineInputFiles[0], normalVCFMap); + logger.info("loading normal vcf data - DONE [" + normalVCFMap.size() + "]"); + + logger.info("loading tumour vcf data"); + loadVCFData(cmdLineInputFiles[1], tumourVCFMap); + logger.info("loading tumour vcf data - DONE [" + tumourVCFMap.size() + "]"); + + examine(); + + addPileupFromNormalBam(); + + return exitStatus; + } + + private void addPileupFromNormalBam() throws Exception { + // loop through each position in the unique map and get the entries in the normal GATK cleaned BAM file. + int notEnoughCoverage = 0, mutationFoundInNormal = 0; + StringBuilder sb = new StringBuilder(); + QJumper qj = new QJumper(); + qj.setupReader(cmdLineInputFiles[2]); + + for (Entry entry : uniqueTumourVCFMap.entrySet()) { + int position = entry.getKey().getStartPosition(); + boolean foundInNormal = false; + List sams = qj.getOverlappingRecordsAtPosition(entry.getKey().getChromosome(), position, position); + + for (SAMRecord sam : sams) { + int offset = SAMUtils.getIndexInReadFromPosition(sam, position); + if (offset > -1 && offset < sam.getReadLength()) { + char c = sam.getReadString().charAt(offset); + if (c == entry.getValue().getAlt().charAt(0)) { + foundInNormal = true; + mutationFoundInNormal++; + break; + } + } + } + + if ( ! foundInNormal && sams.size() < 8) + notEnoughCoverage++; + else if ( ! foundInNormal) + sb.append(entry.getKey().getChromosome() + ":" + position + "\n"); + } + + logger.info("total positions examined: " + uniqueTumourVCFMap.size()); + logger.info("positions where mutation was also found in normal (class C): " + mutationFoundInNormal); + logger.info("positions where coverage in normal was less than 8 (class B): " + notEnoughCoverage); + logger.info("Potential class A positions: "); + logger.info(sb.toString()); + } + + private void examine() { + + final Map diffGenotypes = new HashMap(); + + // we want to know the following... + // number unique to normal + // number unique to tumour + // no of common positions + int normalUnique = 0, tumourUnique = 0, normalAndTumour = 0; + + // for the common positions... 
+ // no that have the same mutation + // no that have a different mutation + // no of those that have the same genotype + + int sameMutation = 0, sameMutationSameGenotype = 0; + int diffMutation = 0, diffMutationSameGenotype = 0; + + // here we go + + for (Entry entry : normalVCFMap.entrySet()) { + + VcfRecord normalVCF = entry.getValue(); + VcfRecord tumourVCF = tumourVCFMap.get(entry.getKey()); + + if (null == tumourVCF) { + normalUnique++; + } else { + ++normalAndTumour; + + // sanity check - compare ref - if not the same - oh dear... + assert normalVCF.getRef().equals(tumourVCF.getRef()); + + // compare mutations + char normalMut = normalVCF.getAlt().charAt(0); + char tumourMut = tumourVCF.getAlt().charAt(0); + + // need to get the genotype from the VCFRecord + + GenotypeEnum normalGenotype = VcfUtils.calculateGenotypeEnum( + normalVCF.getInfo().substring(0, 3), normalVCF.getRefChar(), normalVCF.getAlt().charAt(0)); + GenotypeEnum tumourGenotype = VcfUtils.calculateGenotypeEnum( + tumourVCF.getInfo().substring(0, 3), tumourVCF.getRefChar(), tumourVCF.getAlt().charAt(0)); + + if (normalMut == tumourMut) { + sameMutation++; + if (normalGenotype == tumourGenotype) + ++sameMutationSameGenotype; + else { + RefAndMultiGenotype ramg = new RefAndMultiGenotype(normalVCF.getRefChar(), normalGenotype, tumourGenotype); + AtomicLong al = diffGenotypes.get(ramg); + if (null == al) { + al = new AtomicLong(); + diffGenotypes.put(ramg, al); + } + al.incrementAndGet(); + } + } else { + diffMutation++; + if (normalGenotype == tumourGenotype) + ++diffMutationSameGenotype; + } + } + } + + for (ChrPosition position : tumourVCFMap.keySet()) { + if (null == normalVCFMap.get(position)) { + tumourUnique++; + uniqueTumourVCFMap.put(position, tumourVCFMap.get(position)); + } + } + + // now print out some stats + StringBuilder sb = new StringBuilder("\nSTATS\n"); + sb.append("No of positions in normal map: " + normalVCFMap.size()); + sb.append("\nNo of unique positions in normal map: " + normalUnique); + sb.append("\nNo of positions in tumour map: " + tumourVCFMap.size()); + sb.append("\nNo of unique positions in tumour map: " + tumourUnique); + sb.append("\nNo of shared positions: " + normalAndTumour); + sb.append("\n"); + sb.append("\nNo of positions with same mutation: " + sameMutation); + sb.append("\nNo of positions with same mutation and same genotype: " + sameMutationSameGenotype); + + sb.append("\npositions with same mutation and diff genotype: "); + + for (Entry entry : diffGenotypes.entrySet()) { + sb.append("\n" + entry.getKey().toString() + " count: " + entry.getValue().get()); + } + sb.append("\nNo of positions with diff mutation: " + diffMutation); + sb.append("\nNo of positions with diff mutation and same genotype: " + diffMutationSameGenotype); + + logger.info(sb.toString()); + + + } + + private void loadVCFData(String vcfFile, Map map) throws Exception { + if (FileUtils.canFileBeRead(vcfFile)) { + + VCFFileReader reader = new VCFFileReader(new File(vcfFile)); + try { + for (VcfRecord qpr : reader) { + map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); + } + } finally { + reader.close(); + } + } + } + + + public static void main(String[] args) throws Exception { + CompareVCFs sp = new CompareVCFs(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if 
(options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(CompareVCFs.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareVCFs", CompareVCFs.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java index 3877a0bbd..29bb7c4c1 100644 --- a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java +++ b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java @@ -11,10 +11,10 @@ import org.qcmg.qmule.Messages; import org.qcmg.qmule.Options; import org.qcmg.qmule.QMuleException; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedFileWriter; -import org.qcmg.tab.TabbedHeader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; public class ConvertVcfChr { diff --git a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- new file mode 100644 index 000000000..29bb7c4c1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- @@ -0,0 +1,116 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
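CompareVCFs above buckets each tumour-unique position by what the normal BAM shows: the mutant base seen in a normal read makes it class C, fewer than 8 overlapping normal reads without the mutation makes it class B, and everything else is logged as a potential class A call. A standalone sketch of just that decision, with invented inputs in place of SAMRecords:

// Standalone sketch of the class A/B/C decision applied to each tumour-unique position above.
// The threshold (8 reads) mirrors the counters in addPileupFromNormalBam.
public class NormalEvidenceSketch {

    enum PositionClass { A, B, C }

    static PositionClass classify(boolean mutationSeenInNormal, int normalReadsAtPosition) {
        if (mutationSeenInNormal) {
            return PositionClass.C;          // mutation also present in the normal
        }
        if (normalReadsAtPosition < 8) {
            return PositionClass.B;          // not enough normal coverage to be confident
        }
        return PositionClass.A;              // well covered in normal and mutation not seen
    }

    public static void main(String[] args) {
        System.out.println(classify(false, 20)); // A
        System.out.println(classify(false, 5));  // B
        System.out.println(classify(true, 20));  // C
    }
}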
+ */ +package org.qcmg.qmule.vcf; + +import java.io.File; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class ConvertVcfChr { + + private static final String CHR = "chr"; + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private static QLogger logger; + + + private int engage() throws Exception { + + // load + if (FileUtils.canFileBeRead(cmdLineInputFiles[0])) { + TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); + TabbedHeader header = reader.getHeader(); + + TabbedFileWriter writer = new TabbedFileWriter(new File(cmdLineOutputFiles[0])); + writer.addHeader(header); + + try { + for (TabbedRecord tabRec : reader) { + if ( ! tabRec.getData().startsWith(CHR)) { + tabRec.setData(CHR + tabRec.getData()); + } + writer.add(tabRec); + } + } finally { + try { + writer.close(); + } finally { + reader.close(); + } + } + } + return exitStatus; + } + + public static void main(String[] args) throws Exception { + ConvertVcfChr sp = new ConvertVcfChr(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(ConvertVcfChr.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareVCFs", ConvertVcfChr.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- b/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- new file mode 100644 index 000000000..b0aad1b7f --- /dev/null +++ b/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- @@ -0,0 +1,101 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
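ConvertVcfChr above copies a tabbed file through TabbedFileWriter, prefixing "chr" onto any data line that does not already start with it. The same transformation in isolation; the example lines below are invented:

// Minimal sketch of the contig-renaming step in ConvertVcfChr: prefix "chr" onto a
// tab-separated line unless it is already there.
public class ChrPrefixSketch {

    private static final String CHR = "chr";

    static String addChrPrefix(String line) {
        return line.startsWith(CHR) ? line : CHR + line;
    }

    public static void main(String[] args) {
        System.out.println(addChrPrefix("1\t12345\t.\tA\tC"));    // becomes chr1...
        System.out.println(addChrPrefix("chr1\t12345\t.\tA\tC")); // unchanged
    }
}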
This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.vcf; + +import org.qcmg.common.model.GenotypeEnum; +import org.qcmg.common.model.Classification; + +public class RefAndMultiGenotype { + + private final char ref; + private final GenotypeEnum normal; + private final GenotypeEnum tumour; + + public RefAndMultiGenotype(char ref, GenotypeEnum normal, GenotypeEnum tumour) { + this.ref = ref; + this.normal = normal; + this.tumour = tumour; + } + + @Override + public String toString() { + return ref + " : " + normal.getDisplayString() + " : " + tumour.getDisplayString() + " : " + getClassification(); + } + + public String getClassification() { + if (normal == tumour) { + return Classification.GERMLINE.name(); + + } else if (normal.isHomozygous() && tumour.isHomozygous()) { + // not equal but both are homozygous + return Classification.SOMATIC.name(); + } else if (normal.isHeterozygous() && tumour.isHeterozygous()) { + // not equal but both are heterozygous + return Classification.SOMATIC.name(); + } + + /////////////////////////////////////////////////////// + // normal is HOM and tumour is HET + /////////////////////////////////////////////////////// + if (normal.isHomozygous() && tumour.isHeterozygous()) { + + GenotypeEnum refAndNormalGenotype = GenotypeEnum.getGenotypeEnum(ref, normal.getFirstAllele()); + + if (tumour == refAndNormalGenotype) { + return Classification.GERMLINE.name(); +// mutation = normal.getFirstAllele() + MUT_DELIM + record.getRef(); + } else { + return Classification.SOMATIC.name(); + } + } + + /////////////////////////////////////////////////////// + // normal is HET and tumour is HOM + ////////////////////////////////////////////////////// + else if (normal.isHeterozygous() && tumour.isHomozygous()){ + + if (normal.containsAllele(tumour.getFirstAllele())) { + return Classification.GERMLINE.name(); + } else { + return Classification.SOMATIC.name(); + } + } + return null; + } + + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((normal == null) ? 0 : normal.hashCode()); + result = prime * result + ref; + result = prime * result + ((tumour == null) ? 
0 : tumour.hashCode()); + return result; + } + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + RefAndMultiGenotype other = (RefAndMultiGenotype) obj; + if (normal == null) { + if (other.normal != null) + return false; + } else if (!normal.equals(other.normal)) + return false; + if (ref != other.ref) + return false; + if (tumour == null) { + if (other.tumour != null) + return false; + } else if (!tumour.equals(other.tumour)) + return false; + return true; + } + +} From 1a2a21e288a41208c06aa0c8d7c0db040f31349e Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 18:23:56 +1000 Subject: [PATCH 13/73] remove io class from qmule which as moved wrong location --- qmule/src/org/qcmg/qmule/AlignerCompare.java | 272 ------ .../src/org/qcmg/qmule/AlignerCompare.java-- | 272 ------ .../qcmg/qmule/AnnotateDCCWithGFFRegions.java | 710 ---------------- .../qmule/AnnotateDCCWithGFFRegions.java-- | 710 ---------------- qmule/src/org/qcmg/qmule/BAM2CS.java | 183 ---- qmule/src/org/qcmg/qmule/BAM2CS.java-- | 183 ---- qmule/src/org/qcmg/qmule/BAMCompress.java | 156 ---- qmule/src/org/qcmg/qmule/BAMCompress.java-- | 156 ---- .../src/org/qcmg/qmule/BAMHeaderChecker.java | 250 ------ .../org/qcmg/qmule/BAMHeaderChecker.java-- | 250 ------ qmule/src/org/qcmg/qmule/BAMPileupUtil.java | 124 --- qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- | 124 --- .../src/org/qcmg/qmule/BamMismatchCounts.java | 160 ---- .../org/qcmg/qmule/BamMismatchCounts.java-- | 160 ---- .../src/org/qcmg/qmule/BamRecordCounter.java | 44 - .../org/qcmg/qmule/BamRecordCounter.java-- | 44 - .../qcmg/qmule/CompareReferenceRegions.java | 676 --------------- .../qcmg/qmule/CompareReferenceRegions.java-- | 676 --------------- .../src/org/qcmg/qmule/DbSnpChrLiftover.java | 86 -- .../org/qcmg/qmule/DbSnpChrLiftover.java-- | 86 -- .../org/qcmg/qmule/GermlineDBStripper.java | 47 - .../org/qcmg/qmule/GermlineDBStripper.java-- | 47 - qmule/src/org/qcmg/qmule/GetBamRecords.java | 226 ----- qmule/src/org/qcmg/qmule/GetBamRecords.java-- | 226 ----- qmule/src/org/qcmg/qmule/GetInsetSize.java | 35 - qmule/src/org/qcmg/qmule/GetInsetSize.java-- | 35 - qmule/src/org/qcmg/qmule/IndelDCCHeader.java | 395 --------- .../src/org/qcmg/qmule/IndelDCCHeader.java-- | 395 --------- qmule/src/org/qcmg/qmule/MAF2DCC1.java | 418 --------- qmule/src/org/qcmg/qmule/MAF2DCC1.java-- | 418 --------- qmule/src/org/qcmg/qmule/Main.java | 100 --- qmule/src/org/qcmg/qmule/Main.java-- | 100 --- qmule/src/org/qcmg/qmule/Messages.java | 132 --- qmule/src/org/qcmg/qmule/Messages.java-- | 132 --- qmule/src/org/qcmg/qmule/Options.java | 512 ----------- qmule/src/org/qcmg/qmule/Options.java-- | 512 ----------- qmule/src/org/qcmg/qmule/Pileup.java | 101 --- qmule/src/org/qcmg/qmule/Pileup.java-- | 101 --- qmule/src/org/qcmg/qmule/PileupStats.java | 254 ------ qmule/src/org/qcmg/qmule/PileupStats.java-- | 254 ------ qmule/src/org/qcmg/qmule/QMuleException.java | 28 - .../src/org/qcmg/qmule/QMuleException.java-- | 28 - qmule/src/org/qcmg/qmule/QueryCADDLib.java | 187 ---- qmule/src/org/qcmg/qmule/QueryCADDLib.java-- | 187 ---- .../qcmg/qmule/ReAnnotateDccWithDbSNP.java | 280 ------ .../qcmg/qmule/ReAnnotateDccWithDbSNP.java-- | 280 ------ qmule/src/org/qcmg/qmule/ReadPartGZFile.java | 152 ---- .../src/org/qcmg/qmule/ReadPartGZFile.java-- | 152 ---- qmule/src/org/qcmg/qmule/ReadsAppend.java | 95 --- qmule/src/org/qcmg/qmule/ReadsAppend.java-- | 95 --- 
qmule/src/org/qcmg/qmule/RunGatk.java | 141 --- qmule/src/org/qcmg/qmule/RunGatk.java-- | 141 --- .../org/qcmg/qmule/SmithWatermanGotoh.java | 368 -------- .../org/qcmg/qmule/SmithWatermanGotoh.java-- | 368 -------- .../qmule/SnpToReferenceRegionFilter.java | 647 -------------- .../qmule/SnpToReferenceRegionFilter.java-- | 647 -------------- qmule/src/org/qcmg/qmule/SubSample.java | 165 ---- qmule/src/org/qcmg/qmule/SubSample.java-- | 165 ---- qmule/src/org/qcmg/qmule/TestFileFinder.java | 23 - .../src/org/qcmg/qmule/TestFileFinder.java-- | 23 - qmule/src/org/qcmg/qmule/TestJarUpdate.java | 191 ----- qmule/src/org/qcmg/qmule/TestJarUpdate.java-- | 191 ----- qmule/src/org/qcmg/qmule/TestSort.java | 109 --- qmule/src/org/qcmg/qmule/TestSort.java-- | 109 --- .../src/org/qcmg/qmule/TranscriptomeMule.java | 192 ----- .../org/qcmg/qmule/TranscriptomeMule.java-- | 192 ----- .../src/org/qcmg/qmule/WiggleFromPileup.java | 302 ------- .../org/qcmg/qmule/WiggleFromPileup.java-- | 302 ------- .../qcmg/qmule/WiggleFromPileupTakeTwo.java | 307 ------- .../qcmg/qmule/WiggleFromPileupTakeTwo.java-- | 307 ------- qmule/src/org/qcmg/qmule/XCvsZP.java | 117 --- qmule/src/org/qcmg/qmule/XCvsZP.java-- | 117 --- qmule/src/org/qcmg/qmule/bam/CheckBam.java | 339 -------- qmule/src/org/qcmg/qmule/bam/CheckBam.java-- | 339 -------- .../qcmg/qmule/bam/GetContigsFromHeader.java | 127 --- .../qmule/bam/GetContigsFromHeader.java-- | 127 --- qmule/src/org/qcmg/qmule/messages.properties | 107 --- qmule/src/org/qcmg/qmule/qcnv/CNVseq.java | 226 ----- qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- | 226 ----- qmule/src/org/qcmg/qmule/qcnv/Main.java | 57 -- qmule/src/org/qcmg/qmule/qcnv/Main.java-- | 57 -- qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java | 152 ---- qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- | 152 ---- qmule/src/org/qcmg/qmule/qcnv/Options.java | 169 ---- qmule/src/org/qcmg/qmule/qcnv/Options.java-- | 169 ---- qmule/src/org/qcmg/qmule/queryChrMT.java | 68 -- qmule/src/org/qcmg/qmule/queryChrMT.java-- | 68 -- .../org/qcmg/qmule/snppicker/CompareSnps.java | 205 ----- .../qcmg/qmule/snppicker/CompareSnps.java-- | 205 ----- .../qmule/snppicker/ExamineVerifiedSnps.java | 237 ------ .../snppicker/ExamineVerifiedSnps.java-- | 237 ------ .../qcmg/qmule/snppicker/GatkUniqueSnps.java | 488 ----------- .../qmule/snppicker/GatkUniqueSnps.java-- | 488 ----------- qmule/src/org/qcmg/qmule/snppicker/Mule.java | 85 -- .../src/org/qcmg/qmule/snppicker/Mule.java-- | 85 -- .../org/qcmg/qmule/snppicker/SnpPicker.java | 802 ------------------ .../org/qcmg/qmule/snppicker/SnpPicker.java-- | 802 ------------------ .../org/qcmg/qmule/snppicker/UniqueQSnps.java | 200 ----- .../qcmg/qmule/snppicker/UniqueQSnps.java-- | 200 ----- .../org/qcmg/qmule/snppicker/UniqueSnps.java | 263 ------ .../qcmg/qmule/snppicker/UniqueSnps.java-- | 263 ------ .../qcmg/qmule/snppicker/VariantRecord.java | 193 ----- .../qcmg/qmule/snppicker/VariantRecord.java-- | 193 ----- .../qmule/util/IGVBatchFileGenerator.java | 78 -- .../qmule/util/IGVBatchFileGenerator.java-- | 78 -- .../org/qcmg/qmule/util/TabbedDataLoader.java | 61 -- .../qcmg/qmule/util/TabbedDataLoader.java-- | 61 -- qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java | 269 ------ .../src/org/qcmg/qmule/vcf/CompareVCFs.java-- | 269 ------ .../src/org/qcmg/qmule/vcf/ConvertVcfChr.java | 116 --- .../org/qcmg/qmule/vcf/ConvertVcfChr.java-- | 116 --- .../qcmg/qmule/vcf/RefAndMultiGenotype.java | 101 --- .../qcmg/qmule/vcf/RefAndMultiGenotype.java-- | 101 --- 113 files changed, 24949 deletions(-) delete mode 
100644 qmule/src/org/qcmg/qmule/AlignerCompare.java delete mode 100644 qmule/src/org/qcmg/qmule/AlignerCompare.java-- delete mode 100644 qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java delete mode 100644 qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- delete mode 100644 qmule/src/org/qcmg/qmule/BAM2CS.java delete mode 100644 qmule/src/org/qcmg/qmule/BAM2CS.java-- delete mode 100644 qmule/src/org/qcmg/qmule/BAMCompress.java delete mode 100644 qmule/src/org/qcmg/qmule/BAMCompress.java-- delete mode 100644 qmule/src/org/qcmg/qmule/BAMHeaderChecker.java delete mode 100644 qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- delete mode 100644 qmule/src/org/qcmg/qmule/BAMPileupUtil.java delete mode 100644 qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- delete mode 100644 qmule/src/org/qcmg/qmule/BamMismatchCounts.java delete mode 100644 qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- delete mode 100644 qmule/src/org/qcmg/qmule/BamRecordCounter.java delete mode 100644 qmule/src/org/qcmg/qmule/BamRecordCounter.java-- delete mode 100644 qmule/src/org/qcmg/qmule/CompareReferenceRegions.java delete mode 100644 qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- delete mode 100644 qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java delete mode 100644 qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- delete mode 100644 qmule/src/org/qcmg/qmule/GermlineDBStripper.java delete mode 100644 qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- delete mode 100644 qmule/src/org/qcmg/qmule/GetBamRecords.java delete mode 100644 qmule/src/org/qcmg/qmule/GetBamRecords.java-- delete mode 100644 qmule/src/org/qcmg/qmule/GetInsetSize.java delete mode 100644 qmule/src/org/qcmg/qmule/GetInsetSize.java-- delete mode 100644 qmule/src/org/qcmg/qmule/IndelDCCHeader.java delete mode 100644 qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- delete mode 100644 qmule/src/org/qcmg/qmule/MAF2DCC1.java delete mode 100644 qmule/src/org/qcmg/qmule/MAF2DCC1.java-- delete mode 100644 qmule/src/org/qcmg/qmule/Main.java delete mode 100644 qmule/src/org/qcmg/qmule/Main.java-- delete mode 100644 qmule/src/org/qcmg/qmule/Messages.java delete mode 100644 qmule/src/org/qcmg/qmule/Messages.java-- delete mode 100644 qmule/src/org/qcmg/qmule/Options.java delete mode 100644 qmule/src/org/qcmg/qmule/Options.java-- delete mode 100644 qmule/src/org/qcmg/qmule/Pileup.java delete mode 100644 qmule/src/org/qcmg/qmule/Pileup.java-- delete mode 100644 qmule/src/org/qcmg/qmule/PileupStats.java delete mode 100644 qmule/src/org/qcmg/qmule/PileupStats.java-- delete mode 100644 qmule/src/org/qcmg/qmule/QMuleException.java delete mode 100644 qmule/src/org/qcmg/qmule/QMuleException.java-- delete mode 100644 qmule/src/org/qcmg/qmule/QueryCADDLib.java delete mode 100644 qmule/src/org/qcmg/qmule/QueryCADDLib.java-- delete mode 100644 qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java delete mode 100644 qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- delete mode 100644 qmule/src/org/qcmg/qmule/ReadPartGZFile.java delete mode 100644 qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- delete mode 100644 qmule/src/org/qcmg/qmule/ReadsAppend.java delete mode 100644 qmule/src/org/qcmg/qmule/ReadsAppend.java-- delete mode 100644 qmule/src/org/qcmg/qmule/RunGatk.java delete mode 100644 qmule/src/org/qcmg/qmule/RunGatk.java-- delete mode 100644 qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java delete mode 100644 qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java-- delete mode 100644 qmule/src/org/qcmg/qmule/SnpToReferenceRegionFilter.java delete mode 100644 
qmule/src/org/qcmg/qmule/SnpToReferenceRegionFilter.java-- delete mode 100644 qmule/src/org/qcmg/qmule/SubSample.java delete mode 100644 qmule/src/org/qcmg/qmule/SubSample.java-- delete mode 100644 qmule/src/org/qcmg/qmule/TestFileFinder.java delete mode 100644 qmule/src/org/qcmg/qmule/TestFileFinder.java-- delete mode 100644 qmule/src/org/qcmg/qmule/TestJarUpdate.java delete mode 100644 qmule/src/org/qcmg/qmule/TestJarUpdate.java-- delete mode 100644 qmule/src/org/qcmg/qmule/TestSort.java delete mode 100644 qmule/src/org/qcmg/qmule/TestSort.java-- delete mode 100644 qmule/src/org/qcmg/qmule/TranscriptomeMule.java delete mode 100644 qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- delete mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileup.java delete mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- delete mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java delete mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- delete mode 100644 qmule/src/org/qcmg/qmule/XCvsZP.java delete mode 100644 qmule/src/org/qcmg/qmule/XCvsZP.java-- delete mode 100644 qmule/src/org/qcmg/qmule/bam/CheckBam.java delete mode 100644 qmule/src/org/qcmg/qmule/bam/CheckBam.java-- delete mode 100644 qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java delete mode 100644 qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- delete mode 100644 qmule/src/org/qcmg/qmule/messages.properties delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/CNVseq.java delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/Main.java delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/Main.java-- delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/Options.java delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/Options.java-- delete mode 100644 qmule/src/org/qcmg/qmule/queryChrMT.java delete mode 100644 qmule/src/org/qcmg/qmule/queryChrMT.java-- delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/Mule.java delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/Mule.java-- delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- delete mode 100644 qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java delete mode 100644 qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- delete mode 100644 qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java delete mode 100644 qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- delete mode 100644 
qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java delete mode 100644 qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- delete mode 100644 qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java delete mode 100644 qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- delete mode 100644 qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java delete mode 100644 qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- diff --git a/qmule/src/org/qcmg/qmule/AlignerCompare.java b/qmule/src/org/qcmg/qmule/AlignerCompare.java deleted file mode 100644 index 5c8538a93..000000000 --- a/qmule/src/org/qcmg/qmule/AlignerCompare.java +++ /dev/null @@ -1,272 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; -import java.util.Objects; - -import htsjdk.samtools.SAMFileHeader.SortOrder; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - -public class AlignerCompare { - static QLogger logger = QLoggerFactory.getLogger(AlignerCompare.class); - boolean discardNonPrimary; - SamReader firReader; - SamReader secReader; - - SAMOrBAMWriterFactory sameWriter; - SAMOrBAMWriterFactory diffWriter_first; - SAMOrBAMWriterFactory diffWriter_second; - - SAMOrBAMWriterFactory unsureWriter_first; - SAMOrBAMWriterFactory unsureWriter_second; - - - long total_bam1 = 0; - long total_bam2 = 0; - long total_same = 0; - long noDiff_bam1 = 0; - long noDiff_bam2 = 0; - long noSecondary_bam1 = 0; - long nosupplementary_bam1 = 0; - long noSecondary_bam2 = 0; - long nosupplementary_bam2 = 0; - long nounsureAlignment = 0; - - - AlignerCompare(File firBam, File secBam, String prefix, boolean flag) throws Exception{ - //check inputs: sort by query name - firReader = SAMFileReaderFactory.createSAMFileReader(firBam, ValidationStringency.SILENT); - secReader = SAMFileReaderFactory.createSAMFileReader(secBam, ValidationStringency.SILENT); - discardNonPrimary = flag; - - if(! firReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) - throw new Exception("Please sort the input BAM by queryname: " + firBam.getAbsolutePath()); - - if(! secReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) - throw new Exception("Please sort the input BAM by queryname: " + secBam.getAbsolutePath()); - - - logger.info("input BAM1: " + firBam.getAbsolutePath()); - logger.info("input BAM2: " + secBam.getAbsolutePath()); - logger.info("discard secondary or supplementary alignments: " + String.valueOf(discardNonPrimary)); - - //create outputs - File outsame = new File(prefix + ".identical.bam" ); - File outdiff_first = new File(prefix + ".different.first.bam" ); - File outdiff_second = new File(prefix + ".different.second.bam" ); - - if(! firBam.getName().equals(secBam.getName())){ - outdiff_first = new File( prefix + ".different." + firBam.getName() ); - outdiff_second = new File( prefix + ".different." 
+ secBam.getName() ); - } - - sameWriter = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outsame); - diffWriter_first = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outdiff_first ); - diffWriter_second = new SAMOrBAMWriterFactory(secReader.getFileHeader(), true, outdiff_second ); - - logger.info("output of identical alignments: " + outsame.getAbsolutePath()); - logger.info("output of different alignments from BAM1: " + outdiff_first.getAbsolutePath()); - logger.info("output of different alignments from BAM2: " + outdiff_second.getAbsolutePath()); - - //execute comparison - compareExecutor(); - - - //close IOs - firReader.close(); - secReader.close(); - sameWriter.closeWriter(); - diffWriter_first.closeWriter(); - diffWriter_second.closeWriter(); - - } - - void compareExecutor() throws Exception{ - ArrayList from1 = new ArrayList (); - ArrayList from2 = new ArrayList (); - SAMRecordIterator it1 = firReader.iterator(); - SAMRecordIterator it2 = secReader.iterator(); - //stats - long noRead = 0; - long noAlign1 = 1; - long noAlign2 = 1; - long noSame = 0; - - //initialize - SAMRecord record1 = it1.next(); - SAMRecord record2 = it2.next(); - String Id = record1.getReadName(); - from1.add(record1); - from2.add(record2); - - //get all aligner from same read - while( it1.hasNext() || it2.hasNext()){ - while(it1.hasNext()){ - noAlign1 ++; - record1 = it1.next() ; - if(record1.getReadName().equals(Id)){ - from1.add(record1); - }else //if not equals(Id) - break; - } //end while - - while( it2.hasNext() ){ - noAlign2 ++; - record2 = it2.next(); - if(record2.getReadName().equals(Id)){ - from2.add(record2); - }else - break; //exit while, record2 is read for next loop - } - //compare alignment in arraylist which filtered out secondary or supplenmentary alignments - noSame += classifyReads( AlignerFilter(from1, unsureWriter_first) , AlignerFilter(from2, unsureWriter_second) ); - - //clear arraylist and store current reads into arraylist for next loop - noRead ++; - from1.clear(); - from2.clear(); - from1.add(record1); - from2.add(record2); - Id = record1.getReadName(); - } - - logger.info(String.format("There are %d reads with %d alignments from BAM1", noRead, noAlign1)); - logger.info(String.format("There are %d reads with %d alignments from BAM2", noRead, noAlign2)); - logger.info(String.format("There are %d alignments are identical from both BAM", noSame)); - logger.info(String.format("Different alignments from BAM1 are %d, from BAM2 are %d", noDiff_bam1, noDiff_bam2)); - logger.info( String.format("discard %d secondary alignments and %d supplementary alignments from BAM1",noSecondary_bam1,nosupplementary_bam1)); - logger.info(String.format("discard %d secondary alignments and %d supplementary alignments from BAM2",noSecondary_bam2,nosupplementary_bam2)); - - - } - - /** - * - * @param from: an input alignments with same read id - * @return ArrayList : cleaned alignments excluding secondary and supplementary alignments - */ - ArrayList AlignerFilter(ArrayList from, SAMOrBAMWriterFactory factory) throws Exception{ - ArrayList cleaned = new ArrayList(); - - for(SAMRecord record : from) - if( discardNonPrimary && record.isSecondaryOrSupplementary()){ - if( record.getNotPrimaryAlignmentFlag()) - noSecondary_bam1 ++; - else if( record.getSupplementaryAlignmentFlag()) - nosupplementary_bam1 ++; - else - throw new Exception(record.getReadName() + " record flag error: record.isSecondaryOrSupplementary but not (secondary or supplementary) : " + record.getFlags()); - }else - 
cleaned.add(record); - -/* //record these multi alignments for further investigation - if(cleaned.size() != 2){ - for(SAMRecord record : cleaned){ - factory.getWriter().addAlignment(record); - nounsureAlignment ++; - - } - } -*/ - return cleaned; - } - - - int classifyReads(ArrayList from1, ArrayList from2) throws Exception{ - ArrayList toremove1 = new ArrayList(); - ArrayList toremove2 = new ArrayList(); - - for(SAMRecord record1 : from1){ - for(SAMRecord record2: from2){ - if(!record1.getReadName().equals(record2.getReadName())) - throw new Exception("error during process: reads with different name are store in arrayList for comparison: " - + record1.getReadName() + " != " + record2.getReadName() ) ; - if (record1.getFlags() == record2.getFlags() && - record1.getReferenceName().equals(record2.getReferenceName()) && - record1.getAlignmentStart() == record2.getAlignmentStart() && - record1.getAlignmentEnd() == record2.getAlignmentEnd() && - record1.getMappingQuality() == record2.getMappingQuality() && - record1.getCigarString().equals(record2.getCigarString()) && - Objects.equals(record1.getAttribute("MD") , record2.getAttribute("MD"))){ - sameWriter.getWriter().addAlignment(record1); - toremove1.add(record1); - toremove2.add(record2); - } - } - } - - //record the left differnt aligner - from1.removeAll(toremove1); - for(SAMRecord record1 : from1) - diffWriter_first.getWriter().addAlignment(record1); - - from2.removeAll(toremove2); - for(SAMRecord record2: from2) - diffWriter_second.getWriter().addAlignment(record2); - - //count unique alignment number - noDiff_bam1 += from1.size(); - noDiff_bam2 += from2.size(); - - return toremove1.size(); - } - - public static void main(String[] args) throws Exception{ - - Options op = new Options(AlignerCompare.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_AlignerCompare")); - op.displayHelp(); - System.exit(0); - } - - if( op.getInputFileNames().length != 2 - || op.getOutputFileNames().length != 1 ){ - System.err.println("improper parameters passed to command line, please refer to"); - System.out.println(Messages.getMessage("USAGE_AlignerCompare")); - op.displayHelp(); - System.exit(1); - } - - File f1 = new File(op.getInputFileNames()[0]); - File f2 = new File(op.getInputFileNames()[1]); - if(! f1.exists() || ! f2.exists()) - throw new Exception("input not exists: " + args[0] + " or " + args[1]); - - //assign to true if no "compareAll" option - boolean flag = ! 
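
For readers following the removed AlignerCompare code above: the pairwise test applied in classifyReads can be read as a single predicate over two alignments. The sketch below is illustrative only; it assumes nothing beyond the htsjdk SAMRecord methods already used in the deleted source.

    import htsjdk.samtools.SAMRecord;
    import java.util.Objects;

    final class AlignmentEquality {
        // Mirrors the criteria in classifyReads: two alignments count as identical
        // when flags, contig, start, end, mapping quality, CIGAR string and the
        // MD attribute all agree.
        static boolean sameAlignment(SAMRecord a, SAMRecord b) {
            return a.getFlags() == b.getFlags()
                    && a.getReferenceName().equals(b.getReferenceName())
                    && a.getAlignmentStart() == b.getAlignmentStart()
                    && a.getAlignmentEnd() == b.getAlignmentEnd()
                    && a.getMappingQuality() == b.getMappingQuality()
                    && a.getCigarString().equals(b.getCigarString())
                    && Objects.equals(a.getAttribute("MD"), b.getAttribute("MD"));
        }
    }

Factoring the comparison out this way would also make it straightforward to unit-test against hand-built SAMRecord instances.
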
op.hasCompareAllOption(); - - if(op.hasLogOption()) - logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getLogFile(), op.getLogLevel()); - else - logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - logger.logInitialExecutionStats( "qmule " + AlignerCompare.class.getName(), version,args); - - long startTime = System.currentTimeMillis(); - AlignerCompare compare = new AlignerCompare( f1, f2, op.getOutputFileNames()[0], flag ); - - logger.info( String.format("It took %d hours, %d minutes to perform the comparison", - (int) (System.currentTimeMillis() - startTime) / (1000*60*60), - (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) ); - logger.logFinalExecutionStats(0); - - } - - -} diff --git a/qmule/src/org/qcmg/qmule/AlignerCompare.java-- b/qmule/src/org/qcmg/qmule/AlignerCompare.java-- deleted file mode 100644 index 5c8538a93..000000000 --- a/qmule/src/org/qcmg/qmule/AlignerCompare.java-- +++ /dev/null @@ -1,272 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; -import java.util.Objects; - -import htsjdk.samtools.SAMFileHeader.SortOrder; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - -public class AlignerCompare { - static QLogger logger = QLoggerFactory.getLogger(AlignerCompare.class); - boolean discardNonPrimary; - SamReader firReader; - SamReader secReader; - - SAMOrBAMWriterFactory sameWriter; - SAMOrBAMWriterFactory diffWriter_first; - SAMOrBAMWriterFactory diffWriter_second; - - SAMOrBAMWriterFactory unsureWriter_first; - SAMOrBAMWriterFactory unsureWriter_second; - - - long total_bam1 = 0; - long total_bam2 = 0; - long total_same = 0; - long noDiff_bam1 = 0; - long noDiff_bam2 = 0; - long noSecondary_bam1 = 0; - long nosupplementary_bam1 = 0; - long noSecondary_bam2 = 0; - long nosupplementary_bam2 = 0; - long nounsureAlignment = 0; - - - AlignerCompare(File firBam, File secBam, String prefix, boolean flag) throws Exception{ - //check inputs: sort by query name - firReader = SAMFileReaderFactory.createSAMFileReader(firBam, ValidationStringency.SILENT); - secReader = SAMFileReaderFactory.createSAMFileReader(secBam, ValidationStringency.SILENT); - discardNonPrimary = flag; - - if(! firReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) - throw new Exception("Please sort the input BAM by queryname: " + firBam.getAbsolutePath()); - - if(! 
secReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) - throw new Exception("Please sort the input BAM by queryname: " + secBam.getAbsolutePath()); - - - logger.info("input BAM1: " + firBam.getAbsolutePath()); - logger.info("input BAM2: " + secBam.getAbsolutePath()); - logger.info("discard secondary or supplementary alignments: " + String.valueOf(discardNonPrimary)); - - //create outputs - File outsame = new File(prefix + ".identical.bam" ); - File outdiff_first = new File(prefix + ".different.first.bam" ); - File outdiff_second = new File(prefix + ".different.second.bam" ); - - if(! firBam.getName().equals(secBam.getName())){ - outdiff_first = new File( prefix + ".different." + firBam.getName() ); - outdiff_second = new File( prefix + ".different." + secBam.getName() ); - } - - sameWriter = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outsame); - diffWriter_first = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outdiff_first ); - diffWriter_second = new SAMOrBAMWriterFactory(secReader.getFileHeader(), true, outdiff_second ); - - logger.info("output of identical alignments: " + outsame.getAbsolutePath()); - logger.info("output of different alignments from BAM1: " + outdiff_first.getAbsolutePath()); - logger.info("output of different alignments from BAM2: " + outdiff_second.getAbsolutePath()); - - //execute comparison - compareExecutor(); - - - //close IOs - firReader.close(); - secReader.close(); - sameWriter.closeWriter(); - diffWriter_first.closeWriter(); - diffWriter_second.closeWriter(); - - } - - void compareExecutor() throws Exception{ - ArrayList from1 = new ArrayList (); - ArrayList from2 = new ArrayList (); - SAMRecordIterator it1 = firReader.iterator(); - SAMRecordIterator it2 = secReader.iterator(); - //stats - long noRead = 0; - long noAlign1 = 1; - long noAlign2 = 1; - long noSame = 0; - - //initialize - SAMRecord record1 = it1.next(); - SAMRecord record2 = it2.next(); - String Id = record1.getReadName(); - from1.add(record1); - from2.add(record2); - - //get all aligner from same read - while( it1.hasNext() || it2.hasNext()){ - while(it1.hasNext()){ - noAlign1 ++; - record1 = it1.next() ; - if(record1.getReadName().equals(Id)){ - from1.add(record1); - }else //if not equals(Id) - break; - } //end while - - while( it2.hasNext() ){ - noAlign2 ++; - record2 = it2.next(); - if(record2.getReadName().equals(Id)){ - from2.add(record2); - }else - break; //exit while, record2 is read for next loop - } - //compare alignment in arraylist which filtered out secondary or supplenmentary alignments - noSame += classifyReads( AlignerFilter(from1, unsureWriter_first) , AlignerFilter(from2, unsureWriter_second) ); - - //clear arraylist and store current reads into arraylist for next loop - noRead ++; - from1.clear(); - from2.clear(); - from1.add(record1); - from2.add(record2); - Id = record1.getReadName(); - } - - logger.info(String.format("There are %d reads with %d alignments from BAM1", noRead, noAlign1)); - logger.info(String.format("There are %d reads with %d alignments from BAM2", noRead, noAlign2)); - logger.info(String.format("There are %d alignments are identical from both BAM", noSame)); - logger.info(String.format("Different alignments from BAM1 are %d, from BAM2 are %d", noDiff_bam1, noDiff_bam2)); - logger.info( String.format("discard %d secondary alignments and %d supplementary alignments from BAM1",noSecondary_bam1,nosupplementary_bam1)); - logger.info(String.format("discard %d secondary alignments and %d supplementary alignments from 
BAM2",noSecondary_bam2,nosupplementary_bam2)); - - - } - - /** - * - * @param from: an input alignments with same read id - * @return ArrayList : cleaned alignments excluding secondary and supplementary alignments - */ - ArrayList AlignerFilter(ArrayList from, SAMOrBAMWriterFactory factory) throws Exception{ - ArrayList cleaned = new ArrayList(); - - for(SAMRecord record : from) - if( discardNonPrimary && record.isSecondaryOrSupplementary()){ - if( record.getNotPrimaryAlignmentFlag()) - noSecondary_bam1 ++; - else if( record.getSupplementaryAlignmentFlag()) - nosupplementary_bam1 ++; - else - throw new Exception(record.getReadName() + " record flag error: record.isSecondaryOrSupplementary but not (secondary or supplementary) : " + record.getFlags()); - }else - cleaned.add(record); - -/* //record these multi alignments for further investigation - if(cleaned.size() != 2){ - for(SAMRecord record : cleaned){ - factory.getWriter().addAlignment(record); - nounsureAlignment ++; - - } - } -*/ - return cleaned; - } - - - int classifyReads(ArrayList from1, ArrayList from2) throws Exception{ - ArrayList toremove1 = new ArrayList(); - ArrayList toremove2 = new ArrayList(); - - for(SAMRecord record1 : from1){ - for(SAMRecord record2: from2){ - if(!record1.getReadName().equals(record2.getReadName())) - throw new Exception("error during process: reads with different name are store in arrayList for comparison: " - + record1.getReadName() + " != " + record2.getReadName() ) ; - if (record1.getFlags() == record2.getFlags() && - record1.getReferenceName().equals(record2.getReferenceName()) && - record1.getAlignmentStart() == record2.getAlignmentStart() && - record1.getAlignmentEnd() == record2.getAlignmentEnd() && - record1.getMappingQuality() == record2.getMappingQuality() && - record1.getCigarString().equals(record2.getCigarString()) && - Objects.equals(record1.getAttribute("MD") , record2.getAttribute("MD"))){ - sameWriter.getWriter().addAlignment(record1); - toremove1.add(record1); - toremove2.add(record2); - } - } - } - - //record the left differnt aligner - from1.removeAll(toremove1); - for(SAMRecord record1 : from1) - diffWriter_first.getWriter().addAlignment(record1); - - from2.removeAll(toremove2); - for(SAMRecord record2: from2) - diffWriter_second.getWriter().addAlignment(record2); - - //count unique alignment number - noDiff_bam1 += from1.size(); - noDiff_bam2 += from2.size(); - - return toremove1.size(); - } - - public static void main(String[] args) throws Exception{ - - Options op = new Options(AlignerCompare.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_AlignerCompare")); - op.displayHelp(); - System.exit(0); - } - - if( op.getInputFileNames().length != 2 - || op.getOutputFileNames().length != 1 ){ - System.err.println("improper parameters passed to command line, please refer to"); - System.out.println(Messages.getMessage("USAGE_AlignerCompare")); - op.displayHelp(); - System.exit(1); - } - - File f1 = new File(op.getInputFileNames()[0]); - File f2 = new File(op.getInputFileNames()[1]); - if(! f1.exists() || ! f2.exists()) - throw new Exception("input not exists: " + args[0] + " or " + args[1]); - - //assign to true if no "compareAll" option - boolean flag = ! 
op.hasCompareAllOption(); - - if(op.hasLogOption()) - logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getLogFile(), op.getLogLevel()); - else - logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - logger.logInitialExecutionStats( "qmule " + AlignerCompare.class.getName(), version,args); - - long startTime = System.currentTimeMillis(); - AlignerCompare compare = new AlignerCompare( f1, f2, op.getOutputFileNames()[0], flag ); - - logger.info( String.format("It took %d hours, %d minutes to perform the comparison", - (int) (System.currentTimeMillis() - startTime) / (1000*60*60), - (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) ); - logger.logFinalExecutionStats(0); - - } - - -} diff --git a/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java b/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java deleted file mode 100644 index ee7a1eb00..000000000 --- a/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java +++ /dev/null @@ -1,710 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.TreeMap; -import java.util.Vector; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionName; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - - -public class AnnotateDCCWithGFFRegions { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private List chromosomes = new ArrayList(); - private final int exitStatus = 0; - private Map> inputRecords = new HashMap>(); - private final Map> compareRecords = new HashMap>(); - private int overlapCount = 0; - private int notOverlappingCount = 0; - private int recordCount; - private Vector inputFileHeader = new Vector(); - private String inputFileType; - private String compareFileType; - private static QLogger logger; - private static final String MAF = "maf"; - private static final String GFF3 = "gff3"; - private static final String BED = "bed"; - private static final String VCF = "vcf"; - private static final String TAB = "txt"; - private static final String DCC1 = "dcc1"; - private static final String DCCQ = "dccq"; - private BufferedWriter outputFileWriter; - private File outputFile; - private String[] features; - private boolean stranded; - private final int GFF_STRAND_INDEX = 6; - private int DCC_STRAND_INDEX = -1; - private int QCMGFLAG_COLUMN_INDEX = -1; - private int REFERENCE_ALLELE_INDEX = -1; - private int TUMOUR_ALLELE_INDEX = -1; - private String annotation; - private int MUTATION_TYPE_INDEX; - //private static final int PATIENT_MIN = 5; - - public int engage() throws Exception { - - loadGFFFile(cmdLineInputFiles[1], compareRecords); - if 
(compareRecords.isEmpty()) { - logger.info("No positions loaded from gff file"); - } - - logger.info("Starting to process DCC records."); - - outputFile = new File(cmdLineOutputFiles[0]); - - outputFileWriter = new BufferedWriter(new FileWriter(outputFile)); - - inputFileType = null; - inputFileType = getFileType(cmdLineInputFiles[0]); - recordCount = loadDCCFile(cmdLineInputFiles[0], inputFileHeader, inputFileType); - logger.info("Finished processing DCC records."); - outputFileWriter.close(); - logger.info("SUMMARY"); - logger.info("Total DCC Records: " + recordCount); - logger.info("Total Records in supplied reference regions: " + overlapCount); - logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); - return exitStatus; - } - - private String getFileType(String fileName) throws QMuleException { - int index = fileName.lastIndexOf(".") + 1; - String name = fileName.substring(index, fileName.length()); - - if (name.equals("dcc")) { - return "dcc1"; - } - - if (!name.equals(DCC1) && !name.equals(DCCQ)) { - throw new QMuleException("FILE_TYPE_ERROR"); - } - - return name; - } - - private int loadGFFFile(String file, Map> records) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(file)); - int recordCount = 0; - try { - - Iterator iterator = reader.getRecordIterator(); - - while (iterator.hasNext()) { - - TabbedRecord tab = iterator.next(); - - if (tab.getData().startsWith("#")) { - continue; - } - recordCount++; - ChrPosition chrPos = getChrPosition(GFF3, tab, Integer.toString(recordCount)); - String key = chrPos.getChromosome().replace("chr", ""); - if (records.containsKey(key)) { - records.get(key).put(chrPos, tab); - } else { - TreeMap map = new TreeMap(); - map.put(chrPos, tab); - records.put(key,map); - } - if (!chromosomes.contains(key)) { - chromosomes.add(key); - } - } - } finally { - reader.close(); - } - - logger.info("loaded gff file, total records: " + recordCount); - return recordCount; - } - - private int loadDCCFile(String file, Vector header, String fileType) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(file)); - - int recordCount = 0; - try { - - Iterator iterator = reader.getRecordIterator(); - - if (reader.getHeader() != null) { - Iterator iter = reader.getHeader().iterator(); - while (iter.hasNext()) { - header.add(iter.next()); - } - } - while (iterator.hasNext()) { - - TabbedRecord inputRecord = iterator.next(); - if (inputRecord.getData().startsWith("#") || inputRecord.getData().startsWith("Hugo") || inputRecord.getData().startsWith("analysis") || - inputRecord.getData().startsWith("mutation")) { - header.add(inputRecord.getData()); - continue; - } - - if (header.size() > 0) { - parseDCCHeader(header, fileType); - logger.info("Column of DCC file to annotate: " + QCMGFLAG_COLUMN_INDEX); - writeHeader(fileType, header); - header.clear(); - } - - recordCount++; - ChrPosition chrPos = getChrPosition(fileType, inputRecord, null); - String key = chrPos.getChromosome().replace("chr", ""); - TreeMap compareMap = compareRecords.get(key); - boolean isOverlapping = false; - if (compareMap != null) { - //check to see if it is overlapping with the comparison reference region - for (Entry compareEntry : compareMap.entrySet()) { - ChrPosition comparePos = compareEntry.getKey(); - if (comparePos.getEndPosition() < chrPos.getStartPosition()) { - continue; - } else if (comparePos.getStartPosition() > chrPos.getEndPosition()) { - break; - } else { - String[] vals = inputRecord.getDataArray(); - 
- if (annotation != null) { - String oldInfo = vals[QCMGFLAG_COLUMN_INDEX]; - if (!oldInfo.contains("GERM") && tabbedRecordMatchesCompareRecord(chrPos, inputRecord, compareEntry)) { - if (annotation != null && !oldInfo.contains("GERM")) { - if (annotateWithGermline(vals, compareEntry.getValue().getDataArray())) { - isOverlapping = true; - if (!oldInfo.equals("") && !oldInfo.endsWith(";")) { - oldInfo += ";"; - } - oldInfo += annotation; - inputRecord = buildOutputString(inputRecord, vals, oldInfo); - } - } - } - } else { - if (tabbedRecordFallsInCompareRecord(chrPos, inputRecord, compareEntry)) { - isOverlapping = true; - String oldInfo = vals[QCMGFLAG_COLUMN_INDEX]; - //annotate with gff feature - String feature = getFeatures(compareEntry.getValue()); - if (!oldInfo.equals("") && !oldInfo.endsWith(";") && !feature.equals("")) { - oldInfo += ";"; - } - oldInfo += feature; - inputRecord = buildOutputString(inputRecord, vals, oldInfo); - } - } - - } - } - } - - if (isOverlapping) { - overlapCount++; - } else { - notOverlappingCount++; - } - - writeRecord(inputRecord); - - if (recordCount % 50000 == 0) { - logger.info("Processed records: " + recordCount); - } - } - } finally { - reader.close(); - } - return recordCount; - } - - private TabbedRecord buildOutputString(TabbedRecord inputRecord, String[] vals, - String oldInfo) { - vals[QCMGFLAG_COLUMN_INDEX] = oldInfo; - String data= ""; - for (String s: vals) { - data += s + "\t"; - } - inputRecord.setData(data); - return inputRecord; - } - - private boolean annotateWithGermline(String[] inputValues, String[] gffValues) throws QMuleException { - String[] attribs = gffValues[getFeatureIndex("attribs")].split(";"); - String gffMotif = getGFF3Motif(attribs); - //int patientCount = getPatientCount(attribs); - if (gffMotif == null) { - String position = gffValues[0] + ":" + gffValues[3] + "-" + gffValues[4]; - throw new QMuleException("NULL_GFF_MOTIF", position); - } - String dccMotif = getDCCMotif(inputValues); - if ((dccMotif == null || gffMotif.equals(dccMotif))) { - return true; - } - - return false; - } - - private int getPatientCount(String[] attribs) { - for (String s: attribs) { - if (s.startsWith("PatientCount")) { - return new Integer(s.split("=")[1]); - } - } - return 0; - } - - private String getGFF3Motif(String[] attribs) { - - String referenceAllele = null; - String tumourAllele = null; - for (String s: attribs) { - if (s.startsWith("ReferenceAllele")) { - referenceAllele = s.split("=")[1]; - } - if (s.startsWith("TumourAllele")) { - tumourAllele = s.split("=")[1]; - } - } - - if (referenceAllele.contains("-") && !tumourAllele.contains("-")) { - return tumourAllele; - } - if (!referenceAllele.contains("-") && tumourAllele.contains("-")) { - return referenceAllele; - } - return null; - } - - private String getDCCMotif(String[] inputValues) { - String mutationType = inputValues[MUTATION_TYPE_INDEX]; - String refAllele = inputValues[REFERENCE_ALLELE_INDEX]; - String tumourAllele = inputValues[TUMOUR_ALLELE_INDEX]; - - if (mutationType.equals("2")) { - return tumourAllele; - } else if (mutationType.equals("3")) { - return refAllele; - } - return null; - } - - public void parseDCCHeader(List headers, String inputFileType) throws QMuleException { - - for (String header: headers) { - String[] values = header.split("\t"); - if (values.length == 28 && inputFileType.equals(DCC1) - || values.length == 39 && inputFileType.equals(DCCQ)) { - //check dcc header - for (int i=0; i compareEntry) { - if (compareEntry != null) { - ChrPosition 
compareChrPos = compareEntry.getKey(); - if ((inputChrPos.getStartPosition() == compareChrPos.getStartPosition() - && inputChrPos.getEndPosition() == compareChrPos.getEndPosition())) { - //check strand if this option is provided - if (stranded) { - String inputStrand = inputRecord.getDataArray()[DCC_STRAND_INDEX]; - String compareStrand = compareEntry.getValue().getDataArray()[GFF_STRAND_INDEX]; - if (inputStrand.equals(compareStrand)) { - return true; - } - } else { - return true; - } - } - } - return false; - } - - private boolean tabbedRecordFallsInCompareRecord(ChrPosition inputChrPos, TabbedRecord inputRecord, Entry entry) { - if (entry != null) { - ChrPosition compareChrPos = entry.getKey(); - if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() && inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || - (inputChrPos.getEndPosition() >= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() <= compareChrPos.getEndPosition()) - || (inputChrPos.getStartPosition() <= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() >= compareChrPos.getEndPosition())) { - //check strand if this option is provided - if (stranded) { - String inputStrand = inputRecord.getDataArray()[DCC_STRAND_INDEX]; - String compareStrand = entry.getValue().getDataArray()[GFF_STRAND_INDEX]; - if (inputStrand.equals(compareStrand)) { - return true; - } - } else { - return true; - } - } - } - return false; - } - - public String[] getCmdLineInputFiles() { - return cmdLineInputFiles; - } - - public void setCmdLineInputFiles(String[] cmdLineInputFiles) { - this.cmdLineInputFiles = cmdLineInputFiles; - } - - - private void writeHeader(String file, Vector header) throws IOException { - - for (String h: header) { - outputFileWriter.write(h + "\n"); - } - } - - public List getChromosomes() { - return chromosomes; - } - - public void setChromosomes(List chromosomes) { - this.chromosomes = chromosomes; - } - - - public int getOverlapCount() { - return overlapCount; - } - - public void setOverlapCount(int overlapCount) { - this.overlapCount = overlapCount; - } - - public int getNotOverlappingCount() { - return notOverlappingCount; - } - - public void setNotOverlappingCount(int notOverlappingCount) { - this.notOverlappingCount = notOverlappingCount; - } - - public int getMafCount() { - return recordCount; - } - - public void setMafCount(int mafCount) { - this.recordCount = mafCount; - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
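
Strand checking aside, the positional test in the removed tabbedRecordFallsInCompareRecord (start inside, end inside, or the input interval spanning the compare interval) is equivalent to the usual single condition for two closed intervals intersecting. A minimal sketch, with a hypothetical IntervalOverlap helper standing in for the ChrPosition-based version:

    final class IntervalOverlap {
        // The three clauses in tabbedRecordFallsInCompareRecord collapse to this
        // single test for closed intervals [start1, end1] and [start2, end2].
        static boolean overlaps(int start1, int end1, int start2, int end2) {
            return start1 <= end2 && start2 <= end1;
        }

        public static void main(String[] args) {
            // e.g. a DCC position chr1:100-100 against a GFF region chr1:90-110
            System.out.println(overlaps(100, 100, 90, 110)); // true
        }
    }
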
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(AnnotateDCCWithGFFRegions.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("AnnotateDCCWithGFFRegions", AnnotateDCCWithGFFRegions.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - cmdLineOutputFiles = options.getOutputFileNames(); - if ( ! FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - - for (String file : cmdLineOutputFiles) { - if (new File(file).exists() && !new File(file).isDirectory()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - features = options.getFeature(); - annotation = options.getAnnotation(); - if (features == null && annotation == null) { - logger.info("Features to annotate: " + "feature"); - } else if (features != null){ - String featureString = new String(); - for (String f : features) { - featureString += f; - } - logger.info("Features to annotate: " + featureString); - } - logger.info("Annotation is : " + annotation); - stranded = options.hasStrandedOption(); - if (options.getColumn() != null) { - this.QCMGFLAG_COLUMN_INDEX = new Integer(options.getColumn()) - 1; - } - - - - logger.info("Require matching strand: " + stranded); - logger.info("DCC file: " + cmdLineInputFiles[0]); - logger.info("GFF file: " + cmdLineInputFiles[1]); - - } - - return returnStatus; - } - - public static void main(String[] args) throws Exception { - AnnotateDCCWithGFFRegions sp = new AnnotateDCCWithGFFRegions(); - LoadReferencedClasses.loadClasses(AnnotateDCCWithGFFRegions.class); - sp.setup(args); - int exitStatus = sp.engage(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - public String[] getCmdLineOutputFiles() { - return cmdLineOutputFiles; - } - - public void setCmdLineOutputFiles(String[] cmdLineOutputFiles) { - this.cmdLineOutputFiles = cmdLineOutputFiles; - } - - public Map> getInputRecords() { - return inputRecords; - } - - public void setInputRecords( - Map> inputRecords) { - this.inputRecords = inputRecords; - } - - public Vector getInputFileHeader() { - return inputFileHeader; - } - - public void setInputFileHeader(Vector inputFileHeader) { - this.inputFileHeader = inputFileHeader; - } - - public File getOutputFile() { - return outputFile; - } - - public int getREFERENCE_ALLELE_INDEX() { - return REFERENCE_ALLELE_INDEX; - } - - public void setREFERENCE_ALLELE_INDEX(int rEFERENCE_ALLELE_INDEX) { - REFERENCE_ALLELE_INDEX = rEFERENCE_ALLELE_INDEX; - } - - public int getTUMOUR_ALLELE_INDEX() { - return TUMOUR_ALLELE_INDEX; - } - - public void setTUMOUR_ALLELE_INDEX(int tUMOUR_ALLELE_INDEX) { - TUMOUR_ALLELE_INDEX = tUMOUR_ALLELE_INDEX; - } - - public int getMUTATION_TYPE_INDEX() { - return MUTATION_TYPE_INDEX; - } - - public void setMUTATION_TYPE_INDEX(int mUTATION_TYPE_INDEX) { - MUTATION_TYPE_INDEX = mUTATION_TYPE_INDEX; - } - - public void setOutputFile(File outputFile) { - 
this.outputFile = outputFile; - } - - public String getAnnotation() { - return this.annotation; - } - -} diff --git a/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- b/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- deleted file mode 100644 index ee7a1eb00..000000000 --- a/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- +++ /dev/null @@ -1,710 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.TreeMap; -import java.util.Vector; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionName; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - - -public class AnnotateDCCWithGFFRegions { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private List chromosomes = new ArrayList(); - private final int exitStatus = 0; - private Map> inputRecords = new HashMap>(); - private final Map> compareRecords = new HashMap>(); - private int overlapCount = 0; - private int notOverlappingCount = 0; - private int recordCount; - private Vector inputFileHeader = new Vector(); - private String inputFileType; - private String compareFileType; - private static QLogger logger; - private static final String MAF = "maf"; - private static final String GFF3 = "gff3"; - private static final String BED = "bed"; - private static final String VCF = "vcf"; - private static final String TAB = "txt"; - private static final String DCC1 = "dcc1"; - private static final String DCCQ = "dccq"; - private BufferedWriter outputFileWriter; - private File outputFile; - private String[] features; - private boolean stranded; - private final int GFF_STRAND_INDEX = 6; - private int DCC_STRAND_INDEX = -1; - private int QCMGFLAG_COLUMN_INDEX = -1; - private int REFERENCE_ALLELE_INDEX = -1; - private int TUMOUR_ALLELE_INDEX = -1; - private String annotation; - private int MUTATION_TYPE_INDEX; - //private static final int PATIENT_MIN = 5; - - public int engage() throws Exception { - - loadGFFFile(cmdLineInputFiles[1], compareRecords); - if (compareRecords.isEmpty()) { - logger.info("No positions loaded from gff file"); - } - - logger.info("Starting to process DCC records."); - - outputFile = new File(cmdLineOutputFiles[0]); - - outputFileWriter = new BufferedWriter(new FileWriter(outputFile)); - - inputFileType = null; - inputFileType = getFileType(cmdLineInputFiles[0]); - recordCount = loadDCCFile(cmdLineInputFiles[0], inputFileHeader, inputFileType); - logger.info("Finished processing DCC records."); - outputFileWriter.close(); - logger.info("SUMMARY"); - logger.info("Total DCC Records: " + recordCount); - logger.info("Total Records in supplied reference regions: " + overlapCount); - logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); - return exitStatus; - 
} - - private String getFileType(String fileName) throws QMuleException { - int index = fileName.lastIndexOf(".") + 1; - String name = fileName.substring(index, fileName.length()); - - if (name.equals("dcc")) { - return "dcc1"; - } - - if (!name.equals(DCC1) && !name.equals(DCCQ)) { - throw new QMuleException("FILE_TYPE_ERROR"); - } - - return name; - } - - private int loadGFFFile(String file, Map> records) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(file)); - int recordCount = 0; - try { - - Iterator iterator = reader.getRecordIterator(); - - while (iterator.hasNext()) { - - TabbedRecord tab = iterator.next(); - - if (tab.getData().startsWith("#")) { - continue; - } - recordCount++; - ChrPosition chrPos = getChrPosition(GFF3, tab, Integer.toString(recordCount)); - String key = chrPos.getChromosome().replace("chr", ""); - if (records.containsKey(key)) { - records.get(key).put(chrPos, tab); - } else { - TreeMap map = new TreeMap(); - map.put(chrPos, tab); - records.put(key,map); - } - if (!chromosomes.contains(key)) { - chromosomes.add(key); - } - } - } finally { - reader.close(); - } - - logger.info("loaded gff file, total records: " + recordCount); - return recordCount; - } - - private int loadDCCFile(String file, Vector header, String fileType) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(file)); - - int recordCount = 0; - try { - - Iterator iterator = reader.getRecordIterator(); - - if (reader.getHeader() != null) { - Iterator iter = reader.getHeader().iterator(); - while (iter.hasNext()) { - header.add(iter.next()); - } - } - while (iterator.hasNext()) { - - TabbedRecord inputRecord = iterator.next(); - if (inputRecord.getData().startsWith("#") || inputRecord.getData().startsWith("Hugo") || inputRecord.getData().startsWith("analysis") || - inputRecord.getData().startsWith("mutation")) { - header.add(inputRecord.getData()); - continue; - } - - if (header.size() > 0) { - parseDCCHeader(header, fileType); - logger.info("Column of DCC file to annotate: " + QCMGFLAG_COLUMN_INDEX); - writeHeader(fileType, header); - header.clear(); - } - - recordCount++; - ChrPosition chrPos = getChrPosition(fileType, inputRecord, null); - String key = chrPos.getChromosome().replace("chr", ""); - TreeMap compareMap = compareRecords.get(key); - boolean isOverlapping = false; - if (compareMap != null) { - //check to see if it is overlapping with the comparison reference region - for (Entry compareEntry : compareMap.entrySet()) { - ChrPosition comparePos = compareEntry.getKey(); - if (comparePos.getEndPosition() < chrPos.getStartPosition()) { - continue; - } else if (comparePos.getStartPosition() > chrPos.getEndPosition()) { - break; - } else { - String[] vals = inputRecord.getDataArray(); - - if (annotation != null) { - String oldInfo = vals[QCMGFLAG_COLUMN_INDEX]; - if (!oldInfo.contains("GERM") && tabbedRecordMatchesCompareRecord(chrPos, inputRecord, compareEntry)) { - if (annotation != null && !oldInfo.contains("GERM")) { - if (annotateWithGermline(vals, compareEntry.getValue().getDataArray())) { - isOverlapping = true; - if (!oldInfo.equals("") && !oldInfo.endsWith(";")) { - oldInfo += ";"; - } - oldInfo += annotation; - inputRecord = buildOutputString(inputRecord, vals, oldInfo); - } - } - } - } else { - if (tabbedRecordFallsInCompareRecord(chrPos, inputRecord, compareEntry)) { - isOverlapping = true; - String oldInfo = vals[QCMGFLAG_COLUMN_INDEX]; - //annotate with gff feature - String feature = getFeatures(compareEntry.getValue()); - if 
(!oldInfo.equals("") && !oldInfo.endsWith(";") && !feature.equals("")) { - oldInfo += ";"; - } - oldInfo += feature; - inputRecord = buildOutputString(inputRecord, vals, oldInfo); - } - } - - } - } - } - - if (isOverlapping) { - overlapCount++; - } else { - notOverlappingCount++; - } - - writeRecord(inputRecord); - - if (recordCount % 50000 == 0) { - logger.info("Processed records: " + recordCount); - } - } - } finally { - reader.close(); - } - return recordCount; - } - - private TabbedRecord buildOutputString(TabbedRecord inputRecord, String[] vals, - String oldInfo) { - vals[QCMGFLAG_COLUMN_INDEX] = oldInfo; - String data= ""; - for (String s: vals) { - data += s + "\t"; - } - inputRecord.setData(data); - return inputRecord; - } - - private boolean annotateWithGermline(String[] inputValues, String[] gffValues) throws QMuleException { - String[] attribs = gffValues[getFeatureIndex("attribs")].split(";"); - String gffMotif = getGFF3Motif(attribs); - //int patientCount = getPatientCount(attribs); - if (gffMotif == null) { - String position = gffValues[0] + ":" + gffValues[3] + "-" + gffValues[4]; - throw new QMuleException("NULL_GFF_MOTIF", position); - } - String dccMotif = getDCCMotif(inputValues); - if ((dccMotif == null || gffMotif.equals(dccMotif))) { - return true; - } - - return false; - } - - private int getPatientCount(String[] attribs) { - for (String s: attribs) { - if (s.startsWith("PatientCount")) { - return new Integer(s.split("=")[1]); - } - } - return 0; - } - - private String getGFF3Motif(String[] attribs) { - - String referenceAllele = null; - String tumourAllele = null; - for (String s: attribs) { - if (s.startsWith("ReferenceAllele")) { - referenceAllele = s.split("=")[1]; - } - if (s.startsWith("TumourAllele")) { - tumourAllele = s.split("=")[1]; - } - } - - if (referenceAllele.contains("-") && !tumourAllele.contains("-")) { - return tumourAllele; - } - if (!referenceAllele.contains("-") && tumourAllele.contains("-")) { - return referenceAllele; - } - return null; - } - - private String getDCCMotif(String[] inputValues) { - String mutationType = inputValues[MUTATION_TYPE_INDEX]; - String refAllele = inputValues[REFERENCE_ALLELE_INDEX]; - String tumourAllele = inputValues[TUMOUR_ALLELE_INDEX]; - - if (mutationType.equals("2")) { - return tumourAllele; - } else if (mutationType.equals("3")) { - return refAllele; - } - return null; - } - - public void parseDCCHeader(List headers, String inputFileType) throws QMuleException { - - for (String header: headers) { - String[] values = header.split("\t"); - if (values.length == 28 && inputFileType.equals(DCC1) - || values.length == 39 && inputFileType.equals(DCCQ)) { - //check dcc header - for (int i=0; i compareEntry) { - if (compareEntry != null) { - ChrPosition compareChrPos = compareEntry.getKey(); - if ((inputChrPos.getStartPosition() == compareChrPos.getStartPosition() - && inputChrPos.getEndPosition() == compareChrPos.getEndPosition())) { - //check strand if this option is provided - if (stranded) { - String inputStrand = inputRecord.getDataArray()[DCC_STRAND_INDEX]; - String compareStrand = compareEntry.getValue().getDataArray()[GFF_STRAND_INDEX]; - if (inputStrand.equals(compareStrand)) { - return true; - } - } else { - return true; - } - } - } - return false; - } - - private boolean tabbedRecordFallsInCompareRecord(ChrPosition inputChrPos, TabbedRecord inputRecord, Entry entry) { - if (entry != null) { - ChrPosition compareChrPos = entry.getKey(); - if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() 
&& inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || - (inputChrPos.getEndPosition() >= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() <= compareChrPos.getEndPosition()) - || (inputChrPos.getStartPosition() <= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() >= compareChrPos.getEndPosition())) { - //check strand if this option is provided - if (stranded) { - String inputStrand = inputRecord.getDataArray()[DCC_STRAND_INDEX]; - String compareStrand = entry.getValue().getDataArray()[GFF_STRAND_INDEX]; - if (inputStrand.equals(compareStrand)) { - return true; - } - } else { - return true; - } - } - } - return false; - } - - public String[] getCmdLineInputFiles() { - return cmdLineInputFiles; - } - - public void setCmdLineInputFiles(String[] cmdLineInputFiles) { - this.cmdLineInputFiles = cmdLineInputFiles; - } - - - private void writeHeader(String file, Vector header) throws IOException { - - for (String h: header) { - outputFileWriter.write(h + "\n"); - } - } - - public List getChromosomes() { - return chromosomes; - } - - public void setChromosomes(List chromosomes) { - this.chromosomes = chromosomes; - } - - - public int getOverlapCount() { - return overlapCount; - } - - public void setOverlapCount(int overlapCount) { - this.overlapCount = overlapCount; - } - - public int getNotOverlappingCount() { - return notOverlappingCount; - } - - public void setNotOverlappingCount(int notOverlappingCount) { - this.notOverlappingCount = notOverlappingCount; - } - - public int getMafCount() { - return recordCount; - } - - public void setMafCount(int mafCount) { - this.recordCount = mafCount; - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(AnnotateDCCWithGFFRegions.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("AnnotateDCCWithGFFRegions", AnnotateDCCWithGFFRegions.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - cmdLineOutputFiles = options.getOutputFileNames(); - if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - - for (String file : cmdLineOutputFiles) { - if (new File(file).exists() && !new File(file).isDirectory()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - features = options.getFeature(); - annotation = options.getAnnotation(); - if (features == null && annotation == null) { - logger.info("Features to annotate: " + "feature"); - } else if (features != null){ - String featureString = new String(); - for (String f : features) { - featureString += f; - } - logger.info("Features to annotate: " + featureString); - } - logger.info("Annotation is : " + annotation); - stranded = options.hasStrandedOption(); - if (options.getColumn() != null) { - this.QCMGFLAG_COLUMN_INDEX = new Integer(options.getColumn()) - 1; - } - - - - logger.info("Require matching strand: " + stranded); - logger.info("DCC file: " + cmdLineInputFiles[0]); - logger.info("GFF file: " + cmdLineInputFiles[1]); - - } - - return returnStatus; - } - - public static void main(String[] args) throws Exception { - AnnotateDCCWithGFFRegions sp = new AnnotateDCCWithGFFRegions(); - LoadReferencedClasses.loadClasses(AnnotateDCCWithGFFRegions.class); - sp.setup(args); - int exitStatus = sp.engage(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - public String[] getCmdLineOutputFiles() { - return cmdLineOutputFiles; - } - - public void setCmdLineOutputFiles(String[] cmdLineOutputFiles) { - this.cmdLineOutputFiles = cmdLineOutputFiles; - } - - public Map> getInputRecords() { - return inputRecords; - } - - public void setInputRecords( - Map> inputRecords) { - this.inputRecords = inputRecords; - } - - public Vector getInputFileHeader() { - return inputFileHeader; - } - - public void setInputFileHeader(Vector inputFileHeader) { - this.inputFileHeader = inputFileHeader; - } - - public File getOutputFile() { - return outputFile; - } - - public int getREFERENCE_ALLELE_INDEX() { - return REFERENCE_ALLELE_INDEX; - } - - public void setREFERENCE_ALLELE_INDEX(int rEFERENCE_ALLELE_INDEX) { - REFERENCE_ALLELE_INDEX = rEFERENCE_ALLELE_INDEX; - } - - public int getTUMOUR_ALLELE_INDEX() { - return TUMOUR_ALLELE_INDEX; - } - - public void setTUMOUR_ALLELE_INDEX(int tUMOUR_ALLELE_INDEX) { - TUMOUR_ALLELE_INDEX = tUMOUR_ALLELE_INDEX; - } - - public int getMUTATION_TYPE_INDEX() { - return MUTATION_TYPE_INDEX; - } - - public void setMUTATION_TYPE_INDEX(int mUTATION_TYPE_INDEX) { - MUTATION_TYPE_INDEX = mUTATION_TYPE_INDEX; - } - - public void setOutputFile(File outputFile) { - this.outputFile = outputFile; - } - - public String getAnnotation() { - return this.annotation; - } - -} diff --git a/qmule/src/org/qcmg/qmule/BAM2CS.java b/qmule/src/org/qcmg/qmule/BAM2CS.java deleted file mode 100644 index 13d4d21f5..000000000 --- a/qmule/src/org/qcmg/qmule/BAM2CS.java +++ /dev/null @@ -1,183 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; - -import java.io.*; -import java.net.InetAddress; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.HashMap; -import java.util.Iterator; - -import org.qcmg.common.string.StringUtils; - - -public class BAM2CS { - File inBAM; - File outDir; - HashMap outFast = new HashMap(); - HashMap outQual = new HashMap(); - - - BAM2CS(final String[] args) throws Exception{ - inBAM = new File(args[0]); - outDir = new File(args[1]); - printHeader(null); - } - - /** - * retrive the CS and CQ value from BAM record to output csfasta or qual file - * @throws Exception - */ - void CreateCSfile() throws Exception{ - - SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault(); - SamReader reader = samReaderFactory.open(inBAM); - int num = 0; - for (SAMRecord record : reader) { - String id = ">" + record.getReadName(); - Add2Fasta(id, record.getAttribute("CS").toString()); - add2Qual(id, record.getAttribute("CQ").toString()); - num ++; - } - - reader.close(); - closeWriters(); - - System.out.println(getTime() + " total output records " + num); - System.exit(0); - } - - /** - * Add header information to Writer. If Writer is null, print to STD - * @param Writer - * @throws Exception - */ - private void printHeader(PrintWriter Writer) throws Exception{ - if(Writer == null){ - System.out.println(getTime() + " tool name: qmule org.qcmg.qmule.BAM2CS"); - System.out.println(getTime() + " host: " + InetAddress.getLocalHost().getHostName()); - System.out.println(getTime() + " input: " + inBAM.getAbsolutePath()); - System.out.println(getTime() + " output directory: " + outDir.getAbsolutePath()); - }else{ - Writer.println("#" + getTime() + " tool name: qmule org.qcmg.qmule.BAM2CS"); - Writer.println("#" + getTime() + " host: " + InetAddress.getLocalHost().getHostName()); - Writer.println("#" + getTime() + " input: " + inBAM.getAbsolutePath()); - } - } - - private void closeWriters(){ - //close all csfasta files - Iterator itr = outFast.values().iterator(); - while(itr.hasNext()){ - PrintWriter Writer = itr.next(); - Writer.close(); - } - - //close all qual files - itr = outQual.values().iterator(); - while(itr.hasNext()){ - PrintWriter Writer = itr.next(); - Writer.close(); - } - } - - /** - * Add raw color sequence into output csfasta; If the output file isn't exist, create a new one with header lines - * @param id - * @param seq - * @throws Exception - */ - private void Add2Fasta(String id, String seq) throws Exception{ - //sequence length should -1 since it start with 'T' or 'G' - int len = seq.length() - 1; - PrintWriter Writer; - - //get writer or create an new one - if(outFast.containsKey(len)){ - Writer = outFast.get(len); - }else{ - String fname = inBAM.getName(); - int index = fname.lastIndexOf('.'); - fname = fname.substring(0,index) + "." + len + ".csfasta"; - File csFile = new File(outDir, fname); - Writer = new PrintWriter(new FileWriter(csFile)); - outFast.put(len, Writer); - printHeader(Writer); - System.out.println(getTime() + " creating output: " + csFile.getAbsolutePath() ); - } - - Writer.println(id); - Writer.println(seq); - } - /** - * cover CQ value into raw qual sequence and addto output qual; - * If the output file isn't exist, create a new one with header lines. 
- * @param id - * @param seq - * @throws Exception - */ - void add2Qual(String id, String seq) throws Exception{ - int len = seq.length(); - PrintWriter writer; - - //get writer or create an new one - if(outQual.containsKey(len)){ - writer = outQual.get(len); - }else{ - String fname = inBAM.getName(); - int index = fname.lastIndexOf('.'); - fname = fname.substring(0,index) + "." + len + ".qual"; - File csFile = new File(outDir, fname); - writer = new PrintWriter(new FileWriter(csFile)); - outQual.put(len, writer); - printHeader(writer); - System.out.println(getTime() + " creating output: " + csFile.getAbsolutePath() ); - } - - //convert ascii to int - String qual = ""; - for(int i = 0; i < len; i ++){ - char c = seq.charAt(i); - int j = c; - - if(StringUtils.isNullOrEmpty(qual)){ - qual += j; - } else { - qual += " " + j; - } - } - - writer.println(id); - writer.println(qual); - - } - - private String getTime(){ - Calendar currentDate = Calendar.getInstance(); - SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); - return "[" + formatter.format(currentDate.getTime()) + "]"; - } - public static void main(final String[] args) throws IOException, InterruptedException { - - try{ - BAM2CS myCS = new BAM2CS(args); - myCS.CreateCSfile(); - System.exit(0); - }catch(Exception e){ - System.err.println(e.toString()); - Thread.sleep(1); - System.out.println("usage: qmule org.qcmg.qmule.BAM2CS "); - System.exit(1); - } - - } -} diff --git a/qmule/src/org/qcmg/qmule/BAM2CS.java-- b/qmule/src/org/qcmg/qmule/BAM2CS.java-- deleted file mode 100644 index 13d4d21f5..000000000 --- a/qmule/src/org/qcmg/qmule/BAM2CS.java-- +++ /dev/null @@ -1,183 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; - -import java.io.*; -import java.net.InetAddress; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.HashMap; -import java.util.Iterator; - -import org.qcmg.common.string.StringUtils; - - -public class BAM2CS { - File inBAM; - File outDir; - HashMap outFast = new HashMap(); - HashMap outQual = new HashMap(); - - - BAM2CS(final String[] args) throws Exception{ - inBAM = new File(args[0]); - outDir = new File(args[1]); - printHeader(null); - } - - /** - * retrive the CS and CQ value from BAM record to output csfasta or qual file - * @throws Exception - */ - void CreateCSfile() throws Exception{ - - SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault(); - SamReader reader = samReaderFactory.open(inBAM); - int num = 0; - for (SAMRecord record : reader) { - String id = ">" + record.getReadName(); - Add2Fasta(id, record.getAttribute("CS").toString()); - add2Qual(id, record.getAttribute("CQ").toString()); - num ++; - } - - reader.close(); - closeWriters(); - - System.out.println(getTime() + " total output records " + num); - System.exit(0); - } - - /** - * Add header information to Writer. 
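
The qual output produced by the removed add2Qual simply re-emits each character of the BAM CQ attribute as its integer character code, space separated. A standalone sketch of that conversion follows; note that, like the original, it writes the raw code point with no Phred-offset subtraction, and whether an offset is wanted depends on the downstream consumer, which the patch does not state.

    final class CqToQual {
        // Convert a CQ string to the space-separated integer form written by the
        // removed BAM2CS.add2Qual: each character becomes its char code.
        static String toQualLine(String cq) {
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < cq.length(); i++) {
                if (i > 0) {
                    sb.append(' ');
                }
                sb.append((int) cq.charAt(i));
            }
            return sb.toString();
        }

        public static void main(String[] args) {
            System.out.println(toQualLine("!5=")); // 33 53 61
        }
    }
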
If Writer is null, print to STD - * @param Writer - * @throws Exception - */ - private void printHeader(PrintWriter Writer) throws Exception{ - if(Writer == null){ - System.out.println(getTime() + " tool name: qmule org.qcmg.qmule.BAM2CS"); - System.out.println(getTime() + " host: " + InetAddress.getLocalHost().getHostName()); - System.out.println(getTime() + " input: " + inBAM.getAbsolutePath()); - System.out.println(getTime() + " output directory: " + outDir.getAbsolutePath()); - }else{ - Writer.println("#" + getTime() + " tool name: qmule org.qcmg.qmule.BAM2CS"); - Writer.println("#" + getTime() + " host: " + InetAddress.getLocalHost().getHostName()); - Writer.println("#" + getTime() + " input: " + inBAM.getAbsolutePath()); - } - } - - private void closeWriters(){ - //close all csfasta files - Iterator itr = outFast.values().iterator(); - while(itr.hasNext()){ - PrintWriter Writer = itr.next(); - Writer.close(); - } - - //close all qual files - itr = outQual.values().iterator(); - while(itr.hasNext()){ - PrintWriter Writer = itr.next(); - Writer.close(); - } - } - - /** - * Add raw color sequence into output csfasta; If the output file isn't exist, create a new one with header lines - * @param id - * @param seq - * @throws Exception - */ - private void Add2Fasta(String id, String seq) throws Exception{ - //sequence length should -1 since it start with 'T' or 'G' - int len = seq.length() - 1; - PrintWriter Writer; - - //get writer or create an new one - if(outFast.containsKey(len)){ - Writer = outFast.get(len); - }else{ - String fname = inBAM.getName(); - int index = fname.lastIndexOf('.'); - fname = fname.substring(0,index) + "." + len + ".csfasta"; - File csFile = new File(outDir, fname); - Writer = new PrintWriter(new FileWriter(csFile)); - outFast.put(len, Writer); - printHeader(Writer); - System.out.println(getTime() + " creating output: " + csFile.getAbsolutePath() ); - } - - Writer.println(id); - Writer.println(seq); - } - /** - * cover CQ value into raw qual sequence and addto output qual; - * If the output file isn't exist, create a new one with header lines. - * @param id - * @param seq - * @throws Exception - */ - void add2Qual(String id, String seq) throws Exception{ - int len = seq.length(); - PrintWriter writer; - - //get writer or create an new one - if(outQual.containsKey(len)){ - writer = outQual.get(len); - }else{ - String fname = inBAM.getName(); - int index = fname.lastIndexOf('.'); - fname = fname.substring(0,index) + "." 
+ len + ".qual"; - File csFile = new File(outDir, fname); - writer = new PrintWriter(new FileWriter(csFile)); - outQual.put(len, writer); - printHeader(writer); - System.out.println(getTime() + " creating output: " + csFile.getAbsolutePath() ); - } - - //convert ascii to int - String qual = ""; - for(int i = 0; i < len; i ++){ - char c = seq.charAt(i); - int j = c; - - if(StringUtils.isNullOrEmpty(qual)){ - qual += j; - } else { - qual += " " + j; - } - } - - writer.println(id); - writer.println(qual); - - } - - private String getTime(){ - Calendar currentDate = Calendar.getInstance(); - SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); - return "[" + formatter.format(currentDate.getTime()) + "]"; - } - public static void main(final String[] args) throws IOException, InterruptedException { - - try{ - BAM2CS myCS = new BAM2CS(args); - myCS.CreateCSfile(); - System.exit(0); - }catch(Exception e){ - System.err.println(e.toString()); - Thread.sleep(1); - System.out.println("usage: qmule org.qcmg.qmule.BAM2CS "); - System.exit(1); - } - - } -} diff --git a/qmule/src/org/qcmg/qmule/BAMCompress.java b/qmule/src/org/qcmg/qmule/BAMCompress.java deleted file mode 100644 index 7ae4254a3..000000000 --- a/qmule/src/org/qcmg/qmule/BAMCompress.java +++ /dev/null @@ -1,156 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.List; - -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMFileWriterFactory; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - -public class BAMCompress { - static QLogger logger = QLoggerFactory.getLogger(BAMCompress.class); - private static File input; - private static File output; - private static int level; - - BAMCompress(File input, File output, int level) throws Exception{ - this.input = input; - this.output = output; - this.level = level; - - logger.info("input file: " + input.getAbsolutePath()); - logger.info("output file name: " + output.getAbsolutePath()); - logger.info("compress level for output BAM: " + level); - } - - public void replaceSeq() throws Exception{ - - SamReader reader = SAMFileReaderFactory.createSAMFileReader( input, ValidationStringency.SILENT); - SAMFileWriter writer = new SAMFileWriterFactory() .makeBAMWriter(reader.getFileHeader(), false, output, level); - - for( SAMRecord record : reader){ - //only replace fully mapped reads, that is no clipping, indels and pading - if( seekFullMppaed(record) && seekMismatch(record) ){ - byte[] base = record.getReadBases(); - for(int i = 0; i < base.length; i++) - base[i] = 'N'; - record.setReadBases(base); - } - - if(record.isValid() == null) // if valid - writer.addAlignment( record ); - } - - reader.close(); - writer.close(); - - logger.info( "input " + reportFileSize(input) ); - logger.info( "output " + reportFileSize(output) ); - - } - - public String reportFileSize(File f){ - - double bytes_in = f.length(); - double kilobytes = (bytes_in / 1024); - double megabytes = (kilobytes / 
1024); - double gigabytes = (megabytes / 1024); - - return String.format("file size is %.2fG or %.2fK", gigabytes, kilobytes); - } - - - private boolean seekMismatch(SAMRecord r) { - String attribute = (String)r.getAttribute("MD"); - if (null != attribute) { - for (int i = 0, size = attribute.length() ; i < size ; ) { - char c = attribute.charAt(i); - if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { - return false; - } else if ( c == '^') { - //skip the insertion base - while (++i < size && Character.isLetter(attribute.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... - } - return true; - } - return false; - } - - private boolean seekFullMppaed(SAMRecord r){ - - if(r.getReadUnmappedFlag()) - return false; - - //reads with clips or indel, skips, pads - List ele = r.getCigar().getCigarElements(); - for (CigarElement element : r.getCigar().getCigarElements()){ - if( element.getLength() > 0){ - if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) { - return false; - }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){ - return false; - }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){ - return false; - } - } - } - - return true; - } - - - public static void main(String[] args) throws Exception{ - Options op = new Options(BAMCompress.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_BAMCompress")); - op.displayHelp(); - System.exit(0); - } - - String output = op.getOutputFileNames()[0]; - String input = op.getInputFileNames()[0]; - if(! new File(input).exists() ) - throw new Exception("input file not exists: " + args[0]); - - if(op.hasLogOption()) - logger = QLoggerFactory.getLogger(BAMCompress.class, op.getLogFile(), op.getLogLevel()); - else - logger = QLoggerFactory.getLogger(BAMCompress.class, output + ".log", op.getLogLevel()); - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - logger.logInitialExecutionStats( "qmule " + BAMCompress.class.getName(), version,args); - - int level = op.getcompressLevel(); //default compress level - - logger.logInitialExecutionStats( "qmule " + BAMCompress.class.getName(), null,args); - - long startTime = System.currentTimeMillis(); - BAMCompress compress = new BAMCompress(new File(input), new File(output) , level ); - compress.replaceSeq(); - - logger.info( String.format("It took %d hours, %d seconds to perform the compression", - (int) (System.currentTimeMillis() - startTime) / (1000*60*60), - (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) ); - logger.logFinalExecutionStats(0); - - } - - -} diff --git a/qmule/src/org/qcmg/qmule/BAMCompress.java-- b/qmule/src/org/qcmg/qmule/BAMCompress.java-- deleted file mode 100644 index 7ae4254a3..000000000 --- a/qmule/src/org/qcmg/qmule/BAMCompress.java-- +++ /dev/null @@ -1,156 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
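// [Editorial sketch, not part of the patch.] seekMismatch above walks the MD tag: a base
// letter means a substitution, and a '^' introduces a run of deleted reference bases that
// must be skipped. A standalone version of that scan on a plain String:
class MdTagExample {
    static boolean hasMismatch(String md) {
        if (md == null) {
            return false;
        }
        for (int i = 0, size = md.length(); i < size; ) {
            char c = md.charAt(i);
            if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') {
                return true;                       // substitution in the read
            } else if (c == '^') {
                while (++i < size && Character.isLetter(md.charAt(i))) {
                    // skip the deleted reference bases - they are not mismatches
                }
            } else {
                i++;                               // digit: part of a match-length count
            }
        }
        return false;
    }

    public static void main(String[] args) {
        System.out.println(hasMismatch("76"));       // false - perfect match
        System.out.println(hasMismatch("10A65"));    // true  - one substitution
        System.out.println(hasMismatch("40^ACG36")); // false - deletion only
    }
}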
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.List; - -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMFileWriterFactory; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - -public class BAMCompress { - static QLogger logger = QLoggerFactory.getLogger(BAMCompress.class); - private static File input; - private static File output; - private static int level; - - BAMCompress(File input, File output, int level) throws Exception{ - this.input = input; - this.output = output; - this.level = level; - - logger.info("input file: " + input.getAbsolutePath()); - logger.info("output file name: " + output.getAbsolutePath()); - logger.info("compress level for output BAM: " + level); - } - - public void replaceSeq() throws Exception{ - - SamReader reader = SAMFileReaderFactory.createSAMFileReader( input, ValidationStringency.SILENT); - SAMFileWriter writer = new SAMFileWriterFactory() .makeBAMWriter(reader.getFileHeader(), false, output, level); - - for( SAMRecord record : reader){ - //only replace fully mapped reads, that is no clipping, indels and pading - if( seekFullMppaed(record) && seekMismatch(record) ){ - byte[] base = record.getReadBases(); - for(int i = 0; i < base.length; i++) - base[i] = 'N'; - record.setReadBases(base); - } - - if(record.isValid() == null) // if valid - writer.addAlignment( record ); - } - - reader.close(); - writer.close(); - - logger.info( "input " + reportFileSize(input) ); - logger.info( "output " + reportFileSize(output) ); - - } - - public String reportFileSize(File f){ - - double bytes_in = f.length(); - double kilobytes = (bytes_in / 1024); - double megabytes = (kilobytes / 1024); - double gigabytes = (megabytes / 1024); - - return String.format("file size is %.2fG or %.2fK", gigabytes, kilobytes); - } - - - private boolean seekMismatch(SAMRecord r) { - String attribute = (String)r.getAttribute("MD"); - if (null != attribute) { - for (int i = 0, size = attribute.length() ; i < size ; ) { - char c = attribute.charAt(i); - if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { - return false; - } else if ( c == '^') { - //skip the insertion base - while (++i < size && Character.isLetter(attribute.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... 
- } - return true; - } - return false; - } - - private boolean seekFullMppaed(SAMRecord r){ - - if(r.getReadUnmappedFlag()) - return false; - - //reads with clips or indel, skips, pads - List ele = r.getCigar().getCigarElements(); - for (CigarElement element : r.getCigar().getCigarElements()){ - if( element.getLength() > 0){ - if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) { - return false; - }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){ - return false; - }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){ - return false; - } - } - } - - return true; - } - - - public static void main(String[] args) throws Exception{ - Options op = new Options(BAMCompress.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_BAMCompress")); - op.displayHelp(); - System.exit(0); - } - - String output = op.getOutputFileNames()[0]; - String input = op.getInputFileNames()[0]; - if(! new File(input).exists() ) - throw new Exception("input file not exists: " + args[0]); - - if(op.hasLogOption()) - logger = QLoggerFactory.getLogger(BAMCompress.class, op.getLogFile(), op.getLogLevel()); - else - logger = QLoggerFactory.getLogger(BAMCompress.class, output + ".log", op.getLogLevel()); - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - logger.logInitialExecutionStats( "qmule " + BAMCompress.class.getName(), version,args); - - int level = op.getcompressLevel(); //default compress level - - logger.logInitialExecutionStats( "qmule " + BAMCompress.class.getName(), null,args); - - long startTime = System.currentTimeMillis(); - BAMCompress compress = new BAMCompress(new File(input), new File(output) , level ); - compress.replaceSeq(); - - logger.info( String.format("It took %d hours, %d seconds to perform the compression", - (int) (System.currentTimeMillis() - startTime) / (1000*60*60), - (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) ); - logger.logFinalExecutionStats(0); - - } - - -} diff --git a/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java b/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java deleted file mode 100644 index 363f5ccbc..000000000 --- a/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java +++ /dev/null @@ -1,250 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
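// [Editorial sketch, not part of the patch.] The timing message above divides the elapsed
// milliseconds by 1000*60*60 for hours and by 1000*60 (mod 60) for the second figure, so it
// appears to print minutes under a "seconds" label. A full hours/minutes/seconds breakdown
// of an elapsed duration could be written as:
class ElapsedTimeExample {
    static String format(long elapsedMillis) {
        long totalSeconds = elapsedMillis / 1000;
        long hours = totalSeconds / 3600;
        long minutes = (totalSeconds / 60) % 60;
        long seconds = totalSeconds % 60;
        return String.format("%d hours, %d minutes, %d seconds", hours, minutes, seconds);
    }

    public static void main(String[] args) {
        System.out.println(format(3_725_000L)); // 1 hours, 2 minutes, 5 seconds
    }
}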
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.sql.ResultSet; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMReadGroupRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -//import org.qcmg.db.ConnectionType; -//import org.qcmg.db.GeneusDBConnection; -import org.qcmg.picard.SAMFileReaderFactory; - -public class BAMHeaderChecker { - /* - private static final String SEPERATOR = "&"; - - private static QLogger logger; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - - private final List bamFiles = new ArrayList(); - private List bamDirectories = new ArrayList(); - - private final Map results = new HashMap(); - - private int exitStatus; - - private int engage() throws Exception { - - bamDirectories = Arrays.asList(FileUtils.findDirectories(cmdLineInputFiles[0], "seq_final", true)); - - logger.info("Will check the following directories for bam files:"); - for (File f : bamDirectories) { - logger.info(f.getAbsolutePath()); - bamFiles.addAll(Arrays.asList(FileUtils.findFilesEndingWithFilter(f.getAbsolutePath(), ".bam"))); - } - - // only operates on seq_final bams -// bamFiles = Arrays.asList(FileUtils.findFiles(cmdLineInputFiles[0], ".bam")); - - // loop through each file and get patient, experiment and input_type - String patient = null; - String experiment = null; - String input = null; - - GeneusDBConnection conn = new GeneusDBConnection(ConnectionType.QCMG_MAPSET); - - try { - for (File bamFile : bamFiles) { - String bamFileName = bamFile.getAbsolutePath(); - logger.info("examining bam file: " + bamFileName); - String bamFileSmallName = bamFileName.substring(bamFileName.lastIndexOf(System.getProperty("file.separator")) + 1 , bamFileName.indexOf(".bam")); - - patient = bamFileSmallName.substring(0, 9); //APGI_1234 - experiment = bamFileSmallName.substring(10, bamFileSmallName.lastIndexOf(".")); //APGI_1234 - input = bamFileSmallName.substring(bamFileSmallName.lastIndexOf(".") + 1); //APGI_1234 - logger.info("patient: " + patient + ", experiment: " + experiment + ", input: " + input); - - // get details from bam header - List constituentFiles = getConstituentBamFiles(bamFile); - List trackliteConstituentFiles = getTrackliteBamFiles(patient, experiment, input, conn); - - //loop through tracklite constituentFiles and check that they all have an entry in bam header ConstituentFiles - for (String trackliteBam : trackliteConstituentFiles) { - String [] params = trackliteBam.split(SEPERATOR); - - String result = "OK"; - boolean trackliteMatch = false; - - for (String headerFileBam : constituentFiles) { - if (headerFileBam.contains(params[0]) && headerFileBam.contains(params[1])) { - trackliteMatch = true; - break; - } - } - - if ( ! 
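// [Editorial sketch, not part of the patch.] The commented-out engage() above splits a
// seq_final BAM name into patient / experiment / input purely by position: characters 0-8
// are the patient id, the middle section up to the last '.' is the experiment, and the final
// dotted token is the input type. With a hypothetical file name of that shape:
class BamNameParseExample {
    public static void main(String[] args) {
        String name = "APGI_1234.exome.ND.bam";                          // hypothetical example
        String small = name.substring(0, name.indexOf(".bam"));
        String patient = small.substring(0, 9);                          // APGI_1234
        String experiment = small.substring(10, small.lastIndexOf('.')); // exome
        String input = small.substring(small.lastIndexOf('.') + 1);      // ND
        System.out.println(patient + " / " + experiment + " / " + input);
    }
}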
trackliteMatch) { - result = "no corresponding entry in bam file header for tracklite details: " + params[0] + ":" + params[1]; - logger.warn(result); - } - results.put(bamFileSmallName, result); - } - } - } finally { - conn.closeConnection(); - } - - logger.info(""); - logger.info(""); - logger.info("SUMMARY:"); - for (Entry resultsEntry : results.entrySet()) { - logger.info(resultsEntry.getKey() + " : " + resultsEntry.getValue()); - } - logger.info("DONE"); - - return exitStatus; - } - - private List getTrackliteBamFiles(String patient, String experiment, String input, GeneusDBConnection conn) throws Exception { - List trackliteResults = new ArrayList (); - - String sql = "SELECT patient_id, run_name, barcode FROM tracklite_run tr, tracklite_sample ts" + - " WHERE tr.sample_id = ts.processing_id" + - " AND ts.patient_id = '" + patient.replace('_', '-') + "'" + - " AND tr.experiment_type = '" + experiment + "'" + - " AND tr.input_type = '" + input + "'" + - "AND tr.run_status = 'complete'"; - - ResultSet rs = null; - try { - rs = conn.executeSelectQuery(sql); - - while (rs.next()) { - String runName = rs.getString(2); - String barCode = rs.getString(3); - logger.debug("runName: " + runName + ", barCode: " + barCode); - trackliteResults.add(runName + SEPERATOR + barCode); - } - - } finally { - try { - if (null != rs && null != rs.getStatement() ) { - rs.getStatement().close(); - } - } finally { - if (null != rs) rs.close(); - } - } - - return trackliteResults; - } - - private List getConstituentBamFiles(File bamFile) { - List results = new ArrayList(); - SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile); - try { - - SAMFileHeader header = reader.getFileHeader(); - // get the read groups - for (SAMReadGroupRecord readGroup : header.getReadGroups()) { - String constituentBamFile = readGroup.getAttribute("zc"); - if (null == constituentBamFile) - constituentBamFile = readGroup.getAttribute("ZC"); - - if (null != constituentBamFile) { - constituentBamFile = constituentBamFile.substring(2); - logger.debug("read group ZC attribute: " + constituentBamFile); - results.add(constituentBamFile); - } else { - logger.debug("null ZC attribute in file: " + bamFile.getAbsolutePath()); - } - } - - } finally { - reader.close(); - } - return results; - } - - - - - public static void main(String[] args) throws Exception { - BAMHeaderChecker sp = new BAMHeaderChecker(); - int exitStatus = 0; - try { - exitStatus = sp.setup(args); - } catch (Exception e) { - exitStatus = 1; - if (null != logger) - logger.error("Exception caught whilst running BAMHeaderChecker:", e); - else System.err.println("Exception caught whilst running BAMHeaderChecker"); - } - - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
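// [Editorial sketch, not part of the patch.] getConstituentBamFiles above pulls the zc/ZC
// attribute out of each read group in the BAM header via QCMG's SAMFileReaderFactory wrapper.
// Using htsjdk's SamReaderFactory directly, the same lookup might look like this; the file
// path comes from the command line and both attribute spellings are checked, as the original does:
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import java.io.File;
import java.io.IOException;

class ReadGroupZcExample {
    public static void main(String[] args) throws IOException {
        File bam = new File(args[0]);
        try (SamReader reader = SamReaderFactory.makeDefault().open(bam)) {
            for (SAMReadGroupRecord rg : reader.getFileHeader().getReadGroups()) {
                String zc = rg.getAttribute("ZC");
                if (zc == null) {
                    zc = rg.getAttribute("zc");   // the original also checks the lower-case form
                }
                System.out.println(rg.getReadGroupId() + " -> " + zc);
            }
        }
    }
}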
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(BAMHeaderChecker.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("BAMHeaderChecker", BAMHeaderChecker.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - */ -} diff --git a/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- b/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- deleted file mode 100644 index 363f5ccbc..000000000 --- a/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- +++ /dev/null @@ -1,250 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.sql.ResultSet; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMReadGroupRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -//import org.qcmg.db.ConnectionType; -//import org.qcmg.db.GeneusDBConnection; -import org.qcmg.picard.SAMFileReaderFactory; - -public class BAMHeaderChecker { - /* - private static final String SEPERATOR = "&"; - - private static QLogger logger; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - - private final List bamFiles = new ArrayList(); - private List bamDirectories = new ArrayList(); - - private final Map results = new HashMap(); - - private int exitStatus; - - private int engage() throws Exception { - - bamDirectories = Arrays.asList(FileUtils.findDirectories(cmdLineInputFiles[0], "seq_final", true)); - - logger.info("Will check the following directories for bam files:"); - for (File f : bamDirectories) { - logger.info(f.getAbsolutePath()); - bamFiles.addAll(Arrays.asList(FileUtils.findFilesEndingWithFilter(f.getAbsolutePath(), ".bam"))); - } - - // only operates on seq_final bams -// bamFiles = Arrays.asList(FileUtils.findFiles(cmdLineInputFiles[0], ".bam")); - - // loop through each file and get patient, experiment and input_type - String patient = null; - String experiment = null; - String input = null; - - GeneusDBConnection conn = new GeneusDBConnection(ConnectionType.QCMG_MAPSET); - - try { - for (File bamFile : bamFiles) { - String bamFileName = bamFile.getAbsolutePath(); - logger.info("examining bam file: " + bamFileName); - 
String bamFileSmallName = bamFileName.substring(bamFileName.lastIndexOf(System.getProperty("file.separator")) + 1 , bamFileName.indexOf(".bam")); - - patient = bamFileSmallName.substring(0, 9); //APGI_1234 - experiment = bamFileSmallName.substring(10, bamFileSmallName.lastIndexOf(".")); //APGI_1234 - input = bamFileSmallName.substring(bamFileSmallName.lastIndexOf(".") + 1); //APGI_1234 - logger.info("patient: " + patient + ", experiment: " + experiment + ", input: " + input); - - // get details from bam header - List constituentFiles = getConstituentBamFiles(bamFile); - List trackliteConstituentFiles = getTrackliteBamFiles(patient, experiment, input, conn); - - //loop through tracklite constituentFiles and check that they all have an entry in bam header ConstituentFiles - for (String trackliteBam : trackliteConstituentFiles) { - String [] params = trackliteBam.split(SEPERATOR); - - String result = "OK"; - boolean trackliteMatch = false; - - for (String headerFileBam : constituentFiles) { - if (headerFileBam.contains(params[0]) && headerFileBam.contains(params[1])) { - trackliteMatch = true; - break; - } - } - - if ( ! trackliteMatch) { - result = "no corresponding entry in bam file header for tracklite details: " + params[0] + ":" + params[1]; - logger.warn(result); - } - results.put(bamFileSmallName, result); - } - } - } finally { - conn.closeConnection(); - } - - logger.info(""); - logger.info(""); - logger.info("SUMMARY:"); - for (Entry resultsEntry : results.entrySet()) { - logger.info(resultsEntry.getKey() + " : " + resultsEntry.getValue()); - } - logger.info("DONE"); - - return exitStatus; - } - - private List getTrackliteBamFiles(String patient, String experiment, String input, GeneusDBConnection conn) throws Exception { - List trackliteResults = new ArrayList (); - - String sql = "SELECT patient_id, run_name, barcode FROM tracklite_run tr, tracklite_sample ts" + - " WHERE tr.sample_id = ts.processing_id" + - " AND ts.patient_id = '" + patient.replace('_', '-') + "'" + - " AND tr.experiment_type = '" + experiment + "'" + - " AND tr.input_type = '" + input + "'" + - "AND tr.run_status = 'complete'"; - - ResultSet rs = null; - try { - rs = conn.executeSelectQuery(sql); - - while (rs.next()) { - String runName = rs.getString(2); - String barCode = rs.getString(3); - logger.debug("runName: " + runName + ", barCode: " + barCode); - trackliteResults.add(runName + SEPERATOR + barCode); - } - - } finally { - try { - if (null != rs && null != rs.getStatement() ) { - rs.getStatement().close(); - } - } finally { - if (null != rs) rs.close(); - } - } - - return trackliteResults; - } - - private List getConstituentBamFiles(File bamFile) { - List results = new ArrayList(); - SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile); - try { - - SAMFileHeader header = reader.getFileHeader(); - // get the read groups - for (SAMReadGroupRecord readGroup : header.getReadGroups()) { - String constituentBamFile = readGroup.getAttribute("zc"); - if (null == constituentBamFile) - constituentBamFile = readGroup.getAttribute("ZC"); - - if (null != constituentBamFile) { - constituentBamFile = constituentBamFile.substring(2); - logger.debug("read group ZC attribute: " + constituentBamFile); - results.add(constituentBamFile); - } else { - logger.debug("null ZC attribute in file: " + bamFile.getAbsolutePath()); - } - } - - } finally { - reader.close(); - } - return results; - } - - - - - public static void main(String[] args) throws Exception { - BAMHeaderChecker sp = new BAMHeaderChecker(); - int 
exitStatus = 0; - try { - exitStatus = sp.setup(args); - } catch (Exception e) { - exitStatus = 1; - if (null != logger) - logger.error("Exception caught whilst running BAMHeaderChecker:", e); - else System.err.println("Exception caught whilst running BAMHeaderChecker"); - } - - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(BAMHeaderChecker.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("BAMHeaderChecker", BAMHeaderChecker.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - */ -} diff --git a/qmule/src/org/qcmg/qmule/BAMPileupUtil.java b/qmule/src/org/qcmg/qmule/BAMPileupUtil.java deleted file mode 100644 index b8646c1ee..000000000 --- a/qmule/src/org/qcmg/qmule/BAMPileupUtil.java +++ /dev/null @@ -1,124 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import htsjdk.samtools.Cigar; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; - -public class BAMPileupUtil { - - public static int SM_CUTOFF = 14; - public static int MD_CUTOFF = 3; - public static int CIGAR_CUTOFF = 34; - - public static int readLengthMatchCounter = 0; - public static int posiitonInDeletionCounter = 0; - - private static final QLogger logger = QLoggerFactory.getLogger(BAMPileupUtil.class); - - -// public static void examinePileup(List sams, VCFRecord record) { -//// int normalCoverage = 0; -// String pileup = ""; -// String qualities = ""; -// for (SAMRecord sam : sams ) { -// -// if ( eligibleSamRecord(sam)) { -//// ++normalCoverage; -// -// int offset = getReadPosition(sam, record.getPosition()); -// -// if (offset < 0) { -// logger.info("invalid offset position - position falls within deletion?? position: "+ record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", alignment end: " + sam.getAlignmentEnd() + ", read length: " + sam.getReadLength() + " cigar: "+ sam.getCigarString()); -// continue; -// } -// -// if (offset >= sam.getReadLength()) { -//// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); -// // set to last entry in sequence -//// logger.info("adjusting offset to read length -1"); -//// String read = sam.getReadString(); -//// int refPosition = sam.getReferencePositionAtReadPosition(offset); -// logger.info("offset: " + offset + ", position: " + record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", unclipped alignment start: " + sam.getUnclippedStart() + ", alignment end: " + sam.getAlignmentEnd()); -// logger.info( sam.format()); -//// offset = sam.getReadLength() -1; -//// logger.info("char at adjusted offset: " + read.charAt(offset)); -//// logger.info("md tag: " + sam.getStringAttribute("MD")); -// continue; -// } -// -// char c = sam.getReadString().charAt(offset); -// pileup += sam.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c; -// qualities += sam.getBaseQualityString().charAt(offset); -// } -// } -// -// -// if (pileup.length() > 0) -// record.setPileup(PileupUtil.getPileupCounts(pileup, qualities)); -// -// } - - /** - * Determines whether a sam record is eligible by applying some filtering criteria. - * Currently filters on the SM tag value, some of the flags, and the Cigar string - * - *
NOTE that we should also be filtering on MD tag, but GATK removes this - * tag when it does its local realignment, so there is no need to include this check for the time being - * - * @param record SAMRecord that is being put through the filter check - * @return boolean indicating if the record has passed the filter - */ - public static boolean eligibleSamRecord(SAMRecord record) { - if (null == record) return false; - Integer sm = record.getIntegerAttribute("SM"); - return ! record.getDuplicateReadFlag() - && (null == sm ? false : sm.intValue() > SM_CUTOFF) -// && tallyMDMismatches(record.getStringAttribute("MD")) < MD_CUTOFF // - && ((record.getReadPairedFlag() && record.getSecondOfPairFlag() && record.getProperPairFlag()) - || tallyCigarMatchMismatches(record.getCigar()) > CIGAR_CUTOFF); - - } - - public static int tallyCigarMatchMismatches(Cigar cigar) { - int tally = 0; - if (null != cigar) { - for (CigarElement element : cigar.getCigarElements()) { - if (CigarOperator.M == element.getOperator()) { - tally += element.getLength(); - } - } - } - return tally; - } - - public static int tallyMDMismatches(String mdData) { - int count = 0; - if (null != mdData) { - for (int i = 0, size = mdData.length() ; i < size ; ) { - - if (isValidMismatch(mdData.charAt(i))) { - count++; - i++; - } else if ('^' == mdData.charAt(i)) { - while (++i < size && Character.isLetter(mdData.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... - } - } - return count; - } - - private static boolean isValidMismatch(char c) { - return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; - } - -} diff --git a/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- b/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- deleted file mode 100644 index b8646c1ee..000000000 --- a/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- +++ /dev/null @@ -1,124 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import htsjdk.samtools.Cigar; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; - -public class BAMPileupUtil { - - public static int SM_CUTOFF = 14; - public static int MD_CUTOFF = 3; - public static int CIGAR_CUTOFF = 34; - - public static int readLengthMatchCounter = 0; - public static int posiitonInDeletionCounter = 0; - - private static final QLogger logger = QLoggerFactory.getLogger(BAMPileupUtil.class); - - -// public static void examinePileup(List sams, VCFRecord record) { -//// int normalCoverage = 0; -// String pileup = ""; -// String qualities = ""; -// for (SAMRecord sam : sams ) { -// -// if ( eligibleSamRecord(sam)) { -//// ++normalCoverage; -// -// int offset = getReadPosition(sam, record.getPosition()); -// -// if (offset < 0) { -// logger.info("invalid offset position - position falls within deletion?? 
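// [Editorial sketch, not part of the patch.] tallyCigarMatchMismatches above adds up the
// lengths of the M (alignment match) elements of a CIGAR; eligibleSamRecord then compares
// that tally against CIGAR_CUTOFF. A standalone version, building a CIGAR by hand rather
// than reading one from a BAM:
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import java.util.Arrays;

class CigarMatchTallyExample {
    static int tallyMatchedBases(Cigar cigar) {
        int tally = 0;
        for (CigarElement element : cigar.getCigarElements()) {
            if (element.getOperator() == CigarOperator.M) {
                tally += element.getLength();
            }
        }
        return tally;
    }

    public static void main(String[] args) {
        // 5S30M10S : 30 aligned bases flanked by soft clips
        Cigar cigar = new Cigar(Arrays.asList(
                new CigarElement(5, CigarOperator.S),
                new CigarElement(30, CigarOperator.M),
                new CigarElement(10, CigarOperator.S)));
        System.out.println(tallyMatchedBases(cigar)); // 30
    }
}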
position: "+ record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", alignment end: " + sam.getAlignmentEnd() + ", read length: " + sam.getReadLength() + " cigar: "+ sam.getCigarString()); -// continue; -// } -// -// if (offset >= sam.getReadLength()) { -//// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); -// // set to last entry in sequence -//// logger.info("adjusting offset to read length -1"); -//// String read = sam.getReadString(); -//// int refPosition = sam.getReferencePositionAtReadPosition(offset); -// logger.info("offset: " + offset + ", position: " + record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", unclipped alignment start: " + sam.getUnclippedStart() + ", alignment end: " + sam.getAlignmentEnd()); -// logger.info( sam.format()); -//// offset = sam.getReadLength() -1; -//// logger.info("char at adjusted offset: " + read.charAt(offset)); -//// logger.info("md tag: " + sam.getStringAttribute("MD")); -// continue; -// } -// -// char c = sam.getReadString().charAt(offset); -// pileup += sam.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c; -// qualities += sam.getBaseQualityString().charAt(offset); -// } -// } -// -// -// if (pileup.length() > 0) -// record.setPileup(PileupUtil.getPileupCounts(pileup, qualities)); -// -// } - - /** - * Determines whether a sam record is eligible by applying some filtering criteria. - * Currently filters on the SM tag value, some of the flags, and the Cigar string - * - *
NOTE that we should also be filtering on MD tag, but GATK removes this - * tag when it does its local realignment, so there is no need to include this check for the time being - * - * @param record SAMRecord that is being put through the filter check - * @return boolean indicating if the record has passed the filter - */ - public static boolean eligibleSamRecord(SAMRecord record) { - if (null == record) return false; - Integer sm = record.getIntegerAttribute("SM"); - return ! record.getDuplicateReadFlag() - && (null == sm ? false : sm.intValue() > SM_CUTOFF) -// && tallyMDMismatches(record.getStringAttribute("MD")) < MD_CUTOFF // - && ((record.getReadPairedFlag() && record.getSecondOfPairFlag() && record.getProperPairFlag()) - || tallyCigarMatchMismatches(record.getCigar()) > CIGAR_CUTOFF); - - } - - public static int tallyCigarMatchMismatches(Cigar cigar) { - int tally = 0; - if (null != cigar) { - for (CigarElement element : cigar.getCigarElements()) { - if (CigarOperator.M == element.getOperator()) { - tally += element.getLength(); - } - } - } - return tally; - } - - public static int tallyMDMismatches(String mdData) { - int count = 0; - if (null != mdData) { - for (int i = 0, size = mdData.length() ; i < size ; ) { - - if (isValidMismatch(mdData.charAt(i))) { - count++; - i++; - } else if ('^' == mdData.charAt(i)) { - while (++i < size && Character.isLetter(mdData.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... - } - } - return count; - } - - private static boolean isValidMismatch(char c) { - return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; - } - -} diff --git a/qmule/src/org/qcmg/qmule/BamMismatchCounts.java b/qmule/src/org/qcmg/qmule/BamMismatchCounts.java deleted file mode 100644 index 4501a5994..000000000 --- a/qmule/src/org/qcmg/qmule/BamMismatchCounts.java +++ /dev/null @@ -1,160 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; - -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; - -public class BamMismatchCounts { - static QLogger logger = QLoggerFactory.getLogger(BamMismatchCounts.class); - static long[] mismatch = new long[100]; - - static HashMap counts = new HashMap(); - static long total = 0; - static long unmapped = 0; - static long clipped = 0; - static long indel = 0; - static long skipPad = 0; - static long fullMapped = 0; - static long noMDreads = 0; - - /** - * count the mismatch base number based on the MD field - * @param r: samrecord - */ - private static void countMismatch(SAMRecord r) { - String attribute = (String)r.getAttribute("MD"); - if (null != attribute) { - int count = 0; - for (int i = 0, size = attribute.length() ; i < size ; ) { - char c = attribute.charAt(i); - if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { - count++; - i++; - } else if ( c == '^') { - //skip the insertion base - while (++i < size && Character.isLetter(attribute.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... - } - mismatch[count] ++; - - }else - noMDreads ++; - - - } - - /** - * - * @param r: sam record - * @return true if this read is full length mapped without any indels, skips and pads - */ - static private Boolean seekFullMapped(SAMRecord r){ - - if(r.getReadUnmappedFlag()){ - unmapped ++; - return false; - } - //reads with clips or indel, skips, pads - else{ - List ele = r.getCigar().getCigarElements(); - for (CigarElement element : r.getCigar().getCigarElements()){ - if( element.getLength() > 0){ - if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) { - clipped ++; - return false; - }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){ - indel ++; - return false; - }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){ - skipPad ++; - return false; - } - } - } - //count mismatch after the for loop - return true; - } - } - - /** - * survey the mismatch stats on full length mapped reads - * @param args: SAM/BAM file with full path, log file with full path - * @throws Exception - */ - public static void main(final String[] args) throws Exception { - Options op = new Options(BamMismatchCounts.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_BamMismatchCounts")); - op.displayHelp(); - System.exit(0); - } - - - if(op.hasLogOption()) - logger = QLoggerFactory.getLogger(BamMismatchCounts.class, op.getLogFile(), op.getLogLevel()); - else - logger = QLoggerFactory.getLogger(BamMismatchCounts.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - logger.logInitialExecutionStats( "qmule " + BamMismatchCounts.class.getName(), version,args); - - String output = op.getOutputFileNames()[0]; - String input = op.getInputFileNames()[0]; - SamReader reader = 
SAMFileReaderFactory.createSAMFileReader(new File(input), - ValidationStringency.SILENT); - - for(int i = 0; i < 100; i++) mismatch[i] = 0; - for (SAMRecord r : reader){ - total ++; - if(seekFullMapped( r)){ - fullMapped ++; - countMismatch(r); - } - } - reader.close(); - - //report mismatch - String S_mismatch = "mismatch matrix for fully mapped reads is below:\nmismatch\treads_number\tratio_to_(fullmapped,total)\n"; - for(int i = 0; i < 100; i++) - if(mismatch[i] > 0){ - int p1 = Math.round(mismatch[i] * 100 / fullMapped); - int p2 = Math.round(mismatch[i] * 100 / total); - S_mismatch += String.format("%d\t%d\t(%d%%,%d%%)\n", i,mismatch[i],p1, p2); - } - - Files.write(Paths.get(output), S_mismatch.getBytes() ); - - logger.info("total records in file: " + total ); - logger.info("unmapped records: " + unmapped); - logger.info("records with clipping (CIGAR S,H): " + clipped); - logger.info("records with indel (CIGAR I,D) : " + indel); - logger.info("records with skipping or padding (CIGAR N,P) : " + skipPad); - logger.info("records mapped full-length: " + fullMapped); - logger.info("records mapped full-length but missing MD field: " + noMDreads); - logger.info("the mismatch counts matrix is outputed to " + args[1]); - logger.logFinalExecutionStats(0); - - } - -} diff --git a/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- b/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- deleted file mode 100644 index 4501a5994..000000000 --- a/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- +++ /dev/null @@ -1,160 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; - -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; - -public class BamMismatchCounts { - static QLogger logger = QLoggerFactory.getLogger(BamMismatchCounts.class); - static long[] mismatch = new long[100]; - - static HashMap counts = new HashMap(); - static long total = 0; - static long unmapped = 0; - static long clipped = 0; - static long indel = 0; - static long skipPad = 0; - static long fullMapped = 0; - static long noMDreads = 0; - - /** - * count the mismatch base number based on the MD field - * @param r: samrecord - */ - private static void countMismatch(SAMRecord r) { - String attribute = (String)r.getAttribute("MD"); - if (null != attribute) { - int count = 0; - for (int i = 0, size = attribute.length() ; i < size ; ) { - char c = attribute.charAt(i); - if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { - count++; - i++; - } else if ( c == '^') { - //skip the insertion base - while (++i < size && Character.isLetter(attribute.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... 
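// [Editorial sketch, not part of the patch.] In the reporting loop above,
// mismatch[i] * 100 / fullMapped is evaluated in long arithmetic, so the division truncates
// before Math.round ever sees a fraction. Computing the ratios in double keeps the
// fractional part (the counts below are hypothetical):
class PercentageExample {
    static double percent(long part, long whole) {
        return whole == 0 ? 0.0 : part * 100.0 / whole;
    }

    public static void main(String[] args) {
        long withTwoMismatches = 1_234L;
        long fullyMapped = 80_000L;
        System.out.printf("%.2f%%%n", percent(withTwoMismatches, fullyMapped)); // 1.54%
    }
}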
- } - mismatch[count] ++; - - }else - noMDreads ++; - - - } - - /** - * - * @param r: sam record - * @return true if this read is full length mapped without any indels, skips and pads - */ - static private Boolean seekFullMapped(SAMRecord r){ - - if(r.getReadUnmappedFlag()){ - unmapped ++; - return false; - } - //reads with clips or indel, skips, pads - else{ - List ele = r.getCigar().getCigarElements(); - for (CigarElement element : r.getCigar().getCigarElements()){ - if( element.getLength() > 0){ - if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) { - clipped ++; - return false; - }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){ - indel ++; - return false; - }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){ - skipPad ++; - return false; - } - } - } - //count mismatch after the for loop - return true; - } - } - - /** - * survey the mismatch stats on full length mapped reads - * @param args: SAM/BAM file with full path, log file with full path - * @throws Exception - */ - public static void main(final String[] args) throws Exception { - Options op = new Options(BamMismatchCounts.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_BamMismatchCounts")); - op.displayHelp(); - System.exit(0); - } - - - if(op.hasLogOption()) - logger = QLoggerFactory.getLogger(BamMismatchCounts.class, op.getLogFile(), op.getLogLevel()); - else - logger = QLoggerFactory.getLogger(BamMismatchCounts.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - logger.logInitialExecutionStats( "qmule " + BamMismatchCounts.class.getName(), version,args); - - String output = op.getOutputFileNames()[0]; - String input = op.getInputFileNames()[0]; - SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(input), - ValidationStringency.SILENT); - - for(int i = 0; i < 100; i++) mismatch[i] = 0; - for (SAMRecord r : reader){ - total ++; - if(seekFullMapped( r)){ - fullMapped ++; - countMismatch(r); - } - } - reader.close(); - - //report mismatch - String S_mismatch = "mismatch matrix for fully mapped reads is below:\nmismatch\treads_number\tratio_to_(fullmapped,total)\n"; - for(int i = 0; i < 100; i++) - if(mismatch[i] > 0){ - int p1 = Math.round(mismatch[i] * 100 / fullMapped); - int p2 = Math.round(mismatch[i] * 100 / total); - S_mismatch += String.format("%d\t%d\t(%d%%,%d%%)\n", i,mismatch[i],p1, p2); - } - - Files.write(Paths.get(output), S_mismatch.getBytes() ); - - logger.info("total records in file: " + total ); - logger.info("unmapped records: " + unmapped); - logger.info("records with clipping (CIGAR S,H): " + clipped); - logger.info("records with indel (CIGAR I,D) : " + indel); - logger.info("records with skipping or padding (CIGAR N,P) : " + skipPad); - logger.info("records mapped full-length: " + fullMapped); - logger.info("records mapped full-length but missing MD field: " + noMDreads); - logger.info("the mismatch counts matrix is outputed to " + args[1]); - logger.logFinalExecutionStats(0); - - } - -} diff --git a/qmule/src/org/qcmg/qmule/BamRecordCounter.java b/qmule/src/org/qcmg/qmule/BamRecordCounter.java deleted file mode 100644 index d81e01a9c..000000000 --- a/qmule/src/org/qcmg/qmule/BamRecordCounter.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. 
- * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; - -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; - -public class BamRecordCounter { - - private static final QLogger logger = QLoggerFactory.getLogger(BamRecordCounter.class); - - public static void main(String args[]) { - - if (null != args && args.length > 0) { - for (String filename : args) { - SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(filename)); - long count = 0; - long duplicates = 0; - long startTime = System.currentTimeMillis(); - for (SAMRecord r : reader) { - count++; - if (r.getDuplicateReadFlag()) - duplicates++; - } - logger.info("no of records in file [" + filename + "] is: " + count); - logger.info("no of duplicate records: " + duplicates); - logger.info("It took " + (System.currentTimeMillis() - startTime) + "ms to perform the count."); - } - } else { - logger.info("USAGE: qmule " + BamRecordCounter.class.getName() + " "); - } - } - -} diff --git a/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- b/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- deleted file mode 100644 index d81e01a9c..000000000 --- a/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- +++ /dev/null @@ -1,44 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; - -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; - -public class BamRecordCounter { - - private static final QLogger logger = QLoggerFactory.getLogger(BamRecordCounter.class); - - public static void main(String args[]) { - - if (null != args && args.length > 0) { - for (String filename : args) { - SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(filename)); - long count = 0; - long duplicates = 0; - long startTime = System.currentTimeMillis(); - for (SAMRecord r : reader) { - count++; - if (r.getDuplicateReadFlag()) - duplicates++; - } - logger.info("no of records in file [" + filename + "] is: " + count); - logger.info("no of duplicate records: " + duplicates); - logger.info("It took " + (System.currentTimeMillis() - startTime) + "ms to perform the count."); - } - } else { - logger.info("USAGE: qmule " + BamRecordCounter.class.getName() + " "); - } - } - -} diff --git a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java deleted file mode 100644 index 3b3fbc798..000000000 --- a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java +++ /dev/null @@ -1,676 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
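// [Editorial sketch, not part of the patch.] BamRecordCounter above iterates a BAM once,
// counting all records and those with the duplicate flag set. The same count with htsjdk's
// SamReaderFactory used directly (lenient validation, as elsewhere in qmule); the path is
// taken from the command line:
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import java.io.File;
import java.io.IOException;

class DuplicateCountExample {
    public static void main(String[] args) throws IOException {
        File bam = new File(args[0]);
        long count = 0;
        long duplicates = 0;
        try (SamReader reader = SamReaderFactory.makeDefault()
                .validationStringency(ValidationStringency.SILENT)
                .open(bam)) {
            for (SAMRecord r : reader) {
                count++;
                if (r.getDuplicateReadFlag()) {
                    duplicates++;
                }
            }
        }
        System.out.println(count + " records, " + duplicates + " duplicates");
    }
}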
- */ -package org.qcmg.qmule; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; -import java.util.Map.Entry; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionName; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - - -public class CompareReferenceRegions { - - private static final String MODE_ONEWAY = "oneway"; - private static final String MODE_ANNOTATE = "annotate"; - private static final String MODE_TWOWAY = "twoway"; - private static final String MODE_INTERSECT = "intersect"; - private static final String MODE_UNIQUE = "unique"; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private List chromosomes = new ArrayList(); - private int overlapCount = 0; - private int notOverlappingCount = 0; - private int recordCount; - private String mode; - private int column; - private String annotation; - private static QLogger logger; - private static final String MAF = "maf"; - private static final String GFF3 = "gff3"; - private static final String GTF = "gtf"; - private static final String BED = "bed"; - private static final String VCF = "vcf"; - private static final String TAB = "txt"; - private static final String DCC1 = "dcc1"; - - private void runOnewayComparison(File inputFile, File comparisonFile, - File outputOverlapFile, File outputNoOverlapFile) throws Exception { - - if (mode.equals(MODE_ANNOTATE)) { - logger.info("If overlapping, will annotate column: " + column+1 +" of file with the annotation " + annotation); - } - - //get a list of the chromosomes - setUp(inputFile, outputOverlapFile, outputNoOverlapFile); - - logger.info("Input file: " + inputFile.getAbsolutePath()); - logger.info("Comparison file: " + comparisonFile.getAbsolutePath()); - - logger.info("Chromosomes to analyze: " + chromosomes.size()); - - for (String c: chromosomes) { - logger.info("Getting records for chromosome: " + c); - Map inputRecords = readRecords(inputFile, c); - Map compareRecords = readRecords(comparisonFile, c); - compareRecords(inputRecords, compareRecords, outputOverlapFile, outputNoOverlapFile); - } - logSummary(); - clear(); - } - - private void logSummary() { - logger.info("SUMMARY"); - logger.info("Total Records: " + recordCount); - logger.info("Total Records in supplied reference regions: " + overlapCount); - logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); - } - - private void runAnnotateComparison(File inputFile, File comparisonFile, - File outputOverlapFile) throws Exception { - - //get a list of the chromosomes - setUp(inputFile, outputOverlapFile, null); - - logger.info("Input file: " + inputFile.getAbsolutePath()); - logger.info("Comparison file: " + comparisonFile.getAbsolutePath()); - - logger.info("Chromosomes to analyze: " + chromosomes.size()); - - for (String c: chromosomes) { - logger.info("Getting records for chromosome: " + c); - Map inputRecords = readRecords(inputFile, c); - Map compareRecords = readRecords(comparisonFile, c); - compareRecordsAndAnnotate(inputRecords, compareRecords, outputOverlapFile); - } - logSummary(); - clear(); - } - - private void 
runIntersectComparison() throws Exception { - //Set first input file as primary - File primaryInputFile = new File(cmdLineInputFiles[0]); - //Single output file - File outputFile = new File(cmdLineOutputFiles[0]); - - int[] counts = new int[cmdLineInputFiles.length]; - counts[0] = 0; - - setUp(primaryInputFile, outputFile, null); - - //logging - logger.info("Input file 1: " + primaryInputFile.getAbsolutePath()); - for (int i=1; i inputRecords = readRecords(primaryInputFile, c); - counts[0] += inputRecords.size(); - for (int i=1; i compareRecords = readRecords(compareFile, c); - counts[i] += compareRecords.size(); - compareOverlapRecords(c, inputRecords, compareRecords, getFileType(primaryInputFile)); - } - overlapCount += inputRecords.size(); - //any input records left at the end are intersecting - writeRecords(inputRecords, outputFile); - } - for (int i=0; i inputRecords = readRecords(primaryInputFile, c); - Map compareRecords = new TreeMap(); - counts[f] += inputRecords.size(); - for (int i=0; i currentRecords = readRecords(compareFile, c); - counts[i] = counts[i] + currentRecords.size(); - compareRecords.putAll(currentRecords); - } - } - compareOverlapRecords(c, inputRecords, compareRecords, getFileType(primaryInputFile)); - notOverlappingCount += inputRecords.size(); - //any input records left at the end are unique - writeRecords(inputRecords, outputFile); - logger.info(counts[f] + " total records for file " +cmdLineInputFiles[f]); - for (int i=0; i inputRecords, Map compareRecords, String inputFileType) throws Exception { - - Iterator> entries = inputRecords.entrySet().iterator(); - while (entries.hasNext()) { - Entry entry = entries.next(); - - boolean isOverlapping = compareRecord(entry, compareRecords, inputFileType); - - if (mode.equals(MODE_INTERSECT) && !isOverlapping) { - //remove input record if it isn't overlapping and won't intersect with all records - entries.remove(); - } - if (mode.equals(MODE_UNIQUE) && isOverlapping) { - entries.remove(); - } - } - } - - private void compareRecordsAndAnnotate(Map inputRecords, - Map compareRecords, - File outputOverlapFile) throws Exception { - BufferedWriter overlapWriter = new BufferedWriter(new FileWriter(outputOverlapFile, true)); - - try { - for (Entry entry : inputRecords.entrySet()) { - recordCount++; - boolean isOverlapping = compareRecord(entry, compareRecords, null); - - if (isOverlapping) { - overlapCount++; - } else { - notOverlappingCount++; - } - writeRecord(overlapWriter, entry.getValue()); - } - } finally { - overlapWriter.close(); - } - } - - private void compareRecords(Map inputRecords, - Map compareRecords, - File outputOverlapFile, File outputNoOverlapFile) throws Exception { - BufferedWriter overlapWriter = new BufferedWriter(new FileWriter(outputOverlapFile, true)); - BufferedWriter noOverlapWriter = new BufferedWriter(new FileWriter(outputNoOverlapFile, true)); - - try { - for (Entry entry : inputRecords.entrySet()) { - - recordCount++; - - boolean isOverlapping = compareRecord(entry, compareRecords, null); - - if (isOverlapping) { - overlapCount++; - writeRecord(overlapWriter, entry.getValue()); - } else { - notOverlappingCount++; - if (mode.equals(MODE_ANNOTATE)) { - - } else { - writeRecord(noOverlapWriter, entry.getValue()); - } - } - } - } finally { - overlapWriter.close(); - noOverlapWriter.close(); - } - } - - private boolean compareRecord(Entry entry, Map compareRecords, String inputFileType) throws Exception { - ChrPosition inputChrPos = entry.getKey(); - TabbedRecord inputRecord = entry.getValue(); - 
boolean isOverlapping = false; - //check to see if it is overlapping with the comparison reference region - for (Entry compareEntry : compareRecords.entrySet()) { - ChrPosition comparePos = compareEntry.getKey(); - if (comparePos.getEndPosition() < inputChrPos.getStartPosition()) { - continue; - } else if (comparePos.getStartPosition() > inputChrPos.getEndPosition()) { - break; - } else { - if (tabbedRecordFallsInCompareRecord(inputChrPos, inputRecord, compareEntry)) { - isOverlapping = true; - if (mode.equals(MODE_ANNOTATE)) { - String[] values = inputRecord.getDataArray(); - String oldVal = values[column]; - if (oldVal.equals("")) { - values[column] = annotation; - } else { - if (oldVal.endsWith(";")) { - values[column] = oldVal + annotation; - } else { - values[column] = oldVal + ";" + annotation; - } - } - String data = ""; - for (String s: values) { - data += s + "\t"; - } - inputRecord.setData(data); - } - if (mode.equals(MODE_INTERSECT)) { - //change the ends?? - int[] indexes = getChrIndex(inputFileType, entry.getValue().getData().split("\t")); - String[] array = inputRecord.getDataArray(); - - if (inputChrPos.getStartPosition() > compareEntry.getKey().getStartPosition()) { - array[indexes[1]] = Integer.toString(compareEntry.getKey().getStartPosition()); - } - if (inputChrPos.getEndPosition() < compareEntry.getKey().getEndPosition()) { - array[indexes[2]] = Integer.toString(compareEntry.getKey().getEndPosition()); - } - String data = ""; - for (String s: array) { - data += s + "\t"; - } - inputRecord.setData(data); - entry.setValue(inputRecord); - } - } - } - } - return isOverlapping; - } - - - private void writeRecords(Map records, File outputFile) throws IOException { - BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile, true)); - - for (Entry entry: records.entrySet()) { - writeRecord(writer, entry.getValue()); - } - writer.close(); - } - - private void writeRecord(BufferedWriter writer, TabbedRecord record) throws IOException { - if (!record.getData().endsWith("\n")) { - record.setData(record.getData() + "\n"); - } - writer.write(record.getData()); - } - - private TreeMap readRecords(File inputFile, String chromosome) throws Exception { - - TabbedFileReader reader = new TabbedFileReader(inputFile); - TreeMap records = new TreeMap(); - String fileType = getFileType(inputFile); - try { - - Iterator iterator = reader.getRecordIterator(); - - while (iterator.hasNext()) { - - TabbedRecord tab = iterator.next(); - if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis") || tab.getData().startsWith("Chr") ) { - continue; - } - ChrPosition chrPos = getChrPosition(fileType, tab); - if (chrPos.getChromosome().equals(chromosome)) { - records.put(chrPos, tab); - } - } - - } finally { - reader.close(); - } - - return records; - } - - private String getFileType(File inputFile) { - int index = inputFile.getName().lastIndexOf(".") + 1; - String name = inputFile.getName().substring(index, inputFile.getName().length()); - - if (name.equals("dcc")) { - return "dcc1"; - } - - return name; - } - - private void setUp(File file, File outputFileOne, File outputFileTwo) throws Exception { - TabbedFileReader reader = new TabbedFileReader(file); - Iterator iterator = reader.getRecordIterator(); - - String fileType = getFileType(file); - List header = new ArrayList(); - if (reader.getHeader() != null) { - Iterator iter = reader.getHeader().iterator(); - while (iter.hasNext()) { - header.add(iter.next()); - } - } - - while 
(iterator.hasNext()) { - - TabbedRecord tab = iterator.next(); - - if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis") || tab.getData().startsWith("Chr") ) { - header.add(tab.getData()); - continue; - } - - ChrPosition chrPos = getChrPosition(fileType, tab); - - if (!chromosomes.contains(chrPos.getChromosome())) { - chromosomes.add(chrPos.getChromosome()); - } - } - reader.close(); - - if (outputFileOne != null) { - writeHeader(header, outputFileOne); - } - if (outputFileTwo != null) { - writeHeader(header, outputFileTwo); - } - - } - - private int[] getChrIndex(String inputFileType, String[] values) throws Exception { - - int chrIndex = 0; - int startIndex = 0; - int endIndex = 0; - - if (inputFileType.equals(MAF)) { - chrIndex = 4; - startIndex = 5; - endIndex = 6; - } else if (inputFileType.equals(DCC1)) { - chrIndex = 4; - startIndex = 5; - endIndex = 6; - } else if (inputFileType.equals(BED)) { - chrIndex = 0; - startIndex = 1; - endIndex = 2; - } else if (inputFileType.equals(GFF3) || inputFileType.equals(GTF)) { - chrIndex = 0; - startIndex = 3; - endIndex = 4; - } else if (inputFileType.equals(VCF)) { - chrIndex = 0; - startIndex = 1; - endIndex = 1; - if (values.length >= 8) { - String[] infos = values[7].split("\t"); - - for (String info : infos) { - String[] params = info.split("="); - if (params.length == 2) { - if (params[0].equals("END")) { - endIndex = 2; - values[2] = params[1]; - } - } - } - } - //NEED TO CHANGE FOR INDELS - } else if (inputFileType.equals(TAB)) { - chrIndex = 0; - startIndex = 1; - endIndex = 2; - } else { - throw new Exception("Input file type is not recognized"); - } - int[] arr = {chrIndex, startIndex, endIndex}; - return arr; - } - - private ChrPosition getChrPosition(String inputFileType, TabbedRecord tab) throws Exception { - String[] values = tab.getData().split("\t"); - ChrPosition chr = null; - - int[] indexes = getChrIndex(inputFileType, values); - int chrIndex = indexes[0]; - int startIndex = indexes[1]; - int endIndex = indexes[2]; - - if (inputFileType.equals(BED)) { - chr = new ChrRangePosition(values[chrIndex], new Integer(values[startIndex])+1, new Integer(values[endIndex])+1); - } else { - String chromosome = values[chrIndex]; - if (!chromosome.contains("GL") && !chromosome.startsWith("chr")) { - chromosome = "chr" + chromosome; - } - if (chromosome.equals("chrM")) { - chromosome = "chrMT"; - } - if (inputFileType.equals(MAF)) { - chr = new ChrPositionName(chromosome, new Integer(values[startIndex]), new Integer(values[endIndex]), values[0]); - } else { - chr = new ChrRangePosition(chromosome, new Integer(values[startIndex]), new Integer(values[endIndex])); - } - } - return chr; - } - - private boolean tabbedRecordFallsInCompareRecord(ChrPosition inputChrPos, TabbedRecord inputRecord, Entry entry) { - if (entry != null) { - ChrPosition compareChrPos = entry.getKey(); - if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() && inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || - (inputChrPos.getEndPosition() >= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() <= compareChrPos.getEndPosition()) - || (inputChrPos.getStartPosition() <= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() >= compareChrPos.getEndPosition())) { - return true; - } - } - return false; - } - - public String[] getCmdLineInputFiles() { - return cmdLineInputFiles; - } - - public void setCmdLineInputFiles(String[] cmdLineInputFiles) { - 
this.cmdLineInputFiles = cmdLineInputFiles; - } - - - private void writeHeader(List header, File outputOverlapFile) throws IOException { - BufferedWriter writer = new BufferedWriter(new FileWriter(outputOverlapFile, true)); - - for (String h: header) { - - writer.write(h + "\n"); - } - writer.close(); - } - - public List getChromosomes() { - return chromosomes; - } - - public void setChromosomes(List chromosomes) { - this.chromosomes = chromosomes; - } - - - public int getOverlapCount() { - return overlapCount; - } - - public void setOverlapCount(int overlapCount) { - this.overlapCount = overlapCount; - } - - public int getNotOverlappingCount() { - return notOverlappingCount; - } - - public void setNotOverlappingCount(int notOverlappingCount) { - this.notOverlappingCount = notOverlappingCount; - } - - public int getMafCount() { - return recordCount; - } - - public void setMafCount(int mafCount) { - this.recordCount = mafCount; - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(CompareReferenceRegions.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareReferenceRegions", CompareReferenceRegions.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - //output files - cmdLineOutputFiles = options.getOutputFileNames(); - - if (cmdLineOutputFiles.length >= 1) { - if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - - for (String file : cmdLineOutputFiles) { - if (new File(file).exists() && !new File(file).isDirectory()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - } - mode = options.getMode(); - if (mode == null) { - mode = MODE_ONEWAY; - } - logger.info("Mode: " + mode); - - if (mode.equals(MODE_ANNOTATE)) { - //take away 1 to get index of column rather than column number - column = new Integer(options.getColumn()) -1; - annotation = options.getAnnotation(); - } - - return engage(); - } - - return returnStatus; - } - - - private int engage() throws Exception { - - if (mode.equals(MODE_ONEWAY) || mode.equals(MODE_TWOWAY)) { - runOnewayComparison(new File(cmdLineInputFiles[0]), new File(cmdLineInputFiles[1]), new File(cmdLineOutputFiles[0]), new File(cmdLineOutputFiles[1])); - if (mode.equals(MODE_TWOWAY)) { - runOnewayComparison(new File(cmdLineInputFiles[1]), new File(cmdLineInputFiles[0]), new File(cmdLineOutputFiles[2]), new File(cmdLineOutputFiles[3])); - } - } else if (mode.equals(MODE_ANNOTATE)) { - runAnnotateComparison(new File(cmdLineInputFiles[0]), new File(cmdLineInputFiles[1]), new File(cmdLineOutputFiles[0])); - } else if (mode.equals(MODE_INTERSECT)) { - runIntersectComparison(); - } else if (mode.equals(MODE_UNIQUE)) { - runUniqueComparison(); - } else { - throw new QMuleException("MODE_ERROR", mode); - } - return 0; - } - - - private void clear() { - recordCount = 0; - overlapCount = 0; - notOverlappingCount = 0; - } - - public static void main(String[] args) throws Exception { - CompareReferenceRegions sp = new CompareReferenceRegions(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - -} diff --git a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- deleted file mode 100644 index 3b3fbc798..000000000 --- a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- +++ /dev/null @@ -1,676 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
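
The CompareReferenceRegions tool deleted above keys each input line by ChrPosition in a per-chromosome TreeMap via the old TabbedFileReader/TabbedRecord API. Below is a minimal sketch of how that loading step could look against the StringFileReader used elsewhere in this patch, assuming StringFileReader is AutoCloseable and iterates raw data lines; getChrPosition is a hypothetical stand-in for the deleted getChrIndex()/getChrPosition() column mapping, not an existing API.

// Sketch only (not part of this patch): per-chromosome loading as in the deleted
// CompareReferenceRegions.readRecords(), expressed over StringFileReader.
// Assumes StringFileReader is AutoCloseable and iterates raw data lines.
import java.io.File;
import java.util.TreeMap;

import org.qcmg.common.model.ChrPosition;
import org.qcmg.qio.record.StringFileReader;

final class RegionLoaderSketch {

    static TreeMap<ChrPosition, String> readRecords(File inputFile, String chromosome, String fileType) throws Exception {
        TreeMap<ChrPosition, String> records = new TreeMap<>();
        try (StringFileReader reader = new StringFileReader(inputFile)) {
            for (String line : reader) {
                // skip the same header-like lines the deleted tool skipped
                if (line.startsWith("#") || line.startsWith("Hugo")
                        || line.startsWith("analysis") || line.startsWith("Chr")) {
                    continue;
                }
                ChrPosition chrPos = getChrPosition(fileType, line.split("\t")); // hypothetical helper
                if (chrPos.getChromosome().equals(chromosome)) {
                    records.put(chrPos, line);
                }
            }
        }
        return records;
    }

    // hypothetical stand-in for the MAF/DCC1/BED/GFF3/VCF column mapping in the deleted getChrIndex()
    private static ChrPosition getChrPosition(String fileType, String[] values) {
        throw new UnsupportedOperationException("sketch only");
    }
}

Keeping the raw tab-delimited line as the map value would let the overlap, annotate and intersect passes above continue to operate on plain text rather than TabbedRecord objects.
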
diff --git a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java deleted file mode 100644 index 84fc72a32..000000000 --- a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java +++ /dev/null @@ -1,86 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file.
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.Set; -import java.util.TreeSet; - -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedFileWriter; -import org.qcmg.qmule.tab.TabbedHeader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class DbSnpChrLiftover { - - private static char TAB = '\t'; - - String inputVCF; - String outputVCF; - - - private final Set uniqueChrNames = new TreeSet(); - - public DbSnpChrLiftover() {} - - private void getUniqueChrNames() throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(inputVCF)); - TabbedFileWriter writer = new TabbedFileWriter(new File(outputVCF)); - try { - - TabbedHeader header = reader.getHeader(); - - // writer out header - writer.addHeader(header); - - for (TabbedRecord record : reader) { - String [] params = TabTokenizer.tokenize(record.getData()); - String chr = params[0]; - uniqueChrNames.add(chr); - - // switch the chr - params[0] = "chr" + chr; - - StringBuilder sb = new StringBuilder(); - for (int i = 0, len = params.length ; i < len ; i ++) { - sb.append(params[i]); - if (i < len-1) sb.append(TAB); - } - - record.setData(sb.toString()); - - writer.add(record); - } - - } finally { - try { - writer.close(); - } finally { - reader.close(); - } - } - - - for (String chr : uniqueChrNames) { - System.out.println("chr: " + chr); - } - } - - - public static void main(String[] args) throws Exception { - if (args.length < 2) - throw new IllegalArgumentException("USAGE: DbSnpChrLiftover "); - - DbSnpChrLiftover dcl = new DbSnpChrLiftover(); - - - dcl.inputVCF = args[0]; - dcl.outputVCF = args[1]; - - dcl.getUniqueChrNames(); - - } - -} diff --git a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- deleted file mode 100644 index 84fc72a32..000000000 --- a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- +++ /dev/null @@ -1,86 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
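
DbSnpChrLiftover, deleted above, is a single pass that prepends "chr" to the chromosome column using the removed TabbedFileReader/TabbedFileWriter pair. A minimal sketch of the same pass over StringFileReader follows, assuming getHeader() yields the header lines and that the reader is AutoCloseable; writing through a plain BufferedWriter is illustrative only and not the qio writer API.

// Sketch only (not part of this patch): the "chr" prefixing pass from the deleted
// DbSnpChrLiftover, re-expressed over StringFileReader. Header lines are assumed
// to come back from getHeader(); output via BufferedWriter is illustrative only.
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;

import org.qcmg.common.util.TabTokenizer;
import org.qcmg.qio.record.StringFileReader;

final class ChrLiftoverSketch {

    static void addChrPrefix(File inputVcf, File outputVcf) throws Exception {
        try (StringFileReader reader = new StringFileReader(inputVcf);
                BufferedWriter writer = new BufferedWriter(new FileWriter(outputVcf))) {
            for (Object headerLine : reader.getHeader()) {   // header lines, assumed Strings
                writer.write(headerLine.toString());
                writer.newLine();
            }
            for (String line : reader) {
                String[] params = TabTokenizer.tokenize(line);
                params[0] = "chr" + params[0];               // same switch as the deleted tool
                writer.write(String.join("\t", params));
                writer.newLine();
            }
        }
    }
}
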
diff --git a/qmule/src/org/qcmg/qmule/GermlineDBStripper.java b/qmule/src/org/qcmg/qmule/GermlineDBStripper.java deleted file mode 100644 index 71bd5e9cf..000000000 --- a/qmule/src/org/qcmg/qmule/GermlineDBStripper.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file.
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.IOException; - -import org.qcmg.germlinedb.GermlineDBFileReader; -import org.qcmg.germlinedb.GermlineDBFileWriter; -import org.qcmg.germlinedb.GermlineDBRecord; - -public class GermlineDBStripper { - - - public static void main(String[] args) throws IOException { - - String germlineDB = args[0]; - String germlineDBClassA = args[1]; - String header = "analysis_id\tcontrol_sample_id\tvariation_id\tvariation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tflag"; - - GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDB)); - GermlineDBFileWriter writer = new GermlineDBFileWriter(new File(germlineDBClassA)); - - try { - writer.add(header+"\n"); - - // strip out all non-classA entities from Germline_DB - int totalCount = 0, classACount = 0; - for (GermlineDBRecord record : reader) { - ++totalCount; - if ("--".equals(record.getFlag())) { - ++classACount; - writer.add(record.getData() + "\n"); - } - } - System.out.println("total count: " + totalCount + ", classA count: " + classACount); - - } finally { - try { - reader.close(); - } finally { - writer.close(); - } - } - } -}
diff --git a/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- b/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- deleted file mode 100644 index 71bd5e9cf..000000000 --- a/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- +++ /dev/null @@ -1,47 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file.
diff --git a/qmule/src/org/qcmg/qmule/GetBamRecords.java b/qmule/src/org/qcmg/qmule/GetBamRecords.java deleted file mode 100644 index 114351d71..000000000 --- a/qmule/src/org/qcmg/qmule/GetBamRecords.java +++ /dev/null @@ -1,226 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. 
- * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMUtils; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.picard.QJumper; - -public class GetBamRecords { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - List records; - - private String position; - - private int exitStatus; - private static QLogger logger; - - - public int engage() throws Exception { - - logger.info("Setting up the QJumper"); - QJumper jumper = new QJumper(); - jumper.setupReader(cmdLineInputFiles[0]); - - String contig = position.substring(0, position.indexOf(":")); - int start = Integer.parseInt(position.substring(position.indexOf(":")+1)); - - logger.info("config: " + contig); - logger.info("start: " + start); - - records = jumper.getOverlappingRecordsAtPosition(contig, start, start); - - logger.info("unfiltered read count: " + records.size()+ ""); - - int filteredCount = 0, readsWithBaseAtPosition = 0, duplicateCount = 0, properlyPaired = 0,properlyPairedAll = 0, pairedAll = 0, paired = 0, notPrimaryAlignment = 0, unmapped = 0; - String qualityString = "", qualityPhredString = ""; - String baseString = ""; - int unmappedSecondaryDuplicates = 0, unmappedSecondaryDuplicatesProperly = 0; - - char[] novelStartBases = new char[1024]; // hmmmmm - Set forwardStrand = new HashSet(); - Set reverseStrand = new HashSet(); - int j = 0; - - for (SAMRecord rec : records) { - int readPosition = org.qcmg.picard.util.SAMUtils.getIndexInReadFromPosition(rec, start); - if (readPosition >= 0 && readPosition < rec.getReadLength()) { - char c = rec.getReadString().charAt(readPosition); - if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { - readsWithBaseAtPosition++; - if (rec.getDuplicateReadFlag()) { - duplicateCount++; - } else { - byte [] baseQuals = SAMUtils.fastqToPhred(rec.getBaseQualityString()); - qualityPhredString +=baseQuals[readPosition] + ","; - baseString += (rec.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c) + ""; -// baseString += c + ","; - qualityString +=rec.getBaseQualityString().charAt(readPosition) + ""; - - if (rec.getMappingQuality() >= 10 && rec.getBaseQualities()[readPosition] >= 10) { - if (rec.getReadNegativeStrandFlag()) { - if (reverseStrand.add(rec.getAlignmentStart())) { - novelStartBases[j++] = c; - } - } else { - if (forwardStrand.add(rec.getAlignmentStart())) { - novelStartBases[j++] = c; - } - } - } - } - } - - if (rec.getReadPairedFlag()) { - paired++; - if ( rec.getProperPairFlag()) properlyPaired++; - - } - if (rec.getReadUnmappedFlag()) unmapped++; - if (rec.getReadUnmappedFlag()) unmapped++; - if (rec.getNotPrimaryAlignmentFlag()) notPrimaryAlignment++; - - - if ( ! rec.getDuplicateReadFlag() && ! rec.getNotPrimaryAlignmentFlag() && ! rec.getReadUnmappedFlag()) - unmappedSecondaryDuplicates++; - if ( ! rec.getDuplicateReadFlag() && ! rec.getNotPrimaryAlignmentFlag() && ! rec.getReadUnmappedFlag() - && (rec.getReadPairedFlag() ? 
rec.getProperPairFlag() : true)) -// && (rec.getReadPairedFlag() && rec.getProperPairFlag())) - unmappedSecondaryDuplicatesProperly++; - } - - if (rec.getReadPairedFlag()) { - pairedAll++; - if (rec.getProperPairFlag()) properlyPairedAll++; - } - - if (BAMPileupUtil.eligibleSamRecord(rec)) { - ++filteredCount; - logger.info("***" + rec.getSAMString()); - } else logger.info(rec.getSAMString()); - - - - } - - - logger.info("SUMMARY: "); - logger.info("Total no of records: " + records.size() ); - logger.info("No of records with a base at position: " + readsWithBaseAtPosition); - logger.info("No of duplicate records (that have a base at position): " + duplicateCount); - logger.info("No of unique records (that have a base at position): " + (readsWithBaseAtPosition-duplicateCount)); - logger.info("No of unique paired records (that have a base at position): " + paired); - logger.info("No of unique properly paired records (that have a base at position): " + properlyPaired); - logger.info("No of records not primary aligned (that have a base at position): " + notPrimaryAlignment); - logger.info("No of records not mapped (that have a base at position): " + unmapped); - logger.info("unmappedSecondaryDuplicates (that have a base at position): " + unmappedSecondaryDuplicates); - logger.info("unmappedSecondaryDuplicatesProperly (that have a base at position): " + unmappedSecondaryDuplicatesProperly); - logger.info("No of paired records (all): " + pairedAll); - logger.info("No of properly paired records (all): " + properlyPairedAll); - logger.info("Unique record bases: " + baseString.substring(0,baseString.length() > 0 ? baseString.length() : 0)); - logger.info("Unique record base qualities: " + qualityString.substring(0,qualityString.length() > 0 ? qualityString.length() : 0)); - logger.info("Unique record base qualities (phred): " + qualityPhredString.substring(0,qualityPhredString.length() > 0 ? qualityPhredString.length() : 0)); - logger.info("filtered read count: " + filteredCount + " out of " + records.size() ); - logger.info("Novel start bases: " + new String(novelStartBases)); - - jumper.closeReader(); - - writeToFile(); - - return exitStatus; - } - - private void writeToFile() { - if (null != cmdLineOutputFiles && cmdLineOutputFiles.length == 1) { - try (FileWriter writer = new FileWriter(cmdLineOutputFiles[0]);){ - for (SAMRecord rec : records) { - writer.write(rec.getSAMString()); - } - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - } - } - - - - public static void main(String[] args) throws Exception { - GetBamRecords sp = new GetBamRecords(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("GetBamRecords", GetBamRecords.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - position = options.getPosition(); - position = options.getPosition(); - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -}
diff --git a/qmule/src/org/qcmg/qmule/GetBamRecords.java-- b/qmule/src/org/qcmg/qmule/GetBamRecords.java-- deleted file mode 100644 index 114351d71..000000000 --- a/qmule/src/org/qcmg/qmule/GetBamRecords.java-- +++ /dev/null @@ -1,226 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file.
diff --git a/qmule/src/org/qcmg/qmule/GetInsetSize.java b/qmule/src/org/qcmg/qmule/GetInsetSize.java deleted file mode 100644 index 44d5cc8c6..000000000 --- a/qmule/src/org/qcmg/qmule/GetInsetSize.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file.
- */ -package org.qcmg.qmule; - -import java.io.File; - -import org.qcmg.picard.SAMFileReaderFactory; - -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -public class GetInsetSize { - public static void main(String[] args) throws Exception{ - - File input = new File(args[0]); - SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); - int min =3000; - int max = 0; - String aaa = "AAA"; - for( SAMRecord record : reader){ - - if(record.getAttribute("XC").equals(aaa)){ - int size = Math.abs( record.getInferredInsertSize()); - if(size > max) max = size; - if(size < min) min = size; - } - } - reader.close(); - System.out.println(String.format("Insert range %d-%d\n", min, max)); - } - -}
diff --git a/qmule/src/org/qcmg/qmule/GetInsetSize.java-- b/qmule/src/org/qcmg/qmule/GetInsetSize.java-- deleted file mode 100644 index 44d5cc8c6..000000000 --- a/qmule/src/org/qcmg/qmule/GetInsetSize.java-- +++ /dev/null @@ -1,35 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file.
diff --git a/qmule/src/org/qcmg/qmule/IndelDCCHeader.java b/qmule/src/org/qcmg/qmule/IndelDCCHeader.java deleted file mode 100644 index 408ef9027..000000000 --- a/qmule/src/org/qcmg/qmule/IndelDCCHeader.java +++ /dev/null @@ -1,395 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file.
- */ -package org.qcmg.qmule; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; - -import htsjdk.samtools.SAMFileHeader; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.meta.QDccMeta; -import org.qcmg.common.meta.QLimsMeta; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.util.QDccMetaFactory; -import org.qcmg.picard.util.QLimsMetaFactory; - -public class IndelDCCHeader { - - private String logFile; - private File somaticOutputFile; - private File germlineOutputFile; - private String mode; - private File normalBam; - private File tumourBam; - private String uuid; - private boolean qexecPresent = false; - private ArrayList qexec = new ArrayList(); - private boolean completeHeaderPresent = false; - private File somaticFile; - private File germlineFile; - private String tumourSampleId; - private String normalSampleId; - private static QLogger logger; - - public void setup(String args[]) throws Exception{ - - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(IndelDCCHeader.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("IndelDCCHeader", IndelDCCHeader.class.getPackage().getImplementationVersion(), args); - - // get list of file names - String[] cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_INPUT_FILES"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - somaticFile = new File(cmdLineInputFiles[0]); - germlineFile = new File(cmdLineInputFiles[1]); - tumourBam = new File(options.getTumour()); - normalBam = new File(options.getNormal()); - - if ( ! FileUtils.canFileBeRead(tumourBam)) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); - } - if ( ! FileUtils.canFileBeRead(normalBam)) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); - } - - String[] cmdLineOutputFiles = options.getOutputFileNames(); - - somaticOutputFile = new File(cmdLineOutputFiles[0]); - germlineOutputFile = new File(cmdLineOutputFiles[1]); - - if (cmdLineOutputFiles.length != 2) { - throw new QMuleException("TOO_MANY_OUTPUTFILE"); - } - if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - for (String file : cmdLineOutputFiles) { - if (new File(file).exists()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - - mode = options.getMode(); - - if (mode == null || (!mode.equals("pindel") && !mode.equals("gatk"))) { - throw new QMuleException("MODE_ERROR", mode); - } - - logger.info("Somatic input DCC: " + somaticFile.getAbsolutePath()); - logger.info("Germline input DCC: " + germlineFile.getAbsolutePath()); - logger.info("Output DCC: " + somaticOutputFile.getAbsolutePath()); - logger.info("Output DCC: " + germlineOutputFile.getAbsolutePath()); - logger.info("Tumour bam: " + tumourBam.getAbsolutePath()); - logger.info("Normal bam: " + normalBam.getAbsolutePath()); - logger.info("Mode: " + mode); - - } - } - - public int annotate() throws Exception { - //double check to make sure that uuid isn't already present - checkForUUid(); - - StringBuilder header = new StringBuilder(); - if (completeHeaderPresent) { - logger.info("UUid already present in header. No annotation is taking place"); - } else if (qexecPresent){ - StringBuilder sb = new StringBuilder(); - for (String s: qexec) { - sb.append(s + "\n"); - } - header.append(sb.toString()); - header.append(getDCCMeta()); - QLimsMeta tumour = QLimsMetaFactory.getLimsMeta("TEST", tumourBam.getAbsolutePath()); - tumourSampleId = tumour.getSample(); - header.append(tumour.getLimsMetaDataToString()); - QLimsMeta normal = QLimsMetaFactory.getLimsMeta("CONTROL", normalBam.getAbsolutePath()); - normalSampleId = normal.getSample(); - header.append(normal.getLimsMetaDataToString()); - //write somatic - writeOutputFile(header.toString(), somaticFile, somaticOutputFile, false); - //write germline - writeOutputFile(header.toString(), germlineFile, germlineOutputFile, true); - } - - return 0; - } - - public File getSomaticOutputFile() { - return somaticOutputFile; - } - - public void setSomaticOutputFile(File somaticOutputFile) { - this.somaticOutputFile = somaticOutputFile; - } - - public File getGermlineOutputFile() { - return germlineOutputFile; - } - - public void setGermlineOutputFile(File germlineOutputFile) { - this.germlineOutputFile = germlineOutputFile; - } - - public File getSomaticFile() { - return somaticFile; - } - - public void setSomaticFile(File somaticFile) { - this.somaticFile = somaticFile; - } - - public File getGermlineFile() { - return germlineFile; - } - - public void setGermlineFile(File germlineFile) { - this.germlineFile = germlineFile; - } - - public boolean isQexecPresent() { - return qexecPresent; - } - - public void setQexecPresent(boolean qexecPresent) { - this.qexecPresent = qexecPresent; - } - - public ArrayList getQexec() { - return qexec; - } - - public void setQexec(ArrayList qexec) { - this.qexec = qexec; - } - - public boolean isCompleteHeaderPresent() { - return completeHeaderPresent; - } - - public void setCompleteHeaderPresent(boolean completeHeaderPresent) { - this.completeHeaderPresent = completeHeaderPresent; - } - - public void checkForUUid() throws IOException, QMuleException { - BufferedReader reader = new BufferedReader(new FileReader(somaticFile)); - - String line; - boolean ddcMeta = false; - boolean uuidHere = false; - boolean uuidInResults = false; - qexec = new ArrayList(); - while((line = reader.readLine()) != null) { - if (line.startsWith("#") || line.startsWith("analysis")) { - if (line.contains("Uuid") || line.contains("uuid")) { - 
uuidHere = true; - } - if (line.startsWith("#Q_EXEC")) { - qexec.add(line); - } - if (line.startsWith("#Q_DCCMETA")) { - ddcMeta = true; - } - } else { - String[] values = line.split("\t"); - if (isCorrectUuidFormat(values[0])) { - uuidInResults = true; - } - } - } - reader.close(); - if (ddcMeta && uuidHere && uuidInResults) { - logger.info("Complete header already present."); - completeHeaderPresent = true; - } else if (uuidHere && qexec.size() == 14) { - qexecPresent = true; - logger.info("QExec header and uuid present."); - String q = ""; - for (String s: qexec) { - if (s.contains("Uuid")) { - q = s.replace("-", "_"); - String potentialUuid = s.split("\t")[2].replace("-", "_"); - if (isCorrectUuidFormat(potentialUuid)) { - uuid = potentialUuid; - } else { - logger.info("UUid was not correct format: " + potentialUuid); - throw new QMuleException("UUID_ERROR"); - } - } - } - qexec.remove(0); - qexec.add(0, q); - } else { - logger.info("Could not determine if UUid and DCC header is present"); - throw new QMuleException("UUID_ERROR"); - } - } - - public boolean isCorrectUuidFormat(String potentialUuid) { - if (potentialUuid.length() == 36 && potentialUuid.split("_").length == 5) { - return true; - } - return false; - } - - public String getDCCMeta() throws Exception { - SAMFileHeader tHeader = SAMFileReaderFactory.createSAMFileReader(tumourBam).getFileHeader(); - SAMFileHeader nHeader = SAMFileReaderFactory.createSAMFileReader(normalBam).getFileHeader(); - QDccMeta meta; - - meta = QDccMetaFactory.getDccMeta(uuid, nHeader, tHeader, mode); - return meta.getDCCMetaDataToString(); - } - - public void writeOutputFile(String header, File inputFile, File outputFile, boolean isGermline) throws IOException { - BufferedReader reader = new BufferedReader(new FileReader(inputFile)); - BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); - - if (!completeHeaderPresent) { - writer.write(header); - } - - String line; - while((line = reader.readLine()) != null) { - if (!line.startsWith("#") && !line.startsWith("analysis") && !completeHeaderPresent) { - writer.write(replaceIdsInLine(line, isGermline) + "\n"); - } else { - if (qexecPresent && !line.startsWith("#Q_EXEC")) { - writer.write(line + "\n"); - } - } - } - reader.close(); - writer.close(); - } - - public String getTumourSampleId() { - return tumourSampleId; - } - - public void setTumourSampleId(String tumourSampleId) { - this.tumourSampleId = tumourSampleId; - } - - public String getNormalSampleId() { - return normalSampleId; - } - - public void setNormalSampleId(String normalSampleId) { - this.normalSampleId = normalSampleId; - } - - public String replaceIdsInLine(String line, boolean isGermline) { - String[] values = line.split("\t"); - - StringBuilder sb = new StringBuilder(); - for (int i=0; i< values.length; i++) { - if (i==0 && !completeHeaderPresent) { - sb.append(uuid + "\t"); - } else if (i==1 && !completeHeaderPresent){ - if (isGermline) { - sb.append(normalSampleId + "\t"); - } else { - sb.append(tumourSampleId + "\t"); - } - } else if (i==2 && !completeHeaderPresent) { - String[] mutationStrs = values[i].split("_"); - String count = "_" + mutationStrs[mutationStrs.length-1]; - if (isGermline) { - sb.append(uuid + "_" + normalSampleId + count + "\t"); - } else { - sb.append(uuid + "_"+ tumourSampleId + count + "\t"); - } - } else { - sb.append(values[i] + "\t"); - } - } - return sb.toString(); - } - - public String getUuid() { - return uuid; - } - - public void setUuid(String uuid) { - this.uuid = uuid; - } - - public 
String getMode() { - return mode; - } - - public void setMode(String mode) { - this.mode = mode; - } - - public File getNormalBam() { - return normalBam; - } - - public void setNormalBam(File normalBam) { - this.normalBam = normalBam; - } - - public File getTumourBam() { - return tumourBam; - } - - public void setTumourBam(File tumourBam) { - this.tumourBam = tumourBam; - } - - public static void main(String[] args) throws Exception { - IndelDCCHeader sp = new IndelDCCHeader(); - LoadReferencedClasses.loadClasses(IndelDCCHeader.class); - sp.setup(args); - int exitStatus = sp.annotate(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - -} diff --git a/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- b/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- deleted file mode 100644 index 408ef9027..000000000 --- a/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- +++ /dev/null @@ -1,395 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; - -import htsjdk.samtools.SAMFileHeader; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.meta.QDccMeta; -import org.qcmg.common.meta.QLimsMeta; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.util.QDccMetaFactory; -import org.qcmg.picard.util.QLimsMetaFactory; - -public class IndelDCCHeader { - - private String logFile; - private File somaticOutputFile; - private File germlineOutputFile; - private String mode; - private File normalBam; - private File tumourBam; - private String uuid; - private boolean qexecPresent = false; - private ArrayList qexec = new ArrayList(); - private boolean completeHeaderPresent = false; - private File somaticFile; - private File germlineFile; - private String tumourSampleId; - private String normalSampleId; - private static QLogger logger; - - public void setup(String args[]) throws Exception{ - - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(IndelDCCHeader.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("IndelDCCHeader", IndelDCCHeader.class.getPackage().getImplementationVersion(), args); - - // get list of file names - String[] cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_INPUT_FILES"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - somaticFile = new File(cmdLineInputFiles[0]); - germlineFile = new File(cmdLineInputFiles[1]); - tumourBam = new File(options.getTumour()); - normalBam = new File(options.getNormal()); - - if ( ! FileUtils.canFileBeRead(tumourBam)) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); - } - if ( ! FileUtils.canFileBeRead(normalBam)) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); - } - - String[] cmdLineOutputFiles = options.getOutputFileNames(); - - somaticOutputFile = new File(cmdLineOutputFiles[0]); - germlineOutputFile = new File(cmdLineOutputFiles[1]); - - if (cmdLineOutputFiles.length != 2) { - throw new QMuleException("TOO_MANY_OUTPUTFILE"); - } - if ( ! FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - for (String file : cmdLineOutputFiles) { - if (new File(file).exists()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - - mode = options.getMode(); - - if (mode == null || (!mode.equals("pindel") && !mode.equals("gatk"))) { - throw new QMuleException("MODE_ERROR", mode); - } - - logger.info("Somatic input DCC: " + somaticFile.getAbsolutePath()); - logger.info("Germline input DCC: " + germlineFile.getAbsolutePath()); - logger.info("Output DCC: " + somaticOutputFile.getAbsolutePath()); - logger.info("Output DCC: " + germlineOutputFile.getAbsolutePath()); - logger.info("Tumour bam: " + tumourBam.getAbsolutePath()); - logger.info("Normal bam: " + normalBam.getAbsolutePath()); - logger.info("Mode: " + mode); - - } - } - - public int annotate() throws Exception { - //double check to make sure that uuid isn't already present - checkForUUid(); - - StringBuilder header = new StringBuilder(); - if (completeHeaderPresent) { - logger.info("UUid already present in header. 
No annotation is taking place"); - } else if (qexecPresent){ - StringBuilder sb = new StringBuilder(); - for (String s: qexec) { - sb.append(s + "\n"); - } - header.append(sb.toString()); - header.append(getDCCMeta()); - QLimsMeta tumour = QLimsMetaFactory.getLimsMeta("TEST", tumourBam.getAbsolutePath()); - tumourSampleId = tumour.getSample(); - header.append(tumour.getLimsMetaDataToString()); - QLimsMeta normal = QLimsMetaFactory.getLimsMeta("CONTROL", normalBam.getAbsolutePath()); - normalSampleId = normal.getSample(); - header.append(normal.getLimsMetaDataToString()); - //write somatic - writeOutputFile(header.toString(), somaticFile, somaticOutputFile, false); - //write germline - writeOutputFile(header.toString(), germlineFile, germlineOutputFile, true); - } - - return 0; - } - - public File getSomaticOutputFile() { - return somaticOutputFile; - } - - public void setSomaticOutputFile(File somaticOutputFile) { - this.somaticOutputFile = somaticOutputFile; - } - - public File getGermlineOutputFile() { - return germlineOutputFile; - } - - public void setGermlineOutputFile(File germlineOutputFile) { - this.germlineOutputFile = germlineOutputFile; - } - - public File getSomaticFile() { - return somaticFile; - } - - public void setSomaticFile(File somaticFile) { - this.somaticFile = somaticFile; - } - - public File getGermlineFile() { - return germlineFile; - } - - public void setGermlineFile(File germlineFile) { - this.germlineFile = germlineFile; - } - - public boolean isQexecPresent() { - return qexecPresent; - } - - public void setQexecPresent(boolean qexecPresent) { - this.qexecPresent = qexecPresent; - } - - public ArrayList getQexec() { - return qexec; - } - - public void setQexec(ArrayList qexec) { - this.qexec = qexec; - } - - public boolean isCompleteHeaderPresent() { - return completeHeaderPresent; - } - - public void setCompleteHeaderPresent(boolean completeHeaderPresent) { - this.completeHeaderPresent = completeHeaderPresent; - } - - public void checkForUUid() throws IOException, QMuleException { - BufferedReader reader = new BufferedReader(new FileReader(somaticFile)); - - String line; - boolean ddcMeta = false; - boolean uuidHere = false; - boolean uuidInResults = false; - qexec = new ArrayList(); - while((line = reader.readLine()) != null) { - if (line.startsWith("#") || line.startsWith("analysis")) { - if (line.contains("Uuid") || line.contains("uuid")) { - uuidHere = true; - } - if (line.startsWith("#Q_EXEC")) { - qexec.add(line); - } - if (line.startsWith("#Q_DCCMETA")) { - ddcMeta = true; - } - } else { - String[] values = line.split("\t"); - if (isCorrectUuidFormat(values[0])) { - uuidInResults = true; - } - } - } - reader.close(); - if (ddcMeta && uuidHere && uuidInResults) { - logger.info("Complete header already present."); - completeHeaderPresent = true; - } else if (uuidHere && qexec.size() == 14) { - qexecPresent = true; - logger.info("QExec header and uuid present."); - String q = ""; - for (String s: qexec) { - if (s.contains("Uuid")) { - q = s.replace("-", "_"); - String potentialUuid = s.split("\t")[2].replace("-", "_"); - if (isCorrectUuidFormat(potentialUuid)) { - uuid = potentialUuid; - } else { - logger.info("UUid was not correct format: " + potentialUuid); - throw new QMuleException("UUID_ERROR"); - } - } - } - qexec.remove(0); - qexec.add(0, q); - } else { - logger.info("Could not determine if UUid and DCC header is present"); - throw new QMuleException("UUID_ERROR"); - } - } - - public boolean isCorrectUuidFormat(String potentialUuid) { - if 
(potentialUuid.length() == 36 && potentialUuid.split("_").length == 5) { - return true; - } - return false; - } - - public String getDCCMeta() throws Exception { - SAMFileHeader tHeader = SAMFileReaderFactory.createSAMFileReader(tumourBam).getFileHeader(); - SAMFileHeader nHeader = SAMFileReaderFactory.createSAMFileReader(normalBam).getFileHeader(); - QDccMeta meta; - - meta = QDccMetaFactory.getDccMeta(uuid, nHeader, tHeader, mode); - return meta.getDCCMetaDataToString(); - } - - public void writeOutputFile(String header, File inputFile, File outputFile, boolean isGermline) throws IOException { - BufferedReader reader = new BufferedReader(new FileReader(inputFile)); - BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); - - if (!completeHeaderPresent) { - writer.write(header); - } - - String line; - while((line = reader.readLine()) != null) { - if (!line.startsWith("#") && !line.startsWith("analysis") && !completeHeaderPresent) { - writer.write(replaceIdsInLine(line, isGermline) + "\n"); - } else { - if (qexecPresent && !line.startsWith("#Q_EXEC")) { - writer.write(line + "\n"); - } - } - } - reader.close(); - writer.close(); - } - - public String getTumourSampleId() { - return tumourSampleId; - } - - public void setTumourSampleId(String tumourSampleId) { - this.tumourSampleId = tumourSampleId; - } - - public String getNormalSampleId() { - return normalSampleId; - } - - public void setNormalSampleId(String normalSampleId) { - this.normalSampleId = normalSampleId; - } - - public String replaceIdsInLine(String line, boolean isGermline) { - String[] values = line.split("\t"); - - StringBuilder sb = new StringBuilder(); - for (int i=0; i< values.length; i++) { - if (i==0 && !completeHeaderPresent) { - sb.append(uuid + "\t"); - } else if (i==1 && !completeHeaderPresent){ - if (isGermline) { - sb.append(normalSampleId + "\t"); - } else { - sb.append(tumourSampleId + "\t"); - } - } else if (i==2 && !completeHeaderPresent) { - String[] mutationStrs = values[i].split("_"); - String count = "_" + mutationStrs[mutationStrs.length-1]; - if (isGermline) { - sb.append(uuid + "_" + normalSampleId + count + "\t"); - } else { - sb.append(uuid + "_"+ tumourSampleId + count + "\t"); - } - } else { - sb.append(values[i] + "\t"); - } - } - return sb.toString(); - } - - public String getUuid() { - return uuid; - } - - public void setUuid(String uuid) { - this.uuid = uuid; - } - - public String getMode() { - return mode; - } - - public void setMode(String mode) { - this.mode = mode; - } - - public File getNormalBam() { - return normalBam; - } - - public void setNormalBam(File normalBam) { - this.normalBam = normalBam; - } - - public File getTumourBam() { - return tumourBam; - } - - public void setTumourBam(File tumourBam) { - this.tumourBam = tumourBam; - } - - public static void main(String[] args) throws Exception { - IndelDCCHeader sp = new IndelDCCHeader(); - LoadReferencedClasses.loadClasses(IndelDCCHeader.class); - sp.setup(args); - int exitStatus = sp.annotate(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - -} diff --git a/qmule/src/org/qcmg/qmule/MAF2DCC1.java b/qmule/src/org/qcmg/qmule/MAF2DCC1.java deleted file mode 100644 index 998a34a10..000000000 --- a/qmule/src/org/qcmg/qmule/MAF2DCC1.java +++ /dev/null @@ -1,418 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. 
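IndelDCCHeader.isCorrectUuidFormat() accepts any 36-character string with five '_'-separated fields, which lets through values that are not UUIDs at all. A stricter variant, purely a sketch rather than what the deleted tool did, maps the underscores back to hyphens and lets java.util.UUID do the parsing; the class and method names below are invented.

    import java.util.UUID;

    public final class UuidFormatCheck {

        /**
         * Sketch of a stricter take on IndelDCCHeader.isCorrectUuidFormat().
         * The deleted code only checked length == 36 and five '_'-separated
         * fields; here the underscores are mapped back to hyphens and the
         * string is parsed with java.util.UUID, which also checks the hex digits.
         */
        public static boolean isWellFormedUuid(String candidate) {
            if (candidate == null || candidate.length() != 36) {
                return false;
            }
            try {
                UUID.fromString(candidate.replace('_', '-'));
                return true;
            } catch (IllegalArgumentException e) {
                return false;
            }
        }

        public static void main(String[] args) {
            // a canonical UUID with hyphens rewritten to underscores, as the tool expects
            System.out.println(isWellFormedUuid("123e4567_e89b_12d3_a456_426614174000")); // true
            System.out.println(isWellFormedUuid("not_a_uuid"));                           // false
        }
    }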
- * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedFileWriter; -import org.qcmg.qmule.tab.TabbedHeader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class MAF2DCC1 { - - private String logFile; - private File mafFile; - private final List dccFiles = new ArrayList(); - private File outputDccFile; - private static QLogger logger; - private Map> mafRecords = new HashMap<>(); - private int inputMafRecordCount; - private int[] mafColumnIndexes; - private int[] dccColumnIndexes; - private String mode; - - - public String getLogFile() { - return logFile; - } - - public File getMafFile() { - return mafFile; - } - - public File getOutputDccFile() { - return outputDccFile; - } - - public Map> getMafRecords() { - return mafRecords; - } - - public void setMafRecords(Map> mafRecords) { - this.mafRecords = mafRecords; - } - - public int[] getMafColumnIndexes() { - return mafColumnIndexes; - } - - public void setMafColumnIndexes(int[] mafColumnIndexes) { - this.mafColumnIndexes = mafColumnIndexes; - } - - public int[] getDccColumnIndexes() { - return dccColumnIndexes; - } - - public void setDccColumnIndexes(int[] dccColumnIndexes) { - this.dccColumnIndexes = dccColumnIndexes; - } - - public String getMode() { - return mode; - } - - public void setMode(String mode) { - this.mode = mode; - } - - public int getInputMafRecordCount() { - return inputMafRecordCount; - } - - public List getDccFiles() { - return dccFiles; - } - - public void setup(String args[]) throws Exception{ - - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(MAF2DCC1.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("MAF2DCC1", MAF2DCC1.class.getPackage().getImplementationVersion(), args); - - // get list of file names - String[] cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_INPUT_FILES"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - mafFile = new File(cmdLineInputFiles[0]); - - for (int i=1; i 0) { - logger.warn("Could not find matches for the following records: "); - for (ChrPosition key : mafRecords.keySet()) { - logger.info("Missing at positions: " + key.toString()); - } - throw new QMuleException("MISSING_DCC_RECORDS", Integer.toString(mafRecords.size())); - } - - if (countInMaf != inputMafRecordCount || mafRecords.size() > 0) { - throw new QMuleException("COUNT_ERROR", Integer.toString(countInMaf), Integer.toString(inputMafRecordCount)); - } - - logger.info("Added " + countInMaf + " records to the dcc1 output file"); - - return 0; - } - - private void readMafFile() throws Exception { - TabbedFileReader reader = new TabbedFileReader(mafFile); - try { - int count = 0; - boolean checkForMissingColumnIndex = true; - for (TabbedRecord rec : reader) { - count++; - //header - if (rec.getData().startsWith("Hugo")) { - mafColumnIndexes = findColumnIndexesFromHeader(rec); - } else { - // only need to do this once - if (checkForMissingColumnIndex) { - if (missingColumnIndex(mafColumnIndexes)) { - throw new QMuleException("NO_COLUMN_INDEX", mafFile.getAbsolutePath()); - } - checkForMissingColumnIndex = false; - } - addToMafRecordMap(rec, count); - inputMafRecordCount++; - } - } - - logger.info("Number of input maf records: " + inputMafRecordCount); - - } finally { - reader.close(); - } - } - - private int compare(File dccFile, int count, TabbedFileWriter writer) throws Exception { - logger.info("Looking in dcc file: " + dccFile.getAbsolutePath()); - int countInMaf = 0; - int total = 0; - boolean checkForMissingColumnIndex = true; - - try (TabbedFileReader reader = new TabbedFileReader(dccFile);) { - if (count == 1) { - TabbedHeader header = reader.getHeader(); - writer.addHeader(header); - } - for (TabbedRecord rec : reader) { - //header - - if (rec.getData().startsWith("analysis_id")) { - //mutation id column - dccColumnIndexes = findColumnIndexesFromHeader(rec); - if (count == 1) { - writer.add(rec); - } - } else { - total++; - if (total % 10000 == 0) { - logger.info("Processed: " + total + " dcc records" ); - } - if (checkForMissingColumnIndex) { - if (missingColumnIndex(mafColumnIndexes)) { - throw new QMuleException("NO_MUTATION_ID", dccFile.getAbsolutePath()); - } - checkForMissingColumnIndex = false; - } - String[] strArray = rec.getDataArray(); - String chr = strArray[dccColumnIndexes[0]].replace("chr", ""); - if (chr.equals("M")) { - chr += "T"; - } - ChrPosition chrPos = new ChrRangePosition(chr, Integer.valueOf(strArray[dccColumnIndexes[1]]), Integer.valueOf(strArray[dccColumnIndexes[2]])); - if (recordInMaf(chrPos, rec)) { - writer.add(rec); - countInMaf++; - } - } - } - } - logger.info("Finished looking in dcc file: " + dccFile.getAbsolutePath() + " found " + countInMaf + " maf record/s." 
); - return countInMaf; - } - - public void addToMafRecordMap(TabbedRecord rec, int count) throws QMuleException { - String[] strArray = rec.getDataArray(); - - //need to screw around with chr1 vs 1 vs chrMT vs chrM - String chr = strArray[mafColumnIndexes[0]].replace("chr", ""); - - if (chr.equals("M")) { - chr += "T"; - } - ChrPosition chrPos = new ChrRangePosition(chr, Integer.valueOf(strArray[mafColumnIndexes[1]]), Integer.valueOf(strArray[mafColumnIndexes[2]])); - - List recordsAtThisPosition = mafRecords.get(chrPos); - if (null == recordsAtThisPosition) { - recordsAtThisPosition = new ArrayList(2); - mafRecords.put(chrPos, recordsAtThisPosition); - } - recordsAtThisPosition.add(rec); - - } - - public boolean missingColumnIndex(int[] columnIndexes) throws QMuleException { - for (int i =0; i< columnIndexes.length; i++) { - if (columnIndexes[i] == -1) { - throw new QMuleException("NO_COLUMN_INDEX"); - } - } - return false; - } - - public int[] findColumnIndexesFromHeader(TabbedRecord rec) { - int[] mutationColumns = {-1, -1, -1, -1, -1, -1}; - String[] strArray = rec.getDataArray(); - for (int i=0; i recordsAtThisPosition = mafRecords.get(dccChrPos); - if (null != recordsAtThisPosition && ! recordsAtThisPosition.isEmpty()) { - - if (recordsAtThisPosition.size() > 1) { - logger.info("more than 1 record for position: " + dccChrPos); - } - - // check to see if any of the records match our dccRec - List recordsToRemove = new ArrayList<>(2); - - for (TabbedRecord tr : recordsAtThisPosition) { - if (matchOtherColumns(tr, dccRec)) { - matches++; - if (matches > 1) { - throw new QMuleException("T0O_MANY_MATCHES", dccChrPos.toString()); - } - - // remove record from array - recordsToRemove.add(tr); - matchFound = true; - } - } - - // remove records that have been matched - recordsAtThisPosition.removeAll(recordsToRemove); - - // check to see if there are any records left, if not, remove entry from map - if (recordsAtThisPosition.isEmpty()) { - mafRecords.remove(dccChrPos); - } - } - - return matchFound; - } - - public boolean matchOtherColumns(TabbedRecord mafRec, TabbedRecord dccRec) { - String[] mafValues = mafRec.getDataArray(); - String[] dccValues = dccRec.getDataArray(); - - if (mode.equals("snp")) { - if (matchingMutation(mafValues[mafColumnIndexes[3]], dccValues[dccColumnIndexes[3]])) { - return true; - } - } - if (mode.equals("indel")) { - if (matchingMutation(mafValues[mafColumnIndexes[3]], dccValues[dccColumnIndexes[3]]) && - mafValues[mafColumnIndexes[4]].equals(dccValues[dccColumnIndexes[4]]) && - mafValues[mafColumnIndexes[5]].equals(dccValues[dccColumnIndexes[5]])) { - return true; - } - } - - - return false; - } - - public boolean matchingMutation(String mafMutation, String dccMutation) { - if ((mafMutation.equals("SNP") && dccMutation.equals("1")) || - (mafMutation.equals("INS") && dccMutation.equals("2")) || - (mafMutation.equals("DEL") && dccMutation.equals("3"))) { - return true; - } - return false; - } - - public boolean match(ChrPosition mafChrPos, ChrPosition dccChrPos) { - if (mafChrPos.getChromosome().equals(dccChrPos.getChromosome()) - && mafChrPos.getStartPosition() == dccChrPos.getStartPosition() - && mafChrPos.getEndPosition() == dccChrPos.getEndPosition()) { - return true; - } - return false; - } - - - public static void main(String[] args) throws Exception { - MAF2DCC1 sp = new MAF2DCC1(); - LoadReferencedClasses.loadClasses(MAF2DCC1.class); - sp.setup(args); - - int exitStatus = sp.annotate(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - 
System.exit(exitStatus); - } - -} diff --git a/qmule/src/org/qcmg/qmule/MAF2DCC1.java-- b/qmule/src/org/qcmg/qmule/MAF2DCC1.java-- deleted file mode 100644 index 998a34a10..000000000 --- a/qmule/src/org/qcmg/qmule/MAF2DCC1.java-- +++ /dev/null @@ -1,418 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedFileWriter; -import org.qcmg.qmule.tab.TabbedHeader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class MAF2DCC1 { - - private String logFile; - private File mafFile; - private final List dccFiles = new ArrayList(); - private File outputDccFile; - private static QLogger logger; - private Map> mafRecords = new HashMap<>(); - private int inputMafRecordCount; - private int[] mafColumnIndexes; - private int[] dccColumnIndexes; - private String mode; - - - public String getLogFile() { - return logFile; - } - - public File getMafFile() { - return mafFile; - } - - public File getOutputDccFile() { - return outputDccFile; - } - - public Map> getMafRecords() { - return mafRecords; - } - - public void setMafRecords(Map> mafRecords) { - this.mafRecords = mafRecords; - } - - public int[] getMafColumnIndexes() { - return mafColumnIndexes; - } - - public void setMafColumnIndexes(int[] mafColumnIndexes) { - this.mafColumnIndexes = mafColumnIndexes; - } - - public int[] getDccColumnIndexes() { - return dccColumnIndexes; - } - - public void setDccColumnIndexes(int[] dccColumnIndexes) { - this.dccColumnIndexes = dccColumnIndexes; - } - - public String getMode() { - return mode; - } - - public void setMode(String mode) { - this.mode = mode; - } - - public int getInputMafRecordCount() { - return inputMafRecordCount; - } - - public List getDccFiles() { - return dccFiles; - } - - public void setup(String args[]) throws Exception{ - - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(MAF2DCC1.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("MAF2DCC1", MAF2DCC1.class.getPackage().getImplementationVersion(), args); - - // get list of file names - String[] cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_INPUT_FILES"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - mafFile = new File(cmdLineInputFiles[0]); - - for (int i=1; i 0) { - logger.warn("Could not find matches for the following records: "); - for (ChrPosition key : mafRecords.keySet()) { - logger.info("Missing at positions: " + key.toString()); - } - throw new QMuleException("MISSING_DCC_RECORDS", Integer.toString(mafRecords.size())); - } - - if (countInMaf != inputMafRecordCount || mafRecords.size() > 0) { - throw new QMuleException("COUNT_ERROR", Integer.toString(countInMaf), Integer.toString(inputMafRecordCount)); - } - - logger.info("Added " + countInMaf + " records to the dcc1 output file"); - - return 0; - } - - private void readMafFile() throws Exception { - TabbedFileReader reader = new TabbedFileReader(mafFile); - try { - int count = 0; - boolean checkForMissingColumnIndex = true; - for (TabbedRecord rec : reader) { - count++; - //header - if (rec.getData().startsWith("Hugo")) { - mafColumnIndexes = findColumnIndexesFromHeader(rec); - } else { - // only need to do this once - if (checkForMissingColumnIndex) { - if (missingColumnIndex(mafColumnIndexes)) { - throw new QMuleException("NO_COLUMN_INDEX", mafFile.getAbsolutePath()); - } - checkForMissingColumnIndex = false; - } - addToMafRecordMap(rec, count); - inputMafRecordCount++; - } - } - - logger.info("Number of input maf records: " + inputMafRecordCount); - - } finally { - reader.close(); - } - } - - private int compare(File dccFile, int count, TabbedFileWriter writer) throws Exception { - logger.info("Looking in dcc file: " + dccFile.getAbsolutePath()); - int countInMaf = 0; - int total = 0; - boolean checkForMissingColumnIndex = true; - - try (TabbedFileReader reader = new TabbedFileReader(dccFile);) { - if (count == 1) { - TabbedHeader header = reader.getHeader(); - writer.addHeader(header); - } - for (TabbedRecord rec : reader) { - //header - - if (rec.getData().startsWith("analysis_id")) { - //mutation id column - dccColumnIndexes = findColumnIndexesFromHeader(rec); - if (count == 1) { - writer.add(rec); - } - } else { - total++; - if (total % 10000 == 0) { - logger.info("Processed: " + total + " dcc records" ); - } - if (checkForMissingColumnIndex) { - if (missingColumnIndex(mafColumnIndexes)) { - throw new QMuleException("NO_MUTATION_ID", dccFile.getAbsolutePath()); - } - checkForMissingColumnIndex = false; - } - String[] strArray = rec.getDataArray(); - String chr = strArray[dccColumnIndexes[0]].replace("chr", ""); - if (chr.equals("M")) { - chr += "T"; - } - ChrPosition chrPos = new ChrRangePosition(chr, Integer.valueOf(strArray[dccColumnIndexes[1]]), Integer.valueOf(strArray[dccColumnIndexes[2]])); - if (recordInMaf(chrPos, rec)) { - writer.add(rec); - countInMaf++; - } - } - } - } - logger.info("Finished looking in dcc file: " + dccFile.getAbsolutePath() + " found " + countInMaf + " maf record/s." 
); - return countInMaf; - } - - public void addToMafRecordMap(TabbedRecord rec, int count) throws QMuleException { - String[] strArray = rec.getDataArray(); - - //need to screw around with chr1 vs 1 vs chrMT vs chrM - String chr = strArray[mafColumnIndexes[0]].replace("chr", ""); - - if (chr.equals("M")) { - chr += "T"; - } - ChrPosition chrPos = new ChrRangePosition(chr, Integer.valueOf(strArray[mafColumnIndexes[1]]), Integer.valueOf(strArray[mafColumnIndexes[2]])); - - List recordsAtThisPosition = mafRecords.get(chrPos); - if (null == recordsAtThisPosition) { - recordsAtThisPosition = new ArrayList(2); - mafRecords.put(chrPos, recordsAtThisPosition); - } - recordsAtThisPosition.add(rec); - - } - - public boolean missingColumnIndex(int[] columnIndexes) throws QMuleException { - for (int i =0; i< columnIndexes.length; i++) { - if (columnIndexes[i] == -1) { - throw new QMuleException("NO_COLUMN_INDEX"); - } - } - return false; - } - - public int[] findColumnIndexesFromHeader(TabbedRecord rec) { - int[] mutationColumns = {-1, -1, -1, -1, -1, -1}; - String[] strArray = rec.getDataArray(); - for (int i=0; i recordsAtThisPosition = mafRecords.get(dccChrPos); - if (null != recordsAtThisPosition && ! recordsAtThisPosition.isEmpty()) { - - if (recordsAtThisPosition.size() > 1) { - logger.info("more than 1 record for position: " + dccChrPos); - } - - // check to see if any of the records match our dccRec - List recordsToRemove = new ArrayList<>(2); - - for (TabbedRecord tr : recordsAtThisPosition) { - if (matchOtherColumns(tr, dccRec)) { - matches++; - if (matches > 1) { - throw new QMuleException("T0O_MANY_MATCHES", dccChrPos.toString()); - } - - // remove record from array - recordsToRemove.add(tr); - matchFound = true; - } - } - - // remove records that have been matched - recordsAtThisPosition.removeAll(recordsToRemove); - - // check to see if there are any records left, if not, remove entry from map - if (recordsAtThisPosition.isEmpty()) { - mafRecords.remove(dccChrPos); - } - } - - return matchFound; - } - - public boolean matchOtherColumns(TabbedRecord mafRec, TabbedRecord dccRec) { - String[] mafValues = mafRec.getDataArray(); - String[] dccValues = dccRec.getDataArray(); - - if (mode.equals("snp")) { - if (matchingMutation(mafValues[mafColumnIndexes[3]], dccValues[dccColumnIndexes[3]])) { - return true; - } - } - if (mode.equals("indel")) { - if (matchingMutation(mafValues[mafColumnIndexes[3]], dccValues[dccColumnIndexes[3]]) && - mafValues[mafColumnIndexes[4]].equals(dccValues[dccColumnIndexes[4]]) && - mafValues[mafColumnIndexes[5]].equals(dccValues[dccColumnIndexes[5]])) { - return true; - } - } - - - return false; - } - - public boolean matchingMutation(String mafMutation, String dccMutation) { - if ((mafMutation.equals("SNP") && dccMutation.equals("1")) || - (mafMutation.equals("INS") && dccMutation.equals("2")) || - (mafMutation.equals("DEL") && dccMutation.equals("3"))) { - return true; - } - return false; - } - - public boolean match(ChrPosition mafChrPos, ChrPosition dccChrPos) { - if (mafChrPos.getChromosome().equals(dccChrPos.getChromosome()) - && mafChrPos.getStartPosition() == dccChrPos.getStartPosition() - && mafChrPos.getEndPosition() == dccChrPos.getEndPosition()) { - return true; - } - return false; - } - - - public static void main(String[] args) throws Exception { - MAF2DCC1 sp = new MAF2DCC1(); - LoadReferencedClasses.loadClasses(MAF2DCC1.class); - sp.setup(args); - - int exitStatus = sp.annotate(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - 
System.exit(exitStatus); - } - -} diff --git a/qmule/src/org/qcmg/qmule/Main.java b/qmule/src/org/qcmg/qmule/Main.java deleted file mode 100644 index fc7560b17..000000000 --- a/qmule/src/org/qcmg/qmule/Main.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - - - -/** - * The entry point for the command-line SAM/BAM merging tool. - */ -public final class Main { - -// enum Tool { -// GetBamRecords("org.qcmg.qmule.GetBamRecords"); -//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"), -//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"), -//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"); -// -//// private final String name; -// private final String fullyQualifiedName; -// -// private Tool(String fullyQualifiedName) { -//// this.name = name; -// this.fullyQualifiedName = fullyQualifiedName; -// } -// -// public String getFullyQualifiedName() { -// return fullyQualifiedName; -// } -// public static Tool getTool(String name) { -// for (Tool t : Tool.values()) { -// if (name.equals(t.name())) return t; -// } -// throw new IllegalArgumentException("Tool not found: " + name); -// } -// } - - /** - * Performs a single merge based on the supplied arguments. Errors will - * terminate the merge and display error and usage messages. - * - * @param args - * the command-line arguments. - * @throws ClassNotFoundException - */ - public static void main(final String[] args) throws ClassNotFoundException { - Options options = null; - try { - options = new Options(args); - } catch (Exception e) { - e.printStackTrace(); - } - System.out.println(Messages.USAGE); - try { - options.displayHelp(); - } catch (Exception e) { - e.printStackTrace(); - } - -// String toolName = options.getToolName(); -// Tool t = Tool.getTool(toolName); -// Class tool = Class.forName(t.getFullyQualifiedName()); -// System.out.println("Class: " + tool.getCanonicalName()); -// // Create the array of Argument Types -// Class[] argTypes = { args.getClass()}; // array is Object! -// // Now find the method -// Method m = null; -// try { -// m = tool.getMethod("main", argTypes); -// } catch (SecurityException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } catch (NoSuchMethodException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } -// System.out.println(m); -// -// // Create the actual argument array -// Object passedArgv[] = { args }; -// -// // Now invoke the method. -// try { -// m.invoke(null, passedArgv); -// } catch (IllegalArgumentException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } catch (IllegalAccessException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } catch (InvocationTargetException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } - -//) Method m = tool.getMethod("main", Object.class); -// m.iinvoke(args); - System.exit(0); - } -} diff --git a/qmule/src/org/qcmg/qmule/Main.java-- b/qmule/src/org/qcmg/qmule/Main.java-- deleted file mode 100644 index fc7560b17..000000000 --- a/qmule/src/org/qcmg/qmule/Main.java-- +++ /dev/null @@ -1,100 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - - - -/** - * The entry point for the command-line SAM/BAM merging tool. 
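Most of the deleted Main class is a commented-out attempt at reflective dispatch: resolve a tool class by name, find its static main(String[]), and invoke it. The sketch below shows that pattern in a compact, runnable form; ToolRunner and its usage message are illustrative only, and just the three reflection calls mirror the commented-out block.

    import java.lang.reflect.Method;
    import java.util.Arrays;

    /**
     * Compact illustration of the reflective dispatch the deleted Main class
     * left commented out: resolve a tool class by name, find its public
     * static main(String[]), and invoke it with the remaining arguments.
     */
    public final class ToolRunner {
        public static void main(String[] args) throws Exception {
            if (args.length == 0) {
                System.err.println("usage: ToolRunner <fully.qualified.ToolClass> [tool args...]");
                System.exit(1);
            }
            Class<?> tool = Class.forName(args[0]);                   // e.g. org.qcmg.qmule.GetBamRecords
            Method main = tool.getMethod("main", String[].class);     // public static void main(String[] args)
            String[] toolArgs = Arrays.copyOfRange(args, 1, args.length);
            main.invoke(null, (Object) toolArgs);                     // cast: pass the array as a single varargs argument
        }
    }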
- */ -public final class Main { - -// enum Tool { -// GetBamRecords("org.qcmg.qmule.GetBamRecords"); -//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"), -//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"), -//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"); -// -//// private final String name; -// private final String fullyQualifiedName; -// -// private Tool(String fullyQualifiedName) { -//// this.name = name; -// this.fullyQualifiedName = fullyQualifiedName; -// } -// -// public String getFullyQualifiedName() { -// return fullyQualifiedName; -// } -// public static Tool getTool(String name) { -// for (Tool t : Tool.values()) { -// if (name.equals(t.name())) return t; -// } -// throw new IllegalArgumentException("Tool not found: " + name); -// } -// } - - /** - * Performs a single merge based on the supplied arguments. Errors will - * terminate the merge and display error and usage messages. - * - * @param args - * the command-line arguments. - * @throws ClassNotFoundException - */ - public static void main(final String[] args) throws ClassNotFoundException { - Options options = null; - try { - options = new Options(args); - } catch (Exception e) { - e.printStackTrace(); - } - System.out.println(Messages.USAGE); - try { - options.displayHelp(); - } catch (Exception e) { - e.printStackTrace(); - } - -// String toolName = options.getToolName(); -// Tool t = Tool.getTool(toolName); -// Class tool = Class.forName(t.getFullyQualifiedName()); -// System.out.println("Class: " + tool.getCanonicalName()); -// // Create the array of Argument Types -// Class[] argTypes = { args.getClass()}; // array is Object! -// // Now find the method -// Method m = null; -// try { -// m = tool.getMethod("main", argTypes); -// } catch (SecurityException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } catch (NoSuchMethodException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } -// System.out.println(m); -// -// // Create the actual argument array -// Object passedArgv[] = { args }; -// -// // Now invoke the method. -// try { -// m.invoke(null, passedArgv); -// } catch (IllegalArgumentException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } catch (IllegalAccessException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } catch (InvocationTargetException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } - -//) Method m = tool.getMethod("main", Object.class); -// m.iinvoke(args); - System.exit(0); - } -} diff --git a/qmule/src/org/qcmg/qmule/Messages.java b/qmule/src/org/qcmg/qmule/Messages.java deleted file mode 100644 index 302f166f1..000000000 --- a/qmule/src/org/qcmg/qmule/Messages.java +++ /dev/null @@ -1,132 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.text.MessageFormat; -import java.util.ResourceBundle; - -/** - * Class used to lookup messages from this package's message bundles. - */ -public final class Messages { - - /** The Constant messages. */ - static final ResourceBundle messages = ResourceBundle - .getBundle("org.qcmg.qmule.messages"); - - /** The Constant ERROR_PREFIX. */ - static final String ERROR_PREFIX = getProgramName() + ": "; - - /** The Constant USAGE. */ - public static final String USAGE = getMessage("USAGE"); - - /** - * Gets the message. 
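The Messages class being removed is a thin wrapper over ResourceBundle plus MessageFormat: look a template up by key, then substitute positional arguments. A self-contained sketch of that pattern follows; the bundle name "demo.messages" is a placeholder (the real bundle is org.qcmg.qmule.messages), and the single varargs helper collapses the several fixed-arity getMessage overloads.

    import java.text.MessageFormat;
    import java.util.ResourceBundle;

    /**
     * Stand-alone sketch of the lookup-and-format pattern wrapped by the
     * deleted Messages class. "demo.messages" is a placeholder bundle; a
     * demo/messages.properties on the classpath would contain e.g.
     *   INPUT_FILE_READ_ERROR=Cannot read input file {0}
     */
    public class MessageLookupSketch {
        static final ResourceBundle MESSAGES = ResourceBundle.getBundle("demo.messages");

        static String getMessage(String key, Object... args) {
            return MessageFormat.format(MESSAGES.getString(key), args);
        }

        public static void main(String[] args) {
            System.out.println(getMessage("INPUT_FILE_READ_ERROR", "/tmp/missing.maf"));
        }
    }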
- * - * @param identifier the identifier - * @return the message - */ - public static String getMessage(final String identifier) { - return messages.getString(identifier); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param argument the argument - * @return the message - */ - public static String getMessage(final String identifier, final String argument) { - final String message = Messages.getMessage(identifier); - Object[] arguments = { argument }; - return MessageFormat.format(message, arguments); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param arg1 the arg1 - * @param arg2 the arg2 - * @return the message - */ - public static String getMessage(final String identifier, final String arg1, - final String arg2) { - final String message = Messages.getMessage(identifier); - Object[] arguments = { arg1, arg2 }; - return MessageFormat.format(message, arguments); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param arg1 the arg1 - * @param arg2 the arg2 - * @param arg3 the arg3 - * @return the message - */ - public static String getMessage(final String identifier, final String arg1, - final String arg2, final String arg3) { - final String message = Messages.getMessage(identifier); - Object[] arguments = { arg1, arg2, arg3 }; - return MessageFormat.format(message, arguments); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param arguments the arguments - * @return the message - */ - public static String getMessage(final String identifier, final Object[] arguments) { - final String message = Messages.getMessage(identifier); - return MessageFormat.format(message, arguments); - } - - /** - * Gets the program name. - * - * @return the program name - */ - static String getProgramName() { - return Messages.class.getPackage().getImplementationTitle(); - } - - /** - * Gets the program version. - * - * @return the program version - */ - static String getProgramVersion() { - return Messages.class.getPackage().getImplementationVersion(); - } - - /** - * Gets the version message. - * - * @return the version message - * @throws Exception the exception - */ - public static String getVersionMessage() throws Exception { - return getProgramName() + ", version " + getProgramVersion(); - } - - /** - * Reconstruct command line. - * - * @param args the args - * @return the string - */ - public static String reconstructCommandLine(final String[] args) { - String result = getProgramName() + " "; - for (final String arg : args) { - result += arg + " "; - } - return result; - } - -} diff --git a/qmule/src/org/qcmg/qmule/Messages.java-- b/qmule/src/org/qcmg/qmule/Messages.java-- deleted file mode 100644 index 302f166f1..000000000 --- a/qmule/src/org/qcmg/qmule/Messages.java-- +++ /dev/null @@ -1,132 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.text.MessageFormat; -import java.util.ResourceBundle; - -/** - * Class used to lookup messages from this package's message bundles. - */ -public final class Messages { - - /** The Constant messages. */ - static final ResourceBundle messages = ResourceBundle - .getBundle("org.qcmg.qmule.messages"); - - /** The Constant ERROR_PREFIX. */ - static final String ERROR_PREFIX = getProgramName() + ": "; - - /** The Constant USAGE. 
*/ - public static final String USAGE = getMessage("USAGE"); - - /** - * Gets the message. - * - * @param identifier the identifier - * @return the message - */ - public static String getMessage(final String identifier) { - return messages.getString(identifier); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param argument the argument - * @return the message - */ - public static String getMessage(final String identifier, final String argument) { - final String message = Messages.getMessage(identifier); - Object[] arguments = { argument }; - return MessageFormat.format(message, arguments); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param arg1 the arg1 - * @param arg2 the arg2 - * @return the message - */ - public static String getMessage(final String identifier, final String arg1, - final String arg2) { - final String message = Messages.getMessage(identifier); - Object[] arguments = { arg1, arg2 }; - return MessageFormat.format(message, arguments); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param arg1 the arg1 - * @param arg2 the arg2 - * @param arg3 the arg3 - * @return the message - */ - public static String getMessage(final String identifier, final String arg1, - final String arg2, final String arg3) { - final String message = Messages.getMessage(identifier); - Object[] arguments = { arg1, arg2, arg3 }; - return MessageFormat.format(message, arguments); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param arguments the arguments - * @return the message - */ - public static String getMessage(final String identifier, final Object[] arguments) { - final String message = Messages.getMessage(identifier); - return MessageFormat.format(message, arguments); - } - - /** - * Gets the program name. - * - * @return the program name - */ - static String getProgramName() { - return Messages.class.getPackage().getImplementationTitle(); - } - - /** - * Gets the program version. - * - * @return the program version - */ - static String getProgramVersion() { - return Messages.class.getPackage().getImplementationVersion(); - } - - /** - * Gets the version message. - * - * @return the version message - * @throws Exception the exception - */ - public static String getVersionMessage() throws Exception { - return getProgramName() + ", version " + getProgramVersion(); - } - - /** - * Reconstruct command line. - * - * @param args the args - * @return the string - */ - public static String reconstructCommandLine(final String[] args) { - String result = getProgramName() + " "; - for (final String arg : args) { - result += arg + " "; - } - return result; - } - -} diff --git a/qmule/src/org/qcmg/qmule/Options.java b/qmule/src/org/qcmg/qmule/Options.java deleted file mode 100644 index c83f4812d..000000000 --- a/qmule/src/org/qcmg/qmule/Options.java +++ /dev/null @@ -1,512 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import static java.util.Arrays.asList; - -import java.io.IOException; -import java.util.List; -import java.util.Properties; - -import joptsimple.OptionParser; -import joptsimple.OptionSet; - -/** - * The Class Options. 
- */ -public final class Options { - - public enum Ids{ - PATIENT, - SOMATIC_ANALYSIS, - GEMLINE_ANALYSIS, - TUMOUR_SAMPLE, - NORMAL_SAMPLE; - } - - /** The Constant HELP_DESCRIPTION. */ - private static final String HELP_DESCRIPTION = Messages - .getMessage("HELP_OPTION_DESCRIPTION"); - - /** The Constant VERSION_DESCRIPTION. */ - private static final String VERSION_DESCRIPTION = Messages - .getMessage("VERSION_OPTION_DESCRIPTION"); - - /** The Constant INPUT_DESCRIPTION. */ - private static final String INPUT_DESCRIPTION = Messages - .getMessage("INPUT_OPTION_DESCRIPTION"); - - /** The Constant OUTPUT_DESCRIPTION. */ - private static final String OUTPUT_DESCRIPTION = Messages - .getMessage("OUTPUT_OPTION_DESCRIPTION"); - - /** The parser. */ - private final OptionParser parser = new OptionParser(); - - /** The options. */ - private final OptionSet options; - - /** The command line. */ - private final String commandLine; - - /** The input file names. */ - private final String[] inputFileNames; - - /** The output file names. */ - private final String[] outputFileNames; - - /** The log file */ - private String logFile; - - /** The log level */ - private String logLevel; - - private String patientId; - private String somaticAnalysisId; - private String germlineAnalysisId; - private String normalSampleId; - private String tumourSampleId; - private String position; - private String pileupFormat; - private int normalCoverage; - private int numberOfThreads; - private int tumourCoverage; - private int minCoverage; - private String mafMode; - private String gff; - private String fasta; - private String[] gffRegions; - private int noOfBases; - private String mode; - - - private String column; - - private String annotation; - - private String features; - - private String tumour; - - private String normal; - - private String analysis; - - /** - * Instantiates a new options. 
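The Options constructors below drive jopt-simple through an accepts()/withRequiredArg()/ofType() declaration phase, a parse() call, and typed valueOf()/valuesOf() lookups. The compact sketch that follows shows that cycle in isolation for readers unfamiliar with the library; the option names are a subset of those in the deleted class, and OptionsParsingSketch itself is invented.

    import java.util.List;

    import joptsimple.OptionParser;
    import joptsimple.OptionSet;

    /**
     * Minimal sketch of the jopt-simple cycle the deleted Options class is
     * built on: declare options, parse the argument array, then pull typed
     * values back out of the OptionSet.
     */
    public class OptionsParsingSketch {
        public static void main(String[] args) {
            OptionParser parser = new OptionParser();
            parser.accepts("input").withRequiredArg().ofType(String.class).describedAs("inputfile");
            parser.accepts("output").withRequiredArg().ofType(String.class).describedAs("outputfile");
            parser.accepts("log").withRequiredArg().ofType(String.class).describedAs("logfile");
            parser.accepts("numberOfThreads").withRequiredArg().ofType(Integer.class);
            parser.accepts("help");

            OptionSet options = parser.parse(args);   // e.g. --input a.maf --input b.dcc1 --log run.log

            List<?> inputs = options.valuesOf("input");              // repeated options collect into a list
            String log = (String) options.valueOf("log");            // single-valued option, may be null
            Integer threads = (Integer) options.valueOf("numberOfThreads");
            boolean help = options.has("help");

            System.out.println("inputs=" + inputs + " log=" + log
                    + " threads=" + threads + " help=" + help);
        }
    }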
- * - * @param args the args - * @throws Exception the exception - */ - @SuppressWarnings("unchecked") - public Options(final String[] args) throws Exception { - commandLine = Messages.reconstructCommandLine(args); - -// parser.accepts("qmule", "Tool").withRequiredArg().ofType(String.class).describedAs("tool name"); - parser.accepts("output", OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); - parser.accepts("input", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("inputfile"); - parser.accepts("log", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("logfile"); - parser.accepts("loglevel", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("loglevel"); - parser.accepts("help", HELP_DESCRIPTION); - parser.accepts("version", VERSION_DESCRIPTION); - parser.accepts("patientId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("patientId"); - parser.accepts("somaticAnalysisId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("somaticAnalysisId"); - parser.accepts("germlineAnalysisId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("germlineAnalysisId"); - parser.accepts("normalSampleId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("normalSampleId"); - parser.accepts("tumourSampleId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("tumourSampleId"); - parser.accepts("position", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("position"); - parser.accepts("pileupFormat", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("pileupFormat"); - parser.accepts("normalCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("normalCoverage"); - parser.accepts("numberOfThreads", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("numberOfThreads"); - parser.accepts("tumourCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("tumourCoverage"); - parser.accepts("minCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("minCoverage"); - parser.accepts("mafMode", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("mafMode"); - parser.accepts("mode", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("mode"); - parser.accepts("column", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("column"); - parser.accepts("annotation", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("annotation"); - parser.accepts("gffFile", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("gffFile"); - parser.accepts("fasta", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("fasta"); - parser.accepts("feature", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("feature"); - parser.accepts("tumour", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("tumour"); - parser.accepts("normal", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("normal"); - parser.accepts("analysis", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("analysis"); - parser.accepts("verifiedInvalid", INPUT_DESCRIPTION); - parser.accepts("gffRegions", 
INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).withValuesSeparatedBy(',').describedAs("gffRegions"); - parser.accepts("noOfBases", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class).describedAs("noOfBases"); - parser.accepts("proportion", Messages - .getMessage("PROPORTION_OPTION_DESCRIPTION")).withRequiredArg().ofType(Double.class); - parser.accepts("stranded", Messages - .getMessage("STRANDED_OPTION_DESCRIPTION")); - parser.accepts("compareAll",Messages.getMessage("COMPAREALL_OPTION")); - - parser.posixlyCorrect(true); - options = parser.parse(args); - - List inputList = options.valuesOf("input"); - inputFileNames = new String[inputList.size()]; - inputList.toArray(inputFileNames); - - List outputList = options.valuesOf("output"); - outputFileNames = new String[outputList.size()]; - outputList.toArray(outputFileNames); - - logFile = (String) options.valueOf("log"); - logLevel = (String) options.valueOf("loglevel"); - - patientId = (String) options.valueOf("patientId"); - somaticAnalysisId = (String) options.valueOf("somaticAnalysisId"); - germlineAnalysisId = (String) options.valueOf("germlineAnalysisId"); - normalSampleId = (String) options.valueOf("normalSampleId"); - tumourSampleId = (String) options.valueOf("tumourSampleId"); - - // WiggleFromPileup specific options - pileupFormat = (String) options.valueOf("pileupFormat"); - if (null != options.valueOf("normalCoverage")) - normalCoverage = (Integer) options.valueOf("normalCoverage"); - if (null != options.valueOf("tumourCoverage")) - tumourCoverage = (Integer) options.valueOf("tumourCoverage"); - // end of WiggleFromPileup specific options - - //compareReferenceRegions - mode = (String) options.valueOf("mode"); - column = (String) options.valueOf("column"); - annotation = (String) options.valueOf("annotation"); - features = (String) options.valueOf("feature"); - position = (String) options.valueOf("position"); - mafMode = (String) options.valueOf("mafMode"); - - gff = (String) options.valueOf("gffFile"); - fasta = (String) options.valueOf("fasta"); - - tumour = (String) options.valueOf("tumour"); - normal = (String) options.valueOf("normal"); - analysis = (String) options.valueOf("analysis"); - - // gffRegions - List gffRegionsArgs = (List) options.valuesOf("gffRegions"); - gffRegions = new String[gffRegionsArgs.size()]; - gffRegionsArgs.toArray(gffRegions); - - // MafAddCPG specific - if (null != options.valueOf("noOfBases")) - noOfBases = (Integer) options.valueOf("noOfBases"); - - // qsignature - if (null != options.valueOf("minCoverage")) - minCoverage = (Integer) options.valueOf("minCoverage"); - - if (null != options.valueOf("numberOfThreads")) - numberOfThreads = (Integer) options.valueOf("numberOfThreads"); - - } - - /** - * - * @param className - * @param args - * @throws Exception - */ - public Options( final Class myclass, final String[] args) throws Exception { - commandLine = Messages.reconstructCommandLine(args); - - parser.acceptsAll( asList("h", "help"), HELP_DESCRIPTION ); -// parser.acceptsAll( asList("v", "version"), VERSION_DESCRIPTION); - parser.acceptsAll( asList("i", "input"), INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("input"); - parser.acceptsAll(asList("o", "output"), OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); - parser.accepts("log", Messages.getMessage("LOG_OPTION_DESCRIPTION")).withRequiredArg().ofType(String.class).describedAs("logfile"); - parser.accepts("loglevel", 
Messages.getMessage("LOGLEVEL_OPTION_DESCRIPTION")).withRequiredArg().ofType(String.class).describedAs("loglevel"); - - if( myclass.equals(AlignerCompare.class) ){ - parser.accepts("compareAll",Messages.getMessage("COMPAREALL_OPTION")); - parser.acceptsAll( asList("o", "output"), Messages.getMessage("OUTPUT_AlignerCompare")).withRequiredArg().ofType(String.class).describedAs("output"); - }else if(myclass.equals(SubSample.class)) { - parser.accepts("proportion",Messages.getMessage("PROPORTION_OPTION_DESCRIPTION")).withRequiredArg().ofType(Double.class).describedAs("[0,1]"); - }else if(myclass.equals(BAMCompress.class)){ - parser.accepts("compressLevel",Messages.getMessage("COMPRESS_LEVEL_DESCRIPTION") ).withRequiredArg().ofType(Integer.class).describedAs("[0,9]"); - } - - - //else if( myclass.equals(BamMismatchCounts.class)){} - - options = parser.parse(args); - - List inputList = options.valuesOf("input"); - inputFileNames = new String[inputList.size()]; - inputList.toArray(inputFileNames); - - List outputList = options.valuesOf("output"); - outputFileNames = new String[outputList.size()]; - outputList.toArray(outputFileNames); - - } - - public String getTumour() { - return tumour; - } - - public void setTumour(String tumour) { - this.tumour = tumour; - } - - public String getNormal() { - return normal; - } - - public void setNormal(String normal) { - this.normal = normal; - } - - public String getAnalysis() { - return analysis; - } - - public void setAnalysis(String analysis) { - this.analysis = analysis; - } - - /** - * Checks for input option. - * - * @return true, if successful - */ - public boolean hasInputOption() { - return options.has("input"); - } - - /** - * Checks for output option. - * - * @return true, if successful - */ - public boolean hasOutputOption() { - return options.has("o") || options.has("output"); - } - - /** - * Checks for version option. - * - * @return true, if successful - */ - public boolean hasVersionOption() { - return options.has("version"); - } - - public boolean getIncludeInvalid() { - return options.has("verifiedInvalid"); - } - - /** - * Checks for help option. - * - * @return true, if successful - */ - public boolean hasHelpOption() { - return options.has("help"); - } - - public boolean hasCompareAllOption() { - return options.has("compareAll"); - } - - /** - * Checks for log option. - * - * @return true, if successful - */ - public boolean hasLogOption() { - return options.has("log"); - } - - /** - * Checks for non options. - * - * @return true, if successful - */ - public boolean hasNonOptions() { - return 0 != options.nonOptionArguments().size(); - } - - /** - * Gets the input file names. - * - * @return the input file names - */ - public String[] getInputFileNames() { - return inputFileNames; - } - - /** - * Gets the output file names. - * - * @return the output file names - */ - public String[] getOutputFileNames() { - return outputFileNames; - } - - /** - * Gets the command line. 
- * - * @return the command line - */ - public String getCommandLine() { - return commandLine; - } - - public boolean hasStrandedOption() { - return options.has("stranded"); - } - - public String getPosition() { - return position; - } - public String getPileupFormat() { - return pileupFormat; - } - public int getNormalCoverage() { - return normalCoverage; - } - public int getTumourCoverage() { - return tumourCoverage; - } - public int getMinCoverage() { - return minCoverage; - } - public int getNumberOfThreads() { - return numberOfThreads; - } - public String getMafMode() { - return mafMode; - } - public String getGffFile() { - return gff; - } - public String getFastaFile() { - return fasta; - } - - public String getMode() { - return mode; - } - - public int getcompressLevel() throws Exception{ - if(options.has("compressLevel")){ - int l = (int) options.valueOf("compressLevel"); - if(l >= 0 && l <= 9) - return l; - else - throw new Exception("compressLevel must between [0,9]"); - } - - return 5; - } - //subSample - public double getPROPORTION() throws Exception{ - if(options.has("proportion")){ - - double prop = (double) options.valueOf("proportion"); -// double prop = Double.parseDouble( (String) options.valueOf("proportion") ); - if(prop > 0 && prop <= 1){ - return prop; - - } - } - throw new Exception("no proportion are specified"); - } - - - /** - * Display help. - * - * @throws Exception the exception - */ - public void displayHelp() throws IOException { - parser.printHelpOn(System.out); - } - - /** - * Detect bad options. - * - * @throws Exception the exception - */ - public void detectBadOptions() throws Exception { - if (hasNonOptions()) { - throw new Exception("ALL_ARGUMENTS_MUST_BE_OPTIONS"); - } - if (hasOutputOption() && 1 != getOutputFileNames().length) { - throw new Exception("MULTIPLE_OUTPUT_FILES_SPECIFIED"); - } - if (!hasInputOption()) { - throw new Exception("MISSING_INPUT_OPTIONS"); - } - } - - public String getLogFile(){ - return logFile; - } - - public String getLogLevel(){ - return logLevel; - } - - public Properties getIds() { - Properties props = new Properties(); - props.put(Ids.PATIENT, patientId); - props.put(Ids.SOMATIC_ANALYSIS, somaticAnalysisId); - props.put(Ids.GEMLINE_ANALYSIS, germlineAnalysisId); - props.put(Ids.NORMAL_SAMPLE, normalSampleId); - props.put(Ids.TUMOUR_SAMPLE, tumourSampleId); - return props; - } - - public String[] getGffRegions() { - - return gffRegions; - } - - public int getNoOfBases() { - - return noOfBases; - } - - public String getColumn() { - return column; - } - - public String getAnnotation() { - return annotation; - } - - public String[] getFeature() { - if (features != null) { - return features.split(","); - } - return null; - } - -} diff --git a/qmule/src/org/qcmg/qmule/Options.java-- b/qmule/src/org/qcmg/qmule/Options.java-- deleted file mode 100644 index c83f4812d..000000000 --- a/qmule/src/org/qcmg/qmule/Options.java-- +++ /dev/null @@ -1,512 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import static java.util.Arrays.asList; - -import java.io.IOException; -import java.util.List; -import java.util.Properties; - -import joptsimple.OptionParser; -import joptsimple.OptionSet; - -/** - * The Class Options. 
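For context, a minimal sketch of how a qmule tool typically drives this Options class — construct it from the raw args, honour --help, validate, then pull the file names. The MyTool class name is hypothetical and used for illustration only:

    import org.qcmg.qmule.Options;

    public class MyTool {                               // hypothetical caller, illustration only
        public static void main(String[] args) throws Exception {
            Options options = new Options(args);        // parsed by the jopt-simple OptionParser shown above
            if (options.hasHelpOption()) {
                options.displayHelp();                  // prints the parser's help to stdout
                return;
            }
            options.detectBadOptions();                 // rejects non-option args, multiple outputs, missing --input
            String[] inputs  = options.getInputFileNames();
            String[] outputs = options.getOutputFileNames();
            String logFile   = options.getLogFile();    // null when --log was not supplied
            // ... tool-specific work ...
        }
    }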
- */ -public final class Options { - - public enum Ids{ - PATIENT, - SOMATIC_ANALYSIS, - GEMLINE_ANALYSIS, - TUMOUR_SAMPLE, - NORMAL_SAMPLE; - } - - /** The Constant HELP_DESCRIPTION. */ - private static final String HELP_DESCRIPTION = Messages - .getMessage("HELP_OPTION_DESCRIPTION"); - - /** The Constant VERSION_DESCRIPTION. */ - private static final String VERSION_DESCRIPTION = Messages - .getMessage("VERSION_OPTION_DESCRIPTION"); - - /** The Constant INPUT_DESCRIPTION. */ - private static final String INPUT_DESCRIPTION = Messages - .getMessage("INPUT_OPTION_DESCRIPTION"); - - /** The Constant OUTPUT_DESCRIPTION. */ - private static final String OUTPUT_DESCRIPTION = Messages - .getMessage("OUTPUT_OPTION_DESCRIPTION"); - - /** The parser. */ - private final OptionParser parser = new OptionParser(); - - /** The options. */ - private final OptionSet options; - - /** The command line. */ - private final String commandLine; - - /** The input file names. */ - private final String[] inputFileNames; - - /** The output file names. */ - private final String[] outputFileNames; - - /** The log file */ - private String logFile; - - /** The log level */ - private String logLevel; - - private String patientId; - private String somaticAnalysisId; - private String germlineAnalysisId; - private String normalSampleId; - private String tumourSampleId; - private String position; - private String pileupFormat; - private int normalCoverage; - private int numberOfThreads; - private int tumourCoverage; - private int minCoverage; - private String mafMode; - private String gff; - private String fasta; - private String[] gffRegions; - private int noOfBases; - private String mode; - - - private String column; - - private String annotation; - - private String features; - - private String tumour; - - private String normal; - - private String analysis; - - /** - * Instantiates a new options. 
- * - * @param args the args - * @throws Exception the exception - */ - @SuppressWarnings("unchecked") - public Options(final String[] args) throws Exception { - commandLine = Messages.reconstructCommandLine(args); - -// parser.accepts("qmule", "Tool").withRequiredArg().ofType(String.class).describedAs("tool name"); - parser.accepts("output", OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); - parser.accepts("input", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("inputfile"); - parser.accepts("log", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("logfile"); - parser.accepts("loglevel", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("loglevel"); - parser.accepts("help", HELP_DESCRIPTION); - parser.accepts("version", VERSION_DESCRIPTION); - parser.accepts("patientId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("patientId"); - parser.accepts("somaticAnalysisId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("somaticAnalysisId"); - parser.accepts("germlineAnalysisId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("germlineAnalysisId"); - parser.accepts("normalSampleId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("normalSampleId"); - parser.accepts("tumourSampleId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("tumourSampleId"); - parser.accepts("position", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("position"); - parser.accepts("pileupFormat", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("pileupFormat"); - parser.accepts("normalCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("normalCoverage"); - parser.accepts("numberOfThreads", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("numberOfThreads"); - parser.accepts("tumourCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("tumourCoverage"); - parser.accepts("minCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("minCoverage"); - parser.accepts("mafMode", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("mafMode"); - parser.accepts("mode", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("mode"); - parser.accepts("column", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("column"); - parser.accepts("annotation", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("annotation"); - parser.accepts("gffFile", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("gffFile"); - parser.accepts("fasta", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("fasta"); - parser.accepts("feature", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("feature"); - parser.accepts("tumour", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("tumour"); - parser.accepts("normal", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("normal"); - parser.accepts("analysis", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("analysis"); - parser.accepts("verifiedInvalid", INPUT_DESCRIPTION); - parser.accepts("gffRegions", 
INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).withValuesSeparatedBy(',').describedAs("gffRegions"); - parser.accepts("noOfBases", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class).describedAs("noOfBases"); - parser.accepts("proportion", Messages - .getMessage("PROPORTION_OPTION_DESCRIPTION")).withRequiredArg().ofType(Double.class); - parser.accepts("stranded", Messages - .getMessage("STRANDED_OPTION_DESCRIPTION")); - parser.accepts("compareAll",Messages.getMessage("COMPAREALL_OPTION")); - - parser.posixlyCorrect(true); - options = parser.parse(args); - - List inputList = options.valuesOf("input"); - inputFileNames = new String[inputList.size()]; - inputList.toArray(inputFileNames); - - List outputList = options.valuesOf("output"); - outputFileNames = new String[outputList.size()]; - outputList.toArray(outputFileNames); - - logFile = (String) options.valueOf("log"); - logLevel = (String) options.valueOf("loglevel"); - - patientId = (String) options.valueOf("patientId"); - somaticAnalysisId = (String) options.valueOf("somaticAnalysisId"); - germlineAnalysisId = (String) options.valueOf("germlineAnalysisId"); - normalSampleId = (String) options.valueOf("normalSampleId"); - tumourSampleId = (String) options.valueOf("tumourSampleId"); - - // WiggleFromPileup specific options - pileupFormat = (String) options.valueOf("pileupFormat"); - if (null != options.valueOf("normalCoverage")) - normalCoverage = (Integer) options.valueOf("normalCoverage"); - if (null != options.valueOf("tumourCoverage")) - tumourCoverage = (Integer) options.valueOf("tumourCoverage"); - // end of WiggleFromPileup specific options - - //compareReferenceRegions - mode = (String) options.valueOf("mode"); - column = (String) options.valueOf("column"); - annotation = (String) options.valueOf("annotation"); - features = (String) options.valueOf("feature"); - position = (String) options.valueOf("position"); - mafMode = (String) options.valueOf("mafMode"); - - gff = (String) options.valueOf("gffFile"); - fasta = (String) options.valueOf("fasta"); - - tumour = (String) options.valueOf("tumour"); - normal = (String) options.valueOf("normal"); - analysis = (String) options.valueOf("analysis"); - - // gffRegions - List gffRegionsArgs = (List) options.valuesOf("gffRegions"); - gffRegions = new String[gffRegionsArgs.size()]; - gffRegionsArgs.toArray(gffRegions); - - // MafAddCPG specific - if (null != options.valueOf("noOfBases")) - noOfBases = (Integer) options.valueOf("noOfBases"); - - // qsignature - if (null != options.valueOf("minCoverage")) - minCoverage = (Integer) options.valueOf("minCoverage"); - - if (null != options.valueOf("numberOfThreads")) - numberOfThreads = (Integer) options.valueOf("numberOfThreads"); - - } - - /** - * - * @param className - * @param args - * @throws Exception - */ - public Options( final Class myclass, final String[] args) throws Exception { - commandLine = Messages.reconstructCommandLine(args); - - parser.acceptsAll( asList("h", "help"), HELP_DESCRIPTION ); -// parser.acceptsAll( asList("v", "version"), VERSION_DESCRIPTION); - parser.acceptsAll( asList("i", "input"), INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("input"); - parser.acceptsAll(asList("o", "output"), OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); - parser.accepts("log", Messages.getMessage("LOG_OPTION_DESCRIPTION")).withRequiredArg().ofType(String.class).describedAs("logfile"); - parser.accepts("loglevel", 
Messages.getMessage("LOGLEVEL_OPTION_DESCRIPTION")).withRequiredArg().ofType(String.class).describedAs("loglevel"); - - if( myclass.equals(AlignerCompare.class) ){ - parser.accepts("compareAll",Messages.getMessage("COMPAREALL_OPTION")); - parser.acceptsAll( asList("o", "output"), Messages.getMessage("OUTPUT_AlignerCompare")).withRequiredArg().ofType(String.class).describedAs("output"); - }else if(myclass.equals(SubSample.class)) { - parser.accepts("proportion",Messages.getMessage("PROPORTION_OPTION_DESCRIPTION")).withRequiredArg().ofType(Double.class).describedAs("[0,1]"); - }else if(myclass.equals(BAMCompress.class)){ - parser.accepts("compressLevel",Messages.getMessage("COMPRESS_LEVEL_DESCRIPTION") ).withRequiredArg().ofType(Integer.class).describedAs("[0,9]"); - } - - - //else if( myclass.equals(BamMismatchCounts.class)){} - - options = parser.parse(args); - - List inputList = options.valuesOf("input"); - inputFileNames = new String[inputList.size()]; - inputList.toArray(inputFileNames); - - List outputList = options.valuesOf("output"); - outputFileNames = new String[outputList.size()]; - outputList.toArray(outputFileNames); - - } - - public String getTumour() { - return tumour; - } - - public void setTumour(String tumour) { - this.tumour = tumour; - } - - public String getNormal() { - return normal; - } - - public void setNormal(String normal) { - this.normal = normal; - } - - public String getAnalysis() { - return analysis; - } - - public void setAnalysis(String analysis) { - this.analysis = analysis; - } - - /** - * Checks for input option. - * - * @return true, if successful - */ - public boolean hasInputOption() { - return options.has("input"); - } - - /** - * Checks for output option. - * - * @return true, if successful - */ - public boolean hasOutputOption() { - return options.has("o") || options.has("output"); - } - - /** - * Checks for version option. - * - * @return true, if successful - */ - public boolean hasVersionOption() { - return options.has("version"); - } - - public boolean getIncludeInvalid() { - return options.has("verifiedInvalid"); - } - - /** - * Checks for help option. - * - * @return true, if successful - */ - public boolean hasHelpOption() { - return options.has("help"); - } - - public boolean hasCompareAllOption() { - return options.has("compareAll"); - } - - /** - * Checks for log option. - * - * @return true, if successful - */ - public boolean hasLogOption() { - return options.has("log"); - } - - /** - * Checks for non options. - * - * @return true, if successful - */ - public boolean hasNonOptions() { - return 0 != options.nonOptionArguments().size(); - } - - /** - * Gets the input file names. - * - * @return the input file names - */ - public String[] getInputFileNames() { - return inputFileNames; - } - - /** - * Gets the output file names. - * - * @return the output file names - */ - public String[] getOutputFileNames() { - return outputFileNames; - } - - /** - * Gets the command line. 
- * - * @return the command line - */ - public String getCommandLine() { - return commandLine; - } - - public boolean hasStrandedOption() { - return options.has("stranded"); - } - - public String getPosition() { - return position; - } - public String getPileupFormat() { - return pileupFormat; - } - public int getNormalCoverage() { - return normalCoverage; - } - public int getTumourCoverage() { - return tumourCoverage; - } - public int getMinCoverage() { - return minCoverage; - } - public int getNumberOfThreads() { - return numberOfThreads; - } - public String getMafMode() { - return mafMode; - } - public String getGffFile() { - return gff; - } - public String getFastaFile() { - return fasta; - } - - public String getMode() { - return mode; - } - - public int getcompressLevel() throws Exception{ - if(options.has("compressLevel")){ - int l = (int) options.valueOf("compressLevel"); - if(l >= 0 && l <= 9) - return l; - else - throw new Exception("compressLevel must between [0,9]"); - } - - return 5; - } - //subSample - public double getPROPORTION() throws Exception{ - if(options.has("proportion")){ - - double prop = (double) options.valueOf("proportion"); -// double prop = Double.parseDouble( (String) options.valueOf("proportion") ); - if(prop > 0 && prop <= 1){ - return prop; - - } - } - throw new Exception("no proportion are specified"); - } - - - /** - * Display help. - * - * @throws Exception the exception - */ - public void displayHelp() throws IOException { - parser.printHelpOn(System.out); - } - - /** - * Detect bad options. - * - * @throws Exception the exception - */ - public void detectBadOptions() throws Exception { - if (hasNonOptions()) { - throw new Exception("ALL_ARGUMENTS_MUST_BE_OPTIONS"); - } - if (hasOutputOption() && 1 != getOutputFileNames().length) { - throw new Exception("MULTIPLE_OUTPUT_FILES_SPECIFIED"); - } - if (!hasInputOption()) { - throw new Exception("MISSING_INPUT_OPTIONS"); - } - } - - public String getLogFile(){ - return logFile; - } - - public String getLogLevel(){ - return logLevel; - } - - public Properties getIds() { - Properties props = new Properties(); - props.put(Ids.PATIENT, patientId); - props.put(Ids.SOMATIC_ANALYSIS, somaticAnalysisId); - props.put(Ids.GEMLINE_ANALYSIS, germlineAnalysisId); - props.put(Ids.NORMAL_SAMPLE, normalSampleId); - props.put(Ids.TUMOUR_SAMPLE, tumourSampleId); - return props; - } - - public String[] getGffRegions() { - - return gffRegions; - } - - public int getNoOfBases() { - - return noOfBases; - } - - public String getColumn() { - return column; - } - - public String getAnnotation() { - return annotation; - } - - public String[] getFeature() { - if (features != null) { - return features.split(","); - } - return null; - } - -} diff --git a/qmule/src/org/qcmg/qmule/Pileup.java b/qmule/src/org/qcmg/qmule/Pileup.java deleted file mode 100644 index c1503ab6a..000000000 --- a/qmule/src/org/qcmg/qmule/Pileup.java +++ /dev/null @@ -1,101 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Comparator; -import java.util.Iterator; -import java.util.Map; -import java.util.TreeMap; -import java.util.Map.Entry; - -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionComparator; -import org.qcmg.common.model.QPileupSimpleRecord; -import org.qcmg.picard.SAMFileReaderFactory; - - -public class Pileup { - private static final Comparator COMPARATOR = new ChrPositionComparator(); - private static QLogger logger = QLoggerFactory.getLogger(Pileup.class); - - Map pileup = new TreeMap(); -// Map pileup = new HashMap(10000000, 0.99f); - - private void engage(String args[]) throws IOException { - - SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(args[0])); - FileWriter writer = new FileWriter(new File(args[1])); - - int counter = 0; - for (SAMRecord sr : reader) { - parseRecord(sr); - if (++counter % 100000 == 0) { - logger.info("hit " + counter + " reads in bam file, size of pileup map is: " + pileup.size()); - - // output contents of pileup to file to clear memory - // get current chromosome and position an write out - //all records a couple of hundred bases prior to that position - writePileup(writer, sr.getReferenceName(), sr.getAlignmentStart() - 500); - } - } - logger.info("Done!! No of reads in file: " + counter + ", size of pileup map is: " + pileup.size() ); - } - - private void writePileup(FileWriter writer, String chromosome, int position) throws IOException { - ChrPosition chrPos = ChrPointPosition.valueOf(chromosome, position); - - Iterator> iter = pileup.entrySet().iterator(); - - while (iter.hasNext()) { - Map.Entry entry = iter.next(); - if (0 < COMPARATOR.compare(chrPos, entry.getKey())) { - - writer.write(entry.getKey().getChromosome() + "\t" + - entry.getKey().getStartPosition() + "\t" + - entry.getValue().getFormattedString()); - - iter.remove(); - } - } - - } - - private void parseRecord(SAMRecord sr) { - - ChrPosition chrPos; - QPileupSimpleRecord pileupRec; - int position = 0; - - for (byte b : sr.getReadBases()) { - chrPos = ChrPointPosition.valueOf(sr.getReferenceName(), sr.getAlignmentStart() + position++); - pileupRec = pileup.get(chrPos); - if (null == pileupRec) { - pileupRec = new QPileupSimpleRecord(); - pileup.put(chrPos, pileupRec); - } - pileupRec.incrementBase(b); - } - - - } - - - - public static void main(String[] args) throws IOException { - Pileup p = new Pileup(); - p.engage(args); - } -} diff --git a/qmule/src/org/qcmg/qmule/Pileup.java-- b/qmule/src/org/qcmg/qmule/Pileup.java-- deleted file mode 100644 index c1503ab6a..000000000 --- a/qmule/src/org/qcmg/qmule/Pileup.java-- +++ /dev/null @@ -1,101 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
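The Pileup class deleted above is driven purely by positional arguments: args[0] is the BAM (presumably coordinate-sorted, given the 500 bp flush window) and args[1] the output text file of per-position base counts. A hedged invocation sketch, reusing the jar name from the usage message further down in this patch and with illustrative file names:

    java -cp qmule-0.1pre.jar org.qcmg.qmule.Pileup sample.bam sample.pileup.txt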
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Comparator; -import java.util.Iterator; -import java.util.Map; -import java.util.TreeMap; -import java.util.Map.Entry; - -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionComparator; -import org.qcmg.common.model.QPileupSimpleRecord; -import org.qcmg.picard.SAMFileReaderFactory; - - -public class Pileup { - private static final Comparator COMPARATOR = new ChrPositionComparator(); - private static QLogger logger = QLoggerFactory.getLogger(Pileup.class); - - Map pileup = new TreeMap(); -// Map pileup = new HashMap(10000000, 0.99f); - - private void engage(String args[]) throws IOException { - - SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(args[0])); - FileWriter writer = new FileWriter(new File(args[1])); - - int counter = 0; - for (SAMRecord sr : reader) { - parseRecord(sr); - if (++counter % 100000 == 0) { - logger.info("hit " + counter + " reads in bam file, size of pileup map is: " + pileup.size()); - - // output contents of pileup to file to clear memory - // get current chromosome and position an write out - //all records a couple of hundred bases prior to that position - writePileup(writer, sr.getReferenceName(), sr.getAlignmentStart() - 500); - } - } - logger.info("Done!! No of reads in file: " + counter + ", size of pileup map is: " + pileup.size() ); - } - - private void writePileup(FileWriter writer, String chromosome, int position) throws IOException { - ChrPosition chrPos = ChrPointPosition.valueOf(chromosome, position); - - Iterator> iter = pileup.entrySet().iterator(); - - while (iter.hasNext()) { - Map.Entry entry = iter.next(); - if (0 < COMPARATOR.compare(chrPos, entry.getKey())) { - - writer.write(entry.getKey().getChromosome() + "\t" + - entry.getKey().getStartPosition() + "\t" + - entry.getValue().getFormattedString()); - - iter.remove(); - } - } - - } - - private void parseRecord(SAMRecord sr) { - - ChrPosition chrPos; - QPileupSimpleRecord pileupRec; - int position = 0; - - for (byte b : sr.getReadBases()) { - chrPos = ChrPointPosition.valueOf(sr.getReferenceName(), sr.getAlignmentStart() + position++); - pileupRec = pileup.get(chrPos); - if (null == pileupRec) { - pileupRec = new QPileupSimpleRecord(); - pileup.put(chrPos, pileupRec); - } - pileupRec.incrementBase(b); - } - - - } - - - - public static void main(String[] args) throws IOException { - Pileup p = new Pileup(); - p.engage(args); - } -} diff --git a/qmule/src/org/qcmg/qmule/PileupStats.java b/qmule/src/org/qcmg/qmule/PileupStats.java deleted file mode 100644 index e2ea6d844..000000000 --- a/qmule/src/org/qcmg/qmule/PileupStats.java +++ /dev/null @@ -1,254 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Map.Entry; -import java.util.TreeMap; - -import htsjdk.samtools.Cigar; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.picard.SAMFileReaderFactory; - -public class PileupStats { - - private String logFile; - private File inputFile; - private File outputFile; - private File bamFile; - private static QLogger logger; - - public int engage() throws Exception { - - BufferedReader reader = new BufferedReader(new FileReader(inputFile)); - BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); - - writer.write(getHeader()); - String line; - int count = 0; - while ((line = reader.readLine()) != null) { - String[] values = line.split("\t"); - - String result = pileup(values[0], new Integer(values[1]), new Integer(values[2])); - - writer.write(line + "\t" + result + "\n"); - //System.out.println(line + "\t " + result); - if (count++ % 1000 == 0) { - logger.info("Number processed: " + count); - } - } - logger.info("Total processed: " + count); - reader.close(); - writer.close(); - - return 0; - } - - private String getHeader() { - return "chr\tposition\tposition\tbed\tbed\tbed\ttotal reads\ttotal unmapped" + - "\ttotal mates unmapped\ttotal indels\ttotal mismatch reads\ttotal soft clips" + - "\ttotal hard clips\ttotal spliced reads\ttotal duplicates\tmismatch counts\tsplice lengths\n"; - } - - private String pileup(String chromosome, int start, int end) throws IOException { - SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile, "silent"); - - SAMRecordIterator iterator = reader.queryOverlapping(chromosome, start, end); - - int totalReads = 0; - int totalMatesUnmapped = 0; - int totalUnmapped = 0; - int totalDuplicates = 0; - int totalMismatches = 0; - int totalSpliced = 0; - int totalSoftClips = 0; - int totalHardClips = 0; - int totalIndels = 0; - TreeMap spliceMap = new TreeMap(); - TreeMap mismatchMap = new TreeMap(); - - while (iterator.hasNext()) { - SAMRecord record = iterator.next(); - if (record.getReadUnmappedFlag()) { - totalUnmapped++; - } else { - totalReads++; - if (record.getDuplicateReadFlag()) { - totalDuplicates++; - } else { - - if (record.getMateUnmappedFlag()) { - totalMatesUnmapped++; - } - - //cigars - Cigar cigar = record.getCigar(); - - for (CigarElement ce : cigar.getCigarElements()) { - if (ce.getOperator().equals(CigarOperator.DELETION) || ce.getOperator().equals(CigarOperator.INSERTION)) { - totalIndels++; - } - - if (ce.getOperator().equals(CigarOperator.SOFT_CLIP)) { - totalSoftClips++; - } - if (ce.getOperator().equals(CigarOperator.HARD_CLIP)) { - totalHardClips++; - } - if (ce.getOperator().equals(CigarOperator.N)) { - totalSpliced++; - Integer length = new Integer(ce.getLength()); - int count = 1; - if (spliceMap.containsKey(length)) { - count += spliceMap.get(length); - } - spliceMap.put(length, count); - } - } - - //MD tag - String mdData = (String) record.getAttribute("MD"); - int matches = tallyMDMismatches(mdData); - if (matches > 0) { - totalMismatches++; - } - int count = 1; - if (mismatchMap.containsKey(matches)) { - count += 
mismatchMap.get(matches); - } - mismatchMap.put(matches, count); - - } - } - - } - - iterator.close(); - reader.close(); - - String spliceCounts = getMapString(spliceMap); - String mismatchCounts = getMapString(mismatchMap); - - String result = totalReads + "\t" + totalUnmapped + "\t" + totalMatesUnmapped + "\t" + totalIndels + "\t" - + totalMismatches + "\t" + totalSoftClips + "\t" + totalHardClips + "\t" + totalSpliced + "\t" + totalDuplicates - + "\t" + mismatchCounts + "\t" + spliceCounts; - return result; - } - - private String getMapString(TreeMap map) { - StringBuilder sb = new StringBuilder(); - - for (Entry entry: map.entrySet()) { - sb.append(entry.getKey() + ":" + entry.getValue() + ";"); - } - - return sb.toString(); - } - - public int tallyMDMismatches(String mdData) { - int count = 0; - if (null != mdData) { - for (int i = 0, size = mdData.length() ; i < size ; ) { - char c = mdData.charAt(i); - if (isValidMismatch(c)) { - count++; - i++; - } else if ('^' == c) { - while (++i < size && Character.isLetter(mdData.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... - } - } - return count; - } - - private boolean isValidMismatch(char c) { - return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(PileupStats.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("PileupStats", PileupStats.class.getPackage().getImplementationVersion(), args); - - // get list of file names - String[] cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - String[] cmdLineOutputFiles = options.getOutputFileNames(); - if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - - for (String file : cmdLineOutputFiles) { - if (new File(file).exists() && !new File(file).isDirectory()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - - bamFile = new File(cmdLineInputFiles[0]); - inputFile = new File(cmdLineInputFiles[1]); - outputFile = new File(cmdLineOutputFiles[0]); - logger.info("Bam file: " + bamFile); - logger.info("Input file: " + inputFile); - logger.info("Output file: " + outputFile); - - } - - return returnStatus; - } - - public static void main(String[] args) throws Exception { - PileupStats sp = new PileupStats(); - sp.setup(args); - int exitStatus = sp.engage(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } -} diff --git a/qmule/src/org/qcmg/qmule/PileupStats.java-- b/qmule/src/org/qcmg/qmule/PileupStats.java-- deleted file mode 100644 index e2ea6d844..000000000 --- a/qmule/src/org/qcmg/qmule/PileupStats.java-- +++ /dev/null @@ -1,254 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Map.Entry; -import java.util.TreeMap; - -import htsjdk.samtools.Cigar; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.picard.SAMFileReaderFactory; - -public class PileupStats { - - private String logFile; - private File inputFile; - private File outputFile; - private File bamFile; - private static QLogger logger; - - public int engage() throws Exception { - - BufferedReader reader = new BufferedReader(new FileReader(inputFile)); - BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); - - writer.write(getHeader()); - String line; - int count = 0; - while ((line = reader.readLine()) != null) { - String[] values = line.split("\t"); - - String result = pileup(values[0], new Integer(values[1]), new Integer(values[2])); - - writer.write(line + "\t" + result + "\n"); - //System.out.println(line + "\t " + result); - if (count++ % 1000 == 0) { - logger.info("Number processed: " + count); - } - } - logger.info("Total processed: " + count); - reader.close(); - writer.close(); - - return 0; - } - - private String getHeader() { - return "chr\tposition\tposition\tbed\tbed\tbed\ttotal reads\ttotal unmapped" + - "\ttotal mates unmapped\ttotal indels\ttotal mismatch reads\ttotal soft clips" + - "\ttotal hard clips\ttotal spliced reads\ttotal duplicates\tmismatch counts\tsplice lengths\n"; - } - - private String pileup(String chromosome, int start, int end) throws IOException { - SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile, "silent"); - - SAMRecordIterator iterator = reader.queryOverlapping(chromosome, start, end); - - int totalReads = 0; - int totalMatesUnmapped = 0; - int totalUnmapped = 0; - int totalDuplicates = 0; - int totalMismatches = 0; - int totalSpliced = 0; - 
int totalSoftClips = 0; - int totalHardClips = 0; - int totalIndels = 0; - TreeMap spliceMap = new TreeMap(); - TreeMap mismatchMap = new TreeMap(); - - while (iterator.hasNext()) { - SAMRecord record = iterator.next(); - if (record.getReadUnmappedFlag()) { - totalUnmapped++; - } else { - totalReads++; - if (record.getDuplicateReadFlag()) { - totalDuplicates++; - } else { - - if (record.getMateUnmappedFlag()) { - totalMatesUnmapped++; - } - - //cigars - Cigar cigar = record.getCigar(); - - for (CigarElement ce : cigar.getCigarElements()) { - if (ce.getOperator().equals(CigarOperator.DELETION) || ce.getOperator().equals(CigarOperator.INSERTION)) { - totalIndels++; - } - - if (ce.getOperator().equals(CigarOperator.SOFT_CLIP)) { - totalSoftClips++; - } - if (ce.getOperator().equals(CigarOperator.HARD_CLIP)) { - totalHardClips++; - } - if (ce.getOperator().equals(CigarOperator.N)) { - totalSpliced++; - Integer length = new Integer(ce.getLength()); - int count = 1; - if (spliceMap.containsKey(length)) { - count += spliceMap.get(length); - } - spliceMap.put(length, count); - } - } - - //MD tag - String mdData = (String) record.getAttribute("MD"); - int matches = tallyMDMismatches(mdData); - if (matches > 0) { - totalMismatches++; - } - int count = 1; - if (mismatchMap.containsKey(matches)) { - count += mismatchMap.get(matches); - } - mismatchMap.put(matches, count); - - } - } - - } - - iterator.close(); - reader.close(); - - String spliceCounts = getMapString(spliceMap); - String mismatchCounts = getMapString(mismatchMap); - - String result = totalReads + "\t" + totalUnmapped + "\t" + totalMatesUnmapped + "\t" + totalIndels + "\t" - + totalMismatches + "\t" + totalSoftClips + "\t" + totalHardClips + "\t" + totalSpliced + "\t" + totalDuplicates - + "\t" + mismatchCounts + "\t" + spliceCounts; - return result; - } - - private String getMapString(TreeMap map) { - StringBuilder sb = new StringBuilder(); - - for (Entry entry: map.entrySet()) { - sb.append(entry.getKey() + ":" + entry.getValue() + ";"); - } - - return sb.toString(); - } - - public int tallyMDMismatches(String mdData) { - int count = 0; - if (null != mdData) { - for (int i = 0, size = mdData.length() ; i < size ; ) { - char c = mdData.charAt(i); - if (isValidMismatch(c)) { - count++; - i++; - } else if ('^' == c) { - while (++i < size && Character.isLetter(mdData.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... - } - } - return count; - } - - private boolean isValidMismatch(char c) { - return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(PileupStats.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("PileupStats", PileupStats.class.getPackage().getImplementationVersion(), args); - - // get list of file names - String[] cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - String[] cmdLineOutputFiles = options.getOutputFileNames(); - if ( ! FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - - for (String file : cmdLineOutputFiles) { - if (new File(file).exists() && !new File(file).isDirectory()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - - bamFile = new File(cmdLineInputFiles[0]); - inputFile = new File(cmdLineInputFiles[1]); - outputFile = new File(cmdLineOutputFiles[0]); - logger.info("Bam file: " + bamFile); - logger.info("Input file: " + inputFile); - logger.info("Output file: " + outputFile); - - } - - return returnStatus; - } - - public static void main(String[] args) throws Exception { - PileupStats sp = new PileupStats(); - sp.setup(args); - int exitStatus = sp.engage(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } -} diff --git a/qmule/src/org/qcmg/qmule/QMuleException.java b/qmule/src/org/qcmg/qmule/QMuleException.java deleted file mode 100644 index 2e85e03f0..000000000 --- a/qmule/src/org/qcmg/qmule/QMuleException.java +++ /dev/null @@ -1,28 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -public final class QMuleException extends Exception { - private static final long serialVersionUID = -4575755996356751582L; - - public QMuleException(final String identifier) { - super(Messages.getMessage(identifier)); - } - - public QMuleException(final String identifier, final String argument) { - super(Messages.getMessage(identifier, argument)); - } - - public QMuleException(final String identifier, final String arg1, final String arg2) { - super(Messages.getMessage(identifier, arg1, arg2)); - } - - public QMuleException(final String identifier, final String arg1, final String arg2, final String arg3) { - super(Messages.getMessage(identifier, arg1, arg2, arg3)); - } - - public QMuleException(final String identifier, final Object[] arguments) { - super(Messages.getMessage(identifier, arguments)); - } -} diff --git a/qmule/src/org/qcmg/qmule/QMuleException.java-- b/qmule/src/org/qcmg/qmule/QMuleException.java-- deleted file mode 100644 index 2e85e03f0..000000000 --- a/qmule/src/org/qcmg/qmule/QMuleException.java-- +++ /dev/null @@ -1,28 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
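A note on tallyMDMismatches in the PileupStats class above: the SAM MD tag encodes runs of matching bases as numbers, a single reference-base letter at each substitution, and '^' followed by reference bases at a deletion; the method counts only the substitution letters and skips the deleted bases after '^'. A small worked sketch — the MD value and class name are illustrative only:

    import org.qcmg.qmule.PileupStats;

    public class MdTallyExample {
        public static void main(String[] args) {
            // "10A5^AC6T2": 10 matches, substitution at ref A, 5 matches,
            // deletion of ref bases AC (skipped after '^'), 6 matches, substitution at ref T, 2 matches.
            int mismatches = new PileupStats().tallyMDMismatches("10A5^AC6T2");
            System.out.println(mismatches);              // prints 2: only substitutions are counted
        }
    }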
- */ -package org.qcmg.qmule; - -public final class QMuleException extends Exception { - private static final long serialVersionUID = -4575755996356751582L; - - public QMuleException(final String identifier) { - super(Messages.getMessage(identifier)); - } - - public QMuleException(final String identifier, final String argument) { - super(Messages.getMessage(identifier, argument)); - } - - public QMuleException(final String identifier, final String arg1, final String arg2) { - super(Messages.getMessage(identifier, arg1, arg2)); - } - - public QMuleException(final String identifier, final String arg1, final String arg2, final String arg3) { - super(Messages.getMessage(identifier, arg1, arg2, arg3)); - } - - public QMuleException(final String identifier, final Object[] arguments) { - super(Messages.getMessage(identifier, arguments)); - } -} diff --git a/qmule/src/org/qcmg/qmule/QueryCADDLib.java b/qmule/src/org/qcmg/qmule/QueryCADDLib.java deleted file mode 100644 index eece05fe3..000000000 --- a/qmule/src/org/qcmg/qmule/QueryCADDLib.java +++ /dev/null @@ -1,187 +0,0 @@ -/** - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. -*/ -package org.qcmg.qmule; - - -import htsjdk.tribble.readers.TabixReader; - -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.header.VcfHeader; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.vcf.VCFFileReader; - - -public class QueryCADDLib { -// protected final static ArrayList libBlocks = new ArrayList<>(); -// protected final static ArrayList inputBlocks = new ArrayList<>(); -// protected final static ArrayList outputBlocks = new ArrayList<>(); - - protected final static Map positionRecordMap = new HashMap(); - protected static long outputNo = 0; - protected static long blockNo = 0; - protected static long inputNo = 0; - final String CADD = "CADD"; - - public QueryCADDLib(final String input_gzip_file, final String vcf, final String output, final int gap) throws IOException{ - - TabixReader tabix = new TabixReader( input_gzip_file); - String chr = null; - int pos = 0; - int start = -1; - - System.out.println("Below is the stats for each queried block, follow the format \norder: query(ref,start,end) [CADDLibBlockSize, inputVariantNo, outputVariantNo, runtime]"); - - try (VCFFileReader reader = new VCFFileReader(vcf); - FileWriter writer = new FileWriter(new File(output))) { - for (final VcfRecord re : reader){ - if(re.getChromosome().equals(chr) && - (re.getPosition() - pos) < gap ){ - pos = re.getPosition(); - add2Map(re); - }else{ - //s1: query(chr:start:pos), and output - if(chr != null){ - if(chr.startsWith("chr")) chr = chr.substring(3); - TabixReader.Iterator it = tabix.query(chr, start-1, pos); - //debug - System.out.print(String.format("%8d: query(%s, %8d, %8d) ", blockNo++, chr, start, pos)); - query( it, writer ); - - } - //s2: reset -// //debug bf clear -// for( Entry entry: positionRecordMap.entrySet()){ -// if(entry.getValue().getFilter() == null) -// System.out.println(entry.getValue().toString()); -// } - - positionRecordMap.clear(); - chr = re.getChromosome(); - start = re.getPosition(); - pos = 
re.getPosition(); - add2Map(re); - } - } - //last block - if(chr != null){ - if(chr.startsWith("chr")) chr = chr.substring(3); - TabixReader.Iterator it = tabix.query(chr, start, pos); - query( it, writer ); - } - - }//end try - - System.out.println("total input variants is " + inputNo); - System.out.println("total outputed and annotated variants is " + outputNo); - System.out.println("total query CADD library time is " + blockNo); - - } - - /** - * it remove "chr" string from reference name if exists - * @param re input vcf record - */ - private void add2Map(VcfRecord re){ - ChrPosition chr = re.getChrPosition(); - if(chr.getChromosome().startsWith("chr")) - chr = new ChrRangePosition(re.getChromosome().substring(3), re.getChrPosition().getStartPosition(), re.getChrPosition().getEndPosition()); // orig.getChromosome().substring(3); - - - re.setFilter(null); //for debug - positionRecordMap.put(chr, re); - } - - - private void query(TabixReader.Iterator it,FileWriter writer ) throws IOException{ - long startTime = System.currentTimeMillis(); - - String line; - String[] eles; - String last = null; - - int blockSize = 0; - int outputSize = 0; - - while(( line = it.next())!= null){ - blockSize ++; - eles = TabTokenizer.tokenize(line, '\t'); - int s = Integer.parseInt(eles[1]); //start position = second column - int e = s + eles[2].length() - 1; //start position + length -1 - - //only retrive the first annotation entry from CADD library - String entry = eles[0] + ":" + eles[1] + ":" +eles[2]+ ":" + eles[4]; - if(entry.equals(last)) continue; - else last = entry; - - VcfRecord inputVcf = positionRecordMap.get(new ChrRangePosition(eles[0], s, e )); - - if ( (null == inputVcf) || !inputVcf.getRef().equalsIgnoreCase(eles[2])) continue; - - String[] allels = {inputVcf.getAlt()}; - if(inputVcf.getAlt().contains(",")) - allels = TabTokenizer.tokenize(inputVcf.getAlt(), ','); - - String cadd = ""; - - //it will exit loop once find the matched allele - for(String al : allels) - if(al.equalsIgnoreCase(eles[4])){ - cadd = String.format("(%s=>%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s)", eles[2],eles[4],eles[8],eles[10],eles[11],eles[12],eles[17], - eles[21],eles[26],eles[35],eles[39],eles[72],eles[82],eles[83],eles[86],eles[92],eles[92],eles[93],eles[96]); - String info = inputVcf.getInfoRecord().getField(CADD); - info = (info == null)? 
CADD + "=" + cadd : CADD + "=" + info + "," + cadd; - inputVcf.appendInfo( info); - - writer.append(inputVcf.toString() + "\n"); - outputSize ++; - } - } - - //get stats - long endTime = System.currentTimeMillis(); - String time = QLogger.getRunTime(startTime, endTime); - System.out.println(String.format("[ %8d,%8d,%8d, %s ] ", blockSize, positionRecordMap.size(), outputSize, time)); - inputNo += positionRecordMap.size(); - outputNo += outputSize; - } - - - public static void main(String[] args) { - - long startTime = System.currentTimeMillis(); - try{ - String gzlib = args[0]; - String input = args[1]; - String output = args[2]; - int gap = 1000; - if(args.length > 3) - gap = Integer.parseInt(args[3]); - - new QueryCADDLib(gzlib, input, output, gap); - - }catch(Exception e){ - e.printStackTrace(); - System.err.println("Usage: java -cp qmule-0.1pre.jar QueryCADDLib "); - } - - long endTime = System.currentTimeMillis(); - String time = QLogger.getRunTime(startTime, endTime); - System.out.println("run Time is " + time); - } -} - diff --git a/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- b/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- deleted file mode 100644 index eece05fe3..000000000 --- a/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- +++ /dev/null @@ -1,187 +0,0 @@ -/** - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. -*/ -package org.qcmg.qmule; - - -import htsjdk.tribble.readers.TabixReader; - -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.header.VcfHeader; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.vcf.VCFFileReader; - - -public class QueryCADDLib { -// protected final static ArrayList libBlocks = new ArrayList<>(); -// protected final static ArrayList inputBlocks = new ArrayList<>(); -// protected final static ArrayList outputBlocks = new ArrayList<>(); - - protected final static Map positionRecordMap = new HashMap(); - protected static long outputNo = 0; - protected static long blockNo = 0; - protected static long inputNo = 0; - final String CADD = "CADD"; - - public QueryCADDLib(final String input_gzip_file, final String vcf, final String output, final int gap) throws IOException{ - - TabixReader tabix = new TabixReader( input_gzip_file); - String chr = null; - int pos = 0; - int start = -1; - - System.out.println("Below is the stats for each queried block, follow the format \norder: query(ref,start,end) [CADDLibBlockSize, inputVariantNo, outputVariantNo, runtime]"); - - try (VCFFileReader reader = new VCFFileReader(vcf); - FileWriter writer = new FileWriter(new File(output))) { - for (final VcfRecord re : reader){ - if(re.getChromosome().equals(chr) && - (re.getPosition() - pos) < gap ){ - pos = re.getPosition(); - add2Map(re); - }else{ - //s1: query(chr:start:pos), and output - if(chr != null){ - if(chr.startsWith("chr")) chr = chr.substring(3); - TabixReader.Iterator it = tabix.query(chr, start-1, pos); - //debug - System.out.print(String.format("%8d: query(%s, %8d, %8d) ", blockNo++, chr, start, pos)); - query( it, writer ); - - } - //s2: reset -// //debug bf clear -// for( Entry entry: positionRecordMap.entrySet()){ -// 
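On the batching in QueryCADDLib above: input VCF records are pooled while they stay on the same contig and each position is less than gap bases from the previous one (default 1,000); a contig change or a larger jump flushes the pool as a single tabix range query, with any leading "chr" stripped to match the CADD library's naming. A worked illustration with made-up positions and the default gap:

    chr1:10012, chr1:10440, chr1:10980   -> one pool; flushed as tabix.query("1", 10011, 10980)
    chr1:25000                           -> jump of 14,020 >= gap, so the pool above is queried first
    chr2:500                             -> contig change, likewise starts a new pool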
if(entry.getValue().getFilter() == null) -// System.out.println(entry.getValue().toString()); -// } - - positionRecordMap.clear(); - chr = re.getChromosome(); - start = re.getPosition(); - pos = re.getPosition(); - add2Map(re); - } - } - //last block - if(chr != null){ - if(chr.startsWith("chr")) chr = chr.substring(3); - TabixReader.Iterator it = tabix.query(chr, start, pos); - query( it, writer ); - } - - }//end try - - System.out.println("total input variants is " + inputNo); - System.out.println("total outputed and annotated variants is " + outputNo); - System.out.println("total query CADD library time is " + blockNo); - - } - - /** - * it remove "chr" string from reference name if exists - * @param re input vcf record - */ - private void add2Map(VcfRecord re){ - ChrPosition chr = re.getChrPosition(); - if(chr.getChromosome().startsWith("chr")) - chr = new ChrRangePosition(re.getChromosome().substring(3), re.getChrPosition().getStartPosition(), re.getChrPosition().getEndPosition()); // orig.getChromosome().substring(3); - - - re.setFilter(null); //for debug - positionRecordMap.put(chr, re); - } - - - private void query(TabixReader.Iterator it,FileWriter writer ) throws IOException{ - long startTime = System.currentTimeMillis(); - - String line; - String[] eles; - String last = null; - - int blockSize = 0; - int outputSize = 0; - - while(( line = it.next())!= null){ - blockSize ++; - eles = TabTokenizer.tokenize(line, '\t'); - int s = Integer.parseInt(eles[1]); //start position = second column - int e = s + eles[2].length() - 1; //start position + length -1 - - //only retrive the first annotation entry from CADD library - String entry = eles[0] + ":" + eles[1] + ":" +eles[2]+ ":" + eles[4]; - if(entry.equals(last)) continue; - else last = entry; - - VcfRecord inputVcf = positionRecordMap.get(new ChrRangePosition(eles[0], s, e )); - - if ( (null == inputVcf) || !inputVcf.getRef().equalsIgnoreCase(eles[2])) continue; - - String[] allels = {inputVcf.getAlt()}; - if(inputVcf.getAlt().contains(",")) - allels = TabTokenizer.tokenize(inputVcf.getAlt(), ','); - - String cadd = ""; - - //it will exit loop once find the matched allele - for(String al : allels) - if(al.equalsIgnoreCase(eles[4])){ - cadd = String.format("(%s=>%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s)", eles[2],eles[4],eles[8],eles[10],eles[11],eles[12],eles[17], - eles[21],eles[26],eles[35],eles[39],eles[72],eles[82],eles[83],eles[86],eles[92],eles[92],eles[93],eles[96]); - String info = inputVcf.getInfoRecord().getField(CADD); - info = (info == null)? 
CADD + "=" + cadd : CADD + "=" + info + "," + cadd; - inputVcf.appendInfo( info); - - writer.append(inputVcf.toString() + "\n"); - outputSize ++; - } - } - - //get stats - long endTime = System.currentTimeMillis(); - String time = QLogger.getRunTime(startTime, endTime); - System.out.println(String.format("[ %8d,%8d,%8d, %s ] ", blockSize, positionRecordMap.size(), outputSize, time)); - inputNo += positionRecordMap.size(); - outputNo += outputSize; - } - - - public static void main(String[] args) { - - long startTime = System.currentTimeMillis(); - try{ - String gzlib = args[0]; - String input = args[1]; - String output = args[2]; - int gap = 1000; - if(args.length > 3) - gap = Integer.parseInt(args[3]); - - new QueryCADDLib(gzlib, input, output, gap); - - }catch(Exception e){ - e.printStackTrace(); - System.err.println("Usage: java -cp qmule-0.1pre.jar QueryCADDLib "); - } - - long endTime = System.currentTimeMillis(); - String time = QLogger.getRunTime(startTime, endTime); - System.out.println("run Time is " + time); - } -} - diff --git a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java deleted file mode 100644 index 86499809c..000000000 --- a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java +++ /dev/null @@ -1,280 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionComparator; -import org.qcmg.common.string.StringUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; -import org.qcmg.vcf.VCFFileReader; - -public class ReAnnotateDccWithDbSNP { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - private String header; - - private static QLogger logger; - - private Map dccs = new HashMap(); - - - public int engage() throws Exception { - - loadDccFile(); - - updateDBSnpData(); - - writeDCCOutput(); - - - return exitStatus; - } - - private void writeDCCOutput() throws Exception { - if ( ! 
StringUtils.isNullOrEmpty(cmdLineOutputFiles[0])) { - FileWriter writer = new FileWriter(new File(cmdLineOutputFiles[0])); - try { - //sort - List data = new ArrayList(dccs.keySet()); - Collections.sort(data, new ChrPositionComparator()); - - - writer.write(header + "\tdbSnpVer\n"); - - for (ChrPosition cp : data) { - String[] dcc = dccs.get(cp); - StringBuilder sb = new StringBuilder(); - for (String s : dcc) { - if (sb.length() > 0) sb.append('\t'); - sb.append(s); - } - writer.write(sb.toString() + '\n'); - } - - } finally { - writer.close(); - } - } - } - - - private void loadDccFile() throws Exception { - logger.info("Attempting to load dcc data"); - TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); - int count = 0; - try { - for (TabbedRecord rec : reader) { - if (++count == 1) { // header line - header = rec.getData(); - continue; - } - String[] params = TabTokenizer.tokenize(rec.getData()); - ChrPosition cp = ChrPointPosition.valueOf(params[4], Integer.parseInt(params[5])); - - // reset dbsnpid - params[20] = null; -// StringBuilder sb = new StringBuilder(); -// for (String s : params) { -// if (sb.length() > 0) sb.append('\t'); -// sb.append(s); -// } -// rec.setData(sb.toString()); - dccs.put(cp, params); - } - } finally { - reader.close(); - } - logger.info("Attempting to load dcc data - DONE with " + dccs.size() + " entries"); - } - - private void updateDBSnpData() throws Exception { - - VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[1])); - - int count = 0, multipleVersions = 0; - int pre30 = 0, thirty = 0, thirtyOne = 0, thirtyTwo = 0, thirtyThree = 0, thirtyFour = 0, thirtyFive = 0; - try { - for (VcfRecord dbSNPVcf : reader) { - if (++count % 1000000 == 0) - logger.info("hit " + count + " dbsnp records"); - - if ( ! StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "VC=SNV", false)) continue; - // vcf dbSNP record chromosome does not contain "chr", whereas the positionRecordMap does - add - String[] params = dccs.get(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition())); - if (null == params) continue; - - // if no dbsnp data - continue - String previousDBSnpValue = params[20]; - if ( ! StringUtils.isNullOrEmpty(previousDBSnpValue)) { - multipleVersions++; - continue; - } - -// logger.info("Resetting previousDBSnpValue of: " + previousDBSnpValue + " to " + dbSNPVcf.getId()); - - // only proceed if we have a SNP variant record - int startIndex = dbSNPVcf.getInfo().indexOf("dbSNPBuildID=") + 13; - int endIndex = dbSNPVcf.getInfo().indexOf(";" , startIndex); - String dbSnpVersion = dbSNPVcf.getInfo().substring(startIndex, endIndex); -// logger.info("dbsnp version = " + dbSnpVersion); - - int dbSnpVersionInt = Integer.parseInt(dbSnpVersion); - if (dbSnpVersionInt < 130) pre30++; - else if (dbSnpVersionInt == 130) thirty++; - else if (dbSnpVersionInt == 131) thirtyOne++; - else if (dbSnpVersionInt == 132) thirtyTwo++; - else if (dbSnpVersionInt == 133) thirtyThree++; - else if (dbSnpVersionInt == 134) thirtyFour++; - else if (dbSnpVersionInt == 135) thirtyFive++; - else if (dbSnpVersionInt > 135) logger.info("hmmm: " + dbSnpVersionInt); - - params[20] = dbSNPVcf.getId(); - params = Arrays.copyOf(params, params.length + 1); - params[params.length -1] = dbSnpVersion; - dccs.put(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition()), params); - - -// GenotypeEnum tumour = snpRecord.getTumourGenotype(); -// //TODO should we continue if the tumour Genotype is null?? 
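
updateDBSnpData() above pulls the dbSNP build out of the INFO column with indexOf on "dbSNPBuildID=". A small, hedged sketch of the same key lookup done field by field; the key name and the sample string are illustrative only.

    final class InfoFieldSketch {

        /** Returns the value of `key` in a semicolon-delimited INFO string, or null if absent. */
        static String infoValue(String info, String key) {
            if (info == null) return null;
            for (String field : info.split(";")) {
                int eq = field.indexOf('=');
                if (eq > 0 && field.substring(0, eq).equals(key)) {
                    return field.substring(eq + 1);
                }
            }
            return null;
        }

        public static void main(String[] args) {
            String info = "RSPOS=10327;dbSNPBuildID=132;SSR=0;VC=SNV";
            System.out.println(infoValue(info, "dbSNPBuildID"));  // prints 132
        }
    }
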
-// if (null == tumour) continue; -// -// // multiple dbSNP entries can exist for a position. -// // if we already have dbSNP info for this snp, check to see if the dbSNP alt is shorter than the existing dbSNP record -// // if so, proceed, and re-write dbSNP details (if applicable). -// int dbSNPAltLengh = dbSNPVcf.getAlt().length(); -// if (snpRecord.getDbSnpAltLength() > 0 && dbSNPAltLengh > snpRecord.getDbSnpAltLength()) { -// continue; -// } -// -// // deal with multiple alt bases -// String [] alts = null; -// if (dbSNPAltLengh == 1) { -// alts = new String[] {dbSNPVcf.getAlt()}; -// } else if (dbSNPAltLengh > 1){ -// alts = TabTokenizer.tokenize(dbSNPVcf.getAlt(), ','); -// } -// -// if (null != alts) { -// for (String alt : alts) { -// -// GenotypeEnum dbSnpGenotype = BaseUtils.getGenotypeEnum(dbSNPVcf.getRef() + alt); -// if (null == dbSnpGenotype) { -// logger.warn("Couldn't get Genotype from dbSNP position with variant: " + alt); -// continue; -// } -//// // no longer flip the genotype as dbSNP is reporting on the +ve strand -////// if (reverseStrand) { -////// dbSnpGenotype = dbSnpGenotype.getComplement(); -////// } -// if (tumour == dbSnpGenotype || (tumour.isHomozygous() && dbSnpGenotype.containsAllele(tumour.getFirstAllele()))) { -// boolean reverseStrand = StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "RV", false); -//// boolean reverseStrand = VcfUtils.isDbSNPVcfRecordOnReverseStrand(dbSNPVcf.getInfo()); -// snpRecord.setDbSnpStrand(reverseStrand ? '-' : '+'); -// snpRecord.setDbSnpId(dbSNPVcf.getId()); -// snpRecord.setDbSnpGenotype(dbSnpGenotype); -// snpRecord.setDbSnpAltLength(dbSNPAltLengh); -// break; -// } -// } -// } - } - } finally { - reader.close(); - } - logger.info("STATS:"); - logger.info("No of dcc records with dbSNP version of pre 130: " + pre30); - logger.info("No of dcc records with dbSNP version of 130: " + thirty); - logger.info("No of dcc records with dbSNP version of 131: " + thirtyOne); - logger.info("No of dcc records with dbSNP version of 132: " + thirtyTwo); - logger.info("No of dcc records with dbSNP version of 133: " + thirtyThree); - logger.info("No of dcc records with dbSNP version of 134: " + thirtyFour); - logger.info("No of dcc records with dbSNP version of 135: " + thirtyFive); - logger.info("No of dcc records with duplicate dbSNP versions : " + multipleVersions); - logger.info("Total no of dcc records with dbSNP data : " + (pre30 + thirty + thirtyOne + thirtyTwo + thirtyThree + thirtyFour + thirtyFive)); - } - - public static void main(String[] args) throws Exception { - ReAnnotateDccWithDbSNP sp = new ReAnnotateDccWithDbSNP(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(ReAnnotateDccWithDbSNP.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("ReAnnotateDccWithDbSNP", ReAnnotateDccWithDbSNP.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- deleted file mode 100644 index 86499809c..000000000 --- a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- +++ /dev/null @@ -1,280 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionComparator; -import org.qcmg.common.string.StringUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; -import org.qcmg.vcf.VCFFileReader; - -public class ReAnnotateDccWithDbSNP { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - private String header; - - private static QLogger logger; - - private Map dccs = new HashMap(); - - - public int engage() throws Exception { - - loadDccFile(); - - updateDBSnpData(); - - writeDCCOutput(); - - - return exitStatus; - } - - private void writeDCCOutput() throws Exception { - if ( ! 
StringUtils.isNullOrEmpty(cmdLineOutputFiles[0])) { - FileWriter writer = new FileWriter(new File(cmdLineOutputFiles[0])); - try { - //sort - List data = new ArrayList(dccs.keySet()); - Collections.sort(data, new ChrPositionComparator()); - - - writer.write(header + "\tdbSnpVer\n"); - - for (ChrPosition cp : data) { - String[] dcc = dccs.get(cp); - StringBuilder sb = new StringBuilder(); - for (String s : dcc) { - if (sb.length() > 0) sb.append('\t'); - sb.append(s); - } - writer.write(sb.toString() + '\n'); - } - - } finally { - writer.close(); - } - } - } - - - private void loadDccFile() throws Exception { - logger.info("Attempting to load dcc data"); - TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); - int count = 0; - try { - for (TabbedRecord rec : reader) { - if (++count == 1) { // header line - header = rec.getData(); - continue; - } - String[] params = TabTokenizer.tokenize(rec.getData()); - ChrPosition cp = ChrPointPosition.valueOf(params[4], Integer.parseInt(params[5])); - - // reset dbsnpid - params[20] = null; -// StringBuilder sb = new StringBuilder(); -// for (String s : params) { -// if (sb.length() > 0) sb.append('\t'); -// sb.append(s); -// } -// rec.setData(sb.toString()); - dccs.put(cp, params); - } - } finally { - reader.close(); - } - logger.info("Attempting to load dcc data - DONE with " + dccs.size() + " entries"); - } - - private void updateDBSnpData() throws Exception { - - VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[1])); - - int count = 0, multipleVersions = 0; - int pre30 = 0, thirty = 0, thirtyOne = 0, thirtyTwo = 0, thirtyThree = 0, thirtyFour = 0, thirtyFive = 0; - try { - for (VcfRecord dbSNPVcf : reader) { - if (++count % 1000000 == 0) - logger.info("hit " + count + " dbsnp records"); - - if ( ! StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "VC=SNV", false)) continue; - // vcf dbSNP record chromosome does not contain "chr", whereas the positionRecordMap does - add - String[] params = dccs.get(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition())); - if (null == params) continue; - - // if no dbsnp data - continue - String previousDBSnpValue = params[20]; - if ( ! StringUtils.isNullOrEmpty(previousDBSnpValue)) { - multipleVersions++; - continue; - } - -// logger.info("Resetting previousDBSnpValue of: " + previousDBSnpValue + " to " + dbSNPVcf.getId()); - - // only proceed if we have a SNP variant record - int startIndex = dbSNPVcf.getInfo().indexOf("dbSNPBuildID=") + 13; - int endIndex = dbSNPVcf.getInfo().indexOf(";" , startIndex); - String dbSnpVersion = dbSNPVcf.getInfo().substring(startIndex, endIndex); -// logger.info("dbsnp version = " + dbSnpVersion); - - int dbSnpVersionInt = Integer.parseInt(dbSnpVersion); - if (dbSnpVersionInt < 130) pre30++; - else if (dbSnpVersionInt == 130) thirty++; - else if (dbSnpVersionInt == 131) thirtyOne++; - else if (dbSnpVersionInt == 132) thirtyTwo++; - else if (dbSnpVersionInt == 133) thirtyThree++; - else if (dbSnpVersionInt == 134) thirtyFour++; - else if (dbSnpVersionInt == 135) thirtyFive++; - else if (dbSnpVersionInt > 135) logger.info("hmmm: " + dbSnpVersionInt); - - params[20] = dbSNPVcf.getId(); - params = Arrays.copyOf(params, params.length + 1); - params[params.length -1] = dbSnpVersion; - dccs.put(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition()), params); - - -// GenotypeEnum tumour = snpRecord.getTumourGenotype(); -// //TODO should we continue if the tumour Genotype is null?? 
-// if (null == tumour) continue; -// -// // multiple dbSNP entries can exist for a position. -// // if we already have dbSNP info for this snp, check to see if the dbSNP alt is shorter than the existing dbSNP record -// // if so, proceed, and re-write dbSNP details (if applicable). -// int dbSNPAltLengh = dbSNPVcf.getAlt().length(); -// if (snpRecord.getDbSnpAltLength() > 0 && dbSNPAltLengh > snpRecord.getDbSnpAltLength()) { -// continue; -// } -// -// // deal with multiple alt bases -// String [] alts = null; -// if (dbSNPAltLengh == 1) { -// alts = new String[] {dbSNPVcf.getAlt()}; -// } else if (dbSNPAltLengh > 1){ -// alts = TabTokenizer.tokenize(dbSNPVcf.getAlt(), ','); -// } -// -// if (null != alts) { -// for (String alt : alts) { -// -// GenotypeEnum dbSnpGenotype = BaseUtils.getGenotypeEnum(dbSNPVcf.getRef() + alt); -// if (null == dbSnpGenotype) { -// logger.warn("Couldn't get Genotype from dbSNP position with variant: " + alt); -// continue; -// } -//// // no longer flip the genotype as dbSNP is reporting on the +ve strand -////// if (reverseStrand) { -////// dbSnpGenotype = dbSnpGenotype.getComplement(); -////// } -// if (tumour == dbSnpGenotype || (tumour.isHomozygous() && dbSnpGenotype.containsAllele(tumour.getFirstAllele()))) { -// boolean reverseStrand = StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "RV", false); -//// boolean reverseStrand = VcfUtils.isDbSNPVcfRecordOnReverseStrand(dbSNPVcf.getInfo()); -// snpRecord.setDbSnpStrand(reverseStrand ? '-' : '+'); -// snpRecord.setDbSnpId(dbSNPVcf.getId()); -// snpRecord.setDbSnpGenotype(dbSnpGenotype); -// snpRecord.setDbSnpAltLength(dbSNPAltLengh); -// break; -// } -// } -// } - } - } finally { - reader.close(); - } - logger.info("STATS:"); - logger.info("No of dcc records with dbSNP version of pre 130: " + pre30); - logger.info("No of dcc records with dbSNP version of 130: " + thirty); - logger.info("No of dcc records with dbSNP version of 131: " + thirtyOne); - logger.info("No of dcc records with dbSNP version of 132: " + thirtyTwo); - logger.info("No of dcc records with dbSNP version of 133: " + thirtyThree); - logger.info("No of dcc records with dbSNP version of 134: " + thirtyFour); - logger.info("No of dcc records with dbSNP version of 135: " + thirtyFive); - logger.info("No of dcc records with duplicate dbSNP versions : " + multipleVersions); - logger.info("Total no of dcc records with dbSNP data : " + (pre30 + thirty + thirtyOne + thirtyTwo + thirtyThree + thirtyFour + thirtyFive)); - } - - public static void main(String[] args) throws Exception { - ReAnnotateDccWithDbSNP sp = new ReAnnotateDccWithDbSNP(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(ReAnnotateDccWithDbSNP.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("ReAnnotateDccWithDbSNP", ReAnnotateDccWithDbSNP.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/ReadPartGZFile.java b/qmule/src/org/qcmg/qmule/ReadPartGZFile.java deleted file mode 100644 index ee8018ccc..000000000 --- a/qmule/src/org/qcmg/qmule/ReadPartGZFile.java +++ /dev/null @@ -1,152 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import htsjdk.tribble.readers.TabixReader; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.HashSet; -import java.util.Set; -import java.util.zip.GZIPInputStream; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.util.FileUtils; -import org.qcmg.vcf.VCFSerializer; - - -public class ReadPartGZFile { - -// static InputStream getInputStream(File input_gzip_file) throws FileNotFoundException, IOException{ -// InputStream inputStream; -// // if (FileUtils.isFileGZip(input_gzip_file)) { -// if (FileUtils.isInputGZip(input_gzip_file)) { -// GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(input_gzip_file)); -// try(InputStreamReader streamReader = new InputStreamReader(gzis)){ -// inputStream = new GZIPInputStream(new FileInputStream(input_gzip_file)); -// } -// } else { -// FileInputStream stream = new FileInputStream(input_gzip_file); -// try(InputStreamReader streamReader = new InputStreamReader(stream)){ -// BufferedReader in = new BufferedReader(streamReader); -// inputStream = new FileInputStream(input_gzip_file); -// } -// } -// return inputStream; -// } - - - - - ReadPartGZFile(File input_gzip_file, int no) throws Exception{ - - //get a new stream rather than a closed one - InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? 
- new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); - - try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ - int num = 0; - String line; - while( (line = reader.readLine() ) != null){ - if( ++num > no) break; - System.out.println(line); - } - } - - - } - static void countLines(File input_gzip_file) throws FileNotFoundException, IOException, InterruptedException{ - HashSet uniqRef = new HashSet(); - - long startTime = System.currentTimeMillis(); - long num = 0; -// InputStream inputStream = getInputStream(input_gzip_file); - InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? - new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); - - try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ - String line; - while( (line = reader.readLine() ) != null){ - uniqRef.add(line.split("\\t")[0]); - num ++; - } - } - - System.out.println(String.format("Read file: %s\nLine number: %d", input_gzip_file.getAbsoluteFile(), num)); - System.out.println("Uniq reference name are " + uniqRef ); - - - } - - static void countUniqPosition(String input_gzip_file, String indexFile) throws IOException{ - // TabixReader tabix = new TabixReader( input_gzip_file, indexFile); - TabixReader tabix = new TabixReader( input_gzip_file); - Set chrs = tabix.getChromosomes(); - HashSet uniqPos = new HashSet(); - long total_uniq = 0; - long num = 0; - System.out.println("total reference number is " + chrs.size() + " from " + input_gzip_file); - for(String str : chrs){ - - uniqPos.clear(); - TabixReader.Iterator it = tabix.query(str); - - - String line; - while(( line = it.next())!= null){ - // String[] eles = TabTokenizer.tokenize(line, '\t'); - // uniqPos.add(eles[1]); - // uniqPos.add(line.split("\\t")[1]); - num ++; - } - - //debug - System.out.println("There are " + num+ " position recorded in reference " + str); - num ++; - - - // total_uniq += uniqPos.size(); - // System.out.println("There are " + uniqPos.size() + " uniq position recorded in reference " + str); - - } - -// System.out.println("Total uniq position recorded in all reference is " + total_uniq); -// System.out.println("Total records in whole file is " + num); - - } - - public static void main(String[] args) { - try{ - long startTime = System.currentTimeMillis(); - File input = new File(args[0]); - int no = Integer.parseInt(args[1]); - - if(no > 0) - new ReadPartGZFile(input, no ); - else if (no == 0) - countUniqPosition(args[0], null); - else - countLines(input); - - long endTime = System.currentTimeMillis(); - String time = QLogger.getRunTime(startTime, endTime); - System.out.println("run Time is " + time); - - }catch(Exception e){ - e.printStackTrace(); - //System.out.println(e.printStackTrace();); - System.err.println("Usage: java -cp qmule-0.1pre.jar ReadPartGZFile "); - - } - - } -} diff --git a/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- b/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- deleted file mode 100644 index ee8018ccc..000000000 --- a/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- +++ /dev/null @@ -1,152 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
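
ReadPartGZFile above picks between a GZIPInputStream and a plain FileInputStream via the qcmg FileUtils helper before printing the first N lines. Here is a JDK-only sketch of the same idea, assuming the two-byte gzip magic number is an adequate test; the class and method names are placeholders.

    import java.io.*;
    import java.nio.charset.StandardCharsets;
    import java.util.zip.GZIPInputStream;

    final class HeadMaybeGzip {

        static void printFirstLines(File file, int n) throws IOException {
            InputStream raw = new BufferedInputStream(new FileInputStream(file), 65536);
            raw.mark(2);
            int b1 = raw.read(), b2 = raw.read();
            raw.reset();
            boolean gzip = (b1 == 0x1f && b2 == 0x8b);          // gzip magic number
            InputStream in = gzip ? new GZIPInputStream(raw, 65536) : raw;
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
                String line;
                int num = 0;
                while ((line = reader.readLine()) != null && ++num <= n) {
                    System.out.println(line);
                }
            }
        }

        public static void main(String[] args) throws IOException {
            printFirstLines(new File(args[0]), Integer.parseInt(args[1]));
        }
    }
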
- */ -package org.qcmg.qmule; - -import htsjdk.tribble.readers.TabixReader; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.HashSet; -import java.util.Set; -import java.util.zip.GZIPInputStream; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.util.FileUtils; -import org.qcmg.vcf.VCFSerializer; - - -public class ReadPartGZFile { - -// static InputStream getInputStream(File input_gzip_file) throws FileNotFoundException, IOException{ -// InputStream inputStream; -// // if (FileUtils.isFileGZip(input_gzip_file)) { -// if (FileUtils.isInputGZip(input_gzip_file)) { -// GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(input_gzip_file)); -// try(InputStreamReader streamReader = new InputStreamReader(gzis)){ -// inputStream = new GZIPInputStream(new FileInputStream(input_gzip_file)); -// } -// } else { -// FileInputStream stream = new FileInputStream(input_gzip_file); -// try(InputStreamReader streamReader = new InputStreamReader(stream)){ -// BufferedReader in = new BufferedReader(streamReader); -// inputStream = new FileInputStream(input_gzip_file); -// } -// } -// return inputStream; -// } - - - - - ReadPartGZFile(File input_gzip_file, int no) throws Exception{ - - //get a new stream rather than a closed one - InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? - new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); - - try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ - int num = 0; - String line; - while( (line = reader.readLine() ) != null){ - if( ++num > no) break; - System.out.println(line); - } - } - - - } - static void countLines(File input_gzip_file) throws FileNotFoundException, IOException, InterruptedException{ - HashSet uniqRef = new HashSet(); - - long startTime = System.currentTimeMillis(); - long num = 0; -// InputStream inputStream = getInputStream(input_gzip_file); - InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? 
- new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); - - try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ - String line; - while( (line = reader.readLine() ) != null){ - uniqRef.add(line.split("\\t")[0]); - num ++; - } - } - - System.out.println(String.format("Read file: %s\nLine number: %d", input_gzip_file.getAbsoluteFile(), num)); - System.out.println("Uniq reference name are " + uniqRef ); - - - } - - static void countUniqPosition(String input_gzip_file, String indexFile) throws IOException{ - // TabixReader tabix = new TabixReader( input_gzip_file, indexFile); - TabixReader tabix = new TabixReader( input_gzip_file); - Set chrs = tabix.getChromosomes(); - HashSet uniqPos = new HashSet(); - long total_uniq = 0; - long num = 0; - System.out.println("total reference number is " + chrs.size() + " from " + input_gzip_file); - for(String str : chrs){ - - uniqPos.clear(); - TabixReader.Iterator it = tabix.query(str); - - - String line; - while(( line = it.next())!= null){ - // String[] eles = TabTokenizer.tokenize(line, '\t'); - // uniqPos.add(eles[1]); - // uniqPos.add(line.split("\\t")[1]); - num ++; - } - - //debug - System.out.println("There are " + num+ " position recorded in reference " + str); - num ++; - - - // total_uniq += uniqPos.size(); - // System.out.println("There are " + uniqPos.size() + " uniq position recorded in reference " + str); - - } - -// System.out.println("Total uniq position recorded in all reference is " + total_uniq); -// System.out.println("Total records in whole file is " + num); - - } - - public static void main(String[] args) { - try{ - long startTime = System.currentTimeMillis(); - File input = new File(args[0]); - int no = Integer.parseInt(args[1]); - - if(no > 0) - new ReadPartGZFile(input, no ); - else if (no == 0) - countUniqPosition(args[0], null); - else - countLines(input); - - long endTime = System.currentTimeMillis(); - String time = QLogger.getRunTime(startTime, endTime); - System.out.println("run Time is " + time); - - }catch(Exception e){ - e.printStackTrace(); - //System.out.println(e.printStackTrace();); - System.err.println("Usage: java -cp qmule-0.1pre.jar ReadPartGZFile "); - - } - - } -} diff --git a/qmule/src/org/qcmg/qmule/ReadsAppend.java b/qmule/src/org/qcmg/qmule/ReadsAppend.java deleted file mode 100644 index 4c2ce5fab..000000000 --- a/qmule/src/org/qcmg/qmule/ReadsAppend.java +++ /dev/null @@ -1,95 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.IOException; - -import htsjdk.samtools.*; - -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.List; - -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - - -public class ReadsAppend { - File[] inputs; - File output; - - - ReadsAppend(File output, File[] inputs ) throws Exception{ - this.output = output; - this.inputs = inputs; - merging(); - } - - /** - * retrive the CS and CQ value from BAM record to output csfasta or qual file - * @throws Exception - */ - void merging() throws Exception{ - System.out.println("start time : " + getTime()); - - List readers = new ArrayList<>(); - for (File f: inputs) { - readers.add( SAMFileReaderFactory.createSAMFileReader(f)); - } - - SAMFileHeader header = readers.get(0).getFileHeader().clone(); - - SAMOrBAMWriterFactory factory = new SAMOrBAMWriterFactory(header, true, output,2000000 ); - SAMFileWriter writer = factory.getWriter(); - - for( SamReader reader : readers){ - for( SAMRecord record : reader) { - writer.addAlignment(record); - } - reader.close(); - } - - factory.closeWriter(); - System.out.println("end time : " + getTime()); - System.exit(0); - } - - - private String getTime(){ - Calendar currentDate = Calendar.getInstance(); - SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); - return "[" + formatter.format(currentDate.getTime()) + "]"; - } - public static void main(final String[] args) throws IOException, InterruptedException { - - try{ - if(args.length < 2) - throw new Exception("missing inputs or outputs name"); - - File output = new File(args[0]); - File[] inputs = new File[args.length-1]; - for (int i = 1; i < args.length; i++) { - inputs[i-1] = new File(args[i]) ; - - System.out.println(inputs[i-1].toString()); - } - - - new ReadsAppend(output, inputs ); - - System.exit(0); - }catch(Exception e){ - System.err.println(e.toString()); - Thread.sleep(1); - System.out.println("usage: qmule org.qcmg.qmule.ReadsAppend "); - System.exit(1); - } - - } -} diff --git a/qmule/src/org/qcmg/qmule/ReadsAppend.java-- b/qmule/src/org/qcmg/qmule/ReadsAppend.java-- deleted file mode 100644 index 4c2ce5fab..000000000 --- a/qmule/src/org/qcmg/qmule/ReadsAppend.java-- +++ /dev/null @@ -1,95 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
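
ReadsAppend above concatenates BAM inputs by cloning the first input's header and streaming every record into one writer, via the qcmg picard wrappers. A hedged sketch of the same append written against plain htsjdk factories follows; it is not the deleted class and omits the timing output.

    import htsjdk.samtools.*;
    import java.io.File;
    import java.io.IOException;
    import java.util.List;

    final class BamAppendSketch {

        static void append(File output, List<File> inputs) throws IOException {
            SamReaderFactory factory = SamReaderFactory.makeDefault();
            SAMFileHeader header;
            try (SamReader first = factory.open(inputs.get(0))) {
                header = first.getFileHeader().clone();     // reuse the first header as-is
            }
            try (SAMFileWriter writer = new SAMFileWriterFactory()
                    .makeSAMOrBAMWriter(header, false, output)) {
                for (File f : inputs) {
                    try (SamReader reader = factory.open(f)) {
                        for (SAMRecord record : reader) {   // stream records straight through
                            writer.addAlignment(record);
                        }
                    }
                }
            }
        }
    }
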
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.IOException; - -import htsjdk.samtools.*; - -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.List; - -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - - -public class ReadsAppend { - File[] inputs; - File output; - - - ReadsAppend(File output, File[] inputs ) throws Exception{ - this.output = output; - this.inputs = inputs; - merging(); - } - - /** - * retrive the CS and CQ value from BAM record to output csfasta or qual file - * @throws Exception - */ - void merging() throws Exception{ - System.out.println("start time : " + getTime()); - - List readers = new ArrayList<>(); - for (File f: inputs) { - readers.add( SAMFileReaderFactory.createSAMFileReader(f)); - } - - SAMFileHeader header = readers.get(0).getFileHeader().clone(); - - SAMOrBAMWriterFactory factory = new SAMOrBAMWriterFactory(header, true, output,2000000 ); - SAMFileWriter writer = factory.getWriter(); - - for( SamReader reader : readers){ - for( SAMRecord record : reader) { - writer.addAlignment(record); - } - reader.close(); - } - - factory.closeWriter(); - System.out.println("end time : " + getTime()); - System.exit(0); - } - - - private String getTime(){ - Calendar currentDate = Calendar.getInstance(); - SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); - return "[" + formatter.format(currentDate.getTime()) + "]"; - } - public static void main(final String[] args) throws IOException, InterruptedException { - - try{ - if(args.length < 2) - throw new Exception("missing inputs or outputs name"); - - File output = new File(args[0]); - File[] inputs = new File[args.length-1]; - for (int i = 1; i < args.length; i++) { - inputs[i-1] = new File(args[i]) ; - - System.out.println(inputs[i-1].toString()); - } - - - new ReadsAppend(output, inputs ); - - System.exit(0); - }catch(Exception e){ - System.err.println(e.toString()); - Thread.sleep(1); - System.out.println("usage: qmule org.qcmg.qmule.ReadsAppend "); - System.exit(1); - } - - } -} diff --git a/qmule/src/org/qcmg/qmule/RunGatk.java b/qmule/src/org/qcmg/qmule/RunGatk.java deleted file mode 100644 index b2e13458d..000000000 --- a/qmule/src/org/qcmg/qmule/RunGatk.java +++ /dev/null @@ -1,141 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - - -public class RunGatk { - -// public static String PATH="/panfs/home/oholmes/devel/QCMGScripts/o.holmes/gatk/pbs4java/"; -// public static final String PARAMS=" -l walltime=124:00:00 -v patient="; -// public static int jobCounter = 1; -// -// // inputs -// public static String patientId; -// public static String mixture; -// public static String normalBamFile; -// public static String tumourBamFile; -// public static String outputDir; -// -// public static String patientParams; -// public static String nodeName; -// public static String startPoint; -// -// public static void main(String[] args) throws IOException, InterruptedException, Exception { -// -// if (args.length < 5) throw new IllegalArgumentException("USAGE: RunGatk []"); -// -// patientId = args[0]; -// mixture = args[1]; -// normalBamFile = args[2]; -// tumourBamFile = args[3]; -// outputDir = args[4]; -// if (args.length == 6) { -// PATH = args[5]; -// } -// if (args.length == 7) { -// PATH = args[6]; -// } -// -// patientParams = PARAMS + patientId + ",mixture=" + mixture; -// -// String mergeParams = patientParams + ",normalBam=" + normalBamFile + ",tumourBam=" + tumourBamFile; -// -// -// String jobName = jobCounter++ + "RG_" + mixture; -// System.out.println("About to submit merge job"); -// -// Job merge = new Job(jobName, PATH + "run_gatk_merge_1.sh" + mergeParams); -//// merge.setQueue(queue); -// merge.queue(); -// String status = merge.getStatus(); -// System.out.println("1st job status: " + status); -// while ("N/A".equals(status)) { -// Thread.sleep(1500); -// String [] jobs = Job.SearchJobsByName(jobName, true); -// System.out.println("Sleeping till job status changes..." + status + ", id: " + merge.getId() + " no of jobs: " + jobs.length); -// -// for (int i = 0 ; i < jobs.length ; i++) { -// System.out.println("jobs[" + i + "] : " + jobs[i]); -// merge = Job.getJobById(jobs[i]); -// status = merge.getStatus(); -// System.out.println("job.getJobStatus: " + Job.getJobStatus(jobs[i])); -// -// } -// } -// nodeName = merge.getExecuteNode().substring(0, merge.getExecuteNode().indexOf('/')); -// -// -// -// System.out.println("About to submit clean 1 job"); -// // clean 1 -// String script = PATH + "run_gatk_clean_1.sh" + patientParams; -// Job clean1 = submitDependantJob(merge, "1", script, true); -// -// -// System.out.println("About to submit clean 2 job"); -// // clean 2 -// script = PATH + "run_gatk_clean_2.sh" + patientParams; -// Job clean2 = submitDependantJob(clean1, "1", script, true); -// -// // clean 3 -// script = PATH + "run_gatk_clean_3.sh" + patientParams; -// Job clean3 = submitDependantJob(clean2, "6", script, true); -// -//// String scriptToRun = PATH + "run_gatk_clean_4.sh" + patientParams; -// -// System.out.println("About to submit clean 4 job"); -// script = PATH + "run_gatk_clean_4.sh" + patientParams; -// Job clean4 = submitDependantJob(clean3, "1", script, true); -// -// // split -// System.out.println("About to submit split job"); -// script = PATH + "run_gatk_split.sh" + patientParams; -// Job split = submitDependantJob(clean4, "1", script, true); -// -// runMergeDelUG(split, "ND"); -// runMergeDelUG(split, "TD"); -// } -// -// private static void runMergeDelUG(Job splitJob, String type) throws IOException, InterruptedException, Exception { -// String script = PATH + "run_gatk_merge_2.sh" + patientParams + ",type=" + type; -// Job mergeJob = submitDependantJob(splitJob, "1", script, true); -// -// // delete -// script = PATH + "run_gatk_del_split_files.sh" + 
patientParams + ",type=" + type; -// Job deleteJob = submitDependantJob(mergeJob, "1", script, true); -// -// -// // UG -// script = PATH + "run_gatk_UG.sh" + patientParams + ",type=" + type; -// Job unifiedGenotyperJob = submitDependantJob(mergeJob, "4", script, false); -// -// } -// -// private static Job submitDependantJob(Job depJob, String ppn, String script, boolean onNode) throws IOException, InterruptedException, Exception { -// -// String jobName; -// ArrayList dependantJobs; -// String[] jobs; -// jobName = jobCounter++ + "RG_" + mixture; -// Job newJob = new Job(jobName, script); -//// Job newJob = new Job(jobName, PATH + script + patientParams + ",type=" + type); -//// newJob.setQueue(queue); -// if (onNode) { -// newJob.setExecuteNode(nodeName); -// newJob.setNodes(nodeName); -// } -// newJob.setPpn(ppn); -// dependantJobs = new ArrayList(); -// dependantJobs.add(depJob.getId() + " "); -// newJob.setAfterOK(dependantJobs); -// newJob.queue(); -// // sleep to allow job to make it to the queue -// Thread.sleep(1000); -// -// jobs = Job.SearchJobsByName(jobName, true); -// newJob = Job.getJobById(jobs[0]); -// return newJob; -// } - -} diff --git a/qmule/src/org/qcmg/qmule/RunGatk.java-- b/qmule/src/org/qcmg/qmule/RunGatk.java-- deleted file mode 100644 index b2e13458d..000000000 --- a/qmule/src/org/qcmg/qmule/RunGatk.java-- +++ /dev/null @@ -1,141 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - - -public class RunGatk { - -// public static String PATH="/panfs/home/oholmes/devel/QCMGScripts/o.holmes/gatk/pbs4java/"; -// public static final String PARAMS=" -l walltime=124:00:00 -v patient="; -// public static int jobCounter = 1; -// -// // inputs -// public static String patientId; -// public static String mixture; -// public static String normalBamFile; -// public static String tumourBamFile; -// public static String outputDir; -// -// public static String patientParams; -// public static String nodeName; -// public static String startPoint; -// -// public static void main(String[] args) throws IOException, InterruptedException, Exception { -// -// if (args.length < 5) throw new IllegalArgumentException("USAGE: RunGatk []"); -// -// patientId = args[0]; -// mixture = args[1]; -// normalBamFile = args[2]; -// tumourBamFile = args[3]; -// outputDir = args[4]; -// if (args.length == 6) { -// PATH = args[5]; -// } -// if (args.length == 7) { -// PATH = args[6]; -// } -// -// patientParams = PARAMS + patientId + ",mixture=" + mixture; -// -// String mergeParams = patientParams + ",normalBam=" + normalBamFile + ",tumourBam=" + tumourBamFile; -// -// -// String jobName = jobCounter++ + "RG_" + mixture; -// System.out.println("About to submit merge job"); -// -// Job merge = new Job(jobName, PATH + "run_gatk_merge_1.sh" + mergeParams); -//// merge.setQueue(queue); -// merge.queue(); -// String status = merge.getStatus(); -// System.out.println("1st job status: " + status); -// while ("N/A".equals(status)) { -// Thread.sleep(1500); -// String [] jobs = Job.SearchJobsByName(jobName, true); -// System.out.println("Sleeping till job status changes..." 
+ status + ", id: " + merge.getId() + " no of jobs: " + jobs.length); -// -// for (int i = 0 ; i < jobs.length ; i++) { -// System.out.println("jobs[" + i + "] : " + jobs[i]); -// merge = Job.getJobById(jobs[i]); -// status = merge.getStatus(); -// System.out.println("job.getJobStatus: " + Job.getJobStatus(jobs[i])); -// -// } -// } -// nodeName = merge.getExecuteNode().substring(0, merge.getExecuteNode().indexOf('/')); -// -// -// -// System.out.println("About to submit clean 1 job"); -// // clean 1 -// String script = PATH + "run_gatk_clean_1.sh" + patientParams; -// Job clean1 = submitDependantJob(merge, "1", script, true); -// -// -// System.out.println("About to submit clean 2 job"); -// // clean 2 -// script = PATH + "run_gatk_clean_2.sh" + patientParams; -// Job clean2 = submitDependantJob(clean1, "1", script, true); -// -// // clean 3 -// script = PATH + "run_gatk_clean_3.sh" + patientParams; -// Job clean3 = submitDependantJob(clean2, "6", script, true); -// -//// String scriptToRun = PATH + "run_gatk_clean_4.sh" + patientParams; -// -// System.out.println("About to submit clean 4 job"); -// script = PATH + "run_gatk_clean_4.sh" + patientParams; -// Job clean4 = submitDependantJob(clean3, "1", script, true); -// -// // split -// System.out.println("About to submit split job"); -// script = PATH + "run_gatk_split.sh" + patientParams; -// Job split = submitDependantJob(clean4, "1", script, true); -// -// runMergeDelUG(split, "ND"); -// runMergeDelUG(split, "TD"); -// } -// -// private static void runMergeDelUG(Job splitJob, String type) throws IOException, InterruptedException, Exception { -// String script = PATH + "run_gatk_merge_2.sh" + patientParams + ",type=" + type; -// Job mergeJob = submitDependantJob(splitJob, "1", script, true); -// -// // delete -// script = PATH + "run_gatk_del_split_files.sh" + patientParams + ",type=" + type; -// Job deleteJob = submitDependantJob(mergeJob, "1", script, true); -// -// -// // UG -// script = PATH + "run_gatk_UG.sh" + patientParams + ",type=" + type; -// Job unifiedGenotyperJob = submitDependantJob(mergeJob, "4", script, false); -// -// } -// -// private static Job submitDependantJob(Job depJob, String ppn, String script, boolean onNode) throws IOException, InterruptedException, Exception { -// -// String jobName; -// ArrayList dependantJobs; -// String[] jobs; -// jobName = jobCounter++ + "RG_" + mixture; -// Job newJob = new Job(jobName, script); -//// Job newJob = new Job(jobName, PATH + script + patientParams + ",type=" + type); -//// newJob.setQueue(queue); -// if (onNode) { -// newJob.setExecuteNode(nodeName); -// newJob.setNodes(nodeName); -// } -// newJob.setPpn(ppn); -// dependantJobs = new ArrayList(); -// dependantJobs.add(depJob.getId() + " "); -// newJob.setAfterOK(dependantJobs); -// newJob.queue(); -// // sleep to allow job to make it to the queue -// Thread.sleep(1000); -// -// jobs = Job.SearchJobsByName(jobName, true); -// newJob = Job.getJobById(jobs[0]); -// return newJob; -// } - -} diff --git a/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java b/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java deleted file mode 100644 index 6730aa5ff..000000000 --- a/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java +++ /dev/null @@ -1,368 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; - -public class SmithWatermanGotoh { - - private final float gapOpen; - private final float gapExtend; - private final int matchScore; - private final int mismatchScore; - private final String sequenceA; - private final String sequenceB; - private final int rows; - private final int columns; - private int[][] pointerMatrix; - private short[][] verticalGaps; - private short[][] horizontalGaps; - private int bestRow; - private int bestColumn; - private float bestScore; - private static final int STOP = 0; - private static final int LEFT = 1; - private static final int DIAGONAL = 2; - private static final int UP = 3; - private static final String GAP = "-"; - private static final String EMPTY = " "; - private static final String MISMATCH = "."; - private static final String MATCH = "|"; - private static final String TAB = ""; - - public SmithWatermanGotoh(File fileA, File fileB, int matchScore, int mismatchScore, float gapOpen, float gapExtend) throws IOException { - - this.sequenceA = readFastaFile(fileA); - this.sequenceB = readFastaFile(fileB); - this.gapOpen = gapOpen; - this.gapExtend = gapExtend; - this.matchScore = matchScore; - this.mismatchScore = mismatchScore; - this.rows = sequenceA.length() + 1;//i - this.columns = sequenceB.length() + 1;//j - align(); - } - - public SmithWatermanGotoh(String a, String b, int matchScore, int mismatchScore, float gapOpen, float gapExtend) { - - this.sequenceA = a; - this.sequenceB = b; - this.gapOpen = gapOpen; - this.gapExtend = gapExtend; - this.matchScore = matchScore; - this.mismatchScore = mismatchScore; - this.rows = sequenceA.length() + 1;//i - this.columns = sequenceB.length() + 1;//j - align(); - } - - private String readFastaFile(File file) throws IOException { - - try (FileInputStream inputStream = new FileInputStream(file)) { - StringBuilder buffer = new StringBuilder(); - int ch; - while ((ch = inputStream.read()) != -1) { - buffer.append((char)ch); - } - inputStream.close(); - - String seq = buffer.toString(); - - if (seq.startsWith(">")) { - int index = seq.indexOf("\n"); - return seq.substring(index, seq.length()).replace("\n", "").toUpperCase(); - } else { - return seq.replace("\n", "").toUpperCase(); - } - } - } - - private void align() { - fillMatrix(); - traceback(); -// System.out.println(getDiffs()); - } - - private void fillMatrix() { - //etutorials.org/Misc/blast/Part+II+Theory/Chapter+3.+Sequence+Alignment/3.2+Local+Alignment+Smith-Waterman/ - //Gotoh: http://www.cse.msu.edu/~cse891/Sect001/notes_alignment.pdf - //https://github.com/ekg/smithwaterman/blob/master/SmithWatermanGotoh.cpp - //http://cci.lbl.gov/cctbx_sources/mmtbx/alignment.py - // - //The dynamic programming algorithm was improved in performance by Gotoh (1982) by using the linear -// relationship for a gap weight wx = g + rx, where the weight for a gap of length x is the sum of a gap -// opening penalty (g) and a gap extension penalty (r) times the gap length (x), and by simplifying -// the dynamic programming algorithm. He reasoned that two of the terms that are maximized in the -// dynamic programming algorithm and designated here Pij and Qij depend only on the values in the -// current and previous row and column, as indicated below. 
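
The comment above describes the Gotoh refinement of Smith-Waterman: an affine gap weight w(x) = gapOpen + gapExtend * x, tracked with helper terms that depend only on the current and previous row and column. Below is a minimal score-only sketch of that recurrence using rolling arrays and made-up scoring values; it is illustrative, not the deleted class, and does no traceback.

    public final class GotohScoreSketch {

        /** Best local alignment score of a vs b under affine gap penalties (Gotoh). */
        public static float localScore(String a, String b,
                                       float match, float mismatch,
                                       float gapOpen, float gapExtend) {
            int n = a.length(), m = b.length();
            float[] h = new float[m + 1];   // H: best score ending at (i, j)
            float[] p = new float[m + 1];   // P: best score ending in a gap in b (vertical)
            float best = 0f;
            for (int i = 1; i <= n; i++) {
                float hDiag = 0f;           // H[i-1][j-1]
                float q = 0f;               // Q: best score ending in a gap in a (horizontal)
                for (int j = 1; j <= m; j++) {
                    float similarity = (a.charAt(i - 1) == b.charAt(j - 1)) ? match : mismatch;
                    p[j] = Math.max(h[j] - gapOpen, p[j] - gapExtend);      // open vs extend, vertical
                    q    = Math.max(h[j - 1] - gapOpen, q - gapExtend);     // open vs extend, horizontal
                    float hNew = Math.max(0f,
                            Math.max(hDiag + similarity, Math.max(p[j], q)));
                    hDiag = h[j];           // becomes the diagonal for the next column
                    h[j] = hNew;
                    if (hNew > best) best = hNew;
                }
            }
            return best;
        }

        public static void main(String[] args) {
            System.out.println(localScore("ACACACTA", "AGCACACA", 5f, -4f, 16f, 4f));
        }
    }
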
- - initialize(); - - //storage for current calculations - float[] bestScores = new float[columns];//score if xi aligns to gap after yi - float[] queryGapScores = new float[columns];//best score of alignment x1..xi to y1..yi - - for (int i=0; i queryGapOpenScore) { - //add extend score - queryGapScores[column] = queryGapExtendScore; - //increase size of gap - int gapLength = verticalGaps[row-1][column] + 1; - verticalGaps[row][column] = (short) gapLength; - } else { - //add open score - queryGapScores[column] = queryGapOpenScore; - } - - //calculate horizontal gaps - referenceGapExtendScore = currentAnchorGapScore - gapExtend; - referenceGapOpenScore = bestScores[column-1] - gapOpen; - - if (referenceGapExtendScore > referenceGapOpenScore) { - //add extend score - currentAnchorGapScore = referenceGapExtendScore; - //increase size of gap - short gapLength = (short) (horizontalGaps[row][column-1] + 1); - horizontalGaps[row][column] = gapLength; - } else { - //add open score - currentAnchorGapScore = referenceGapOpenScore; - } - - //test scores - bestScoreDiagonal = bestScores[column]; - bestScores[column] = findMaximum(totalSimilarityScore, queryGapScores[column], currentAnchorGapScore); - - //determine trackback direction - float score = bestScores[column]; - if (score == 0) { - pointerMatrix[row][column] = STOP; - } else if (score == totalSimilarityScore) { - pointerMatrix[row][column] = DIAGONAL; - } else if (score == queryGapScores[column]) { - pointerMatrix[row][column] = UP; - } else { - pointerMatrix[row][column] = LEFT; - } - - //set current cell if this is the best score - if (score > bestScore) { - bestRow = row; - bestColumn = column; - bestScore = score; - } - } - } - } - - - private void initialize() { - pointerMatrix = new int[rows][columns]; - verticalGaps = new short[rows][columns]; - horizontalGaps = new short[rows][columns]; - for (int i=0; i")) { - int index = seq.indexOf("\n"); - return seq.substring(index, seq.length()).replace("\n", "").toUpperCase(); - } else { - return seq.replace("\n", "").toUpperCase(); - } - } - } - - private void align() { - fillMatrix(); - traceback(); -// System.out.println(getDiffs()); - } - - private void fillMatrix() { - //etutorials.org/Misc/blast/Part+II+Theory/Chapter+3.+Sequence+Alignment/3.2+Local+Alignment+Smith-Waterman/ - //Gotoh: http://www.cse.msu.edu/~cse891/Sect001/notes_alignment.pdf - //https://github.com/ekg/smithwaterman/blob/master/SmithWatermanGotoh.cpp - //http://cci.lbl.gov/cctbx_sources/mmtbx/alignment.py - // - //The dynamic programming algorithm was improved in performance by Gotoh (1982) by using the linear -// relationship for a gap weight wx = g + rx, where the weight for a gap of length x is the sum of a gap -// opening penalty (g) and a gap extension penalty (r) times the gap length (x), and by simplifying -// the dynamic programming algorithm. He reasoned that two of the terms that are maximized in the -// dynamic programming algorithm and designated here Pij and Qij depend only on the values in the -// current and previous row and column, as indicated below. 
- - initialize(); - - //storage for current calculations - float[] bestScores = new float[columns];//score if xi aligns to gap after yi - float[] queryGapScores = new float[columns];//best score of alignment x1..xi to y1..yi - - for (int i=0; i queryGapOpenScore) { - //add extend score - queryGapScores[column] = queryGapExtendScore; - //increase size of gap - int gapLength = verticalGaps[row-1][column] + 1; - verticalGaps[row][column] = (short) gapLength; - } else { - //add open score - queryGapScores[column] = queryGapOpenScore; - } - - //calculate horizontal gaps - referenceGapExtendScore = currentAnchorGapScore - gapExtend; - referenceGapOpenScore = bestScores[column-1] - gapOpen; - - if (referenceGapExtendScore > referenceGapOpenScore) { - //add extend score - currentAnchorGapScore = referenceGapExtendScore; - //increase size of gap - short gapLength = (short) (horizontalGaps[row][column-1] + 1); - horizontalGaps[row][column] = gapLength; - } else { - //add open score - currentAnchorGapScore = referenceGapOpenScore; - } - - //test scores - bestScoreDiagonal = bestScores[column]; - bestScores[column] = findMaximum(totalSimilarityScore, queryGapScores[column], currentAnchorGapScore); - - //determine trackback direction - float score = bestScores[column]; - if (score == 0) { - pointerMatrix[row][column] = STOP; - } else if (score == totalSimilarityScore) { - pointerMatrix[row][column] = DIAGONAL; - } else if (score == queryGapScores[column]) { - pointerMatrix[row][column] = UP; - } else { - pointerMatrix[row][column] = LEFT; - } - - //set current cell if this is the best score - if (score > bestScore) { - bestRow = row; - bestColumn = column; - bestScore = score; - } - } - } - } - - - private void initialize() { - pointerMatrix = new int[rows][columns]; - verticalGaps = new short[rows][columns]; - horizontalGaps = new short[rows][columns]; - for (int i=0; i chromosomes = new ArrayList(); -// private int exitStatus; -// private Map> vcfRecords = new HashMap>(); -// private Map> mafRecords = new HashMap>(); -// private Map> gffRecords = new HashMap>(); -// private Map> bedRecords = new HashMap>(); -// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); -// private List overlappingMafRecords = new ArrayList(); -// private List notOverlappingMafRecords = new ArrayList(); -// private List overlappingVcfRecords = new ArrayList(); -// private List notOverlappingVcfRecords = new ArrayList(); -// private int overlapCount = 0; -// private int notOverlappingCount = 0; -// private int snpCount; -// private Vector header = new Vector(); -// private String inputSnpType; -// -// private static QLogger logger; -// -// public int engage() throws Exception { -// inputSnpType = null; -// if (cmdLineInputFiles[0].endsWith("maf")) { -// logger.info("MAF File: " + cmdLineInputFiles[0]); -// loadMafFile(); -// inputSnpType = "MAF"; -// if (mafRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from maf file"); -// } -// if (cmdLineInputFiles[0].endsWith("vcf")) { -// logger.info("VCF File: " + cmdLineInputFiles[0]); -// loadVCFFile(); -// inputSnpType = "VCF"; -// if (vcfRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from vcf file"); -// } -// if (cmdLineInputFiles[1].endsWith("bed")) { -// logger.info("BED File: " + cmdLineInputFiles[1]); -// } else if (cmdLineInputFiles[1].endsWith("gff3")) { -// logger.info("GFF3 File: " + cmdLineInputFiles[1]); -// } -// logger.info("Output file: " + cmdLineOutputFiles[0]); -// 
logger.info("Output file: " + cmdLineOutputFiles[1]); -// -// String fileType = null; -// if (cmdLineInputFiles[1].endsWith("bed")) { -// loadBedFile(); -// fileType = "bed"; -// } else if (cmdLineInputFiles[1].endsWith("gff3")) { -// fileType = "gff3"; -// loadGffFile(); -// } else { -// throw new IllegalArgumentException("File type for reference regions is not recognized. Must be bed or gff3"); -// } -// -// Collections.sort(chromosomes,COMPARATOR); -// -// writeHeader(); -// -// if (fileType.equals("bed")) { -// if (bedRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from bed file"); -// for (String c: chromosomes) { -// if (inputSnpType.equals("MAF")) { -// -// compareBedRecordsToMaf(c, bedRecords.get(c)); -// } -// if (inputSnpType.equals("VCF")) { -// compareBedRecordsToVcf(c, bedRecords.get(c)); -// } -// } -// } -// -// if (fileType.equals("gff3")) { -// if (gffRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); -// for (String c: chromosomes) { -// logger.info("Chromosome: " + c); -// if (inputSnpType.equals("MAF")) { -// compareGFFRecordsToMaf(c, gffRecords.get(c)); -// } -// if (inputSnpType.equals("VCF")) { -// compareGFFRecordsToVcf(c, gffRecords.get(c)); -// } -// } -// } -// -// logger.info("SUMMARY"); -// logger.info("Total Records in " +inputSnpType+ ": " + snpCount); -// logger.info("Total Records in supplied reference regions: " + overlapCount); -// logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); -// return exitStatus; -// } -// -// private void loadVCFFile() throws Exception { -// VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[0])); -// try { -// header = new Vector(); -// Iterator iterator = reader.getRecordIterator(); -// snpCount = 0; -// if (reader.getHeader() != null) { -// Iterator iter = reader.getHeader().iterator(); -// while (iter.hasNext()) { -// header.add(iter.next()); -// } -// } -// while (iterator.hasNext()) { -// -// VCFRecord vcfRec = iterator.next(); -// -// snpCount++; -// if (vcfRecords.containsKey(vcfRec.getChromosome())) { -// vcfRecords.get(vcfRec.getChromosome()).add(vcfRec); -// } else { -// List list = new ArrayList(); -// list.add(vcfRec); -// vcfRecords.put(vcfRec.getChromosome(),list); -// } -// if (!chromosomes.contains(vcfRec.getChromosome())) { -// chromosomes.add(vcfRec.getChromosome()); -// } -// } -// logger.info("loaded maf file, total records: " + snpCount); -// } finally { -// reader.close(); -// } -// } -// -// private void loadMafFile() throws Exception { -// TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); -// try { -// header = new Vector(); -// Iterator iterator = reader.getRecordIterator(); -// snpCount = 0; -// if (reader.getHeader() != null) { -// Iterator iter = reader.getHeader().iterator(); -// while (iter.hasNext()) { -// header.add(iter.next()); -// } -// } -// while (iterator.hasNext()) { -// -// TabbedRecord tab = iterator.next(); -// -// if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo")) { -// header.add(tab.getData()); -// continue; -// } -// snpCount++; -// MAFRecord mafRec = convertToMafRecord(tab.getData().split("\t")); -// mafRec.setData(tab.getData()); -// if (mafRecords.containsKey(mafRec.getChromosome())) { -// mafRecords.get(mafRec.getChromosome()).add(mafRec); -// } else { -// List list = new ArrayList(); -// list.add(mafRec); -// mafRecords.put(mafRec.getChromosome(),list); -// } -// if 
(!chromosomes.contains(mafRec.getChromosome())) { -// chromosomes.add(mafRec.getChromosome()); -// } -// } -// logger.info("loaded maf file, total records: " + snpCount); -// } finally { -// reader.close(); -// } -// } -// -// private void loadBedFile() throws IOException { -// BEDFileReader reader = new BEDFileReader(new File(cmdLineInputFiles[1])); -// try { -// int count = 0; -// for (BEDRecord record : reader) { -// count++; -// String chr = record.getChrom(); -// if (inputSnpType.equals("MAF")) { -// chr = record.getChrom().replace("chr", ""); -// } -// if (bedRecords.containsKey(chr)) { -// bedRecords.get(chr).put(record.getChromStart(), record); -// } else { -// TreeMap map = new TreeMap(); -// map.put(record.getChromStart(), record); -// bedRecords.put(chr,map); -// } -// } -// logger.info("loaded bed file, total record: " + count); -// } finally { -// reader.close(); -// } -// -// } -// -// private void loadGffFile() throws Exception { -// GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); -// try { -// int count = 0; -// for (GFF3Record record : reader) { -// count++; -// String chr = record.getSeqId(); -// if (inputSnpType.equals("MAF")) { -// chr = record.getSeqId().replace("chr", ""); -// } -// if (gffRecords.containsKey(chr)) { -// gffRecords.get(chr).put(record.getStart(), record); -// } else { -// TreeMap map = new TreeMap(); -// map.put(record.getStart(), record); -// gffRecords.put(chr,map); -// } -// } -// -// logger.info("loaded gff3 file, total record: " + count); -// } finally { -// reader.close(); -// } -// } -// -// public void compareBedRecordsToVcf(String chromosome, TreeMap map) throws IOException { -// List vcfList = vcfRecords.get(chromosome); -// -// //bed positions are zero based -// if (map != null) { -// -// for (VCFRecord snp : vcfList) { -// Entry floor = map.floorEntry(new Integer(snp.getPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getPosition())); -// -// if (vcfRecordFallsInBEDRecord(snp, floor) || vcfRecordFallsInBEDRecord(snp, ceiling)) { -// overlapCount++; -// overlappingVcfRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingVcfRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// } -// } else { -// notOverlappingVcfRecords.addAll(vcfList); -// notOverlappingCount += vcfList.size(); -// } -// writeParsedVcfRecords(); -// } -// -// public void compareBedRecordsToMaf(String chromosome, TreeMap map) throws IOException { -// List mafList = mafRecords.get(chromosome); -// -// //bed positions are zero based -// if (map != null) { -// -// for (MAFRecord snp : mafList) { -// -// Entry floor = map.floorEntry(new Integer(snp.getStartPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getStartPosition())); -// -// if (mafRecordFallsInBEDRecord(snp, floor) || mafRecordFallsInBEDRecord(snp, ceiling)) { -// overlapCount++; -// overlappingMafRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingMafRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// -// } -// } else { -// notOverlappingMafRecords.addAll(mafList); -// notOverlappingCount += mafList.size(); -// } -// writeParsedMafRecords(); -// } -// -// public void compareGFFRecordsToVcf(String chromosome, TreeMap map) throws IOException { -// List vcfList = vcfRecords.get(chromosome); -// -// if (map != null) { -// -// logger.info("List 
size: " + vcfList.size()); -// for (VCFRecord snp : vcfList) { -// Entry floor = map.floorEntry(new Integer(snp.getPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getPosition())); -// -// if (vcfRecordFallsInGFF3Record(snp, floor) || vcfRecordFallsInGFF3Record(snp, ceiling)) { -// overlapCount++; -// overlappingVcfRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingVcfRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// } -// } else { -// notOverlappingVcfRecords.addAll(vcfList); -// notOverlappingCount += vcfList.size(); -// } -// writeParsedVcfRecords(); -// } -// -// public void compareGFFRecordsToMaf(String chromosome, TreeMap map) throws IOException { -// List mafList = mafRecords.get(chromosome); -// -// if (map != null) { -// -// for (MAFRecord snp : mafList) { -// -// Entry floor = map.floorEntry(new Integer(snp.getStartPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getStartPosition())); -// -// if (mafRecordFallsInGFF3Record(snp, floor) || mafRecordFallsInGFF3Record(snp, ceiling)) { -// overlapCount++; -// overlappingMafRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingMafRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// } -// } else { -// notOverlappingMafRecords.addAll(mafList); -// notOverlappingCount += mafList.size(); -// } -// writeParsedMafRecords(); -// } -// -// -// private boolean mafRecordFallsInGFF3Record(MAFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getStartPosition() >= entry.getValue().getStart() && snp.getStartPosition() <= entry.getValue().getEnd() || -// snp.getEndPosition() >= entry.getValue().getStart() && snp.getEndPosition() <= entry.getValue().getEnd()) { -// return true; -// } -// } -// return false; -// } -// -// private boolean mafRecordFallsInBEDRecord(MAFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getStartPosition() >= entry.getValue().getChromStart()+1 && snp.getStartPosition() <= entry.getValue().getChromEnd() || -// snp.getEndPosition() >= entry.getValue().getChromStart()+1 && snp.getEndPosition() <= entry.getValue().getChromEnd()) { -// return true; -// } -// } -// return false; -// } -// -// private boolean vcfRecordFallsInGFF3Record(VCFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getPosition() >= entry.getValue().getStart() && snp.getPosition() <= entry.getValue().getEnd()) { -// return true; -// } -// } -// return false; -// } -// -// private boolean vcfRecordFallsInBEDRecord(VCFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getPosition() >= entry.getValue().getChromStart()+1 && snp.getPosition() <= entry.getValue().getChromEnd()) { -// return true; -// } -// } -// return false; -// } -// -// public String[] getCmdLineInputFiles() { -// return cmdLineInputFiles; -// } -// -// public void setCmdLineInputFiles(String[] cmdLineInputFiles) { -// this.cmdLineInputFiles = cmdLineInputFiles; -// } -// -// public String[] getCmdLineOutputFiles() { -// return cmdLineOutputFiles; -// } -// -// public void setCmdLineOutputFiles(String[] cmdLineOutputFiles) { -// this.cmdLineOutputFiles = cmdLineOutputFiles; -// } -// -// private void writeParsedMafRecords() throws IOException { -// writeMafRecordsToFile(cmdLineOutputFiles[0], overlappingMafRecords); -// writeMafRecordsToFile(cmdLineOutputFiles[1], 
notOverlappingMafRecords); -// } -// -// private void writeParsedVcfRecords() throws IOException { -// writeVcfRecordsToFile(cmdLineOutputFiles[0], overlappingVcfRecords); -// writeVcfRecordsToFile(cmdLineOutputFiles[1], notOverlappingVcfRecords); -// } -// -// private void writeHeader() throws IOException { -// writeHeader(cmdLineOutputFiles[0]); -// writeHeader(cmdLineOutputFiles[1]); -// } -// -// private void writeHeader(String fileName) throws IOException { -// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); -// -// for (String h: header) { -// writer.write(h + "\n"); -// } -// writer.close(); -// } -// -// private void writeMafRecordsToFile(String fileName, -// List outputRecords) throws IOException { -// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); -// -// for (MAFRecord r: outputRecords) { -// writer.write(r.getData() + "\n"); -// } -// -// writer.close(); -// outputRecords.clear(); -// } -// -// private void writeVcfRecordsToFile(String fileName, -// List outputRecords) throws IOException { -// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); -// -// for (VCFRecord r: outputRecords) { -// writer.write(r.toString() + "\n"); -// } -// -// writer.close(); -// outputRecords.clear(); -// } -// -// -// protected int setup(String args[]) throws Exception{ -// int returnStatus = 1; -// if (null == args || args.length == 0) { -// System.err.println(Messages.USAGE); -// System.exit(1); -// } -// Options options = new Options(args); -// -// if (options.hasHelpOption()) { -// System.err.println(Messages.USAGE); -// options.displayHelp(); -// returnStatus = 0; -// } else if (options.hasVersionOption()) { -// System.err.println(Messages.getVersionMessage()); -// returnStatus = 0; -// } else if (options.getInputFileNames().length < 1) { -// System.err.println(Messages.USAGE); -// } else if ( ! options.hasLogOption()) { -// System.err.println(Messages.USAGE); -// } else { -// // configure logging -// logFile = options.getLogFile(); -// logger = QLoggerFactory.getLogger(SnpToReferenceRegionFilter.class, logFile, options.getLogLevel()); -// logger.logInitialExecutionStats("SnpMafBedFileComparison", SnpToReferenceRegionFilter.class.getPackage().getImplementationVersion(), args); -// -// // get list of file names -// cmdLineInputFiles = options.getInputFileNames(); -// if (cmdLineInputFiles.length < 1) { -// throw new QMuleException("INSUFFICIENT_ARGUMENTS"); -// } else { -// // loop through supplied files - check they can be read -// for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { -// if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { -// throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); -// } -// } -// } -// -// // check supplied output files can be written to -// if (null != options.getOutputFileNames()) { -// cmdLineOutputFiles = options.getOutputFileNames(); -// for (String outputFile : cmdLineOutputFiles) { -// if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) -// throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); -// } -// } -// return engage(); -// } -// return returnStatus; -// } -// -// -// public static void main(String[] args) throws Exception { -// SnpToReferenceRegionFilter sp = new SnpToReferenceRegionFilter(); -// int exitStatus = sp.setup(args); -// if (null != logger) -// logger.logFinalExecutionStats(exitStatus); -// -// System.exit(exitStatus); -// } -// -// public static MAFRecord convertToMafRecord(String[] params) { -// MAFRecord maf = new MAFRecord(); -//// System.out.println(params[0]); -//// maf.setHugoSymbol(params[0]); -//// maf.setEntrezGeneId(params[1]); -//// maf.setCenter(params[2]); -//// maf.setNcbiBuild(Integer.parseInt(params[3])); -// maf.setChromosome(params[0]); -// maf.setStartPosition(Integer.parseInt(params[1])); -// maf.setEndPosition(Integer.parseInt(params[1])); -//// maf.setStrand(params[7].charAt(0)); -//// maf.setVariantClassification(params[8]); -//// maf.setVariantType(params[9]); -//// maf.setRef(params[10]); -//// maf.setTumourAllele1(params[11]); -//// maf.setTumourAllele2(params[12]); -//// maf.setDbSnpId(params[13]); -//// maf.setDbSnpValStatus(params[14]); -//// maf.setTumourSampleBarcode(params[15]); -//// maf.setNormalSampleBarcode(params[16]); -//// maf.setNormalAllele1(params[17]); -//// maf.setNormalAllele2(params[18]); -//// maf.setTumourValidationAllele1(params[19]); -//// maf.setTumourValidationAllele2(params[20]); -//// maf.setNormalValidationAllele1(params[21]); -//// maf.setNormalValidationAllele2(params[22]); -//// maf.setVerificationStatus(params[23]); -//// maf.setValidationStatus(params[24]); -//// maf.setMutationStatus(params[25]); -//// maf.setSequencingPhase(params[26]); -//// maf.setSequencingSource(params[27]); -//// maf.setValidationMethod(params[28]); -//// maf.setScore(params[29]); -//// maf.setBamFile(params[30]); -//// maf.setSequencer(params[31]); -//// // QCMG -//// if (params.length > 32) -//// maf.setFlag(params[32]); -//// if (params.length > 33) -//// maf.setNd(params[33]); -//// if (params.length > 34) -//// maf.setTd(params[34]); -//// if (params.length > 35) -//// maf.setCanonicalTranscriptId(params[35]); -//// if (params.length > 36) -//// maf.setCanonicalAAChange(params[36]); -//// if (params.length > 37) -//// maf.setCanonicalBaseChange(params[37]); -//// if (params.length > 38) -//// maf.setAlternateTranscriptId(params[38]); -//// if (params.length > 39) -//// maf.setAlternateAAChange(params[39]); -//// if (params.length > 40) -//// maf.setAlternateBaseChange(params[40]); -// -// return maf; -// } -// -// public List getChromosomes() { -// return chromosomes; -// } -// -// public void setChromosomes(List chromosomes) { -// this.chromosomes = chromosomes; -// } -// -// public Map> getMafRecords() { -// return mafRecords; -// } -// -// public void setMafRecords(Map> mafRecords) { -// this.mafRecords = mafRecords; -// } -// -// public List getOverlappingRecords() { -// return overlappingMafRecords; -// } -// -// public void setOverlappingRecords(List overlappingRecords) { -// this.overlappingMafRecords = overlappingRecords; -// } -// -// public List getNotOverlappingRecords() { -// return notOverlappingMafRecords; -// } -// -// public void setNotOverlappingRecords(List notOverlappingRecords) { -// this.notOverlappingMafRecords = notOverlappingRecords; -// } -// -// public int getOverlapCount() { -// return overlapCount; -// } -// -// public void setOverlapCount(int overlapCount) { -// this.overlapCount = 
overlapCount; -// } -// -// public int getNotOverlappingCount() { -// return notOverlappingCount; -// } -// -// public void setNotOverlappingCount(int notOverlappingCount) { -// this.notOverlappingCount = notOverlappingCount; -// } -// -// public int getMafCount() { -// return snpCount; -// } -// -// public void setMafCount(int mafCount) { -// this.snpCount = mafCount; -// } -// -// -// -//} diff --git a/qmule/src/org/qcmg/qmule/SnpToReferenceRegionFilter.java-- b/qmule/src/org/qcmg/qmule/SnpToReferenceRegionFilter.java-- deleted file mode 100644 index c0a138f29..000000000 --- a/qmule/src/org/qcmg/qmule/SnpToReferenceRegionFilter.java-- +++ /dev/null @@ -1,647 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -//package org.qcmg.qmule; -// -//import java.io.BufferedWriter; -//import java.io.File; -//import java.io.FileWriter; -//import java.io.IOException; -//import java.util.ArrayList; -//import java.util.Collections; -//import java.util.HashMap; -//import java.util.Iterator; -//import java.util.List; -//import java.util.Map; -//import java.util.Map.Entry; -//import java.util.TreeMap; -//import java.util.Vector; -// -//import org.qcmg.bed.BEDFileReader; -//import org.qcmg.bed.BEDRecord; -//import org.qcmg.common.log.QLogger; -//import org.qcmg.common.log.QLoggerFactory; -//import org.qcmg.common.model.ReferenceNameComparator; -//import org.qcmg.common.model.VCFRecord; -//import org.qcmg.common.util.FileUtils; -//import org.qcmg.qmule.gff3.GFF3FileReader; -//import org.qcmg.qmule.gff3.GFF3Record; -//import org.qcmg.maf.MAFRecord; -//import org.qcmg.qmule.tab.TabbedFileReader; -//import org.qcmg.qmule.tab.TabbedRecord; -//import org.qcmg.vcf.VCFFileReader; -// -//public class SnpToReferenceRegionFilter { -// -// private String logFile; -// private String[] cmdLineInputFiles; -// private String[] cmdLineOutputFiles; -// private List chromosomes = new ArrayList(); -// private int exitStatus; -// private Map> vcfRecords = new HashMap>(); -// private Map> mafRecords = new HashMap>(); -// private Map> gffRecords = new HashMap>(); -// private Map> bedRecords = new HashMap>(); -// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); -// private List overlappingMafRecords = new ArrayList(); -// private List notOverlappingMafRecords = new ArrayList(); -// private List overlappingVcfRecords = new ArrayList(); -// private List notOverlappingVcfRecords = new ArrayList(); -// private int overlapCount = 0; -// private int notOverlappingCount = 0; -// private int snpCount; -// private Vector header = new Vector(); -// private String inputSnpType; -// -// private static QLogger logger; -// -// public int engage() throws Exception { -// inputSnpType = null; -// if (cmdLineInputFiles[0].endsWith("maf")) { -// logger.info("MAF File: " + cmdLineInputFiles[0]); -// loadMafFile(); -// inputSnpType = "MAF"; -// if (mafRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from maf file"); -// } -// if (cmdLineInputFiles[0].endsWith("vcf")) { -// logger.info("VCF File: " + cmdLineInputFiles[0]); -// loadVCFFile(); -// inputSnpType = "VCF"; -// if (vcfRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from vcf file"); -// } -// if (cmdLineInputFiles[1].endsWith("bed")) { -// logger.info("BED File: " + cmdLineInputFiles[1]); -// } else if (cmdLineInputFiles[1].endsWith("gff3")) { -// logger.info("GFF3 File: " + cmdLineInputFiles[1]); -// 
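Note (illustration, not part of this patch): the comparison methods in the file deleted above all use the same lookup. Regions are keyed in a TreeMap by start coordinate, each SNP position is tested against the floorEntry and ceilingEntry for that position, and BED starts are shifted by +1 because BED is zero-based while the VCF/MAF positions are one-based. A simplified, hypothetical sketch of that check follows, with an int array standing in for the BEDRecord type; as in the deleted code, only the nearest region on either side of the position is examined.

    import java.util.Map;
    import java.util.TreeMap;

    // Hypothetical, condensed version of the floor/ceiling overlap test used by
    // compareBedRecordsToVcf() and mafRecordFallsInBEDRecord().
    final class RegionOverlapSketch {
        // BED intervals are zero-based and half-open: [chromStart, chromEnd)
        private final TreeMap<Integer, int[]> regionsByStart = new TreeMap<>();

        void addBedRegion(int chromStart, int chromEnd) {
            regionsByStart.put(chromStart, new int[] {chromStart, chromEnd});
        }

        // position follows the one-based VCF/MAF convention
        boolean overlaps(int position) {
            Map.Entry<Integer, int[]> floor = regionsByStart.floorEntry(position);
            Map.Entry<Integer, int[]> ceiling = regionsByStart.ceilingEntry(position);
            return falls(position, floor) || falls(position, ceiling);
        }

        private static boolean falls(int position, Map.Entry<Integer, int[]> entry) {
            if (entry == null) {
                return false;
            }
            int[] region = entry.getValue();
            // +1 converts the zero-based BED start into the one-based SNP coordinate system
            return position >= region[0] + 1 && position <= region[1];
        }
    }

For example, addBedRegion(99, 200) followed by overlaps(100) returns true, because the zero-based start 99 is position 100 in one-based coordinates.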
} -// logger.info("Output file: " + cmdLineOutputFiles[0]); -// logger.info("Output file: " + cmdLineOutputFiles[1]); -// -// String fileType = null; -// if (cmdLineInputFiles[1].endsWith("bed")) { -// loadBedFile(); -// fileType = "bed"; -// } else if (cmdLineInputFiles[1].endsWith("gff3")) { -// fileType = "gff3"; -// loadGffFile(); -// } else { -// throw new IllegalArgumentException("File type for reference regions is not recognized. Must be bed or gff3"); -// } -// -// Collections.sort(chromosomes,COMPARATOR); -// -// writeHeader(); -// -// if (fileType.equals("bed")) { -// if (bedRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from bed file"); -// for (String c: chromosomes) { -// if (inputSnpType.equals("MAF")) { -// -// compareBedRecordsToMaf(c, bedRecords.get(c)); -// } -// if (inputSnpType.equals("VCF")) { -// compareBedRecordsToVcf(c, bedRecords.get(c)); -// } -// } -// } -// -// if (fileType.equals("gff3")) { -// if (gffRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); -// for (String c: chromosomes) { -// logger.info("Chromosome: " + c); -// if (inputSnpType.equals("MAF")) { -// compareGFFRecordsToMaf(c, gffRecords.get(c)); -// } -// if (inputSnpType.equals("VCF")) { -// compareGFFRecordsToVcf(c, gffRecords.get(c)); -// } -// } -// } -// -// logger.info("SUMMARY"); -// logger.info("Total Records in " +inputSnpType+ ": " + snpCount); -// logger.info("Total Records in supplied reference regions: " + overlapCount); -// logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); -// return exitStatus; -// } -// -// private void loadVCFFile() throws Exception { -// VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[0])); -// try { -// header = new Vector(); -// Iterator iterator = reader.getRecordIterator(); -// snpCount = 0; -// if (reader.getHeader() != null) { -// Iterator iter = reader.getHeader().iterator(); -// while (iter.hasNext()) { -// header.add(iter.next()); -// } -// } -// while (iterator.hasNext()) { -// -// VCFRecord vcfRec = iterator.next(); -// -// snpCount++; -// if (vcfRecords.containsKey(vcfRec.getChromosome())) { -// vcfRecords.get(vcfRec.getChromosome()).add(vcfRec); -// } else { -// List list = new ArrayList(); -// list.add(vcfRec); -// vcfRecords.put(vcfRec.getChromosome(),list); -// } -// if (!chromosomes.contains(vcfRec.getChromosome())) { -// chromosomes.add(vcfRec.getChromosome()); -// } -// } -// logger.info("loaded maf file, total records: " + snpCount); -// } finally { -// reader.close(); -// } -// } -// -// private void loadMafFile() throws Exception { -// TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); -// try { -// header = new Vector(); -// Iterator iterator = reader.getRecordIterator(); -// snpCount = 0; -// if (reader.getHeader() != null) { -// Iterator iter = reader.getHeader().iterator(); -// while (iter.hasNext()) { -// header.add(iter.next()); -// } -// } -// while (iterator.hasNext()) { -// -// TabbedRecord tab = iterator.next(); -// -// if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo")) { -// header.add(tab.getData()); -// continue; -// } -// snpCount++; -// MAFRecord mafRec = convertToMafRecord(tab.getData().split("\t")); -// mafRec.setData(tab.getData()); -// if (mafRecords.containsKey(mafRec.getChromosome())) { -// mafRecords.get(mafRec.getChromosome()).add(mafRec); -// } else { -// List list = new ArrayList(); -// list.add(mafRec); -// 
mafRecords.put(mafRec.getChromosome(),list); -// } -// if (!chromosomes.contains(mafRec.getChromosome())) { -// chromosomes.add(mafRec.getChromosome()); -// } -// } -// logger.info("loaded maf file, total records: " + snpCount); -// } finally { -// reader.close(); -// } -// } -// -// private void loadBedFile() throws IOException { -// BEDFileReader reader = new BEDFileReader(new File(cmdLineInputFiles[1])); -// try { -// int count = 0; -// for (BEDRecord record : reader) { -// count++; -// String chr = record.getChrom(); -// if (inputSnpType.equals("MAF")) { -// chr = record.getChrom().replace("chr", ""); -// } -// if (bedRecords.containsKey(chr)) { -// bedRecords.get(chr).put(record.getChromStart(), record); -// } else { -// TreeMap map = new TreeMap(); -// map.put(record.getChromStart(), record); -// bedRecords.put(chr,map); -// } -// } -// logger.info("loaded bed file, total record: " + count); -// } finally { -// reader.close(); -// } -// -// } -// -// private void loadGffFile() throws Exception { -// GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); -// try { -// int count = 0; -// for (GFF3Record record : reader) { -// count++; -// String chr = record.getSeqId(); -// if (inputSnpType.equals("MAF")) { -// chr = record.getSeqId().replace("chr", ""); -// } -// if (gffRecords.containsKey(chr)) { -// gffRecords.get(chr).put(record.getStart(), record); -// } else { -// TreeMap map = new TreeMap(); -// map.put(record.getStart(), record); -// gffRecords.put(chr,map); -// } -// } -// -// logger.info("loaded gff3 file, total record: " + count); -// } finally { -// reader.close(); -// } -// } -// -// public void compareBedRecordsToVcf(String chromosome, TreeMap map) throws IOException { -// List vcfList = vcfRecords.get(chromosome); -// -// //bed positions are zero based -// if (map != null) { -// -// for (VCFRecord snp : vcfList) { -// Entry floor = map.floorEntry(new Integer(snp.getPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getPosition())); -// -// if (vcfRecordFallsInBEDRecord(snp, floor) || vcfRecordFallsInBEDRecord(snp, ceiling)) { -// overlapCount++; -// overlappingVcfRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingVcfRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// } -// } else { -// notOverlappingVcfRecords.addAll(vcfList); -// notOverlappingCount += vcfList.size(); -// } -// writeParsedVcfRecords(); -// } -// -// public void compareBedRecordsToMaf(String chromosome, TreeMap map) throws IOException { -// List mafList = mafRecords.get(chromosome); -// -// //bed positions are zero based -// if (map != null) { -// -// for (MAFRecord snp : mafList) { -// -// Entry floor = map.floorEntry(new Integer(snp.getStartPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getStartPosition())); -// -// if (mafRecordFallsInBEDRecord(snp, floor) || mafRecordFallsInBEDRecord(snp, ceiling)) { -// overlapCount++; -// overlappingMafRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingMafRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// -// } -// } else { -// notOverlappingMafRecords.addAll(mafList); -// notOverlappingCount += mafList.size(); -// } -// writeParsedMafRecords(); -// } -// -// public void compareGFFRecordsToVcf(String chromosome, TreeMap map) throws IOException { -// List vcfList = 
vcfRecords.get(chromosome); -// -// if (map != null) { -// -// logger.info("List size: " + vcfList.size()); -// for (VCFRecord snp : vcfList) { -// Entry floor = map.floorEntry(new Integer(snp.getPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getPosition())); -// -// if (vcfRecordFallsInGFF3Record(snp, floor) || vcfRecordFallsInGFF3Record(snp, ceiling)) { -// overlapCount++; -// overlappingVcfRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingVcfRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// } -// } else { -// notOverlappingVcfRecords.addAll(vcfList); -// notOverlappingCount += vcfList.size(); -// } -// writeParsedVcfRecords(); -// } -// -// public void compareGFFRecordsToMaf(String chromosome, TreeMap map) throws IOException { -// List mafList = mafRecords.get(chromosome); -// -// if (map != null) { -// -// for (MAFRecord snp : mafList) { -// -// Entry floor = map.floorEntry(new Integer(snp.getStartPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getStartPosition())); -// -// if (mafRecordFallsInGFF3Record(snp, floor) || mafRecordFallsInGFF3Record(snp, ceiling)) { -// overlapCount++; -// overlappingMafRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingMafRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// } -// } else { -// notOverlappingMafRecords.addAll(mafList); -// notOverlappingCount += mafList.size(); -// } -// writeParsedMafRecords(); -// } -// -// -// private boolean mafRecordFallsInGFF3Record(MAFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getStartPosition() >= entry.getValue().getStart() && snp.getStartPosition() <= entry.getValue().getEnd() || -// snp.getEndPosition() >= entry.getValue().getStart() && snp.getEndPosition() <= entry.getValue().getEnd()) { -// return true; -// } -// } -// return false; -// } -// -// private boolean mafRecordFallsInBEDRecord(MAFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getStartPosition() >= entry.getValue().getChromStart()+1 && snp.getStartPosition() <= entry.getValue().getChromEnd() || -// snp.getEndPosition() >= entry.getValue().getChromStart()+1 && snp.getEndPosition() <= entry.getValue().getChromEnd()) { -// return true; -// } -// } -// return false; -// } -// -// private boolean vcfRecordFallsInGFF3Record(VCFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getPosition() >= entry.getValue().getStart() && snp.getPosition() <= entry.getValue().getEnd()) { -// return true; -// } -// } -// return false; -// } -// -// private boolean vcfRecordFallsInBEDRecord(VCFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getPosition() >= entry.getValue().getChromStart()+1 && snp.getPosition() <= entry.getValue().getChromEnd()) { -// return true; -// } -// } -// return false; -// } -// -// public String[] getCmdLineInputFiles() { -// return cmdLineInputFiles; -// } -// -// public void setCmdLineInputFiles(String[] cmdLineInputFiles) { -// this.cmdLineInputFiles = cmdLineInputFiles; -// } -// -// public String[] getCmdLineOutputFiles() { -// return cmdLineOutputFiles; -// } -// -// public void setCmdLineOutputFiles(String[] cmdLineOutputFiles) { -// this.cmdLineOutputFiles = cmdLineOutputFiles; -// } -// -// private void writeParsedMafRecords() throws IOException { -// writeMafRecordsToFile(cmdLineOutputFiles[0], 
overlappingMafRecords); -// writeMafRecordsToFile(cmdLineOutputFiles[1], notOverlappingMafRecords); -// } -// -// private void writeParsedVcfRecords() throws IOException { -// writeVcfRecordsToFile(cmdLineOutputFiles[0], overlappingVcfRecords); -// writeVcfRecordsToFile(cmdLineOutputFiles[1], notOverlappingVcfRecords); -// } -// -// private void writeHeader() throws IOException { -// writeHeader(cmdLineOutputFiles[0]); -// writeHeader(cmdLineOutputFiles[1]); -// } -// -// private void writeHeader(String fileName) throws IOException { -// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); -// -// for (String h: header) { -// writer.write(h + "\n"); -// } -// writer.close(); -// } -// -// private void writeMafRecordsToFile(String fileName, -// List outputRecords) throws IOException { -// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); -// -// for (MAFRecord r: outputRecords) { -// writer.write(r.getData() + "\n"); -// } -// -// writer.close(); -// outputRecords.clear(); -// } -// -// private void writeVcfRecordsToFile(String fileName, -// List outputRecords) throws IOException { -// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); -// -// for (VCFRecord r: outputRecords) { -// writer.write(r.toString() + "\n"); -// } -// -// writer.close(); -// outputRecords.clear(); -// } -// -// -// protected int setup(String args[]) throws Exception{ -// int returnStatus = 1; -// if (null == args || args.length == 0) { -// System.err.println(Messages.USAGE); -// System.exit(1); -// } -// Options options = new Options(args); -// -// if (options.hasHelpOption()) { -// System.err.println(Messages.USAGE); -// options.displayHelp(); -// returnStatus = 0; -// } else if (options.hasVersionOption()) { -// System.err.println(Messages.getVersionMessage()); -// returnStatus = 0; -// } else if (options.getInputFileNames().length < 1) { -// System.err.println(Messages.USAGE); -// } else if ( ! options.hasLogOption()) { -// System.err.println(Messages.USAGE); -// } else { -// // configure logging -// logFile = options.getLogFile(); -// logger = QLoggerFactory.getLogger(SnpToReferenceRegionFilter.class, logFile, options.getLogLevel()); -// logger.logInitialExecutionStats("SnpMafBedFileComparison", SnpToReferenceRegionFilter.class.getPackage().getImplementationVersion(), args); -// -// // get list of file names -// cmdLineInputFiles = options.getInputFileNames(); -// if (cmdLineInputFiles.length < 1) { -// throw new QMuleException("INSUFFICIENT_ARGUMENTS"); -// } else { -// // loop through supplied files - check they can be read -// for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { -// if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { -// throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); -// } -// } -// } -// -// // check supplied output files can be written to -// if (null != options.getOutputFileNames()) { -// cmdLineOutputFiles = options.getOutputFileNames(); -// for (String outputFile : cmdLineOutputFiles) { -// if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) -// throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); -// } -// } -// return engage(); -// } -// return returnStatus; -// } -// -// -// public static void main(String[] args) throws Exception { -// SnpToReferenceRegionFilter sp = new SnpToReferenceRegionFilter(); -// int exitStatus = sp.setup(args); -// if (null != logger) -// logger.logFinalExecutionStats(exitStatus); -// -// System.exit(exitStatus); -// } -// -// public static MAFRecord convertToMafRecord(String[] params) { -// MAFRecord maf = new MAFRecord(); -//// System.out.println(params[0]); -//// maf.setHugoSymbol(params[0]); -//// maf.setEntrezGeneId(params[1]); -//// maf.setCenter(params[2]); -//// maf.setNcbiBuild(Integer.parseInt(params[3])); -// maf.setChromosome(params[0]); -// maf.setStartPosition(Integer.parseInt(params[1])); -// maf.setEndPosition(Integer.parseInt(params[1])); -//// maf.setStrand(params[7].charAt(0)); -//// maf.setVariantClassification(params[8]); -//// maf.setVariantType(params[9]); -//// maf.setRef(params[10]); -//// maf.setTumourAllele1(params[11]); -//// maf.setTumourAllele2(params[12]); -//// maf.setDbSnpId(params[13]); -//// maf.setDbSnpValStatus(params[14]); -//// maf.setTumourSampleBarcode(params[15]); -//// maf.setNormalSampleBarcode(params[16]); -//// maf.setNormalAllele1(params[17]); -//// maf.setNormalAllele2(params[18]); -//// maf.setTumourValidationAllele1(params[19]); -//// maf.setTumourValidationAllele2(params[20]); -//// maf.setNormalValidationAllele1(params[21]); -//// maf.setNormalValidationAllele2(params[22]); -//// maf.setVerificationStatus(params[23]); -//// maf.setValidationStatus(params[24]); -//// maf.setMutationStatus(params[25]); -//// maf.setSequencingPhase(params[26]); -//// maf.setSequencingSource(params[27]); -//// maf.setValidationMethod(params[28]); -//// maf.setScore(params[29]); -//// maf.setBamFile(params[30]); -//// maf.setSequencer(params[31]); -//// // QCMG -//// if (params.length > 32) -//// maf.setFlag(params[32]); -//// if (params.length > 33) -//// maf.setNd(params[33]); -//// if (params.length > 34) -//// maf.setTd(params[34]); -//// if (params.length > 35) -//// maf.setCanonicalTranscriptId(params[35]); -//// if (params.length > 36) -//// maf.setCanonicalAAChange(params[36]); -//// if (params.length > 37) -//// maf.setCanonicalBaseChange(params[37]); -//// if (params.length > 38) -//// maf.setAlternateTranscriptId(params[38]); -//// if (params.length > 39) -//// maf.setAlternateAAChange(params[39]); -//// if (params.length > 40) -//// maf.setAlternateBaseChange(params[40]); -// -// return maf; -// } -// -// public List getChromosomes() { -// return chromosomes; -// } -// -// public void setChromosomes(List chromosomes) { -// this.chromosomes = chromosomes; -// } -// -// public Map> getMafRecords() { -// return mafRecords; -// } -// -// public void setMafRecords(Map> mafRecords) { -// this.mafRecords = mafRecords; -// } -// -// public List getOverlappingRecords() { -// return overlappingMafRecords; -// } -// -// public void setOverlappingRecords(List overlappingRecords) { -// this.overlappingMafRecords = overlappingRecords; -// } -// -// public List getNotOverlappingRecords() { -// return notOverlappingMafRecords; -// } -// -// public void setNotOverlappingRecords(List notOverlappingRecords) { -// this.notOverlappingMafRecords = notOverlappingRecords; -// } -// -// public int getOverlapCount() { -// return overlapCount; -// } -// -// public void setOverlapCount(int overlapCount) { -// this.overlapCount = 
overlapCount; -// } -// -// public int getNotOverlappingCount() { -// return notOverlappingCount; -// } -// -// public void setNotOverlappingCount(int notOverlappingCount) { -// this.notOverlappingCount = notOverlappingCount; -// } -// -// public int getMafCount() { -// return snpCount; -// } -// -// public void setMafCount(int mafCount) { -// this.snpCount = mafCount; -// } -// -// -// -//} diff --git a/qmule/src/org/qcmg/qmule/SubSample.java b/qmule/src/org/qcmg/qmule/SubSample.java deleted file mode 100644 index 63f71a737..000000000 --- a/qmule/src/org/qcmg/qmule/SubSample.java +++ /dev/null @@ -1,165 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.HeaderUtils; -import org.qcmg.picard.SAMFileReaderFactory; - -import htsjdk.samtools.*; - -public class SubSample { - SamReader reader; - SAMFileWriter writer; - double proportion; - QLogger logger; - - SubSample(Options op, QLogger log) throws Exception{ - - proportion = op.getPROPORTION(); - logger = log; - - String[] inputs =op.getInputFileNames(); - String[] outputs =op.getOutputFileNames(); - if(inputs.length == 0 || outputs.length == 0) - throw new Exception("please specify input/output"); - - //get initialized logger - File input = new File(inputs[0]); - File output = new File(outputs[0]); - if(!input.canRead()) - throw new Exception("unreadable input: " + input.getAbsolutePath()); - - reader = SAMFileReaderFactory.createSAMFileReader(input,ValidationStringency.LENIENT); - SAMFileHeader header = reader.getFileHeader(); - if(header.getSortOrder() != SAMFileHeader.SortOrder.queryname){ - throw new Exception("the input BAM is not sorted by queryname"); - } - SAMFileWriterFactory writeFactory = new SAMFileWriterFactory(); - HeaderUtils.addProgramRecord(header, op.getCommandLine(), null ); - - writer = writeFactory.makeSAMOrBAMWriter(header, false, output ); - - - } - - void run() throws Exception{ - int numPair = 0; - int numSingle = 0; - int numtotal = 0; - SAMRecordIterator ie = reader.iterator(); - ArrayList adjacents = new ArrayList(); - adjacents.add(ie.next()); - - while(ie.hasNext()){ - numtotal ++; - SAMRecord record = ie.next(); - - //select reads - if(! record.getReadName().equals(adjacents.get(0).getReadName())){ - //select pairs - if(adjacents.size() > 1) - numPair += selectPair( adjacents); - //select single - else if(Math.random() < proportion ){ - writer.addAlignment(adjacents.get(0)); - numSingle ++; - } - //after reporting clear the arraylist - adjacents.clear(); - } - adjacents.add(record); - - } - - //select last records - if(adjacents.size() > 1) - selectPair( adjacents); - else if(Math.random() < proportion ) - writer.addAlignment(adjacents.get(0)); - - reader.close(); - writer.close(); - - logger.info("total reads in input is " + numtotal); - logger.info("select paired reads is " + numPair); - logger.info("select single reads is " + numSingle); - logger.info("the rate of selected reads is "+ ((double)(numPair + numSingle)) / numtotal); - - } - - private int selectPair(ArrayList pairs) { - - if(pairs.size() == 0 ){ - logger.error("Program Error: select reads from empty arraylist! 
"); - return 0; - } - if(pairs.size() == 1 ){ - logger.error("program Error: single read in paired arraylist -- " + pairs.get(0).getReadName()); - return 0; - } - - int num = 0; - while(pairs.size() >= 2){ - //seek pair one by one - SAMRecord first = pairs.get(0); - SAMRecord mate = null; - pairs.remove(first); - - for(int i = 0; i < pairs.size(); i ++){ - if(first.getReadGroup().getId().equals(pairs.get(i).getReadGroup().getId())){ - mate = pairs.get(i); - pairs.remove(mate); - break; - } - } - - - if(Math.random() < proportion ){ - num ++; //number of selected paired reads - writer.addAlignment(first); - if(mate != null){ - num ++; - writer.addAlignment(mate); - }else{ - logger.error("paired reads missing mate -- " + pairs.get(0).getReadName()); - } - } - } - - return num; - } - - public static void main(String[] args) throws Exception{ - Options op = new Options(SubSample.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_SUBSAMPLE")); - op.displayHelp(); - System.exit(0); - } - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - QLogger logger = QLoggerFactory.getLogger(SubSample.class, op.getLogFile(), op.getLogLevel()); - try{ - logger.logInitialExecutionStats(SubSample.class.toString(), version, args); - logger.exec("Porportion " + op.getPROPORTION()); - SubSample mySample = new SubSample(op, logger); - mySample.run(); - logger.logFinalExecutionStats(0); - System.exit(0); - }catch(Exception e){ - System.err.println( e.getMessage() + e.toString()); - logger.logFinalExecutionStats(-1); - System.exit(1); - } - } - -} diff --git a/qmule/src/org/qcmg/qmule/SubSample.java-- b/qmule/src/org/qcmg/qmule/SubSample.java-- deleted file mode 100644 index 63f71a737..000000000 --- a/qmule/src/org/qcmg/qmule/SubSample.java-- +++ /dev/null @@ -1,165 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.HeaderUtils; -import org.qcmg.picard.SAMFileReaderFactory; - -import htsjdk.samtools.*; - -public class SubSample { - SamReader reader; - SAMFileWriter writer; - double proportion; - QLogger logger; - - SubSample(Options op, QLogger log) throws Exception{ - - proportion = op.getPROPORTION(); - logger = log; - - String[] inputs =op.getInputFileNames(); - String[] outputs =op.getOutputFileNames(); - if(inputs.length == 0 || outputs.length == 0) - throw new Exception("please specify input/output"); - - //get initialized logger - File input = new File(inputs[0]); - File output = new File(outputs[0]); - if(!input.canRead()) - throw new Exception("unreadable input: " + input.getAbsolutePath()); - - reader = SAMFileReaderFactory.createSAMFileReader(input,ValidationStringency.LENIENT); - SAMFileHeader header = reader.getFileHeader(); - if(header.getSortOrder() != SAMFileHeader.SortOrder.queryname){ - throw new Exception("the input BAM is not sorted by queryname"); - } - SAMFileWriterFactory writeFactory = new SAMFileWriterFactory(); - HeaderUtils.addProgramRecord(header, op.getCommandLine(), null ); - - writer = writeFactory.makeSAMOrBAMWriter(header, false, output ); - - - } - - void run() throws Exception{ - int numPair = 0; - int numSingle = 0; - int numtotal = 0; - SAMRecordIterator ie = reader.iterator(); - ArrayList adjacents = new ArrayList(); - adjacents.add(ie.next()); - - while(ie.hasNext()){ - numtotal ++; - SAMRecord record = ie.next(); - - //select reads - if(! record.getReadName().equals(adjacents.get(0).getReadName())){ - //select pairs - if(adjacents.size() > 1) - numPair += selectPair( adjacents); - //select single - else if(Math.random() < proportion ){ - writer.addAlignment(adjacents.get(0)); - numSingle ++; - } - //after reporting clear the arraylist - adjacents.clear(); - } - adjacents.add(record); - - } - - //select last records - if(adjacents.size() > 1) - selectPair( adjacents); - else if(Math.random() < proportion ) - writer.addAlignment(adjacents.get(0)); - - reader.close(); - writer.close(); - - logger.info("total reads in input is " + numtotal); - logger.info("select paired reads is " + numPair); - logger.info("select single reads is " + numSingle); - logger.info("the rate of selected reads is "+ ((double)(numPair + numSingle)) / numtotal); - - } - - private int selectPair(ArrayList pairs) { - - if(pairs.size() == 0 ){ - logger.error("Program Error: select reads from empty arraylist! 
"); - return 0; - } - if(pairs.size() == 1 ){ - logger.error("program Error: single read in paired arraylist -- " + pairs.get(0).getReadName()); - return 0; - } - - int num = 0; - while(pairs.size() >= 2){ - //seek pair one by one - SAMRecord first = pairs.get(0); - SAMRecord mate = null; - pairs.remove(first); - - for(int i = 0; i < pairs.size(); i ++){ - if(first.getReadGroup().getId().equals(pairs.get(i).getReadGroup().getId())){ - mate = pairs.get(i); - pairs.remove(mate); - break; - } - } - - - if(Math.random() < proportion ){ - num ++; //number of selected paired reads - writer.addAlignment(first); - if(mate != null){ - num ++; - writer.addAlignment(mate); - }else{ - logger.error("paired reads missing mate -- " + pairs.get(0).getReadName()); - } - } - } - - return num; - } - - public static void main(String[] args) throws Exception{ - Options op = new Options(SubSample.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_SUBSAMPLE")); - op.displayHelp(); - System.exit(0); - } - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - QLogger logger = QLoggerFactory.getLogger(SubSample.class, op.getLogFile(), op.getLogLevel()); - try{ - logger.logInitialExecutionStats(SubSample.class.toString(), version, args); - logger.exec("Porportion " + op.getPROPORTION()); - SubSample mySample = new SubSample(op, logger); - mySample.run(); - logger.logFinalExecutionStats(0); - System.exit(0); - }catch(Exception e){ - System.err.println( e.getMessage() + e.toString()); - logger.logFinalExecutionStats(-1); - System.exit(1); - } - } - -} diff --git a/qmule/src/org/qcmg/qmule/TestFileFinder.java b/qmule/src/org/qcmg/qmule/TestFileFinder.java deleted file mode 100644 index 28da0aa08..000000000 --- a/qmule/src/org/qcmg/qmule/TestFileFinder.java +++ /dev/null @@ -1,23 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; - -import org.qcmg.common.util.FileUtils; - -public class TestFileFinder { - public static void main(String[] args) { - File [] files = FileUtils.findDirectories(args[0], "seq_final", true); - System.out.println("no of files: " + files.length); - for (File f : files) { - System.out.println("file found: " + f.getAbsolutePath()); - } -// File [] files = FileUtils.findFiles(args[0], "java", true); -// System.out.println("no of files: " + files.length); -// for (File f : files) { -// System.out.println("file found: " + f.getAbsolutePath()); -// } - } -} diff --git a/qmule/src/org/qcmg/qmule/TestFileFinder.java-- b/qmule/src/org/qcmg/qmule/TestFileFinder.java-- deleted file mode 100644 index 28da0aa08..000000000 --- a/qmule/src/org/qcmg/qmule/TestFileFinder.java-- +++ /dev/null @@ -1,23 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; - -import org.qcmg.common.util.FileUtils; - -public class TestFileFinder { - public static void main(String[] args) { - File [] files = FileUtils.findDirectories(args[0], "seq_final", true); - System.out.println("no of files: " + files.length); - for (File f : files) { - System.out.println("file found: " + f.getAbsolutePath()); - } -// File [] files = FileUtils.findFiles(args[0], "java", true); -// System.out.println("no of files: " + files.length); -// for (File f : files) { -// System.out.println("file found: " + f.getAbsolutePath()); -// } - } -} diff --git a/qmule/src/org/qcmg/qmule/TestJarUpdate.java b/qmule/src/org/qcmg/qmule/TestJarUpdate.java deleted file mode 100644 index c1937f55a..000000000 --- a/qmule/src/org/qcmg/qmule/TestJarUpdate.java +++ /dev/null @@ -1,191 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.OutputStream; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.List; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.SAMFileWriterFactory; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.picard.SAMFileReaderFactory; - -public class TestJarUpdate { - - private SAMFileWriter writer; - private SamReader reader; - - private void doWork() throws Exception{ - try { - - LoadReferencedClasses.loadClasses(getClass()); - -// URL className = getClass().getResource(TestJarUpdate.class.getName()); -// if (null != className) -// System.out.println("url: " + className.getFile()); -// else -// System.out.println("url: " + null); -// -// File jarFile = new File(TestJarUpdate.class.getProtectionDomain().getCodeSource().getLocation().toURI()); -// if (null != jarFile) -// System.out.println("jarFile: " + jarFile.getName()); -// else -// System.out.println("jarFile: " + null); -// -// System.out.println("is file type valid jar: " + FileUtils.isFileTypeValid(jarFile, "jar")); -// -// System.out.println("BEFORE: no of loaded packages: " + Package.getPackages().length); -// -// if (FileUtils.isFileTypeValid(jarFile, "jar")) { -// -// // got jar file - load and -// JarFile jf = new JarFile(jarFile); -// Attributes att = jf.getManifest().getMainAttributes(); -// System.out.println("att.size" + att.size()); -// String classpath = att.getValue("Class-Path"); -// System.out.println("classpath: " + classpath); -// -// String [] jars = classpath.split(" "); -// for (String jar : jars) { -// JarFile internalJarFile = new JarFile(jar); -// Enumeration enums = internalJarFile.entries(); -// while (enums.hasMoreElements()) { -// JarEntry je = enums.nextElement(); -// if (FileUtils.isFileTypeValid(je.getName(), "class")) { -// String blah = je.getName().replace(".class", ""); -// blah = blah.replaceAll(System.getProperty("file.separator"), "."); -// System.out.println("about to load class: " + blah); -// this.getClass().getClassLoader().loadClass(blah); -// } -// } -// } -// -// } -// -// System.out.println("AFTER: no of loaded packages: " + Package.getPackages().length); - - - // write to bam file - // sleep for a few mins to allow the sam jar file to be removed/replaced - // close bam file - // tinker with 
class loader - File inputFile = File.createTempFile("testJarUpdateInput", ".sam"); - inputFile.deleteOnExit(); - File outputFile = File.createTempFile("testJarUpdateOutput", ".bam"); -// outputFile.deleteOnExit(); - - createCoverageSam(inputFile); - - reader = SAMFileReaderFactory.createSAMFileReader(inputFile); - - SAMFileHeader header = reader.getFileHeader(); - List recs = new ArrayList(); - - for( SAMRecord rec : reader) { - recs.add(rec); - } - - - SAMFileWriterFactory factory = new SAMFileWriterFactory(); - - writer = factory.makeSAMOrBAMWriter(header, true, outputFile); - -// for (int i = 0 ; i < 100 ; i++) - for( SAMRecord rec : recs) { - for (int i = 0 ; i < 100 ; i++) - writer.addAlignment(rec); - } - - System.out.println("About to sleep!"); - System.gc(); - Thread.sleep(60000); - System.out.println("Am awake now"); - - close(); - System.out.println("DONE!!!"); - } finally { - System.out.println("about to run close quietly"); - closeQuietly(); - System.out.println("DONE!!! again"); - } - } - - - public static void main(String[] args) throws Exception { - TestJarUpdate tju = new TestJarUpdate(); - tju.doWork(); - } - - - private void close() throws Exception { - try { - writer.close(); - reader.close(); - } catch (Exception e) { - System.out.println("Exception caught in close(): "); -// e.printStackTrace(); - throw new Exception("CANNOT_CLOSE_FILES"); - } - } - - private void closeQuietly() { - try { - close(); - } catch (Exception e) { -// e.printStackTrace(); - } - } - - public static final void createCoverageSam(final File fileName) throws Exception { - - OutputStream os = new FileOutputStream(fileName); - PrintStream ps = new PrintStream(os); - - ps.println("@HD VN:1.0 SO:coordinate"); - ps.println("@RG ID:ZZ SM:ES DS:rl=50 "); - ps.println("@RG ID:ZZZ SM:ES DS:rl=50 "); - ps.println("@PG ID:SOLID-GffToSam VN:1.4.3"); - ps.println("@SQ SN:chr1 LN:249250621"); - ps.println("@SQ SN:chr2 LN:243199373"); - ps.println("@SQ SN:chr3 LN:198022430"); - ps.println("@SQ SN:chr4 LN:191154276"); - ps.println("@SQ SN:chr5 LN:180915260"); - ps.println("@SQ SN:chr6 LN:171115067"); - ps.println("@SQ SN:chr7 LN:159138663"); - ps.println("@SQ SN:chr8 LN:146364022"); - ps.println("@SQ SN:chr9 LN:141213431"); - ps.println("@SQ SN:chr10 LN:135534747"); - ps.println("@SQ SN:chr11 LN:135006516"); - ps.println("@SQ SN:chr12 LN:133851895"); - ps.println("@SQ SN:chr13 LN:115169878"); - ps.println("@SQ SN:chr14 LN:107349540"); - ps.println("@SQ SN:chr15 LN:102531392"); - ps.println("@SQ SN:chr16 LN:90354753"); - ps.println("@SQ SN:chr17 LN:81195210"); - ps.println("@SQ SN:chr18 LN:78077248"); - ps.println("@SQ SN:chr19 LN:59128983"); - ps.println("@SQ SN:chr20 LN:63025520"); - ps.println("@SQ SN:chr21 LN:48129895"); - ps.println("@SQ SN:chr22 LN:51304566"); - ps.println("@SQ SN:chrX LN:155270560"); - ps.println("@SQ SN:chrY LN:59373566"); - ps.println("@SQ SN:chrM LN:16571"); - ps.println("1290_738_1025 0 chr1 54026 255 45M5H * 0 0 AACATTCCAAAAGTCAACCATCCAAGTTTATTCTAAATAGATGTG !DDDDDDDDDDDDDDDD''DDDDDD9DDDDDDDDD:<3B''DDD! RG:Z:ZZ CS:Z:T301130201000212101113201021003302230033233111 CQ:Z:BBB=B:@5?>B9A5?>B?'A49<475%@;6<+;9@'4)+8'1?:>"); - ps.println("2333_755_492 16 chr2 10103 255 10H40M * 0 0 CACACCACACCCACACACCACACACCACACCCACACCCAC !=DD?%+DD<)=DDD<@9)9C:DA.:DD>%%,?+%;<-1"); - ps.println("1879_282_595 0 chr3 60775 255 40M10H * 0 0 TCTAAATTTGTTTGATCACATACTCCTTTTCTGGCTAACA !DD,*@DDD''DD>5:DD>;DDDD=CDD8%%DA9-DDC0! 
RG:Z:ZZ CS:Z:T0223303001200123211133122020003210323011 CQ:Z:=><=,*7685'970/'437(4<:54*:84%%;/3''?;)("); - ps.close(); - os.close(); - } -}
diff --git a/qmule/src/org/qcmg/qmule/TestSort.java b/qmule/src/org/qcmg/qmule/TestSort.java deleted file mode 100644 index cf9faddb6..000000000 --- a/qmule/src/org/qcmg/qmule/TestSort.java +++ /dev/null @@ -1,109 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. 
- * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.IOException; -import java.net.InetAddress; -import java.text.SimpleDateFormat; -import java.util.Calendar; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.SAMFileWriterFactory; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.picard.SAMFileReaderFactory; - -public class TestSort { - private final File input; - private final File output; - private final int maxRecordsInRam; - private SAMFileHeader.SortOrder sort = SAMFileHeader.SortOrder.unsorted; - - TestSort(final String[] args) throws Exception{ - input = new File(args[0]); - output = new File(args[1]); - maxRecordsInRam = Integer.parseInt(args[2]); - - String sortOrder = args[3]; - if(sortOrder.equalsIgnoreCase("coordinate")) - sort = SAMFileHeader.SortOrder.coordinate; - else if(sortOrder.equalsIgnoreCase("queryname")) - sort = SAMFileHeader.SortOrder.queryname; - else if(! sortOrder.equalsIgnoreCase("unsorted")) - throw new Exception( sortOrder + " isn't valid SAMFileHeader sort order!"); - - System.out.println(getTime() + " host: " + InetAddress.getLocalHost().getHostName()); - System.out.println(getTime() + " input: " + input.getAbsolutePath()); - System.out.println(getTime() + " output: " + output.getAbsolutePath()); - System.out.println(getTime() + " sort order: " + sortOrder); - System.out.println(getTime() + " max Records In RAM: " + maxRecordsInRam); - } - - public void Sorting() throws Exception{ - SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); - SAMFileHeader header = reader.getFileHeader(); - - SAMFileWriterFactory writeFactory = new SAMFileWriterFactory(); - htsjdk.samtools.SAMFileWriterImpl.setDefaultMaxRecordsInRam(maxRecordsInRam ); - header.setSortOrder(sort); - if(sort.equals(SAMFileHeader.SortOrder.coordinate)) - writeFactory.setCreateIndex(true); - final SAMFileWriter writer = writeFactory.makeSAMOrBAMWriter(header, false, output); - - int num = 0; - for (SAMRecord record : reader) { - if(num % maxRecordsInRam == 0) - printRunInfo(num); - - writer.addAlignment(record); - num ++; - } - -// System.out.println(getTime() + " Merging tmp into output BAM, tmp location are " + htsjdk.samtools.util.IOUtil.getDefaultTmpDir()); - reader.close(); - writer.close(); - - System.out.println(getTime() + " created output: " + output.getAbsolutePath()); - } - - private void printRunInfo(int number) throws IOException{ - Runtime runtime = Runtime.getRuntime(); - int mb = 1024 * 1024; - long totalRAM = runtime.totalMemory() / mb; - long usedRAM = (runtime.totalMemory() - runtime.freeMemory()) / mb; - - String dateNow = getTime(); - - String info = String.format("%s read %d record. 
Total memeory: %dM, used memory: %dM", - dateNow, number, totalRAM, usedRAM); - - System.out.println(info); - } - - private String getTime(){ - Calendar currentDate = Calendar.getInstance(); - SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); - return "[" + formatter.format(currentDate.getTime()) + "]"; - } - - public static void main(final String[] args) { - try{ - TestSort mysort = new TestSort(args); - mysort.Sorting(); - System.exit(0); - }catch(Exception e){ - System.err.println("usage:qmule.TestSort [queryname/coordinate/unsorted]"); - System.err.println(e.toString()); - System.exit(1); - } - - - } -}
diff --git a/qmule/src/org/qcmg/qmule/TranscriptomeMule.java b/qmule/src/org/qcmg/qmule/TranscriptomeMule.java deleted file mode 100644 index c9b4f95f5..000000000 --- a/qmule/src/org/qcmg/qmule/TranscriptomeMule.java +++ /dev/null @@ -1,192 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.PileupElement; -import org.qcmg.common.string.StringUtils; -import org.qcmg.common.util.Constants; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.PileupUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.picard.util.PileupElementUtil; -import org.qcmg.pileup.PileupFileReader; - -public class TranscriptomeMule { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private final static int MIN_COVERAGE = 3; - // assuming all the tumours have been merged together, and we only have a single entry -// private static int[] tumourStartPositions = null; - private int[] tumourStartPositions = null; - - private final List positions = new ArrayList<>(100000); - - private static QLogger logger; - - public int engage() throws Exception { - logger.info("loading samtools mpileup data"); - walkPileup(cmdLineInputFiles[0]); - logger.info("loading samtools mpileup data - DONE [" + positions.size() + "]"); - - logger.info("outputting data"); - writeOutput(cmdLineOutputFiles[0]); - logger.info("outputting data - DONE"); - - return exitStatus; - } - - private void writeOutput(String outputFile) throws IOException { - FileWriter writer = new FileWriter(outputFile); - String header = "chr\tposition\tref\tpileup"; - - try { - writer.write(header + "\n"); - for (StringBuilder sb : positions) { - writer.write(sb.toString() + Constants.NEW_LINE); - } -// for (QSnpRecord record : positions) -// writer.write(record.getChromosome() + "\t" -// + record.getPosition() + "\t" -// + record.getRef() + "\t" -// + record.getTumourNucleotides() + "\n"); - } finally { - writer.close(); - } - } - - private void parsePileup(String record) { -// private void parsePileup(PileupRecord record) { - String[] params = TabTokenizer.tokenize(record); -// String[] params = tabbedPattern.split(record.getPileup(), -1); - if (null == tumourStartPositions) { - // set up the number of tumour start positions - // dependent on the number of columns in the input - // HUGE assumption that the mpileup data only contains tumour data here... - - //TODO is this right? 
- // first 3 columns are chr pos ref - int noOfSamples = (params.length -3) /3; - tumourStartPositions = new int[noOfSamples]; - for (int i = 0 ; i < noOfSamples ; i++) { - tumourStartPositions[i] = (i+1) * 3; - } - } - - // get coverage for both normal and tumour - int tumourCoverage = PileupUtils.getCoverageCount(params, tumourStartPositions); - if (tumourCoverage < MIN_COVERAGE) return; - - String tumourBases = PileupUtils.getBases(params, tumourStartPositions); - - // means there is an indel at this position - ignore - if (tumourBases.contains("+") || tumourBases.contains("-")) return; - String tumourBaseQualities = PileupUtils.getQualities(params, tumourStartPositions); - - // get bases as PileupElement collections - List tumourBaseCounts = PileupElementUtil.getPileupCounts(tumourBases, tumourBaseQualities); - - // get variant count for both - int tumourVariantCount = PileupElementUtil.getLargestVariantCount(tumourBaseCounts); - - if (tumourVariantCount >= 3) { - // keeper - StringBuilder sb = new StringBuilder(params[0]); - StringUtils.updateStringBuilder(sb, params[1], Constants.TAB); - StringUtils.updateStringBuilder(sb, params[2], Constants.TAB); - StringUtils.updateStringBuilder(sb, PileupElementUtil.getOABS(tumourBaseCounts, params[2].charAt(0)), Constants.TAB); - -// QSnpRecord rec = new QSnpRecord(params[0], Integer.parseInt(params[1]), params[2]); -// rec.setTumourOABS(PileupElementUtil.getOABS(tumourBaseCounts, rec.getRef().charAt(0))); - positions.add(sb); - } - - } - - private void walkPileup(String pileupFileName) throws Exception { - PileupFileReader reader = new PileupFileReader(new File(pileupFileName)); - int count = 0; - try { - for (String record : reader) { -// for (PileupRecord record : reader) { - parsePileup(record); - if (++count % 1000000 == 0) - logger.info("hit " + count + " pileup records, with " + positions.size() + " keepers."); - } - } finally { - reader.close(); - } - } - - public static void main(String[] args) throws Exception { - TranscriptomeMule sp = new TranscriptomeMule(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(TranscriptomeMule.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("Example", TranscriptomeMule.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -}
diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java deleted file mode 100644 index 222727290..000000000 --- a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java +++ /dev/null @@ -1,302 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.regex.Pattern; -import java.util.zip.GZIPOutputStream; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ReferenceNameComparator; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.PileupUtils; -import org.qcmg.qmule.gff3.GFF3FileReader; -import org.qcmg.qmule.gff3.GFF3Record; -import org.qcmg.qmule.gff3.GFF3RecordChromosomeAndPositionComparator; -import org.qcmg.pileup.PileupFileReader; - -public class WiggleFromPileup { - - private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); - private boolean compressOutput; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - private String pileupFormat; - private int normalCoverage, tumourCoverage; - private int noOfNormalFiles, noOfTumourFiles; - private long covGood, covBad, totalCov; - private int[] normalStartPositions, tumourStartPositions; - private String currentChromosome = "chr1"; - - private int lastPosition; - - private final List gffs = new ArrayList(); - - private static GFF3Record gffRecord; - private static Iterator iter; - - private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); - private final static GFF3RecordChromosomeAndPositionComparator CHR_POS_COMP = new GFF3RecordChromosomeAndPositionComparator(); - - - private static QLogger logger; - - public int engage() throws Exception { - - // setup - initialise(); - - loadGffFile(); - - Collections.sort(gffs, CHR_POS_COMP); - - if (gffs.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); - - // parse pileup file - parsePileup(); - - logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); - - return exitStatus; - } - - private void loadGffFile() throws Exception { - GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); - try { - int totalNoOfbaits = 0, ignoredBaits = 0; - for (GFF3Record record : reader) { - totalNoOfbaits++; - if (isGff3RecordBait(record.getType())) { - gffs.add(record); - } else ignoredBaits++; - } - - logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + gffs.size() + ", 
entries that didn't make it: " + ignoredBaits); - } finally { - reader.close(); - } - } - - protected static boolean isGff3RecordBait(String type) { - return "exon".equals(type); - } -// protected static boolean isGff3RecordBait(String type) { -// return "bait_1_100".equals(type) -// || "bait".equals(type) -// || "highbait".equals(type) -// || "lowbait".equals(type); -// } - - private void initialise() { - noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); - noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); - normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); - tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); - -// logger.info("start positions: " + Arrays.deepToString(normalStartPositions) + ", " + Arrays.deepToString(tumourStartPositions)); - } - - private void parsePileup() throws Exception { - Writer writer = getWriter(cmdLineOutputFiles[0]); - - iter = gffs.iterator(); - if (iter.hasNext()) { - setGffRecord(iter.next()); - } else { - throw new RuntimeException("Unable to set next Gff record"); - } - - PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); - StringBuilder sb = new StringBuilder(); - try { - for (String pr : reader) { -// for (PileupRecord pr : reader) { - addWiggleData(pr, sb); -// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); - if (++totalCov % 100000 == 0 && sb.length() > 0) { - writer.write(sb.toString()); - sb = new StringBuilder(); - - if (totalCov % 10000000 == 0) - logger.info("hit " + totalCov + " pileup records"); - } - } - - // empty contents of StringBuilder to writer - if (sb.length() > 0) writer.write(sb.toString()); - - } finally { - writer.close(); - reader.close(); - } - } - - private Writer getWriter(String fileName) throws IOException { - Writer writer = null; - if (compressOutput) { - writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); - } else { - writer = new FileWriter(new File(fileName)); - } - return writer; - } - - protected static boolean isPositionInBait(String chromosome, int position, Iterator iter, GFF3Record currentRecord) { - - if (chromosome.equals(currentRecord.getSeqId())) { - - if (position < currentRecord.getStart()) { - return false; - } else if (position <= currentRecord.getEnd()) { - return true; - } else { - return advanceGff3Record(chromosome, position, iter); - } - } else if (COMPARATOR.compare(chromosome, currentRecord.getSeqId()) < 0) { - // pileup position is in lower chromosome than gffRecord - return false; - } else { - // pileup position is in higher chromosome than gffRecord - // advance iterator - return advanceGff3Record(chromosome, position, iter); - } - } - - private static boolean advanceGff3Record(String chromosome, int position, - Iterator iter) { - if ( ! iter.hasNext()) { - // no more entries in gffs - return false; - } else { - setGffRecord(iter.next()); - return isPositionInBait(chromosome, position, iter, getGffRecord()); - } - } - - private void addWiggleData(String paramString, StringBuilder sb) { - int firstTabIndex = paramString.indexOf('\t'); - String chromosome = paramString.substring(0, firstTabIndex); - int position = Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); - - if ( ! isPositionInBait(chromosome, position, iter, getGffRecord())) return; -// if ( ! 
isPositionInBait(chromosome, position)) return; - - if (position != lastPosition +1 || ! currentChromosome.equalsIgnoreCase(chromosome)) { - // add new header to the StringBuilder - String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; - sb.append(wiggleHeader); - - // update last position and current chromosome - currentChromosome = chromosome; - } - lastPosition = position; - String [] params = tabbedPattern.split(paramString, -1); - - if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { - sb.append("0\n"); - ++covBad; - } else { - if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { - sb.append("1\n"); - ++covGood; - } else { - sb.append("0\n"); - ++covBad; - } - } - } - - public static void main(String[] args) throws Exception { - WiggleFromPileup sp = new WiggleFromPileup(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(WiggleFromPileup.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileup.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - // get app specific options - pileupFormat = options.getPileupFormat(); - normalCoverage = options.getNormalCoverage(); - tumourCoverage = options.getTumourCoverage(); - compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); - - if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. NNTT)"); - if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); - if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 
20)"); - - logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput); - - return engage(); - } - return returnStatus; - } - - protected static void setGffRecord(GFF3Record gffRecord) { - WiggleFromPileup.gffRecord = gffRecord; - } - - protected static GFF3Record getGffRecord() { - return gffRecord; - } -} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- deleted file mode 100644 index 222727290..000000000 --- a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- +++ /dev/null @@ -1,302 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.regex.Pattern; -import java.util.zip.GZIPOutputStream; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ReferenceNameComparator; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.PileupUtils; -import org.qcmg.qmule.gff3.GFF3FileReader; -import org.qcmg.qmule.gff3.GFF3Record; -import org.qcmg.qmule.gff3.GFF3RecordChromosomeAndPositionComparator; -import org.qcmg.pileup.PileupFileReader; - -public class WiggleFromPileup { - - private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); - private boolean compressOutput; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - private String pileupFormat; - private int normalCoverage, tumourCoverage; - private int noOfNormalFiles, noOfTumourFiles; - private long covGood, covBad, totalCov; - private int[] normalStartPositions, tumourStartPositions; - private String currentChromosome = "chr1"; - - private int lastPosition; - - private final List gffs = new ArrayList(); - - private static GFF3Record gffRecord; - private static Iterator iter; - - private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); - private final static GFF3RecordChromosomeAndPositionComparator CHR_POS_COMP = new GFF3RecordChromosomeAndPositionComparator(); - - - private static QLogger logger; - - public int engage() throws Exception { - - // setup - initialise(); - - loadGffFile(); - - Collections.sort(gffs, CHR_POS_COMP); - - if (gffs.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); - - // parse pileup file - parsePileup(); - - logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); - - return exitStatus; - } - - private void loadGffFile() throws Exception { - GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); - try { - int totalNoOfbaits = 0, ignoredBaits = 0; - for (GFF3Record record : reader) { - totalNoOfbaits++; - if (isGff3RecordBait(record.getType())) { - gffs.add(record); - } else ignoredBaits++; - } - - logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + gffs.size() + ", entries that didn't make it: " + ignoredBaits); - } finally { - reader.close(); - } - } - - 
protected static boolean isGff3RecordBait(String type) { - return "exon".equals(type); - } -// protected static boolean isGff3RecordBait(String type) { -// return "bait_1_100".equals(type) -// || "bait".equals(type) -// || "highbait".equals(type) -// || "lowbait".equals(type); -// } - - private void initialise() { - noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); - noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); - normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); - tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); - -// logger.info("start positions: " + Arrays.deepToString(normalStartPositions) + ", " + Arrays.deepToString(tumourStartPositions)); - } - - private void parsePileup() throws Exception { - Writer writer = getWriter(cmdLineOutputFiles[0]); - - iter = gffs.iterator(); - if (iter.hasNext()) { - setGffRecord(iter.next()); - } else { - throw new RuntimeException("Unable to set next Gff record"); - } - - PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); - StringBuilder sb = new StringBuilder(); - try { - for (String pr : reader) { -// for (PileupRecord pr : reader) { - addWiggleData(pr, sb); -// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); - if (++totalCov % 100000 == 0 && sb.length() > 0) { - writer.write(sb.toString()); - sb = new StringBuilder(); - - if (totalCov % 10000000 == 0) - logger.info("hit " + totalCov + " pileup records"); - } - } - - // empty contents of StringBuilder to writer - if (sb.length() > 0) writer.write(sb.toString()); - - } finally { - writer.close(); - reader.close(); - } - } - - private Writer getWriter(String fileName) throws IOException { - Writer writer = null; - if (compressOutput) { - writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); - } else { - writer = new FileWriter(new File(fileName)); - } - return writer; - } - - protected static boolean isPositionInBait(String chromosome, int position, Iterator iter, GFF3Record currentRecord) { - - if (chromosome.equals(currentRecord.getSeqId())) { - - if (position < currentRecord.getStart()) { - return false; - } else if (position <= currentRecord.getEnd()) { - return true; - } else { - return advanceGff3Record(chromosome, position, iter); - } - } else if (COMPARATOR.compare(chromosome, currentRecord.getSeqId()) < 0) { - // pileup position is in lower chromosome than gffRecord - return false; - } else { - // pileup position is in higher chromosome than gffRecord - // advance iterator - return advanceGff3Record(chromosome, position, iter); - } - } - - private static boolean advanceGff3Record(String chromosome, int position, - Iterator iter) { - if ( ! iter.hasNext()) { - // no more entries in gffs - return false; - } else { - setGffRecord(iter.next()); - return isPositionInBait(chromosome, position, iter, getGffRecord()); - } - } - - private void addWiggleData(String paramString, StringBuilder sb) { - int firstTabIndex = paramString.indexOf('\t'); - String chromosome = paramString.substring(0, firstTabIndex); - int position = Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); - - if ( ! isPositionInBait(chromosome, position, iter, getGffRecord())) return; -// if ( ! isPositionInBait(chromosome, position)) return; - - if (position != lastPosition +1 || ! 
currentChromosome.equalsIgnoreCase(chromosome)) { - // add new header to the StringBuilder - String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; - sb.append(wiggleHeader); - - // update last position and current chromosome - currentChromosome = chromosome; - } - lastPosition = position; - String [] params = tabbedPattern.split(paramString, -1); - - if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { - sb.append("0\n"); - ++covBad; - } else { - if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { - sb.append("1\n"); - ++covGood; - } else { - sb.append("0\n"); - ++covBad; - } - } - } - - public static void main(String[] args) throws Exception { - WiggleFromPileup sp = new WiggleFromPileup(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(WiggleFromPileup.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileup.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - // get app specific options - pileupFormat = options.getPileupFormat(); - normalCoverage = options.getNormalCoverage(); - tumourCoverage = options.getTumourCoverage(); - compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); - - if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. NNTT)"); - if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); - if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 
20)"); - - logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput); - - return engage(); - } - return returnStatus; - } - - protected static void setGffRecord(GFF3Record gffRecord) { - WiggleFromPileup.gffRecord = gffRecord; - } - - protected static GFF3Record getGffRecord() { - return gffRecord; - } -} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java deleted file mode 100644 index 36c6a7a8f..000000000 --- a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java +++ /dev/null @@ -1,307 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.PriorityQueue; -import java.util.zip.GZIPOutputStream; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.PositionRange; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.common.util.PileupUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.qmule.gff3.GFF3FileReader; -import org.qcmg.qmule.gff3.GFF3Record; -import org.qcmg.pileup.PileupFileReader; - -public class WiggleFromPileupTakeTwo { - - private static QLogger logger; -// private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); -// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); - - private boolean compressOutput; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - private String pileupFormat; - private int normalCoverage, tumourCoverage; - private int noOfNormalFiles, noOfTumourFiles; - private long covGood, covBad, totalCov; - private int[] normalStartPositions, tumourStartPositions; - private String currentChromosome; - private String[] gffRegions; - private PriorityQueue currentQueue; - private PositionRange currentRange; - private int lastPosition; - private final Map> regionsOfInterest = new HashMap>(); - - - public int engage() throws Exception { - // setup - initialise(); - - loadGffFile(); - - logger.info("no of entries in regionsOfInterest: " + regionsOfInterest.size()); - - long baseCount = 0; - for (PriorityQueue ranges : regionsOfInterest.values()) { - for (PositionRange pr : ranges) { - baseCount += (pr.getEnd() - pr.getStart()); - } - } - logger.info("total no of bases covered by gff regions of interest: " + baseCount); - - - if (regionsOfInterest.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); - - // parse pileup file - parsePileup(); - - logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); - - return exitStatus; - } - - private void loadGffFile() throws Exception { - GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); - try { - int totalNoOfbaits = 0, ignoredBaits = 0; - for (GFF3Record record : reader) { - totalNoOfbaits++; - if (isGff3RecordCorrectType(record.getType())) { - populateRegionsOfInterest(record); - 
} else ignoredBaits++; - } - - logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + (totalNoOfbaits - ignoredBaits) + ", entries that didn't make it: " + ignoredBaits); - } finally { - reader.close(); - } - } - - private void populateRegionsOfInterest(GFF3Record record) { - // get collection corresponding to chromosome - PriorityQueue ranges = regionsOfInterest.get(record.getSeqId()); - if (null == ranges) { - ranges = new PriorityQueue(); - ranges.add(new PositionRange(record.getStart(), record.getEnd())); - regionsOfInterest.put(record.getSeqId(), ranges); - } else { - // loop through PositionRanges and see if any are adjacent - // not very efficient, but will do for now - boolean rangeExtended = false; - for (PositionRange pr : ranges) { - if (pr.isAdjacentToEnd(record.getStart())) { - pr.extendRange(record.getEnd()); - rangeExtended = true; - break; - } - } - if ( ! rangeExtended) { - // add new PositionRange - ranges.add(new PositionRange(record.getStart(), record.getEnd())); - } - } - } - - protected boolean isGff3RecordCorrectType(String type) { - for (String regionName : gffRegions) { - if (type.equals(regionName)) return true; - } - return false; - } - - private void initialise() { - noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); - noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); - normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); - tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); - } - - private void parsePileup() throws Exception { - Writer writer = getWriter(cmdLineOutputFiles[0]); - - PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); - StringBuilder sb = new StringBuilder(); - try { - for (String pr : reader) { -// for (PileupRecord pr : reader) { - addWiggleData(pr, sb); -// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); - if (++totalCov % 100000 == 0 && sb.length() > 0) { - writer.write(sb.toString()); - sb = new StringBuilder(); - - if (totalCov % 10000000 == 0) - logger.info("hit " + totalCov + " pileup records"); - } - } - - // empty contents of StringBuilder to writer - if (sb.length() > 0) writer.write(sb.toString()); - - } finally { - writer.close(); - reader.close(); - } - } - - private Writer getWriter(String fileName) throws IOException { - Writer writer = null; - if (compressOutput) { - writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); - } else { - writer = new FileWriter(new File(fileName)); - } - return writer; - } - - protected boolean isPositionInRegionOfInterest(int position, PriorityQueue ranges) { - if (null == currentRange) return false; - - if (position < currentRange.getStart()) { - return false; - } else if (position <= currentRange.getEnd()) { - return true; - } else { - // advance queue - currentRange = ranges.poll(); - return isPositionInRegionOfInterest(position, ranges); - } - } - - private void addWiggleData(String paramString, StringBuilder sb) { - int firstTabIndex = paramString.indexOf('\t'); - String chromosome = paramString.substring(0, firstTabIndex); - int position = Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); - boolean chromosomeUpdated = false; - if ( ! 
chromosome.equalsIgnoreCase(currentChromosome)) { - // update last position and current chromosome - currentChromosome = chromosome; - chromosomeUpdated = true; - currentQueue = regionsOfInterest.get(chromosome); - if (null == currentQueue) { - logger.warn("no ranges found for chr: " + chromosome); - currentRange = null; - } else { - currentRange = currentQueue.poll(); - } - } - - if ( ! isPositionInRegionOfInterest(position, currentQueue)) return; - - if (position != lastPosition +1 || chromosomeUpdated) { - String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; - sb.append(wiggleHeader); - } - lastPosition = position; - String [] params = TabTokenizer.tokenize(paramString); -// String [] params = tabbedPattern.split(paramString, -1); - - if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { - sb.append("0\n"); - ++covBad; - } else { - if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { - sb.append("1\n"); - ++covGood; - } else { - sb.append("0\n"); - ++covBad; - } - } - } - - public static void main(String[] args) throws Exception { - LoadReferencedClasses.loadClasses(WiggleFromPileupTakeTwo.class); - WiggleFromPileupTakeTwo sp = new WiggleFromPileupTakeTwo(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(WiggleFromPileupTakeTwo.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileupTakeTwo.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - // get app specific options - pileupFormat = options.getPileupFormat(); - normalCoverage = options.getNormalCoverage(); - tumourCoverage = options.getTumourCoverage(); - compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); - gffRegions = options.getGffRegions(); - - - if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. 
NNTT)"); - if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); - if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 20)"); - if (gffRegions.length == 0) throw new IllegalArgumentException("Please specify the region names within the gff3 file you are interested in"); - - logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput + ", gff regions: " + Arrays.deepToString(gffRegions)); - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- deleted file mode 100644 index 36c6a7a8f..000000000 --- a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- +++ /dev/null @@ -1,307 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.PriorityQueue; -import java.util.zip.GZIPOutputStream; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.PositionRange; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.common.util.PileupUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.qmule.gff3.GFF3FileReader; -import org.qcmg.qmule.gff3.GFF3Record; -import org.qcmg.pileup.PileupFileReader; - -public class WiggleFromPileupTakeTwo { - - private static QLogger logger; -// private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); -// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); - - private boolean compressOutput; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - private String pileupFormat; - private int normalCoverage, tumourCoverage; - private int noOfNormalFiles, noOfTumourFiles; - private long covGood, covBad, totalCov; - private int[] normalStartPositions, tumourStartPositions; - private String currentChromosome; - private String[] gffRegions; - private PriorityQueue currentQueue; - private PositionRange currentRange; - private int lastPosition; - private final Map> regionsOfInterest = new HashMap>(); - - - public int engage() throws Exception { - // setup - initialise(); - - loadGffFile(); - - logger.info("no of entries in regionsOfInterest: " + regionsOfInterest.size()); - - long baseCount = 0; - for (PriorityQueue ranges : regionsOfInterest.values()) { - for (PositionRange pr : ranges) { - baseCount += (pr.getEnd() - pr.getStart()); - } - } - logger.info("total no of bases covered by gff regions of interest: " + baseCount); - - - if (regionsOfInterest.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); - - // parse pileup file - parsePileup(); - - logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); - - return exitStatus; - } - - private void loadGffFile() throws Exception { - GFF3FileReader reader 
= new GFF3FileReader(new File(cmdLineInputFiles[1])); - try { - int totalNoOfbaits = 0, ignoredBaits = 0; - for (GFF3Record record : reader) { - totalNoOfbaits++; - if (isGff3RecordCorrectType(record.getType())) { - populateRegionsOfInterest(record); - } else ignoredBaits++; - } - - logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + (totalNoOfbaits - ignoredBaits) + ", entries that didn't make it: " + ignoredBaits); - } finally { - reader.close(); - } - } - - private void populateRegionsOfInterest(GFF3Record record) { - // get collection corresponding to chromosome - PriorityQueue ranges = regionsOfInterest.get(record.getSeqId()); - if (null == ranges) { - ranges = new PriorityQueue(); - ranges.add(new PositionRange(record.getStart(), record.getEnd())); - regionsOfInterest.put(record.getSeqId(), ranges); - } else { - // loop through PositionRanges and see if any are adjacent - // not very efficient, but will do for now - boolean rangeExtended = false; - for (PositionRange pr : ranges) { - if (pr.isAdjacentToEnd(record.getStart())) { - pr.extendRange(record.getEnd()); - rangeExtended = true; - break; - } - } - if ( ! rangeExtended) { - // add new PositionRange - ranges.add(new PositionRange(record.getStart(), record.getEnd())); - } - } - } - - protected boolean isGff3RecordCorrectType(String type) { - for (String regionName : gffRegions) { - if (type.equals(regionName)) return true; - } - return false; - } - - private void initialise() { - noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); - noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); - normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); - tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); - } - - private void parsePileup() throws Exception { - Writer writer = getWriter(cmdLineOutputFiles[0]); - - PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); - StringBuilder sb = new StringBuilder(); - try { - for (String pr : reader) { -// for (PileupRecord pr : reader) { - addWiggleData(pr, sb); -// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); - if (++totalCov % 100000 == 0 && sb.length() > 0) { - writer.write(sb.toString()); - sb = new StringBuilder(); - - if (totalCov % 10000000 == 0) - logger.info("hit " + totalCov + " pileup records"); - } - } - - // empty contents of StringBuilder to writer - if (sb.length() > 0) writer.write(sb.toString()); - - } finally { - writer.close(); - reader.close(); - } - } - - private Writer getWriter(String fileName) throws IOException { - Writer writer = null; - if (compressOutput) { - writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); - } else { - writer = new FileWriter(new File(fileName)); - } - return writer; - } - - protected boolean isPositionInRegionOfInterest(int position, PriorityQueue ranges) { - if (null == currentRange) return false; - - if (position < currentRange.getStart()) { - return false; - } else if (position <= currentRange.getEnd()) { - return true; - } else { - // advance queue - currentRange = ranges.poll(); - return isPositionInRegionOfInterest(position, ranges); - } - } - - private void addWiggleData(String paramString, StringBuilder sb) { - int firstTabIndex = paramString.indexOf('\t'); - String chromosome = paramString.substring(0, firstTabIndex); - int position = 
Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); - boolean chromosomeUpdated = false; - if ( ! chromosome.equalsIgnoreCase(currentChromosome)) { - // update last position and current chromosome - currentChromosome = chromosome; - chromosomeUpdated = true; - currentQueue = regionsOfInterest.get(chromosome); - if (null == currentQueue) { - logger.warn("no ranges found for chr: " + chromosome); - currentRange = null; - } else { - currentRange = currentQueue.poll(); - } - } - - if ( ! isPositionInRegionOfInterest(position, currentQueue)) return; - - if (position != lastPosition +1 || chromosomeUpdated) { - String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; - sb.append(wiggleHeader); - } - lastPosition = position; - String [] params = TabTokenizer.tokenize(paramString); -// String [] params = tabbedPattern.split(paramString, -1); - - if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { - sb.append("0\n"); - ++covBad; - } else { - if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { - sb.append("1\n"); - ++covGood; - } else { - sb.append("0\n"); - ++covBad; - } - } - } - - public static void main(String[] args) throws Exception { - LoadReferencedClasses.loadClasses(WiggleFromPileupTakeTwo.class); - WiggleFromPileupTakeTwo sp = new WiggleFromPileupTakeTwo(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(WiggleFromPileupTakeTwo.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileupTakeTwo.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - // get app specific options - pileupFormat = options.getPileupFormat(); - normalCoverage = options.getNormalCoverage(); - tumourCoverage = options.getTumourCoverage(); - compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); - gffRegions = options.getGffRegions(); - - - if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. NNTT)"); - if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); - if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 20)"); - if (gffRegions.length == 0) throw new IllegalArgumentException("Please specify the region names within the gff3 file you are interested in"); - - logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput + ", gff regions: " + Arrays.deepToString(gffRegions)); - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/XCvsZP.java b/qmule/src/org/qcmg/qmule/XCvsZP.java deleted file mode 100644 index e7973ade9..000000000 --- a/qmule/src/org/qcmg/qmule/XCvsZP.java +++ /dev/null @@ -1,117 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import java.lang.Math; - -import org.qcmg.picard.SAMFileReaderFactory; - -import htsjdk.samtools.BAMIndex; -import htsjdk.samtools.BAMIndexMetaData; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; - -public class XCvsZP { - - - XCvsZP(File input) throws Exception{ - SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); - - HashMap matric = countToMatric( reader ); - - ArrayList keys = getKeys(matric ); - printMatric(matric, keys); - - reader.close(); - - } - - ArrayList getKeys( HashMap matric ){ - Set myset = new HashSet(); - - Iterator itr = matric.keySet().iterator(); - while( itr.hasNext()){ - String key = itr.next().toString(); - String[] zpxc = key.split("_"); - myset.add(zpxc[0]); - myset.add(zpxc[1]); - } - ArrayList mylist = new ArrayList(myset); - Collections.sort(mylist); - - - return mylist; - } - - - void printMatric( HashMap matric, ArrayList keys ){ - System.out.print("\t\tZP \t(reads_Number/total_number)\n"); - System.out.print("-------------------------------------------------------------------------------------------------------------------------------------------------\n XC\t|" ); - for(int i = 0; i < keys.size(); i ++) - System.out.print( "\t " + keys.get(i) + " "); - - for(int i = 0; i < keys.size(); i ++){ - System.out.print( "\n\t|" + keys.get(i) + "|\t"); - for(int j = 0; j < keys.size(); j ++){ - String xc_zp = keys.get(i) + "_" + keys.get(j); - if(matric.containsKey(xc_zp)) - System.out.print(String.format("%.4f\t", matric.get(xc_zp)) ); - else - System.out.print("-----\t"); - } - } - } - - - HashMap countToMatric( SamReader 
reader) throws Exception{ - - HashMap matric = new HashMap(); - HashMap rateMatric = new HashMap(); - - long numRead = 0; - for( SAMRecord record : reader){ - String xc = record.getAttribute("XC").toString(); - String zp = record.getAttribute("ZP").toString(); - String key = xc + "_" + zp; - - long value = 1; - if( matric.containsKey(key)) - value = matric.get(key) + 1; - - matric.put(key, value); - numRead ++; - } - - System.out.println("Total number of reads is " + numRead + "\n"); - - //convert to float with %.4f formart - for(Map.Entry set: matric.entrySet()){ - String key = set.getKey(); - int value = Math.round((set.getValue() * 10000 )/ numRead ); - rateMatric.put(key, ((float) value/10000 )); - } - - return rateMatric; - } - - - - public static void main(String[] args) throws Exception{ - - XCvsZP vs = new XCvsZP(new File(args[0]) ); - - } -} diff --git a/qmule/src/org/qcmg/qmule/XCvsZP.java-- b/qmule/src/org/qcmg/qmule/XCvsZP.java-- deleted file mode 100644 index e7973ade9..000000000 --- a/qmule/src/org/qcmg/qmule/XCvsZP.java-- +++ /dev/null @@ -1,117 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import java.lang.Math; - -import org.qcmg.picard.SAMFileReaderFactory; - -import htsjdk.samtools.BAMIndex; -import htsjdk.samtools.BAMIndexMetaData; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; - -public class XCvsZP { - - - XCvsZP(File input) throws Exception{ - SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); - - HashMap matric = countToMatric( reader ); - - ArrayList keys = getKeys(matric ); - printMatric(matric, keys); - - reader.close(); - - } - - ArrayList getKeys( HashMap matric ){ - Set myset = new HashSet(); - - Iterator itr = matric.keySet().iterator(); - while( itr.hasNext()){ - String key = itr.next().toString(); - String[] zpxc = key.split("_"); - myset.add(zpxc[0]); - myset.add(zpxc[1]); - } - ArrayList mylist = new ArrayList(myset); - Collections.sort(mylist); - - - return mylist; - } - - - void printMatric( HashMap matric, ArrayList keys ){ - System.out.print("\t\tZP \t(reads_Number/total_number)\n"); - System.out.print("-------------------------------------------------------------------------------------------------------------------------------------------------\n XC\t|" ); - for(int i = 0; i < keys.size(); i ++) - System.out.print( "\t " + keys.get(i) + " "); - - for(int i = 0; i < keys.size(); i ++){ - System.out.print( "\n\t|" + keys.get(i) + "|\t"); - for(int j = 0; j < keys.size(); j ++){ - String xc_zp = keys.get(i) + "_" + keys.get(j); - if(matric.containsKey(xc_zp)) - System.out.print(String.format("%.4f\t", matric.get(xc_zp)) ); - else - System.out.print("-----\t"); - } - } - } - - - HashMap countToMatric( SamReader reader) throws Exception{ - - HashMap matric = new HashMap(); - HashMap rateMatric = new HashMap(); - - long numRead = 0; - for( SAMRecord record : reader){ - String xc = record.getAttribute("XC").toString(); - String zp = record.getAttribute("ZP").toString(); - String key = xc + "_" + zp; - - long value = 1; - if( matric.containsKey(key)) - 
value = matric.get(key) + 1; - - matric.put(key, value); - numRead ++; - } - - System.out.println("Total number of reads is " + numRead + "\n"); - - //convert to float with %.4f formart - for(Map.Entry set: matric.entrySet()){ - String key = set.getKey(); - int value = Math.round((set.getValue() * 10000 )/ numRead ); - rateMatric.put(key, ((float) value/10000 )); - } - - return rateMatric; - } - - - - public static void main(String[] args) throws Exception{ - - XCvsZP vs = new XCvsZP(new File(args[0]) ); - - } -} diff --git a/qmule/src/org/qcmg/qmule/bam/CheckBam.java b/qmule/src/org/qcmg/qmule/bam/CheckBam.java deleted file mode 100644 index 3154595c1..000000000 --- a/qmule/src/org/qcmg/qmule/bam/CheckBam.java +++ /dev/null @@ -1,339 +0,0 @@ -/** - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. -*/ -package org.qcmg.qmule.bam; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SamReader; - -import java.io.File; -import java.util.AbstractQueue; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicLongArray; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.qmule.GetBamRecords; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; - - -public class CheckBam { - - private final static String UNMAPPED_READS = "Unmapped"; - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private File bamFIle; - private int numberOfThreads = 1; - - - private static final int READ_PAIRED_FLAG = 0x1; - private static final int PROPER_PAIR_FLAG = 0x2; - private static final int READ_UNMAPPED_FLAG = 0x4; - private static final int MATE_UNMAPPED_FLAG = 0x8; - private static final int READ_STRAND_FLAG = 0x10; - private static final int MATE_STRAND_FLAG = 0x20; - private static final int FIRST_OF_PAIR_FLAG = 0x40; - private static final int SECOND_OF_PAIR_FLAG = 0x80; - private static final int NOT_PRIMARY_ALIGNMENT_FLAG = 0x100; - private static final int READ_FAILS_VENDOR_QUALITY_CHECK_FLAG = 0x200; - private static final int DUPLICATE_READ_FLAG = 0x400; - private static final int SUPPLEMENTARY_ALIGNMENT_FLAG = 0x800; - - - private int exitStatus; - private static QLogger logger; - - private final AtomicLong counter = new AtomicLong(); - -// long [] flagCounter = new long[5000]; - AtomicLongArray flags = new AtomicLongArray(5000); - - - public int engage() throws Exception { - - logger.info("Get reference contigs from bam header"); - bamFIle = new File(cmdLineInputFiles[0]); - - final AbstractQueue sequences = new ConcurrentLinkedQueue(); - - try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { - if ( ! 
reader.hasIndex() && numberOfThreads > 1) { - logger.warn("Using 1 producer thread - no index found for bam file: " + bamFIle.getAbsolutePath()); - numberOfThreads = 1; - } - - SAMFileHeader header = reader.getFileHeader(); - List samSequences = header.getSequenceDictionary().getSequences(); - List orderedSamSequences = new ArrayList(); - orderedSamSequences.addAll(samSequences); - Collections.sort(orderedSamSequences, new Comparator(){ - @Override - public int compare(SAMSequenceRecord o1, SAMSequenceRecord o2) { - return o2.getSequenceLength() - o1.getSequenceLength(); - } - }); - // add the unmapped reads marker - sequences.add(UNMAPPED_READS); - - for (SAMSequenceRecord rec : orderedSamSequences) { - sequences.add(rec.getSequenceName()); - } - } - - - logger.info("will create " + numberOfThreads + " threads"); - - final CountDownLatch pLatch = new CountDownLatch(numberOfThreads); -// setpup and kick-off single Producer thread - ExecutorService producerThreads = Executors.newFixedThreadPool(numberOfThreads); - if (1 == numberOfThreads) { - producerThreads.execute(new SingleProducer(Thread.currentThread(), pLatch)); - } else { - for (int i = 0 ; i < numberOfThreads ; i++) { - producerThreads.execute(new Producer(Thread.currentThread(), pLatch, sequences)); - } - } - - // don't allow any new threads to start - producerThreads.shutdown(); - - logger.info("waiting for Producer thread to finish"); - pLatch.await(); - logger.info("Producer thread finished, counter size: " + counter.longValue()); - // output flag stats too - long dups = 0; - long sups = 0; - long mapped = 0; - long paired = 0; - long properPair = 0; - long r1 = 0; - long r2 = 0; - for (int i = 0 ; i < flags.length() ; i++) { - long l = flags.get(i); - if (l > 0) { - - if ((i & READ_PAIRED_FLAG) != 0) { - paired += l; - } - if ((i & PROPER_PAIR_FLAG) != 0) { - properPair += l; - } - if ((i & READ_UNMAPPED_FLAG) == 0) { - mapped += l; - } - if ((i & FIRST_OF_PAIR_FLAG) != 0) { - r1 += l; - } - if ((i & SECOND_OF_PAIR_FLAG) != 0) { - r2 += l; - } - if ((i & DUPLICATE_READ_FLAG) != 0) { - dups += l; - } - if ((i & SUPPLEMENTARY_ALIGNMENT_FLAG) != 0) { - sups += l; - } - logger.info("flag: " + i + " : " + l + " hits"); - } - } - logger.info("total read count: " + counter.longValue()); - logger.info("dups: " + dups + " (" + (((double) dups / counter.longValue()) * 100) + "%)"); - logger.info("sups: " + sups + " (" + (((double) sups / counter.longValue()) * 100) + "%)"); - logger.info("mapped: " + mapped + " (" + (((double) mapped / counter.longValue()) * 100) + "%)"); - logger.info("paired: " + paired + " (" + (((double) paired / counter.longValue()) * 100) + "%)"); - logger.info("properPair: " + properPair + " (" + (((double)properPair / counter.longValue()) * 100) + "%)"); - logger.info("r1: " + r1 + " (" + (((double) r1 / counter.longValue()) * 100) + "%)"); - logger.info("r2: " + r2 + " (" + (((double) r2 / counter.longValue()) * 100) + "%)"); - - return exitStatus; - } - - - - public class Producer implements Runnable { - private final Thread mainThread; - private final CountDownLatch pLatch; - private final AbstractQueue sequences; - private final QLogger log = QLoggerFactory.getLogger(Producer.class); - - private final long [] flagCounter = new long[5000]; - - Producer(Thread mainThread, CountDownLatch pLatch, AbstractQueue sequences) { - this.mainThread = mainThread; - this.pLatch = pLatch; - this.sequences = sequences; - } - - @Override - public void run() { - log.debug("Start Producer "); - - long count = 0; - - try 
(SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { - - while (true) { - String sequence = sequences.poll(); - if (null == sequence) break; - SAMRecordIterator iter = UNMAPPED_READS.equals(sequence) ? reader.queryUnmapped() : reader.query(sequence, 0, 0, false) ; - log.info("retrieving records for sequence: " + sequence); - while (iter.hasNext()) { - int flag = iter.next().getFlags(); - flagCounter[flag] ++ ; - // update count for this flag - if (++count % 2000000 == 0) { - log.info("added " + count/1000000 + "M"); - } - } - iter.close(); - } - - } catch (Exception e) { - log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); - mainThread.interrupt(); - } finally { - pLatch.countDown(); - } - // update the shared counter - counter.addAndGet(count); - //update the flag Counter - int i = 0 ; - for (long l : flagCounter) { - if (l > 0) { - flags.addAndGet(i, l); - } - i++; - } - } - } - - public class SingleProducer implements Runnable { - private final Thread mainThread; - private final QLogger log = QLoggerFactory.getLogger(SingleProducer.class); - private final CountDownLatch pLatch; - private final long [] flagCounter = new long[5000]; - - SingleProducer(Thread mainThread, CountDownLatch pLatch) { - this.mainThread = mainThread; - this.pLatch = pLatch; - } - - @Override - public void run() { - log.debug("Start SingleProducer "); - - long count = 0; - - try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { - - for (SAMRecord r : reader) { - int flag = r.getFlags(); - flagCounter[flag] ++ ; - if (++count % 2000000 == 0) { - log.info("added " + count/1000000 + "M"); - } - } - - } catch (Exception e) { - log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); - mainThread.interrupt(); - } finally { - pLatch.countDown(); - } - // update the shared counter - counter.addAndGet(count); - //update the flag Counter - int i = 0 ; - for (long l : flagCounter) { - if (l > 0) { - flags.addAndGet(i, l); - } - i++; - } - } - } - - public static void main(String[] args) throws Exception { - CheckBam sp = new CheckBam(); - int exitStatus = sp.setup(args); - if (null != logger) { - logger.logFinalExecutionStats(exitStatus); - } - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CheckBam", CheckBam.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - int nt = options.getNumberOfThreads(); - if (nt > 0) { - numberOfThreads = nt; - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- b/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- deleted file mode 100644 index 3154595c1..000000000 --- a/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- +++ /dev/null @@ -1,339 +0,0 @@ -/** - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. -*/ -package org.qcmg.qmule.bam; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SamReader; - -import java.io.File; -import java.util.AbstractQueue; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicLongArray; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.qmule.GetBamRecords; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; - - -public class CheckBam { - - private final static String UNMAPPED_READS = "Unmapped"; - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private File bamFIle; - private int numberOfThreads = 1; - - - private static final int READ_PAIRED_FLAG = 0x1; - private static final int PROPER_PAIR_FLAG = 0x2; - private static final int READ_UNMAPPED_FLAG = 0x4; - private static final int MATE_UNMAPPED_FLAG = 0x8; - private static final int READ_STRAND_FLAG = 0x10; - private static final int MATE_STRAND_FLAG = 0x20; - private static final int FIRST_OF_PAIR_FLAG = 0x40; - private static final int SECOND_OF_PAIR_FLAG = 0x80; - private static final int NOT_PRIMARY_ALIGNMENT_FLAG = 0x100; - private static final int READ_FAILS_VENDOR_QUALITY_CHECK_FLAG = 0x200; - private static final int DUPLICATE_READ_FLAG = 0x400; - private static final int SUPPLEMENTARY_ALIGNMENT_FLAG = 0x800; - - - private int exitStatus; - private static QLogger logger; - - private final AtomicLong counter = new AtomicLong(); - -// long [] flagCounter = new long[5000]; - AtomicLongArray flags = new AtomicLongArray(5000); - - - public int engage() throws Exception { - - logger.info("Get reference contigs from bam header"); - bamFIle = new File(cmdLineInputFiles[0]); - - final AbstractQueue sequences = new ConcurrentLinkedQueue(); - - try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { - if ( ! 
reader.hasIndex() && numberOfThreads > 1) { - logger.warn("Using 1 producer thread - no index found for bam file: " + bamFIle.getAbsolutePath()); - numberOfThreads = 1; - } - - SAMFileHeader header = reader.getFileHeader(); - List samSequences = header.getSequenceDictionary().getSequences(); - List orderedSamSequences = new ArrayList(); - orderedSamSequences.addAll(samSequences); - Collections.sort(orderedSamSequences, new Comparator(){ - @Override - public int compare(SAMSequenceRecord o1, SAMSequenceRecord o2) { - return o2.getSequenceLength() - o1.getSequenceLength(); - } - }); - // add the unmapped reads marker - sequences.add(UNMAPPED_READS); - - for (SAMSequenceRecord rec : orderedSamSequences) { - sequences.add(rec.getSequenceName()); - } - } - - - logger.info("will create " + numberOfThreads + " threads"); - - final CountDownLatch pLatch = new CountDownLatch(numberOfThreads); -// setpup and kick-off single Producer thread - ExecutorService producerThreads = Executors.newFixedThreadPool(numberOfThreads); - if (1 == numberOfThreads) { - producerThreads.execute(new SingleProducer(Thread.currentThread(), pLatch)); - } else { - for (int i = 0 ; i < numberOfThreads ; i++) { - producerThreads.execute(new Producer(Thread.currentThread(), pLatch, sequences)); - } - } - - // don't allow any new threads to start - producerThreads.shutdown(); - - logger.info("waiting for Producer thread to finish"); - pLatch.await(); - logger.info("Producer thread finished, counter size: " + counter.longValue()); - // output flag stats too - long dups = 0; - long sups = 0; - long mapped = 0; - long paired = 0; - long properPair = 0; - long r1 = 0; - long r2 = 0; - for (int i = 0 ; i < flags.length() ; i++) { - long l = flags.get(i); - if (l > 0) { - - if ((i & READ_PAIRED_FLAG) != 0) { - paired += l; - } - if ((i & PROPER_PAIR_FLAG) != 0) { - properPair += l; - } - if ((i & READ_UNMAPPED_FLAG) == 0) { - mapped += l; - } - if ((i & FIRST_OF_PAIR_FLAG) != 0) { - r1 += l; - } - if ((i & SECOND_OF_PAIR_FLAG) != 0) { - r2 += l; - } - if ((i & DUPLICATE_READ_FLAG) != 0) { - dups += l; - } - if ((i & SUPPLEMENTARY_ALIGNMENT_FLAG) != 0) { - sups += l; - } - logger.info("flag: " + i + " : " + l + " hits"); - } - } - logger.info("total read count: " + counter.longValue()); - logger.info("dups: " + dups + " (" + (((double) dups / counter.longValue()) * 100) + "%)"); - logger.info("sups: " + sups + " (" + (((double) sups / counter.longValue()) * 100) + "%)"); - logger.info("mapped: " + mapped + " (" + (((double) mapped / counter.longValue()) * 100) + "%)"); - logger.info("paired: " + paired + " (" + (((double) paired / counter.longValue()) * 100) + "%)"); - logger.info("properPair: " + properPair + " (" + (((double)properPair / counter.longValue()) * 100) + "%)"); - logger.info("r1: " + r1 + " (" + (((double) r1 / counter.longValue()) * 100) + "%)"); - logger.info("r2: " + r2 + " (" + (((double) r2 / counter.longValue()) * 100) + "%)"); - - return exitStatus; - } - - - - public class Producer implements Runnable { - private final Thread mainThread; - private final CountDownLatch pLatch; - private final AbstractQueue sequences; - private final QLogger log = QLoggerFactory.getLogger(Producer.class); - - private final long [] flagCounter = new long[5000]; - - Producer(Thread mainThread, CountDownLatch pLatch, AbstractQueue sequences) { - this.mainThread = mainThread; - this.pLatch = pLatch; - this.sequences = sequences; - } - - @Override - public void run() { - log.debug("Start Producer "); - - long count = 0; - - try 
(SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { - - while (true) { - String sequence = sequences.poll(); - if (null == sequence) break; - SAMRecordIterator iter = UNMAPPED_READS.equals(sequence) ? reader.queryUnmapped() : reader.query(sequence, 0, 0, false) ; - log.info("retrieving records for sequence: " + sequence); - while (iter.hasNext()) { - int flag = iter.next().getFlags(); - flagCounter[flag] ++ ; - // update count for this flag - if (++count % 2000000 == 0) { - log.info("added " + count/1000000 + "M"); - } - } - iter.close(); - } - - } catch (Exception e) { - log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); - mainThread.interrupt(); - } finally { - pLatch.countDown(); - } - // update the shared counter - counter.addAndGet(count); - //update the flag Counter - int i = 0 ; - for (long l : flagCounter) { - if (l > 0) { - flags.addAndGet(i, l); - } - i++; - } - } - } - - public class SingleProducer implements Runnable { - private final Thread mainThread; - private final QLogger log = QLoggerFactory.getLogger(SingleProducer.class); - private final CountDownLatch pLatch; - private final long [] flagCounter = new long[5000]; - - SingleProducer(Thread mainThread, CountDownLatch pLatch) { - this.mainThread = mainThread; - this.pLatch = pLatch; - } - - @Override - public void run() { - log.debug("Start SingleProducer "); - - long count = 0; - - try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { - - for (SAMRecord r : reader) { - int flag = r.getFlags(); - flagCounter[flag] ++ ; - if (++count % 2000000 == 0) { - log.info("added " + count/1000000 + "M"); - } - } - - } catch (Exception e) { - log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); - mainThread.interrupt(); - } finally { - pLatch.countDown(); - } - // update the shared counter - counter.addAndGet(count); - //update the flag Counter - int i = 0 ; - for (long l : flagCounter) { - if (l > 0) { - flags.addAndGet(i, l); - } - i++; - } - } - } - - public static void main(String[] args) throws Exception { - CheckBam sp = new CheckBam(); - int exitStatus = sp.setup(args); - if (null != logger) { - logger.logFinalExecutionStats(exitStatus); - } - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CheckBam", CheckBam.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - int nt = options.getNumberOfThreads(); - if (nt > 0) { - numberOfThreads = nt; - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java b/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java deleted file mode 100644 index b480f21f4..000000000 --- a/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java +++ /dev/null @@ -1,127 +0,0 @@ -package org.qcmg.qmule.bam; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.Writer; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; - -import org.qcmg.common.log.QLogger; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; - -public class GetContigsFromHeader { - - private static QLogger logger; - - private int setup(String [] args) throws IOException { - /* - * first arg should be the header, - * second arg (if present) should be how many times the genome should be diviied up - */ - - SamReaderFactory factory = SamReaderFactory.make(); - SamReader reader = factory.open(new File(args[0])); - SAMFileHeader header = reader.getFileHeader(); - - SAMSequenceDictionary dict = header.getSequenceDictionary(); - Map map = dict.getSequences().stream().collect(Collectors.groupingBy(SAMSequenceRecord::getSequenceName, Collectors.summingInt(SAMSequenceRecord::getSequenceLength))); - - - - if (args.length > 1 && null != args[1]) { - int numberOfContigs = map.keySet().size(); - long length = map.values().stream().mapToLong(Integer::longValue).sum(); - int numberOfEntries = Integer.parseInt(args[1]) - 1; - - long noOFBasesPerEntry = length / numberOfEntries; - - System.out.println("genome length: " + length + ", numberOfEntries: " + numberOfEntries + ", noOFBasesPerEntry: " + noOFBasesPerEntry + ", numberOfContigs: " + numberOfContigs); - - - Map results = new HashMap<>(); - Set contigs = new HashSet<>(); - - List sortedContigs = map.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).map(e -> e.getKey()).collect(Collectors.toList()); - - - for (String contig : sortedContigs) { - System.out.println("looking at contig: " + contig); - Integer contigLength = map.get(contig); - if ( ! contigs.contains(contig)) { - if (contigLength >= noOFBasesPerEntry) { - results.put(contig, contigLength); - contigs.add(contig); - } else { - AtomicLong basesToMakeUp = new AtomicLong(noOFBasesPerEntry - contigLength); -// long basesToMakeUp = noOFBasesPerEntry - e.getValue(); - StringBuilder key = new StringBuilder(); - key.append(contig); - contigs.add(contig); - while (basesToMakeUp.longValue() > 1000000) { - Optional> e1 = map.entrySet().stream().filter(en -> ! 
contigs.contains(en.getKey())).filter(en -> en.getValue() < basesToMakeUp.longValue()).max((en1, en2) -> en2.getValue() - en1.getValue()); - if (e1.isPresent()) { - key.append(" -L "); - key.append(e1.get().getKey()); - basesToMakeUp.addAndGet( - e1.get().getValue()); - contigs.add(e1.get().getKey()); - } else { - break; - } - } - results.put(key.toString(), (int)noOFBasesPerEntry - basesToMakeUp.intValue()); - } - } - } - - results.forEach((k,v) -> System.out.println("contigs: " + k + ", size: " + v)); - System.out.println("contigs.size(): " + contigs.size()); - - /* - * write file - */ - if (args.length > 2 && null != args[2]) { - try (Writer writer = new FileWriter(args[2]);) { - - /* - * sort according to number of bases - */ - results.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).forEach(e -> { - try { - writer.write(e.getKey() + "\n"); - } catch (IOException e3) { - // TODO Auto-generated catch block - e3.printStackTrace(); - } - }); - } - } - } - - return 0; - } - - public static void main(String[] args) throws Exception { - GetContigsFromHeader sp = new GetContigsFromHeader(); - int exitStatus = sp.setup(args); - if (null != logger) { - logger.logFinalExecutionStats(exitStatus); - } - - System.exit(exitStatus); - } - -} - diff --git a/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- b/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- deleted file mode 100644 index b480f21f4..000000000 --- a/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- +++ /dev/null @@ -1,127 +0,0 @@ -package org.qcmg.qmule.bam; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.Writer; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; - -import org.qcmg.common.log.QLogger; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; - -public class GetContigsFromHeader { - - private static QLogger logger; - - private int setup(String [] args) throws IOException { - /* - * first arg should be the header, - * second arg (if present) should be how many times the genome should be diviied up - */ - - SamReaderFactory factory = SamReaderFactory.make(); - SamReader reader = factory.open(new File(args[0])); - SAMFileHeader header = reader.getFileHeader(); - - SAMSequenceDictionary dict = header.getSequenceDictionary(); - Map map = dict.getSequences().stream().collect(Collectors.groupingBy(SAMSequenceRecord::getSequenceName, Collectors.summingInt(SAMSequenceRecord::getSequenceLength))); - - - - if (args.length > 1 && null != args[1]) { - int numberOfContigs = map.keySet().size(); - long length = map.values().stream().mapToLong(Integer::longValue).sum(); - int numberOfEntries = Integer.parseInt(args[1]) - 1; - - long noOFBasesPerEntry = length / numberOfEntries; - - System.out.println("genome length: " + length + ", numberOfEntries: " + numberOfEntries + ", noOFBasesPerEntry: " + noOFBasesPerEntry + ", numberOfContigs: " + numberOfContigs); - - - Map results = new HashMap<>(); - Set contigs = new HashSet<>(); - - List sortedContigs = map.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).map(e -> e.getKey()).collect(Collectors.toList()); - - - for (String 
contig : sortedContigs) { - System.out.println("looking at contig: " + contig); - Integer contigLength = map.get(contig); - if ( ! contigs.contains(contig)) { - if (contigLength >= noOFBasesPerEntry) { - results.put(contig, contigLength); - contigs.add(contig); - } else { - AtomicLong basesToMakeUp = new AtomicLong(noOFBasesPerEntry - contigLength); -// long basesToMakeUp = noOFBasesPerEntry - e.getValue(); - StringBuilder key = new StringBuilder(); - key.append(contig); - contigs.add(contig); - while (basesToMakeUp.longValue() > 1000000) { - Optional> e1 = map.entrySet().stream().filter(en -> ! contigs.contains(en.getKey())).filter(en -> en.getValue() < basesToMakeUp.longValue()).max((en1, en2) -> en2.getValue() - en1.getValue()); - if (e1.isPresent()) { - key.append(" -L "); - key.append(e1.get().getKey()); - basesToMakeUp.addAndGet( - e1.get().getValue()); - contigs.add(e1.get().getKey()); - } else { - break; - } - } - results.put(key.toString(), (int)noOFBasesPerEntry - basesToMakeUp.intValue()); - } - } - } - - results.forEach((k,v) -> System.out.println("contigs: " + k + ", size: " + v)); - System.out.println("contigs.size(): " + contigs.size()); - - /* - * write file - */ - if (args.length > 2 && null != args[2]) { - try (Writer writer = new FileWriter(args[2]);) { - - /* - * sort according to number of bases - */ - results.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).forEach(e -> { - try { - writer.write(e.getKey() + "\n"); - } catch (IOException e3) { - // TODO Auto-generated catch block - e3.printStackTrace(); - } - }); - } - } - } - - return 0; - } - - public static void main(String[] args) throws Exception { - GetContigsFromHeader sp = new GetContigsFromHeader(); - int exitStatus = sp.setup(args); - if (null != logger) { - logger.logFinalExecutionStats(exitStatus); - } - - System.exit(exitStatus); - } - -} - diff --git a/qmule/src/org/qcmg/qmule/messages.properties b/qmule/src/org/qcmg/qmule/messages.properties deleted file mode 100644 index 4ec3962f0..000000000 --- a/qmule/src/org/qcmg/qmule/messages.properties +++ /dev/null @@ -1,107 +0,0 @@ -#general usage message -USAGE = usage: qmule ToolName [-options] -HELP_OPTION_DESCRIPTION = Shows this help message. -VERSION_OPTION_DESCRIPTION = Print version info. -UNKNOWN_ERROR = An unknown error has occurred -ALL_ARGUMENTS_MUST_BE_OPTIONS = All arguments must be supplied as option values -LOG_OPTION_DESCRIPTION = specify the log file -LOGLEVEL_OPTION_DESCRIPTION = specify the log levle, eg: EXEC,TOOL,DEBUG,INFO. By default, it is INFO - -#IO warning message -INPUT_OPTION_DESCRIPTION = Specifies an input file. -INPUT_FILE_READ_ERROR = Cannot read input file {0} -INSUFFICIENT_INPUT_FILES = Insufficient input files -NONEXISTENT_INPUT_FILE = Nonexistent input file {0} -MISSING_INPUT_OPTIONS = You must specify at least one -i option -MISSING_OUTPUT_OPTIONS = You must specify an -o option -SAME_INPUT_FILE = Input file {0} supplied more than once -OUTPUT_OPTION_DESCRIPTION = Specifies the output file. 
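The message values in this properties file use java.text.MessageFormat placeholders ({0}, {1}). As a minimal sketch only, the snippet below shows one way such keys could be resolved from org/qcmg/qmule/messages.properties; the class name MessagesSketch and the ResourceBundle-based lookup are assumptions for illustration, and the real org.qcmg.qmule.Messages helper referenced by the deleted tools is not part of this patch and may differ.

    import java.text.MessageFormat;
    import java.util.ResourceBundle;

    // Sketch: resolve a qmule message key and substitute {0}-style arguments.
    public class MessagesSketch {
        // assumes messages.properties is on the classpath as org/qcmg/qmule/messages.properties
        private static final ResourceBundle BUNDLE =
                ResourceBundle.getBundle("org.qcmg.qmule.messages");

        public static String getMessage(String key, Object... args) {
            // property values such as "Cannot read input file {0}" are MessageFormat patterns
            return MessageFormat.format(BUNDLE.getString(key), args);
        }

        public static void main(String[] args) {
            System.out.println(getMessage("INPUT_FILE_READ_ERROR", "/tmp/missing.bam"));
        }
    }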
-OUTPUT_FILE_WRITE_ERROR = Cannot write to output file {0} -CANNOT_CLOSE_FILES = Cannot close file(s) -SAME_FILES = {0} and {1} are the same file -FILE_USED_AS_INPUT_AND_OUTPUT = File {0} used both as input and output -TOO_MANY_OUTPUTFILE= Too many output files - -#for snppicker -MISSING_OUTPUT_MERGE_OPTIONS = You must specify either the -o or -m option -MERGE_AND_OUTPUT_ERROR = Either -o or -m may be specified, not both -READ_GROUP_OVERLAP = Read group overlap -BLANK_FILE_NAME_IN_READGROUP = Blank file name in replacement {0} -BLANK_REPLACEMENT_GROUP = Cannot use an empty value for a group replacement -NULL_REPLACEMENT_GROUP = Cannot use a null value for a group replacement -OVERLAPPING_READ_GROUPS = Overlapping read group(s) in files {0} and {1} -IDENTICAL_GROUP_FOR_REPLACEMENT = Identical group specified for replacement {0} -MULTIPLE_NUMBER_RECORDS_SPECIFIED = Too many number-of-records options specified -MULTIPLE_OUTPUT_FILES_SPECIFIED = Too many output files specified -BAD_REPLACEMENT_FORMAT = {0} does not follow pattern file:oldgroup:newgroup -BAD_GROUP_REPLACEMENT_FILENAME = Group replacement {0} specifies unknown input file {1} -CLASHING_NEW_REPLACEMENT_GROUP = Identical new group {0} for replacements {1} and {2} -CLASHING_OLD_REPLACEMENT_GROUP = Identical old group {0} for replacements {1} and {2} -CANNOT_OVERWRITE_EXISTING_OUTPUT = Cannot overwrite existing output file -CANNOT_DELETE_TEMPORARY_FILE = Unable to delete the temporary file during merge -UNSUITABLE_MERGE_FILE = Unsuitable merge file -FILE_NOT_DIRECTORY = Output must be a file not a directory -BAD_RECORD_ZC = Record ZC does not correspond to available ZCs in source file -FORCE_OPTION_DESCRIPTION = Forces the merge regardless of overlapping groups. -MERGE_OPTION_DESCRIPTION = Specifies the merge file. -NUMBER_RECORDS_DESCRIPTION = The number of records to merge into the final file (from start of file). -REPLACE_OPTION_DESCRIPTION = Replace the oldgroup in the input file with a newgroup in the output file. - -#message for replaceReadGroup -ID = specify read group identifier, with which the RG line will be replaced. -CN = specify name of sequencing center producing the read which will be added or replaced into CN field. -DS = add or replace specify descriptionwhich will be added or replaced into DS field. -DT = add or replace specify date the run was produced (ISO8601 date or date/time)which will be added or replaced into DT field. -LB = specify read group library value which will be added or replaced into LB field. -PI = specify predicted median insert size which will be added or replaced into PI field. -PL = specify platform/technology used to produce the reads. Valid values: CAPILLARY, LS454, ILLUMINA,SOLID, HELICOS, IONTORRENT and PACBIO. -PU = specify platform unit (e.g. owcell-barcode.lane for Illumina or slide for SOLiD). Unique identified. -SM = specify sample. Use pool name where a pool is being sequenced which will be added or replaced into SM field -AB = specify attributes. Formart should be :. eg. --AB "ZC:modify bioscope BAM header" "PG:qmule.replaceReadGroup" which will be added into field -SAMHEADER = output SAM file header only if this option specified. -ATTRIBUTE_ERR = Format error in attribute: {0}. See usage in help message. 
-USAGE_SWAP_LB_DS = usage: qmule org.qcmg.qmule.replaceReadGroup.Swqp_LB_DS -i -o --LB -l -USAGE_REPLACEREADGROUP = usage: qmule org.qcmg.qmule.replaceReadGroup.ReplaceReadGroup -i -o --ID -l [options] -USAGE_FixSingleRG = usage: qmule org.qcmg.qmule.replaceReadGroup.FixSingleRG -i -o --LB -l -USAGE_Fix = usage: qmule org.qcmg.qmule.FixBAM.Fix -i -o --LB -l - -#message for qcnv -REF_OPTION_DESCRIPTION = specify the normal or germline BAM file with full path -TEST_OPTION_DESCRIPTION = specify the tumour or cell line BAM file with full path -USAGE_QCNV = usage: qmule org.qcmg.qmule.qcnv.Main --ref --test -o --log [options] -WINDOW_SIZE_DESCRIPTION = (Optional) specify the window size here. Otherwise a default window size will given based on a hard coded formula. -TMPDIR_DESCRIPTION = (Optional) the directory for temporary files. Without this option, all temporary file will created into output file directory. -THREAD_OPTION_DESCRIPTION = (Optional) specify the thread number (the number of paralleled query). by default we set it to 2 - -#message for subSample -PROPORTION_OPTION_DESCRIPTION = specify the proportion of total reads you want to output -USAGE_SUBSAMPLE = usage: qmule org.qcmg.qmule.SubSample -i -o --proportion (0,1) --log - -#message for AlignerCompare -COMPAREALL_OPTION = Without this option, this comparison will discard all non primary alignments, such as secondary, supplementary alignments. -USAGE_AlignerCompare = usage: qmule org.qcmg.qmule.AlignerCompare -i -i -o [options] -OUTPUT_AlignerCompare = specifies output file prefix with full path here - -#message for BamMismatchCounts -USAGE_BamMismatchCounts = usage: qmule org.qcmg.qmule.BamMismatchCounts -i -o - -#BAMCompress -COMPRESS_LEVEL_DESCRIPTION = specifies output BAM compress level, default is 5 -USAGE_BAMCompress = usage: qmule org.qcmg.qmule.BAMCompress -i -compressLevel [0~9] - -#for AnnotateGFF -STRANDED_OPTION_DESCRIPTION=Only features that occur on the same strand as the input record are annotated. -MODE_ERROR=Mode provided {0} is not recognized -DCC_PARSE_ERROR=Could not determine the type of the DCC header. Could not find the {0} column -DCC_HEADER_ERROR=No header present for the DCC file -FILE_TYPE_ERROR=File does not appear to be dcc1 or dccq -NULL_GFF_MOTIF=Could not determine the motif from the gff file at position: {0} -#IndelDCCHeader -UUID_ERROR=Could not determine if a UUID and DCC header was present -#MAF2DCC1 -NO_COLUMN_INDEX=Could not find all required columns in the header -MISSING_DCC_RECORDS=Could not find all maf records in the input dcc1 file -DUPLICATE_MAF=Two identical maf record positions -T0O_MANY_MATCHES=More than one match found for dcc1 record at position: {0} -COUNT_ERROR=Number of dcc records added to the output file {0} does not match number of maf records {1} - diff --git a/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java b/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java deleted file mode 100644 index 707c4726d..000000000 --- a/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java +++ /dev/null @@ -1,226 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.qcnv; - -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import htsjdk.samtools.*; - -import java.lang.Math; -import java.io.*; - -import org.qcmg.picard.SAMFileReaderFactory; - - -public class CNVseq { - - private static final boolean String = false; - //in cnv-seq.pl it call below R to get value - //echo 'options(digits=16);qnorm(1-0.5*0.001)' | R --vanilla --slave (result: 3.290526731491926) - public static double bt = 3.290526731491926; - //$echo 'options(digits=16);qnorm(0.5*0.001)' | R --vanilla --slave (result: -3.290526731491894) - public static double st = -3.290526731491894; - - public static double pvalue = 0.001; - public static int min_windoe = 4; - public static double log2 = 0.6; - public static double bigger = 1.5; - //public static int debug = 0; - //public static String Rexe = "R"; - - private final Map refSeq; - - private final long genomeSize ; - private final long numTest; - private final long numRef; - - private final double biglog2_window; - private final double smalog2_window; - private final int windowSize; - - private final File ftest; - private final File fref; - - - /** - * it caculate the window size based on genome size, TEST and REF BAM records number - * @param test: File of TEST BAM - * @param ref: File of reference BAM - * @throws Exception - */ - CNVseq(File test, File ref, int window ) throws Exception { - //open file - SamReader rtest = SAMFileReaderFactory.createSAMFileReader(test );//new SAMFileReader(test); - SamReader rref = SAMFileReaderFactory.createSAMFileReader(ref );//new SAMFileReader(ref); - - //check whether index file exist or not - if(!rtest.hasIndex()){ - throw new Exception("can't find index for: " + test.getName()); - } - if(!rref.hasIndex()){ - throw new Exception("can't find index for: " + ref.getName()); - } - ftest = test; - fref = ref; - - //check SAM header - SAMFileHeader htest = rtest.getFileHeader(); - SAMFileHeader href = rref.getFileHeader(); - - //get sequence information from both inputs - Map seqTest = new HashMap (); - Map seqRef = new HashMap (); - - List genome = htest.getSequenceDictionary().getSequences(); - for(SAMSequenceRecord re : genome){ - seqTest.put(re.getSequenceName(),re.getSequenceLength()); - } - - genome = href.getSequenceDictionary().getSequences(); - for(SAMSequenceRecord re : genome){ - seqRef.put(re.getSequenceName(),re.getSequenceLength()); - } - - // check both @SQ line are same or not - if(seqRef.size() != seqTest.size()){ - throw new Exception("the sequence size are different between two inputs: \n" + ftest.getName() + "\n" + fref.getName() ); - } - - for (String key : seqTest.keySet()){ - //first convert Integer to int - int l1 = seqTest.get(key); - int l2 = seqRef.get(key); - if(l1 != l2){ - throw new Exception("the sequence size of " + key + " are different between two inputs : \n" + ftest.getName() + "\n" + fref.getName() ); - } - } - - // assign one of the identical reference info into the hash map - refSeq = seqTest; - - //caculate the genome size based on the identail reference - long size = 0; - for(String key : refSeq.keySet()){ size += refSeq.get(key); } - genomeSize = size; -//-debug -//genomeSize = 3253037807L; - - //count mapped record number based on index file - BAMIndex tIndex = rtest.indexing().getIndex(); - BAMIndex rIndex = rref.indexing().getIndex(); - BAMIndexMetaData meta; - int tMapped = 0; - int rMapped = 0; - for(int i = 0; i < seqRef.size(); i ++ ){ - meta = 
tIndex.getMetaData(i); - tMapped += meta.getAlignedRecordCount(); - meta = rIndex.getMetaData(i); - rMapped += meta.getAlignedRecordCount(); - } - numTest = tMapped; - numRef = rMapped; - - //close files - rtest.close(); - rref.close(); - - //caculate window size - double brp = Math.pow(2, log2); - double srp = 1.0 / brp; - - - biglog2_window = (numTest * Math.pow(brp, 2) + numRef) * genomeSize * Math.pow(bt, 2) / ( Math.pow((1- brp),2 ) * numTest * numRef); - smalog2_window = (numTest * Math.pow(srp, 2) + numRef) * genomeSize * Math.pow(st, 2) / ( Math.pow((1- srp),2 ) * numTest * numRef); - if(window == 0 ){ - windowSize = (int) (Math.max(biglog2_window, smalog2_window) * bigger) ; - }else{ - windowSize = window; - } - - } - - /** - * it create an Iterator and query on each window; finally it close the iterator - * @param f: SAMFileReader - * @param chr: genoeme name - * @param start: window start postion - * @param end: window end position - * @return the totoal number of records mapped overlapped on this window region - */ - int exeQuery (SamReader reader, String chr, int start, int end){ - - SAMRecordIterator block_ite = reader.queryOverlapping(chr, start, end); - int num = 0; - while(block_ite.hasNext()){ - num ++; - block_ite.next(); - } - - block_ite.close(); - - return num; - } - - /** - * - * @return total SAM records number in Test input file - */ - long getTestReadsNumber(){return numTest;} - - /** - * - * @return total SAM records number in Ref input file - */ - long getRefReadsNumber(){return numRef;} - - /** - * - * @return a hash table list each sequence reference name and length - */ - Map getrefseq(){return refSeq;} - - /** - * - * @return return the minimum window size for detecting log2>=0.6 - */ - double getpositivelog2window(){ return biglog2_window;} - - /** - * - * @return The minimum window size for detecting log2<=-0.6 - */ - double getnegativelog2window(){return smalog2_window;} - - /** - * - * @return The window size to use is max(100138.993801, 66550.928197) * 1.500000 - */ - int getWindowSize(){ return windowSize; } - - /** - * - * @return the total length of reference sequence listed on BAM @SQ lines - */ - long getGenomeSize( ){ return genomeSize;} - - /** - * - * @return the Test File with File type - */ - File getTestFile(){return ftest;} - - /** - * - * @return the Ref File with File type - */ - File getRefFile(){return fref;} - -} diff --git a/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- b/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- deleted file mode 100644 index 707c4726d..000000000 --- a/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- +++ /dev/null @@ -1,226 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.qcnv; - -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import htsjdk.samtools.*; - -import java.lang.Math; -import java.io.*; - -import org.qcmg.picard.SAMFileReaderFactory; - - -public class CNVseq { - - private static final boolean String = false; - //in cnv-seq.pl it call below R to get value - //echo 'options(digits=16);qnorm(1-0.5*0.001)' | R --vanilla --slave (result: 3.290526731491926) - public static double bt = 3.290526731491926; - //$echo 'options(digits=16);qnorm(0.5*0.001)' | R --vanilla --slave (result: -3.290526731491894) - public static double st = -3.290526731491894; - - public static double pvalue = 0.001; - public static int min_windoe = 4; - public static double log2 = 0.6; - public static double bigger = 1.5; - //public static int debug = 0; - //public static String Rexe = "R"; - - private final Map refSeq; - - private final long genomeSize ; - private final long numTest; - private final long numRef; - - private final double biglog2_window; - private final double smalog2_window; - private final int windowSize; - - private final File ftest; - private final File fref; - - - /** - * it caculate the window size based on genome size, TEST and REF BAM records number - * @param test: File of TEST BAM - * @param ref: File of reference BAM - * @throws Exception - */ - CNVseq(File test, File ref, int window ) throws Exception { - //open file - SamReader rtest = SAMFileReaderFactory.createSAMFileReader(test );//new SAMFileReader(test); - SamReader rref = SAMFileReaderFactory.createSAMFileReader(ref );//new SAMFileReader(ref); - - //check whether index file exist or not - if(!rtest.hasIndex()){ - throw new Exception("can't find index for: " + test.getName()); - } - if(!rref.hasIndex()){ - throw new Exception("can't find index for: " + ref.getName()); - } - ftest = test; - fref = ref; - - //check SAM header - SAMFileHeader htest = rtest.getFileHeader(); - SAMFileHeader href = rref.getFileHeader(); - - //get sequence information from both inputs - Map seqTest = new HashMap (); - Map seqRef = new HashMap (); - - List genome = htest.getSequenceDictionary().getSequences(); - for(SAMSequenceRecord re : genome){ - seqTest.put(re.getSequenceName(),re.getSequenceLength()); - } - - genome = href.getSequenceDictionary().getSequences(); - for(SAMSequenceRecord re : genome){ - seqRef.put(re.getSequenceName(),re.getSequenceLength()); - } - - // check both @SQ line are same or not - if(seqRef.size() != seqTest.size()){ - throw new Exception("the sequence size are different between two inputs: \n" + ftest.getName() + "\n" + fref.getName() ); - } - - for (String key : seqTest.keySet()){ - //first convert Integer to int - int l1 = seqTest.get(key); - int l2 = seqRef.get(key); - if(l1 != l2){ - throw new Exception("the sequence size of " + key + " are different between two inputs : \n" + ftest.getName() + "\n" + fref.getName() ); - } - } - - // assign one of the identical reference info into the hash map - refSeq = seqTest; - - //caculate the genome size based on the identail reference - long size = 0; - for(String key : refSeq.keySet()){ size += refSeq.get(key); } - genomeSize = size; -//-debug -//genomeSize = 3253037807L; - - //count mapped record number based on index file - BAMIndex tIndex = rtest.indexing().getIndex(); - BAMIndex rIndex = rref.indexing().getIndex(); - BAMIndexMetaData meta; - int tMapped = 0; - int rMapped = 0; - for(int i = 0; i < seqRef.size(); i ++ ){ - meta = 
tIndex.getMetaData(i); - tMapped += meta.getAlignedRecordCount(); - meta = rIndex.getMetaData(i); - rMapped += meta.getAlignedRecordCount(); - } - numTest = tMapped; - numRef = rMapped; - - //close files - rtest.close(); - rref.close(); - - //caculate window size - double brp = Math.pow(2, log2); - double srp = 1.0 / brp; - - - biglog2_window = (numTest * Math.pow(brp, 2) + numRef) * genomeSize * Math.pow(bt, 2) / ( Math.pow((1- brp),2 ) * numTest * numRef); - smalog2_window = (numTest * Math.pow(srp, 2) + numRef) * genomeSize * Math.pow(st, 2) / ( Math.pow((1- srp),2 ) * numTest * numRef); - if(window == 0 ){ - windowSize = (int) (Math.max(biglog2_window, smalog2_window) * bigger) ; - }else{ - windowSize = window; - } - - } - - /** - * it create an Iterator and query on each window; finally it close the iterator - * @param f: SAMFileReader - * @param chr: genoeme name - * @param start: window start postion - * @param end: window end position - * @return the totoal number of records mapped overlapped on this window region - */ - int exeQuery (SamReader reader, String chr, int start, int end){ - - SAMRecordIterator block_ite = reader.queryOverlapping(chr, start, end); - int num = 0; - while(block_ite.hasNext()){ - num ++; - block_ite.next(); - } - - block_ite.close(); - - return num; - } - - /** - * - * @return total SAM records number in Test input file - */ - long getTestReadsNumber(){return numTest;} - - /** - * - * @return total SAM records number in Ref input file - */ - long getRefReadsNumber(){return numRef;} - - /** - * - * @return a hash table list each sequence reference name and length - */ - Map getrefseq(){return refSeq;} - - /** - * - * @return return the minimum window size for detecting log2>=0.6 - */ - double getpositivelog2window(){ return biglog2_window;} - - /** - * - * @return The minimum window size for detecting log2<=-0.6 - */ - double getnegativelog2window(){return smalog2_window;} - - /** - * - * @return The window size to use is max(100138.993801, 66550.928197) * 1.500000 - */ - int getWindowSize(){ return windowSize; } - - /** - * - * @return the total length of reference sequence listed on BAM @SQ lines - */ - long getGenomeSize( ){ return genomeSize;} - - /** - * - * @return the Test File with File type - */ - File getTestFile(){return ftest;} - - /** - * - * @return the Ref File with File type - */ - File getRefFile(){return fref;} - -} diff --git a/qmule/src/org/qcmg/qmule/qcnv/Main.java b/qmule/src/org/qcmg/qmule/qcnv/Main.java deleted file mode 100644 index 41b681505..000000000 --- a/qmule/src/org/qcmg/qmule/qcnv/Main.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.qcnv; - -import org.qcmg.common.log.*; -import htsjdk.samtools.*; -import java.util.*; -import java.util.Map.Entry; -import java.io.*; - -public class Main { - - public static void main(String[] args) throws Exception{ - //check arguments - Options options = new Options( args); - if(! 
options.commandCheck()){ System.exit(1); }
-
-        QLogger logger = options.getLogger(args);
-        try{
-            File ftest = new File(options.getIO("test"));
-            File fref = new File(options.getIO("ref"));
-            CNVseq cnvseq = new CNVseq(ftest, fref, options.getWindowSize());
-
-            logger.info("genome size used for calculation is " + cnvseq.getGenomeSize());
-            logger.info(ftest.getName() + "contains records number: " + cnvseq.getTestReadsNumber());
-            logger.info(fref.getName() + "contains records number: " + cnvseq.getRefReadsNumber());
-            if(options.getWindowSize() == 0){
-                logger.info("The minimum window size for detecting log2>=" + CNVseq.log2 +" should be " + cnvseq.getpositivelog2window());
-                logger.info("The minimum window size for detecting log2<=-" + CNVseq.log2 +" should be " + cnvseq.getnegativelog2window());
-                logger.info(String.format("The window size to use is max(%f, %f) * %f = %d",
-                        cnvseq.getpositivelog2window(),cnvseq.getnegativelog2window(), CNVseq.bigger, cnvseq.getWindowSize()));
-            }else{
-                logger.info("The window size used in this run is " + options.getWindowSize());
-            }
-
-            //count reads number in each window and output
-            MtCNVSeq cnvThread = new MtCNVSeq(cnvseq, new File(options.getIO("output")), options.getThreadNumber(), options.getTmpDir());
-            cnvThread.cnvCount(logger);
-
-            logger.logFinalExecutionStats(0);
-            System.exit(0);
-        }catch(Exception e){
-            logger.error(e.toString());
-            logger.logFinalExecutionStats(1);
-            System.err.println(e.toString());
-            System.exit(1);
-        }
-    }
-
-
-
-}
diff --git a/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java b/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java
deleted file mode 100644
index b8bdbfcf5..000000000
--- a/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java
+++ /dev/null
@@ -1,152 +0,0 @@
-/**
- * © Copyright The University of Queensland 2010-2014.
- * © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
- *
- * This code is released under the terms outlined in the included LICENSE file.
- */ -package org.qcmg.qmule.qcnv; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.File; -import java.io.IOException; - -import java.util.HashMap; -import java.util.Map; - -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.*; -import org.qcmg.common.util.Constants; -import org.qcmg.picard.SAMFileReaderFactory; - - -public class MtCNVSeq { - - final CNVseq cnvseq; - final File Output; - final int noOfThreads; - final File tmpPath; - - MtCNVSeq(CNVseq cnvseq, File output, int noOfThreads, File tmpdir) throws IOException{ - this.cnvseq = cnvseq; - this.Output = output; - this.noOfThreads = noOfThreads; - if(tmpdir == null) - tmpPath = File.createTempFile( "qcnv", "", Output.getParentFile()); - else - tmpPath = File.createTempFile( "qcnv", "",tmpdir); - } - /** - * it call threads, parallel the BAMFileReader.query for single genome - * @param logger: an instance of QLogger - * @throws IOException - * @throws InterruptedException - */ - void cnvCount(QLogger logger) throws IOException, InterruptedException{ - - Map refseq = cnvseq.getrefseq(); - Map tmpoutput = new HashMap(); - ExecutorService queryThreads = Executors.newFixedThreadPool(noOfThreads); - - logger.debug("start parallel query based on genome file name"); - - - if(!(tmpPath.delete())) - throw new IOException("Could not delete tmp file: " + tmpPath.getAbsolutePath()); - if(! tmpPath.mkdirs()) - throw new IOException("Could not create tmp directory: " + tmpPath.getAbsolutePath()); - - //parallel query by genomes and output to tmp files - for ( Map.Entry chr : refseq.entrySet()){ - File tmp = File.createTempFile(chr.getKey(), ".count", tmpPath); - tmpoutput.put(chr.getKey(), tmp); - queryThreads.execute(new ExeQuery(cnvseq,chr, tmp)); - } - //wait threads finish - queryThreads.shutdown(); - queryThreads.awaitTermination(Constants.EXECUTOR_SERVICE_AWAIT_TERMINATION, TimeUnit.HOURS); - queryThreads.shutdownNow(); - logger.debug("completed parallel query based on genome file name"); - - - //collect outputs from tmp files into - logger.debug("starting collect each genome counts into final output"); - FileWriter writer = new FileWriter(Output); - writer.write("reference\tstart\tend\ttest\tref\n"); - for( Map.Entry tmp : tmpoutput.entrySet()){ - BufferedReader input = new BufferedReader(new FileReader(tmp.getValue())); - String line = null; - while((line = input.readLine()) != null){ - writer.write(line + "\n"); - } - input.close(); - tmp.getValue().deleteOnExit(); - } - tmpPath.delete(); - writer.close(); - logger.debug("created final output"); - } - - /** - * query on Test BAM and Ref BAM records which mapped to specified gemoem - * @author q.xu - * - */ - public static class ExeQuery implements Runnable { - CNVseq cnvseq; - File Output; - File Test; - File Ref; - QLogger logger; - int chrSize; - int winSize; - String chrName; - - ExeQuery(CNVseq cnvseq, Map.Entry chr,File tmp) { - Output = tmp; - Test = cnvseq.getTestFile(); - Ref = cnvseq.getRefFile(); - chrSize = chr.getValue(); - chrName = chr.getKey(); - winSize = cnvseq.getWindowSize(); - this.cnvseq = cnvseq; - } - - public void run() { - try { - FileWriter writer = new FileWriter(Output); - SamReader rTest = SAMFileReaderFactory.createSAMFileReader(Test,ValidationStringency.SILENT); - SamReader rRef 
= SAMFileReaderFactory.createSAMFileReader(Ref,ValidationStringency.SILENT);
-
-                int win_num = chrSize / winSize + 1;
-
-                for (int i = 0; i < win_num; i++){
-                    int start = i * winSize + 1;
-                    int end = (i + 1 ) * winSize;
-                    int num_test = cnvseq.exeQuery(rTest, chrName, start, end);
-                    int num_ref = cnvseq.exeQuery(rRef, chrName, start, end);
-                    writer.write(String.format("%s\t%d\t%d\t%d\t%d\n", chrName, start, end, num_test, num_ref ));
-                }
-
-                rRef.close();
-                writer.close();
-                rTest.close();
-
-            } catch (Exception e) {
-                System.out.println(Thread.currentThread().getName() + " "
-                        + e.getMessage());
-                Thread.currentThread().interrupt();
-            }
-
-        }
-    }
-
-}
diff --git a/qmule/src/org/qcmg/qmule/qcnv/Options.java b/qmule/src/org/qcmg/qmule/qcnv/Options.java
deleted file mode 100644
index 3f4dc850b..000000000
--- a/qmule/src/org/qcmg/qmule/qcnv/Options.java
+++ /dev/null
@@ -1,169 +0,0 @@
-/**
- * © Copyright The University of Queensland 2010-2014.
- * © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
- *
- * This code is released under the terms outlined in the included LICENSE file.
- */ -package org.qcmg.qmule.qcnv; - - -import java.io.File; -import java.util.List; - -import joptsimple.OptionParser; -import joptsimple.OptionSet; - -import org.qcmg.qmule.Messages; -import org.qcmg.common.log.*; - -public class Options { - private static final String HELP_DESCRIPTION = Messages.getMessage("HELP_OPTION_DESCRIPTION"); - private static final String VERSION_DESCRIPTION = Messages.getMessage("VERSION_OPTION_DESCRIPTION"); - private static final String LOG_DESCRIPTION = Messages.getMessage("LOG_OPTION_DESCRIPTION"); - private static final String LOGLEVEL_DESCRIPTION = Messages.getMessage("LOGLEVEL_OPTION_DESCRIPTION"); - - private static final String OUTPUT_DESCRIPTION = Messages.getMessage("OUTPUT_OPTION_DESCRIPTION"); - private static final String TEST_DESCRIPTION = Messages.getMessage("TEST_OPTION_DESCRIPTION"); - private static final String REF_DESCRIPTION = Messages.getMessage("REF_OPTION_DESCRIPTION"); - private static final String THREAD_DESCRIPTION = Messages.getMessage("THREAD_OPTION_DESCRIPTION"); - private static final String WINDOW_DESCRIPTION = Messages.getMessage("WINDOW_SIZE_DESCRIPTION"); - private static final String TMPDIR_DESCRIPTION = Messages.getMessage("TMPDIR_DESCRIPTION"); - private final OptionParser parser = new OptionParser(); - private final OptionSet options; - - final static int DEFAULT_THREAD = 2; - final String commandLine; - final String USAGE = Messages.getMessage("USAGE_QCNV"); - final String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - - public Options( final String[] args) throws Exception { - parser.accepts("output", OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); - parser.accepts("ref", REF_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("Normal BAM"); - parser.accepts("test", TEST_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("Tumor BAM"); - parser.accepts("thread", THREAD_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("thread number"); - parser.accepts("window", WINDOW_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("window size"); - parser.accepts("tmpdir", TMPDIR_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("window size"); - - - parser.accepts("log", LOG_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("logfile"); - parser.accepts("loglevel", LOGLEVEL_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("loglevel"); - parser.accepts("version", VERSION_DESCRIPTION); - parser.accepts("help", HELP_DESCRIPTION); - - options = parser.parse(args); - commandLine = Messages.reconstructCommandLine(args); - } - - //IO parameters - String getIO(String io) throws Exception{ - - int size = options.valuesOf(io).size(); - if( size > 1){ - throw new Exception("multiple "+ io + " files specified" ); - } - else if( size < 1 ){ - throw new Exception(" missing or invalid IO option specified: " + io ); - } - - return options.valueOf(io).toString(); - } - - File getTmpDir() throws Exception{ - if(options.has("tmpdir")) - return new File (options.valueOf("tmpdir").toString()); - - - - return null; - - } - - int getThreadNumber(){ - - if(options.has("thread")){ - return Integer.parseInt((String) options.valueOf("thread")); - } - - return DEFAULT_THREAD; - } - - int getWindowSize(){ - - if(options.has("window")){ - return Integer.parseInt((String) options.valueOf("window")); - } - - return 0; - } - - QLogger getLogger(String[] args) throws Exception{ - - // configure logging - 
QLogger logger;
-        String logLevel = (String) options.valueOf("loglevel");
-        String logFile;
-        if(options.has("log")){
-            logFile = options.valueOf("log").toString();
-        }
-        else{
-            logFile = options.valueOf("output") + ".log";
-        }
-
-        logger = QLoggerFactory.getLogger( Main.class, logFile,logLevel);
-        logger.logInitialExecutionStats(Main.class.toString(), version, args);
-        return logger;
-    }
-
-    boolean hasHelp() throws Exception{
-        if(options.has("h") || options.has("help")){
-            System.out.println(USAGE);
-            System.out.println(HELP_DESCRIPTION);
-            parser.printHelpOn(System.err);
-            return true;
-        }
-        return false;
-    }
-
-    boolean hasVersion()throws Exception{
-        if(options.has("v") || options.has("version")){
-            System.out.println(VERSION_DESCRIPTION);
-            System.err.println(version);
-            return true;
-        }
-        return false;
-    }
-
-    boolean commandCheck() throws Exception{
-        //quit system after provide help or version info
-        if( hasHelp() || hasVersion() ){
-            System.exit(0);
-        }
-
-
-        if (options.nonOptionArguments().size() > 0) {
-            List nonoptions = (List) options.nonOptionArguments();
-
-            for(String str : nonoptions){
-                System.err.println("INVALID OPTION: " + str);
-            }
-            return false;
-        }
-
-        if(getIO("ref") == null || getIO("test") == null){
-            System.err.println("Missing ref or test option");
-            return false;
-        }
-        if( getIO("ref").equals(getIO("output"))){
-            System.err.println(Messages.getMessage("SAME_FILES", "ref", "output"));
-            return false;
-        }
-        if(options.has("thread")){
-            int thread = Integer.parseInt((String) options.valueOf("thread"));
-            if(thread < 1){
-                System.err.println("THREAD NUMBER MUST GREATER THAN ONE: " + options.valueOf("thread") );
-            }
-        }
-
-        return true;
-    }
-}
diff --git a/qmule/src/org/qcmg/qmule/queryChrMT.java b/qmule/src/org/qcmg/qmule/queryChrMT.java
deleted file mode 100644
index d9dcad3ff..000000000
--- a/qmule/src/org/qcmg/qmule/queryChrMT.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * © Copyright The University of Queensland 2010-2014.
- * © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
- *
- * This code is released under the terms outlined in the included LICENSE file.
- */
-package org.qcmg.qmule;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintWriter;
-
-import htsjdk.samtools.*;
-import htsjdk.samtools.SAMRecord;
-import htsjdk.samtools.ValidationStringency;
-import htsjdk.samtools.SAMRecordIterator;
-
-import java.io.*;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-
-import org.qcmg.picard.SAMFileReaderFactory;
-import org.qcmg.picard.SAMOrBAMWriterFactory;
-
-
-public class queryChrMT {
-
-    public static void main(final String[] args) throws IOException, InterruptedException {
-
-        try{
-
-            File inBAM = new File(args[0]);
-            String outputName = inBAM.getName().replace(".bam", ".chrMT.primary.bam");
-            File output = new File(args[1], outputName);
-
-            SamReader reader = SAMFileReaderFactory.createSAMFileReader(inBAM,ValidationStringency.SILENT);
-            SAMFileHeader he = reader.getFileHeader().clone();
-            SAMOrBAMWriterFactory writeFactory = new SAMOrBAMWriterFactory(he , true, output);
-            SAMRecordIterator ite = reader.query("chrMT",0, 16569, false);
-
-            SAMRecord record;
-            while(ite.hasNext()){
-                record = ite.next();
-                if(!record.getNotPrimaryAlignmentFlag())
-                    writeFactory.getWriter().addAlignment(record );
-
-            }
-            writeFactory.closeWriter();
-            reader.close();
-
-            System.exit(0);
-        }catch(Exception e){
-            System.err.println(e.toString());
-            Thread.sleep(1);
-            System.out.println("usage: qmule org.qcmg.qmule.queryChrMT ");
-            System.exit(1);
-        }
-
-    }
-
-
-}
diff --git a/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java b/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java
deleted file mode 100644
index e405206bd..000000000
--- a/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java
+++ /dev/null
@@ -1,205 +0,0 @@
-/**
- * © Copyright The University of Queensland 2010-2014.
- * © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
- *
- * This code is released under the terms outlined in the included LICENSE file.
- */ -package org.qcmg.qmule.snppicker; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.SnpUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.qmule.util.IGVBatchFileGenerator; -import org.qcmg.qmule.util.TabbedDataLoader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class CompareSnps { - - private final ConcurrentMap firstSnpMap = new ConcurrentHashMap(30000); //not expecting more than 100000 - private final ConcurrentMap secondSnpMap = new ConcurrentHashMap(30000); - private final List firstList = new ArrayList(); - private final List secondList = new ArrayList(); -// private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); - - private static QLogger logger; - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - public int engage() throws Exception { - - logger.info("loading snp data from file: " + cmdLineInputFiles[0]); - TabbedDataLoader.loadTabbedData(cmdLineInputFiles[0], -2, firstSnpMap); - logger.info("loading snp data - DONE [" + firstSnpMap.size() + "]"); - logger.info("loading snp data from file: " + cmdLineInputFiles[1]); - TabbedDataLoader.loadTabbedData(cmdLineInputFiles[1], -2, secondSnpMap); - logger.info("loading snp data - DONE [" + secondSnpMap.size() + "]"); - - compare(); - - outputIGVBatchFiles(); - -// addPileupFromNormalBam(); - - return exitStatus; - } - - private void outputIGVBatchFiles() throws IOException { - IGVBatchFileGenerator.generate(firstList, cmdLineOutputFiles[0]); - IGVBatchFileGenerator.generate(secondList, cmdLineOutputFiles[1]); - } - - protected void compare() { - - // total counts - int firstMapCount = 0, secondMapCount = 0; - // count of snps unique to each input - int uniqueToFirstMap = 0, uniqueToSecondMap = 0; - int commonSnps = 0, commonAndAlsoClassABStopNonSynon = 0; - - // loop through first set - for (Entry entry : firstSnpMap.entrySet()) { - - TabbedRecord firstRecord = entry.getValue(); - - if (isClassAB(firstRecord, -1) && isStopNonSynonymous(firstRecord, 22)) { - firstMapCount++; - - TabbedRecord secondRecord = secondSnpMap.get(entry.getKey()); - if (null == secondRecord || ! (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22))) { - uniqueToFirstMap++; - firstList.add(entry.getKey()); - logger.info("Unique to first: " + entry.getKey().getChromosome() + ":" + entry.getKey().getStartPosition()); - } else { - commonSnps++; -// if (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22)) { -// commonAndAlsoClassABStopNonSynon++; -// } - } - } - - } - - // loop through second set - for (Entry entry : secondSnpMap.entrySet()) { - - TabbedRecord secondRecord = entry.getValue(); - - if (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22)) { - secondMapCount++; - - TabbedRecord firstRecord = firstSnpMap.get(entry.getKey()); - if (null == firstRecord || ! 
(isClassAB(firstRecord, -1) && isStopNonSynonymous(firstRecord, 22))) { - uniqueToSecondMap++; - secondList.add(entry.getKey()); - logger.info("Unique to second: " + entry.getKey().getChromosome() + ":" + entry.getKey().getStartPosition()); -// logger.info("IGV: " + entry.getValue().getData()); - } - } - } - - logger.info("SUMMARY:"); - logger.info("firstMapCount: " + firstMapCount); - logger.info("secondMapCount: " + secondMapCount); - logger.info("uniqueToFirstMap: " + uniqueToFirstMap); - logger.info("uniqueToSecondMap: " + uniqueToSecondMap); - logger.info("commonSnps: " + commonSnps); -// logger.info("commonAndAlsoClassABStopNonSynon: " + commonAndAlsoClassABStopNonSynon); - - } - - - - protected static boolean isClassAB(TabbedRecord record, int index) { - if (null == record || null == record.getData()) throw new IllegalArgumentException("null or empty Tabbed record"); - String [] params = TabbedDataLoader.tabbedPattern.split(record.getData()); - String qcmgFlag = TabbedDataLoader.getStringFromArray(params, index); - - return SnpUtils.isClassAorB(qcmgFlag); -// return "--".equals(qcmgFlag) || "less than 12 reads coverage in normal".equals(qcmgFlag) -// || "less than 3 reads coverage in normal".equals(qcmgFlag); - - } - - protected static boolean isStopNonSynonymous(TabbedRecord record, int index) { - if (null == record || null == record.getData()) throw new IllegalArgumentException("null or empty Tabbed record"); - String [] params = TabbedDataLoader.tabbedPattern.split(record.getData()); -// String consequenceType = params[index]; - String consequenceType = TabbedDataLoader.getStringFromArray(params, index); - - return consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS"); - } - - - - public static void main(String[] args) throws Exception { - CompareSnps sp = new CompareSnps(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(CompareSnps.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareSnps", CompareSnps.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- deleted file mode 100644 index e405206bd..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- +++ /dev/null @@ -1,205 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.SnpUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.qmule.util.IGVBatchFileGenerator; -import org.qcmg.qmule.util.TabbedDataLoader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class CompareSnps { - - private final ConcurrentMap firstSnpMap = new ConcurrentHashMap(30000); //not expecting more than 100000 - private final ConcurrentMap secondSnpMap = new ConcurrentHashMap(30000); - private final List firstList = new ArrayList(); - private final List secondList = new ArrayList(); -// private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); - - private static QLogger logger; - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - public int engage() throws Exception { - - logger.info("loading snp data from file: " + cmdLineInputFiles[0]); - TabbedDataLoader.loadTabbedData(cmdLineInputFiles[0], -2, firstSnpMap); - logger.info("loading snp data - DONE [" + firstSnpMap.size() + "]"); - logger.info("loading snp data from file: " + cmdLineInputFiles[1]); - TabbedDataLoader.loadTabbedData(cmdLineInputFiles[1], -2, secondSnpMap); - logger.info("loading snp data - DONE [" + secondSnpMap.size() + "]"); - - compare(); - - outputIGVBatchFiles(); - -// addPileupFromNormalBam(); - - return exitStatus; - } - - private void outputIGVBatchFiles() throws IOException { - IGVBatchFileGenerator.generate(firstList, cmdLineOutputFiles[0]); - IGVBatchFileGenerator.generate(secondList, cmdLineOutputFiles[1]); - } - - protected void compare() { - - // total counts - int firstMapCount = 0, secondMapCount = 0; - // count of snps unique to each input - int uniqueToFirstMap = 0, uniqueToSecondMap = 0; - int commonSnps = 0, commonAndAlsoClassABStopNonSynon = 0; - - // loop through first set - for (Entry entry : firstSnpMap.entrySet()) { - - TabbedRecord firstRecord = entry.getValue(); - - if (isClassAB(firstRecord, -1) && isStopNonSynonymous(firstRecord, 22)) { - firstMapCount++; - - TabbedRecord secondRecord = secondSnpMap.get(entry.getKey()); - if (null == secondRecord || ! 
(isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22))) { - uniqueToFirstMap++; - firstList.add(entry.getKey()); - logger.info("Unique to first: " + entry.getKey().getChromosome() + ":" + entry.getKey().getStartPosition()); - } else { - commonSnps++; -// if (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22)) { -// commonAndAlsoClassABStopNonSynon++; -// } - } - } - - } - - // loop through second set - for (Entry entry : secondSnpMap.entrySet()) { - - TabbedRecord secondRecord = entry.getValue(); - - if (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22)) { - secondMapCount++; - - TabbedRecord firstRecord = firstSnpMap.get(entry.getKey()); - if (null == firstRecord || ! (isClassAB(firstRecord, -1) && isStopNonSynonymous(firstRecord, 22))) { - uniqueToSecondMap++; - secondList.add(entry.getKey()); - logger.info("Unique to second: " + entry.getKey().getChromosome() + ":" + entry.getKey().getStartPosition()); -// logger.info("IGV: " + entry.getValue().getData()); - } - } - } - - logger.info("SUMMARY:"); - logger.info("firstMapCount: " + firstMapCount); - logger.info("secondMapCount: " + secondMapCount); - logger.info("uniqueToFirstMap: " + uniqueToFirstMap); - logger.info("uniqueToSecondMap: " + uniqueToSecondMap); - logger.info("commonSnps: " + commonSnps); -// logger.info("commonAndAlsoClassABStopNonSynon: " + commonAndAlsoClassABStopNonSynon); - - } - - - - protected static boolean isClassAB(TabbedRecord record, int index) { - if (null == record || null == record.getData()) throw new IllegalArgumentException("null or empty Tabbed record"); - String [] params = TabbedDataLoader.tabbedPattern.split(record.getData()); - String qcmgFlag = TabbedDataLoader.getStringFromArray(params, index); - - return SnpUtils.isClassAorB(qcmgFlag); -// return "--".equals(qcmgFlag) || "less than 12 reads coverage in normal".equals(qcmgFlag) -// || "less than 3 reads coverage in normal".equals(qcmgFlag); - - } - - protected static boolean isStopNonSynonymous(TabbedRecord record, int index) { - if (null == record || null == record.getData()) throw new IllegalArgumentException("null or empty Tabbed record"); - String [] params = TabbedDataLoader.tabbedPattern.split(record.getData()); -// String consequenceType = params[index]; - String consequenceType = TabbedDataLoader.getStringFromArray(params, index); - - return consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS"); - } - - - - public static void main(String[] args) throws Exception { - CompareSnps sp = new CompareSnps(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(CompareSnps.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareSnps", CompareSnps.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java deleted file mode 100644 index 322cbd5d1..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java +++ /dev/null @@ -1,237 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.pileup.QPileupFileReader; -import org.qcmg.pileup.QSnpRecord; -import org.qcmg.pileup.VerifiedSnpFileReader; -import org.qcmg.pileup.VerifiedSnpRecord; -import org.qcmg.vcf.VCFFileReader; - -public class ExamineVerifiedSnps { - - private static final QLogger logger = QLoggerFactory.getLogger(ExamineVerifiedSnps.class); - - private static Map pileup = new HashMap<>(80000); - private static Map vcfRecords = new HashMap<>(80000); - private static Map verifiedSNPs = new HashMap<>(250); - - public static void main(String[] args) throws Exception { - logger.info("hello..."); - - String filename = args[0]; - boolean runQPileup = true; - // filename type depends on whether to load qpileup or vcf - if (FileUtils.isFileTypeValid(filename, "vcf")) { - runQPileup = false; - } - - loadVerifiedSnps(args[1]); - logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); - - if (runQPileup) { - // load the existing pileup into memory - logger.info("running in pileup mode"); - loadQPileup(args[0]); - logger.info("loaded " + pileup.size() + " entries into the pileup map"); - examine(args[2]); - } else { - logger.info("running in vcf mode"); - loadGATKData(args[0]); - logger.info("loaded " + vcfRecords.size() + " entries into the vcf map"); - examineVCF(args[2]); - } - logger.info("goodbye..."); - } - - private static void examine(String outputFile) throws IOException { - if 
(FileUtils.canFileBeWrittenTo(outputFile)) { - - - int verifiedYes = 0, qsnpVerifiedYes = 0; - int verifiedNo = 0, qsnpVerifiedNo = 0; - int verifiedNoGL = 0, qsnpVerifiedNoGL = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : verifiedSNPs.entrySet()) { - - QSnpRecord qpr = pileup.get(entry.getKey()); - VerifiedSnpRecord vsr = entry.getValue(); - - // only interested in exome data - if ( ! "exome".equals(vsr.getAnalysis())) continue; - - - if ("no".equals(vsr.getStatus())) { - verifiedNo++; - // if we don't have a matching qpr - good, otherwise, print details - if (null == qpr) { - qsnpVerifiedNo++; - writer.write(vsr.getFormattedString() + "\tOK - no entry in qsnp\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getClassification() + "\t" - + getAnnotationAndNote(qpr) + "\n"); -// + qpr.getMutation() + getAnnotationAndNote(qpr) + "\n"); - } - - } else if ("yes".equals(vsr.getStatus())) { - verifiedYes++; - if (null != qpr) { - qsnpVerifiedYes++; - writer.write(vsr.getFormattedString() + "\tOK - entry in qsnp\t" + qpr.getClassification() + "\t" - + getAnnotationAndNote(qpr) +"\n"); -// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\n"); - } - } else if ("no -GL".equals(vsr.getStatus())) { - verifiedNoGL++; - if (null != qpr) { - qsnpVerifiedNoGL++; - - writer.write(vsr.getFormattedString() + "\tentry in qsnp\t" + qpr.getClassification() + "\t" - + getAnnotationAndNote(qpr) +"\n"); -// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\tNo entry in qsnp\n"); - } - } - } - - writer.close(); - logger.info("verified yes: " + verifiedYes + ", in qsnp: " + qsnpVerifiedYes); - logger.info("verified no: " + verifiedNo + ", in qsnp: " + (verifiedNo-qsnpVerifiedNo)); - logger.info("verified no -GL: " + verifiedNoGL + ", in qsnp: " + qsnpVerifiedNoGL); - } - } - - private static void examineVCF(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - - int verifiedYes = 0, gatkVerifiedYes = 0; - int verifiedNo = 0, gatkVerifiedNo = 0; - int verifiedNoGL = 0, gatkVerifiedNoGL = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : verifiedSNPs.entrySet()) { - - VcfRecord qpr = vcfRecords.get(entry.getKey()); - VerifiedSnpRecord vsr = entry.getValue(); - - // only interested in exome data - if ( ! 
"exome".equals(vsr.getAnalysis())) continue; - - if ("no".equals(vsr.getStatus())) { - verifiedNo++; - // if we don't have a matching qpr - good, otherwise, print details - if (null == qpr) { - gatkVerifiedNo++; - writer.write(vsr.getFormattedString() + "\tOK - no entry in GATK\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\t" + - VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() + "\n"); -// writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getGenotype() + "\t" + qpr.getAlt() + "\n"); - } - - } else if ("yes".equals(vsr.getStatus())) { - verifiedYes++; - if (null != qpr) { - gatkVerifiedYes++; - writer.write(vsr.getFormattedString() + "\tOK - entry in GATK\t" + - VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\n"); - } - } else if ("no -GL".equals(vsr.getStatus())) { - verifiedNoGL++; - if (null != qpr) { - gatkVerifiedNoGL++; - - writer.write(vsr.getFormattedString() + "\tentry in GATK\t" + - VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\tNo entry in GATK\n"); - } - } - } - - writer.close(); - logger.info("verified yes: " + verifiedYes + ", in GATK: " + gatkVerifiedYes); - logger.info("verified no: " + verifiedNo + ", in GATK: " + (verifiedNo-gatkVerifiedNo)); - logger.info("verified no -GL: " + verifiedNoGL + ", in GATK: " + gatkVerifiedNoGL); - } - } - - private static String getAnnotationAndNote(QSnpRecord record) { - if ( isNull(record.getAnnotation())) return "\tClassA"; - else if (isNull(record.getAnnotation())) return "\tClassB"; - else return "\tClassB\t" + record.getAnnotation(); - } - - private static boolean isNull(String string) { - return null == string || "null".equals(string) || 0 == string.length(); - } - - private static void loadQPileup(String pileupFile) throws IOException { - if (FileUtils.canFileBeRead(pileupFile)) { - QPileupFileReader reader = new QPileupFileReader(new File(pileupFile)); - try { - for (QSnpRecord qpr : reader) { - pileup.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); - } - } finally { - reader.close(); - } - } - } - - private static void loadGATKData(String pileupFile) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - - VCFFileReader reader = new VCFFileReader(new File(pileupFile)); - try { - for (VcfRecord qpr : reader) { - vcfRecords.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); - } - } finally { - reader.close(); - } - } - } - private static void loadVerifiedSnps(String verifiedSnpFile) throws IOException { - if (FileUtils.canFileBeRead(verifiedSnpFile)) { - VerifiedSnpFileReader reader = new VerifiedSnpFileReader(new File(verifiedSnpFile)); - try { - for (VerifiedSnpRecord vsr : reader) { - verifiedSNPs.put(ChrPointPosition.valueOf(vsr.getChromosome(), vsr.getPosition()),vsr); - } - } finally { - reader.close(); - } - } - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- deleted file mode 100644 index 322cbd5d1..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- +++ /dev/null @@ -1,237 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.pileup.QPileupFileReader; -import org.qcmg.pileup.QSnpRecord; -import org.qcmg.pileup.VerifiedSnpFileReader; -import org.qcmg.pileup.VerifiedSnpRecord; -import org.qcmg.vcf.VCFFileReader; - -public class ExamineVerifiedSnps { - - private static final QLogger logger = QLoggerFactory.getLogger(ExamineVerifiedSnps.class); - - private static Map pileup = new HashMap<>(80000); - private static Map vcfRecords = new HashMap<>(80000); - private static Map verifiedSNPs = new HashMap<>(250); - - public static void main(String[] args) throws Exception { - logger.info("hello..."); - - String filename = args[0]; - boolean runQPileup = true; - // filename type depends on whether to load qpileup or vcf - if (FileUtils.isFileTypeValid(filename, "vcf")) { - runQPileup = false; - } - - loadVerifiedSnps(args[1]); - logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); - - if (runQPileup) { - // load the existing pileup into memory - logger.info("running in pileup mode"); - loadQPileup(args[0]); - logger.info("loaded " + pileup.size() + " entries into the pileup map"); - examine(args[2]); - } else { - logger.info("running in vcf mode"); - loadGATKData(args[0]); - logger.info("loaded " + vcfRecords.size() + " entries into the vcf map"); - examineVCF(args[2]); - } - logger.info("goodbye..."); - } - - private static void examine(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - - int verifiedYes = 0, qsnpVerifiedYes = 0; - int verifiedNo = 0, qsnpVerifiedNo = 0; - int verifiedNoGL = 0, qsnpVerifiedNoGL = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : verifiedSNPs.entrySet()) { - - QSnpRecord qpr = pileup.get(entry.getKey()); - VerifiedSnpRecord vsr = entry.getValue(); - - // only interested in exome data - if ( ! 
"exome".equals(vsr.getAnalysis())) continue; - - - if ("no".equals(vsr.getStatus())) { - verifiedNo++; - // if we don't have a matching qpr - good, otherwise, print details - if (null == qpr) { - qsnpVerifiedNo++; - writer.write(vsr.getFormattedString() + "\tOK - no entry in qsnp\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getClassification() + "\t" - + getAnnotationAndNote(qpr) + "\n"); -// + qpr.getMutation() + getAnnotationAndNote(qpr) + "\n"); - } - - } else if ("yes".equals(vsr.getStatus())) { - verifiedYes++; - if (null != qpr) { - qsnpVerifiedYes++; - writer.write(vsr.getFormattedString() + "\tOK - entry in qsnp\t" + qpr.getClassification() + "\t" - + getAnnotationAndNote(qpr) +"\n"); -// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\n"); - } - } else if ("no -GL".equals(vsr.getStatus())) { - verifiedNoGL++; - if (null != qpr) { - qsnpVerifiedNoGL++; - - writer.write(vsr.getFormattedString() + "\tentry in qsnp\t" + qpr.getClassification() + "\t" - + getAnnotationAndNote(qpr) +"\n"); -// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\tNo entry in qsnp\n"); - } - } - } - - writer.close(); - logger.info("verified yes: " + verifiedYes + ", in qsnp: " + qsnpVerifiedYes); - logger.info("verified no: " + verifiedNo + ", in qsnp: " + (verifiedNo-qsnpVerifiedNo)); - logger.info("verified no -GL: " + verifiedNoGL + ", in qsnp: " + qsnpVerifiedNoGL); - } - } - - private static void examineVCF(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - - int verifiedYes = 0, gatkVerifiedYes = 0; - int verifiedNo = 0, gatkVerifiedNo = 0; - int verifiedNoGL = 0, gatkVerifiedNoGL = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : verifiedSNPs.entrySet()) { - - VcfRecord qpr = vcfRecords.get(entry.getKey()); - VerifiedSnpRecord vsr = entry.getValue(); - - // only interested in exome data - if ( ! 
"exome".equals(vsr.getAnalysis())) continue; - - if ("no".equals(vsr.getStatus())) { - verifiedNo++; - // if we don't have a matching qpr - good, otherwise, print details - if (null == qpr) { - gatkVerifiedNo++; - writer.write(vsr.getFormattedString() + "\tOK - no entry in GATK\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\t" + - VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() + "\n"); -// writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getGenotype() + "\t" + qpr.getAlt() + "\n"); - } - - } else if ("yes".equals(vsr.getStatus())) { - verifiedYes++; - if (null != qpr) { - gatkVerifiedYes++; - writer.write(vsr.getFormattedString() + "\tOK - entry in GATK\t" + - VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\n"); - } - } else if ("no -GL".equals(vsr.getStatus())) { - verifiedNoGL++; - if (null != qpr) { - gatkVerifiedNoGL++; - - writer.write(vsr.getFormattedString() + "\tentry in GATK\t" + - VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\tNo entry in GATK\n"); - } - } - } - - writer.close(); - logger.info("verified yes: " + verifiedYes + ", in GATK: " + gatkVerifiedYes); - logger.info("verified no: " + verifiedNo + ", in GATK: " + (verifiedNo-gatkVerifiedNo)); - logger.info("verified no -GL: " + verifiedNoGL + ", in GATK: " + gatkVerifiedNoGL); - } - } - - private static String getAnnotationAndNote(QSnpRecord record) { - if ( isNull(record.getAnnotation())) return "\tClassA"; - else if (isNull(record.getAnnotation())) return "\tClassB"; - else return "\tClassB\t" + record.getAnnotation(); - } - - private static boolean isNull(String string) { - return null == string || "null".equals(string) || 0 == string.length(); - } - - private static void loadQPileup(String pileupFile) throws IOException { - if (FileUtils.canFileBeRead(pileupFile)) { - QPileupFileReader reader = new QPileupFileReader(new File(pileupFile)); - try { - for (QSnpRecord qpr : reader) { - pileup.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); - } - } finally { - reader.close(); - } - } - } - - private static void loadGATKData(String pileupFile) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - - VCFFileReader reader = new VCFFileReader(new File(pileupFile)); - try { - for (VcfRecord qpr : reader) { - vcfRecords.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); - } - } finally { - reader.close(); - } - } - } - private static void loadVerifiedSnps(String verifiedSnpFile) throws IOException { - if (FileUtils.canFileBeRead(verifiedSnpFile)) { - VerifiedSnpFileReader reader = new VerifiedSnpFileReader(new File(verifiedSnpFile)); - try { - for (VerifiedSnpRecord vsr : reader) { - verifiedSNPs.put(ChrPointPosition.valueOf(vsr.getChromosome(), vsr.getPosition()),vsr); - } - } finally { - reader.close(); - } - } - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java deleted file mode 100644 index 6758eb70e..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java +++ /dev/null @@ -1,488 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; - -import htsjdk.samtools.SAMRecord; - -import org.qcmg.chrconv.ChrConvFileReader; -import org.qcmg.chrconv.ChromosomeConversionRecord; -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.Classification; -import org.qcmg.common.model.GenotypeEnum; -import org.qcmg.common.model.QSnpGATKRecord; -import org.qcmg.common.util.BaseUtils; -import org.qcmg.common.util.Constants; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.common.vcf.header.VcfHeaderUtils; -import org.qcmg.germlinedb.GermlineDBFileReader; -import org.qcmg.germlinedb.GermlineDBRecord; -import org.qcmg.picard.QJumper; -import org.qcmg.pileup.QSnpRecord; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.Options.Ids; -import org.qcmg.qmule.QMuleException; -import org.qcmg.vcf.VCFFileReader; - -public class GatkUniqueSnps { - -// private static final QLogger logger = QLoggerFactory.getLogger(GatkUniqueSnps.class); - private static QLogger logger; - - private static Map tumourRecords = new HashMap(100000); - private static Map normalRecords = new HashMap(100000); - -// private static Map classABRecords = new HashMap(100000); - private static List qPileupRecords = new ArrayList(15000); - - // map to hold chromosome conversion data - private static final Map ensembleToQCMG = new HashMap(110); - - - // constants - private String mutationIdPrefix; - private String tumourSampleId; - private String normalSampleId; - private String patientId; - private String somaticAnalysisId; - private String germlineAnalysisId; -// private String analysisId; -// private static final String mutationIdPrefix = "APGI_1992_"; -// private static final String analysisId = "qcmg_ssm_20110524_1"; -// private static final String tumourSampleId = "ICGC-ABMP-20091203-06-TD"; - - - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private Properties ids; - - private int exitStatus; - - - private static String bamFile1; - private static String bamFile1Index; -// private static String bamFile2; -// private static String bamFile2Index; - - private static QJumper jumper1; -// private static QJumper jumper2; - - public int engage() throws Exception { - - setupIds(); - - logger.info("loading normal vcf file"); - loadGATKData(cmdLineInputFiles[0], normalRecords); - logger.info("loaded " + normalRecords.size() + " normal vcf's"); - - logger.info("loading tumour vcf file"); - loadGATKData(cmdLineInputFiles[1], tumourRecords); - logger.info("loaded " + tumourRecords.size() + " tumour vcf's"); - - bamFile1 = cmdLineInputFiles[2]; - bamFile1Index = cmdLineInputFiles[3]; -// bamFile2 = args[4]; -// bamFile2Index = args[5]; - - - jumper1 = new QJumper(); - jumper1.setupReader(bamFile1, bamFile1Index); -// jumper2 = new QJumper(); -// jumper2.setupReader(bamFile2, bamFile2Index); - - - logger.info("about to call examine"); - examine(); - logger.info("about to call examine - DONE"); - - // close the qjumper - jumper1.closeReader(); - - logger.info("about to load chromosome conversion data"); - 
loadChromosomeConversionData(cmdLineInputFiles[4]); - logger.info("about to load chromosome conversion data - DONE"); - - logger.info("about to add germlineDB info"); - addGermlineDBData(cmdLineInputFiles[5]); - - int noAnnotation = 0; - for (final QSnpRecord qpr : qPileupRecords) if (null == qpr.getAnnotation()) noAnnotation++; - logger.info("class A after addition of germlinedb data: " + noAnnotation ); - - - logger.info("writing output"); - writeOutputForDCC(cmdLineOutputFiles[0]); - logger.info("DONE"); - - return exitStatus; - } - - private void setupIds() throws Exception { - if (null != ids) { - - somaticAnalysisId = (String) ids.get(Ids.SOMATIC_ANALYSIS); - germlineAnalysisId = (String) ids.get(Ids.GEMLINE_ANALYSIS); - tumourSampleId = (String) ids.get(Ids.TUMOUR_SAMPLE); - normalSampleId = (String) ids.get(Ids.NORMAL_SAMPLE); - patientId = (String) ids.get(Ids.PATIENT); - mutationIdPrefix = patientId + "_SNP_"; - - logger.tool("somaticAnalysisId: " + somaticAnalysisId); - logger.tool("germlineAnalysisId: " + germlineAnalysisId); - logger.tool("normalSampleId: " + normalSampleId); - logger.tool("tumourSampleId: " + tumourSampleId); - logger.tool("patientId: " + patientId); - logger.tool("mutationIdPrefix: " + mutationIdPrefix); - - } else { - logger.error("No ids were passed into the program"); - throw new Exception("Invalid arguments to GatkUniqueSnps"); - } - } - - private static void examine() throws Exception { - - int existsInNormalAndTumour = 0, sameGenotype = 0; - // loop through the tumour map - - for (final Entry tumourEntry : tumourRecords.entrySet()) { - - // see if a position exists in the normal map - final QSnpGATKRecord normalRecord = normalRecords.get(tumourEntry.getKey()); - if (null != normalRecord) { - existsInNormalAndTumour++; - - final GenotypeEnum normalGenotype = normalRecord.getGenotypeEnum(); - final GenotypeEnum tumourGenotype = tumourEntry.getValue().getGenotypeEnum(); - - if (normalGenotype == tumourGenotype) { - sameGenotype++; - } else { - if (tumourGenotype.containsAllele(normalRecord.getAlt().charAt(0))) { - //tumourEntry.getValue().getVCFRecord().addInfo("MIN"); - tumourEntry.getValue().getVCFRecord().appendInfo("MIN"); - } - if ( tumourGenotype.isHeterozygous() && !
tumourGenotype.containsAllele(tumourEntry.getValue().getRef().charAt(0))) - //tumourEntry.getValue().getVCFRecord().addInfo("tumour heterozygous for two non-reference alleles"); - tumourEntry.getValue().getVCFRecord().appendInfo("tumour heterozygous for two non-reference alleles"); -// if (null == tumourEntry.getValue().getAnnotation()) { - qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); -// } - } - } else { - // interested primarily in these fellas - qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); - } - } - - logger.info("exists in both normal and tumour: " + existsInNormalAndTumour + ", same Genotype: " + sameGenotype); - - logger.info("potential number of class A&B's before pileup: " + qPileupRecords.size() ); - - int noAnnotation = 0, count = 0; - for (final QSnpRecord qpr : qPileupRecords) { - getPileup(jumper1, qpr); - - if (++count % 100 == 0) - logger.info("hit " + count + " vcf records, " + qpr.toString()); - - if (qpr.getAnnotation() == null) - noAnnotation++; - } - - logger.info("class A after pileup: " + noAnnotation ); - - } - - private static void loadChromosomeConversionData(String chrConvFile) throws IOException { - final ChrConvFileReader reader = new ChrConvFileReader(new File(chrConvFile)); - try { - for (final ChromosomeConversionRecord record : reader) { - // add extra map inserts here as required - ensembleToQCMG.put(record.getEnsembleV55(), record.getQcmg()); - } - } finally { - reader.close(); - } - } - - private void writeOutputForDCC(String dccSomaticFile) throws IOException { - if (dccSomaticFile.contains("Germline_DB.txt")) throw new IOException("Wrong output file!!!"); - - final FileWriter somaticWriter = new FileWriter(new File(dccSomaticFile)); - - final String somaticHeader = "analysis_id\ttumour_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tQCMGflag\n"; - final int counter = 1; - try { - - somaticWriter.write(somaticHeader); - for (final QSnpRecord record : qPileupRecords) { - - String ensemblChr = null; - // get ensembl chromosome - for (final Map.Entry entry : ensembleToQCMG.entrySet()) { - if (record.getChromosome().equals(entry.getValue())) { - ensemblChr = entry.getKey(); - break; - } - } - somaticWriter.write(somaticAnalysisId + "\t" + tumourSampleId + "\t" - + "\n"); -// + record.getDCCData(mutationIdPrefix, ensemblChr) + "\n"); - } - } finally { - somaticWriter.close(); - } - } - - private static QSnpRecord getQPileupRecord(QSnpGATKRecord vcfRec) { - final QSnpRecord qpr = new QSnpRecord(vcfRec.getChromosome(), vcfRec.getPosition(), vcfRec.getRef()); - qpr.setTumourGenotype(vcfRec.getGenotypeEnum()); -// qpr.setMutation(vcfRec.getRef() + Constants.MUT_DELIM + vcfRec.getAlt()); -// qpr.getVcfRecord().setFilter(vcfRec.getAnnotation()); - qpr.setClassification(Classification.SOMATIC); - return qpr; - } - - - public static void getPileup(QJumper jumper, QSnpRecord record) throws Exception { - - final List firstSet = jumper.getRecordsAtPosition(record.getChromosome(), record.getPosition()); -// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); - - - examinePileup(firstSet, record); - - -// char mutation = record.getMutation().charAt(record.getMutation().length() -1); -// boolean mutationFoundInNormal = false; 
-// int normalCoverage = 0; -// for (SAMRecord sam : firstSet ) { -// if ( ! sam.getDuplicateReadFlag()) { -// ++normalCoverage; -// -// // need to get the base at the position -// int offset = record.getPosition() - sam.getAlignmentStart(); -// if (offset < 0) throw new Exception("invalid start position!!!"); -// -// if (sam.getReadBases()[offset] == mutation) { -// mutationFoundInNormal = true; -// break; -// } -// } -// } -// -// if (mutationFoundInNormal) { -// record.addAnnotation("mutation also found in pileup of normal"); -// } -// -// record.setNormalCount(normalCoverage); -// -// if (normalCoverage < 12) -// record.addAnnotation("less than 12 reads coverage in normal"); - - } - - - public static void examinePileup(List sams, QSnpRecord record) throws Exception { - - final char mutation = record.getAlt().charAt(0); -// final char mutation = record.getMutation().charAt(record.getMutation().length() -1); - boolean mutationFoundInNormal = false; - int normalCoverage = 0; - for (final SAMRecord sam : sams ) { - if ( ! sam.getDuplicateReadFlag()) { - ++normalCoverage; - - // need to get the base at the position -// int offset = record.getPosition() - sam.getUnclippedStart(); - int offset = record.getPosition() - sam.getAlignmentStart(); - if (offset < 0) throw new Exception("invalid start position!!!: "+ sam.format()); - - if (offset >= sam.getReadLength()) { -// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); - // set to last entry in sequence - offset = sam.getReadLength() -1; - } - - if (sam.getReadBases()[offset] == mutation) { - mutationFoundInNormal = true; -// break; - } - } - } - - if (mutationFoundInNormal) { - VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_MUTATION_IN_NORMAL); - } - -// record.setNormalCount(normalCoverage); - - if (normalCoverage < 12) { - VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_COVERAGE); - } - - - } - - -// private static void getPileup(VCFRecord record) { -// -// List firstSet = jumper1.getRecordsAtPosition(record.getChromosome(), record.getPosition()); -//// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); -// -// int normalCoverage = 0; -// for (SAMRecord sam : firstSet ) { -// if ( ! 
sam.getDuplicateReadFlag()) -// ++normalCoverage; -// } -// -// -//// int normalCoverage = firstSet.size(); -//// int normalCoverage = firstSet.size() + secondSet.size(); -// record.setNormalCoverage(normalCoverage); -// -// if (normalCoverage < 12) -// record.addAnnotation("less than 12 reads coverage in normal"); -// -// } - - - private static void addGermlineDBData(String germlineDBFile) throws IOException { - - final GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDBFile)); - // create map of SOMATIC classified SNPs - final Map somaticPileupMap = new HashMap(qPileupRecords.size(), 1); - for (final QSnpRecord pileupRecord : qPileupRecords) { - somaticPileupMap.put(ChrPointPosition.valueOf(pileupRecord.getChromosome(), pileupRecord.getPosition()), pileupRecord); - } - - int updateCount = 0, count = 0; - try { - for (final GermlineDBRecord rec : reader) { - - // get QCMG chromosome from map - final String chr = ensembleToQCMG.get(rec.getChromosome()); - final ChrPosition id = ChrPointPosition.valueOf(chr, rec.getPosition()); - - final QSnpRecord qpr = somaticPileupMap.get(id); - if (null != qpr && null != qpr.getAlt() && (null == qpr.getAnnotation() || ! qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_GERMLINE))) { - final String mutation = qpr.getAlt(); - if (mutation.length() == 3) { - final char c = mutation.charAt(2); - - final GenotypeEnum germlineDBGenotype = BaseUtils.getGenotypeEnum(rec.getNormalGenotype()); - if (germlineDBGenotype.containsAllele(c)) { - updateCount++; - - VcfUtils.updateFilter(qpr.getVcfRecord(), VcfHeaderUtils.FILTER_GERMLINE); - } - - - } else { - logger.info("mutation string length: " + mutation.length()); - } - } - - if (++count % 1000000 == 0) - logger.info("hit " + count + " germline records"); - - } - } finally { - reader.close(); - } - logger.info("updated: " + updateCount + " somatic positions with germlineDB info"); - } - - private static void loadGATKData(String pileupFile, Map map) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - - final VCFFileReader reader = new VCFFileReader(new File(pileupFile)); - try { - for (final VcfRecord qpr : reader) { - map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()), new QSnpGATKRecord(qpr)); - } - } finally { - reader.close(); - } - } - } - - public static void main(String[] args) throws Exception { - final GatkUniqueSnps gus = new GatkUniqueSnps(); - final int exitStatus = gus.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - final Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( !
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logger = QLoggerFactory.getLogger(GatkUniqueSnps.class, options.getLogFile(), options.getLogLevel()); - logger.logInitialExecutionStats("GatkUniqueSnps", GatkUniqueSnps.class.getPackage().getImplementationVersion()); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (final String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - ids = options.getIds(); - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- deleted file mode 100644 index 6758eb70e..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- +++ /dev/null @@ -1,488 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; - -import htsjdk.samtools.SAMRecord; - -import org.qcmg.chrconv.ChrConvFileReader; -import org.qcmg.chrconv.ChromosomeConversionRecord; -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.Classification; -import org.qcmg.common.model.GenotypeEnum; -import org.qcmg.common.model.QSnpGATKRecord; -import org.qcmg.common.util.BaseUtils; -import org.qcmg.common.util.Constants; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.common.vcf.header.VcfHeaderUtils; -import org.qcmg.germlinedb.GermlineDBFileReader; -import org.qcmg.germlinedb.GermlineDBRecord; -import org.qcmg.picard.QJumper; -import org.qcmg.pileup.QSnpRecord; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.Options.Ids; -import org.qcmg.qmule.QMuleException; -import org.qcmg.vcf.VCFFileReader; - -public class GatkUniqueSnps { - -// private static final QLogger logger = QLoggerFactory.getLogger(GatkUniqueSnps.class); - private static QLogger logger; - - private static Map tumourRecords = new HashMap(100000); - private static Map normalRecords = new HashMap(100000); - -// private static Map classABRecords = new HashMap(100000); - private static List qPileupRecords = new ArrayList(15000); - - // map to hold chromosome conversion data - private static final Map ensembleToQCMG = new HashMap(110); - - - // constants - private String mutationIdPrefix; - 
private String tumourSampleId; - private String normalSampleId; - private String patientId; - private String somaticAnalysisId; - private String germlineAnalysisId; -// private String analysisId; -// private static final String mutationIdPrefix = "APGI_1992_"; -// private static final String analysisId = "qcmg_ssm_20110524_1"; -// private static final String tumourSampleId = "ICGC-ABMP-20091203-06-TD"; - - - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private Properties ids; - - private int exitStatus; - - - private static String bamFile1; - private static String bamFile1Index; -// private static String bamFile2; -// private static String bamFile2Index; - - private static QJumper jumper1; -// private static QJumper jumper2; - - public int engage() throws Exception { - - setupIds(); - - logger.info("loading normal vcf file"); - loadGATKData(cmdLineInputFiles[0], normalRecords); - logger.info("loaded " + normalRecords.size() + " normal vcf's"); - - logger.info("loading tumour vcf file"); - loadGATKData(cmdLineInputFiles[1], tumourRecords); - logger.info("loaded " + tumourRecords.size() + " tumour vcf's"); - - bamFile1 = cmdLineInputFiles[2]; - bamFile1Index = cmdLineInputFiles[3]; -// bamFile2 = args[4]; -// bamFile2Index = args[5]; - - - jumper1 = new QJumper(); - jumper1.setupReader(bamFile1, bamFile1Index); -// jumper2 = new QJumper(); -// jumper2.setupReader(bamFile2, bamFile2Index); - - - logger.info("about to call examine"); - examine(); - logger.info("about to call examine - DONE"); - - // close the qjumper - jumper1.closeReader(); - - logger.info("about to load chromosome conversion data"); - loadChromosomeConversionData(cmdLineInputFiles[4]); - logger.info("about to load chromosome conversion data - DONE"); - - logger.info("about to add germlineDB info"); - addGermlineDBData(cmdLineInputFiles[5]); - - int noAnnotation = 0; - for (final QSnpRecord qpr : qPileupRecords) if (null == qpr.getAnnotation()) noAnnotation++; - logger.info("class A after addition of germlinedb data: " + noAnnotation ); - - - logger.info("writing output"); - writeOutputForDCC(cmdLineOutputFiles[0]); - logger.info("DONE"); - - return exitStatus; - } - - private void setupIds() throws Exception { - if (null != ids) { - - somaticAnalysisId = (String) ids.get(Ids.SOMATIC_ANALYSIS); - germlineAnalysisId = (String) ids.get(Ids.GEMLINE_ANALYSIS); - tumourSampleId = (String) ids.get(Ids.TUMOUR_SAMPLE); - normalSampleId = (String) ids.get(Ids.NORMAL_SAMPLE); - patientId = (String) ids.get(Ids.PATIENT); - mutationIdPrefix = patientId + "_SNP_"; - - logger.tool("somaticAnalysisId: " + somaticAnalysisId); - logger.tool("germlineAnalysisId: " + germlineAnalysisId); - logger.tool("normalSampleId: " + normalSampleId); - logger.tool("tumourSampleId: " + tumourSampleId); - logger.tool("patientId: " + patientId); - logger.tool("mutationIdPrefix: " + mutationIdPrefix); - - } else { - logger.error("No ids were passed into the program"); - throw new Exception("Invalid arguments to GatkUniqueSnps"); - } - } - - private static void examine() throws Exception { - - int existsInNormalAndTumour = 0, sameGenotype = 0; - // loop through the tumour map - - for (final Entry tumourEntry : tumourRecords.entrySet()) { - - // see if a position exists in the normal map - final QSnpGATKRecord normalRecord = normalRecords.get(tumourEntry.getKey()); - if (null != normalRecord) { - existsInNormalAndTumour++; - - final GenotypeEnum normalGenotype = normalRecord.getGenotypeEnum(); - final GenotypeEnum tumourGenotype = 
tumourEntry.getValue().getGenotypeEnum(); - - if (normalGenotype == tumourGenotype) { - sameGenotype++; - } else { - if (tumourGenotype.containsAllele(normalRecord.getAlt().charAt(0))) { - //tumourEntry.getValue().getVCFRecord().addInfo("MIN"); - tumourEntry.getValue().getVCFRecord().appendInfo("MIN");; - } - if ( tumourGenotype.isHeterozygous() && ! tumourGenotype.containsAllele(tumourEntry.getValue().getRef().charAt(0))) - //tumourEntry.getValue().getVCFRecord().addInfo("tumour heterozygous for two non-reference alleles"); - tumourEntry.getValue().getVCFRecord().appendInfo("tumour heterozygous for two non-reference alleles"); -// if (null == tumourEntry.getValue().getAnnotation()) { - qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); -// } - } - } else { - // interested primarily in these fellas - qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); - } - } - - logger.info("exists in both normal and tumour: " + existsInNormalAndTumour + ", same Genotype: " + sameGenotype); - - logger.info("potential number of class A&B's before pileup: " + qPileupRecords.size() ); - - int noAnnotation = 0, count = 0; - for (final QSnpRecord qpr : qPileupRecords) { - getPileup(jumper1, qpr); - - if (++count % 100 == 0) - logger.info("hit " + count + " vcf records, " + qpr.toString()); - - if (qpr.getAnnotation() == null) - noAnnotation++; - } - - logger.info("class A after pileup: " + noAnnotation ); - - } - - private static void loadChromosomeConversionData(String chrConvFile) throws IOException { - final ChrConvFileReader reader = new ChrConvFileReader(new File(chrConvFile)); - try { - for (final ChromosomeConversionRecord record : reader) { - // add extra map inserts here as required - ensembleToQCMG.put(record.getEnsembleV55(), record.getQcmg()); - } - } finally { - reader.close(); - } - } - - private void writeOutputForDCC(String dccSomaticFile) throws IOException { - if (dccSomaticFile.contains("Germline_DB.txt")) throw new IOException("Wrong output file!!!"); - - final FileWriter somaticWriter = new FileWriter(new File(dccSomaticFile)); - - final String somaticHeader = "analysis_id\ttumour_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tQCMGflag\n"; - final int counter = 1; - try { - - somaticWriter.write(somaticHeader); - for (final QSnpRecord record : qPileupRecords) { - - String ensemblChr = null; - // get ensembl chromosome - for (final Map.Entry entry : ensembleToQCMG.entrySet()) { - if (record.getChromosome().equals(entry.getValue())) { - ensemblChr = entry.getKey(); - break; - } - } - somaticWriter.write(somaticAnalysisId + "\t" + tumourSampleId + "\t" - + "\n"); -// + record.getDCCData(mutationIdPrefix, ensemblChr) + "\n"); - } - } finally { - somaticWriter.close(); - } - } - - private static QSnpRecord getQPileupRecord(QSnpGATKRecord vcfRec) { - final QSnpRecord qpr = new QSnpRecord(vcfRec.getChromosome(), vcfRec.getPosition(), vcfRec.getRef()); - qpr.setTumourGenotype(vcfRec.getGenotypeEnum()); -// qpr.setMutation(vcfRec.getRef() + Constants.MUT_DELIM + vcfRec.getAlt()); -// qpr.getVcfRecord().setFilter(vcfRec.getAnnotation()); - qpr.setClassification(Classification.SOMATIC); - return qpr; - } - - - public static void getPileup(QJumper jumper, QSnpRecord record) throws Exception { - - final List 
firstSet = jumper.getRecordsAtPosition(record.getChromosome(), record.getPosition()); -// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); - - - examinePileup(firstSet, record); - - -// char mutation = record.getMutation().charAt(record.getMutation().length() -1); -// boolean mutationFoundInNormal = false; -// int normalCoverage = 0; -// for (SAMRecord sam : firstSet ) { -// if ( ! sam.getDuplicateReadFlag()) { -// ++normalCoverage; -// -// // need to get the base at the position -// int offset = record.getPosition() - sam.getAlignmentStart(); -// if (offset < 0) throw new Exception("invalid start position!!!"); -// -// if (sam.getReadBases()[offset] == mutation) { -// mutationFoundInNormal = true; -// break; -// } -// } -// } -// -// if (mutationFoundInNormal) { -// record.addAnnotation("mutation also found in pileup of normal"); -// } -// -// record.setNormalCount(normalCoverage); -// -// if (normalCoverage < 12) -// record.addAnnotation("less than 12 reads coverage in normal"); - - } - - - public static void examinePileup(List sams, QSnpRecord record) throws Exception { - - final char mutation = record.getAlt().charAt(0); -// final char mutation = record.getMutation().charAt(record.getMutation().length() -1); - boolean mutationFoundInNormal = false; - int normalCoverage = 0; - for (final SAMRecord sam : sams ) { - if ( ! sam.getDuplicateReadFlag()) { - ++normalCoverage; - - // need to get the base at the position -// int offset = record.getPosition() - sam.getUnclippedStart(); - int offset = record.getPosition() - sam.getAlignmentStart(); - if (offset < 0) throw new Exception("invalid start position!!!: "+ sam.format()); - - if (offset >= sam.getReadLength()) { -// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); - // set to last entry in sequence - offset = sam.getReadLength() -1; - } - - if (sam.getReadBases()[offset] == mutation) { - mutationFoundInNormal = true; -// break; - } - } - } - - if (mutationFoundInNormal) { - VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_MUTATION_IN_NORMAL); - } - -// record.setNormalCount(normalCoverage); - - if (normalCoverage < 12) { - VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_COVERAGE); - } - - - } - - -// private static void getPileup(VCFRecord record) { -// -// List firstSet = jumper1.getRecordsAtPosition(record.getChromosome(), record.getPosition()); -//// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); -// -// int normalCoverage = 0; -// for (SAMRecord sam : firstSet ) { -// if ( ! 
sam.getDuplicateReadFlag()) -// ++normalCoverage; -// } -// -// -//// int normalCoverage = firstSet.size(); -//// int normalCoverage = firstSet.size() + secondSet.size(); -// record.setNormalCoverage(normalCoverage); -// -// if (normalCoverage < 12) -// record.addAnnotation("less than 12 reads coverage in normal"); -// -// } - - - private static void addGermlineDBData(String germlineDBFile) throws IOException { - - final GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDBFile)); - // create map of SOMATIC classified SNPs - final Map somaticPileupMap = new HashMap(qPileupRecords.size(), 1); - for (final QSnpRecord pileupRecord : qPileupRecords) { - somaticPileupMap.put(ChrPointPosition.valueOf(pileupRecord.getChromosome(), pileupRecord.getPosition()), pileupRecord); - } - - int updateCount = 0, count = 0; - try { - for (final GermlineDBRecord rec : reader) { - - // get QCMG chromosome from map - final String chr = ensembleToQCMG.get(rec.getChromosome()); - final ChrPosition id = ChrPointPosition.valueOf(chr, rec.getPosition()); - - final QSnpRecord qpr = somaticPileupMap.get(id); - if (null != qpr && null != qpr.getAlt() && (null == qpr.getAnnotation() || ! qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_GERMLINE))) { - final String mutation = qpr.getAlt(); - if (mutation.length() == 3) { - final char c = mutation.charAt(2); - - final GenotypeEnum germlineDBGenotype = BaseUtils.getGenotypeEnum(rec.getNormalGenotype()); - if (germlineDBGenotype.containsAllele(c)) { - updateCount++; - - VcfUtils.updateFilter(qpr.getVcfRecord(), VcfHeaderUtils.FILTER_GERMLINE); - } - - - } else { - logger.info("mutation string length: " + mutation.length()); - } - } - - if (++count % 1000000 == 0) - logger.info("hit " + count + " germline reords"); - - } - } finally { - reader.close(); - } - logger.info("updated: " + updateCount + " somatic positions with germlineDB info"); - } - - private static void loadGATKData(String pileupFile, Map map) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - - final VCFFileReader reader = new VCFFileReader(new File(pileupFile)); - try { - for (final VcfRecord qpr : reader) { - map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()), new QSnpGATKRecord(qpr)); - } - } finally { - reader.close(); - } - } - } - - public static void main(String[] args) throws Exception { - final GatkUniqueSnps gus = new GatkUniqueSnps(); - final int exitStatus = gus.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - final Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logger = QLoggerFactory.getLogger(GatkUniqueSnps.class, options.getLogFile(), options.getLogLevel()); - logger.logInitialExecutionStats("GatkUniqueSnps", GatkUniqueSnps.class.getPackage().getImplementationVersion()); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (final String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - ids = options.getIds(); - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/Mule.java b/qmule/src/org/qcmg/qmule/snppicker/Mule.java deleted file mode 100644 index 6b3b7f4a7..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/Mule.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; - -public class Mule { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - - private static QLogger logger; - - public int engage() { - return 1; - } - - - - public static void main(String[] args) throws Exception { - Mule sp = new Mule(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(Mule.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("Example", Mule.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- b/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- deleted file mode 100644 index 6b3b7f4a7..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- +++ /dev/null @@ -1,85 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; - -public class Mule { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - - private static QLogger logger; - - public int engage() { - return 1; - } - - - - public static void main(String[] args) throws Exception { - Mule sp = new Mule(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(Mule.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("Example", Mule.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java deleted file mode 100644 index ad7f90ae8..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java +++ /dev/null @@ -1,802 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. 
- * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import htsjdk.samtools.SAMRecord; - -import org.qcmg.chrconv.ChrConvFileReader; -import org.qcmg.chrconv.ChromosomeConversionRecord; -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.Genotype; -import org.qcmg.common.util.BaseUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.dbsnp.Dbsnp130Record; -import org.qcmg.dbsnp.DbsnpFileReader; -import org.qcmg.qmule.gff3.GFF3FileReader; -import org.qcmg.qmule.gff3.GFF3Record; -import org.qcmg.picard.QJumper; -import org.qcmg.pileup.PileupFileReader; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.qmule.record.Record; -import org.qcmg.unused.illumina.IlluminaFileReader; -import org.qcmg.unused.illumina.IlluminaRecord; -import org.qcmg.vcf.VCFFileReader; - -public class SnpPicker { - - private static final char DEFAULT_CHAR = '\u0000'; - private static QLogger logger; -// private static DecimalFormat df = new DecimalFormat("0.0000"); - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private static boolean isNormal; - -// private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - - Map illuminaMap = new HashMap(1000000,0.99f); // not expecting more than 1000000 - - Map variantMap = new HashMap(2000000); - - // map to hold chromosome conversion data - Map gffToQCMG = new HashMap(100, 0.99f); - -// List illuminaRecords = new ArrayList(); -// List dbSNPRecords = new ArrayList(13000000); - - private int engage() throws Exception { - - // populate the chromosome conversion map - logger.info("about to load chromosome conversion data"); - loadChromosomeConversionData(); - logger.info("about to load chromosome conversion data - DONE"); - - // we are working off the raw illumina data here - first convert it into filtered format, and use that as the input - - logger.info("About to load raw illumina data"); - loadRawIlluminaData(); -// logger.info("No of variant records: " + variantMap.size() + " in file: " + cmdLineInputFiles[0]); - - logger.info("About to load gff3 data"); - loadGff3Data(); - logger.info("No of variant records: " + variantMap.size()); - -// logger.info("About to load vcf data"); -// loadVCFData(); -// logger.info("No of variant records: " + variantMap.size()); - - logger.info("About to load qsnp data"); - loadQSnpData(); - logger.info("No of variant records: " + variantMap.size()); - - - - - logger.info("About to load dbSNP data"); - loadDbSnpData(); -// logger.info("No of variant records: " + variantMap.size()); - - // update variantMap with details from illuminaMap - logger.info("About to load filtered illumina data into variant map"); - convertIlluminaToVariant(); - logger.info("About to load filtered illumina data into variant map - DONE"); - - // get some stats - displayStats(); - - // pileup - logger.info("time for 
pileup..."); - getPileup(); - logger.info("time for pileup - DONE"); - - // more stats - displayStats2(); - - logger.info("Will now attempt to write out variant data" ); - outputVariantData(); - logger.info("Will now attempt to write out variant data - DONE"); - - return exitStatus; - } - - private void getPileup() throws Exception { - QJumper qj = new QJumper(); - qj.setupReader(cmdLineInputFiles[5], cmdLineInputFiles[6]); - - VariantRecord rec; - StringBuilder pileup = new StringBuilder(); - List reads; -// String chr; - int position; - int offset; - - int pileupCount = 0; - for (Map.Entry entry : variantMap.entrySet()) { - // only want pileup if we have gff or vcf data - rec = entry.getValue(); - if (DEFAULT_CHAR != rec.getGffRef() || null != rec.getVcfGenotype()) { -// chr = ( ! entry.getKey().getChromosome().startsWith("GL") ? "chr" : "") + entry.getKey().getChromosome(); - - reads = qj.getRecordsAtPosition(entry.getKey().getChromosome(), entry.getKey().getStartPosition()); - // do something with the reads - position = entry.getKey().getStartPosition(); - for (SAMRecord sr : reads) { - offset = position - sr.getAlignmentStart(); - pileup.append((char)sr.getReadBases()[offset]); - } - rec.setPileup(pileup.toString()); - - // reset the StringBuilder - pileup.setLength(0); - - if (++pileupCount % 1000 == 0) - logger.info("Run " + pileupCount + " pileups so far, " + reads.size() + " sam records returned from picard"); - } - } - } - - private void loadChromosomeConversionData() { - String chrConvFile = cmdLineInputFiles[4]; - ChrConvFileReader reader = null; - try { - reader = new ChrConvFileReader(new File(chrConvFile)); - } catch (Exception e) { - logger.error("Exception caught whilst trying to instantiate ChrConvFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - for (ChromosomeConversionRecord record : reader) { - // add extra map inserts here as required - // diBayes field is no longer present in chr conv file -// gffToQCMG.put(record.getDiBayes(), record.getQcmg()); - // guessing we want ensemble in here as the key - gffToQCMG.put(record.getEnsembleV55(), record.getQcmg()); - } - - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close ChrConvFileReader", e); - exitStatus = -1; - } - } - } - - private void displayStats() { - int illuminaOnly = 0; - int gff3Only = 0; - int vcfOnly = 0; - int vcfANDgff = 0; - int vcfANDillumina = 0; - int gffANDillumina = 0; - int allThree = 0; - for (VariantRecord record : variantMap.values()) { - - boolean illuminaDataPresent = null != record.getIlluminaRef(); - boolean gffDataPresent = DEFAULT_CHAR != record.getGffRef(); - boolean vcfDataPresent = DEFAULT_CHAR != record.getVcfRef(); - - if (illuminaDataPresent && gffDataPresent && vcfDataPresent) { - allThree++; - record.setPositionMatch("IGV"); - } else if (gffDataPresent && vcfDataPresent) { - vcfANDgff++; - record.setPositionMatch("GV"); - } else if (illuminaDataPresent && vcfDataPresent) { - vcfANDillumina++; - record.setPositionMatch("IV"); - } else if (illuminaDataPresent && gffDataPresent) { - gffANDillumina++; - record.setPositionMatch("IG"); - } else if ( gffDataPresent) { - gff3Only++; - record.setPositionMatch("G"); - }else if ( vcfDataPresent) { - vcfOnly++; - record.setPositionMatch("V"); - }else if ( illuminaDataPresent) { - illuminaOnly++; - record.setPositionMatch("I"); - } - - record.setGenotypeMatch(getGenotypeMatchInfo(record)); - } - - logger.info("allThree: " + allThree); - logger.info("illuminaOnly: " + 
illuminaOnly); - logger.info("gff3Only: " + gff3Only); - logger.info("vcfANDgff: " + vcfANDgff); - logger.info("vcfANDillumina: " + vcfANDillumina); - logger.info("gffANDillumina: " + gffANDillumina); - logger.info("vcfOnly: " + vcfOnly); - - int total = allThree + illuminaOnly + gff3Only + vcfANDgff + vcfANDillumina + gffANDillumina + vcfOnly; - logger.info("Sum of above numbers: " + total); - logger.info("No of records in map: " + variantMap.size()); - - } - - private void displayStats2() { - final String IGV = "IGV"; - final String IG = "IG"; - final String IV = "IV"; - final String GV = "GV"; - final String I = "I"; - final String G = "G"; - final String V = "V"; - - int positionIGV=0, positionIG=0, positionIV=0, positionGV=0, positionI=0, positionG=0, positionV = 0; - int pIGVgIGV=0, pIGVgIG=0, pIGVgIV=0, pIGVgGV=0; - int pIGgIG=0; - int pIVgIV=0; - int pGVgGV=0; - - - for (VariantRecord record : variantMap.values()) { - - String positionMatch = record.getPositionMatch(); - String genotypeMatch = record.getGenotypeMatch(); - - if (IGV.equals(positionMatch)) { - positionIGV++; - if (IGV.equals(genotypeMatch)) pIGVgIGV++; - else if (IG.equals(genotypeMatch)) pIGVgIG++; - else if (IV.equals(genotypeMatch)) pIGVgIV++; - else if (GV.equals(genotypeMatch)) pIGVgGV++; - - } else if (IG.equals(positionMatch)) { - positionIG++; - if (IG.equals(genotypeMatch)) pIGgIG++; - - } else if (IV.equals(positionMatch)) { - positionIV++; - if (IV.equals(genotypeMatch)) pIVgIV++; - - } else if (GV.equals(positionMatch)) { - positionGV++; - if (GV.equals(genotypeMatch)) pGVgGV++; - - } else if (I.equals(positionMatch)) positionI++; - else if ( G.equals(positionMatch)) positionG++; - else if ( V.equals(positionMatch)) positionV++; - } - - logger.info("position IGV: " + positionIGV + ", genotype IGV: " + pIGVgIGV + ", genotype IG: " + pIGVgIG + ", genotype IV: " + pIGVgIV + ", genotype GV: " + pIGVgGV); - logger.info("position IG: " + positionIG + ", genotype IG: " + pIGgIG); - logger.info("position IV: " + positionIV + ", genotype IV: " + pIVgIV); - logger.info("position GV: " + positionGV + ", genotype GV: " + pGVgGV); - - logger.info("position I: " + positionI); - logger.info("position G: " + positionG); - logger.info("position V: " + positionV); - - int total = positionIGV + positionIG + positionIV + positionGV + positionI + positionG + positionV; - logger.info("Sum of above numbers: " + total); - logger.info("No of records in map: " + variantMap.size()); - - } - - private String getGenotypeMatchInfo(VariantRecord record) { - Genotype illuminaGen = BaseUtils.getGenotype(record.getIllAllele1() , record.getIllAllele2()); -// String illuminaGen = record.getIlluminaRef(); - Genotype gffGen = BaseUtils.getGenotypeFromIUPACCode(record.getGffGenotype()); - Genotype vcfGen = null; - if (DEFAULT_CHAR != record.getVcfAlt()) - vcfGen = BaseUtils.getGenotypeFromVcf(record.getVcfGenotype(), record.getVcfRef(), record.getVcfAlt()); - else - vcfGen = BaseUtils.getGenotype(record.getVcfGenotype()); - - String result = null; - - if (illuminaGen.equals( gffGen) && illuminaGen.equals(vcfGen)) result = "IGV"; - else if (illuminaGen.equals(gffGen)) result = "IG"; - else if (illuminaGen.equals(vcfGen)) result = "IV"; - else if (null != gffGen && gffGen.equals(vcfGen)) result = "GV"; -// if (doStringsMatch(illuminaGen, gffGen) && doStringsMatch(illuminaGen, vcfGen)) result = "IGV"; -// else if (doStringsMatch(illuminaGen, gffGen)) result = "IG"; -// else if (doStringsMatch(illuminaGen, vcfGen)) result = "IV"; -// else if 
(doStringsMatch(gffGen, vcfGen)) result = "GV"; - - return result; - } - - private boolean doStringsMatch(String a, String b) { - return null == a ? false : a.equals(b); - } - - private void loadDbSnpData() { - // update records with dbsnp info - // should be second of the input files - String dbSNPFile = cmdLineInputFiles[3]; - DbsnpFileReader dbSNPReader = null; - try { - dbSNPReader = new DbsnpFileReader(new File(dbSNPFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate DbsnpFileReader", e); - exitStatus = -1; - } - - int updateCount = 0; - int noOfDbSnps = 0; - if (null != dbSNPReader) { - - ChrPosition varId; - VariantRecord varRec; - IlluminaRecord illRec; - int illuminaDbSnpCount = 0; - - for (Dbsnp130Record rec : dbSNPReader) { - // update illumina array with dbSNP details - illRec = illuminaMap.get(rec.getRefSnp()); - if (null != illRec) { - if (null != illRec.getChr()) { - logger.info("illumina rec: " + illRec.getChr() + ":" + illRec.getStart() + ":" + illRec.getSnpId() +" has already been updated - dbSNP: " + rec.getChromosome() + ":" + rec.getChromosomePosition() + ":" + rec.getRefSnp()); - // dbSNP id has more than 1 chr and position - create another IlluminaRecord in the variantMap - //TODO deal with multiple dbSnps for same id here!!! - } else { - updateIlluminaRecord(illRec, rec); - } - illuminaDbSnpCount++; - } - - varId = ChrPointPosition.valueOf(rec.getChromosome(), rec.getChromosomePosition()); - // lookup variant map to see if we have a matching record - varRec = variantMap.get(varId); - if (null == varRec && null != illRec && illRec.isSnp()) { - // don't have an existing record at this position, but we want to put illumina data in here if its a snp - varRec = new VariantRecord(); - variantMap.put(varId, varRec); - } - - if (null != varRec) { - // update required fields - varRec.setDbSnpID(rec.getRefSnp()); - varRec.setDbSnpStrand(rec.getStrand().charAt(0)); - varRec.setDbSnpRef_Alt(rec.getRefGenome() + "__" + rec.getVariant()); - - if (++updateCount % 100000 == 0) - logger.info("updated " + updateCount + " variant records with dbSNP ids"); - } - -// dbSNPRecords.add(rec); - if (++noOfDbSnps % 1000000 == 0) - logger.info("hit " + noOfDbSnps + " dbSnp records"); - } - - logger.info("match count for dbSnp and Illumina: " + illuminaDbSnpCount); - - try { - dbSNPReader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close DbsnpFileReader", e); - exitStatus = -1; - } - } - - logger.info("No of dbSnp records: " + noOfDbSnps + " in file: " + dbSNPFile); - logger.info("No of updated variant records: " + updateCount); - } - - private void loadVCFData() { - String vcfFile = cmdLineInputFiles[2]; - VCFFileReader reader = null; - try { - reader = new VCFFileReader(new File(vcfFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate VCFFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - int vcfCount = 0; - ChrPosition id; - VariantRecord value; - - for (VcfRecord rec : reader) { - - id = ChrPointPosition.valueOf(rec.getChromosome(), rec.getPosition()); - - value = variantMap.get(id); - if (null == value) { - value = new VariantRecord(); - variantMap.put(id, value); - } - value.setVcfRef(rec.getRefChar()); - value.setVcfAlt(rec.getAlt().charAt(0)); - value.setVcfGenotype(VcfUtils.getGenotypeFromGATKVCFRecord(rec)); - vcfCount++; - } - logger.info("there were " + vcfCount + " records in the vcf file"); - try { - reader.close(); - } catch (IOException e) { 
- logger.error("IOException caught whilst trying to close VCFFileReader", e); - exitStatus = -1; - } - } - } - - private void loadQSnpData() { - String qSnpFile = cmdLineInputFiles[2]; - PileupFileReader reader = null; - try { - reader = new PileupFileReader(new File(qSnpFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate PileupFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - int vcfCount = 0; - ChrPosition id; - VariantRecord value; - - for (String rec : reader) { -// for (PileupRecord rec : reader) { - // got some work to do here - need to split the pileup attribute to construct the object - String [] params = TabTokenizer.tokenize(rec); -// String [] params = tabbedPattern.split(rec.getPileup(), -1); - - // skip if the tumour genotype is null - String genotype = params[params.length-(isNormal ? 2 : 1)]; - if (null != genotype && ! "null".equals(genotype)) { - - id = ChrPointPosition.valueOf(params[0], Integer.parseInt(params[1])); - - value = variantMap.get(id); - if (null == value) { - value = new VariantRecord(); - variantMap.put(id, value); - } - value.setVcfRef(params[2].charAt(0)); - // value.setVcfAlt(rec.getAlt()); - value.setVcfGenotype(genotype); - vcfCount++; - } - } - logger.info("there were " + vcfCount + " records in the qsnp file"); - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close PileupFileReader", e); - exitStatus = -1; - } - } - } - - private void loadGff3Data() { - String gff3File = cmdLineInputFiles[1]; - GFF3FileReader reader = null; - try { - reader = new GFF3FileReader(new File(gff3File)); - } catch (Exception e) { - logger.error("Exception caught whilst trying to instantiate GFF3FileReader", e); - exitStatus = -1; - } - - if (null != reader) { - int gff3Count = 0; - ChrPosition id; - VariantRecord value; - String chr; - - for (GFF3Record rec : reader) { - // get QCMG chromosome from map - chr = gffToQCMG.get(rec.getSeqId()); - - id = ChrPointPosition.valueOf(chr, rec.getStart()); - - value = variantMap.get(id); - if (null == value) { - value = new VariantRecord(); - variantMap.put(id, value); - } - String attributes = rec.getAttributes(); - char genotype = attributes.charAt(attributes.indexOf("genotype=")+9); - char reference = attributes.charAt(attributes.indexOf("reference=")+10); -// value.setGffAlt(genotype+""); - value.setGffGenotype(genotype); - value.setGffRef(reference); - gff3Count++; - } - logger.info("there were " + gff3Count + " records in the gff3 file"); - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close GFF3FileReader", e); - exitStatus = -1; - } - } - } - - private void loadRawIlluminaData() { - String illuminaFile = cmdLineInputFiles[0]; - - isNormal = illuminaFile.contains("ND_"); - - IlluminaFileReader reader = null; - try { - reader = new IlluminaFileReader(new File(illuminaFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - IlluminaRecord tempRec; - for (Record rec : reader) { - tempRec = (IlluminaRecord) rec; - illuminaMap.put(tempRec.getSnpId(), tempRec); - } - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close IlluminaFileReader", e); - exitStatus = -1; - } - } - logger.info("Loaded " + illuminaMap.size() + " entries into the illumina map"); - } - -// private void loadIlluminaData() { -// 
String illuminaFile = cmdLineInputFiles[0]; -// IlluminaFileReader reader = null; -// try { -// reader = new IlluminaFileReader(new File(illuminaFile)); -// } catch (Exception e) { -// logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); -// exitStatus = -1; -// } -// -// if (null != reader) { -// VariantID id; -// IlluminaRecord tempRec; -// -// for (Record rec : reader) { -// tempRec = (IlluminaRecord) rec; -// -// id = new VariantID(tempRec.getChr(), tempRec.getStart()); -// -// VariantRecord value = variantMap.get(id); -// if (null == value) { -// value = new VariantRecord(); -// variantMap.put(id, value); -// } -// value.setIlluminaSNP(tempRec.getSnp()); -// } -// try { -// reader.close(); -// } catch (IOException e) { -// logger.error("IOException caught whilst trying to close IlluminaFileReader", e); -// exitStatus = -1; -// } -// } -// } - - private void convertIlluminaToVariant() { - ChrPosition id; - VariantRecord value; - - // loop through the illumina map converting all entries into the variantMap - for (IlluminaRecord illuminaRec : illuminaMap.values()) { - - // TODO check this !!! - // ignore records that did not have a dbSNP - if (null != illuminaRec.getChr()) { - - id = ChrPointPosition.valueOf(illuminaRec.getChr(), illuminaRec.getStart()); - - value = variantMap.get(id); - if (null == value && illuminaRec.isSnp()) { - // only want to populate our map with illumina data that does not have a corresponding gff or vcf record - // if it contains a snp - value = new VariantRecord(); - variantMap.put(id, value); - } - - if (null != value) { - value.setDbSnpID(illuminaRec.getSnpId()); -// value.setIlluminaAlt(illuminaRec.getRefGenomeRefSNPAllele()); - value.setIlluminaRef(illuminaRec.getSnp()); - value.setIllAllele1(illuminaRec.getFirstAllele()); - value.setIllAllele2(illuminaRec.getSecondAllele()); - value.setIllGCScore(illuminaRec.getGCScore()); - value.setIllTypeHom(illuminaRec.isHom()); - } - } - } - - // clear illuminaMap - no longer required - illuminaMap.clear(); - } - - - private void updateIlluminaRecord(IlluminaRecord illuminaRec, Dbsnp130Record dbSnpRec) { - // standard value setting here... - char dbSnpStrand = dbSnpRec.getStrand().charAt(0); - illuminaRec.setChr(dbSnpRec.getChromosome()); - illuminaRec.setStart(dbSnpRec.getChromosomePosition()); -// illuminaRec.setRefGenomeRefSNPAllele(dbSnpRec.getRefGenome() + "__" + dbSnpRec.getVariant()); - - // now gets a bit more interesting - char strand; - // if illumina alleles are equal to dbsnp alleles - if (BaseUtils.areGenotypesEqual(dbSnpRec.getVariant(), illuminaRec.getSnp())) { - strand = dbSnpStrand; - } else strand = '+' == dbSnpStrand ? '-' : '+'; -// if (illuminaRec.getReference().charAt(1) == dbAlleles.charAt(0) && -// illuminaRec.getReference().charAt(3) == dbAlleles.charAt(2)) { -// strand = dbSnpStrand; -// } else strand = '+' == dbSnpStrand ? 
'-' : '+'; - - // no longer switch the illumina snp call, but the actual allele data -// if ('-' == strand) -// illuminaRec.setReference(BaseUtils.getComplementFromString(illuminaRec.getReference())); -// else -// illuminaRec.setReference(illuminaRec.getReference().substring(1, illuminaRec.getReference().length()-1)); - if ('-' == strand) { - illuminaRec.setFirstAllele(BaseUtils.getComplement(illuminaRec.getFirstAllele())); - illuminaRec.setSecondAllele(BaseUtils.getComplement(illuminaRec.getSecondAllele())); - } - // trim illumina snp - illuminaRec.setSnp(illuminaRec.getSnp().substring(1, illuminaRec.getSnp().length()-1)); - - // set snp - illuminaRec.setSnp(isSnp(dbSnpRec.getRefGenome(), illuminaRec.getFirstAllele(), illuminaRec.getSecondAllele())); - } - - private boolean isSnp(String ref, char alleleOne, char alleleTwo) { - if (null == ref || DEFAULT_CHAR == alleleOne || DEFAULT_CHAR == alleleTwo) - return false; - return ref.charAt(0) != alleleOne || ref.charAt(0) != alleleTwo; - } -// private boolean isSnp(String ref, String genotype) { -// if (null == ref || null == genotype) -// return false; -// // assume ref is of type A -// // assume genotype is of the form A/G -// return ref.charAt(0) != genotype.charAt(0) || ref.charAt(0) != genotype.charAt(2); -// } - - - private void outputVariantData() { - FileWriter allRecordsWriter = null; - FileWriter nonDbSnpwriter = null; - try { - allRecordsWriter = new FileWriter(new File(cmdLineOutputFiles[0])); // should be the first output file supplied - nonDbSnpwriter = new FileWriter(new File(cmdLineOutputFiles[1])); // should be the second output file supplied - allRecordsWriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP - "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina - "gff3_ref\talt\tgen" + //gff - "\tvfc_ref\talt\tgen\t" + //vcf - "pileup\t" + //pileup - "posMatch\tgenMatch\n"); //matching - - nonDbSnpwriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP - "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina - "gff3_ref\talt\tgen" + //gff - "\tvfc_ref\talt\tgen\t" + //vcf - "pileup\n" + //pileup - "posMatch\tgenMatch\n"); //matching - } catch (IOException ioe) { - logger.error("IOException caught whilst outputting data", ioe); - } - - //plonk the data into a TreeMap to bring some order to the proceedings.. - TreeMap sortedVariantMap = new TreeMap(variantMap); - - ChrPosition id; - VariantRecord value; -// String chr; - - for (Map.Entry entry : sortedVariantMap.entrySet()) { - id = entry.getKey(); - value = entry.getValue(); -// chr = ( ! id.getChromosome().startsWith("GL") ? 
"chr" : "") + id.getChromosome(); - - try { - allRecordsWriter.write(id.getChromosome() + "\t" + - id.getStartPosition() + "\t" + - value.formattedRecord() ); - // only want non dbSNP records - if (null == value.getDbSnpID()) { - nonDbSnpwriter.write(id.getChromosome() + "\t" + - id.getStartPosition() + "\t" + - value.formattedRecord() ); - } - } catch (IOException e) { - logger.error("IOException caught whilst outputting data", e); - } - } - - // close up - try { - allRecordsWriter.close(); - nonDbSnpwriter.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close output files", e); - } - } - - - public static void main(String[] args) throws Exception { - SnpPicker sp = new SnpPicker(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(SnpPicker.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("SnpPicker", SnpPicker.class.getPackage().getImplementationVersion()); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- deleted file mode 100644 index 63193c01a..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- +++ /dev/null @@ -1,802 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import htsjdk.samtools.SAMRecord; - -import org.qcmg.chrconv.ChrConvFileReader; -import org.qcmg.chrconv.ChromosomeConversionRecord; -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.Genotype; -import org.qcmg.common.util.BaseUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.dbsnp.Dbsnp130Record; -import org.qcmg.dbsnp.DbsnpFileReader; -import org.qcmg.qmule.gff3.GFF3FileReader; -import org.qcmg.qmule.gff3.GFF3Record; -import org.qcmg.picard.QJumper; -import org.qcmg.pileup.PileupFileReader; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.record.Record; -import org.qcmg.unused.illumina.IlluminaFileReader; -import org.qcmg.unused.illumina.IlluminaRecord; -import org.qcmg.vcf.VCFFileReader; - -public class SnpPicker { - - private static final char DEFAULT_CHAR = '\u0000'; - private static QLogger logger; -// private static DecimalFormat df = new DecimalFormat("0.0000"); - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private static boolean isNormal; - -// private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - - Map illuminaMap = new HashMap(1000000,0.99f); // not expecting more than 1000000 - - Map variantMap = new HashMap(2000000); - - // map to hold chromosome conversion data - Map gffToQCMG = new HashMap(100, 0.99f); - -// List illuminaRecords = new ArrayList(); -// List dbSNPRecords = new ArrayList(13000000); - - private int engage() throws Exception { - - // populate the chromosome conversion map - logger.info("about to load chromosome conversion data"); - loadChromosomeConversionData(); - logger.info("about to load chromosome conversion data - DONE"); - - // we are working off the raw illumina data here - first convert it into filtered format, and use that as the input - - logger.info("About to load raw illumina data"); - loadRawIlluminaData(); -// logger.info("No of variant records: " + variantMap.size() + " in file: " + cmdLineInputFiles[0]); - - logger.info("About to load gff3 data"); - loadGff3Data(); - logger.info("No of variant records: " + variantMap.size()); - -// logger.info("About to load vcf data"); -// loadVCFData(); -// logger.info("No of variant records: " + variantMap.size()); - - logger.info("About to load qsnp data"); - loadQSnpData(); - logger.info("No of variant records: " + variantMap.size()); - - - - - logger.info("About to load dbSNP data"); - loadDbSnpData(); -// logger.info("No of variant records: " + variantMap.size()); - - // update variantMap with details from illuminaMap - logger.info("About to load filtered illumina data into variant map"); - convertIlluminaToVariant(); - logger.info("About to load filtered illumina data into variant map - DONE"); - - // get some stats - displayStats(); - - // pileup - logger.info("time for pileup..."); - getPileup(); - logger.info("time for pileup - DONE"); - - // more stats - displayStats2(); - - logger.info("Will now attempt to write out variant 
data" ); - outputVariantData(); - logger.info("Will now attempt to write out variant data - DONE"); - - return exitStatus; - } - - private void getPileup() throws Exception { - QJumper qj = new QJumper(); - qj.setupReader(cmdLineInputFiles[5], cmdLineInputFiles[6]); - - VariantRecord rec; - StringBuilder pileup = new StringBuilder(); - List reads; -// String chr; - int position; - int offset; - - int pileupCount = 0; - for (Map.Entry entry : variantMap.entrySet()) { - // only want pileup if we have gff or vcf data - rec = entry.getValue(); - if (DEFAULT_CHAR != rec.getGffRef() || null != rec.getVcfGenotype()) { -// chr = ( ! entry.getKey().getChromosome().startsWith("GL") ? "chr" : "") + entry.getKey().getChromosome(); - - reads = qj.getRecordsAtPosition(entry.getKey().getChromosome(), entry.getKey().getStartPosition()); - // do something with the reads - position = entry.getKey().getStartPosition(); - for (SAMRecord sr : reads) { - offset = position - sr.getAlignmentStart(); - pileup.append((char)sr.getReadBases()[offset]); - } - rec.setPileup(pileup.toString()); - - // reset the StringBuilder - pileup.setLength(0); - - if (++pileupCount % 1000 == 0) - logger.info("Run " + pileupCount + " pileups so far, " + reads.size() + " sam records returned from picard"); - } - } - } - - private void loadChromosomeConversionData() { - String chrConvFile = cmdLineInputFiles[4]; - ChrConvFileReader reader = null; - try { - reader = new ChrConvFileReader(new File(chrConvFile)); - } catch (Exception e) { - logger.error("Exception caught whilst trying to instantiate ChrConvFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - for (ChromosomeConversionRecord record : reader) { - // add extra map inserts here as required - // diBayes field is no longer present in chr conv file -// gffToQCMG.put(record.getDiBayes(), record.getQcmg()); - // guessing we want ensemble in here as the key - gffToQCMG.put(record.getEnsembleV55(), record.getQcmg()); - } - - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close ChrConvFileReader", e); - exitStatus = -1; - } - } - } - - private void displayStats() { - int illuminaOnly = 0; - int gff3Only = 0; - int vcfOnly = 0; - int vcfANDgff = 0; - int vcfANDillumina = 0; - int gffANDillumina = 0; - int allThree = 0; - for (VariantRecord record : variantMap.values()) { - - boolean illuminaDataPresent = null != record.getIlluminaRef(); - boolean gffDataPresent = DEFAULT_CHAR != record.getGffRef(); - boolean vcfDataPresent = DEFAULT_CHAR != record.getVcfRef(); - - if (illuminaDataPresent && gffDataPresent && vcfDataPresent) { - allThree++; - record.setPositionMatch("IGV"); - } else if (gffDataPresent && vcfDataPresent) { - vcfANDgff++; - record.setPositionMatch("GV"); - } else if (illuminaDataPresent && vcfDataPresent) { - vcfANDillumina++; - record.setPositionMatch("IV"); - } else if (illuminaDataPresent && gffDataPresent) { - gffANDillumina++; - record.setPositionMatch("IG"); - } else if ( gffDataPresent) { - gff3Only++; - record.setPositionMatch("G"); - }else if ( vcfDataPresent) { - vcfOnly++; - record.setPositionMatch("V"); - }else if ( illuminaDataPresent) { - illuminaOnly++; - record.setPositionMatch("I"); - } - - record.setGenotypeMatch(getGenotypeMatchInfo(record)); - } - - logger.info("allThree: " + allThree); - logger.info("illuminaOnly: " + illuminaOnly); - logger.info("gff3Only: " + gff3Only); - logger.info("vcfANDgff: " + vcfANDgff); - logger.info("vcfANDillumina: " + vcfANDillumina); - 
logger.info("gffANDillumina: " + gffANDillumina); - logger.info("vcfOnly: " + vcfOnly); - - int total = allThree + illuminaOnly + gff3Only + vcfANDgff + vcfANDillumina + gffANDillumina + vcfOnly; - logger.info("Sum of above numbers: " + total); - logger.info("No of records in map: " + variantMap.size()); - - } - - private void displayStats2() { - final String IGV = "IGV"; - final String IG = "IG"; - final String IV = "IV"; - final String GV = "GV"; - final String I = "I"; - final String G = "G"; - final String V = "V"; - - int positionIGV=0, positionIG=0, positionIV=0, positionGV=0, positionI=0, positionG=0, positionV = 0; - int pIGVgIGV=0, pIGVgIG=0, pIGVgIV=0, pIGVgGV=0; - int pIGgIG=0; - int pIVgIV=0; - int pGVgGV=0; - - - for (VariantRecord record : variantMap.values()) { - - String positionMatch = record.getPositionMatch(); - String genotypeMatch = record.getGenotypeMatch(); - - if (IGV.equals(positionMatch)) { - positionIGV++; - if (IGV.equals(genotypeMatch)) pIGVgIGV++; - else if (IG.equals(genotypeMatch)) pIGVgIG++; - else if (IV.equals(genotypeMatch)) pIGVgIV++; - else if (GV.equals(genotypeMatch)) pIGVgGV++; - - } else if (IG.equals(positionMatch)) { - positionIG++; - if (IG.equals(genotypeMatch)) pIGgIG++; - - } else if (IV.equals(positionMatch)) { - positionIV++; - if (IV.equals(genotypeMatch)) pIVgIV++; - - } else if (GV.equals(positionMatch)) { - positionGV++; - if (GV.equals(genotypeMatch)) pGVgGV++; - - } else if (I.equals(positionMatch)) positionI++; - else if ( G.equals(positionMatch)) positionG++; - else if ( V.equals(positionMatch)) positionV++; - } - - logger.info("position IGV: " + positionIGV + ", genotype IGV: " + pIGVgIGV + ", genotype IG: " + pIGVgIG + ", genotype IV: " + pIGVgIV + ", genotype GV: " + pIGVgGV); - logger.info("position IG: " + positionIG + ", genotype IG: " + pIGgIG); - logger.info("position IV: " + positionIV + ", genotype IV: " + pIVgIV); - logger.info("position GV: " + positionGV + ", genotype GV: " + pGVgGV); - - logger.info("position I: " + positionI); - logger.info("position G: " + positionG); - logger.info("position V: " + positionV); - - int total = positionIGV + positionIG + positionIV + positionGV + positionI + positionG + positionV; - logger.info("Sum of above numbers: " + total); - logger.info("No of records in map: " + variantMap.size()); - - } - - private String getGenotypeMatchInfo(VariantRecord record) { - Genotype illuminaGen = BaseUtils.getGenotype(record.getIllAllele1() , record.getIllAllele2()); -// String illuminaGen = record.getIlluminaRef(); - Genotype gffGen = BaseUtils.getGenotypeFromIUPACCode(record.getGffGenotype()); - Genotype vcfGen = null; - if (DEFAULT_CHAR != record.getVcfAlt()) - vcfGen = BaseUtils.getGenotypeFromVcf(record.getVcfGenotype(), record.getVcfRef(), record.getVcfAlt()); - else - vcfGen = BaseUtils.getGenotype(record.getVcfGenotype()); - - String result = null; - - if (illuminaGen.equals( gffGen) && illuminaGen.equals(vcfGen)) result = "IGV"; - else if (illuminaGen.equals(gffGen)) result = "IG"; - else if (illuminaGen.equals(vcfGen)) result = "IV"; - else if (null != gffGen && gffGen.equals(vcfGen)) result = "GV"; -// if (doStringsMatch(illuminaGen, gffGen) && doStringsMatch(illuminaGen, vcfGen)) result = "IGV"; -// else if (doStringsMatch(illuminaGen, gffGen)) result = "IG"; -// else if (doStringsMatch(illuminaGen, vcfGen)) result = "IV"; -// else if (doStringsMatch(gffGen, vcfGen)) result = "GV"; - - return result; - } - - private boolean doStringsMatch(String a, String b) { - return null == a ? 
false : a.equals(b); - } - - private void loadDbSnpData() { - // update records with dbsnp info - // should be second of the input files - String dbSNPFile = cmdLineInputFiles[3]; - DbsnpFileReader dbSNPReader = null; - try { - dbSNPReader = new DbsnpFileReader(new File(dbSNPFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate DbsnpFileReader", e); - exitStatus = -1; - } - - int updateCount = 0; - int noOfDbSnps = 0; - if (null != dbSNPReader) { - - ChrPosition varId; - VariantRecord varRec; - IlluminaRecord illRec; - int illuminaDbSnpCount = 0; - - for (Dbsnp130Record rec : dbSNPReader) { - // update illumina array with dbSNP details - illRec = illuminaMap.get(rec.getRefSnp()); - if (null != illRec) { - if (null != illRec.getChr()) { - logger.info("illumina rec: " + illRec.getChr() + ":" + illRec.getStart() + ":" + illRec.getSnpId() +" has already been updated - dbSNP: " + rec.getChromosome() + ":" + rec.getChromosomePosition() + ":" + rec.getRefSnp()); - // dbSNP id has more than 1 chr and position - create another IlluminaRecord in the variantMap - //TODO deal with multiple dbSnps for same id here!!! - } else { - updateIlluminaRecord(illRec, rec); - } - illuminaDbSnpCount++; - } - - varId = ChrPointPosition.valueOf(rec.getChromosome(), rec.getChromosomePosition()); - // lookup variant map to see if we have a matching record - varRec = variantMap.get(varId); - if (null == varRec && null != illRec && illRec.isSnp()) { - // don't have an existing record at this position, but we want to put illumina data in here if its a snp - varRec = new VariantRecord(); - variantMap.put(varId, varRec); - } - - if (null != varRec) { - // update required fields - varRec.setDbSnpID(rec.getRefSnp()); - varRec.setDbSnpStrand(rec.getStrand().charAt(0)); - varRec.setDbSnpRef_Alt(rec.getRefGenome() + "__" + rec.getVariant()); - - if (++updateCount % 100000 == 0) - logger.info("updated " + updateCount + " variant records with dbSNP ids"); - } - -// dbSNPRecords.add(rec); - if (++noOfDbSnps % 1000000 == 0) - logger.info("hit " + noOfDbSnps + " dbSnp records"); - } - - logger.info("match count for dbSnp and Illumina: " + illuminaDbSnpCount); - - try { - dbSNPReader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close DbsnpFileReader", e); - exitStatus = -1; - } - } - - logger.info("No of dbSnp records: " + noOfDbSnps + " in file: " + dbSNPFile); - logger.info("No of updated variant records: " + updateCount); - } - - private void loadVCFData() { - String vcfFile = cmdLineInputFiles[2]; - VCFFileReader reader = null; - try { - reader = new VCFFileReader(new File(vcfFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate VCFFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - int vcfCount = 0; - ChrPosition id; - VariantRecord value; - - for (VcfRecord rec : reader) { - - id = ChrPointPosition.valueOf(rec.getChromosome(), rec.getPosition()); - - value = variantMap.get(id); - if (null == value) { - value = new VariantRecord(); - variantMap.put(id, value); - } - value.setVcfRef(rec.getRefChar()); - value.setVcfAlt(rec.getAlt().charAt(0)); - value.setVcfGenotype(VcfUtils.getGenotypeFromGATKVCFRecord(rec)); - vcfCount++; - } - logger.info("there were " + vcfCount + " records in the vcf file"); - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close VCFFileReader", e); - exitStatus = -1; - } - } - } - - private void loadQSnpData() { - 
String qSnpFile = cmdLineInputFiles[2]; - PileupFileReader reader = null; - try { - reader = new PileupFileReader(new File(qSnpFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate PileupFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - int vcfCount = 0; - ChrPosition id; - VariantRecord value; - - for (String rec : reader) { -// for (PileupRecord rec : reader) { - // got some work to do here - need to split the pileup attribute to construct the object - String [] params = TabTokenizer.tokenize(rec); -// String [] params = tabbedPattern.split(rec.getPileup(), -1); - - // skip if the tumour genotype is null - String genotype = params[params.length-(isNormal ? 2 : 1)]; - if (null != genotype && ! "null".equals(genotype)) { - - id = ChrPointPosition.valueOf(params[0], Integer.parseInt(params[1])); - - value = variantMap.get(id); - if (null == value) { - value = new VariantRecord(); - variantMap.put(id, value); - } - value.setVcfRef(params[2].charAt(0)); - // value.setVcfAlt(rec.getAlt()); - value.setVcfGenotype(genotype); - vcfCount++; - } - } - logger.info("there were " + vcfCount + " records in the qsnp file"); - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close PileupFileReader", e); - exitStatus = -1; - } - } - } - - private void loadGff3Data() { - String gff3File = cmdLineInputFiles[1]; - GFF3FileReader reader = null; - try { - reader = new GFF3FileReader(new File(gff3File)); - } catch (Exception e) { - logger.error("Exception caught whilst trying to instantiate GFF3FileReader", e); - exitStatus = -1; - } - - if (null != reader) { - int gff3Count = 0; - ChrPosition id; - VariantRecord value; - String chr; - - for (GFF3Record rec : reader) { - // get QCMG chromosome from map - chr = gffToQCMG.get(rec.getSeqId()); - - id = ChrPointPosition.valueOf(chr, rec.getStart()); - - value = variantMap.get(id); - if (null == value) { - value = new VariantRecord(); - variantMap.put(id, value); - } - String attributes = rec.getAttributes(); - char genotype = attributes.charAt(attributes.indexOf("genotype=")+9); - char reference = attributes.charAt(attributes.indexOf("reference=")+10); -// value.setGffAlt(genotype+""); - value.setGffGenotype(genotype); - value.setGffRef(reference); - gff3Count++; - } - logger.info("there were " + gff3Count + " records in the gff3 file"); - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close GFF3FileReader", e); - exitStatus = -1; - } - } - } - - private void loadRawIlluminaData() { - String illuminaFile = cmdLineInputFiles[0]; - - isNormal = illuminaFile.contains("ND_"); - - IlluminaFileReader reader = null; - try { - reader = new IlluminaFileReader(new File(illuminaFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - IlluminaRecord tempRec; - for (Record rec : reader) { - tempRec = (IlluminaRecord) rec; - illuminaMap.put(tempRec.getSnpId(), tempRec); - } - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close IlluminaFileReader", e); - exitStatus = -1; - } - } - logger.info("Loaded " + illuminaMap.size() + " entries into the illumina map"); - } - -// private void loadIlluminaData() { -// String illuminaFile = cmdLineInputFiles[0]; -// IlluminaFileReader reader = null; -// try { -// reader = new IlluminaFileReader(new 
File(illuminaFile)); -// } catch (Exception e) { -// logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); -// exitStatus = -1; -// } -// -// if (null != reader) { -// VariantID id; -// IlluminaRecord tempRec; -// -// for (Record rec : reader) { -// tempRec = (IlluminaRecord) rec; -// -// id = new VariantID(tempRec.getChr(), tempRec.getStart()); -// -// VariantRecord value = variantMap.get(id); -// if (null == value) { -// value = new VariantRecord(); -// variantMap.put(id, value); -// } -// value.setIlluminaSNP(tempRec.getSnp()); -// } -// try { -// reader.close(); -// } catch (IOException e) { -// logger.error("IOException caught whilst trying to close IlluminaFileReader", e); -// exitStatus = -1; -// } -// } -// } - - private void convertIlluminaToVariant() { - ChrPosition id; - VariantRecord value; - - // loop through the illumina map converting all entries into the variantMap - for (IlluminaRecord illuminaRec : illuminaMap.values()) { - - // TODO check this !!! - // ignore records that did not have a dbSNP - if (null != illuminaRec.getChr()) { - - id = ChrPointPosition.valueOf(illuminaRec.getChr(), illuminaRec.getStart()); - - value = variantMap.get(id); - if (null == value && illuminaRec.isSnp()) { - // only want to populate our map with illumina data that does not have a corresponding gff or vcf record - // if it contains a snp - value = new VariantRecord(); - variantMap.put(id, value); - } - - if (null != value) { - value.setDbSnpID(illuminaRec.getSnpId()); -// value.setIlluminaAlt(illuminaRec.getRefGenomeRefSNPAllele()); - value.setIlluminaRef(illuminaRec.getSnp()); - value.setIllAllele1(illuminaRec.getFirstAllele()); - value.setIllAllele2(illuminaRec.getSecondAllele()); - value.setIllGCScore(illuminaRec.getGCScore()); - value.setIllTypeHom(illuminaRec.isHom()); - } - } - } - - // clear illuminaMap - no longer required - illuminaMap.clear(); - } - - - private void updateIlluminaRecord(IlluminaRecord illuminaRec, Dbsnp130Record dbSnpRec) { - // standard value setting here... - char dbSnpStrand = dbSnpRec.getStrand().charAt(0); - illuminaRec.setChr(dbSnpRec.getChromosome()); - illuminaRec.setStart(dbSnpRec.getChromosomePosition()); -// illuminaRec.setRefGenomeRefSNPAllele(dbSnpRec.getRefGenome() + "__" + dbSnpRec.getVariant()); - - // now gets a bit more interesting - char strand; - // if illumina alleles are equal to dbsnp alleles - if (BaseUtils.areGenotypesEqual(dbSnpRec.getVariant(), illuminaRec.getSnp())) { - strand = dbSnpStrand; - } else strand = '+' == dbSnpStrand ? '-' : '+'; -// if (illuminaRec.getReference().charAt(1) == dbAlleles.charAt(0) && -// illuminaRec.getReference().charAt(3) == dbAlleles.charAt(2)) { -// strand = dbSnpStrand; -// } else strand = '+' == dbSnpStrand ? 
'-' : '+'; - - // no longer switch the illumina snp call, but the actual allele data -// if ('-' == strand) -// illuminaRec.setReference(BaseUtils.getComplementFromString(illuminaRec.getReference())); -// else -// illuminaRec.setReference(illuminaRec.getReference().substring(1, illuminaRec.getReference().length()-1)); - if ('-' == strand) { - illuminaRec.setFirstAllele(BaseUtils.getComplement(illuminaRec.getFirstAllele())); - illuminaRec.setSecondAllele(BaseUtils.getComplement(illuminaRec.getSecondAllele())); - } - // trim illumina snp - illuminaRec.setSnp(illuminaRec.getSnp().substring(1, illuminaRec.getSnp().length()-1)); - - // set snp - illuminaRec.setSnp(isSnp(dbSnpRec.getRefGenome(), illuminaRec.getFirstAllele(), illuminaRec.getSecondAllele())); - } - - private boolean isSnp(String ref, char alleleOne, char alleleTwo) { - if (null == ref || DEFAULT_CHAR == alleleOne || DEFAULT_CHAR == alleleTwo) - return false; - return ref.charAt(0) != alleleOne || ref.charAt(0) != alleleTwo; - } -// private boolean isSnp(String ref, String genotype) { -// if (null == ref || null == genotype) -// return false; -// // assume ref is of type A -// // assume genotype is of the form A/G -// return ref.charAt(0) != genotype.charAt(0) || ref.charAt(0) != genotype.charAt(2); -// } - - - private void outputVariantData() { - FileWriter allRecordsWriter = null; - FileWriter nonDbSnpwriter = null; - try { - allRecordsWriter = new FileWriter(new File(cmdLineOutputFiles[0])); // should be the first output file supplied - nonDbSnpwriter = new FileWriter(new File(cmdLineOutputFiles[1])); // should be the second output file supplied - allRecordsWriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP - "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina - "gff3_ref\talt\tgen" + //gff - "\tvfc_ref\talt\tgen\t" + //vcf - "pileup\t" + //pileup - "posMatch\tgenMatch\n"); //matching - - nonDbSnpwriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP - "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina - "gff3_ref\talt\tgen" + //gff - "\tvfc_ref\talt\tgen\t" + //vcf - "pileup\n" + //pileup - "posMatch\tgenMatch\n"); //matching - } catch (IOException ioe) { - logger.error("IOException caught whilst outputting data", ioe); - } - - //plonk the data into a TreeMap to bring some order to the proceedings.. - TreeMap sortedVariantMap = new TreeMap(variantMap); - - ChrPosition id; - VariantRecord value; -// String chr; - - for (Map.Entry entry : sortedVariantMap.entrySet()) { - id = entry.getKey(); - value = entry.getValue(); -// chr = ( ! id.getChromosome().startsWith("GL") ? 
"chr" : "") + id.getChromosome(); - - try { - allRecordsWriter.write(id.getChromosome() + "\t" + - id.getStartPosition() + "\t" + - value.formattedRecord() ); - // only want non dbSNP records - if (null == value.getDbSnpID()) { - nonDbSnpwriter.write(id.getChromosome() + "\t" + - id.getStartPosition() + "\t" + - value.formattedRecord() ); - } - } catch (IOException e) { - logger.error("IOException caught whilst outputting data", e); - } - } - - // close up - try { - allRecordsWriter.close(); - nonDbSnpwriter.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close output files", e); - } - } - - - public static void main(String[] args) throws Exception { - SnpPicker sp = new SnpPicker(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(SnpPicker.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("SnpPicker", SnpPicker.class.getPackage().getImplementationVersion()); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java deleted file mode 100644 index 7e6275fe1..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java +++ /dev/null @@ -1,200 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Pattern; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.pileup.PileupFileReader; - -public class UniqueQSnps { - - private static final QLogger logger = QLoggerFactory.getLogger(UniqueQSnps.class); - - private static Map qSnpPileup = new HashMap(10000); -// private static Map qSnpPileup = new HashMap(10000); - private static Map gatkVcfs = new HashMap(10000); -// private static Map gatkVcfs = new HashMap(10000); - private static Map verifiedSNPs = new HashMap(500); -// private static Map verifiedSNPs = new HashMap(500); - - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - - - public static void main(String[] args) throws Exception { - logger.info("hello..."); - - String filename = args[0]; - boolean runQPileup = true; - // filename type depends on whether to load qpileup or vcf - if (FileUtils.isFileTypeValid(filename, "vcf")) { - runQPileup = false; - } - loadVerifiedSnps(args[1]); - logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); - - - if (runQPileup) { - // load the existing pileup into memory - logger.info("running in pileup mode"); - loadQPileup(args[0]); - logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); - examine(args[2]); - } else { - logger.info("running in vcf mode"); - loadGatkData(args[0]); - logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); - examineVCFs(args[2]); - } - - - // load the existing pileup into memory - - examine(args[2]); - logger.info("goodbye..."); - } - - - private static void examine(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : qSnpPileup.entrySet()) { - ++totalCount; - String verifiedRecord = verifiedSNPs.get(entry.getKey()); -// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); - String qSnpRecord = entry.getValue(); - - if (null == verifiedRecord) { - String [] params = TabTokenizer.tokenize(qSnpRecord); -// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); - String annotation = params[params.length-1]; - if ("--".equals(annotation)) { - ++uniqueQSnpClassACount; - writer.write(qSnpRecord + "\n"); - } else if ("less than 12 reads coverage in normal".equals(annotation)) { - ++uniqueQSnpClassBCount; - writer.write(qSnpRecord + "\n"); - } - } - } - - writer.close(); - logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); - } - } - - private static void examineVCFs(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : qSnpPileup.entrySet()) { - ++totalCount; - String verifiedRecord = 
verifiedSNPs.get(entry.getKey()); -// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); - String qSnpRecord = entry.getValue(); -// PileupRecord qSnpRecord = entry.getValue(); - - if (null == verifiedRecord) { - String [] params = TabTokenizer.tokenize(qSnpRecord); -// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); - String annotation = params[params.length-1]; - if ("--".equals(annotation)) { - ++uniqueQSnpClassACount; - writer.write(qSnpRecord + "\n"); - } else if ("less than 12 reads coverage in normal".equals(annotation)) { - ++uniqueQSnpClassBCount; - writer.write(qSnpRecord + "\n"); - } - } - } - - writer.close(); - logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); - } - } - - - private static void loadQPileup(String pileupFile) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - PileupFileReader reader = new PileupFileReader(new File(pileupFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[params.length-2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - qSnpPileup.put(chrPos,pr); - } - reader.close(); - } - } - - private static void loadGatkData(String pileupFile) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - PileupFileReader reader = new PileupFileReader(new File(pileupFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[params.length-2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - gatkVcfs.put(chrPos,pr); - } - reader.close(); - } - } - - private static void loadVerifiedSnps(String verifiedSnpFile) throws Exception { - if (FileUtils.canFileBeRead(verifiedSnpFile)) { - - PileupFileReader reader = new PileupFileReader(new File(verifiedSnpFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - verifiedSNPs.put(chrPos,pr); - } - reader.close(); - } - } - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- deleted file mode 100644 index 7e6275fe1..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- +++ /dev/null @@ -1,200 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. 
- * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Pattern; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.pileup.PileupFileReader; - -public class UniqueQSnps { - - private static final QLogger logger = QLoggerFactory.getLogger(UniqueQSnps.class); - - private static Map qSnpPileup = new HashMap(10000); -// private static Map qSnpPileup = new HashMap(10000); - private static Map gatkVcfs = new HashMap(10000); -// private static Map gatkVcfs = new HashMap(10000); - private static Map verifiedSNPs = new HashMap(500); -// private static Map verifiedSNPs = new HashMap(500); - - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - - - public static void main(String[] args) throws Exception { - logger.info("hello..."); - - String filename = args[0]; - boolean runQPileup = true; - // filename type depends on whether to load qpileup or vcf - if (FileUtils.isFileTypeValid(filename, "vcf")) { - runQPileup = false; - } - loadVerifiedSnps(args[1]); - logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); - - - if (runQPileup) { - // load the existing pileup into memory - logger.info("running in pileup mode"); - loadQPileup(args[0]); - logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); - examine(args[2]); - } else { - logger.info("running in vcf mode"); - loadGatkData(args[0]); - logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); - examineVCFs(args[2]); - } - - - // load the existing pileup into memory - - examine(args[2]); - logger.info("goodbye..."); - } - - - private static void examine(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : qSnpPileup.entrySet()) { - ++totalCount; - String verifiedRecord = verifiedSNPs.get(entry.getKey()); -// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); - String qSnpRecord = entry.getValue(); - - if (null == verifiedRecord) { - String [] params = TabTokenizer.tokenize(qSnpRecord); -// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); - String annotation = params[params.length-1]; - if ("--".equals(annotation)) { - ++uniqueQSnpClassACount; - writer.write(qSnpRecord + "\n"); - } else if ("less than 12 reads coverage in normal".equals(annotation)) { - ++uniqueQSnpClassBCount; - writer.write(qSnpRecord + "\n"); - } - } - } - - writer.close(); - logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); - } - } - - private static void examineVCFs(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop 
through the verified snps - - for (final Map.Entry entry : qSnpPileup.entrySet()) { - ++totalCount; - String verifiedRecord = verifiedSNPs.get(entry.getKey()); -// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); - String qSnpRecord = entry.getValue(); -// PileupRecord qSnpRecord = entry.getValue(); - - if (null == verifiedRecord) { - String [] params = TabTokenizer.tokenize(qSnpRecord); -// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); - String annotation = params[params.length-1]; - if ("--".equals(annotation)) { - ++uniqueQSnpClassACount; - writer.write(qSnpRecord + "\n"); - } else if ("less than 12 reads coverage in normal".equals(annotation)) { - ++uniqueQSnpClassBCount; - writer.write(qSnpRecord + "\n"); - } - } - } - - writer.close(); - logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); - } - } - - - private static void loadQPileup(String pileupFile) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - PileupFileReader reader = new PileupFileReader(new File(pileupFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[params.length-2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - qSnpPileup.put(chrPos,pr); - } - reader.close(); - } - } - - private static void loadGatkData(String pileupFile) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - PileupFileReader reader = new PileupFileReader(new File(pileupFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[params.length-2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - gatkVcfs.put(chrPos,pr); - } - reader.close(); - } - } - - private static void loadVerifiedSnps(String verifiedSnpFile) throws Exception { - if (FileUtils.canFileBeRead(verifiedSnpFile)) { - - PileupFileReader reader = new PileupFileReader(new File(verifiedSnpFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - verifiedSNPs.put(chrPos,pr); - } - reader.close(); - } - } - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java deleted file mode 100644 index 4ac4d5586..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java +++ /dev/null @@ 
-1,263 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Pattern; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class UniqueSnps { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - - private static QLogger logger; - -// private static Map qSnpPileup = new HashMap(10000); -// private static Map gatkVcfs = new HashMap(10000); - private static Map verifiedSNPs = new HashMap(500); - private static Map unVerifiedSNPs = new HashMap(10000); - - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - - - public int engage() throws Exception { - logger.info("hello..."); - - loadVerifiedSnps(cmdLineInputFiles[1]); - logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); - if (verifiedSNPs.isEmpty()) exitStatus = 1; - - loadUnverifiedSnps(cmdLineInputFiles[0]); - logger.info("loaded " + unVerifiedSNPs.size() + " entries into the un-verifiedSNPs map"); - if (unVerifiedSNPs.isEmpty()) exitStatus = 1; - - -// examine(args[2]); -// if (runQPileup) { -// // load the existing pileup into memory -// logger.info("running in pileup mode"); -// loadUnverifiedSnps(args[0]); -// logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); -// } else { -// logger.info("running in vcf mode"); -// loadGatkData(args[0]); -// logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); -// examineVCFs(args[2]); -// } - - - // load the existing pileup into memory - - examine(cmdLineOutputFiles[0]); - logger.info("goodbye..."); - - return exitStatus; - } - - - private static void examine(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - int totalCount = 0, uniqueClassA = 0, uniqueClassB = 0, uniqueClassC = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - try { - for (final Map.Entry unVerifiedEntry : unVerifiedSNPs.entrySet()) { - TabbedRecord unVerifiedRecord = unVerifiedEntry.getValue(); - String [] params = tabbedPattern.split(unVerifiedRecord.getData()); - String consequenceType = params[22]; - if (consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS")) { - - ++totalCount; - - TabbedRecord verifiedRecord = verifiedSNPs.get(unVerifiedEntry.getKey()); - - if (null == verifiedRecord) { - String annotation = params[params.length-1]; - if ("--".equals(annotation)) { - ++uniqueClassA; - writer.write(unVerifiedRecord.getData() + "\n"); - } else if ("less than 12 reads coverage in normal".equals(annotation) - || "less than 3 reads coverage in normal".equals(annotation)) { - ++uniqueClassB; - writer.write(unVerifiedRecord.getData() + "\n"); - } - } - } - } - } finally { - writer.close(); - } - logger.info("totalCount: " + 
totalCount + ", uniqueQSnpCount (class A): " + uniqueClassA + ", uniqueQSnpCount (class B): " + uniqueClassB ); - } - } - -// private static void examineVCFs(String outputFile) throws IOException { -// if (FileUtils.canFileBeWrittenTo(outputFile)) { -// -// int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; -// -// FileWriter writer = new FileWriter(new File(outputFile)); -// -// // loop through the verified snps -// -// for (final Map.Entry entry : qSnpPileup.entrySet()) { -// ++totalCount; -// TabbedRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); -// TabbedRecord qSnpRecord = entry.getValue(); -// -// if (null == verifiedRecord) { -// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); -// String annotation = params[params.length-1]; -// if ("--".equals(annotation)) { -// ++uniqueQSnpClassACount; -// writer.write(qSnpRecord.getPileup() + "\n"); -// } else if ("less than 12 reads coverage in normal".equals(annotation)) { -// ++uniqueQSnpClassBCount; -// writer.write(qSnpRecord.getPileup() + "\n"); -// } -// } -// } -// -// writer.close(); -// logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); -// } -// } - - - private static void loadUnverifiedSnps(String file) throws Exception { - if (FileUtils.canFileBeRead(file)) { - TabbedFileReader reader = new TabbedFileReader(new File(file)); - try { - for (TabbedRecord tr : reader) { - String [] params = tabbedPattern.split(tr.getData()); - String chrPosition = params[params.length-2]; -// logger.info("chrPosition: " + chrPosition); - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - unVerifiedSNPs.put(chrPos,tr); - } - } finally { - reader.close(); - } - } - } - -// private static void loadGatkData(String pileupFile) throws IOException { -// if (FileUtils.canFileBeRead(pileupFile)) { -// TabbedFileReader reader = new TabbedFileReader(new File(pileupFile)); -// for (TabbedRecord pr : reader) { -// String [] params = tabbedPattern.split(pr.getPileup()); -// String chrPosition = params[params.length-2]; -//// logger.info("chrPosition: " + chrPosition); -// ChrPosition chrPos = new ChrPosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-")))); -// -// gatkVcfs.put(chrPos,pr); -// } -// reader.close(); -// } -// } - - private void loadVerifiedSnps(String verifiedSnpFile) throws Exception { - if (FileUtils.canFileBeRead(verifiedSnpFile)) { - - TabbedFileReader reader = new TabbedFileReader(new File(verifiedSnpFile)); - try { - for (TabbedRecord tr : reader) { - String [] params = tabbedPattern.split(tr.getData()); - String chrPosition = params[2]; - // logger.info("chrPosition: " + chrPosition); - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1),start, start); - - verifiedSNPs.put(chrPos,tr); - } - } finally { - reader.close(); - } - } - } - - public static void main(String[] args) throws Exception { - UniqueSnps sp = new UniqueSnps(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options 
= new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(UniqueSnps.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("UniqueSnps", UniqueSnps.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- deleted file mode 100644 index 4ac4d5586..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- +++ /dev/null @@ -1,263 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
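loadUnverifiedSnps() and loadVerifiedSnps() above slice a "chr:start-end" key by hand, and as written the substring arithmetic keeps the leading '-' in the parsed start and drops the final character of the contig name. The standalone sketch below shows the same parsing done defensively; the class and method names are invented for illustration, and production code would feed the values into ChrRangePosition rather than the local holder.

import java.util.Objects;

// Minimal illustration of parsing a "contig:start-end" key such as "chr1:12345-12400".
public class PositionKeyParser {

    // Simple holder; real code would build a ChrRangePosition instead.
    public static final class ParsedPosition {
        public final String contig;
        public final int start;
        public final int end;
        ParsedPosition(String contig, int start, int end) {
            this.contig = contig;
            this.start = start;
            this.end = end;
        }
        @Override
        public String toString() {
            return contig + ":" + start + "-" + end;
        }
    }

    public static ParsedPosition parse(String key) {
        Objects.requireNonNull(key, "position key");
        int colon = key.lastIndexOf(':');   // lastIndexOf copes with contig names containing ':'
        int dash = key.lastIndexOf('-');    // the range separator, not a leading minus sign
        if (colon < 0 || dash < colon) {
            throw new IllegalArgumentException("Expected contig:start-end but got: " + key);
        }
        String contig = key.substring(0, colon);                       // keep the full contig name
        int start = Integer.parseInt(key.substring(colon + 1, dash).trim());
        int end = Integer.parseInt(key.substring(dash + 1).trim());
        return new ParsedPosition(contig, start, end);
    }

    public static void main(String[] args) {
        System.out.println(parse("chr1:12345-12400"));   // prints chr1:12345-12400
    }
}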
- */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Pattern; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class UniqueSnps { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - - private static QLogger logger; - -// private static Map qSnpPileup = new HashMap(10000); -// private static Map gatkVcfs = new HashMap(10000); - private static Map verifiedSNPs = new HashMap(500); - private static Map unVerifiedSNPs = new HashMap(10000); - - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - - - public int engage() throws Exception { - logger.info("hello..."); - - loadVerifiedSnps(cmdLineInputFiles[1]); - logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); - if (verifiedSNPs.isEmpty()) exitStatus = 1; - - loadUnverifiedSnps(cmdLineInputFiles[0]); - logger.info("loaded " + unVerifiedSNPs.size() + " entries into the un-verifiedSNPs map"); - if (unVerifiedSNPs.isEmpty()) exitStatus = 1; - - -// examine(args[2]); -// if (runQPileup) { -// // load the existing pileup into memory -// logger.info("running in pileup mode"); -// loadUnverifiedSnps(args[0]); -// logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); -// } else { -// logger.info("running in vcf mode"); -// loadGatkData(args[0]); -// logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); -// examineVCFs(args[2]); -// } - - - // load the existing pileup into memory - - examine(cmdLineOutputFiles[0]); - logger.info("goodbye..."); - - return exitStatus; - } - - - private static void examine(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - int totalCount = 0, uniqueClassA = 0, uniqueClassB = 0, uniqueClassC = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - try { - for (final Map.Entry unVerifiedEntry : unVerifiedSNPs.entrySet()) { - TabbedRecord unVerifiedRecord = unVerifiedEntry.getValue(); - String [] params = tabbedPattern.split(unVerifiedRecord.getData()); - String consequenceType = params[22]; - if (consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS")) { - - ++totalCount; - - TabbedRecord verifiedRecord = verifiedSNPs.get(unVerifiedEntry.getKey()); - - if (null == verifiedRecord) { - String annotation = params[params.length-1]; - if ("--".equals(annotation)) { - ++uniqueClassA; - writer.write(unVerifiedRecord.getData() + "\n"); - } else if ("less than 12 reads coverage in normal".equals(annotation) - || "less than 3 reads coverage in normal".equals(annotation)) { - ++uniqueClassB; - writer.write(unVerifiedRecord.getData() + "\n"); - } - } - } - } - } finally { - writer.close(); - } - logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueClassA + ", uniqueQSnpCount (class B): " + uniqueClassB ); - } - } - -// private static void examineVCFs(String outputFile) throws IOException { -// if 
(FileUtils.canFileBeWrittenTo(outputFile)) { -// -// int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; -// -// FileWriter writer = new FileWriter(new File(outputFile)); -// -// // loop through the verified snps -// -// for (final Map.Entry entry : qSnpPileup.entrySet()) { -// ++totalCount; -// TabbedRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); -// TabbedRecord qSnpRecord = entry.getValue(); -// -// if (null == verifiedRecord) { -// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); -// String annotation = params[params.length-1]; -// if ("--".equals(annotation)) { -// ++uniqueQSnpClassACount; -// writer.write(qSnpRecord.getPileup() + "\n"); -// } else if ("less than 12 reads coverage in normal".equals(annotation)) { -// ++uniqueQSnpClassBCount; -// writer.write(qSnpRecord.getPileup() + "\n"); -// } -// } -// } -// -// writer.close(); -// logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); -// } -// } - - - private static void loadUnverifiedSnps(String file) throws Exception { - if (FileUtils.canFileBeRead(file)) { - TabbedFileReader reader = new TabbedFileReader(new File(file)); - try { - for (TabbedRecord tr : reader) { - String [] params = tabbedPattern.split(tr.getData()); - String chrPosition = params[params.length-2]; -// logger.info("chrPosition: " + chrPosition); - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - unVerifiedSNPs.put(chrPos,tr); - } - } finally { - reader.close(); - } - } - } - -// private static void loadGatkData(String pileupFile) throws IOException { -// if (FileUtils.canFileBeRead(pileupFile)) { -// TabbedFileReader reader = new TabbedFileReader(new File(pileupFile)); -// for (TabbedRecord pr : reader) { -// String [] params = tabbedPattern.split(pr.getPileup()); -// String chrPosition = params[params.length-2]; -//// logger.info("chrPosition: " + chrPosition); -// ChrPosition chrPos = new ChrPosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-")))); -// -// gatkVcfs.put(chrPos,pr); -// } -// reader.close(); -// } -// } - - private void loadVerifiedSnps(String verifiedSnpFile) throws Exception { - if (FileUtils.canFileBeRead(verifiedSnpFile)) { - - TabbedFileReader reader = new TabbedFileReader(new File(verifiedSnpFile)); - try { - for (TabbedRecord tr : reader) { - String [] params = tabbedPattern.split(tr.getData()); - String chrPosition = params[2]; - // logger.info("chrPosition: " + chrPosition); - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1),start, start); - - verifiedSNPs.put(chrPos,tr); - } - } finally { - reader.close(); - } - } - } - - public static void main(String[] args) throws Exception { - UniqueSnps sp = new UniqueSnps(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - 
System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(UniqueSnps.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("UniqueSnps", UniqueSnps.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java b/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java deleted file mode 100644 index eefbdd9ed..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java +++ /dev/null @@ -1,193 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.text.DecimalFormat; - -public class VariantRecord { - - private final static char DEFAULT_CHAR = '\u0000'; - private final static DecimalFormat df = new DecimalFormat("0.0000"); - - private String dbSnpID; - private char dbSnpStrand; - private String dbSnpRef_Alt; - private float illGCScore; - private char illAllele1; - private char illAllele2; - private boolean illTypeHom; - private String illuminaRef; -// private String illuminaAlt; - private String illuminaSNP; - private char gffRef; - private char gffGenotype; - private String gffAlt; - private char vcfRef; - private char vcfAlt; - private String vcfGenotype; - private String pileup; - private String positionMatch; - private String genotypeMatch; - - public String getDbSnpID() { - return dbSnpID; - } - public void setDbSnpID(String dbSnpID) { - this.dbSnpID = dbSnpID; - } - public String getIlluminaRef() { - return illuminaRef; - } - public void setIlluminaRef(String illuminaRef) { - this.illuminaRef = illuminaRef; - } -// public String getIlluminaAlt() { -// return illuminaAlt; -// } -// public void setIlluminaAlt(String illuminaAlt) { -// this.illuminaAlt = illuminaAlt; -// } - public char getGffRef() { - return gffRef; - } - public void setGffRef(char gffRef) { - this.gffRef = gffRef; - } - public char getGffGenotype() { - return gffGenotype; - } - public void setGffGenotype(char gffGenotype) { - this.gffGenotype = gffGenotype; - } - public String getGffAlt() { - return gffAlt; - } - public void setGffAlt(String gffAlt) { - this.gffAlt = gffAlt; - } - public char getVcfRef() { - return vcfRef; - } - public void setVcfRef(char vcfRef) { - this.vcfRef = vcfRef; - } - public char getVcfAlt() { - return vcfAlt; - } - public void setVcfAlt(char vcfAlt) { - this.vcfAlt = 
vcfAlt; - } - public String getVcfGenotype() { - return vcfGenotype; - } - public void setVcfGenotype(String vcfGenotype) { - this.vcfGenotype = vcfGenotype; - } - public void setIlluminaSNP(String illuminaSNP) { - this.illuminaSNP = illuminaSNP; - } - public String getIlluminaSNP() { - return illuminaSNP; - } - - public String formattedRecord() { - StringBuilder sb = new StringBuilder(); - - sb.append(null != dbSnpID ? dbSnpID : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != dbSnpStrand ? dbSnpStrand : ""); - sb.append("\t"); - sb.append(null != dbSnpRef_Alt ? dbSnpRef_Alt : ""); - sb.append("\t"); - sb.append(illGCScore != 0.0f ? df.format(illGCScore) : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != illAllele1 ? illAllele1 : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != illAllele2 ? illAllele2 : ""); - sb.append("\t"); - sb.append(null != illuminaRef ? (illTypeHom ? "hom" : "het") : ""); - sb.append("\t"); - sb.append(null != illuminaRef ? illuminaRef : ""); - sb.append("\t"); -// sb.append(null != illuminaAlt ? illuminaAlt : ""); -// sb.append("\t"); -// sb.append(null != illuminaSNP ? illuminaSNP : ""); -// sb.append("\t"); - sb.append(DEFAULT_CHAR != gffRef ? gffRef : ""); - sb.append("\t"); - sb.append(null != gffAlt ? gffAlt : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != gffGenotype ? gffGenotype : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != vcfRef ? vcfRef : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != vcfAlt ? vcfAlt: ""); - sb.append("\t"); - sb.append(null != vcfGenotype ? vcfGenotype: ""); - sb.append("\t"); - sb.append(null != pileup ? pileup: ""); - sb.append("\t"); - sb.append(null != positionMatch ? positionMatch: ""); - sb.append("\t"); - sb.append(null != genotypeMatch ? genotypeMatch: ""); - sb.append("\n"); - - return sb.toString(); - } - public float getIllGCScore() { - return illGCScore; - } - public void setIllGCScore(float illGCScore) { - this.illGCScore = illGCScore; - } - public char getIllAllele1() { - return illAllele1; - } - public void setIllAllele1(char illAllele1) { - this.illAllele1 = illAllele1; - } - public char getIllAllele2() { - return illAllele2; - } - public void setIllAllele2(char illAllele2) { - this.illAllele2 = illAllele2; - } - public boolean isIllTypeHom() { - return illTypeHom; - } - public void setIllTypeHom(boolean illTypeHom) { - this.illTypeHom = illTypeHom; - } - public char getDbSnpStrand() { - return dbSnpStrand; - } - public void setDbSnpStrand(char dbSnpStrand) { - this.dbSnpStrand = dbSnpStrand; - } - public String getDbSnpRef_Alt() { - return dbSnpRef_Alt; - } - public void setDbSnpRef_Alt(String dbSnpRefAlt) { - dbSnpRef_Alt = dbSnpRefAlt; - } - public void setPileup(String pileup) { - this.pileup = pileup; - } - public String getPileup(String pileup) { - return pileup; - } - public String getPositionMatch() { - return positionMatch; - } - public void setPositionMatch(String positionMatch) { - this.positionMatch = positionMatch; - } - public String getGenotypeMatch() { - return genotypeMatch; - } - public void setGenotypeMatch(String genotypeMatch) { - this.genotypeMatch = genotypeMatch; - } - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- b/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- deleted file mode 100644 index eefbdd9ed..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- +++ /dev/null @@ -1,193 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. 
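formattedRecord() in the deleted VariantRecord builds its tab-separated line by alternating sb.append(value) and sb.append("\t"), guarding each field against null or the '\u0000' default (and, in passing, its getPileup(String) accessor takes a parameter it never uses). A compact way to express the same guarded, tab-joined output with the JDK's StringJoiner is sketched below; the field list is trimmed and the names are illustrative only.

import java.util.StringJoiner;

// Illustrative tab-delimited formatter in the spirit of VariantRecord.formattedRecord().
public class TabLineExample {

    private static final char DEFAULT_CHAR = '\u0000';

    // Render a possibly-unset char as a column value.
    private static String col(char c) {
        return c == DEFAULT_CHAR ? "" : String.valueOf(c);
    }

    // Render a possibly-null String as a column value.
    private static String col(String s) {
        return s == null ? "" : s;
    }

    public static String formatLine(String dbSnpId, char strand, String refAlt, String pileup) {
        // StringJoiner inserts the delimiter only between values, so there is no trailing tab.
        StringJoiner sj = new StringJoiner("\t");
        sj.add(col(dbSnpId));
        sj.add(col(strand));
        sj.add(col(refAlt));
        sj.add(col(pileup));
        return sj.toString() + "\n";
    }

    public static void main(String[] args) {
        // Unset fields become empty columns, mirroring the StringBuilder version above.
        System.out.print(formatLine("rs123", '+', "A/G", null));
    }
}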
This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.text.DecimalFormat; - -public class VariantRecord { - - private final static char DEFAULT_CHAR = '\u0000'; - private final static DecimalFormat df = new DecimalFormat("0.0000"); - - private String dbSnpID; - private char dbSnpStrand; - private String dbSnpRef_Alt; - private float illGCScore; - private char illAllele1; - private char illAllele2; - private boolean illTypeHom; - private String illuminaRef; -// private String illuminaAlt; - private String illuminaSNP; - private char gffRef; - private char gffGenotype; - private String gffAlt; - private char vcfRef; - private char vcfAlt; - private String vcfGenotype; - private String pileup; - private String positionMatch; - private String genotypeMatch; - - public String getDbSnpID() { - return dbSnpID; - } - public void setDbSnpID(String dbSnpID) { - this.dbSnpID = dbSnpID; - } - public String getIlluminaRef() { - return illuminaRef; - } - public void setIlluminaRef(String illuminaRef) { - this.illuminaRef = illuminaRef; - } -// public String getIlluminaAlt() { -// return illuminaAlt; -// } -// public void setIlluminaAlt(String illuminaAlt) { -// this.illuminaAlt = illuminaAlt; -// } - public char getGffRef() { - return gffRef; - } - public void setGffRef(char gffRef) { - this.gffRef = gffRef; - } - public char getGffGenotype() { - return gffGenotype; - } - public void setGffGenotype(char gffGenotype) { - this.gffGenotype = gffGenotype; - } - public String getGffAlt() { - return gffAlt; - } - public void setGffAlt(String gffAlt) { - this.gffAlt = gffAlt; - } - public char getVcfRef() { - return vcfRef; - } - public void setVcfRef(char vcfRef) { - this.vcfRef = vcfRef; - } - public char getVcfAlt() { - return vcfAlt; - } - public void setVcfAlt(char vcfAlt) { - this.vcfAlt = vcfAlt; - } - public String getVcfGenotype() { - return vcfGenotype; - } - public void setVcfGenotype(String vcfGenotype) { - this.vcfGenotype = vcfGenotype; - } - public void setIlluminaSNP(String illuminaSNP) { - this.illuminaSNP = illuminaSNP; - } - public String getIlluminaSNP() { - return illuminaSNP; - } - - public String formattedRecord() { - StringBuilder sb = new StringBuilder(); - - sb.append(null != dbSnpID ? dbSnpID : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != dbSnpStrand ? dbSnpStrand : ""); - sb.append("\t"); - sb.append(null != dbSnpRef_Alt ? dbSnpRef_Alt : ""); - sb.append("\t"); - sb.append(illGCScore != 0.0f ? df.format(illGCScore) : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != illAllele1 ? illAllele1 : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != illAllele2 ? illAllele2 : ""); - sb.append("\t"); - sb.append(null != illuminaRef ? (illTypeHom ? "hom" : "het") : ""); - sb.append("\t"); - sb.append(null != illuminaRef ? illuminaRef : ""); - sb.append("\t"); -// sb.append(null != illuminaAlt ? illuminaAlt : ""); -// sb.append("\t"); -// sb.append(null != illuminaSNP ? illuminaSNP : ""); -// sb.append("\t"); - sb.append(DEFAULT_CHAR != gffRef ? gffRef : ""); - sb.append("\t"); - sb.append(null != gffAlt ? gffAlt : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != gffGenotype ? gffGenotype : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != vcfRef ? vcfRef : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != vcfAlt ? vcfAlt: ""); - sb.append("\t"); - sb.append(null != vcfGenotype ? vcfGenotype: ""); - sb.append("\t"); - sb.append(null != pileup ? 
pileup: ""); - sb.append("\t"); - sb.append(null != positionMatch ? positionMatch: ""); - sb.append("\t"); - sb.append(null != genotypeMatch ? genotypeMatch: ""); - sb.append("\n"); - - return sb.toString(); - } - public float getIllGCScore() { - return illGCScore; - } - public void setIllGCScore(float illGCScore) { - this.illGCScore = illGCScore; - } - public char getIllAllele1() { - return illAllele1; - } - public void setIllAllele1(char illAllele1) { - this.illAllele1 = illAllele1; - } - public char getIllAllele2() { - return illAllele2; - } - public void setIllAllele2(char illAllele2) { - this.illAllele2 = illAllele2; - } - public boolean isIllTypeHom() { - return illTypeHom; - } - public void setIllTypeHom(boolean illTypeHom) { - this.illTypeHom = illTypeHom; - } - public char getDbSnpStrand() { - return dbSnpStrand; - } - public void setDbSnpStrand(char dbSnpStrand) { - this.dbSnpStrand = dbSnpStrand; - } - public String getDbSnpRef_Alt() { - return dbSnpRef_Alt; - } - public void setDbSnpRef_Alt(String dbSnpRefAlt) { - dbSnpRef_Alt = dbSnpRefAlt; - } - public void setPileup(String pileup) { - this.pileup = pileup; - } - public String getPileup(String pileup) { - return pileup; - } - public String getPositionMatch() { - return positionMatch; - } - public void setPositionMatch(String positionMatch) { - this.positionMatch = positionMatch; - } - public String getGenotypeMatch() { - return genotypeMatch; - } - public void setGenotypeMatch(String genotypeMatch) { - this.genotypeMatch = genotypeMatch; - } - -} diff --git a/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java b/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java deleted file mode 100644 index 3a1e039aa..000000000 --- a/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java +++ /dev/null @@ -1,78 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.util; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.List; - -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.util.FileUtils; - -public class IGVBatchFileGenerator { - - public static final String GENOME = "GRCh37_ICGC_standard_v2"; - - - public static void generate(final List positions, final String outputFile) throws IOException { - // check that list is not empty - if (positions == null || positions.isEmpty()) - throw new IllegalArgumentException("Null or empty list passed to IGVBatchFileGenerator"); - - // can we write to the outputFile? - File output = new File(outputFile); - if( ! 
FileUtils.canFileBeWrittenTo(output)) - throw new IllegalArgumentException("Can't write to output file: " + outputFile); - - FileWriter writer = new FileWriter(output); - - try { - writer.write(getHeaderInfo(output)); - - for (ChrPosition position : positions) { - writer.write(getLocationString(position)); - } - - } finally { - writer.close(); - } - - } - - private static String getHeaderInfo(File output) { - String path = output.getParent(); - return "snapshotDirectory " + path + "\n" - + "genome " + GENOME + "\n"; - } - - private static String getLocationString(ChrPosition chrPos) { - return "goto " + chrPos.toIGVString() - + "\nsort base\n" + - "collapse\n" + - "snapshot " + chrPos.getChromosome() + ":" + chrPos.getStartPosition() + ".png\n"; - } - - - -// snapshotDirectory C:/IGV_sessions/exonorama/APGI_1992 -// genome GRCh37_ICGC_standard_v2 -// goto chr8:93156526-93156566 -// sort base -// collapse -// snapshot APGI_1992_SNP_35325-chr8-93156546-var-CtoT-WITHIN_NON_CODING_GENE-ENSG00000233778.png -// goto chr12:114377865-114377905 -// sort base -// collapse -// snapshot APGI_1992_SNP_50905-chr12-114377885-var-GtoC-SYNONYMOUS_CODING-RBM19.png -// goto chr1:228481880-228481920 -// sort base -// collapse -// snapshot APGI_1992_SNP_6964-chr1-228481900-var-GtoA-NON_SYNONYMOUS_CODING-OBSCN.png - - -} diff --git a/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- b/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- deleted file mode 100644 index 3a1e039aa..000000000 --- a/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- +++ /dev/null @@ -1,78 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.util; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.List; - -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.util.FileUtils; - -public class IGVBatchFileGenerator { - - public static final String GENOME = "GRCh37_ICGC_standard_v2"; - - - public static void generate(final List positions, final String outputFile) throws IOException { - // check that list is not empty - if (positions == null || positions.isEmpty()) - throw new IllegalArgumentException("Null or empty list passed to IGVBatchFileGenerator"); - - // can we write to the outputFile? - File output = new File(outputFile); - if( ! 
FileUtils.canFileBeWrittenTo(output)) - throw new IllegalArgumentException("Can't write to output file: " + outputFile); - - FileWriter writer = new FileWriter(output); - - try { - writer.write(getHeaderInfo(output)); - - for (ChrPosition position : positions) { - writer.write(getLocationString(position)); - } - - } finally { - writer.close(); - } - - } - - private static String getHeaderInfo(File output) { - String path = output.getParent(); - return "snapshotDirectory " + path + "\n" - + "genome " + GENOME + "\n"; - } - - private static String getLocationString(ChrPosition chrPos) { - return "goto " + chrPos.toIGVString() - + "\nsort base\n" + - "collapse\n" + - "snapshot " + chrPos.getChromosome() + ":" + chrPos.getStartPosition() + ".png\n"; - } - - - -// snapshotDirectory C:/IGV_sessions/exonorama/APGI_1992 -// genome GRCh37_ICGC_standard_v2 -// goto chr8:93156526-93156566 -// sort base -// collapse -// snapshot APGI_1992_SNP_35325-chr8-93156546-var-CtoT-WITHIN_NON_CODING_GENE-ENSG00000233778.png -// goto chr12:114377865-114377905 -// sort base -// collapse -// snapshot APGI_1992_SNP_50905-chr12-114377885-var-GtoC-SYNONYMOUS_CODING-RBM19.png -// goto chr1:228481880-228481920 -// sort base -// collapse -// snapshot APGI_1992_SNP_6964-chr1-228481900-var-GtoA-NON_SYNONYMOUS_CODING-OBSCN.png - - -} diff --git a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java deleted file mode 100644 index 60389d85b..000000000 --- a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java +++ /dev/null @@ -1,61 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.util; - -import java.io.File; -import java.util.Map; -import java.util.regex.Pattern; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.string.StringUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class TabbedDataLoader { - - public static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - private static final QLogger logger = QLoggerFactory.getLogger(TabbedDataLoader.class); - - - public static void loadTabbedData(String tabbedDataFile, int position, Map collection) throws Exception { - if (FileUtils.canFileBeRead(tabbedDataFile)) { - - TabbedFileReader reader = new TabbedFileReader(new File(tabbedDataFile)); - try { - for (TabbedRecord tr : reader) { - String [] params = tabbedPattern.split(tr.getData()); - String chrPosition = getStringFromArray(params, position); - - if (null != chrPosition) { - ChrPosition chrPos = StringUtils.getChrPositionFromString(chrPosition); - if (null != chrPos) collection.put(chrPos,tr); - } - } - - logger.info("Added " + collection.size() + " entries to the tabbed data collection"); - - } finally { - reader.close(); - } - } else { - throw new IllegalArgumentException("data file: " + tabbedDataFile + " could not be read"); - } - } - - public static String getStringFromArray(String[] params, int index) { - String result = null; - if (null != params && params.length > 0) { - if (index >= 0) { - result = params[(index > params.length ? params.length : index)]; - } else if (params.length + index >= 0 & params.length + index < params.length){ - result = params[params.length + index]; // adding a negative number! 
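getStringFromArray() above treats a negative index as "count back from the end of the array", but its bounds checks still allow index == params.length through and join conditions with the non-short-circuit '&'. The following self-contained sketch shows the same from-either-end lookup with explicit bounds handling; it is an illustration, not the method being deleted.

// Illustration of "index from either end" array access, as used for picking a
// column out of a tab-split record.
public class ArrayIndexExample {

    /**
     * Returns the element at index, where a negative index counts back from the
     * end of the array (-1 is the last element). Returns null when the index
     * falls outside the array instead of throwing.
     */
    public static String getStringFromArray(String[] values, int index) {
        if (values == null || values.length == 0) {
            return null;
        }
        int effective = index >= 0 ? index : values.length + index;
        return (effective >= 0 && effective < values.length) ? values[effective] : null;
    }

    public static void main(String[] args) {
        String[] cols = "chr1\t100\tA\tG".split("\t");
        System.out.println(getStringFromArray(cols, 1));    // 100
        System.out.println(getStringFromArray(cols, -1));   // G
        System.out.println(getStringFromArray(cols, 10));   // null
    }
}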
- } - } - return result; - } - -} diff --git a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- deleted file mode 100644 index 60389d85b..000000000 --- a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- +++ /dev/null @@ -1,61 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.util; - -import java.io.File; -import java.util.Map; -import java.util.regex.Pattern; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.string.StringUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class TabbedDataLoader { - - public static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - private static final QLogger logger = QLoggerFactory.getLogger(TabbedDataLoader.class); - - - public static void loadTabbedData(String tabbedDataFile, int position, Map collection) throws Exception { - if (FileUtils.canFileBeRead(tabbedDataFile)) { - - TabbedFileReader reader = new TabbedFileReader(new File(tabbedDataFile)); - try { - for (TabbedRecord tr : reader) { - String [] params = tabbedPattern.split(tr.getData()); - String chrPosition = getStringFromArray(params, position); - - if (null != chrPosition) { - ChrPosition chrPos = StringUtils.getChrPositionFromString(chrPosition); - if (null != chrPos) collection.put(chrPos,tr); - } - } - - logger.info("Added " + collection.size() + " entries to the tabbed data collection"); - - } finally { - reader.close(); - } - } else { - throw new IllegalArgumentException("data file: " + tabbedDataFile + " could not be read"); - } - } - - public static String getStringFromArray(String[] params, int index) { - String result = null; - if (null != params && params.length > 0) { - if (index >= 0) { - result = params[(index > params.length ? params.length : index)]; - } else if (params.length + index >= 0 & params.length + index < params.length){ - result = params[params.length + index]; // adding a negative number! - } - } - return result; - } - -} diff --git a/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java b/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java deleted file mode 100644 index 03a4e2f03..000000000 --- a/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java +++ /dev/null @@ -1,269 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
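loadTabbedData() above shows the pattern these deleted utilities share: iterate a tab-delimited file, split each line, and key the record by one of its columns. The same loop needs nothing beyond java.nio line reading, as in this minimal sketch; the column index, skip rules and Map types are assumptions made for illustration rather than the qcmg API.

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;

// Minimal tab-delimited loader: key each data line by one of its columns.
public class TabbedMapLoader {

    public static Map<String, String> load(Path file, int keyColumn) throws IOException {
        Map<String, String> recordsByKey = new HashMap<>();
        try (BufferedReader reader = Files.newBufferedReader(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.isEmpty() || line.startsWith("#")) {
                    continue;                          // skip blank and header lines
                }
                String[] columns = line.split("\t", -1);
                if (keyColumn < columns.length) {
                    recordsByKey.put(columns[keyColumn], line);
                }
            }
        }
        return recordsByKey;
    }

    public static void main(String[] args) throws IOException {
        // Hypothetical input; column 2 might hold a "chr:start-end" key as in the code above.
        Map<String, String> map = load(Path.of(args[0]), 2);
        System.out.println("loaded " + map.size() + " records");
    }
}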
- */ -package org.qcmg.qmule.vcf; - -import java.io.File; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.atomic.AtomicLong; - -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.GenotypeEnum; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.picard.QJumper; -import org.qcmg.picard.util.SAMUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.vcf.VCFFileReader; - -public class CompareVCFs { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private static QLogger logger; - - private final ConcurrentMap normalVCFMap = new ConcurrentHashMap(12500); //not expecting more than 100000 - private final ConcurrentMap tumourVCFMap = new ConcurrentHashMap(12500); - private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); - - public int engage() throws Exception { - - logger.info("loading normal vcf data"); - loadVCFData(cmdLineInputFiles[0], normalVCFMap); - logger.info("loading normal vcf data - DONE [" + normalVCFMap.size() + "]"); - - logger.info("loading tumour vcf data"); - loadVCFData(cmdLineInputFiles[1], tumourVCFMap); - logger.info("loading tumour vcf data - DONE [" + tumourVCFMap.size() + "]"); - - examine(); - - addPileupFromNormalBam(); - - return exitStatus; - } - - private void addPileupFromNormalBam() throws Exception { - // loop through each position in the unique map and get the entries in the normal GATK cleaned BAM file. - int notEnoughCoverage = 0, mutationFoundInNormal = 0; - StringBuilder sb = new StringBuilder(); - QJumper qj = new QJumper(); - qj.setupReader(cmdLineInputFiles[2]); - - for (Entry entry : uniqueTumourVCFMap.entrySet()) { - int position = entry.getKey().getStartPosition(); - boolean foundInNormal = false; - List sams = qj.getOverlappingRecordsAtPosition(entry.getKey().getChromosome(), position, position); - - for (SAMRecord sam : sams) { - int offset = SAMUtils.getIndexInReadFromPosition(sam, position); - if (offset > -1 && offset < sam.getReadLength()) { - char c = sam.getReadString().charAt(offset); - if (c == entry.getValue().getAlt().charAt(0)) { - foundInNormal = true; - mutationFoundInNormal++; - break; - } - } - } - - if ( ! foundInNormal && sams.size() < 8) - notEnoughCoverage++; - else if ( ! foundInNormal) - sb.append(entry.getKey().getChromosome() + ":" + position + "\n"); - } - - logger.info("total positions examined: " + uniqueTumourVCFMap.size()); - logger.info("positions where mutation was also found in normal (class C): " + mutationFoundInNormal); - logger.info("positions where coverage in normal was less than 8 (class B): " + notEnoughCoverage); - logger.info("Potential class A positions: "); - logger.info(sb.toString()); - } - - private void examine() { - - final Map diffGenotypes = new HashMap(); - - // we want to know the following... - // number unique to normal - // number unique to tumour - // no of common positions - int normalUnique = 0, tumourUnique = 0, normalAndTumour = 0; - - // for the common positions... 
- // no that have the same mutation - // no that have a different mutation - // no of those that have the same genotype - - int sameMutation = 0, sameMutationSameGenotype = 0; - int diffMutation = 0, diffMutationSameGenotype = 0; - - // here we go - - for (Entry entry : normalVCFMap.entrySet()) { - - VcfRecord normalVCF = entry.getValue(); - VcfRecord tumourVCF = tumourVCFMap.get(entry.getKey()); - - if (null == tumourVCF) { - normalUnique++; - } else { - ++normalAndTumour; - - // sanity check - compare ref - if not the same - oh dear... - assert normalVCF.getRef().equals(tumourVCF.getRef()); - - // compare mutations - char normalMut = normalVCF.getAlt().charAt(0); - char tumourMut = tumourVCF.getAlt().charAt(0); - - // need to get the genotype from the VCFRecord - - GenotypeEnum normalGenotype = VcfUtils.calculateGenotypeEnum( - normalVCF.getInfo().substring(0, 3), normalVCF.getRefChar(), normalVCF.getAlt().charAt(0)); - GenotypeEnum tumourGenotype = VcfUtils.calculateGenotypeEnum( - tumourVCF.getInfo().substring(0, 3), tumourVCF.getRefChar(), tumourVCF.getAlt().charAt(0)); - - if (normalMut == tumourMut) { - sameMutation++; - if (normalGenotype == tumourGenotype) - ++sameMutationSameGenotype; - else { - RefAndMultiGenotype ramg = new RefAndMultiGenotype(normalVCF.getRefChar(), normalGenotype, tumourGenotype); - AtomicLong al = diffGenotypes.get(ramg); - if (null == al) { - al = new AtomicLong(); - diffGenotypes.put(ramg, al); - } - al.incrementAndGet(); - } - } else { - diffMutation++; - if (normalGenotype == tumourGenotype) - ++diffMutationSameGenotype; - } - } - } - - for (ChrPosition position : tumourVCFMap.keySet()) { - if (null == normalVCFMap.get(position)) { - tumourUnique++; - uniqueTumourVCFMap.put(position, tumourVCFMap.get(position)); - } - } - - // now print out some stats - StringBuilder sb = new StringBuilder("\nSTATS\n"); - sb.append("No of positions in normal map: " + normalVCFMap.size()); - sb.append("\nNo of unique positions in normal map: " + normalUnique); - sb.append("\nNo of positions in tumour map: " + tumourVCFMap.size()); - sb.append("\nNo of unique positions in tumour map: " + tumourUnique); - sb.append("\nNo of shared positions: " + normalAndTumour); - sb.append("\n"); - sb.append("\nNo of positions with same mutation: " + sameMutation); - sb.append("\nNo of positions with same mutation and same genotype: " + sameMutationSameGenotype); - - sb.append("\npositions with same mutation and diff genotype: "); - - for (Entry entry : diffGenotypes.entrySet()) { - sb.append("\n" + entry.getKey().toString() + " count: " + entry.getValue().get()); - } - sb.append("\nNo of positions with diff mutation: " + diffMutation); - sb.append("\nNo of positions with diff mutation and same genotype: " + diffMutationSameGenotype); - - logger.info(sb.toString()); - - - } - - private void loadVCFData(String vcfFile, Map map) throws Exception { - if (FileUtils.canFileBeRead(vcfFile)) { - - VCFFileReader reader = new VCFFileReader(new File(vcfFile)); - try { - for (VcfRecord qpr : reader) { - map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); - } - } finally { - reader.close(); - } - } - } - - - public static void main(String[] args) throws Exception { - CompareVCFs sp = new CompareVCFs(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if 
(options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(CompareVCFs.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareVCFs", CompareVCFs.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- b/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- deleted file mode 100644 index 03a4e2f03..000000000 --- a/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- +++ /dev/null @@ -1,269 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.vcf; - -import java.io.File; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.atomic.AtomicLong; - -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.GenotypeEnum; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.picard.QJumper; -import org.qcmg.picard.util.SAMUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.vcf.VCFFileReader; - -public class CompareVCFs { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private static QLogger logger; - - private final ConcurrentMap normalVCFMap = new ConcurrentHashMap(12500); //not expecting more than 100000 - private final ConcurrentMap tumourVCFMap = new ConcurrentHashMap(12500); - private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); - - public int engage() throws Exception { - - logger.info("loading normal vcf data"); - loadVCFData(cmdLineInputFiles[0], normalVCFMap); - logger.info("loading normal vcf data - DONE [" + normalVCFMap.size() + "]"); - - logger.info("loading tumour vcf data"); - loadVCFData(cmdLineInputFiles[1], tumourVCFMap); - logger.info("loading tumour vcf data - DONE [" + tumourVCFMap.size() + "]"); - - examine(); - - addPileupFromNormalBam(); - - return exitStatus; - } - - private void addPileupFromNormalBam() throws Exception { - // loop through each position in the unique map and get the entries in the normal GATK cleaned BAM file. - int notEnoughCoverage = 0, mutationFoundInNormal = 0; - StringBuilder sb = new StringBuilder(); - QJumper qj = new QJumper(); - qj.setupReader(cmdLineInputFiles[2]); - - for (Entry entry : uniqueTumourVCFMap.entrySet()) { - int position = entry.getKey().getStartPosition(); - boolean foundInNormal = false; - List sams = qj.getOverlappingRecordsAtPosition(entry.getKey().getChromosome(), position, position); - - for (SAMRecord sam : sams) { - int offset = SAMUtils.getIndexInReadFromPosition(sam, position); - if (offset > -1 && offset < sam.getReadLength()) { - char c = sam.getReadString().charAt(offset); - if (c == entry.getValue().getAlt().charAt(0)) { - foundInNormal = true; - mutationFoundInNormal++; - break; - } - } - } - - if ( ! foundInNormal && sams.size() < 8) - notEnoughCoverage++; - else if ( ! foundInNormal) - sb.append(entry.getKey().getChromosome() + ":" + position + "\n"); - } - - logger.info("total positions examined: " + uniqueTumourVCFMap.size()); - logger.info("positions where mutation was also found in normal (class C): " + mutationFoundInNormal); - logger.info("positions where coverage in normal was less than 8 (class B): " + notEnoughCoverage); - logger.info("Potential class A positions: "); - logger.info(sb.toString()); - } - - private void examine() { - - final Map diffGenotypes = new HashMap(); - - // we want to know the following... - // number unique to normal - // number unique to tumour - // no of common positions - int normalUnique = 0, tumourUnique = 0, normalAndTumour = 0; - - // for the common positions... 
- // no that have the same mutation - // no that have a different mutation - // no of those that have the same genotype - - int sameMutation = 0, sameMutationSameGenotype = 0; - int diffMutation = 0, diffMutationSameGenotype = 0; - - // here we go - - for (Entry entry : normalVCFMap.entrySet()) { - - VcfRecord normalVCF = entry.getValue(); - VcfRecord tumourVCF = tumourVCFMap.get(entry.getKey()); - - if (null == tumourVCF) { - normalUnique++; - } else { - ++normalAndTumour; - - // sanity check - compare ref - if not the same - oh dear... - assert normalVCF.getRef().equals(tumourVCF.getRef()); - - // compare mutations - char normalMut = normalVCF.getAlt().charAt(0); - char tumourMut = tumourVCF.getAlt().charAt(0); - - // need to get the genotype from the VCFRecord - - GenotypeEnum normalGenotype = VcfUtils.calculateGenotypeEnum( - normalVCF.getInfo().substring(0, 3), normalVCF.getRefChar(), normalVCF.getAlt().charAt(0)); - GenotypeEnum tumourGenotype = VcfUtils.calculateGenotypeEnum( - tumourVCF.getInfo().substring(0, 3), tumourVCF.getRefChar(), tumourVCF.getAlt().charAt(0)); - - if (normalMut == tumourMut) { - sameMutation++; - if (normalGenotype == tumourGenotype) - ++sameMutationSameGenotype; - else { - RefAndMultiGenotype ramg = new RefAndMultiGenotype(normalVCF.getRefChar(), normalGenotype, tumourGenotype); - AtomicLong al = diffGenotypes.get(ramg); - if (null == al) { - al = new AtomicLong(); - diffGenotypes.put(ramg, al); - } - al.incrementAndGet(); - } - } else { - diffMutation++; - if (normalGenotype == tumourGenotype) - ++diffMutationSameGenotype; - } - } - } - - for (ChrPosition position : tumourVCFMap.keySet()) { - if (null == normalVCFMap.get(position)) { - tumourUnique++; - uniqueTumourVCFMap.put(position, tumourVCFMap.get(position)); - } - } - - // now print out some stats - StringBuilder sb = new StringBuilder("\nSTATS\n"); - sb.append("No of positions in normal map: " + normalVCFMap.size()); - sb.append("\nNo of unique positions in normal map: " + normalUnique); - sb.append("\nNo of positions in tumour map: " + tumourVCFMap.size()); - sb.append("\nNo of unique positions in tumour map: " + tumourUnique); - sb.append("\nNo of shared positions: " + normalAndTumour); - sb.append("\n"); - sb.append("\nNo of positions with same mutation: " + sameMutation); - sb.append("\nNo of positions with same mutation and same genotype: " + sameMutationSameGenotype); - - sb.append("\npositions with same mutation and diff genotype: "); - - for (Entry entry : diffGenotypes.entrySet()) { - sb.append("\n" + entry.getKey().toString() + " count: " + entry.getValue().get()); - } - sb.append("\nNo of positions with diff mutation: " + diffMutation); - sb.append("\nNo of positions with diff mutation and same genotype: " + diffMutationSameGenotype); - - logger.info(sb.toString()); - - - } - - private void loadVCFData(String vcfFile, Map map) throws Exception { - if (FileUtils.canFileBeRead(vcfFile)) { - - VCFFileReader reader = new VCFFileReader(new File(vcfFile)); - try { - for (VcfRecord qpr : reader) { - map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); - } - } finally { - reader.close(); - } - } - } - - - public static void main(String[] args) throws Exception { - CompareVCFs sp = new CompareVCFs(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if 
(options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(CompareVCFs.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareVCFs", CompareVCFs.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java deleted file mode 100644 index 29bb7c4c1..000000000 --- a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java +++ /dev/null @@ -1,116 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.vcf; - -import java.io.File; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedFileWriter; -import org.qcmg.qmule.tab.TabbedHeader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class ConvertVcfChr { - - private static final String CHR = "chr"; - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private static QLogger logger; - - - private int engage() throws Exception { - - // load - if (FileUtils.canFileBeRead(cmdLineInputFiles[0])) { - TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); - TabbedHeader header = reader.getHeader(); - - TabbedFileWriter writer = new TabbedFileWriter(new File(cmdLineOutputFiles[0])); - writer.addHeader(header); - - try { - for (TabbedRecord tabRec : reader) { - if ( ! 
tabRec.getData().startsWith(CHR)) { - tabRec.setData(CHR + tabRec.getData()); - } - writer.add(tabRec); - } - } finally { - try { - writer.close(); - } finally { - reader.close(); - } - } - } - return exitStatus; - } - - public static void main(String[] args) throws Exception { - ConvertVcfChr sp = new ConvertVcfChr(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(ConvertVcfChr.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareVCFs", ConvertVcfChr.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- deleted file mode 100644 index 29bb7c4c1..000000000 --- a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- +++ /dev/null @@ -1,116 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
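The engage() loop above copies the input file and prefixes "chr" onto any data line that lacks it, via the TabbedFileReader/TabbedFileWriter pair used by this deleted class. A dependency-free sketch of the same transformation follows; passing '#' header lines through untouched is an assumption made for the example, not the original behaviour.

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

// Copy a tab-delimited/VCF-style file, adding a "chr" prefix to data lines that need it.
public class AddChrPrefix {

    private static final String CHR = "chr";

    public static void convert(Path in, Path out) throws IOException {
        try (BufferedReader reader = Files.newBufferedReader(in);
             BufferedWriter writer = Files.newBufferedWriter(out)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Pass header/comment lines through unchanged; prefix everything else if needed.
                if (!line.startsWith("#") && !line.startsWith(CHR)) {
                    line = CHR + line;
                }
                writer.write(line);
                writer.newLine();
            }
        }
    }

    public static void main(String[] args) throws IOException {
        convert(Path.of(args[0]), Path.of(args[1]));
    }
}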
- */ -package org.qcmg.qmule.vcf; - -import java.io.File; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedFileWriter; -import org.qcmg.qmule.tab.TabbedHeader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class ConvertVcfChr { - - private static final String CHR = "chr"; - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private static QLogger logger; - - - private int engage() throws Exception { - - // load - if (FileUtils.canFileBeRead(cmdLineInputFiles[0])) { - TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); - TabbedHeader header = reader.getHeader(); - - TabbedFileWriter writer = new TabbedFileWriter(new File(cmdLineOutputFiles[0])); - writer.addHeader(header); - - try { - for (TabbedRecord tabRec : reader) { - if ( ! tabRec.getData().startsWith(CHR)) { - tabRec.setData(CHR + tabRec.getData()); - } - writer.add(tabRec); - } - } finally { - try { - writer.close(); - } finally { - reader.close(); - } - } - } - return exitStatus; - } - - public static void main(String[] args) throws Exception { - ConvertVcfChr sp = new ConvertVcfChr(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(ConvertVcfChr.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareVCFs", ConvertVcfChr.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java b/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java deleted file mode 100644 index b0aad1b7f..000000000 --- a/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java +++ /dev/null @@ -1,101 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. 
This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.vcf; - -import org.qcmg.common.model.GenotypeEnum; -import org.qcmg.common.model.Classification; - -public class RefAndMultiGenotype { - - private final char ref; - private final GenotypeEnum normal; - private final GenotypeEnum tumour; - - public RefAndMultiGenotype(char ref, GenotypeEnum normal, GenotypeEnum tumour) { - this.ref = ref; - this.normal = normal; - this.tumour = tumour; - } - - @Override - public String toString() { - return ref + " : " + normal.getDisplayString() + " : " + tumour.getDisplayString() + " : " + getClassification(); - } - - public String getClassification() { - if (normal == tumour) { - return Classification.GERMLINE.name(); - - } else if (normal.isHomozygous() && tumour.isHomozygous()) { - // not equal but both are homozygous - return Classification.SOMATIC.name(); - } else if (normal.isHeterozygous() && tumour.isHeterozygous()) { - // not equal but both are heterozygous - return Classification.SOMATIC.name(); - } - - /////////////////////////////////////////////////////// - // normal is HOM and tumour is HET - /////////////////////////////////////////////////////// - if (normal.isHomozygous() && tumour.isHeterozygous()) { - - GenotypeEnum refAndNormalGenotype = GenotypeEnum.getGenotypeEnum(ref, normal.getFirstAllele()); - - if (tumour == refAndNormalGenotype) { - return Classification.GERMLINE.name(); -// mutation = normal.getFirstAllele() + MUT_DELIM + record.getRef(); - } else { - return Classification.SOMATIC.name(); - } - } - - /////////////////////////////////////////////////////// - // normal is HET and tumour is HOM - ////////////////////////////////////////////////////// - else if (normal.isHeterozygous() && tumour.isHomozygous()){ - - if (normal.containsAllele(tumour.getFirstAllele())) { - return Classification.GERMLINE.name(); - } else { - return Classification.SOMATIC.name(); - } - } - return null; - } - - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((normal == null) ? 0 : normal.hashCode()); - result = prime * result + ref; - result = prime * result + ((tumour == null) ? 0 : tumour.hashCode()); - return result; - } - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - RefAndMultiGenotype other = (RefAndMultiGenotype) obj; - if (normal == null) { - if (other.normal != null) - return false; - } else if (!normal.equals(other.normal)) - return false; - if (ref != other.ref) - return false; - if (tumour == null) { - if (other.tumour != null) - return false; - } else if (!tumour.equals(other.tumour)) - return false; - return true; - } - -} diff --git a/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- b/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- deleted file mode 100644 index b0aad1b7f..000000000 --- a/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- +++ /dev/null @@ -1,101 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
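getClassification() above reduces a reference base plus a normal and tumour genotype to a GERMLINE or SOMATIC call. The sketch below restates that decision order using two-letter genotype strings (e.g. "AG") purely as an illustration; the real class works on GenotypeEnum, so treat the helper names and the string representation here as assumptions.

// Simplified restatement of the GERMLINE/SOMATIC decision order, with genotypes
// given as two-allele strings whose alleles are in a consistent order.
public class GenotypeClassificationExample {

    static boolean isHom(String g) {
        return g.charAt(0) == g.charAt(1);
    }

    static boolean contains(String g, char allele) {
        return g.indexOf(allele) >= 0;
    }

    public static String classify(char ref, String normal, String tumour) {
        if (normal.equals(tumour)) {
            return "GERMLINE";                 // identical genotypes
        }
        if (isHom(normal) == isHom(tumour)) {
            return "SOMATIC";                  // both hom or both het, but different
        }
        if (isHom(normal)) {
            // hom normal vs het tumour: germline only if tumour is exactly {ref, normal allele}
            char normalAllele = normal.charAt(0);
            if (ref == normalAllele) {
                return "SOMATIC";              // expected genotype would be hom ref, tumour is het
            }
            boolean tumourIsRefPlusNormal = contains(tumour, ref) && contains(tumour, normalAllele);
            return tumourIsRefPlusNormal ? "GERMLINE" : "SOMATIC";
        }
        // het normal vs hom tumour: germline only if normal already carries the tumour allele
        return contains(normal, tumour.charAt(0)) ? "GERMLINE" : "SOMATIC";
    }

    public static void main(String[] args) {
        System.out.println(classify('A', "AG", "GG"));  // GERMLINE: normal already has G
        System.out.println(classify('A', "CC", "AC"));  // GERMLINE: tumour is ref + normal allele
        System.out.println(classify('A', "AA", "CG"));  // SOMATIC
    }
}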
- */ -package org.qcmg.qmule.vcf; - -import org.qcmg.common.model.GenotypeEnum; -import org.qcmg.common.model.Classification; - -public class RefAndMultiGenotype { - - private final char ref; - private final GenotypeEnum normal; - private final GenotypeEnum tumour; - - public RefAndMultiGenotype(char ref, GenotypeEnum normal, GenotypeEnum tumour) { - this.ref = ref; - this.normal = normal; - this.tumour = tumour; - } - - @Override - public String toString() { - return ref + " : " + normal.getDisplayString() + " : " + tumour.getDisplayString() + " : " + getClassification(); - } - - public String getClassification() { - if (normal == tumour) { - return Classification.GERMLINE.name(); - - } else if (normal.isHomozygous() && tumour.isHomozygous()) { - // not equal but both are homozygous - return Classification.SOMATIC.name(); - } else if (normal.isHeterozygous() && tumour.isHeterozygous()) { - // not equal but both are heterozygous - return Classification.SOMATIC.name(); - } - - /////////////////////////////////////////////////////// - // normal is HOM and tumour is HET - /////////////////////////////////////////////////////// - if (normal.isHomozygous() && tumour.isHeterozygous()) { - - GenotypeEnum refAndNormalGenotype = GenotypeEnum.getGenotypeEnum(ref, normal.getFirstAllele()); - - if (tumour == refAndNormalGenotype) { - return Classification.GERMLINE.name(); -// mutation = normal.getFirstAllele() + MUT_DELIM + record.getRef(); - } else { - return Classification.SOMATIC.name(); - } - } - - /////////////////////////////////////////////////////// - // normal is HET and tumour is HOM - ////////////////////////////////////////////////////// - else if (normal.isHeterozygous() && tumour.isHomozygous()){ - - if (normal.containsAllele(tumour.getFirstAllele())) { - return Classification.GERMLINE.name(); - } else { - return Classification.SOMATIC.name(); - } - } - return null; - } - - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((normal == null) ? 0 : normal.hashCode()); - result = prime * result + ref; - result = prime * result + ((tumour == null) ? 
0 : tumour.hashCode()); - return result; - } - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - RefAndMultiGenotype other = (RefAndMultiGenotype) obj; - if (normal == null) { - if (other.normal != null) - return false; - } else if (!normal.equals(other.normal)) - return false; - if (ref != other.ref) - return false; - if (tumour == null) { - if (other.tumour != null) - return false; - } else if (!tumour.equals(other.tumour)) - return false; - return true; - } - -} From bb1edfa56617e987f44ae47ec044eb545bb83ea5 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 18:39:52 +1000 Subject: [PATCH 14/73] mv unused package --- .../org/qcmg/unused/bed/BEDFileReader.java | 37 ++ .../org/qcmg/unused/bed/BEDFileReader.java-- | 37 ++ qio/src/org/qcmg/unused/bed/BEDRecord.java | 97 +++ qio/src/org/qcmg/unused/bed/BEDRecord.java-- | 97 +++ .../qcmg/unused/bed/BEDRecordIterator.java | 49 ++ .../qcmg/unused/bed/BEDRecordIterator.java-- | 49 ++ .../bed/BEDRecordPositionComparator.java | 21 + .../bed/BEDRecordPositionComparator.java-- | 21 + .../org/qcmg/unused/bed/BEDSerializer.java | 65 ++ .../org/qcmg/unused/bed/BEDSerializer.java-- | 65 ++ .../consensuscalls/ConsensusCallsFlag.java | 64 ++ .../consensuscalls/ConsensusCallsFlag.java-- | 64 ++ .../consensuscalls/ConsensusCallsRecord.java | 377 ++++++++++++ .../ConsensusCallsRecord.java-- | 377 ++++++++++++ .../ConsensusCallsSerializer.java | 68 +++ .../ConsensusCallsSerializer.java-- | 68 +++ .../exception/RecordIteratorException.java | 17 + .../exception/RecordIteratorException.java-- | 17 + .../genesymbol/GeneSymbolFileReader.java | 38 ++ .../genesymbol/GeneSymbolFileReader.java-- | 38 ++ .../unused/genesymbol/GeneSymbolRecord.java | 32 + .../unused/genesymbol/GeneSymbolRecord.java-- | 32 + .../genesymbol/GeneSymbolRecordIterator.java | 49 ++ .../GeneSymbolRecordIterator.java-- | 49 ++ .../genesymbol/GeneSymbolSerializer.java | 44 ++ .../genesymbol/GeneSymbolSerializer.java-- | 44 ++ .../unused/illumina/IlluminaFileReader.java | 21 + .../unused/illumina/IlluminaFileReader.java-- | 21 + .../qcmg/unused/illumina/IlluminaRecord.java | 262 ++++++++ .../unused/illumina/IlluminaRecord.java-- | 262 ++++++++ .../illumina/IlluminaRecordIterator.java | 21 + .../illumina/IlluminaRecordIterator.java-- | 21 + .../unused/illumina/IlluminaSerializer.java | 73 +++ .../unused/illumina/IlluminaSerializer.java-- | 73 +++ .../org/qcmg/unused/maf/MAFFileReader.java | 42 ++ .../org/qcmg/unused/maf/MAFFileReader.java-- | 42 ++ .../qcmg/unused/maf/MAFRecordIterator.java | 54 ++ .../qcmg/unused/maf/MAFRecordIterator.java-- | 54 ++ .../org/qcmg/unused/maf/MAFSerializer.java | 68 +++ .../org/qcmg/unused/maf/MAFSerializer.java-- | 68 +++ .../PrimerDesignFileReader.java | 17 + .../PrimerDesignFileReader.java-- | 17 + .../PrimerDesignRecord.java | 135 +++++ .../PrimerDesignRecord.java-- | 135 +++++ .../PrimerDesignRecordSerializer.java | 80 +++ .../PrimerDesignRecordSerializer.java-- | 80 +++ .../primerdesignsummary/PrimerPosition.java | 68 +++ .../primerdesignsummary/PrimerPosition.java-- | 68 +++ .../primerinput/PrimerInputFileReader.java | 17 + .../primerinput/PrimerInputFileReader.java-- | 17 + .../primerinput/PrimerInputFileWriter.java | 33 + .../primerinput/PrimerInputFileWriter.java-- | 33 + .../unused/primerinput/PrimerInputRecord.java | 331 ++++++++++ .../primerinput/PrimerInputRecord.java-- | 331 ++++++++++ 
.../PrimerInputRecordSerializer.java | 201 +++++++ .../PrimerInputRecordSerializer.java-- | 201 +++++++ .../primerinput/PrimerSequenceTarget.java | 43 ++ .../primerinput/PrimerSequenceTarget.java-- | 43 ++ .../unused/primerinput/PrimerSizeRange.java | 44 ++ .../unused/primerinput/PrimerSizeRange.java-- | 44 ++ .../primeroutput/PrimerOutputFileReader.java | 22 + .../PrimerOutputFileReader.java-- | 22 + .../primeroutput/PrimerOutputFileWriter.java | 35 ++ .../PrimerOutputFileWriter.java-- | 35 ++ .../primeroutput/PrimerOutputHeader.java | 563 ++++++++++++++++++ .../primeroutput/PrimerOutputHeader.java-- | 563 ++++++++++++++++++ .../PrimerOutputHeaderSerializer.java | 128 ++++ .../PrimerOutputHeaderSerializer.java-- | 128 ++++ .../primeroutput/PrimerOutputRecord.java | 517 ++++++++++++++++ .../primeroutput/PrimerOutputRecord.java-- | 517 ++++++++++++++++ .../PrimerOutputRecordSerializer.java | 136 +++++ .../PrimerOutputRecordSerializer.java-- | 136 +++++ .../qcmg/unused/reader/AbstractReader.java | 40 ++ .../qcmg/unused/reader/AbstractReader.java-- | 40 ++ .../unused/reader/ExtendedFileReader.java | 41 ++ .../unused/reader/ExtendedFileReader.java-- | 41 ++ .../org/qcmg/unused/reader/FileReader.java | 57 ++ .../org/qcmg/unused/reader/FileReader.java-- | 57 ++ qio/src/org/qcmg/unused/reader/Reader.java | 10 + qio/src/org/qcmg/unused/reader/Reader.java-- | 10 + .../qcmg/unused/simple/SimpleFileReader.java | 23 + .../unused/simple/SimpleFileReader.java-- | 23 + .../unused/simple/SimpleRecordIterator.java | 26 + .../unused/simple/SimpleRecordIterator.java-- | 26 + .../qcmg/unused/simple/SimpleSerializer.java | 57 ++ .../unused/simple/SimpleSerializer.java-- | 57 ++ 86 files changed, 8246 insertions(+) create mode 100644 qio/src/org/qcmg/unused/bed/BEDFileReader.java create mode 100644 qio/src/org/qcmg/unused/bed/BEDFileReader.java-- create mode 100644 qio/src/org/qcmg/unused/bed/BEDRecord.java create mode 100644 qio/src/org/qcmg/unused/bed/BEDRecord.java-- create mode 100644 qio/src/org/qcmg/unused/bed/BEDRecordIterator.java create mode 100644 qio/src/org/qcmg/unused/bed/BEDRecordIterator.java-- create mode 100644 qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java create mode 100644 qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java-- create mode 100644 qio/src/org/qcmg/unused/bed/BEDSerializer.java create mode 100644 qio/src/org/qcmg/unused/bed/BEDSerializer.java-- create mode 100644 qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java create mode 100644 qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java-- create mode 100644 qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java create mode 100644 qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java-- create mode 100644 qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java create mode 100644 qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java-- create mode 100644 qio/src/org/qcmg/unused/exception/RecordIteratorException.java create mode 100644 qio/src/org/qcmg/unused/exception/RecordIteratorException.java-- create mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java create mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java-- create mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java create mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java-- create mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java create mode 100644 
qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java-- create mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java create mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java-- create mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java create mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java-- create mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaRecord.java create mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaRecord.java-- create mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java create mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java-- create mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java create mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java-- create mode 100644 qio/src/org/qcmg/unused/maf/MAFFileReader.java create mode 100644 qio/src/org/qcmg/unused/maf/MAFFileReader.java-- create mode 100644 qio/src/org/qcmg/unused/maf/MAFRecordIterator.java create mode 100644 qio/src/org/qcmg/unused/maf/MAFRecordIterator.java-- create mode 100644 qio/src/org/qcmg/unused/maf/MAFSerializer.java create mode 100644 qio/src/org/qcmg/unused/maf/MAFSerializer.java-- create mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java create mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java-- create mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java create mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java-- create mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java create mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java-- create mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java create mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java-- create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java-- create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java-- create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java-- create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java-- create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java-- create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java create mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java-- create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java-- create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java-- create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java-- create mode 100644 
qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java-- create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java-- create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java create mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java-- create mode 100644 qio/src/org/qcmg/unused/reader/AbstractReader.java create mode 100644 qio/src/org/qcmg/unused/reader/AbstractReader.java-- create mode 100644 qio/src/org/qcmg/unused/reader/ExtendedFileReader.java create mode 100644 qio/src/org/qcmg/unused/reader/ExtendedFileReader.java-- create mode 100644 qio/src/org/qcmg/unused/reader/FileReader.java create mode 100644 qio/src/org/qcmg/unused/reader/FileReader.java-- create mode 100644 qio/src/org/qcmg/unused/reader/Reader.java create mode 100644 qio/src/org/qcmg/unused/reader/Reader.java-- create mode 100644 qio/src/org/qcmg/unused/simple/SimpleFileReader.java create mode 100644 qio/src/org/qcmg/unused/simple/SimpleFileReader.java-- create mode 100644 qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java create mode 100644 qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java-- create mode 100644 qio/src/org/qcmg/unused/simple/SimpleSerializer.java create mode 100644 qio/src/org/qcmg/unused/simple/SimpleSerializer.java-- diff --git a/qio/src/org/qcmg/unused/bed/BEDFileReader.java b/qio/src/org/qcmg/unused/bed/BEDFileReader.java new file mode 100644 index 000000000..39090fc6d --- /dev/null +++ b/qio/src/org/qcmg/unused/bed/BEDFileReader.java @@ -0,0 +1,37 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.bed; + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +public final class BEDFileReader implements Closeable, Iterable { + private final File file; + private final InputStream inputStream; + + public BEDFileReader(final File file) throws IOException { + this.file = file; + FileInputStream fileStream = new FileInputStream(file); + inputStream = fileStream; + } + + public Iterator iterator() { + return getRecordIterator(); + } + + public BEDRecordIterator getRecordIterator() { + return new BEDRecordIterator(inputStream); + } + + public void close() throws IOException { + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/unused/bed/BEDFileReader.java-- b/qio/src/org/qcmg/unused/bed/BEDFileReader.java-- new file mode 100644 index 000000000..39090fc6d --- /dev/null +++ b/qio/src/org/qcmg/unused/bed/BEDFileReader.java-- @@ -0,0 +1,37 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.bed; + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +public final class BEDFileReader implements Closeable, Iterable { + private final File file; + private final InputStream inputStream; + + public BEDFileReader(final File file) throws IOException { + this.file = file; + FileInputStream fileStream = new FileInputStream(file); + inputStream = fileStream; + } + + public Iterator iterator() { + return getRecordIterator(); + } + + public BEDRecordIterator getRecordIterator() { + return new BEDRecordIterator(inputStream); + } + + public void close() throws IOException { + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/unused/bed/BEDRecord.java b/qio/src/org/qcmg/unused/bed/BEDRecord.java new file mode 100644 index 000000000..297b66cb2 --- /dev/null +++ b/qio/src/org/qcmg/unused/bed/BEDRecord.java @@ -0,0 +1,97 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.bed; + + + +public class BEDRecord { + + private final static char T = '\t'; + + String chrom; + int chromStart; + int chromEnd; + String name; + int score; + String strand; + int thickStart; + int thickEnd; + String itemRGB; + int blockCount; + int blockSizes; + int blockStarts; + + public String getChrom() { + return chrom; + } + public void setChrom(String chrom) { + this.chrom = chrom; + } + public int getChromStart() { + return chromStart; + } + public void setChromStart(int chromStart) { + this.chromStart = chromStart; + } + public int getChromEnd() { + return chromEnd; + } + public void setChromEnd(int chromEnd) { + this.chromEnd = chromEnd; + } + public String getName() { + return name; + } + public void setName(String name) { + this.name = name; + } + public int getScore() { + return score; + } + public void setScore(int score) { + this.score = score; + } + public String getStrand() { + return strand; + } + public void setStrand(String strand) { + this.strand = strand; + } + public int getThickStart() { + return thickStart; + } + public void setThickStart(int thickStart) { + this.thickStart = thickStart; + } + public int getThickEnd() { + return thickEnd; + } + public void setThickEnd(int thickEnd) { + this.thickEnd = thickEnd; + } + public String getItemRGB() { + return itemRGB; + } + public void setItemRGB(String itemRGB) { + this.itemRGB = itemRGB; + } + public int getBlockCount() { + return blockCount; + } + public void setBlockCount(int blockCount) { + this.blockCount = blockCount; + } + public int getBlockSizes() { + return blockSizes; + } + public void setBlockSizes(int blockSizes) { + this.blockSizes = blockSizes; + } + public int getBlockStarts() { + return blockStarts; + } + public void setBlockStarts(int blockStarts) { + this.blockStarts = blockStarts; + } +} diff --git a/qio/src/org/qcmg/unused/bed/BEDRecord.java-- b/qio/src/org/qcmg/unused/bed/BEDRecord.java-- new file mode 100644 index 000000000..297b66cb2 --- /dev/null +++ b/qio/src/org/qcmg/unused/bed/BEDRecord.java-- @@ -0,0 +1,97 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.bed; + + + +public class BEDRecord { + + private final static char T = '\t'; + + String chrom; + int chromStart; + int chromEnd; + String name; + int score; + String strand; + int thickStart; + int thickEnd; + String itemRGB; + int blockCount; + int blockSizes; + int blockStarts; + + public String getChrom() { + return chrom; + } + public void setChrom(String chrom) { + this.chrom = chrom; + } + public int getChromStart() { + return chromStart; + } + public void setChromStart(int chromStart) { + this.chromStart = chromStart; + } + public int getChromEnd() { + return chromEnd; + } + public void setChromEnd(int chromEnd) { + this.chromEnd = chromEnd; + } + public String getName() { + return name; + } + public void setName(String name) { + this.name = name; + } + public int getScore() { + return score; + } + public void setScore(int score) { + this.score = score; + } + public String getStrand() { + return strand; + } + public void setStrand(String strand) { + this.strand = strand; + } + public int getThickStart() { + return thickStart; + } + public void setThickStart(int thickStart) { + this.thickStart = thickStart; + } + public int getThickEnd() { + return thickEnd; + } + public void setThickEnd(int thickEnd) { + this.thickEnd = thickEnd; + } + public String getItemRGB() { + return itemRGB; + } + public void setItemRGB(String itemRGB) { + this.itemRGB = itemRGB; + } + public int getBlockCount() { + return blockCount; + } + public void setBlockCount(int blockCount) { + this.blockCount = blockCount; + } + public int getBlockSizes() { + return blockSizes; + } + public void setBlockSizes(int blockSizes) { + this.blockSizes = blockSizes; + } + public int getBlockStarts() { + return blockStarts; + } + public void setBlockStarts(int blockStarts) { + this.blockStarts = blockStarts; + } +} diff --git a/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java b/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java new file mode 100644 index 000000000..6d8726a5d --- /dev/null +++ b/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java @@ -0,0 +1,49 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.bed; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.NoSuchElementException; + +public final class BEDRecordIterator implements Iterator { + private final BufferedReader reader; + private BEDRecord next; + + public BEDRecordIterator(final InputStream stream) { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public BEDRecord next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + BEDRecord result = next; + readNext(); + return result; + } + + private void readNext() { + try { + next = BEDSerializer.nextRecord(reader); + } catch (NoSuchElementException e) { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex.getMessage(), ex); + } + } + + @SuppressWarnings("unchecked") + public void remove() { + throw new UnsupportedOperationException(); + } +} diff --git a/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java-- b/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java-- new file mode 100644 index 000000000..6d8726a5d --- /dev/null +++ b/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java-- @@ -0,0 +1,49 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.bed; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.NoSuchElementException; + +public final class BEDRecordIterator implements Iterator { + private final BufferedReader reader; + private BEDRecord next; + + public BEDRecordIterator(final InputStream stream) { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public BEDRecord next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + BEDRecord result = next; + readNext(); + return result; + } + + private void readNext() { + try { + next = BEDSerializer.nextRecord(reader); + } catch (NoSuchElementException e) { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex.getMessage(), ex); + } + } + + @SuppressWarnings("unchecked") + public void remove() { + throw new UnsupportedOperationException(); + } +} diff --git a/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java b/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java new file mode 100644 index 000000000..a813a9afc --- /dev/null +++ b/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java @@ -0,0 +1,21 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
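
Taken together, BEDFileReader, BEDRecord and BEDRecordIterator form a small pull-style API over a BED file. A minimal usage sketch follows; the class name BedReaderSketch and the regions.bed path are illustrative placeholders only, not anything shipped in this patch.

    import java.io.File;
    import java.io.IOException;

    import org.qcmg.unused.bed.BEDFileReader;
    import org.qcmg.unused.bed.BEDRecord;
    import org.qcmg.unused.bed.BEDRecordIterator;

    public class BedReaderSketch {
        public static void main(String[] args) throws IOException {
            // "regions.bed" is a placeholder path, not a file that ships with this patch
            try (BEDFileReader reader = new BEDFileReader(new File("regions.bed"))) {
                BEDRecordIterator it = reader.getRecordIterator();
                while (it.hasNext()) {
                    BEDRecord rec = it.next();
                    // print chrom:start-end for each interval
                    System.out.println(rec.getChrom() + ":" + rec.getChromStart() + "-" + rec.getChromEnd());
                }
            }
        }
    }

Note that close() in the BEDFileReader shown above is a no-op, so the underlying FileInputStream is not actually released by the try-with-resources block.
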
+ */ +package org.qcmg.unused.bed; + +import java.util.Comparator; + +public class BEDRecordPositionComparator implements + Comparator { + public int compare(BEDRecord recordA, BEDRecord recordB) { + return compareStart(recordA, recordB) + compareEnd(recordA, recordB); + } + + public int compareStart(BEDRecord recordA, BEDRecord recordB) { + return recordA.getChromStart() - recordB.getChromStart(); + } + + public int compareEnd(BEDRecord recordA, BEDRecord recordB) { + return recordA.getChromEnd() - recordB.getChromEnd(); + } +} diff --git a/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java-- b/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java-- new file mode 100644 index 000000000..a813a9afc --- /dev/null +++ b/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java-- @@ -0,0 +1,21 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.bed; + +import java.util.Comparator; + +public class BEDRecordPositionComparator implements + Comparator { + public int compare(BEDRecord recordA, BEDRecord recordB) { + return compareStart(recordA, recordB) + compareEnd(recordA, recordB); + } + + public int compareStart(BEDRecord recordA, BEDRecord recordB) { + return recordA.getChromStart() - recordB.getChromStart(); + } + + public int compareEnd(BEDRecord recordA, BEDRecord recordB) { + return recordA.getChromEnd() - recordB.getChromEnd(); + } +} diff --git a/qio/src/org/qcmg/unused/bed/BEDSerializer.java b/qio/src/org/qcmg/unused/bed/BEDSerializer.java new file mode 100644 index 000000000..164cf8189 --- /dev/null +++ b/qio/src/org/qcmg/unused/bed/BEDSerializer.java @@ -0,0 +1,65 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.bed; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +public final class BEDSerializer { + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + private static final String DEFAULT_HEADER_PREFIX = "#"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { + line = reader.readLine(); + } + return line; + } + + public static BEDRecord nextRecord(final BufferedReader reader) + throws IOException , Exception { + BEDRecord result = null; + String line = nextNonheaderLine(reader); + if (null != line) { + result = parseRecord(line); + } + return result; + } + + static BEDRecord parseRecord(final String line) throws Exception { + String[] params = tabbedPattern.split(line, -1); + if (3 > params.length) { + throw new Exception("Bad BED format. 
Insufficient columns: '" + line + "'"); + } + BEDRecord result = new BEDRecord(); + result.setChrom(params[0]); + result.setChromStart(Integer.parseInt(params[1])); + result.setChromEnd(Integer.parseInt(params[2])); + if (params.length > 3) { + if (params.length >= 4) + result.setName(params[3]); + if (params.length >= 5) + result.setScore(Integer.parseInt(params[4])); + if (params.length >= 6) + result.setStrand(params[5]); + if (params.length >= 7) + result.setThickStart(Integer.parseInt(params[6])); + if (params.length >= 8) + result.setThickEnd(Integer.parseInt(params[7])); + if (params.length >= 9) + result.setItemRGB(params[8]); + if (params.length >= 10) + result.setBlockCount(Integer.parseInt(params[9])); + if (params.length >= 11) + result.setBlockSizes(Integer.parseInt(params[10])); + if (params.length >= 12) + result.setBlockStarts(Integer.parseInt(params[11])); + } + return result; + } + +} diff --git a/qio/src/org/qcmg/unused/bed/BEDSerializer.java-- b/qio/src/org/qcmg/unused/bed/BEDSerializer.java-- new file mode 100644 index 000000000..164cf8189 --- /dev/null +++ b/qio/src/org/qcmg/unused/bed/BEDSerializer.java-- @@ -0,0 +1,65 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.bed; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +public final class BEDSerializer { + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + private static final String DEFAULT_HEADER_PREFIX = "#"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { + line = reader.readLine(); + } + return line; + } + + public static BEDRecord nextRecord(final BufferedReader reader) + throws IOException , Exception { + BEDRecord result = null; + String line = nextNonheaderLine(reader); + if (null != line) { + result = parseRecord(line); + } + return result; + } + + static BEDRecord parseRecord(final String line) throws Exception { + String[] params = tabbedPattern.split(line, -1); + if (3 > params.length) { + throw new Exception("Bad BED format. Insufficient columns: '" + line + "'"); + } + BEDRecord result = new BEDRecord(); + result.setChrom(params[0]); + result.setChromStart(Integer.parseInt(params[1])); + result.setChromEnd(Integer.parseInt(params[2])); + if (params.length > 3) { + if (params.length >= 4) + result.setName(params[3]); + if (params.length >= 5) + result.setScore(Integer.parseInt(params[4])); + if (params.length >= 6) + result.setStrand(params[5]); + if (params.length >= 7) + result.setThickStart(Integer.parseInt(params[6])); + if (params.length >= 8) + result.setThickEnd(Integer.parseInt(params[7])); + if (params.length >= 9) + result.setItemRGB(params[8]); + if (params.length >= 10) + result.setBlockCount(Integer.parseInt(params[9])); + if (params.length >= 11) + result.setBlockSizes(Integer.parseInt(params[10])); + if (params.length >= 12) + result.setBlockStarts(Integer.parseInt(params[11])); + } + return result; + } + +} diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java new file mode 100644 index 000000000..6cb38fa52 --- /dev/null +++ b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java @@ -0,0 +1,64 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.consensuscalls; + + +public enum ConsensusCallsFlag { + + H_1("h1"), + H_2("h2"), + H_3("h3"), + H_4("h4"), + H_5("h5"), + H_6("h6"), + H_7("h7"), + H_8("h8"), + H_9("h9"), + H_10("h10"), + H_11("h11"), + H_12("h12"), + H_13("h13"), + H_14("h14"), + H_15("h15"), + H_16("h16"), + H_17("h17"), + H_18("h18"), + H_19("h19"), + H_20("h20"), + H_21("h21"), + H_22("h22"), + M_1("m1"), + M_2("m2"), + M_3("m3"), + M_4("m4"), + M_5("m5"), + M_6("m6"), + M_7("m7"), + M_8("m8"), + M_9("m9"), + M_10("m10"), + M_11("m11"), + M_12("m12"), + M_13("m13"); + + private final String value; + + ConsensusCallsFlag(String v) { + value = v; + } + + public String value() { + return value; + } + + public static ConsensusCallsFlag fromValue(String v) { + for (ConsensusCallsFlag c: ConsensusCallsFlag.values()) { + if (c.value.equals(v)) { + return c; + } + } + throw new IllegalArgumentException(v); + } + +} diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java-- b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java-- new file mode 100644 index 000000000..6cb38fa52 --- /dev/null +++ b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java-- @@ -0,0 +1,64 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.consensuscalls; + + +public enum ConsensusCallsFlag { + + H_1("h1"), + H_2("h2"), + H_3("h3"), + H_4("h4"), + H_5("h5"), + H_6("h6"), + H_7("h7"), + H_8("h8"), + H_9("h9"), + H_10("h10"), + H_11("h11"), + H_12("h12"), + H_13("h13"), + H_14("h14"), + H_15("h15"), + H_16("h16"), + H_17("h17"), + H_18("h18"), + H_19("h19"), + H_20("h20"), + H_21("h21"), + H_22("h22"), + M_1("m1"), + M_2("m2"), + M_3("m3"), + M_4("m4"), + M_5("m5"), + M_6("m6"), + M_7("m7"), + M_8("m8"), + M_9("m9"), + M_10("m10"), + M_11("m11"), + M_12("m12"), + M_13("m13"); + + private final String value; + + ConsensusCallsFlag(String v) { + value = v; + } + + public String value() { + return value; + } + + public static ConsensusCallsFlag fromValue(String v) { + for (ConsensusCallsFlag c: ConsensusCallsFlag.values()) { + if (c.value.equals(v)) { + return c; + } + } + throw new IllegalArgumentException(v); + } + +} diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java new file mode 100644 index 000000000..c9321b571 --- /dev/null +++ b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java @@ -0,0 +1,377 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. 
+// Generated on: 2013.10.25 at 10:52:20 AM EST +// + + +package org.qcmg.unused.consensuscalls; + +import java.util.ArrayList; +import java.util.List; +public class ConsensusCallsRecord { + + protected String chr; + protected int position; + protected String alleleDiColor1; + protected String alleleDiColor2; + protected String reference; + protected String genotype; + protected double pValue; + protected List flag; + protected int coverage; + protected int nCounts1StAllele; + protected int nCountsReferenceAllele; + protected int nCountsNonReferenceAllele; + protected int refAvgQV; + protected int novelAvgQV; + protected int heterozygous; + protected String algorithm; + protected String algorithmName; + + /** + * Gets the value of the chr property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getChr() { + return chr; + } + + /** + * Sets the value of the chr property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setChr(String value) { + this.chr = value; + } + + /** + * Gets the value of the position property. + * + */ + public int getPosition() { + return position; + } + + /** + * Sets the value of the position property. + * + */ + public void setPosition(int value) { + this.position = value; + } + + /** + * Gets the value of the alleleDiColor1 property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAlleleDiColor1() { + return alleleDiColor1; + } + + /** + * Sets the value of the alleleDiColor1 property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAlleleDiColor1(String value) { + this.alleleDiColor1 = value; + } + + /** + * Gets the value of the alleleDiColor2 property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAlleleDiColor2() { + return alleleDiColor2; + } + + /** + * Sets the value of the alleleDiColor2 property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAlleleDiColor2(String value) { + this.alleleDiColor2 = value; + } + + /** + * Gets the value of the reference property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getReference() { + return reference; + } + + /** + * Sets the value of the reference property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setReference(String value) { + this.reference = value; + } + + /** + * Gets the value of the genotype property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getGenotype() { + return genotype; + } + + /** + * Sets the value of the genotype property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setGenotype(String value) { + this.genotype = value; + } + + /** + * Gets the value of the pValue property. + * + */ + public double getPValue() { + return pValue; + } + + /** + * Sets the value of the pValue property. + * + */ + public void setPValue(double value) { + this.pValue = value; + } + + /** + * Gets the value of the flag property. + * + *
+     * This accessor method returns a reference to the live list,
+     * not a snapshot. Therefore any modification you make to the
+     * returned list will be present inside the JAXB object.
+     * This is why there is not a set method for the flag property.
+     * 
+     * <p>
+     * For example, to add a new item, do as follows:
+     * <pre>
+     *    getFlag().add(newItem);
+     * </pre>
+     * 
+     * 
+     * <p>
+ * Objects of the following type(s) are allowed in the list + * {@link ConsensusCallsFlag } + * + * + */ + public List getFlag() { + if (flag == null) { + flag = new ArrayList(); + } + return this.flag; + } + + /** + * Gets the value of the coverage property. + * + */ + public int getCoverage() { + return coverage; + } + + /** + * Sets the value of the coverage property. + * + */ + public void setCoverage(int value) { + this.coverage = value; + } + + /** + * Gets the value of the nCounts1StAllele property. + * + */ + public int getNCounts1StAllele() { + return nCounts1StAllele; + } + + /** + * Sets the value of the nCounts1StAllele property. + * + */ + public void setNCounts1StAllele(int value) { + this.nCounts1StAllele = value; + } + + /** + * Gets the value of the nCountsReferenceAllele property. + * + */ + public int getNCountsReferenceAllele() { + return nCountsReferenceAllele; + } + + /** + * Sets the value of the nCountsReferenceAllele property. + * + */ + public void setNCountsReferenceAllele(int value) { + this.nCountsReferenceAllele = value; + } + + /** + * Gets the value of the nCountsNonReferenceAllele property. + * + */ + public int getNCountsNonReferenceAllele() { + return nCountsNonReferenceAllele; + } + + /** + * Sets the value of the nCountsNonReferenceAllele property. + * + */ + public void setNCountsNonReferenceAllele(int value) { + this.nCountsNonReferenceAllele = value; + } + + /** + * Gets the value of the refAvgQV property. + * + */ + public int getRefAvgQV() { + return refAvgQV; + } + + /** + * Sets the value of the refAvgQV property. + * + */ + public void setRefAvgQV(int value) { + this.refAvgQV = value; + } + + /** + * Gets the value of the novelAvgQV property. + * + */ + public int getNovelAvgQV() { + return novelAvgQV; + } + + /** + * Sets the value of the novelAvgQV property. + * + */ + public void setNovelAvgQV(int value) { + this.novelAvgQV = value; + } + + /** + * Gets the value of the heterozygous property. + * + */ + public int getHeterozygous() { + return heterozygous; + } + + /** + * Sets the value of the heterozygous property. + * + */ + public void setHeterozygous(int value) { + this.heterozygous = value; + } + + /** + * Gets the value of the algorithm property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAlgorithm() { + return algorithm; + } + + /** + * Sets the value of the algorithm property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAlgorithm(String value) { + this.algorithm = value; + } + + /** + * Gets the value of the algorithmName property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAlgorithmName() { + return algorithmName; + } + + /** + * Sets the value of the algorithmName property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAlgorithmName(String value) { + this.algorithmName = value; + } + +} diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java-- b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java-- new file mode 100644 index 000000000..c9321b571 --- /dev/null +++ b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java-- @@ -0,0 +1,377 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2013.10.25 at 10:52:20 AM EST +// + + +package org.qcmg.unused.consensuscalls; + +import java.util.ArrayList; +import java.util.List; +public class ConsensusCallsRecord { + + protected String chr; + protected int position; + protected String alleleDiColor1; + protected String alleleDiColor2; + protected String reference; + protected String genotype; + protected double pValue; + protected List flag; + protected int coverage; + protected int nCounts1StAllele; + protected int nCountsReferenceAllele; + protected int nCountsNonReferenceAllele; + protected int refAvgQV; + protected int novelAvgQV; + protected int heterozygous; + protected String algorithm; + protected String algorithmName; + + /** + * Gets the value of the chr property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getChr() { + return chr; + } + + /** + * Sets the value of the chr property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setChr(String value) { + this.chr = value; + } + + /** + * Gets the value of the position property. + * + */ + public int getPosition() { + return position; + } + + /** + * Sets the value of the position property. + * + */ + public void setPosition(int value) { + this.position = value; + } + + /** + * Gets the value of the alleleDiColor1 property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAlleleDiColor1() { + return alleleDiColor1; + } + + /** + * Sets the value of the alleleDiColor1 property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAlleleDiColor1(String value) { + this.alleleDiColor1 = value; + } + + /** + * Gets the value of the alleleDiColor2 property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAlleleDiColor2() { + return alleleDiColor2; + } + + /** + * Sets the value of the alleleDiColor2 property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAlleleDiColor2(String value) { + this.alleleDiColor2 = value; + } + + /** + * Gets the value of the reference property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getReference() { + return reference; + } + + /** + * Sets the value of the reference property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setReference(String value) { + this.reference = value; + } + + /** + * Gets the value of the genotype property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getGenotype() { + return genotype; + } + + /** + * Sets the value of the genotype property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setGenotype(String value) { + this.genotype = value; + } + + /** + * Gets the value of the pValue property. + * + */ + public double getPValue() { + return pValue; + } + + /** + * Sets the value of the pValue property. + * + */ + public void setPValue(double value) { + this.pValue = value; + } + + /** + * Gets the value of the flag property. + * + *
+     * This accessor method returns a reference to the live list,
+     * not a snapshot. Therefore any modification you make to the
+     * returned list will be present inside the JAXB object.
+     * This is why there is not a set method for the flag property.
+     * 
+     * <p>
+     * For example, to add a new item, do as follows:
+     * <pre>
+     *    getFlag().add(newItem);
+     * </pre>
+     * 
+     * 
+     * <p>
+ * Objects of the following type(s) are allowed in the list + * {@link ConsensusCallsFlag } + * + * + */ + public List getFlag() { + if (flag == null) { + flag = new ArrayList(); + } + return this.flag; + } + + /** + * Gets the value of the coverage property. + * + */ + public int getCoverage() { + return coverage; + } + + /** + * Sets the value of the coverage property. + * + */ + public void setCoverage(int value) { + this.coverage = value; + } + + /** + * Gets the value of the nCounts1StAllele property. + * + */ + public int getNCounts1StAllele() { + return nCounts1StAllele; + } + + /** + * Sets the value of the nCounts1StAllele property. + * + */ + public void setNCounts1StAllele(int value) { + this.nCounts1StAllele = value; + } + + /** + * Gets the value of the nCountsReferenceAllele property. + * + */ + public int getNCountsReferenceAllele() { + return nCountsReferenceAllele; + } + + /** + * Sets the value of the nCountsReferenceAllele property. + * + */ + public void setNCountsReferenceAllele(int value) { + this.nCountsReferenceAllele = value; + } + + /** + * Gets the value of the nCountsNonReferenceAllele property. + * + */ + public int getNCountsNonReferenceAllele() { + return nCountsNonReferenceAllele; + } + + /** + * Sets the value of the nCountsNonReferenceAllele property. + * + */ + public void setNCountsNonReferenceAllele(int value) { + this.nCountsNonReferenceAllele = value; + } + + /** + * Gets the value of the refAvgQV property. + * + */ + public int getRefAvgQV() { + return refAvgQV; + } + + /** + * Sets the value of the refAvgQV property. + * + */ + public void setRefAvgQV(int value) { + this.refAvgQV = value; + } + + /** + * Gets the value of the novelAvgQV property. + * + */ + public int getNovelAvgQV() { + return novelAvgQV; + } + + /** + * Sets the value of the novelAvgQV property. + * + */ + public void setNovelAvgQV(int value) { + this.novelAvgQV = value; + } + + /** + * Gets the value of the heterozygous property. + * + */ + public int getHeterozygous() { + return heterozygous; + } + + /** + * Sets the value of the heterozygous property. + * + */ + public void setHeterozygous(int value) { + this.heterozygous = value; + } + + /** + * Gets the value of the algorithm property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAlgorithm() { + return algorithm; + } + + /** + * Sets the value of the algorithm property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAlgorithm(String value) { + this.algorithm = value; + } + + /** + * Gets the value of the algorithmName property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAlgorithmName() { + return algorithmName; + } + + /** + * Sets the value of the algorithmName property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAlgorithmName(String value) { + this.algorithmName = value; + } + +} diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java new file mode 100644 index 000000000..7695b6486 --- /dev/null +++ b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java @@ -0,0 +1,68 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.consensuscalls; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +public final class ConsensusCallsSerializer { + private static final Pattern tabbedPattern = Pattern.compile("[\\t]+"); + private static final Pattern commaPattern = Pattern.compile("[,]+"); + + public static ConsensusCallsRecord nextRecord(final BufferedReader reader) + throws Exception, IOException { + ConsensusCallsRecord result = null; + try { + String line = reader.readLine(); + if (null != line) { + result = parseRecord(line); + } + } catch (IOException e) { + throw e; + } catch (Exception e) { + throw e; + } + return result; + } + + static ConsensusCallsRecord parseRecord(final String line) throws Exception { + String[] params = tabbedPattern.split(line); + if (17 != params.length) { + throw new Exception("Bad Consensus Calls format"); + } + ConsensusCallsRecord result = new ConsensusCallsRecord(); + result.setChr(params[0]); + result.setPosition(Integer.parseInt(params[1])); + result.setAlleleDiColor1(params[2]); + result.setAlleleDiColor2(params[3]); + result.setReference(params[4]); + result.setGenotype(params[5]); + result.setPValue(Double.parseDouble(params[6])); + parseFlags(result.getFlag(), params[7]); + result.setCoverage(Integer.parseInt(params[8])); + result.setNCountsNonReferenceAllele(Integer.parseInt(params[9])); + result.setNCountsReferenceAllele(Integer.parseInt(params[10])); + result.setNCountsNonReferenceAllele(Integer.parseInt(params[11])); + result.setRefAvgQV(Integer.parseInt(params[12])); + result.setNovelAvgQV(Integer.parseInt(params[13])); + result.setHeterozygous(Integer.parseInt(params[14])); + result.setAlgorithm(params[15]); + result.setAlgorithmName(params[16]); + return result; + } + + public static void parseFlags(final List list, final String value) throws Exception { + String[] params = commaPattern.split(value); + if (1 > params.length) { + throw new Exception("Bad Consensus Calls Flag format"); + } + for (String param : params) { + list.add(ConsensusCallsFlag.fromValue(param)); + } + } +} diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java-- b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java-- new file mode 100644 index 000000000..7695b6486 --- /dev/null +++ b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java-- @@ -0,0 +1,68 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
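
ConsensusCallsSerializer.nextRecord reads one tab-separated line of exactly 17 columns: chromosome, position, the two di-colour alleles, reference, genotype, p-value, a comma-separated flag list, coverage, three allele counts, two average QVs, the heterozygous flag, algorithm and algorithm name. A rough sketch of parsing a single fabricated line, where every value is made-up sample data, could look like this:

    import java.io.BufferedReader;
    import java.io.StringReader;

    import org.qcmg.unused.consensuscalls.ConsensusCallsRecord;
    import org.qcmg.unused.consensuscalls.ConsensusCallsSerializer;

    public class ConsensusCallsSketch {
        public static void main(String[] args) throws Exception {
            // a single fabricated 17-column record; all values are sample data only
            String line = String.join("\t",
                    "chr1", "12345", "A_22", "G_11", "A", "G/A", "0.0001",
                    "h1,m2", "30", "12", "18", "12", "25", "27", "1",
                    "diBayes", "SNP-calls");
            try (BufferedReader br = new BufferedReader(new StringReader(line))) {
                ConsensusCallsRecord rec = ConsensusCallsSerializer.nextRecord(br);
                System.out.println(rec.getChr() + ":" + rec.getPosition() + " " + rec.getGenotype());
            }
        }
    }

One thing worth double-checking in parseRecord above: params[9] is passed to setNCountsNonReferenceAllele rather than setNCounts1StAllele, so the first-allele count is never populated and the non-reference count is then overwritten by params[11].
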
+ */ +package org.qcmg.unused.consensuscalls; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +public final class ConsensusCallsSerializer { + private static final Pattern tabbedPattern = Pattern.compile("[\\t]+"); + private static final Pattern commaPattern = Pattern.compile("[,]+"); + + public static ConsensusCallsRecord nextRecord(final BufferedReader reader) + throws Exception, IOException { + ConsensusCallsRecord result = null; + try { + String line = reader.readLine(); + if (null != line) { + result = parseRecord(line); + } + } catch (IOException e) { + throw e; + } catch (Exception e) { + throw e; + } + return result; + } + + static ConsensusCallsRecord parseRecord(final String line) throws Exception { + String[] params = tabbedPattern.split(line); + if (17 != params.length) { + throw new Exception("Bad Consensus Calls format"); + } + ConsensusCallsRecord result = new ConsensusCallsRecord(); + result.setChr(params[0]); + result.setPosition(Integer.parseInt(params[1])); + result.setAlleleDiColor1(params[2]); + result.setAlleleDiColor2(params[3]); + result.setReference(params[4]); + result.setGenotype(params[5]); + result.setPValue(Double.parseDouble(params[6])); + parseFlags(result.getFlag(), params[7]); + result.setCoverage(Integer.parseInt(params[8])); + result.setNCountsNonReferenceAllele(Integer.parseInt(params[9])); + result.setNCountsReferenceAllele(Integer.parseInt(params[10])); + result.setNCountsNonReferenceAllele(Integer.parseInt(params[11])); + result.setRefAvgQV(Integer.parseInt(params[12])); + result.setNovelAvgQV(Integer.parseInt(params[13])); + result.setHeterozygous(Integer.parseInt(params[14])); + result.setAlgorithm(params[15]); + result.setAlgorithmName(params[16]); + return result; + } + + public static void parseFlags(final List list, final String value) throws Exception { + String[] params = commaPattern.split(value); + if (1 > params.length) { + throw new Exception("Bad Consensus Calls Flag format"); + } + for (String param : params) { + list.add(ConsensusCallsFlag.fromValue(param)); + } + } +} diff --git a/qio/src/org/qcmg/unused/exception/RecordIteratorException.java b/qio/src/org/qcmg/unused/exception/RecordIteratorException.java new file mode 100644 index 000000000..d2df8afe7 --- /dev/null +++ b/qio/src/org/qcmg/unused/exception/RecordIteratorException.java @@ -0,0 +1,17 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.exception; + +public class RecordIteratorException extends RuntimeException { + + private static final long serialVersionUID = 7963940971937212428L; + + public RecordIteratorException() {} // default constructor + public RecordIteratorException(Exception e) { + super(e.getMessage(), e); + } + public RecordIteratorException(String message, Exception e) { + super(message, e); + } +} diff --git a/qio/src/org/qcmg/unused/exception/RecordIteratorException.java-- b/qio/src/org/qcmg/unused/exception/RecordIteratorException.java-- new file mode 100644 index 000000000..d2df8afe7 --- /dev/null +++ b/qio/src/org/qcmg/unused/exception/RecordIteratorException.java-- @@ -0,0 +1,17 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.exception; + +public class RecordIteratorException extends RuntimeException { + + private static final long serialVersionUID = 7963940971937212428L; + + public RecordIteratorException() {} // default constructor + public RecordIteratorException(Exception e) { + super(e.getMessage(), e); + } + public RecordIteratorException(String message, Exception e) { + super(message, e); + } +} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java new file mode 100644 index 000000000..3ec52a69c --- /dev/null +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java @@ -0,0 +1,38 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.genesymbol; + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +public final class GeneSymbolFileReader implements Closeable, Iterable { + private final File file; + private final InputStream inputStream; + + public GeneSymbolFileReader(final File file) throws IOException { + this.file = file; + FileInputStream fileStream = new FileInputStream(file); + inputStream = fileStream; + } + + public Iterator iterator() { + return getRecordIterator(); + } + + public GeneSymbolRecordIterator getRecordIterator() { + return new GeneSymbolRecordIterator(inputStream); + } + + public void close() throws IOException { + inputStream.close(); + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java-- b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java-- new file mode 100644 index 000000000..3ec52a69c --- /dev/null +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java-- @@ -0,0 +1,38 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.genesymbol; + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +public final class GeneSymbolFileReader implements Closeable, Iterable { + private final File file; + private final InputStream inputStream; + + public GeneSymbolFileReader(final File file) throws IOException { + this.file = file; + FileInputStream fileStream = new FileInputStream(file); + inputStream = fileStream; + } + + public Iterator iterator() { + return getRecordIterator(); + } + + public GeneSymbolRecordIterator getRecordIterator() { + return new GeneSymbolRecordIterator(inputStream); + } + + public void close() throws IOException { + inputStream.close(); + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java new file mode 100644 index 000000000..efc505155 --- /dev/null +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java @@ -0,0 +1,32 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.genesymbol; + + +public class GeneSymbolRecord { + + private String geneId; + private String transcriptId; + private String symbol; + + public String getGeneId() { + return geneId; + } + public void setGeneId(String geneId) { + this.geneId = geneId; + } + public String getTranscriptId() { + return transcriptId; + } + public void setTranscriptId(String transcriptId) { + this.transcriptId = transcriptId; + } + public String getSymbol() { + return symbol; + } + public void setSymbol(String symbol) { + this.symbol = symbol; + } + +} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java-- b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java-- new file mode 100644 index 000000000..efc505155 --- /dev/null +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java-- @@ -0,0 +1,32 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.genesymbol; + + +public class GeneSymbolRecord { + + private String geneId; + private String transcriptId; + private String symbol; + + public String getGeneId() { + return geneId; + } + public void setGeneId(String geneId) { + this.geneId = geneId; + } + public String getTranscriptId() { + return transcriptId; + } + public void setTranscriptId(String transcriptId) { + this.transcriptId = transcriptId; + } + public String getSymbol() { + return symbol; + } + public void setSymbol(String symbol) { + this.symbol = symbol; + } + +} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java new file mode 100644 index 000000000..1ad4c2505 --- /dev/null +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java @@ -0,0 +1,49 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.genesymbol; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.NoSuchElementException; + +public final class GeneSymbolRecordIterator implements Iterator { + private final BufferedReader reader; + private GeneSymbolRecord next; + + public GeneSymbolRecordIterator(final InputStream stream) { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public GeneSymbolRecord next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + GeneSymbolRecord result = next; + readNext(); + return result; + } + + private void readNext() { + try { + next = GeneSymbolSerializer.nextRecord(reader); + } catch (NoSuchElementException e) { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex.getMessage(), ex); + } + } + + @SuppressWarnings("unchecked") + public void remove() { + throw new UnsupportedOperationException(); + } +} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java-- b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java-- new file mode 100644 index 000000000..1ad4c2505 --- /dev/null +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java-- @@ -0,0 +1,49 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.genesymbol; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.NoSuchElementException; + +public final class GeneSymbolRecordIterator implements Iterator { + private final BufferedReader reader; + private GeneSymbolRecord next; + + public GeneSymbolRecordIterator(final InputStream stream) { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public GeneSymbolRecord next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + GeneSymbolRecord result = next; + readNext(); + return result; + } + + private void readNext() { + try { + next = GeneSymbolSerializer.nextRecord(reader); + } catch (NoSuchElementException e) { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex.getMessage(), ex); + } + } + + @SuppressWarnings("unchecked") + public void remove() { + throw new UnsupportedOperationException(); + } +} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java new file mode 100644 index 000000000..d117b5abe --- /dev/null +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java @@ -0,0 +1,44 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.genesymbol; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +public final class GeneSymbolSerializer { + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + private static final String DEFAULT_HEADER_PREFIX = "#"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { + line = reader.readLine(); + } + return line; + } + + public static GeneSymbolRecord nextRecord(final BufferedReader reader) + throws IOException , Exception { + GeneSymbolRecord result = null; + String line = nextNonheaderLine(reader); + if (null != line) { + result = parseRecord(line); + } + return result; + } + + static GeneSymbolRecord parseRecord(final String line) throws Exception { + String[] params = tabbedPattern.split(line, -1); + if (3 > params.length) { + throw new Exception("Bad Gene Symbol format. Insufficient columns: '" + line + "'"); + } + GeneSymbolRecord result = new GeneSymbolRecord(); + result.setGeneId(params[0]); + result.setTranscriptId(params[1]); + result.setSymbol(params[2]); + return result; + } +} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java-- b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java-- new file mode 100644 index 000000000..d117b5abe --- /dev/null +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java-- @@ -0,0 +1,44 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
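A minimal sketch (not part of this patch) of the line format GeneSymbolSerializer expects: '#'-prefixed header lines are skipped and each data line needs at least three tab-separated columns. The identifiers below are invented.

import java.io.BufferedReader;
import java.io.StringReader;

import org.qcmg.unused.genesymbol.GeneSymbolRecord;
import org.qcmg.unused.genesymbol.GeneSymbolSerializer;

public class GeneSymbolSerializerExample {
    public static void main(String[] args) throws Exception {
        String data = "#gene_id\ttranscript_id\tsymbol\n"          // header line, skipped
                    + "ENSG00000000001\tENST00000000001\tGENE1\n"; // gene id, transcript id, symbol
        try (BufferedReader reader = new BufferedReader(new StringReader(data))) {
            GeneSymbolRecord rec = GeneSymbolSerializer.nextRecord(reader);
            System.out.println(rec.getSymbol()); // prints GENE1
        }
    }
}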
+ */ +package org.qcmg.unused.genesymbol; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +public final class GeneSymbolSerializer { + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + private static final String DEFAULT_HEADER_PREFIX = "#"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { + line = reader.readLine(); + } + return line; + } + + public static GeneSymbolRecord nextRecord(final BufferedReader reader) + throws IOException , Exception { + GeneSymbolRecord result = null; + String line = nextNonheaderLine(reader); + if (null != line) { + result = parseRecord(line); + } + return result; + } + + static GeneSymbolRecord parseRecord(final String line) throws Exception { + String[] params = tabbedPattern.split(line, -1); + if (3 > params.length) { + throw new Exception("Bad Gene Symbol format. Insufficient columns: '" + line + "'"); + } + GeneSymbolRecord result = new GeneSymbolRecord(); + result.setGeneId(params[0]); + result.setTranscriptId(params[1]); + result.setSymbol(params[2]); + return result; + } +} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java b/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java new file mode 100644 index 000000000..5ef126ee3 --- /dev/null +++ b/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java @@ -0,0 +1,21 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.illumina; + +import java.io.File; +import java.io.IOException; + +import org.qcmg.unused.reader.AbstractReader; + +public final class IlluminaFileReader extends AbstractReader { + + public IlluminaFileReader(final File file) throws IOException { + super(file); + } + + public IlluminaRecordIterator getRecordIterator() throws Exception { + return new IlluminaRecordIterator(inputStream); + } + +} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java-- b/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java-- new file mode 100644 index 000000000..014fab77d --- /dev/null +++ b/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java-- @@ -0,0 +1,21 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.illumina; + +import java.io.File; +import java.io.IOException; + +import org.qcmg.reader.AbstractReader; + +public final class IlluminaFileReader extends AbstractReader { + + public IlluminaFileReader(final File file) throws IOException { + super(file); + } + + public IlluminaRecordIterator getRecordIterator() throws Exception { + return new IlluminaRecordIterator(inputStream); + } + +} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java b/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java new file mode 100644 index 000000000..eca4e9837 --- /dev/null +++ b/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java @@ -0,0 +1,262 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.illumina; + +import org.qcmg.common.string.StringUtils; +import org.qcmg.qmule.record.Record; + +public class IlluminaRecord implements Record { + + private String chr; + private int start; + private final String strand; + private String snpId; + + //TODO do we need this field? + private float GCScore; + + private char firstAllele; + private char secondAllele; + + private final char firstAlleleForward; + private final char secondAlleleForward; + + private final char firstAlleleCall; + private final char secondAlleleCall; + + //TODO do we need this field? + private boolean hom; + private boolean isSnp; + private String snp; + + private final float logRRatio; + private final float bAlleleFreq; + + private final int rawX; + private final int rawY; + + /** + * Constructor that takes in a String array, retrieving pertinent fields from the array to populate the record + * + * @param rawIlluminaData String[] representing a line in the raw Illumina data file + */ + public IlluminaRecord(String [] rawIlluminaData) { + // chromosome and position defined in the raw Illumina data file relate to an old version + // of the genome (hg18), so instead, we use the dbSNP id to get the more recent + //(hg19) chromosome and position details from the dbSNP file at a later date + int length = rawIlluminaData.length; + snpId = rawIlluminaData[0]; + GCScore = Float.parseFloat(rawIlluminaData[4]); + firstAlleleForward = rawIlluminaData[10].charAt(0); + secondAlleleForward = rawIlluminaData[11].charAt(0); + firstAllele = rawIlluminaData[12].charAt(0); + secondAllele = rawIlluminaData[13].charAt(0); + setHom(rawIlluminaData[14].equals(rawIlluminaData[15])); + chr = rawIlluminaData[16]; + start = Integer.parseInt(rawIlluminaData[17]); + snp = rawIlluminaData[20]; + rawX = Integer.parseInt(rawIlluminaData[length - 4]); + rawY = Integer.parseInt(rawIlluminaData[length - 3]); + bAlleleFreq = Float.parseFloat(rawIlluminaData[length - 2]); + String logRRatioString = rawIlluminaData[length - 1]; + if (StringUtils.isNullOrEmpty(logRRatioString)) + logRRatioString = "NaN"; + logRRatio = Float.parseFloat(logRRatioString); + firstAlleleCall = rawIlluminaData[14].charAt(0); + secondAlleleCall = rawIlluminaData[15].charAt(0); + strand = rawIlluminaData[22]; // use customer strand rather than illumina strand +// strand = rawIlluminaData[21]; + } + + + public String getChr() { + return chr; + } + public void setChr(String chr) { + this.chr = chr; + } + public int getStart() { + return start; + } + public void setStart(int start) { + this.start = start; + } + public String getSnpId() { + return snpId; + } + public void setSnpId(String snpId) { + this.snpId = snpId; + } + public float getGCScore() { + return GCScore; + } + public void setGCScore(float GCScore) { + this.GCScore = GCScore; + } + public char getFirstAllele() { + return firstAllele; + } + public void setFirstAllele(char firstAllele) { + this.firstAllele = firstAllele; + } + public char getSecondAllele() { + return secondAllele; + } + public void setSecondAllele(char secondAllele) { + this.secondAllele = secondAllele; + } + public String getSnp() { + return snp; + } + public void setSnp(String snp) { + this.snp = snp; + } + public void setHom(boolean hom) { + this.hom = hom; + } + public boolean isHom() { + return hom; + } + + public void setSnp(boolean isSnp) { + this.isSnp = isSnp; + } + + public boolean isSnp() { + return isSnp; + } + + public float getLogRRatio() { + return logRRatio; + } + + + public float getbAlleleFreq() { + return 
bAlleleFreq; + } + + + public char getFirstAlleleCall() { + return firstAlleleCall; + } + + + public char getSecondAlleleCall() { + return secondAlleleCall; + } + + public int getRawX() { + return rawX; + } + + public int getRawY() { + return rawY; + } + + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Float.floatToIntBits(GCScore); + result = prime * result + Float.floatToIntBits(bAlleleFreq); + result = prime * result + ((chr == null) ? 0 : chr.hashCode()); + result = prime * result + firstAllele; + result = prime * result + firstAlleleCall; + result = prime * result + (hom ? 1231 : 1237); + result = prime * result + (isSnp ? 1231 : 1237); + result = prime * result + Float.floatToIntBits(logRRatio); + result = prime * result + rawX; + result = prime * result + rawY; + result = prime * result + secondAllele; + result = prime * result + secondAlleleCall; + result = prime * result + ((snp == null) ? 0 : snp.hashCode()); + result = prime * result + ((snpId == null) ? 0 : snpId.hashCode()); + result = prime * result + start; + return result; + } + + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + IlluminaRecord other = (IlluminaRecord) obj; + if (Float.floatToIntBits(GCScore) != Float + .floatToIntBits(other.GCScore)) + return false; + if (Float.floatToIntBits(bAlleleFreq) != Float + .floatToIntBits(other.bAlleleFreq)) + return false; + if (chr == null) { + if (other.chr != null) + return false; + } else if (!chr.equals(other.chr)) + return false; + if (firstAllele != other.firstAllele) + return false; + if (firstAlleleCall != other.firstAlleleCall) + return false; + if (hom != other.hom) + return false; + if (isSnp != other.isSnp) + return false; + if (Float.floatToIntBits(logRRatio) != Float + .floatToIntBits(other.logRRatio)) + return false; + if (rawX != other.rawX) + return false; + if (rawY != other.rawY) + return false; + if (secondAllele != other.secondAllele) + return false; + if (secondAlleleCall != other.secondAlleleCall) + return false; + if (snp == null) { + if (other.snp != null) + return false; + } else if (!snp.equals(other.snp)) + return false; + if (snpId == null) { + if (other.snpId != null) + return false; + } else if (!snpId.equals(other.snpId)) + return false; + if (start != other.start) + return false; + return true; + } + + + @Override + public String toString() { + return "IlluminaRecord [GCScore=" + GCScore + ", bAlleleFreq=" + + bAlleleFreq + ", chr=" + chr + ", firstAllele=" + firstAllele + + ", firstAlleleCall=" + firstAlleleCall + ", hom=" + hom + + ", isSnp=" + isSnp + ", logRRatio=" + logRRatio + ", rawX=" + + rawX + ", rawY=" + rawY + ", secondAllele=" + secondAllele + + ", secondAlleleCall=" + secondAlleleCall + ", snp=" + snp + + ", snpId=" + snpId + ", start=" + start + "]"; + } + + + public String getStrand() { + return strand; + } + + + public char getFirstAlleleForward() { + return firstAlleleForward; + } + + public char getSecondAlleleForward() { + return secondAlleleForward; + } + + +} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java-- b/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java-- new file mode 100644 index 000000000..ec203b1b2 --- /dev/null +++ b/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java-- @@ -0,0 +1,262 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
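For illustration only (not part of this patch), a fabricated 32-column row showing which raw-file columns the IlluminaRecord constructor above actually reads; every value is invented and the untouched columns are just placeholders.

import java.util.Arrays;

import org.qcmg.unused.illumina.IlluminaRecord;

public class IlluminaRecordExample {
    public static void main(String[] args) {
        String[] row = new String[32];          // raw Illumina rows carry 32 tab-separated fields
        Arrays.fill(row, "0");                  // placeholder for columns the constructor ignores
        row[0]  = "rs0000001";                  // SNP Name
        row[4]  = "0.91";                       // GC Score
        row[10] = "A"; row[11] = "G";           // Allele1/2 - Forward
        row[12] = "A"; row[13] = "G";           // Allele1/2 - Design
        row[14] = "A"; row[15] = "B";           // Allele1/2 - AB calls
        row[16] = "chr1";                       // Chr (hg18 coordinates in the raw file)
        row[17] = "123456";                     // Position
        row[20] = "[A/G]";                      // SNP
        row[22] = "TOP";                        // Customer Strand
        row[28] = "500";  row[29] = "600";      // X Raw, Y Raw
        row[30] = "0.5";                        // B Allele Freq
        row[31] = "";                           // empty Log R Ratio is stored as NaN
        IlluminaRecord rec = new IlluminaRecord(row);
        System.out.println(rec.getSnpId() + " " + rec.getStart() + " " + rec.getLogRRatio());
    }
}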
This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.illumina; + +import org.qcmg.common.string.StringUtils; +import org.qcmg.unused.record.Record; + +public class IlluminaRecord implements Record { + + private String chr; + private int start; + private final String strand; + private String snpId; + + //TODO do we need this field? + private float GCScore; + + private char firstAllele; + private char secondAllele; + + private final char firstAlleleForward; + private final char secondAlleleForward; + + private final char firstAlleleCall; + private final char secondAlleleCall; + + //TODO do we need this field? + private boolean hom; + private boolean isSnp; + private String snp; + + private final float logRRatio; + private final float bAlleleFreq; + + private final int rawX; + private final int rawY; + + /** + * Constructor that takes in a String array, retrieving pertinent fields from the array to populate the record + * + * @param rawIlluminaData String[] representing a line in the raw Illumina data file + */ + public IlluminaRecord(String [] rawIlluminaData) { + // chromosome and position defined in the raw Illumina data file relate to an old version + // of the genome (hg18), so instead, we use the dbSNP id to get the more recent + //(hg19) chromosome and position details from the dbSNP file at a later date + int length = rawIlluminaData.length; + snpId = rawIlluminaData[0]; + GCScore = Float.parseFloat(rawIlluminaData[4]); + firstAlleleForward = rawIlluminaData[10].charAt(0); + secondAlleleForward = rawIlluminaData[11].charAt(0); + firstAllele = rawIlluminaData[12].charAt(0); + secondAllele = rawIlluminaData[13].charAt(0); + setHom(rawIlluminaData[14].equals(rawIlluminaData[15])); + chr = rawIlluminaData[16]; + start = Integer.parseInt(rawIlluminaData[17]); + snp = rawIlluminaData[20]; + rawX = Integer.parseInt(rawIlluminaData[length - 4]); + rawY = Integer.parseInt(rawIlluminaData[length - 3]); + bAlleleFreq = Float.parseFloat(rawIlluminaData[length - 2]); + String logRRatioString = rawIlluminaData[length - 1]; + if (StringUtils.isNullOrEmpty(logRRatioString)) + logRRatioString = "NaN"; + logRRatio = Float.parseFloat(logRRatioString); + firstAlleleCall = rawIlluminaData[14].charAt(0); + secondAlleleCall = rawIlluminaData[15].charAt(0); + strand = rawIlluminaData[22]; // use customer strand rather than illumina strand +// strand = rawIlluminaData[21]; + } + + + public String getChr() { + return chr; + } + public void setChr(String chr) { + this.chr = chr; + } + public int getStart() { + return start; + } + public void setStart(int start) { + this.start = start; + } + public String getSnpId() { + return snpId; + } + public void setSnpId(String snpId) { + this.snpId = snpId; + } + public float getGCScore() { + return GCScore; + } + public void setGCScore(float GCScore) { + this.GCScore = GCScore; + } + public char getFirstAllele() { + return firstAllele; + } + public void setFirstAllele(char firstAllele) { + this.firstAllele = firstAllele; + } + public char getSecondAllele() { + return secondAllele; + } + public void setSecondAllele(char secondAllele) { + this.secondAllele = secondAllele; + } + public String getSnp() { + return snp; + } + public void setSnp(String snp) { + this.snp = snp; + } + public void setHom(boolean hom) { + this.hom = hom; + } + public boolean isHom() { + return hom; + } + + public void setSnp(boolean isSnp) { + this.isSnp = isSnp; + } + + public boolean isSnp() { + return isSnp; + } + + public float 
getLogRRatio() { + return logRRatio; + } + + + public float getbAlleleFreq() { + return bAlleleFreq; + } + + + public char getFirstAlleleCall() { + return firstAlleleCall; + } + + + public char getSecondAlleleCall() { + return secondAlleleCall; + } + + public int getRawX() { + return rawX; + } + + public int getRawY() { + return rawY; + } + + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Float.floatToIntBits(GCScore); + result = prime * result + Float.floatToIntBits(bAlleleFreq); + result = prime * result + ((chr == null) ? 0 : chr.hashCode()); + result = prime * result + firstAllele; + result = prime * result + firstAlleleCall; + result = prime * result + (hom ? 1231 : 1237); + result = prime * result + (isSnp ? 1231 : 1237); + result = prime * result + Float.floatToIntBits(logRRatio); + result = prime * result + rawX; + result = prime * result + rawY; + result = prime * result + secondAllele; + result = prime * result + secondAlleleCall; + result = prime * result + ((snp == null) ? 0 : snp.hashCode()); + result = prime * result + ((snpId == null) ? 0 : snpId.hashCode()); + result = prime * result + start; + return result; + } + + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + IlluminaRecord other = (IlluminaRecord) obj; + if (Float.floatToIntBits(GCScore) != Float + .floatToIntBits(other.GCScore)) + return false; + if (Float.floatToIntBits(bAlleleFreq) != Float + .floatToIntBits(other.bAlleleFreq)) + return false; + if (chr == null) { + if (other.chr != null) + return false; + } else if (!chr.equals(other.chr)) + return false; + if (firstAllele != other.firstAllele) + return false; + if (firstAlleleCall != other.firstAlleleCall) + return false; + if (hom != other.hom) + return false; + if (isSnp != other.isSnp) + return false; + if (Float.floatToIntBits(logRRatio) != Float + .floatToIntBits(other.logRRatio)) + return false; + if (rawX != other.rawX) + return false; + if (rawY != other.rawY) + return false; + if (secondAllele != other.secondAllele) + return false; + if (secondAlleleCall != other.secondAlleleCall) + return false; + if (snp == null) { + if (other.snp != null) + return false; + } else if (!snp.equals(other.snp)) + return false; + if (snpId == null) { + if (other.snpId != null) + return false; + } else if (!snpId.equals(other.snpId)) + return false; + if (start != other.start) + return false; + return true; + } + + + @Override + public String toString() { + return "IlluminaRecord [GCScore=" + GCScore + ", bAlleleFreq=" + + bAlleleFreq + ", chr=" + chr + ", firstAllele=" + firstAllele + + ", firstAlleleCall=" + firstAlleleCall + ", hom=" + hom + + ", isSnp=" + isSnp + ", logRRatio=" + logRRatio + ", rawX=" + + rawX + ", rawY=" + rawY + ", secondAllele=" + secondAllele + + ", secondAlleleCall=" + secondAlleleCall + ", snp=" + snp + + ", snpId=" + snpId + ", start=" + start + "]"; + } + + + public String getStrand() { + return strand; + } + + + public char getFirstAlleleForward() { + return firstAlleleForward; + } + + public char getSecondAlleleForward() { + return secondAlleleForward; + } + + +} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java b/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java new file mode 100644 index 000000000..872b77b99 --- /dev/null +++ b/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java @@ -0,0 +1,21 @@ +/** + * © Copyright 
The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.illumina; + +import java.io.InputStream; + +import org.qcmg.qmule.record.AbstractRecordIterator; + +public class IlluminaRecordIterator extends AbstractRecordIterator { + + public IlluminaRecordIterator(InputStream stream) throws Exception { + super(stream); + } + + @Override + protected void readNext() throws Exception { + next = IlluminaSerializer.nextRecord(reader); + } + +} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java-- b/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java-- new file mode 100644 index 000000000..6f39a9463 --- /dev/null +++ b/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java-- @@ -0,0 +1,21 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.illumina; + +import java.io.InputStream; + +import org.qcmg.unused.record.AbstractRecordIterator; + +public class IlluminaRecordIterator extends AbstractRecordIterator { + + public IlluminaRecordIterator(InputStream stream) throws Exception { + super(stream); + } + + @Override + protected void readNext() throws Exception { + next = IlluminaSerializer.nextRecord(reader); + } + +} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java b/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java new file mode 100644 index 000000000..2bef61139 --- /dev/null +++ b/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java @@ -0,0 +1,73 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.illumina; + +import java.io.BufferedReader; +import java.io.IOException; + +import org.qcmg.common.util.TabTokenizer; + +public final class IlluminaSerializer { + private static final String HEADER_LINE = "[Header]"; + private static final String DATA_LINE = "[Data]"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + // header lines are as follows: + /* +[Header] +GSGT Version 1.8.4 +Processing Date 8/12/2011 8:41 PM +Content HumanOmni1-Quad_v1-0_H.bpm +Num SNPs 1134514 +Total SNPs 1134514 +Num Samples 259 +Total Samples 260 +File 77 of 259 +[Data] +SNP Name Sample ID Allele1 - Top Allele2 - Top GC Score Sample Name Sample Group Sample Index SNP Index SNP Aux Allele1 - Forward Allele2 - Forward Allele1 - Design Allele2 - Design Allele1 - AB Allele2 - AB Chr Position GT Score Cluster Sep SNP ILMN Strand Customer Strand Top Genomic Sequence Theta R X Y +X Raw Y Raw B Allele Freq Log R Ratio + */ + + String line = reader.readLine(); + if (null != line && line.startsWith(HEADER_LINE)) { + + // ignore header lines until we hit [DATA] + line = reader.readLine(); + while (null != line && ! line.startsWith(DATA_LINE)) { + line = reader.readLine(); + } + // next line is still header.... + line = reader.readLine(); + line = reader.readLine(); + } + return line; + } + + public static IlluminaRecord nextRecord(final BufferedReader reader) throws Exception { + IlluminaRecord result = null; + + String data = nextNonheaderLine(reader); + if (null != data ) { + result = parseRecord(data); + } + + return result; + } + + static String[] parseData(final String value) throws Exception { + String[] dataArray = TabTokenizer.tokenize(value); + + // raw Illumina data has 32 fields... 
and the first one is an integer + if (dataArray.length != 32) throw new Exception("Bad Illumina data format - expecting 32 fields but saw " + dataArray.length); + + return dataArray; + } + + static IlluminaRecord parseRecord(final String record) + throws Exception { + return new IlluminaRecord(parseData(record)); + } + +} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java-- b/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java-- new file mode 100644 index 000000000..2bef61139 --- /dev/null +++ b/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java-- @@ -0,0 +1,73 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.illumina; + +import java.io.BufferedReader; +import java.io.IOException; + +import org.qcmg.common.util.TabTokenizer; + +public final class IlluminaSerializer { + private static final String HEADER_LINE = "[Header]"; + private static final String DATA_LINE = "[Data]"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + // header lines are as follows: + /* +[Header] +GSGT Version 1.8.4 +Processing Date 8/12/2011 8:41 PM +Content HumanOmni1-Quad_v1-0_H.bpm +Num SNPs 1134514 +Total SNPs 1134514 +Num Samples 259 +Total Samples 260 +File 77 of 259 +[Data] +SNP Name Sample ID Allele1 - Top Allele2 - Top GC Score Sample Name Sample Group Sample Index SNP Index SNP Aux Allele1 - Forward Allele2 - Forward Allele1 - Design Allele2 - Design Allele1 - AB Allele2 - AB Chr Position GT Score Cluster Sep SNP ILMN Strand Customer Strand Top Genomic Sequence Theta R X Y +X Raw Y Raw B Allele Freq Log R Ratio + */ + + String line = reader.readLine(); + if (null != line && line.startsWith(HEADER_LINE)) { + + // ignore header lines until we hit [DATA] + line = reader.readLine(); + while (null != line && ! line.startsWith(DATA_LINE)) { + line = reader.readLine(); + } + // next line is still header.... + line = reader.readLine(); + line = reader.readLine(); + } + return line; + } + + public static IlluminaRecord nextRecord(final BufferedReader reader) throws Exception { + IlluminaRecord result = null; + + String data = nextNonheaderLine(reader); + if (null != data ) { + result = parseRecord(data); + } + + return result; + } + + static String[] parseData(final String value) throws Exception { + String[] dataArray = TabTokenizer.tokenize(value); + + // raw Illumina data has 32 fields... and the first one is an integer + if (dataArray.length != 32) throw new Exception("Bad Illumina data format - expecting 32 fields but saw " + dataArray.length); + + return dataArray; + } + + static IlluminaRecord parseRecord(final String record) + throws Exception { + return new IlluminaRecord(parseData(record)); + } + +} diff --git a/qio/src/org/qcmg/unused/maf/MAFFileReader.java b/qio/src/org/qcmg/unused/maf/MAFFileReader.java new file mode 100644 index 000000000..9d98df780 --- /dev/null +++ b/qio/src/org/qcmg/unused/maf/MAFFileReader.java @@ -0,0 +1,42 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
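A hedged sketch (not part of this patch) of reading a raw Illumina export through IlluminaFileReader; the path is invented, and it assumes AbstractReader exposes close() and that AbstractRecordIterator supplies the usual hasNext()/next(), since neither class appears in this diff.

import java.io.File;

import org.qcmg.unused.illumina.IlluminaFileReader;
import org.qcmg.unused.illumina.IlluminaRecord;
import org.qcmg.unused.illumina.IlluminaRecordIterator;

public class IlluminaReaderExample {
    public static void main(String[] args) throws Exception {
        IlluminaFileReader reader = new IlluminaFileReader(new File("sample.illumina.txt"));
        try {
            IlluminaRecordIterator it = reader.getRecordIterator();
            while (it.hasNext()) {
                // the serializer has already skipped the [Header] block and the column-name lines
                IlluminaRecord rec = it.next();
                System.out.println(rec.getSnpId() + "\t" + rec.getChr() + ":" + rec.getStart());
            }
        } finally {
            reader.close(); // assumption: close() is inherited from AbstractReader
        }
    }
}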
+ */ +package org.qcmg.unused.maf; + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +import org.qcmg.common.maf.MAFRecord; + +public final class MAFFileReader implements Closeable, Iterable { + private final File file; + private final InputStream inputStream; + + public MAFFileReader(final File file) throws IOException { + this.file = file; + FileInputStream fileStream = new FileInputStream(file); + inputStream = fileStream; + } + + public Iterator iterator() { + return getRecordIterator(); + } + + public MAFRecordIterator getRecordIterator() { + return new MAFRecordIterator(inputStream); + } + + public void close() throws IOException { + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/unused/maf/MAFFileReader.java-- b/qio/src/org/qcmg/unused/maf/MAFFileReader.java-- new file mode 100644 index 000000000..9d98df780 --- /dev/null +++ b/qio/src/org/qcmg/unused/maf/MAFFileReader.java-- @@ -0,0 +1,42 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.maf; + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +import org.qcmg.common.maf.MAFRecord; + +public final class MAFFileReader implements Closeable, Iterable { + private final File file; + private final InputStream inputStream; + + public MAFFileReader(final File file) throws IOException { + this.file = file; + FileInputStream fileStream = new FileInputStream(file); + inputStream = fileStream; + } + + public Iterator iterator() { + return getRecordIterator(); + } + + public MAFRecordIterator getRecordIterator() { + return new MAFRecordIterator(inputStream); + } + + public void close() throws IOException { + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java b/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java new file mode 100644 index 000000000..760780922 --- /dev/null +++ b/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java @@ -0,0 +1,54 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.maf; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.qcmg.common.maf.MAFRecord; + +public final class MAFRecordIterator implements Iterator { + private final BufferedReader reader; + private MAFRecord next; + + public MAFRecordIterator(final InputStream stream) { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public MAFRecord next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + MAFRecord result = next; + readNext(); + return result; + } + + private void readNext() { + try { + next = MAFSerializer.nextRecord(reader); + } catch (NoSuchElementException e) { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex.getMessage(), ex); + } + } + + @SuppressWarnings("unchecked") + public void remove() { + throw new UnsupportedOperationException(); + } +} diff --git a/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java-- b/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java-- new file mode 100644 index 000000000..760780922 --- /dev/null +++ b/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java-- @@ -0,0 +1,54 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.maf; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.qcmg.common.maf.MAFRecord; + +public final class MAFRecordIterator implements Iterator { + private final BufferedReader reader; + private MAFRecord next; + + public MAFRecordIterator(final InputStream stream) { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public MAFRecord next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + MAFRecord result = next; + readNext(); + return result; + } + + private void readNext() { + try { + next = MAFSerializer.nextRecord(reader); + } catch (NoSuchElementException e) { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex.getMessage(), ex); + } + } + + @SuppressWarnings("unchecked") + public void remove() { + throw new UnsupportedOperationException(); + } +} diff --git a/qio/src/org/qcmg/unused/maf/MAFSerializer.java b/qio/src/org/qcmg/unused/maf/MAFSerializer.java new file mode 100644 index 000000000..bfd4b7d0a --- /dev/null +++ b/qio/src/org/qcmg/unused/maf/MAFSerializer.java @@ -0,0 +1,68 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.maf; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +import org.qcmg.common.maf.MAFRecord; + +public final class MAFSerializer { + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + private static final String DEFAULT_HEADER_PREFIX = "#"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { + line = reader.readLine(); + } + return line; + } + + public static MAFRecord nextRecord(final BufferedReader reader) + throws IOException , Exception { + MAFRecord result = null; + String line = nextNonheaderLine(reader); + if (null != line) { + result = parseRecord(line); + } + return result; + } + + static MAFRecord parseRecord(final String line) throws Exception { + String[] params = tabbedPattern.split(line, -1); + if (8 > params.length) { + throw new Exception("Bad VCF format. Insufficient columns: '" + line + "'"); + } + MAFRecord result = new MAFRecord(); +// result.setChromosome(params[0]); +// result.setPosition(Integer.parseInt(params[1])); +// result.setRef(params[3].charAt(0)); +// result.setAlt(params[4].charAt(0)); +// result.setGenotype(params[9]); +// calculateGenotypeEnum(result); + return result; + } + + private static void calculateGenotypeEnum(MAFRecord record) { + +// String genotypeString = record.getGenotype().substring(0, 3); +// +// if ("0/1".equals(genotypeString)) { +// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getRef(), record.getAlt())); +// } else if ("1/1".equals(genotypeString)) { +// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getAlt(), record.getAlt())); +// } else if ("0/0".equals(genotypeString)) { +// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getRef(), record.getRef())); +// } else { +// System.out.println("unhandled genotype string: " + genotypeString); +// } + + } +} diff --git a/qio/src/org/qcmg/unused/maf/MAFSerializer.java-- b/qio/src/org/qcmg/unused/maf/MAFSerializer.java-- new file mode 100644 index 000000000..bfd4b7d0a --- /dev/null +++ b/qio/src/org/qcmg/unused/maf/MAFSerializer.java-- @@ -0,0 +1,68 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.maf; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +import org.qcmg.common.maf.MAFRecord; + +public final class MAFSerializer { + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + private static final String DEFAULT_HEADER_PREFIX = "#"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { + line = reader.readLine(); + } + return line; + } + + public static MAFRecord nextRecord(final BufferedReader reader) + throws IOException , Exception { + MAFRecord result = null; + String line = nextNonheaderLine(reader); + if (null != line) { + result = parseRecord(line); + } + return result; + } + + static MAFRecord parseRecord(final String line) throws Exception { + String[] params = tabbedPattern.split(line, -1); + if (8 > params.length) { + throw new Exception("Bad VCF format. 
Insufficient columns: '" + line + "'"); + } + MAFRecord result = new MAFRecord(); +// result.setChromosome(params[0]); +// result.setPosition(Integer.parseInt(params[1])); +// result.setRef(params[3].charAt(0)); +// result.setAlt(params[4].charAt(0)); +// result.setGenotype(params[9]); +// calculateGenotypeEnum(result); + return result; + } + + private static void calculateGenotypeEnum(MAFRecord record) { + +// String genotypeString = record.getGenotype().substring(0, 3); +// +// if ("0/1".equals(genotypeString)) { +// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getRef(), record.getAlt())); +// } else if ("1/1".equals(genotypeString)) { +// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getAlt(), record.getAlt())); +// } else if ("0/0".equals(genotypeString)) { +// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getRef(), record.getRef())); +// } else { +// System.out.println("unhandled genotype string: " + genotypeString); +// } + + } +} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java new file mode 100644 index 000000000..9ed51e589 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java @@ -0,0 +1,17 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerdesignsummary; + +import java.io.File; + +import org.qcmg.unused.reader.FileReader; + +public class PrimerDesignFileReader extends FileReader { + private final static PrimerDesignRecordSerializer serializer = + new PrimerDesignRecordSerializer(); + + public PrimerDesignFileReader(final File file) throws Exception { + super(file, serializer); + } +} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java-- b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java-- new file mode 100644 index 000000000..9ed51e589 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java-- @@ -0,0 +1,17 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerdesignsummary; + +import java.io.File; + +import org.qcmg.unused.reader.FileReader; + +public class PrimerDesignFileReader extends FileReader { + private final static PrimerDesignRecordSerializer serializer = + new PrimerDesignRecordSerializer(); + + public PrimerDesignFileReader(final File file) throws Exception { + super(file, serializer); + } +} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java new file mode 100644 index 000000000..289892f7e --- /dev/null +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java @@ -0,0 +1,135 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ + +package org.qcmg.unused.primerdesignsummary; + +public class PrimerDesignRecord { + + protected String snpId; + protected String gene; + protected PrimerPosition position; + protected String baseChange; + protected String snpClass; + + /** + * Gets the value of the snpId property. 
+ * + * @return + * possible object is + * {@link String } + * + */ + public String getSnpId() { + return snpId; + } + + /** + * Sets the value of the snpId property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSnpId(String value) { + this.snpId = value; + } + + /** + * Gets the value of the gene property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getGene() { + return gene; + } + + /** + * Sets the value of the gene property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setGene(String value) { + this.gene = value; + } + + /** + * Gets the value of the position property. + * + * @return + * possible object is + * {@link PrimerPosition } + * + */ + public PrimerPosition getPosition() { + return position; + } + + /** + * Sets the value of the position property. + * + * @param value + * allowed object is + * {@link PrimerPosition } + * + */ + public void setPosition(PrimerPosition value) { + this.position = value; + } + + /** + * Gets the value of the baseChange property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getBaseChange() { + return baseChange; + } + + /** + * Sets the value of the baseChange property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setBaseChange(String value) { + this.baseChange = value; + } + + /** + * Gets the value of the snpClass property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSnpClass() { + return snpClass; + } + + /** + * Sets the value of the snpClass property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSnpClass(String value) { + this.snpClass = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java-- b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java-- new file mode 100644 index 000000000..289892f7e --- /dev/null +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java-- @@ -0,0 +1,135 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ + +package org.qcmg.unused.primerdesignsummary; + +public class PrimerDesignRecord { + + protected String snpId; + protected String gene; + protected PrimerPosition position; + protected String baseChange; + protected String snpClass; + + /** + * Gets the value of the snpId property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSnpId() { + return snpId; + } + + /** + * Sets the value of the snpId property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSnpId(String value) { + this.snpId = value; + } + + /** + * Gets the value of the gene property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getGene() { + return gene; + } + + /** + * Sets the value of the gene property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setGene(String value) { + this.gene = value; + } + + /** + * Gets the value of the position property. + * + * @return + * possible object is + * {@link PrimerPosition } + * + */ + public PrimerPosition getPosition() { + return position; + } + + /** + * Sets the value of the position property. 
+ * + * @param value + * allowed object is + * {@link PrimerPosition } + * + */ + public void setPosition(PrimerPosition value) { + this.position = value; + } + + /** + * Gets the value of the baseChange property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getBaseChange() { + return baseChange; + } + + /** + * Sets the value of the baseChange property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setBaseChange(String value) { + this.baseChange = value; + } + + /** + * Gets the value of the snpClass property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSnpClass() { + return snpClass; + } + + /** + * Sets the value of the snpClass property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSnpClass(String value) { + this.snpClass = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java new file mode 100644 index 000000000..414939f09 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java @@ -0,0 +1,80 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerdesignsummary; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +import org.qcmg.qmule.record.Serializer; + +public final class PrimerDesignRecordSerializer extends Serializer { + public PrimerDesignRecord parseRecord(final String line) throws Exception { + String[] params = tabbedPattern.split(line); + if (5 > params.length) { + throw new Exception("Bad primer design record format: '" + line + + "'"); + } + String encodedPosition = params[2].trim(); + PrimerPosition primerPosition = parsePrimerPosition(encodedPosition); + + PrimerDesignRecord result = new PrimerDesignRecord(); + result.setSnpId(params[0].trim()); + result.setGene(params[1].trim()); + result.setPosition(primerPosition); + result.setBaseChange(params[3].trim()); + result.setSnpClass(params[4].trim()); + return result; + } + + public PrimerPosition parsePrimerPosition(String encodedPosition) + throws Exception { + String[] positionParams = colonPattern.split(encodedPosition); + if (2 != positionParams.length) { + throw new Exception("Bad primer design record position format: '" + + encodedPosition + "'"); + } + String chromosome = positionParams[0].trim(); + String positionRange = positionParams[1].trim(); + + String[] positions = hyphenPattern.split(positionRange); + if (2 != positions.length) { + throw new Exception("Bad primer design record position format: '" + + encodedPosition + "'"); + } + int start = Integer.parseInt(positions[0]); + int end = Integer.parseInt(positions[1]); + + PrimerPosition primerPosition = new PrimerPosition(); + primerPosition.setChromosome(chromosome); + primerPosition.setStart(start); + primerPosition.setEnd(end); + return primerPosition; + } + + public String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(HASH)) { + line = reader.readLine(); + } + return line; + } + + public String serialise(PrimerDesignRecord record) throws Exception { + // TODO Auto-generated method stub + return null; + } + + @Override + public PrimerDesignRecord 
parseRecord(BufferedReader reader) + throws Exception { + String line = nextNonheaderLine(reader); + PrimerDesignRecord result = null; + if (null != line) { + result = parseRecord(line); + } + return result; + } +} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java-- b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java-- new file mode 100644 index 000000000..c81b5bf23 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java-- @@ -0,0 +1,80 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerdesignsummary; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +import org.qcmg.unused.record.Serializer; + +public final class PrimerDesignRecordSerializer extends Serializer { + public PrimerDesignRecord parseRecord(final String line) throws Exception { + String[] params = tabbedPattern.split(line); + if (5 > params.length) { + throw new Exception("Bad primer design record format: '" + line + + "'"); + } + String encodedPosition = params[2].trim(); + PrimerPosition primerPosition = parsePrimerPosition(encodedPosition); + + PrimerDesignRecord result = new PrimerDesignRecord(); + result.setSnpId(params[0].trim()); + result.setGene(params[1].trim()); + result.setPosition(primerPosition); + result.setBaseChange(params[3].trim()); + result.setSnpClass(params[4].trim()); + return result; + } + + public PrimerPosition parsePrimerPosition(String encodedPosition) + throws Exception { + String[] positionParams = colonPattern.split(encodedPosition); + if (2 != positionParams.length) { + throw new Exception("Bad primer design record position format: '" + + encodedPosition + "'"); + } + String chromosome = positionParams[0].trim(); + String positionRange = positionParams[1].trim(); + + String[] positions = hyphenPattern.split(positionRange); + if (2 != positions.length) { + throw new Exception("Bad primer design record position format: '" + + encodedPosition + "'"); + } + int start = Integer.parseInt(positions[0]); + int end = Integer.parseInt(positions[1]); + + PrimerPosition primerPosition = new PrimerPosition(); + primerPosition.setChromosome(chromosome); + primerPosition.setStart(start); + primerPosition.setEnd(end); + return primerPosition; + } + + public String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(HASH)) { + line = reader.readLine(); + } + return line; + } + + public String serialise(PrimerDesignRecord record) throws Exception { + // TODO Auto-generated method stub + return null; + } + + @Override + public PrimerDesignRecord parseRecord(BufferedReader reader) + throws Exception { + String line = nextNonheaderLine(reader); + PrimerDesignRecord result = null; + if (null != line) { + result = parseRecord(line); + } + return result; + } +} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java new file mode 100644 index 000000000..1154b4fb4 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java @@ -0,0 +1,68 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
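A minimal sketch (not part of this patch) of the "chromosome:start-end" encoding that parsePrimerPosition above expects; the coordinates are invented.

import org.qcmg.unused.primerdesignsummary.PrimerDesignRecordSerializer;
import org.qcmg.unused.primerdesignsummary.PrimerPosition;

public class PrimerPositionExample {
    public static void main(String[] args) throws Exception {
        PrimerDesignRecordSerializer serializer = new PrimerDesignRecordSerializer();
        PrimerPosition pos = serializer.parsePrimerPosition("chr12:25398280-25398290");
        System.out.println(pos.getChromosome() + ":" + pos.getStart() + "-" + pos.getEnd());
    }
}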
+ */ +package org.qcmg.unused.primerdesignsummary; + +public class PrimerPosition { + + protected String chromosome; + protected int start; + protected int end; + + /** + * Gets the value of the chromosome property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getChromosome() { + return chromosome; + } + + /** + * Sets the value of the chromosome property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setChromosome(String value) { + this.chromosome = value; + } + + /** + * Gets the value of the start property. + * + */ + public int getStart() { + return start; + } + + /** + * Sets the value of the start property. + * + */ + public void setStart(int value) { + this.start = value; + } + + /** + * Gets the value of the end property. + * + */ + public int getEnd() { + return end; + } + + /** + * Sets the value of the end property. + * + */ + public void setEnd(int value) { + this.end = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java-- b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java-- new file mode 100644 index 000000000..1154b4fb4 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java-- @@ -0,0 +1,68 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerdesignsummary; + +public class PrimerPosition { + + protected String chromosome; + protected int start; + protected int end; + + /** + * Gets the value of the chromosome property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getChromosome() { + return chromosome; + } + + /** + * Sets the value of the chromosome property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setChromosome(String value) { + this.chromosome = value; + } + + /** + * Gets the value of the start property. + * + */ + public int getStart() { + return start; + } + + /** + * Sets the value of the start property. + * + */ + public void setStart(int value) { + this.start = value; + } + + /** + * Gets the value of the end property. + * + */ + public int getEnd() { + return end; + } + + /** + * Sets the value of the end property. + * + */ + public void setEnd(int value) { + this.end = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java new file mode 100644 index 000000000..c2a7262a4 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java @@ -0,0 +1,17 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.primerinput; + +import java.io.File; + +import org.qcmg.unused.reader.FileReader; + +public class PrimerInputFileReader extends FileReader { + private final static PrimerInputRecordSerializer serializer = + new PrimerInputRecordSerializer(); + + public PrimerInputFileReader(final File file) throws Exception { + super(file, serializer); + } +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java-- new file mode 100644 index 000000000..c2a7262a4 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java-- @@ -0,0 +1,17 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerinput; + +import java.io.File; + +import org.qcmg.unused.reader.FileReader; + +public class PrimerInputFileReader extends FileReader { + private final static PrimerInputRecordSerializer serializer = + new PrimerInputRecordSerializer(); + + public PrimerInputFileReader(final File file) throws Exception { + super(file, serializer); + } +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java new file mode 100644 index 000000000..b09623176 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java @@ -0,0 +1,33 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerinput; + +import java.io.Closeable; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +public final class PrimerInputFileWriter implements Closeable { + private static final String EQUALS = "="; + private static final PrimerInputRecordSerializer serializer = new PrimerInputRecordSerializer(); + private final OutputStream outputStream; + + public PrimerInputFileWriter(final File file) throws Exception { + OutputStream stream = new FileOutputStream(file); + outputStream = stream; + } + + public void add(final PrimerInputRecord record) throws Exception { + String encoded = serializer.serialise(record); + outputStream.write(encoded.getBytes()); + outputStream.flush(); + } + + public void close() throws IOException { + outputStream.write(EQUALS.getBytes()); + outputStream.flush(); + outputStream.close(); + } +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java-- new file mode 100644 index 000000000..b09623176 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java-- @@ -0,0 +1,33 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
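PrimerInputFileWriter above serialises one record per add() call and appends a lone "=" when closed; because it implements Closeable, try-with-resources applies. A minimal usage sketch with hypothetical values, assuming the usual java.io.File import (PrimerInputRecord and PrimerInputRecordSerializer.initialise(), which fills in Primer3 defaults, are both added further down in this patch):

    static void writeExample(File out) throws Exception {
        PrimerInputRecord record = new PrimerInputRecord();
        PrimerInputRecordSerializer.initialise(record); // fills default Tm/size/conc settings
        record.setSequenceId("example_id");             // hypothetical values
        record.setSequenceTemplate("ACGTACGTACGTACGT");
        try (PrimerInputFileWriter writer = new PrimerInputFileWriter(out)) {
            writer.add(record);                         // writes the KEY=VALUE block
        }                                               // close() appends the terminating "="
    }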
+ */ +package org.qcmg.unused.primerinput; + +import java.io.Closeable; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +public final class PrimerInputFileWriter implements Closeable { + private static final String EQUALS = "="; + private static final PrimerInputRecordSerializer serializer = new PrimerInputRecordSerializer(); + private final OutputStream outputStream; + + public PrimerInputFileWriter(final File file) throws Exception { + OutputStream stream = new FileOutputStream(file); + outputStream = stream; + } + + public void add(final PrimerInputRecord record) throws Exception { + String encoded = serializer.serialise(record); + outputStream.write(encoded.getBytes()); + outputStream.flush(); + } + + public void close() throws IOException { + outputStream.write(EQUALS.getBytes()); + outputStream.flush(); + outputStream.close(); + } +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java new file mode 100644 index 000000000..7de08071a --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java @@ -0,0 +1,331 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerinput; + + +public class PrimerInputRecord { + + protected String sequenceId; + protected String sequenceTemplate; + protected PrimerSequenceTarget sequenceTarget; + protected int primerProductMinTm; + protected int primerProductMaxTm; + protected double primerDnaConc; + protected double primerSaltConc; + protected int primerMinTm; + protected int primerOptTm; + protected int primerMaxTm; + protected int primerMinSize; + protected int primerOptSize; + protected int primerMaxSize; + protected PrimerSizeRange primerProductSizeRange; + protected boolean primerExplainFlag; + protected int primerNumReturn; + protected boolean primerNumNsAccepted; + + /** + * Gets the value of the sequenceId property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequenceId() { + return sequenceId; + } + + /** + * Sets the value of the sequenceId property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequenceId(String value) { + this.sequenceId = value; + } + + /** + * Gets the value of the sequenceTemplate property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequenceTemplate() { + return sequenceTemplate; + } + + /** + * Sets the value of the sequenceTemplate property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequenceTemplate(String value) { + this.sequenceTemplate = value; + } + + /** + * Gets the value of the sequenceTarget property. + * + * @return + * possible object is + * {@link PrimerSequenceTarget } + * + */ + public PrimerSequenceTarget getSequenceTarget() { + return sequenceTarget; + } + + /** + * Sets the value of the sequenceTarget property. + * + * @param value + * allowed object is + * {@link PrimerSequenceTarget } + * + */ + public void setSequenceTarget(PrimerSequenceTarget value) { + this.sequenceTarget = value; + } + + /** + * Gets the value of the primerProductMinTm property. + * + */ + public int getPrimerProductMinTm() { + return primerProductMinTm; + } + + /** + * Sets the value of the primerProductMinTm property. 
+ * + */ + public void setPrimerProductMinTm(int value) { + this.primerProductMinTm = value; + } + + /** + * Gets the value of the primerProductMaxTm property. + * + */ + public int getPrimerProductMaxTm() { + return primerProductMaxTm; + } + + /** + * Sets the value of the primerProductMaxTm property. + * + */ + public void setPrimerProductMaxTm(int value) { + this.primerProductMaxTm = value; + } + + /** + * Gets the value of the primerDnaConc property. + * + */ + public double getPrimerDnaConc() { + return primerDnaConc; + } + + /** + * Sets the value of the primerDnaConc property. + * + */ + public void setPrimerDnaConc(double value) { + this.primerDnaConc = value; + } + + /** + * Gets the value of the primerSaltConc property. + * + */ + public double getPrimerSaltConc() { + return primerSaltConc; + } + + /** + * Sets the value of the primerSaltConc property. + * + */ + public void setPrimerSaltConc(double value) { + this.primerSaltConc = value; + } + + /** + * Gets the value of the primerMinTm property. + * + */ + public int getPrimerMinTm() { + return primerMinTm; + } + + /** + * Sets the value of the primerMinTm property. + * + */ + public void setPrimerMinTm(int value) { + this.primerMinTm = value; + } + + /** + * Gets the value of the primerOptTm property. + * + */ + public int getPrimerOptTm() { + return primerOptTm; + } + + /** + * Sets the value of the primerOptTm property. + * + */ + public void setPrimerOptTm(int value) { + this.primerOptTm = value; + } + + /** + * Gets the value of the primerMaxTm property. + * + */ + public int getPrimerMaxTm() { + return primerMaxTm; + } + + /** + * Sets the value of the primerMaxTm property. + * + */ + public void setPrimerMaxTm(int value) { + this.primerMaxTm = value; + } + + /** + * Gets the value of the primerMinSize property. + * + */ + public int getPrimerMinSize() { + return primerMinSize; + } + + /** + * Sets the value of the primerMinSize property. + * + */ + public void setPrimerMinSize(int value) { + this.primerMinSize = value; + } + + /** + * Gets the value of the primerOptSize property. + * + */ + public int getPrimerOptSize() { + return primerOptSize; + } + + /** + * Sets the value of the primerOptSize property. + * + */ + public void setPrimerOptSize(int value) { + this.primerOptSize = value; + } + + /** + * Gets the value of the primerMaxSize property. + * + */ + public int getPrimerMaxSize() { + return primerMaxSize; + } + + /** + * Sets the value of the primerMaxSize property. + * + */ + public void setPrimerMaxSize(int value) { + this.primerMaxSize = value; + } + + /** + * Gets the value of the primerProductSizeRange property. + * + * @return + * possible object is + * {@link PrimerSizeRange } + * + */ + public PrimerSizeRange getPrimerProductSizeRange() { + return primerProductSizeRange; + } + + /** + * Sets the value of the primerProductSizeRange property. + * + * @param value + * allowed object is + * {@link PrimerSizeRange } + * + */ + public void setPrimerProductSizeRange(PrimerSizeRange value) { + this.primerProductSizeRange = value; + } + + /** + * Gets the value of the primerExplainFlag property. + * + */ + public boolean isPrimerExplainFlag() { + return primerExplainFlag; + } + + /** + * Sets the value of the primerExplainFlag property. + * + */ + public void setPrimerExplainFlag(boolean value) { + this.primerExplainFlag = value; + } + + /** + * Gets the value of the primerNumReturn property. 
+ * + */ + public int getPrimerNumReturn() { + return primerNumReturn; + } + + /** + * Sets the value of the primerNumReturn property. + * + */ + public void setPrimerNumReturn(int value) { + this.primerNumReturn = value; + } + + /** + * Gets the value of the primerNumNsAccepted property. + * + */ + public boolean isPrimerNumNsAccepted() { + return primerNumNsAccepted; + } + + /** + * Sets the value of the primerNumNsAccepted property. + * + */ + public void setPrimerNumNsAccepted(boolean value) { + this.primerNumNsAccepted = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java-- new file mode 100644 index 000000000..7de08071a --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java-- @@ -0,0 +1,331 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerinput; + + +public class PrimerInputRecord { + + protected String sequenceId; + protected String sequenceTemplate; + protected PrimerSequenceTarget sequenceTarget; + protected int primerProductMinTm; + protected int primerProductMaxTm; + protected double primerDnaConc; + protected double primerSaltConc; + protected int primerMinTm; + protected int primerOptTm; + protected int primerMaxTm; + protected int primerMinSize; + protected int primerOptSize; + protected int primerMaxSize; + protected PrimerSizeRange primerProductSizeRange; + protected boolean primerExplainFlag; + protected int primerNumReturn; + protected boolean primerNumNsAccepted; + + /** + * Gets the value of the sequenceId property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequenceId() { + return sequenceId; + } + + /** + * Sets the value of the sequenceId property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequenceId(String value) { + this.sequenceId = value; + } + + /** + * Gets the value of the sequenceTemplate property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequenceTemplate() { + return sequenceTemplate; + } + + /** + * Sets the value of the sequenceTemplate property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequenceTemplate(String value) { + this.sequenceTemplate = value; + } + + /** + * Gets the value of the sequenceTarget property. + * + * @return + * possible object is + * {@link PrimerSequenceTarget } + * + */ + public PrimerSequenceTarget getSequenceTarget() { + return sequenceTarget; + } + + /** + * Sets the value of the sequenceTarget property. + * + * @param value + * allowed object is + * {@link PrimerSequenceTarget } + * + */ + public void setSequenceTarget(PrimerSequenceTarget value) { + this.sequenceTarget = value; + } + + /** + * Gets the value of the primerProductMinTm property. + * + */ + public int getPrimerProductMinTm() { + return primerProductMinTm; + } + + /** + * Sets the value of the primerProductMinTm property. + * + */ + public void setPrimerProductMinTm(int value) { + this.primerProductMinTm = value; + } + + /** + * Gets the value of the primerProductMaxTm property. + * + */ + public int getPrimerProductMaxTm() { + return primerProductMaxTm; + } + + /** + * Sets the value of the primerProductMaxTm property. 
+ * + */ + public void setPrimerProductMaxTm(int value) { + this.primerProductMaxTm = value; + } + + /** + * Gets the value of the primerDnaConc property. + * + */ + public double getPrimerDnaConc() { + return primerDnaConc; + } + + /** + * Sets the value of the primerDnaConc property. + * + */ + public void setPrimerDnaConc(double value) { + this.primerDnaConc = value; + } + + /** + * Gets the value of the primerSaltConc property. + * + */ + public double getPrimerSaltConc() { + return primerSaltConc; + } + + /** + * Sets the value of the primerSaltConc property. + * + */ + public void setPrimerSaltConc(double value) { + this.primerSaltConc = value; + } + + /** + * Gets the value of the primerMinTm property. + * + */ + public int getPrimerMinTm() { + return primerMinTm; + } + + /** + * Sets the value of the primerMinTm property. + * + */ + public void setPrimerMinTm(int value) { + this.primerMinTm = value; + } + + /** + * Gets the value of the primerOptTm property. + * + */ + public int getPrimerOptTm() { + return primerOptTm; + } + + /** + * Sets the value of the primerOptTm property. + * + */ + public void setPrimerOptTm(int value) { + this.primerOptTm = value; + } + + /** + * Gets the value of the primerMaxTm property. + * + */ + public int getPrimerMaxTm() { + return primerMaxTm; + } + + /** + * Sets the value of the primerMaxTm property. + * + */ + public void setPrimerMaxTm(int value) { + this.primerMaxTm = value; + } + + /** + * Gets the value of the primerMinSize property. + * + */ + public int getPrimerMinSize() { + return primerMinSize; + } + + /** + * Sets the value of the primerMinSize property. + * + */ + public void setPrimerMinSize(int value) { + this.primerMinSize = value; + } + + /** + * Gets the value of the primerOptSize property. + * + */ + public int getPrimerOptSize() { + return primerOptSize; + } + + /** + * Sets the value of the primerOptSize property. + * + */ + public void setPrimerOptSize(int value) { + this.primerOptSize = value; + } + + /** + * Gets the value of the primerMaxSize property. + * + */ + public int getPrimerMaxSize() { + return primerMaxSize; + } + + /** + * Sets the value of the primerMaxSize property. + * + */ + public void setPrimerMaxSize(int value) { + this.primerMaxSize = value; + } + + /** + * Gets the value of the primerProductSizeRange property. + * + * @return + * possible object is + * {@link PrimerSizeRange } + * + */ + public PrimerSizeRange getPrimerProductSizeRange() { + return primerProductSizeRange; + } + + /** + * Sets the value of the primerProductSizeRange property. + * + * @param value + * allowed object is + * {@link PrimerSizeRange } + * + */ + public void setPrimerProductSizeRange(PrimerSizeRange value) { + this.primerProductSizeRange = value; + } + + /** + * Gets the value of the primerExplainFlag property. + * + */ + public boolean isPrimerExplainFlag() { + return primerExplainFlag; + } + + /** + * Sets the value of the primerExplainFlag property. + * + */ + public void setPrimerExplainFlag(boolean value) { + this.primerExplainFlag = value; + } + + /** + * Gets the value of the primerNumReturn property. + * + */ + public int getPrimerNumReturn() { + return primerNumReturn; + } + + /** + * Sets the value of the primerNumReturn property. + * + */ + public void setPrimerNumReturn(int value) { + this.primerNumReturn = value; + } + + /** + * Gets the value of the primerNumNsAccepted property. 
+ * + */ + public boolean isPrimerNumNsAccepted() { + return primerNumNsAccepted; + } + + /** + * Sets the value of the primerNumNsAccepted property. + * + */ + public void setPrimerNumNsAccepted(boolean value) { + this.primerNumNsAccepted = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java new file mode 100644 index 000000000..9a24a1a4a --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java @@ -0,0 +1,201 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerinput; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.qmule.record.Serializer; + +public final class PrimerInputRecordSerializer extends + Serializer { + private final static String SEQUENCE_ID = "SEQUENCE_ID"; + private final static String SEQUENCE_TEMPLATE = "SEQUENCE_TEMPLATE"; + private final static String SEQUENCE_TARGET = "SEQUENCE_TARGET"; + private final static String PRIMER_PRODUCT_MIN_TM = "PRIMER_PRODUCT_MIN_TM"; + private final static String PRIMER_PRODUCT_MAX_TM = "PRIMER_PRODUCT_MAX_TM"; + private final static String PRIMER_DNA_CONC = "PRIMER_DNA_CONC"; + private final static String PRIMER_SALT_CONC = "PRIMER_SALT_CONC"; + private final static String PRIMER_MIN_TM = "PRIMER_MIN_TM"; + private final static String PRIMER_OPT_TM = "PRIMER_OPT_TM"; + private final static String PRIMER_MAX_TM = "PRIMER_MAX_TM"; + private final static String PRIMER_MIN_SIZE = "PRIMER_MIN_SIZE"; + private final static String PRIMER_OPT_SIZE = "PRIMER_OPT_SIZE"; + private final static String PRIMER_MAX_SIZE = "PRIMER_MAX_SIZE"; + private final static String PRIMER_PRODUCT_SIZE_RANGE = "PRIMER_PRODUCT_SIZE_RANGE"; + private final static String PRIMER_EXPLAIN_FLAG = "PRIMER_EXPLAIN_FLAG"; + private final static String PRIMER_NUM_RETURN = "PRIMER_NUM_RETURN"; + private final static String PRIMER_NUM_NS_ACCEPTED = "PRIMER_NUM_NS_ACCEPTED"; + private final static String EQUALS = "="; + + public PrimerInputRecord parseRecord(final BufferedReader reader) + throws Exception { + String nextLine = nextStringValue(reader); + if (nextLine.equals("=")) { + return null; + } + PrimerInputRecord result = new PrimerInputRecord(); + result.setSequenceId(nextLine); + result.setSequenceTemplate(nextStringValue(reader)); + result.setSequenceTarget(nextTargetValue(reader)); + result.setPrimerProductMinTm(nextIntegerValue(reader)); + result.setPrimerProductMaxTm(nextIntegerValue(reader)); + result.setPrimerDnaConc(nextDoubleValue(reader)); + result.setPrimerSaltConc(nextDoubleValue(reader)); + result.setPrimerMinTm(nextIntegerValue(reader)); + result.setPrimerOptTm(nextIntegerValue(reader)); + result.setPrimerMaxTm(nextIntegerValue(reader)); + result.setPrimerMinSize(nextIntegerValue(reader)); + result.setPrimerOptSize(nextIntegerValue(reader)); + result.setPrimerMaxSize(nextIntegerValue(reader)); + result.setPrimerProductSizeRange(nextPrimerSizeRangeValue(reader)); + result.setPrimerExplainFlag(nextBooleanValue(reader)); + result.setPrimerNumReturn(nextIntegerValue(reader)); + result.setPrimerNumNsAccepted(nextBooleanValue(reader)); + return result; + } + + private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { + return 
Double.parseDouble(nextStringValue(reader)); + } + + private PrimerSizeRange nextPrimerSizeRangeValue(BufferedReader reader) + throws Exception { + String targetValue = nextStringValue(reader); + final String[] params = hyphenPattern.split(targetValue, -1); + if (2 != params.length) { + throw new Exception("Bad format. Insufficient numbered values: '" + + targetValue + "'"); + } + PrimerSizeRange range = new PrimerSizeRange(); + range.setLowerLimit(Integer.parseInt(params[0])); + range.setUpperLimit(Integer.parseInt(params[1])); + return range; + } + + private boolean nextBooleanValue(BufferedReader reader) throws Exception { + return Boolean.parseBoolean(nextStringValue(reader)); + } + + private String nextStringValue(final BufferedReader reader) + throws Exception { + final String line = reader.readLine(); + final String[] params = equalsPattern.split(line, -1); + if (2 != params.length) { + throw new Exception("Bad format. Insufficient columns: '" + line + + "'"); + } + return params[1].trim(); + } + + private int nextIntegerValue(final BufferedReader reader) throws Exception { + return Integer.parseInt(nextStringValue(reader)); + } + + private PrimerSequenceTarget nextTargetValue(final BufferedReader reader) + throws Exception { + String targetValue = nextStringValue(reader); + final String[] params = commaPattern.split(targetValue, -1); + if (2 != params.length) { + throw new Exception("Bad format. Insufficient numbered values: '" + + targetValue + "'"); + } + PrimerSequenceTarget target = new PrimerSequenceTarget(); + target.setLeftValue(Integer.parseInt(params[0])); + target.setRightValue(Integer.parseInt(params[1])); + return target; + } + + public String serialise(final PrimerInputRecord record) throws Exception { + String result = addLine("", SEQUENCE_ID, record.getSequenceId()); + result = addLine(result, SEQUENCE_TEMPLATE, record + .getSequenceTemplate()); + result = addLine(result, SEQUENCE_TARGET, record.getSequenceTarget()); + result = addLine(result, PRIMER_PRODUCT_MIN_TM, record + .getPrimerProductMinTm()); + result = addLine(result, PRIMER_PRODUCT_MAX_TM, record + .getPrimerProductMaxTm()); + result = addLine(result, PRIMER_DNA_CONC, record.getPrimerDnaConc()); + result = addLine(result, PRIMER_SALT_CONC, record.getPrimerSaltConc()); + result = addLine(result, PRIMER_MIN_TM, record.getPrimerMinTm()); + result = addLine(result, PRIMER_OPT_TM, record.getPrimerOptTm()); + result = addLine(result, PRIMER_MAX_TM, record.getPrimerMaxTm()); + result = addLine(result, PRIMER_MIN_SIZE, record.getPrimerMinSize()); + result = addLine(result, PRIMER_OPT_SIZE, record.getPrimerOptSize()); + result = addLine(result, PRIMER_MAX_SIZE, record.getPrimerMaxSize()); + result = addLine(result, PRIMER_PRODUCT_SIZE_RANGE, record + .getPrimerProductSizeRange()); + result = addLine(result, PRIMER_EXPLAIN_FLAG, record + .isPrimerExplainFlag()); + result = addLine(result, PRIMER_NUM_RETURN, record.getPrimerNumReturn()); + result = addLine(result, PRIMER_NUM_NS_ACCEPTED, record + .isPrimerNumNsAccepted()); + return result; + } + + public static void initialise(PrimerInputRecord record) { + record.setPrimerDnaConc(120); + record.setPrimerSaltConc(50); + record.setPrimerExplainFlag(false); + record.setPrimerMaxSize(25); + record.setPrimerExplainFlag(true); + record.setPrimerMaxSize(25); + record.setPrimerMaxTm(75); + record.setPrimerMinSize(18); + record.setPrimerMinTm(55); + record.setPrimerNumNsAccepted(true); + record.setPrimerNumReturn(10000); + record.setPrimerOptSize(20); + 
record.setPrimerOptTm(65); + record.setPrimerProductMaxTm(85); + record.setPrimerProductMinTm(65); + PrimerSizeRange range = new PrimerSizeRange(); + range.setLowerLimit(50); + range.setUpperLimit(120); + record.setPrimerProductSizeRange(range); + record.setSequenceId(""); + record.setSequenceTemplate(""); + PrimerSequenceTarget target = new PrimerSequenceTarget(); + target.setLeftValue(249); + target.setRightValue(3); + record.setSequenceTarget(target); + } + + private static String addLine(final String result, final String lhs, + final String rhs) { + return result + lhs + EQUALS + rhs + NEWLINE; + } + + private static String addLine(String result, final String lhs, + final double rhs) { + return result + lhs + EQUALS + Double.toString(rhs) + NEWLINE; + } + + private static String addLine(String result, final String lhs, final int rhs) { + return result + lhs + EQUALS + Integer.toString(rhs) + NEWLINE; + } + + private static String addLine(String result, String lhs, + final PrimerSequenceTarget rhs) { + return result + lhs + EQUALS + rhs.getLeftValue() + "," + + rhs.getRightValue() + NEWLINE; + } + + private static String addLine(String result, String lhs, final boolean rhs) { + if (rhs) { + return result + lhs + EQUALS + "1" + NEWLINE; + } else { + return result + lhs + EQUALS + "0" + NEWLINE; + } + } + + private static String addLine(String result, String lhs, + final PrimerSizeRange rhs) { + return result + lhs + EQUALS + rhs.getLowerLimit() + "-" + + rhs.getUpperLimit() + NEWLINE; + } + +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java-- new file mode 100644 index 000000000..5bff3cd6a --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java-- @@ -0,0 +1,201 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
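For reference, serialise() above emits one KEY=VALUE line per field, in the same fixed order that parseRecord(BufferedReader) reads them back; a stream of records is terminated by a bare "=" (written by PrimerInputFileWriter.close(), not by the serializer). With the defaults set by initialise() and placeholder SEQUENCE_* values, a record block looks like:

    SEQUENCE_ID=example_id
    SEQUENCE_TEMPLATE=ACGTACGTACGTACGT
    SEQUENCE_TARGET=249,3
    PRIMER_PRODUCT_MIN_TM=65
    PRIMER_PRODUCT_MAX_TM=85
    PRIMER_DNA_CONC=120.0
    PRIMER_SALT_CONC=50.0
    PRIMER_MIN_TM=55
    PRIMER_OPT_TM=65
    PRIMER_MAX_TM=75
    PRIMER_MIN_SIZE=18
    PRIMER_OPT_SIZE=20
    PRIMER_MAX_SIZE=25
    PRIMER_PRODUCT_SIZE_RANGE=50-120
    PRIMER_EXPLAIN_FLAG=1
    PRIMER_NUM_RETURN=10000
    PRIMER_NUM_NS_ACCEPTED=1
    =

Booleans are written as 1/0 and doubles via Double.toString(), hence PRIMER_DNA_CONC=120.0.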
+ */ +package org.qcmg.unused.primerinput; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +import org.qcmg.unused.gff3.GFF3Record; +import org.qcmg.unused.record.Serializer; + +public final class PrimerInputRecordSerializer extends + Serializer { + private final static String SEQUENCE_ID = "SEQUENCE_ID"; + private final static String SEQUENCE_TEMPLATE = "SEQUENCE_TEMPLATE"; + private final static String SEQUENCE_TARGET = "SEQUENCE_TARGET"; + private final static String PRIMER_PRODUCT_MIN_TM = "PRIMER_PRODUCT_MIN_TM"; + private final static String PRIMER_PRODUCT_MAX_TM = "PRIMER_PRODUCT_MAX_TM"; + private final static String PRIMER_DNA_CONC = "PRIMER_DNA_CONC"; + private final static String PRIMER_SALT_CONC = "PRIMER_SALT_CONC"; + private final static String PRIMER_MIN_TM = "PRIMER_MIN_TM"; + private final static String PRIMER_OPT_TM = "PRIMER_OPT_TM"; + private final static String PRIMER_MAX_TM = "PRIMER_MAX_TM"; + private final static String PRIMER_MIN_SIZE = "PRIMER_MIN_SIZE"; + private final static String PRIMER_OPT_SIZE = "PRIMER_OPT_SIZE"; + private final static String PRIMER_MAX_SIZE = "PRIMER_MAX_SIZE"; + private final static String PRIMER_PRODUCT_SIZE_RANGE = "PRIMER_PRODUCT_SIZE_RANGE"; + private final static String PRIMER_EXPLAIN_FLAG = "PRIMER_EXPLAIN_FLAG"; + private final static String PRIMER_NUM_RETURN = "PRIMER_NUM_RETURN"; + private final static String PRIMER_NUM_NS_ACCEPTED = "PRIMER_NUM_NS_ACCEPTED"; + private final static String EQUALS = "="; + + public PrimerInputRecord parseRecord(final BufferedReader reader) + throws Exception { + String nextLine = nextStringValue(reader); + if (nextLine.equals("=")) { + return null; + } + PrimerInputRecord result = new PrimerInputRecord(); + result.setSequenceId(nextLine); + result.setSequenceTemplate(nextStringValue(reader)); + result.setSequenceTarget(nextTargetValue(reader)); + result.setPrimerProductMinTm(nextIntegerValue(reader)); + result.setPrimerProductMaxTm(nextIntegerValue(reader)); + result.setPrimerDnaConc(nextDoubleValue(reader)); + result.setPrimerSaltConc(nextDoubleValue(reader)); + result.setPrimerMinTm(nextIntegerValue(reader)); + result.setPrimerOptTm(nextIntegerValue(reader)); + result.setPrimerMaxTm(nextIntegerValue(reader)); + result.setPrimerMinSize(nextIntegerValue(reader)); + result.setPrimerOptSize(nextIntegerValue(reader)); + result.setPrimerMaxSize(nextIntegerValue(reader)); + result.setPrimerProductSizeRange(nextPrimerSizeRangeValue(reader)); + result.setPrimerExplainFlag(nextBooleanValue(reader)); + result.setPrimerNumReturn(nextIntegerValue(reader)); + result.setPrimerNumNsAccepted(nextBooleanValue(reader)); + return result; + } + + private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { + return Double.parseDouble(nextStringValue(reader)); + } + + private PrimerSizeRange nextPrimerSizeRangeValue(BufferedReader reader) + throws Exception { + String targetValue = nextStringValue(reader); + final String[] params = hyphenPattern.split(targetValue, -1); + if (2 != params.length) { + throw new Exception("Bad format. 
Insufficient numbered values: '" + + targetValue + "'"); + } + PrimerSizeRange range = new PrimerSizeRange(); + range.setLowerLimit(Integer.parseInt(params[0])); + range.setUpperLimit(Integer.parseInt(params[1])); + return range; + } + + private boolean nextBooleanValue(BufferedReader reader) throws Exception { + return Boolean.parseBoolean(nextStringValue(reader)); + } + + private String nextStringValue(final BufferedReader reader) + throws Exception { + final String line = reader.readLine(); + final String[] params = equalsPattern.split(line, -1); + if (2 != params.length) { + throw new Exception("Bad format. Insufficient columns: '" + line + + "'"); + } + return params[1].trim(); + } + + private int nextIntegerValue(final BufferedReader reader) throws Exception { + return Integer.parseInt(nextStringValue(reader)); + } + + private PrimerSequenceTarget nextTargetValue(final BufferedReader reader) + throws Exception { + String targetValue = nextStringValue(reader); + final String[] params = commaPattern.split(targetValue, -1); + if (2 != params.length) { + throw new Exception("Bad format. Insufficient numbered values: '" + + targetValue + "'"); + } + PrimerSequenceTarget target = new PrimerSequenceTarget(); + target.setLeftValue(Integer.parseInt(params[0])); + target.setRightValue(Integer.parseInt(params[1])); + return target; + } + + public String serialise(final PrimerInputRecord record) throws Exception { + String result = addLine("", SEQUENCE_ID, record.getSequenceId()); + result = addLine(result, SEQUENCE_TEMPLATE, record + .getSequenceTemplate()); + result = addLine(result, SEQUENCE_TARGET, record.getSequenceTarget()); + result = addLine(result, PRIMER_PRODUCT_MIN_TM, record + .getPrimerProductMinTm()); + result = addLine(result, PRIMER_PRODUCT_MAX_TM, record + .getPrimerProductMaxTm()); + result = addLine(result, PRIMER_DNA_CONC, record.getPrimerDnaConc()); + result = addLine(result, PRIMER_SALT_CONC, record.getPrimerSaltConc()); + result = addLine(result, PRIMER_MIN_TM, record.getPrimerMinTm()); + result = addLine(result, PRIMER_OPT_TM, record.getPrimerOptTm()); + result = addLine(result, PRIMER_MAX_TM, record.getPrimerMaxTm()); + result = addLine(result, PRIMER_MIN_SIZE, record.getPrimerMinSize()); + result = addLine(result, PRIMER_OPT_SIZE, record.getPrimerOptSize()); + result = addLine(result, PRIMER_MAX_SIZE, record.getPrimerMaxSize()); + result = addLine(result, PRIMER_PRODUCT_SIZE_RANGE, record + .getPrimerProductSizeRange()); + result = addLine(result, PRIMER_EXPLAIN_FLAG, record + .isPrimerExplainFlag()); + result = addLine(result, PRIMER_NUM_RETURN, record.getPrimerNumReturn()); + result = addLine(result, PRIMER_NUM_NS_ACCEPTED, record + .isPrimerNumNsAccepted()); + return result; + } + + public static void initialise(PrimerInputRecord record) { + record.setPrimerDnaConc(120); + record.setPrimerSaltConc(50); + record.setPrimerExplainFlag(false); + record.setPrimerMaxSize(25); + record.setPrimerExplainFlag(true); + record.setPrimerMaxSize(25); + record.setPrimerMaxTm(75); + record.setPrimerMinSize(18); + record.setPrimerMinTm(55); + record.setPrimerNumNsAccepted(true); + record.setPrimerNumReturn(10000); + record.setPrimerOptSize(20); + record.setPrimerOptTm(65); + record.setPrimerProductMaxTm(85); + record.setPrimerProductMinTm(65); + PrimerSizeRange range = new PrimerSizeRange(); + range.setLowerLimit(50); + range.setUpperLimit(120); + record.setPrimerProductSizeRange(range); + record.setSequenceId(""); + record.setSequenceTemplate(""); + PrimerSequenceTarget target = new 
PrimerSequenceTarget(); + target.setLeftValue(249); + target.setRightValue(3); + record.setSequenceTarget(target); + } + + private static String addLine(final String result, final String lhs, + final String rhs) { + return result + lhs + EQUALS + rhs + NEWLINE; + } + + private static String addLine(String result, final String lhs, + final double rhs) { + return result + lhs + EQUALS + Double.toString(rhs) + NEWLINE; + } + + private static String addLine(String result, final String lhs, final int rhs) { + return result + lhs + EQUALS + Integer.toString(rhs) + NEWLINE; + } + + private static String addLine(String result, String lhs, + final PrimerSequenceTarget rhs) { + return result + lhs + EQUALS + rhs.getLeftValue() + "," + + rhs.getRightValue() + NEWLINE; + } + + private static String addLine(String result, String lhs, final boolean rhs) { + if (rhs) { + return result + lhs + EQUALS + "1" + NEWLINE; + } else { + return result + lhs + EQUALS + "0" + NEWLINE; + } + } + + private static String addLine(String result, String lhs, + final PrimerSizeRange rhs) { + return result + lhs + EQUALS + rhs.getLowerLimit() + "-" + + rhs.getUpperLimit() + NEWLINE; + } + +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java b/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java new file mode 100644 index 000000000..8cb3833fe --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java @@ -0,0 +1,43 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerinput; + +public class PrimerSequenceTarget { + + protected int leftValue; + protected int rightValue; + + /** + * Gets the value of the leftValue property. + * + */ + public int getLeftValue() { + return leftValue; + } + + /** + * Sets the value of the leftValue property. + * + */ + public void setLeftValue(int value) { + this.leftValue = value; + } + + /** + * Gets the value of the rightValue property. + * + */ + public int getRightValue() { + return rightValue; + } + + /** + * Sets the value of the rightValue property. + * + */ + public void setRightValue(int value) { + this.rightValue = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java-- new file mode 100644 index 000000000..8cb3833fe --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java-- @@ -0,0 +1,43 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerinput; + +public class PrimerSequenceTarget { + + protected int leftValue; + protected int rightValue; + + /** + * Gets the value of the leftValue property. + * + */ + public int getLeftValue() { + return leftValue; + } + + /** + * Sets the value of the leftValue property. + * + */ + public void setLeftValue(int value) { + this.leftValue = value; + } + + /** + * Gets the value of the rightValue property. + * + */ + public int getRightValue() { + return rightValue; + } + + /** + * Sets the value of the rightValue property. 
+ * + */ + public void setRightValue(int value) { + this.rightValue = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java b/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java new file mode 100644 index 000000000..bc16b6531 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java @@ -0,0 +1,44 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerinput; + + +public class PrimerSizeRange { + + protected int lowerLimit; + protected int upperLimit; + + /** + * Gets the value of the lowerLimit property. + * + */ + public int getLowerLimit() { + return lowerLimit; + } + + /** + * Sets the value of the lowerLimit property. + * + */ + public void setLowerLimit(int value) { + this.lowerLimit = value; + } + + /** + * Gets the value of the upperLimit property. + * + */ + public int getUpperLimit() { + return upperLimit; + } + + /** + * Sets the value of the upperLimit property. + * + */ + public void setUpperLimit(int value) { + this.upperLimit = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java-- new file mode 100644 index 000000000..bc16b6531 --- /dev/null +++ b/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java-- @@ -0,0 +1,44 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primerinput; + + +public class PrimerSizeRange { + + protected int lowerLimit; + protected int upperLimit; + + /** + * Gets the value of the lowerLimit property. + * + */ + public int getLowerLimit() { + return lowerLimit; + } + + /** + * Sets the value of the lowerLimit property. + * + */ + public void setLowerLimit(int value) { + this.lowerLimit = value; + } + + /** + * Gets the value of the upperLimit property. + * + */ + public int getUpperLimit() { + return upperLimit; + } + + /** + * Sets the value of the upperLimit property. + * + */ + public void setUpperLimit(int value) { + this.upperLimit = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java new file mode 100644 index 000000000..d77d6b5ad --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java @@ -0,0 +1,22 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.primeroutput; + +import java.io.File; + +import org.qcmg.unused.primeroutput.PrimerOutputHeader; +import org.qcmg.unused.primeroutput.PrimerOutputRecord; +import org.qcmg.unused.reader.ExtendedFileReader; +import org.qcmg.unused.reader.FileReader; + +public class PrimerOutputFileReader extends ExtendedFileReader { + private final static PrimerOutputHeaderSerializer headerSerializer = + new PrimerOutputHeaderSerializer(); + private final static PrimerOutputRecordSerializer recordSerializer = + new PrimerOutputRecordSerializer(); + + public PrimerOutputFileReader(final File file) throws Exception { + super(file, recordSerializer, headerSerializer); + } +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java-- new file mode 100644 index 000000000..937c5993f --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java-- @@ -0,0 +1,22 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primeroutput; + +import java.io.File; + +import org.qcmg.primeroutput.PrimerOutputHeader; +import org.qcmg.primeroutput.PrimerOutputRecord; +import org.qcmg.reader.ExtendedFileReader; +import org.qcmg.unused.reader.FileReader; + +public class PrimerOutputFileReader extends ExtendedFileReader { + private final static PrimerOutputHeaderSerializer headerSerializer = + new PrimerOutputHeaderSerializer(); + private final static PrimerOutputRecordSerializer recordSerializer = + new PrimerOutputRecordSerializer(); + + public PrimerOutputFileReader(final File file) throws Exception { + super(file, recordSerializer, headerSerializer); + } +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java new file mode 100644 index 000000000..5a0eb1317 --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java @@ -0,0 +1,35 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.primeroutput; + +import java.io.Closeable; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import org.qcmg.unused.primeroutput.PrimerOutputRecord; + +public final class PrimerOutputFileWriter implements Closeable { + private static final String EQUALS = "="; + private static final PrimerOutputRecordSerializer serializer = new PrimerOutputRecordSerializer(); + private final OutputStream outputStream; + + public PrimerOutputFileWriter(final File file) throws Exception { + OutputStream stream = new FileOutputStream(file); + outputStream = stream; + } + + public void add(final PrimerOutputRecord record) throws Exception { + String encoded = serializer.serialise(record); + outputStream.write(encoded.getBytes()); + outputStream.flush(); + } + + public void close() throws IOException { + outputStream.write(EQUALS.getBytes()); + outputStream.flush(); + outputStream.close(); + } +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java-- new file mode 100644 index 000000000..7afe95b1b --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java-- @@ -0,0 +1,35 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primeroutput; + +import java.io.Closeable; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import org.qcmg.primeroutput.PrimerOutputRecord; + +public final class PrimerOutputFileWriter implements Closeable { + private static final String EQUALS = "="; + private static final PrimerOutputRecordSerializer serializer = new PrimerOutputRecordSerializer(); + private final OutputStream outputStream; + + public PrimerOutputFileWriter(final File file) throws Exception { + OutputStream stream = new FileOutputStream(file); + outputStream = stream; + } + + public void add(final PrimerOutputRecord record) throws Exception { + String encoded = serializer.serialise(record); + outputStream.write(encoded.getBytes()); + outputStream.flush(); + } + + public void close() throws IOException { + outputStream.write(EQUALS.getBytes()); + outputStream.flush(); + outputStream.close(); + } +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java new file mode 100644 index 000000000..4763d993e --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java @@ -0,0 +1,563 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2013.10.25 at 10:52:22 AM EST +// + + +package org.qcmg.unused.primeroutput; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + *

Java class for primerOutputHeader complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="primerOutputHeader">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="sequenceId" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="sequenceTemplate" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="sequenceTarget" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="productMinTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="productMaxTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="dnaConc" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="saltConc" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="minTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="optTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="maxTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="minSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="optSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="maxSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="productSizeRange" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="explainFlag" type="{http://www.w3.org/2001/XMLSchema}boolean"/>
+ *         <element name="numReturn" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="numNsAccepted" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="leftExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="rightExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="pairExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="leftNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="rightNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="internalNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="pairNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "primerOutputHeader", propOrder = { + "sequenceId", + "sequenceTemplate", + "sequenceTarget", + "productMinTm", + "productMaxTm", + "dnaConc", + "saltConc", + "minTm", + "optTm", + "maxTm", + "minSize", + "optSize", + "maxSize", + "productSizeRange", + "explainFlag", + "numReturn", + "numNsAccepted", + "leftExplain", + "rightExplain", + "pairExplain", + "leftNumReturned", + "rightNumReturned", + "internalNumReturned", + "pairNumReturned" +}) +public class PrimerOutputHeader { + + @XmlElement(required = true) + protected String sequenceId; + @XmlElement(required = true) + protected String sequenceTemplate; + @XmlElement(required = true) + protected String sequenceTarget; + protected int productMinTm; + protected int productMaxTm; + protected double dnaConc; + protected double saltConc; + protected int minTm; + protected int optTm; + protected int maxTm; + protected int minSize; + protected int optSize; + protected int maxSize; + @XmlElement(required = true) + protected String productSizeRange; + protected boolean explainFlag; + protected int numReturn; + protected int numNsAccepted; + @XmlElement(required = true) + protected String leftExplain; + @XmlElement(required = true) + protected String rightExplain; + @XmlElement(required = true) + protected String pairExplain; + protected int leftNumReturned; + protected int rightNumReturned; + protected int internalNumReturned; + protected int pairNumReturned; + + /** + * Gets the value of the sequenceId property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequenceId() { + return sequenceId; + } + + /** + * Sets the value of the sequenceId property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequenceId(String value) { + this.sequenceId = value; + } + + /** + * Gets the value of the sequenceTemplate property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequenceTemplate() { + return sequenceTemplate; + } + + /** + * Sets the value of the sequenceTemplate property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequenceTemplate(String value) { + this.sequenceTemplate = value; + } + + /** + * Gets the value of the sequenceTarget property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequenceTarget() { + return sequenceTarget; + } + + /** + * Sets the value of the sequenceTarget property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequenceTarget(String value) { + this.sequenceTarget = value; + } + + /** + * Gets the value of the productMinTm property. + * + */ + public int getProductMinTm() { + return productMinTm; + } + + /** + * Sets the value of the productMinTm property. + * + */ + public void setProductMinTm(int value) { + this.productMinTm = value; + } + + /** + * Gets the value of the productMaxTm property. + * + */ + public int getProductMaxTm() { + return productMaxTm; + } + + /** + * Sets the value of the productMaxTm property. + * + */ + public void setProductMaxTm(int value) { + this.productMaxTm = value; + } + + /** + * Gets the value of the dnaConc property. + * + */ + public double getDnaConc() { + return dnaConc; + } + + /** + * Sets the value of the dnaConc property. + * + */ + public void setDnaConc(double value) { + this.dnaConc = value; + } + + /** + * Gets the value of the saltConc property. 
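PrimerOutputHeader carries JAXB annotations because it was generated from an XML schema, although the file reader added above wires in a PrimerOutputHeaderSerializer rather than JAXB to read headers. Purely as an illustration of what the annotations enable, and not something this patch does, an instance could be marshalled to XML roughly as follows (the class has no @XmlRootElement, so it is wrapped in a JAXBElement):

    import javax.xml.bind.JAXBContext;
    import javax.xml.bind.JAXBElement;
    import javax.xml.bind.Marshaller;
    import javax.xml.namespace.QName;

    static void dumpHeader(PrimerOutputHeader header) throws Exception {
        Marshaller m = JAXBContext.newInstance(PrimerOutputHeader.class).createMarshaller();
        m.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);   // pretty-print
        m.marshal(new JAXBElement<>(new QName("primerOutputHeader"),
                PrimerOutputHeader.class, header), System.out);
    }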
+ * + */ + public double getSaltConc() { + return saltConc; + } + + /** + * Sets the value of the saltConc property. + * + */ + public void setSaltConc(double value) { + this.saltConc = value; + } + + /** + * Gets the value of the minTm property. + * + */ + public int getMinTm() { + return minTm; + } + + /** + * Sets the value of the minTm property. + * + */ + public void setMinTm(int value) { + this.minTm = value; + } + + /** + * Gets the value of the optTm property. + * + */ + public int getOptTm() { + return optTm; + } + + /** + * Sets the value of the optTm property. + * + */ + public void setOptTm(int value) { + this.optTm = value; + } + + /** + * Gets the value of the maxTm property. + * + */ + public int getMaxTm() { + return maxTm; + } + + /** + * Sets the value of the maxTm property. + * + */ + public void setMaxTm(int value) { + this.maxTm = value; + } + + /** + * Gets the value of the minSize property. + * + */ + public int getMinSize() { + return minSize; + } + + /** + * Sets the value of the minSize property. + * + */ + public void setMinSize(int value) { + this.minSize = value; + } + + /** + * Gets the value of the optSize property. + * + */ + public int getOptSize() { + return optSize; + } + + /** + * Sets the value of the optSize property. + * + */ + public void setOptSize(int value) { + this.optSize = value; + } + + /** + * Gets the value of the maxSize property. + * + */ + public int getMaxSize() { + return maxSize; + } + + /** + * Sets the value of the maxSize property. + * + */ + public void setMaxSize(int value) { + this.maxSize = value; + } + + /** + * Gets the value of the productSizeRange property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getProductSizeRange() { + return productSizeRange; + } + + /** + * Sets the value of the productSizeRange property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setProductSizeRange(String value) { + this.productSizeRange = value; + } + + /** + * Gets the value of the explainFlag property. + * + */ + public boolean isExplainFlag() { + return explainFlag; + } + + /** + * Sets the value of the explainFlag property. + * + */ + public void setExplainFlag(boolean value) { + this.explainFlag = value; + } + + /** + * Gets the value of the numReturn property. + * + */ + public int getNumReturn() { + return numReturn; + } + + /** + * Sets the value of the numReturn property. + * + */ + public void setNumReturn(int value) { + this.numReturn = value; + } + + /** + * Gets the value of the numNsAccepted property. + * + */ + public int getNumNsAccepted() { + return numNsAccepted; + } + + /** + * Sets the value of the numNsAccepted property. + * + */ + public void setNumNsAccepted(int value) { + this.numNsAccepted = value; + } + + /** + * Gets the value of the leftExplain property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getLeftExplain() { + return leftExplain; + } + + /** + * Sets the value of the leftExplain property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLeftExplain(String value) { + this.leftExplain = value; + } + + /** + * Gets the value of the rightExplain property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getRightExplain() { + return rightExplain; + } + + /** + * Sets the value of the rightExplain property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRightExplain(String value) { + this.rightExplain = value; + } + + /** + * Gets the value of the pairExplain property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getPairExplain() { + return pairExplain; + } + + /** + * Sets the value of the pairExplain property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setPairExplain(String value) { + this.pairExplain = value; + } + + /** + * Gets the value of the leftNumReturned property. + * + */ + public int getLeftNumReturned() { + return leftNumReturned; + } + + /** + * Sets the value of the leftNumReturned property. + * + */ + public void setLeftNumReturned(int value) { + this.leftNumReturned = value; + } + + /** + * Gets the value of the rightNumReturned property. + * + */ + public int getRightNumReturned() { + return rightNumReturned; + } + + /** + * Sets the value of the rightNumReturned property. + * + */ + public void setRightNumReturned(int value) { + this.rightNumReturned = value; + } + + /** + * Gets the value of the internalNumReturned property. + * + */ + public int getInternalNumReturned() { + return internalNumReturned; + } + + /** + * Sets the value of the internalNumReturned property. + * + */ + public void setInternalNumReturned(int value) { + this.internalNumReturned = value; + } + + /** + * Gets the value of the pairNumReturned property. + * + */ + public int getPairNumReturned() { + return pairNumReturned; + } + + /** + * Sets the value of the pairNumReturned property. + * + */ + public void setPairNumReturned(int value) { + this.pairNumReturned = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java-- new file mode 100644 index 000000000..4763d993e --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java-- @@ -0,0 +1,563 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2013.10.25 at 10:52:22 AM EST +// + + +package org.qcmg.unused.primeroutput; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + *

Java class for primerOutputHeader complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="primerOutputHeader">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="sequenceId" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="sequenceTemplate" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="sequenceTarget" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="productMinTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="productMaxTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="dnaConc" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="saltConc" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="minTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="optTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="maxTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="minSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="optSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="maxSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="productSizeRange" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="explainFlag" type="{http://www.w3.org/2001/XMLSchema}boolean"/>
+ *         <element name="numReturn" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="numNsAccepted" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="leftExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="rightExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="pairExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="leftNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="rightNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="internalNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="pairNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * </pre>
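+ * 
+ * <p>Illustrative note, not part of the generated schema documentation: these properties
+ * correspond one-to-one with the Primer3-style KEY=value header lines (for example SEQUENCE_ID,
+ * PRIMER_MIN_TM, PRIMER_PRODUCT_SIZE_RANGE), which PrimerOutputHeaderSerializer reads one line
+ * at a time in the order listed above.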
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "primerOutputHeader", propOrder = { + "sequenceId", + "sequenceTemplate", + "sequenceTarget", + "productMinTm", + "productMaxTm", + "dnaConc", + "saltConc", + "minTm", + "optTm", + "maxTm", + "minSize", + "optSize", + "maxSize", + "productSizeRange", + "explainFlag", + "numReturn", + "numNsAccepted", + "leftExplain", + "rightExplain", + "pairExplain", + "leftNumReturned", + "rightNumReturned", + "internalNumReturned", + "pairNumReturned" +}) +public class PrimerOutputHeader { + + @XmlElement(required = true) + protected String sequenceId; + @XmlElement(required = true) + protected String sequenceTemplate; + @XmlElement(required = true) + protected String sequenceTarget; + protected int productMinTm; + protected int productMaxTm; + protected double dnaConc; + protected double saltConc; + protected int minTm; + protected int optTm; + protected int maxTm; + protected int minSize; + protected int optSize; + protected int maxSize; + @XmlElement(required = true) + protected String productSizeRange; + protected boolean explainFlag; + protected int numReturn; + protected int numNsAccepted; + @XmlElement(required = true) + protected String leftExplain; + @XmlElement(required = true) + protected String rightExplain; + @XmlElement(required = true) + protected String pairExplain; + protected int leftNumReturned; + protected int rightNumReturned; + protected int internalNumReturned; + protected int pairNumReturned; + + /** + * Gets the value of the sequenceId property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequenceId() { + return sequenceId; + } + + /** + * Sets the value of the sequenceId property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequenceId(String value) { + this.sequenceId = value; + } + + /** + * Gets the value of the sequenceTemplate property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequenceTemplate() { + return sequenceTemplate; + } + + /** + * Sets the value of the sequenceTemplate property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequenceTemplate(String value) { + this.sequenceTemplate = value; + } + + /** + * Gets the value of the sequenceTarget property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequenceTarget() { + return sequenceTarget; + } + + /** + * Sets the value of the sequenceTarget property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequenceTarget(String value) { + this.sequenceTarget = value; + } + + /** + * Gets the value of the productMinTm property. + * + */ + public int getProductMinTm() { + return productMinTm; + } + + /** + * Sets the value of the productMinTm property. + * + */ + public void setProductMinTm(int value) { + this.productMinTm = value; + } + + /** + * Gets the value of the productMaxTm property. + * + */ + public int getProductMaxTm() { + return productMaxTm; + } + + /** + * Sets the value of the productMaxTm property. + * + */ + public void setProductMaxTm(int value) { + this.productMaxTm = value; + } + + /** + * Gets the value of the dnaConc property. + * + */ + public double getDnaConc() { + return dnaConc; + } + + /** + * Sets the value of the dnaConc property. + * + */ + public void setDnaConc(double value) { + this.dnaConc = value; + } + + /** + * Gets the value of the saltConc property. 
+ * + */ + public double getSaltConc() { + return saltConc; + } + + /** + * Sets the value of the saltConc property. + * + */ + public void setSaltConc(double value) { + this.saltConc = value; + } + + /** + * Gets the value of the minTm property. + * + */ + public int getMinTm() { + return minTm; + } + + /** + * Sets the value of the minTm property. + * + */ + public void setMinTm(int value) { + this.minTm = value; + } + + /** + * Gets the value of the optTm property. + * + */ + public int getOptTm() { + return optTm; + } + + /** + * Sets the value of the optTm property. + * + */ + public void setOptTm(int value) { + this.optTm = value; + } + + /** + * Gets the value of the maxTm property. + * + */ + public int getMaxTm() { + return maxTm; + } + + /** + * Sets the value of the maxTm property. + * + */ + public void setMaxTm(int value) { + this.maxTm = value; + } + + /** + * Gets the value of the minSize property. + * + */ + public int getMinSize() { + return minSize; + } + + /** + * Sets the value of the minSize property. + * + */ + public void setMinSize(int value) { + this.minSize = value; + } + + /** + * Gets the value of the optSize property. + * + */ + public int getOptSize() { + return optSize; + } + + /** + * Sets the value of the optSize property. + * + */ + public void setOptSize(int value) { + this.optSize = value; + } + + /** + * Gets the value of the maxSize property. + * + */ + public int getMaxSize() { + return maxSize; + } + + /** + * Sets the value of the maxSize property. + * + */ + public void setMaxSize(int value) { + this.maxSize = value; + } + + /** + * Gets the value of the productSizeRange property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getProductSizeRange() { + return productSizeRange; + } + + /** + * Sets the value of the productSizeRange property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setProductSizeRange(String value) { + this.productSizeRange = value; + } + + /** + * Gets the value of the explainFlag property. + * + */ + public boolean isExplainFlag() { + return explainFlag; + } + + /** + * Sets the value of the explainFlag property. + * + */ + public void setExplainFlag(boolean value) { + this.explainFlag = value; + } + + /** + * Gets the value of the numReturn property. + * + */ + public int getNumReturn() { + return numReturn; + } + + /** + * Sets the value of the numReturn property. + * + */ + public void setNumReturn(int value) { + this.numReturn = value; + } + + /** + * Gets the value of the numNsAccepted property. + * + */ + public int getNumNsAccepted() { + return numNsAccepted; + } + + /** + * Sets the value of the numNsAccepted property. + * + */ + public void setNumNsAccepted(int value) { + this.numNsAccepted = value; + } + + /** + * Gets the value of the leftExplain property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getLeftExplain() { + return leftExplain; + } + + /** + * Sets the value of the leftExplain property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLeftExplain(String value) { + this.leftExplain = value; + } + + /** + * Gets the value of the rightExplain property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getRightExplain() { + return rightExplain; + } + + /** + * Sets the value of the rightExplain property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRightExplain(String value) { + this.rightExplain = value; + } + + /** + * Gets the value of the pairExplain property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getPairExplain() { + return pairExplain; + } + + /** + * Sets the value of the pairExplain property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setPairExplain(String value) { + this.pairExplain = value; + } + + /** + * Gets the value of the leftNumReturned property. + * + */ + public int getLeftNumReturned() { + return leftNumReturned; + } + + /** + * Sets the value of the leftNumReturned property. + * + */ + public void setLeftNumReturned(int value) { + this.leftNumReturned = value; + } + + /** + * Gets the value of the rightNumReturned property. + * + */ + public int getRightNumReturned() { + return rightNumReturned; + } + + /** + * Sets the value of the rightNumReturned property. + * + */ + public void setRightNumReturned(int value) { + this.rightNumReturned = value; + } + + /** + * Gets the value of the internalNumReturned property. + * + */ + public int getInternalNumReturned() { + return internalNumReturned; + } + + /** + * Sets the value of the internalNumReturned property. + * + */ + public void setInternalNumReturned(int value) { + this.internalNumReturned = value; + } + + /** + * Gets the value of the pairNumReturned property. + * + */ + public int getPairNumReturned() { + return pairNumReturned; + } + + /** + * Sets the value of the pairNumReturned property. + * + */ + public void setPairNumReturned(int value) { + this.pairNumReturned = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java new file mode 100644 index 000000000..f0accc892 --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java @@ -0,0 +1,128 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.primeroutput; + +import java.io.BufferedReader; + +import org.qcmg.qmule.record.Serializer; + +public class PrimerOutputHeaderSerializer extends + Serializer { + private final static String[] FIELDS = { + "SEQUENCE_ID", + "SEQUENCE_TEMPLATE", + "SEQUENCE_TARGET", + "PRIMER_PRODUCT_MIN_TM", + "PRIMER_PRODUCT_MAX_TM", + "PRIMER_DNA_CONC", + "PRIMER_SALT_CONC", + "PRIMER_MIN_TM", + "PRIMER_OPT_TM", + "PRIMER_MAX_TM", + "PRIMER_MIN_SIZE", + "PRIMER_OPT_SIZE", + "PRIMER_MAX_SIZE", + "PRIMER_PRODUCT_SIZE_RANGE", + "PRIMER_EXPLAIN_FLAG", + "PRIMER_NUM_RETURN", + "PRIMER_NUM_NS_ACCEPTED", + "PRIMER_LEFT_EXPLAIN", + "PRIMER_RIGHT_EXPLAIN", + "PRIMER_PAIR_EXPLAIN", + "PRIMER_LEFT_NUM_RETURNED", + "PRIMER_RIGHT_NUM_RETURNED", + "PRIMER_INTERNAL_NUM_RETURNED", + "PRIMER_PAIR_NUM_RETURNED"}; + + static { + assert(24 == FIELDS.length); + } + + @Override + public PrimerOutputHeader parseRecord(final BufferedReader reader) + throws Exception { + PrimerOutputHeader result = new PrimerOutputHeader(); + result.setSequenceId(nextStringValue(reader)); + result.setSequenceTemplate(nextStringValue(reader)); + result.setSequenceTarget(nextStringValue(reader)); + result.setProductMinTm(nextIntegerValue(reader)); + result.setProductMaxTm(nextIntegerValue(reader)); + result.setDnaConc(nextDoubleValue(reader)); + result.setSaltConc(nextDoubleValue(reader)); + result.setMinTm(nextIntegerValue(reader)); + result.setOptTm(nextIntegerValue(reader)); + result.setMaxTm(nextIntegerValue(reader)); + result.setMinSize(nextIntegerValue(reader)); + result.setOptSize(nextIntegerValue(reader)); + result.setMaxSize(nextIntegerValue(reader)); + result.setProductSizeRange(nextStringValue(reader)); + result.setExplainFlag(nextBooleanValue(reader)); + result.setNumReturn(nextIntegerValue(reader)); + result.setNumNsAccepted(nextIntegerValue(reader)); + result.setLeftExplain(nextStringValue(reader)); + result.setRightExplain(nextStringValue(reader)); + result.setPairExplain(nextStringValue(reader)); + result.setLeftNumReturned(nextIntegerValue(reader)); + result.setRightNumReturned(nextIntegerValue(reader)); + result.setInternalNumReturned(nextIntegerValue(reader)); + result.setPairNumReturned(nextIntegerValue(reader)); + return result; + } + + @Override + public String serialise(PrimerOutputHeader record) throws Exception { + String result = createLine(FIELDS[0], record.getSequenceId()); + result += FIELDS[1] + EQUALS + record.getSequenceTemplate() + NEWLINE; + result += FIELDS[2] + EQUALS + record.getSequenceTarget() + NEWLINE; + result += FIELDS[3] + EQUALS + record.getProductMinTm() + NEWLINE; + result += FIELDS[4] + EQUALS + record.getProductMaxTm() + NEWLINE; + result += FIELDS[5] + EQUALS + record.getDnaConc() + NEWLINE; + result += FIELDS[6] + EQUALS + record.getSaltConc() + NEWLINE; + result += FIELDS[7] + EQUALS + record.getMinTm() + NEWLINE; + result += FIELDS[8] + EQUALS + record.getOptTm() + NEWLINE; + result += FIELDS[9] + EQUALS + record.getMaxTm() + NEWLINE; + result += FIELDS[10] + EQUALS + record.getMinSize() + NEWLINE; + result += FIELDS[11] + EQUALS + record.getOptSize() + NEWLINE; + result += FIELDS[12] + EQUALS + record.getMaxSize() + NEWLINE; + result += FIELDS[13] + EQUALS + record.getProductSizeRange() + NEWLINE; + result += FIELDS[14] + EQUALS + record.isExplainFlag() + NEWLINE; + result += FIELDS[15] + EQUALS + record.getNumReturn() + NEWLINE; + result += FIELDS[16] + EQUALS + record.getNumNsAccepted() + NEWLINE; + result += FIELDS[17] + EQUALS + record.getLeftExplain() + NEWLINE; 
+ result += FIELDS[18] + EQUALS + record.getRightExplain() + NEWLINE; + result += FIELDS[19] + EQUALS + record.getPairExplain() + NEWLINE; + result += FIELDS[20] + EQUALS + record.getLeftNumReturned() + NEWLINE; + result += FIELDS[21] + EQUALS + record.getRightNumReturned() + NEWLINE; + result += FIELDS[22] + EQUALS + record.getInternalNumReturned() + NEWLINE; + result += FIELDS[23] + EQUALS + record.getPairNumReturned() + NEWLINE; + return result; + } + + private String createLine(final String fieldName, final String fieldValue) { + return fieldName + EQUALS + fieldValue + NEWLINE; + } + + private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { + return Double.parseDouble(nextStringValue(reader)); + } + + private boolean nextBooleanValue(BufferedReader reader) throws Exception { + return Boolean.parseBoolean(nextStringValue(reader)); + } + + private String nextStringValue(final BufferedReader reader) + throws Exception { + final String line = reader.readLine(); + final String[] params = equalsPattern.split(line); + if (2 != params.length) { + throw new Exception("Bad format. Insufficient columns: '" + line + + "'"); + } + return params[1].trim(); + } + + private int nextIntegerValue(final BufferedReader reader) throws Exception { + return Integer.parseInt(nextStringValue(reader)); + } +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java-- new file mode 100644 index 000000000..fbb1e483e --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java-- @@ -0,0 +1,128 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.primeroutput; + +import java.io.BufferedReader; + +import org.qcmg.unused.record.Serializer; + +public class PrimerOutputHeaderSerializer extends + Serializer { + private final static String[] FIELDS = { + "SEQUENCE_ID", + "SEQUENCE_TEMPLATE", + "SEQUENCE_TARGET", + "PRIMER_PRODUCT_MIN_TM", + "PRIMER_PRODUCT_MAX_TM", + "PRIMER_DNA_CONC", + "PRIMER_SALT_CONC", + "PRIMER_MIN_TM", + "PRIMER_OPT_TM", + "PRIMER_MAX_TM", + "PRIMER_MIN_SIZE", + "PRIMER_OPT_SIZE", + "PRIMER_MAX_SIZE", + "PRIMER_PRODUCT_SIZE_RANGE", + "PRIMER_EXPLAIN_FLAG", + "PRIMER_NUM_RETURN", + "PRIMER_NUM_NS_ACCEPTED", + "PRIMER_LEFT_EXPLAIN", + "PRIMER_RIGHT_EXPLAIN", + "PRIMER_PAIR_EXPLAIN", + "PRIMER_LEFT_NUM_RETURNED", + "PRIMER_RIGHT_NUM_RETURNED", + "PRIMER_INTERNAL_NUM_RETURNED", + "PRIMER_PAIR_NUM_RETURNED"}; + + static { + assert(24 == FIELDS.length); + } + + @Override + public PrimerOutputHeader parseRecord(final BufferedReader reader) + throws Exception { + PrimerOutputHeader result = new PrimerOutputHeader(); + result.setSequenceId(nextStringValue(reader)); + result.setSequenceTemplate(nextStringValue(reader)); + result.setSequenceTarget(nextStringValue(reader)); + result.setProductMinTm(nextIntegerValue(reader)); + result.setProductMaxTm(nextIntegerValue(reader)); + result.setDnaConc(nextDoubleValue(reader)); + result.setSaltConc(nextDoubleValue(reader)); + result.setMinTm(nextIntegerValue(reader)); + result.setOptTm(nextIntegerValue(reader)); + result.setMaxTm(nextIntegerValue(reader)); + result.setMinSize(nextIntegerValue(reader)); + result.setOptSize(nextIntegerValue(reader)); + result.setMaxSize(nextIntegerValue(reader)); + result.setProductSizeRange(nextStringValue(reader)); + result.setExplainFlag(nextBooleanValue(reader)); + result.setNumReturn(nextIntegerValue(reader)); + result.setNumNsAccepted(nextIntegerValue(reader)); + result.setLeftExplain(nextStringValue(reader)); + result.setRightExplain(nextStringValue(reader)); + result.setPairExplain(nextStringValue(reader)); + result.setLeftNumReturned(nextIntegerValue(reader)); + result.setRightNumReturned(nextIntegerValue(reader)); + result.setInternalNumReturned(nextIntegerValue(reader)); + result.setPairNumReturned(nextIntegerValue(reader)); + return result; + } + + @Override + public String serialise(PrimerOutputHeader record) throws Exception { + String result = createLine(FIELDS[0], record.getSequenceId()); + result += FIELDS[1] + EQUALS + record.getSequenceTemplate() + NEWLINE; + result += FIELDS[2] + EQUALS + record.getSequenceTarget() + NEWLINE; + result += FIELDS[3] + EQUALS + record.getProductMinTm() + NEWLINE; + result += FIELDS[4] + EQUALS + record.getProductMaxTm() + NEWLINE; + result += FIELDS[5] + EQUALS + record.getDnaConc() + NEWLINE; + result += FIELDS[6] + EQUALS + record.getSaltConc() + NEWLINE; + result += FIELDS[7] + EQUALS + record.getMinTm() + NEWLINE; + result += FIELDS[8] + EQUALS + record.getOptTm() + NEWLINE; + result += FIELDS[9] + EQUALS + record.getMaxTm() + NEWLINE; + result += FIELDS[10] + EQUALS + record.getMinSize() + NEWLINE; + result += FIELDS[11] + EQUALS + record.getOptSize() + NEWLINE; + result += FIELDS[12] + EQUALS + record.getMaxSize() + NEWLINE; + result += FIELDS[13] + EQUALS + record.getProductSizeRange() + NEWLINE; + result += FIELDS[14] + EQUALS + record.isExplainFlag() + NEWLINE; + result += FIELDS[15] + EQUALS + record.getNumReturn() + NEWLINE; + result += FIELDS[16] + EQUALS + record.getNumNsAccepted() + NEWLINE; + result += FIELDS[17] + EQUALS + record.getLeftExplain() + 
NEWLINE; + result += FIELDS[18] + EQUALS + record.getRightExplain() + NEWLINE; + result += FIELDS[19] + EQUALS + record.getPairExplain() + NEWLINE; + result += FIELDS[20] + EQUALS + record.getLeftNumReturned() + NEWLINE; + result += FIELDS[21] + EQUALS + record.getRightNumReturned() + NEWLINE; + result += FIELDS[22] + EQUALS + record.getInternalNumReturned() + NEWLINE; + result += FIELDS[23] + EQUALS + record.getPairNumReturned() + NEWLINE; + return result; + } + + private String createLine(final String fieldName, final String fieldValue) { + return fieldName + EQUALS + fieldValue + NEWLINE; + } + + private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { + return Double.parseDouble(nextStringValue(reader)); + } + + private boolean nextBooleanValue(BufferedReader reader) throws Exception { + return Boolean.parseBoolean(nextStringValue(reader)); + } + + private String nextStringValue(final BufferedReader reader) + throws Exception { + final String line = reader.readLine(); + final String[] params = equalsPattern.split(line); + if (2 != params.length) { + throw new Exception("Bad format. Insufficient columns: '" + line + + "'"); + } + return params[1].trim(); + } + + private int nextIntegerValue(final BufferedReader reader) throws Exception { + return Integer.parseInt(nextStringValue(reader)); + } +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java new file mode 100644 index 000000000..f208e770b --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java @@ -0,0 +1,517 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2013.10.25 at 10:52:22 AM EST +// + + +package org.qcmg.unused.primeroutput; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + *

+ * <p>Java class for primerOutputRecord complex type.
+ * 
+ * <p>The following schema fragment specifies the expected content contained within this class.
+ * 
+ * <pre>
+ * <complexType name="primerOutputRecord">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="pairPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftSequence" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="rightSequence" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="left" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="right" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="leftTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftGcPercent" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightGcPercent" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftSelfAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightSelfAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftSelfEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightSelfEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftEndStability" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightEndStability" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="pairComplAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="pairComplEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="pairProductSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="pairProductTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="pairProductTmOligoTmDiff" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="pairTOptA" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * </pre>
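+ * 
+ * <p>Illustrative note, not part of the generated schema documentation: each record property maps
+ * to a Primer3 output key assembled from the prefix/suffix pairs declared in
+ * PrimerOutputRecordSerializer (for example PRIMER_PAIR_..._PENALTY or PRIMER_LEFT_..._SEQUENCE);
+ * the serializer parses one value per line in the order listed above.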
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "primerOutputRecord", propOrder = { + "pairPenalty", + "leftPenalty", + "rightPenalty", + "leftSequence", + "rightSequence", + "left", + "right", + "leftTm", + "rightTm", + "leftGcPercent", + "rightGcPercent", + "leftSelfAny", + "rightSelfAny", + "leftSelfEnd", + "rightSelfEnd", + "leftEndStability", + "rightEndStability", + "pairComplAny", + "pairComplEnd", + "pairProductSize", + "pairProductTm", + "pairProductTmOligoTmDiff", + "pairTOptA" +}) +public class PrimerOutputRecord { + + protected double pairPenalty; + protected double leftPenalty; + protected double rightPenalty; + @XmlElement(required = true) + protected String leftSequence; + @XmlElement(required = true) + protected String rightSequence; + @XmlElement(required = true) + protected String left; + @XmlElement(required = true) + protected String right; + protected double leftTm; + protected double rightTm; + protected double leftGcPercent; + protected double rightGcPercent; + protected double leftSelfAny; + protected double rightSelfAny; + protected double leftSelfEnd; + protected double rightSelfEnd; + protected double leftEndStability; + protected double rightEndStability; + protected double pairComplAny; + protected double pairComplEnd; + protected int pairProductSize; + protected double pairProductTm; + protected double pairProductTmOligoTmDiff; + protected double pairTOptA; + + /** + * Gets the value of the pairPenalty property. + * + */ + public double getPairPenalty() { + return pairPenalty; + } + + /** + * Sets the value of the pairPenalty property. + * + */ + public void setPairPenalty(double value) { + this.pairPenalty = value; + } + + /** + * Gets the value of the leftPenalty property. + * + */ + public double getLeftPenalty() { + return leftPenalty; + } + + /** + * Sets the value of the leftPenalty property. + * + */ + public void setLeftPenalty(double value) { + this.leftPenalty = value; + } + + /** + * Gets the value of the rightPenalty property. + * + */ + public double getRightPenalty() { + return rightPenalty; + } + + /** + * Sets the value of the rightPenalty property. + * + */ + public void setRightPenalty(double value) { + this.rightPenalty = value; + } + + /** + * Gets the value of the leftSequence property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getLeftSequence() { + return leftSequence; + } + + /** + * Sets the value of the leftSequence property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLeftSequence(String value) { + this.leftSequence = value; + } + + /** + * Gets the value of the rightSequence property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getRightSequence() { + return rightSequence; + } + + /** + * Sets the value of the rightSequence property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRightSequence(String value) { + this.rightSequence = value; + } + + /** + * Gets the value of the left property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getLeft() { + return left; + } + + /** + * Sets the value of the left property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLeft(String value) { + this.left = value; + } + + /** + * Gets the value of the right property. 
+ * + * @return + * possible object is + * {@link String } + * + */ + public String getRight() { + return right; + } + + /** + * Sets the value of the right property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRight(String value) { + this.right = value; + } + + /** + * Gets the value of the leftTm property. + * + */ + public double getLeftTm() { + return leftTm; + } + + /** + * Sets the value of the leftTm property. + * + */ + public void setLeftTm(double value) { + this.leftTm = value; + } + + /** + * Gets the value of the rightTm property. + * + */ + public double getRightTm() { + return rightTm; + } + + /** + * Sets the value of the rightTm property. + * + */ + public void setRightTm(double value) { + this.rightTm = value; + } + + /** + * Gets the value of the leftGcPercent property. + * + */ + public double getLeftGcPercent() { + return leftGcPercent; + } + + /** + * Sets the value of the leftGcPercent property. + * + */ + public void setLeftGcPercent(double value) { + this.leftGcPercent = value; + } + + /** + * Gets the value of the rightGcPercent property. + * + */ + public double getRightGcPercent() { + return rightGcPercent; + } + + /** + * Sets the value of the rightGcPercent property. + * + */ + public void setRightGcPercent(double value) { + this.rightGcPercent = value; + } + + /** + * Gets the value of the leftSelfAny property. + * + */ + public double getLeftSelfAny() { + return leftSelfAny; + } + + /** + * Sets the value of the leftSelfAny property. + * + */ + public void setLeftSelfAny(double value) { + this.leftSelfAny = value; + } + + /** + * Gets the value of the rightSelfAny property. + * + */ + public double getRightSelfAny() { + return rightSelfAny; + } + + /** + * Sets the value of the rightSelfAny property. + * + */ + public void setRightSelfAny(double value) { + this.rightSelfAny = value; + } + + /** + * Gets the value of the leftSelfEnd property. + * + */ + public double getLeftSelfEnd() { + return leftSelfEnd; + } + + /** + * Sets the value of the leftSelfEnd property. + * + */ + public void setLeftSelfEnd(double value) { + this.leftSelfEnd = value; + } + + /** + * Gets the value of the rightSelfEnd property. + * + */ + public double getRightSelfEnd() { + return rightSelfEnd; + } + + /** + * Sets the value of the rightSelfEnd property. + * + */ + public void setRightSelfEnd(double value) { + this.rightSelfEnd = value; + } + + /** + * Gets the value of the leftEndStability property. + * + */ + public double getLeftEndStability() { + return leftEndStability; + } + + /** + * Sets the value of the leftEndStability property. + * + */ + public void setLeftEndStability(double value) { + this.leftEndStability = value; + } + + /** + * Gets the value of the rightEndStability property. + * + */ + public double getRightEndStability() { + return rightEndStability; + } + + /** + * Sets the value of the rightEndStability property. + * + */ + public void setRightEndStability(double value) { + this.rightEndStability = value; + } + + /** + * Gets the value of the pairComplAny property. + * + */ + public double getPairComplAny() { + return pairComplAny; + } + + /** + * Sets the value of the pairComplAny property. + * + */ + public void setPairComplAny(double value) { + this.pairComplAny = value; + } + + /** + * Gets the value of the pairComplEnd property. + * + */ + public double getPairComplEnd() { + return pairComplEnd; + } + + /** + * Sets the value of the pairComplEnd property. 
+ * + */ + public void setPairComplEnd(double value) { + this.pairComplEnd = value; + } + + /** + * Gets the value of the pairProductSize property. + * + */ + public int getPairProductSize() { + return pairProductSize; + } + + /** + * Sets the value of the pairProductSize property. + * + */ + public void setPairProductSize(int value) { + this.pairProductSize = value; + } + + /** + * Gets the value of the pairProductTm property. + * + */ + public double getPairProductTm() { + return pairProductTm; + } + + /** + * Sets the value of the pairProductTm property. + * + */ + public void setPairProductTm(double value) { + this.pairProductTm = value; + } + + /** + * Gets the value of the pairProductTmOligoTmDiff property. + * + */ + public double getPairProductTmOligoTmDiff() { + return pairProductTmOligoTmDiff; + } + + /** + * Sets the value of the pairProductTmOligoTmDiff property. + * + */ + public void setPairProductTmOligoTmDiff(double value) { + this.pairProductTmOligoTmDiff = value; + } + + /** + * Gets the value of the pairTOptA property. + * + */ + public double getPairTOptA() { + return pairTOptA; + } + + /** + * Sets the value of the pairTOptA property. + * + */ + public void setPairTOptA(double value) { + this.pairTOptA = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java-- new file mode 100644 index 000000000..f208e770b --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java-- @@ -0,0 +1,517 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2013.10.25 at 10:52:22 AM EST +// + + +package org.qcmg.unused.primeroutput; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + *

+ * <p>Java class for primerOutputRecord complex type.
+ * 
+ * <p>The following schema fragment specifies the expected content contained within this class.
+ * 
+ * <pre>
+ * <complexType name="primerOutputRecord">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="pairPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftSequence" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="rightSequence" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="left" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="right" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="leftTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftGcPercent" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightGcPercent" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftSelfAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightSelfAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftSelfEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightSelfEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="leftEndStability" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="rightEndStability" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="pairComplAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="pairComplEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="pairProductSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *         <element name="pairProductTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="pairProductTmOligoTmDiff" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *         <element name="pairTOptA" type="{http://www.w3.org/2001/XMLSchema}double"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * </pre>
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "primerOutputRecord", propOrder = { + "pairPenalty", + "leftPenalty", + "rightPenalty", + "leftSequence", + "rightSequence", + "left", + "right", + "leftTm", + "rightTm", + "leftGcPercent", + "rightGcPercent", + "leftSelfAny", + "rightSelfAny", + "leftSelfEnd", + "rightSelfEnd", + "leftEndStability", + "rightEndStability", + "pairComplAny", + "pairComplEnd", + "pairProductSize", + "pairProductTm", + "pairProductTmOligoTmDiff", + "pairTOptA" +}) +public class PrimerOutputRecord { + + protected double pairPenalty; + protected double leftPenalty; + protected double rightPenalty; + @XmlElement(required = true) + protected String leftSequence; + @XmlElement(required = true) + protected String rightSequence; + @XmlElement(required = true) + protected String left; + @XmlElement(required = true) + protected String right; + protected double leftTm; + protected double rightTm; + protected double leftGcPercent; + protected double rightGcPercent; + protected double leftSelfAny; + protected double rightSelfAny; + protected double leftSelfEnd; + protected double rightSelfEnd; + protected double leftEndStability; + protected double rightEndStability; + protected double pairComplAny; + protected double pairComplEnd; + protected int pairProductSize; + protected double pairProductTm; + protected double pairProductTmOligoTmDiff; + protected double pairTOptA; + + /** + * Gets the value of the pairPenalty property. + * + */ + public double getPairPenalty() { + return pairPenalty; + } + + /** + * Sets the value of the pairPenalty property. + * + */ + public void setPairPenalty(double value) { + this.pairPenalty = value; + } + + /** + * Gets the value of the leftPenalty property. + * + */ + public double getLeftPenalty() { + return leftPenalty; + } + + /** + * Sets the value of the leftPenalty property. + * + */ + public void setLeftPenalty(double value) { + this.leftPenalty = value; + } + + /** + * Gets the value of the rightPenalty property. + * + */ + public double getRightPenalty() { + return rightPenalty; + } + + /** + * Sets the value of the rightPenalty property. + * + */ + public void setRightPenalty(double value) { + this.rightPenalty = value; + } + + /** + * Gets the value of the leftSequence property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getLeftSequence() { + return leftSequence; + } + + /** + * Sets the value of the leftSequence property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLeftSequence(String value) { + this.leftSequence = value; + } + + /** + * Gets the value of the rightSequence property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getRightSequence() { + return rightSequence; + } + + /** + * Sets the value of the rightSequence property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRightSequence(String value) { + this.rightSequence = value; + } + + /** + * Gets the value of the left property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getLeft() { + return left; + } + + /** + * Sets the value of the left property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLeft(String value) { + this.left = value; + } + + /** + * Gets the value of the right property. 
+ * + * @return + * possible object is + * {@link String } + * + */ + public String getRight() { + return right; + } + + /** + * Sets the value of the right property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRight(String value) { + this.right = value; + } + + /** + * Gets the value of the leftTm property. + * + */ + public double getLeftTm() { + return leftTm; + } + + /** + * Sets the value of the leftTm property. + * + */ + public void setLeftTm(double value) { + this.leftTm = value; + } + + /** + * Gets the value of the rightTm property. + * + */ + public double getRightTm() { + return rightTm; + } + + /** + * Sets the value of the rightTm property. + * + */ + public void setRightTm(double value) { + this.rightTm = value; + } + + /** + * Gets the value of the leftGcPercent property. + * + */ + public double getLeftGcPercent() { + return leftGcPercent; + } + + /** + * Sets the value of the leftGcPercent property. + * + */ + public void setLeftGcPercent(double value) { + this.leftGcPercent = value; + } + + /** + * Gets the value of the rightGcPercent property. + * + */ + public double getRightGcPercent() { + return rightGcPercent; + } + + /** + * Sets the value of the rightGcPercent property. + * + */ + public void setRightGcPercent(double value) { + this.rightGcPercent = value; + } + + /** + * Gets the value of the leftSelfAny property. + * + */ + public double getLeftSelfAny() { + return leftSelfAny; + } + + /** + * Sets the value of the leftSelfAny property. + * + */ + public void setLeftSelfAny(double value) { + this.leftSelfAny = value; + } + + /** + * Gets the value of the rightSelfAny property. + * + */ + public double getRightSelfAny() { + return rightSelfAny; + } + + /** + * Sets the value of the rightSelfAny property. + * + */ + public void setRightSelfAny(double value) { + this.rightSelfAny = value; + } + + /** + * Gets the value of the leftSelfEnd property. + * + */ + public double getLeftSelfEnd() { + return leftSelfEnd; + } + + /** + * Sets the value of the leftSelfEnd property. + * + */ + public void setLeftSelfEnd(double value) { + this.leftSelfEnd = value; + } + + /** + * Gets the value of the rightSelfEnd property. + * + */ + public double getRightSelfEnd() { + return rightSelfEnd; + } + + /** + * Sets the value of the rightSelfEnd property. + * + */ + public void setRightSelfEnd(double value) { + this.rightSelfEnd = value; + } + + /** + * Gets the value of the leftEndStability property. + * + */ + public double getLeftEndStability() { + return leftEndStability; + } + + /** + * Sets the value of the leftEndStability property. + * + */ + public void setLeftEndStability(double value) { + this.leftEndStability = value; + } + + /** + * Gets the value of the rightEndStability property. + * + */ + public double getRightEndStability() { + return rightEndStability; + } + + /** + * Sets the value of the rightEndStability property. + * + */ + public void setRightEndStability(double value) { + this.rightEndStability = value; + } + + /** + * Gets the value of the pairComplAny property. + * + */ + public double getPairComplAny() { + return pairComplAny; + } + + /** + * Sets the value of the pairComplAny property. + * + */ + public void setPairComplAny(double value) { + this.pairComplAny = value; + } + + /** + * Gets the value of the pairComplEnd property. + * + */ + public double getPairComplEnd() { + return pairComplEnd; + } + + /** + * Sets the value of the pairComplEnd property. 
+ * + */ + public void setPairComplEnd(double value) { + this.pairComplEnd = value; + } + + /** + * Gets the value of the pairProductSize property. + * + */ + public int getPairProductSize() { + return pairProductSize; + } + + /** + * Sets the value of the pairProductSize property. + * + */ + public void setPairProductSize(int value) { + this.pairProductSize = value; + } + + /** + * Gets the value of the pairProductTm property. + * + */ + public double getPairProductTm() { + return pairProductTm; + } + + /** + * Sets the value of the pairProductTm property. + * + */ + public void setPairProductTm(double value) { + this.pairProductTm = value; + } + + /** + * Gets the value of the pairProductTmOligoTmDiff property. + * + */ + public double getPairProductTmOligoTmDiff() { + return pairProductTmOligoTmDiff; + } + + /** + * Sets the value of the pairProductTmOligoTmDiff property. + * + */ + public void setPairProductTmOligoTmDiff(double value) { + this.pairProductTmOligoTmDiff = value; + } + + /** + * Gets the value of the pairTOptA property. + * + */ + public double getPairTOptA() { + return pairTOptA; + } + + /** + * Sets the value of the pairTOptA property. + * + */ + public void setPairTOptA(double value) { + this.pairTOptA = value; + } + +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java new file mode 100644 index 000000000..8d469a54b --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java @@ -0,0 +1,136 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.primeroutput; + +import java.io.BufferedReader; +import java.io.IOException; + +import org.qcmg.qmule.record.Serializer; + +public final class PrimerOutputRecordSerializer extends + Serializer { + private final static String[] FIELD_PREFIXES = { + "PRIMER_PAIR_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_PAIR_", + "PRIMER_PAIR_", + "PRIMER_PAIR_", + "PRIMER_PAIR_", + "PRIMER_PAIR_", + "PRIMER_PAIR_"}; + + private final static String[] FIELD_SUFFIXES = { + "_PENALTY", + "_PENALTY", + "_PENALTY", + "_SEQUENCE", + "_SEQUENCE", + "", + "", + "_TM", + "_TM", + "_GC_PERCENT", + "_GC_PERCENT", + "_SELF_ANY", + "_SELF_ANY", + "_SELF_END", + "_SELF_END", + "_END_STABILITY", + "_END_STABILITY", + "_COMPL_ANY", + "_COMPL_END", + "_PRODUCT_SIZE", + "_PRODUCT_TM", + "_PRODUCT_TM_OLIGO_TM_DIFF", + "_T_OPT_A"}; + + static { + assert(FIELD_PREFIXES.length == FIELD_SUFFIXES.length); + } + + public PrimerOutputRecord parseRecord(final BufferedReader reader) + throws Exception { + String nextLine = reader.readLine(); + if (null == nextLine) { + return null; + } + PrimerOutputRecord result = new PrimerOutputRecord(); + result.setPairPenalty(doubleValue(nextLine)); + result.setLeftPenalty(nextDoubleValue(reader)); + result.setRightPenalty(nextDoubleValue(reader)); + result.setLeftSequence(nextStringValue(reader)); + result.setRightSequence(nextStringValue(reader)); + result.setLeft(nextStringValue(reader)); + result.setRight(nextStringValue(reader)); + result.setLeftTm(nextDoubleValue(reader)); + result.setRightTm(nextDoubleValue(reader)); + 
result.setLeftGcPercent(nextDoubleValue(reader)); + result.setRightGcPercent(nextDoubleValue(reader)); + result.setLeftSelfAny(nextDoubleValue(reader)); + result.setRightSelfAny(nextDoubleValue(reader)); + result.setLeftSelfEnd(nextDoubleValue(reader)); + result.setRightSelfEnd(nextDoubleValue(reader)); + result.setLeftEndStability(nextDoubleValue(reader)); + result.setRightEndStability(nextDoubleValue(reader)); + result.setPairComplAny(nextDoubleValue(reader)); + result.setPairComplEnd(nextDoubleValue(reader)); + result.setPairProductSize(nextIntegerValue(reader)); + result.setPairProductTm(nextDoubleValue(reader)); + result.setPairProductTmOligoTmDiff(nextDoubleValue(reader)); + result.setPairTOptA(nextDoubleValue(reader)); + return result; + } + + public String serialise(final PrimerOutputRecord record) throws Exception { + String result = null; + return result; + } + + private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { + return Double.parseDouble(nextStringValue(reader)); + } + + private double doubleValue(final String line) throws Exception { + final String[] params = equalsPattern.split(line); + if (2 != params.length) { + throw new Exception("Bad format. Insufficient columns: '" + line + + "'"); + } + return Double.parseDouble(params[1].trim()); + } + + private String nextStringValue(final BufferedReader reader) + throws Exception { + final String line = reader.readLine(); + return stringValue(line); + } + + private String stringValue(final String line) throws Exception { + final String[] params = equalsPattern.split(line); + if (2 != params.length) { + throw new Exception("Bad format. Insufficient columns: '" + line + + "'"); + } + return params[1].trim(); + } + + private int nextIntegerValue(final BufferedReader reader) throws Exception { + return Integer.parseInt(nextStringValue(reader)); + } +} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java-- new file mode 100644 index 000000000..da0a4d385 --- /dev/null +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java-- @@ -0,0 +1,136 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.primeroutput; + +import java.io.BufferedReader; +import java.io.IOException; + +import org.qcmg.unused.record.Serializer; + +public final class PrimerOutputRecordSerializer extends + Serializer { + private final static String[] FIELD_PREFIXES = { + "PRIMER_PAIR_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_LEFT_", + "PRIMER_RIGHT_", + "PRIMER_PAIR_", + "PRIMER_PAIR_", + "PRIMER_PAIR_", + "PRIMER_PAIR_", + "PRIMER_PAIR_", + "PRIMER_PAIR_"}; + + private final static String[] FIELD_SUFFIXES = { + "_PENALTY", + "_PENALTY", + "_PENALTY", + "_SEQUENCE", + "_SEQUENCE", + "", + "", + "_TM", + "_TM", + "_GC_PERCENT", + "_GC_PERCENT", + "_SELF_ANY", + "_SELF_ANY", + "_SELF_END", + "_SELF_END", + "_END_STABILITY", + "_END_STABILITY", + "_COMPL_ANY", + "_COMPL_END", + "_PRODUCT_SIZE", + "_PRODUCT_TM", + "_PRODUCT_TM_OLIGO_TM_DIFF", + "_T_OPT_A"}; + + static { + assert(FIELD_PREFIXES.length == FIELD_SUFFIXES.length); + } + + public PrimerOutputRecord parseRecord(final BufferedReader reader) + throws Exception { + String nextLine = reader.readLine(); + if (null == nextLine) { + return null; + } + PrimerOutputRecord result = new PrimerOutputRecord(); + result.setPairPenalty(doubleValue(nextLine)); + result.setLeftPenalty(nextDoubleValue(reader)); + result.setRightPenalty(nextDoubleValue(reader)); + result.setLeftSequence(nextStringValue(reader)); + result.setRightSequence(nextStringValue(reader)); + result.setLeft(nextStringValue(reader)); + result.setRight(nextStringValue(reader)); + result.setLeftTm(nextDoubleValue(reader)); + result.setRightTm(nextDoubleValue(reader)); + result.setLeftGcPercent(nextDoubleValue(reader)); + result.setRightGcPercent(nextDoubleValue(reader)); + result.setLeftSelfAny(nextDoubleValue(reader)); + result.setRightSelfAny(nextDoubleValue(reader)); + result.setLeftSelfEnd(nextDoubleValue(reader)); + result.setRightSelfEnd(nextDoubleValue(reader)); + result.setLeftEndStability(nextDoubleValue(reader)); + result.setRightEndStability(nextDoubleValue(reader)); + result.setPairComplAny(nextDoubleValue(reader)); + result.setPairComplEnd(nextDoubleValue(reader)); + result.setPairProductSize(nextIntegerValue(reader)); + result.setPairProductTm(nextDoubleValue(reader)); + result.setPairProductTmOligoTmDiff(nextDoubleValue(reader)); + result.setPairTOptA(nextDoubleValue(reader)); + return result; + } + + public String serialise(final PrimerOutputRecord record) throws Exception { + String result = null; + return result; + } + + private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { + return Double.parseDouble(nextStringValue(reader)); + } + + private double doubleValue(final String line) throws Exception { + final String[] params = equalsPattern.split(line); + if (2 != params.length) { + throw new Exception("Bad format. Insufficient columns: '" + line + + "'"); + } + return Double.parseDouble(params[1].trim()); + } + + private String nextStringValue(final BufferedReader reader) + throws Exception { + final String line = reader.readLine(); + return stringValue(line); + } + + private String stringValue(final String line) throws Exception { + final String[] params = equalsPattern.split(line); + if (2 != params.length) { + throw new Exception("Bad format. 
Insufficient columns: '" + line + + "'"); + } + return params[1].trim(); + } + + private int nextIntegerValue(final BufferedReader reader) throws Exception { + return Integer.parseInt(nextStringValue(reader)); + } +} diff --git a/qio/src/org/qcmg/unused/reader/AbstractReader.java b/qio/src/org/qcmg/unused/reader/AbstractReader.java new file mode 100644 index 000000000..fcf1af524 --- /dev/null +++ b/qio/src/org/qcmg/unused/reader/AbstractReader.java @@ -0,0 +1,40 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.reader; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +import org.qcmg.qmule.record.AbstractRecordIterator; +import org.qcmg.qmule.record.Record; +import org.qcmg.unused.exception.RecordIteratorException; + +public abstract class AbstractReader implements Reader, Iterable { + + protected final InputStream inputStream; + + public AbstractReader(final File file) throws IOException { + FileInputStream stream = new FileInputStream(file); + inputStream = stream; + } + + @Override + public Iterator iterator() { + try { + return getRecordIterator(); + } catch (Exception e) { + throw new RecordIteratorException(e); + } + } + + public abstract AbstractRecordIterator getRecordIterator() throws Exception; + + @Override + public void close() throws IOException { + inputStream.close(); + } +} diff --git a/qio/src/org/qcmg/unused/reader/AbstractReader.java-- b/qio/src/org/qcmg/unused/reader/AbstractReader.java-- new file mode 100644 index 000000000..9824af0cd --- /dev/null +++ b/qio/src/org/qcmg/unused/reader/AbstractReader.java-- @@ -0,0 +1,40 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.reader; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +import org.qcmg.exception.RecordIteratorException; +import org.qcmg.unused.record.AbstractRecordIterator; +import org.qcmg.unused.record.Record; + +public abstract class AbstractReader implements Reader, Iterable { + + protected final InputStream inputStream; + + public AbstractReader(final File file) throws IOException { + FileInputStream stream = new FileInputStream(file); + inputStream = stream; + } + + @Override + public Iterator iterator() { + try { + return getRecordIterator(); + } catch (Exception e) { + throw new RecordIteratorException(e); + } + } + + public abstract AbstractRecordIterator getRecordIterator() throws Exception; + + @Override + public void close() throws IOException { + inputStream.close(); + } +} diff --git a/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java b/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java new file mode 100644 index 000000000..3c96319ed --- /dev/null +++ b/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java @@ -0,0 +1,41 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.reader; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStreamReader; +import java.util.Iterator; + +import org.qcmg.qmule.record.ExtendedRecordIterator; +import org.qcmg.qmule.record.Serializer; + +public abstract class ExtendedFileReader extends + FileReader { + private final Serializer headerSerializer; + private final HeaderType header; + + public ExtendedFileReader(final File file, + final Serializer recordSerializer, + final Serializer headerSerializer) throws Exception { + super(file, recordSerializer); + FileInputStream inputStream = new FileInputStream(file); + InputStreamReader inputStreamReader = new InputStreamReader(inputStream); + BufferedReader reader = new BufferedReader(inputStreamReader); + this.headerSerializer = headerSerializer; + header = headerSerializer.parseRecord(reader); + } + + public HeaderType getHeader() { + return header; + } + + @Override + public Iterator getIterator() throws Exception { + return new ExtendedRecordIterator( + getInputStream(), getSerializer(), headerSerializer); + } + +} diff --git a/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java-- b/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java-- new file mode 100644 index 000000000..546ba9e45 --- /dev/null +++ b/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java-- @@ -0,0 +1,41 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.reader; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStreamReader; +import java.util.Iterator; + +import org.qcmg.unused.record.ExtendedRecordIterator; +import org.qcmg.unused.record.Serializer; + +public abstract class ExtendedFileReader extends + FileReader { + private final Serializer headerSerializer; + private final HeaderType header; + + public ExtendedFileReader(final File file, + final Serializer recordSerializer, + final Serializer headerSerializer) throws Exception { + super(file, recordSerializer); + FileInputStream inputStream = new FileInputStream(file); + InputStreamReader inputStreamReader = new InputStreamReader(inputStream); + BufferedReader reader = new BufferedReader(inputStreamReader); + this.headerSerializer = headerSerializer; + header = headerSerializer.parseRecord(reader); + } + + public HeaderType getHeader() { + return header; + } + + @Override + public Iterator getIterator() throws Exception { + return new ExtendedRecordIterator( + getInputStream(), getSerializer(), headerSerializer); + } + +} diff --git a/qio/src/org/qcmg/unused/reader/FileReader.java b/qio/src/org/qcmg/unused/reader/FileReader.java new file mode 100644 index 000000000..0cc4ae6ce --- /dev/null +++ b/qio/src/org/qcmg/unused/reader/FileReader.java @@ -0,0 +1,57 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.reader; + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Iterator; + +import org.qcmg.qmule.record.RecordIterator; +import org.qcmg.qmule.record.Serializer; + +public abstract class FileReader implements Closeable, + Iterable { + private final Serializer serializer; + private final File file; + private final FileInputStream inputStream; + + public FileReader(final File file, final Serializer serializer) + throws Exception { + this.file = file; + this.serializer = serializer; + inputStream = new FileInputStream(file); + } + + public Iterator iterator() { + try { + return getIterator(); + } catch (Exception e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + } + + public Iterator getIterator() throws Exception { + return new RecordIterator(inputStream, serializer); + } + + public void close() throws IOException { + inputStream.close(); + } + + public File getFile() { + return file; + } + + public FileInputStream getInputStream() { + return inputStream; + } + + public Serializer getSerializer() { + return serializer; + } + +} diff --git a/qio/src/org/qcmg/unused/reader/FileReader.java-- b/qio/src/org/qcmg/unused/reader/FileReader.java-- new file mode 100644 index 000000000..8651ed657 --- /dev/null +++ b/qio/src/org/qcmg/unused/reader/FileReader.java-- @@ -0,0 +1,57 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.reader; + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Iterator; + +import org.qcmg.unused.record.RecordIterator; +import org.qcmg.unused.record.Serializer; + +public abstract class FileReader implements Closeable, + Iterable { + private final Serializer serializer; + private final File file; + private final FileInputStream inputStream; + + public FileReader(final File file, final Serializer serializer) + throws Exception { + this.file = file; + this.serializer = serializer; + inputStream = new FileInputStream(file); + } + + public Iterator iterator() { + try { + return getIterator(); + } catch (Exception e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + } + + public Iterator getIterator() throws Exception { + return new RecordIterator(inputStream, serializer); + } + + public void close() throws IOException { + inputStream.close(); + } + + public File getFile() { + return file; + } + + public FileInputStream getInputStream() { + return inputStream; + } + + public Serializer getSerializer() { + return serializer; + } + +} diff --git a/qio/src/org/qcmg/unused/reader/Reader.java b/qio/src/org/qcmg/unused/reader/Reader.java new file mode 100644 index 000000000..5f7a95a30 --- /dev/null +++ b/qio/src/org/qcmg/unused/reader/Reader.java @@ -0,0 +1,10 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.reader; + +import java.io.Closeable; + +public interface Reader extends Closeable { + +} diff --git a/qio/src/org/qcmg/unused/reader/Reader.java-- b/qio/src/org/qcmg/unused/reader/Reader.java-- new file mode 100644 index 000000000..5f7a95a30 --- /dev/null +++ b/qio/src/org/qcmg/unused/reader/Reader.java-- @@ -0,0 +1,10 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.reader; + +import java.io.Closeable; + +public interface Reader extends Closeable { + +} diff --git a/qio/src/org/qcmg/unused/simple/SimpleFileReader.java b/qio/src/org/qcmg/unused/simple/SimpleFileReader.java new file mode 100644 index 000000000..e6839b3b5 --- /dev/null +++ b/qio/src/org/qcmg/unused/simple/SimpleFileReader.java @@ -0,0 +1,23 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.simple; + +import java.io.File; +import java.io.IOException; + +import org.qcmg.qmule.record.AbstractRecordIterator; +import org.qcmg.unused.reader.AbstractReader; + +public class SimpleFileReader extends AbstractReader { + + public SimpleFileReader(File file) throws IOException { + super(file); + } + + @Override + public AbstractRecordIterator getRecordIterator() throws Exception{ + return new SimpleRecordIterator(inputStream); + } + +} diff --git a/qio/src/org/qcmg/unused/simple/SimpleFileReader.java-- b/qio/src/org/qcmg/unused/simple/SimpleFileReader.java-- new file mode 100644 index 000000000..2a1d6ba91 --- /dev/null +++ b/qio/src/org/qcmg/unused/simple/SimpleFileReader.java-- @@ -0,0 +1,23 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.simple; + +import java.io.File; +import java.io.IOException; + +import org.qcmg.reader.AbstractReader; +import org.qcmg.unused.record.AbstractRecordIterator; + +public class SimpleFileReader extends AbstractReader { + + public SimpleFileReader(File file) throws IOException { + super(file); + } + + @Override + public AbstractRecordIterator getRecordIterator() throws Exception{ + return new SimpleRecordIterator(inputStream); + } + +} diff --git a/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java b/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java new file mode 100644 index 000000000..e384ce5e9 --- /dev/null +++ b/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java @@ -0,0 +1,26 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.simple; + +import java.io.InputStream; + +import org.qcmg.qmule.record.AbstractRecordIterator; + +public class SimpleRecordIterator extends AbstractRecordIterator { + + public SimpleRecordIterator(InputStream stream) throws Exception{ + super(stream); + } + + @Override + protected void readNext() throws Exception { +// try { + next = SimpleSerializer.nextRecord(reader); +// } catch (Exception ex) { +// next = null; +// throw ex; +// } + } + +} diff --git a/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java-- b/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java-- new file mode 100644 index 000000000..438c6695f --- /dev/null +++ b/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java-- @@ -0,0 +1,26 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.simple; + +import java.io.InputStream; + +import org.qcmg.unused.record.AbstractRecordIterator; + +public class SimpleRecordIterator extends AbstractRecordIterator { + + public SimpleRecordIterator(InputStream stream) throws Exception{ + super(stream); + } + + @Override + protected void readNext() throws Exception { +// try { + next = SimpleSerializer.nextRecord(reader); +// } catch (Exception ex) { +// next = null; +// throw ex; +// } + } + +} diff --git a/qio/src/org/qcmg/unused/simple/SimpleSerializer.java b/qio/src/org/qcmg/unused/simple/SimpleSerializer.java new file mode 100644 index 000000000..09f1f99a7 --- /dev/null +++ b/qio/src/org/qcmg/unused/simple/SimpleSerializer.java @@ -0,0 +1,57 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.simple; + +import java.io.BufferedReader; +import java.io.IOException; + +import org.qcmg.qmule.record.SimpleRecord; + +public final class SimpleSerializer { + + private static final String DEFAULT_ID_PREFIX = ">"; + private static final String DEFAULT_HEADER_PREFIX = "#"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { + line = reader.readLine(); + } + return line; + } + + public static SimpleRecord nextRecord(final BufferedReader reader) throws Exception { + SimpleRecord result = null; + + String id = nextNonheaderLine(reader); + String sequence = reader.readLine(); +// return parseRecord(id, sequence); + if (null != id && null != sequence) { + result = parseRecord(id, sequence); + } + + return result; + } + + static String parseID(final String value) throws Exception { + if ( ! value.startsWith(DEFAULT_ID_PREFIX)) { + throw new Exception("Bad id format: " + value); + } + return value; + } + + static String parseSequence(final String sequence) throws Exception { + if (sequence.startsWith(DEFAULT_ID_PREFIX)) { + throw new Exception("Bad sequence format: " + sequence); + } + return sequence; + } + + static SimpleRecord parseRecord(final String id, final String sequence) + throws Exception { + return new SimpleRecord(parseID(id), parseSequence(sequence)); + } + +} diff --git a/qio/src/org/qcmg/unused/simple/SimpleSerializer.java-- b/qio/src/org/qcmg/unused/simple/SimpleSerializer.java-- new file mode 100644 index 000000000..07c4491b2 --- /dev/null +++ b/qio/src/org/qcmg/unused/simple/SimpleSerializer.java-- @@ -0,0 +1,57 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.simple; + +import java.io.BufferedReader; +import java.io.IOException; + +import org.qcmg.unused.record.SimpleRecord; + +public final class SimpleSerializer { + + private static final String DEFAULT_ID_PREFIX = ">"; + private static final String DEFAULT_HEADER_PREFIX = "#"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { + line = reader.readLine(); + } + return line; + } + + public static SimpleRecord nextRecord(final BufferedReader reader) throws Exception { + SimpleRecord result = null; + + String id = nextNonheaderLine(reader); + String sequence = reader.readLine(); +// return parseRecord(id, sequence); + if (null != id && null != sequence) { + result = parseRecord(id, sequence); + } + + return result; + } + + static String parseID(final String value) throws Exception { + if ( ! value.startsWith(DEFAULT_ID_PREFIX)) { + throw new Exception("Bad id format: " + value); + } + return value; + } + + static String parseSequence(final String sequence) throws Exception { + if (sequence.startsWith(DEFAULT_ID_PREFIX)) { + throw new Exception("Bad sequence format: " + sequence); + } + return sequence; + } + + static SimpleRecord parseRecord(final String id, final String sequence) + throws Exception { + return new SimpleRecord(parseID(id), parseSequence(sequence)); + } + +} From 1d68169ad7adaffb0f443479c4c4c52a6c0543d4 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 18:44:47 +1000 Subject: [PATCH 15/73] mv package only used by qmule --- .../org/qcmg/qmule/gff3/GFF3FileReader.java | 44 ++++ .../org/qcmg/qmule/gff3/GFF3FileReader.java-- | 44 ++++ .../org/qcmg/qmule/gff3/GFF3FileWriter.java | 36 +++ .../org/qcmg/qmule/gff3/GFF3FileWriter.java-- | 36 +++ qio/src/org/qcmg/qmule/gff3/GFF3Record.java | 244 ++++++++++++++++++ qio/src/org/qcmg/qmule/gff3/GFF3Record.java-- | 244 ++++++++++++++++++ ...RecordChromosomeAndPositionComparator.java | 32 +++ ...cordChromosomeAndPositionComparator.java-- | 32 +++ .../qcmg/qmule/gff3/GFF3RecordIterator.java | 49 ++++ .../qcmg/qmule/gff3/GFF3RecordIterator.java-- | 49 ++++ .../gff3/GFF3RecordPositionComparator.java | 21 ++ .../gff3/GFF3RecordPositionComparator.java-- | 21 ++ .../org/qcmg/qmule/gff3/GFF3Serializer.java | 94 +++++++ .../org/qcmg/qmule/gff3/GFF3Serializer.java-- | 94 +++++++ .../qmule/record/AbstractRecordIterator.java | 47 ++++ .../record/AbstractRecordIterator.java-- | 47 ++++ .../qmule/record/ExtendedRecordIterator.java | 52 ++++ .../record/ExtendedRecordIterator.java-- | 52 ++++ qio/src/org/qcmg/qmule/record/Record.java | 8 + qio/src/org/qcmg/qmule/record/Record.java-- | 8 + .../org/qcmg/qmule/record/RecordIterator.java | 47 ++++ .../qcmg/qmule/record/RecordIterator.java-- | 47 ++++ qio/src/org/qcmg/qmule/record/Serializer.java | 39 +++ .../org/qcmg/qmule/record/Serializer.java-- | 39 +++ .../org/qcmg/qmule/record/SimpleRecord.java | 40 +++ .../org/qcmg/qmule/record/SimpleRecord.java-- | 40 +++ .../org/qcmg/qmule/tab/TabbedFileReader.java | 61 +++++ .../qcmg/qmule/tab/TabbedFileReader.java-- | 61 +++++ .../org/qcmg/qmule/tab/TabbedFileWriter.java | 46 ++++ .../qcmg/qmule/tab/TabbedFileWriter.java-- | 46 ++++ qio/src/org/qcmg/qmule/tab/TabbedHeader.java | 24 ++ .../org/qcmg/qmule/tab/TabbedHeader.java-- | 24 ++ qio/src/org/qcmg/qmule/tab/TabbedRecord.java | 19 ++ .../org/qcmg/qmule/tab/TabbedRecord.java-- | 19 ++ .../qcmg/qmule/tab/TabbedRecordIterator.java 
| 47 ++++ .../qmule/tab/TabbedRecordIterator.java-- | 47 ++++ .../org/qcmg/qmule/tab/TabbedSerializer.java | 51 ++++ .../qcmg/qmule/tab/TabbedSerializer.java-- | 51 ++++ 38 files changed, 2002 insertions(+) create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java-- create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java-- create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3Record.java create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3Record.java-- create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java-- create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java-- create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java-- create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java create mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java-- create mode 100644 qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java create mode 100644 qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java-- create mode 100644 qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java create mode 100644 qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java-- create mode 100644 qio/src/org/qcmg/qmule/record/Record.java create mode 100644 qio/src/org/qcmg/qmule/record/Record.java-- create mode 100644 qio/src/org/qcmg/qmule/record/RecordIterator.java create mode 100644 qio/src/org/qcmg/qmule/record/RecordIterator.java-- create mode 100644 qio/src/org/qcmg/qmule/record/Serializer.java create mode 100644 qio/src/org/qcmg/qmule/record/Serializer.java-- create mode 100644 qio/src/org/qcmg/qmule/record/SimpleRecord.java create mode 100644 qio/src/org/qcmg/qmule/record/SimpleRecord.java-- create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedFileReader.java create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedFileReader.java-- create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java-- create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedHeader.java create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedHeader.java-- create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedRecord.java create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedRecord.java-- create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java-- create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedSerializer.java create mode 100644 qio/src/org/qcmg/qmule/tab/TabbedSerializer.java-- diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java b/qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java new file mode 100644 index 000000000..c6a7b0ea2 --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java @@ -0,0 +1,44 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.gff3; + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +public final class GFF3FileReader implements Closeable, Iterable { + private final File file; + private final InputStream inputStream; + + public GFF3FileReader(final File file) throws FileNotFoundException { + this.file = file; + FileInputStream fileStream = new FileInputStream(file); + inputStream = fileStream; + } + + @Override + public Iterator iterator() { + return getRecordIterator(); + } + + public GFF3RecordIterator getRecordIterator() { + return new GFF3RecordIterator(inputStream); + } + + @Override + public void close() throws IOException { + inputStream.close(); + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java-- new file mode 100644 index 000000000..112ae5f4e --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java-- @@ -0,0 +1,44 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.gff3; + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +public final class GFF3FileReader implements Closeable, Iterable { + private final File file; + private final InputStream inputStream; + + public GFF3FileReader(final File file) throws FileNotFoundException { + this.file = file; + FileInputStream fileStream = new FileInputStream(file); + inputStream = fileStream; + } + + @Override + public Iterator iterator() { + return getRecordIterator(); + } + + public GFF3RecordIterator getRecordIterator() { + return new GFF3RecordIterator(inputStream); + } + + @Override + public void close() throws IOException { + inputStream.close(); + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java b/qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java new file mode 100644 index 000000000..9a2adbada --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java @@ -0,0 +1,36 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.gff3; + +import java.io.Closeable; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +public final class GFF3FileWriter implements Closeable { + private final File file; + private final OutputStream outputStream; + + public GFF3FileWriter(final File file) throws FileNotFoundException { + this.file = file; + OutputStream stream = new FileOutputStream(file); + outputStream = stream; + } + + public void add(final GFF3Record record) throws IOException { + String encoded = GFF3Serializer.serialise(record) + "\n"; + outputStream.write(encoded.getBytes()); + outputStream.flush(); + } + + @Override + public void close() throws IOException { + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java-- new file mode 100644 index 000000000..672c4acdd --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java-- @@ -0,0 +1,36 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.gff3; + +import java.io.Closeable; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +public final class GFF3FileWriter implements Closeable { + private final File file; + private final OutputStream outputStream; + + public GFF3FileWriter(final File file) throws FileNotFoundException { + this.file = file; + OutputStream stream = new FileOutputStream(file); + outputStream = stream; + } + + public void add(final GFF3Record record) throws IOException { + String encoded = GFF3Serializer.serialise(record) + "\n"; + outputStream.write(encoded.getBytes()); + outputStream.flush(); + } + + @Override + public void close() throws IOException { + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3Record.java b/qio/src/org/qcmg/qmule/gff3/GFF3Record.java new file mode 100644 index 000000000..c102b061c --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3Record.java @@ -0,0 +1,244 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.gff3; + + +public class GFF3Record { + + protected String seqId; + protected String source; + protected String type; + protected int start; + protected int end; + protected String score; + protected String strand; + protected String phase; + protected String attributes; + protected String rawData; + + /** + * Gets the value of the seqId property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSeqId() { + return seqId; + } + + /** + * Sets the value of the seqId property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSeqId(String value) { + this.seqId = value; + } + + /** + * Gets the value of the source property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSource() { + return source; + } + + /** + * Sets the value of the source property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSource(String value) { + this.source = value; + } + + /** + * Gets the value of the type property. 
+ * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the start property. + * + */ + public int getStart() { + return start; + } + + /** + * Sets the value of the start property. + * + */ + public void setStart(int value) { + this.start = value; + } + + /** + * Gets the value of the end property. + * + */ + public int getEnd() { + return end; + } + + /** + * Sets the value of the end property. + * + */ + public void setEnd(int value) { + this.end = value; + } + + /** + * Gets the value of the score property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getScore() { + return score; + } + + /** + * Sets the value of the score property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setScore(String value) { + this.score = value; + } + + /** + * Gets the value of the strand property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getStrand() { + return strand; + } + + /** + * Sets the value of the strand property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setStrand(String value) { + this.strand = value; + } + + /** + * Gets the value of the phase property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getPhase() { + return phase; + } + + /** + * Sets the value of the phase property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setPhase(String value) { + this.phase = value; + } + + /** + * Gets the value of the attributes property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAttributes() { + return attributes; + } + + /** + * Sets the value of the attributes property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAttributes(String value) { + this.attributes = value; + } + + /** + * Gets the value of the rawData property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getRawData() { + return rawData; + } + + /** + * Sets the value of the rawData property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRawData(String value) { + this.rawData = value; + } + +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3Record.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3Record.java-- new file mode 100644 index 000000000..ebde722f6 --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3Record.java-- @@ -0,0 +1,244 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.gff3; + + +public class GFF3Record { + + protected String seqId; + protected String source; + protected String type; + protected int start; + protected int end; + protected String score; + protected String strand; + protected String phase; + protected String attributes; + protected String rawData; + + /** + * Gets the value of the seqId property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSeqId() { + return seqId; + } + + /** + * Sets the value of the seqId property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSeqId(String value) { + this.seqId = value; + } + + /** + * Gets the value of the source property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSource() { + return source; + } + + /** + * Sets the value of the source property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSource(String value) { + this.source = value; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the start property. + * + */ + public int getStart() { + return start; + } + + /** + * Sets the value of the start property. + * + */ + public void setStart(int value) { + this.start = value; + } + + /** + * Gets the value of the end property. + * + */ + public int getEnd() { + return end; + } + + /** + * Sets the value of the end property. + * + */ + public void setEnd(int value) { + this.end = value; + } + + /** + * Gets the value of the score property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getScore() { + return score; + } + + /** + * Sets the value of the score property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setScore(String value) { + this.score = value; + } + + /** + * Gets the value of the strand property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getStrand() { + return strand; + } + + /** + * Sets the value of the strand property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setStrand(String value) { + this.strand = value; + } + + /** + * Gets the value of the phase property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getPhase() { + return phase; + } + + /** + * Sets the value of the phase property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setPhase(String value) { + this.phase = value; + } + + /** + * Gets the value of the attributes property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAttributes() { + return attributes; + } + + /** + * Sets the value of the attributes property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAttributes(String value) { + this.attributes = value; + } + + /** + * Gets the value of the rawData property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getRawData() { + return rawData; + } + + /** + * Sets the value of the rawData property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRawData(String value) { + this.rawData = value; + } + +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java b/qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java new file mode 100644 index 000000000..cfb36a747 --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java @@ -0,0 +1,32 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.gff3; + +import java.util.Comparator; + +import org.qcmg.common.model.ReferenceNameComparator; + +public class GFF3RecordChromosomeAndPositionComparator implements + Comparator { + + private static final Comparator chrComp = new ReferenceNameComparator(); + + public int compare(GFF3Record recordA, GFF3Record recordB) { + + // first compare chromosome + int chrcompare = chrComp.compare(recordA.getSeqId(), recordB.getSeqId()); + + if (chrcompare != 0) return chrcompare; + + return compareStart(recordA, recordB) + compareEnd(recordA, recordB); + } + + public int compareStart(GFF3Record recordA, GFF3Record recordB) { + return recordA.getStart() - recordB.getStart(); + } + + public int compareEnd(GFF3Record recordA, GFF3Record recordB) { + return recordA.getEnd() - recordB.getEnd(); + } +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java-- new file mode 100644 index 000000000..0efc4c6b2 --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java-- @@ -0,0 +1,32 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.gff3; + +import java.util.Comparator; + +import org.qcmg.common.model.ReferenceNameComparator; + +public class GFF3RecordChromosomeAndPositionComparator implements + Comparator { + + private static final Comparator chrComp = new ReferenceNameComparator(); + + public int compare(GFF3Record recordA, GFF3Record recordB) { + + // first compare chromosome + int chrcompare = chrComp.compare(recordA.getSeqId(), recordB.getSeqId()); + + if (chrcompare != 0) return chrcompare; + + return compareStart(recordA, recordB) + compareEnd(recordA, recordB); + } + + public int compareStart(GFF3Record recordA, GFF3Record recordB) { + return recordA.getStart() - recordB.getStart(); + } + + public int compareEnd(GFF3Record recordA, GFF3Record recordB) { + return recordA.getEnd() - recordB.getEnd(); + } +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java b/qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java new file mode 100644 index 000000000..6246aaa61 --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java @@ -0,0 +1,49 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.gff3; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.NoSuchElementException; + +public final class GFF3RecordIterator implements Iterator { + private final BufferedReader reader; + private GFF3Record next; + + public GFF3RecordIterator(final InputStream stream) { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public GFF3Record next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + GFF3Record result = next; + readNext(); + return result; + } + + private void readNext() { + try { + next = GFF3Serializer.nextRecord(reader); + } catch (NoSuchElementException e) { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex.getMessage(), ex); + } + } + + @SuppressWarnings("unchecked") + public void remove() { + throw new UnsupportedOperationException(); + } +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java-- new file mode 100644 index 000000000..a98e4b73e --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java-- @@ -0,0 +1,49 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.gff3; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.NoSuchElementException; + +public final class GFF3RecordIterator implements Iterator { + private final BufferedReader reader; + private GFF3Record next; + + public GFF3RecordIterator(final InputStream stream) { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public GFF3Record next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + GFF3Record result = next; + readNext(); + return result; + } + + private void readNext() { + try { + next = GFF3Serializer.nextRecord(reader); + } catch (NoSuchElementException e) { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex.getMessage(), ex); + } + } + + @SuppressWarnings("unchecked") + public void remove() { + throw new UnsupportedOperationException(); + } +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java b/qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java new file mode 100644 index 000000000..2a763621d --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java @@ -0,0 +1,21 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.gff3; + +import java.util.Comparator; + +public class GFF3RecordPositionComparator implements + Comparator { + public int compare(GFF3Record recordA, GFF3Record recordB) { + return compareStart(recordA, recordB) + compareEnd(recordA, recordB); + } + + public int compareStart(GFF3Record recordA, GFF3Record recordB) { + return recordA.getStart() - recordB.getStart(); + } + + public int compareEnd(GFF3Record recordA, GFF3Record recordB) { + return recordA.getEnd() - recordB.getEnd(); + } +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java-- new file mode 100644 index 000000000..4f5548ceb --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java-- @@ -0,0 +1,21 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.gff3; + +import java.util.Comparator; + +public class GFF3RecordPositionComparator implements + Comparator { + public int compare(GFF3Record recordA, GFF3Record recordB) { + return compareStart(recordA, recordB) + compareEnd(recordA, recordB); + } + + public int compareStart(GFF3Record recordA, GFF3Record recordB) { + return recordA.getStart() - recordB.getStart(); + } + + public int compareEnd(GFF3Record recordA, GFF3Record recordB) { + return recordA.getEnd() - recordB.getEnd(); + } +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java b/qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java new file mode 100644 index 000000000..f10ca34e0 --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java @@ -0,0 +1,94 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.gff3; + +import java.io.BufferedReader; +import java.io.IOException; + +import org.qcmg.common.util.Constants; +import org.qcmg.common.util.TabTokenizer; + +public final class GFF3Serializer { + private static final String DEFAULT_HEADER_PREFIX = "#"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { + line = reader.readLine(); + } + return line; + } + + public static GFF3Record nextRecord(final BufferedReader reader) + throws Exception, IOException { + GFF3Record result = null; + try { + + String line = nextNonheaderLine(reader); + if (null != line) { + result = parseRecord(line); + } + } catch (IOException e) { + throw e; + } catch (Exception e) { + throw e; + } + return result; + } + + static GFF3Record parseRecord(final String line) throws Exception { + String[] params = TabTokenizer.tokenize(line); + if (8 > params.length) { + throw new Exception("Bad GFF3 format. 
Insufficient columns: '" + line + "'"); + } + GFF3Record result = new GFF3Record(); + result.setRawData(line); + result.setSeqId(params[0]); + result.setSource(params[1]); + result.setType(params[2]); + result.setStart(Integer.parseInt(params[3])); + result.setEnd(Integer.parseInt(params[4])); + result.setScore(params[5]); + result.setStrand(params[6]); + result.setPhase(params[7]); + if (8 < params.length) { + result.setAttributes(params[8]); + } + return result; + } + + public static GFF3Record duplicate(final GFF3Record record) { + GFF3Record result = new GFF3Record(); + result.setSeqId(record.getSeqId()); + result.setSource(record.getSource()); + result.setType(record.getType()); + result.setStart(record.getStart()); + result.setEnd(record.getEnd()); + result.setScore(record.getScore()); + result.setStrand(record.getStrand()); + result.setPhase(record.getPhase()); + result.setAttributes(record.getAttributes()); + return result; + } + + public static String serialise(final GFF3Record record) { + StringBuilder result = new StringBuilder(record.getSeqId()).append(Constants.TAB); + result.append(record.getSource()).append(Constants.TAB); + result.append(record.getType()).append(Constants.TAB); + result.append(record.getStart()).append(Constants.TAB); + result.append(record.getEnd()).append(Constants.TAB); + result.append(record.getScore()).append(Constants.TAB); + result.append(record.getStrand()).append(Constants.TAB); + result.append(record.getPhase()).append(Constants.TAB); + if (null != record.getAttributes()) { + result.append(record.getAttributes()); + } + return result.toString(); + } + +} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java-- new file mode 100644 index 000000000..a48cc461d --- /dev/null +++ b/qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java-- @@ -0,0 +1,94 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.gff3; + +import java.io.BufferedReader; +import java.io.IOException; + +import org.qcmg.common.util.Constants; +import org.qcmg.common.util.TabTokenizer; + +public final class GFF3Serializer { + private static final String DEFAULT_HEADER_PREFIX = "#"; + + private static String nextNonheaderLine(final BufferedReader reader) + throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { + line = reader.readLine(); + } + return line; + } + + public static GFF3Record nextRecord(final BufferedReader reader) + throws Exception, IOException { + GFF3Record result = null; + try { + + String line = nextNonheaderLine(reader); + if (null != line) { + result = parseRecord(line); + } + } catch (IOException e) { + throw e; + } catch (Exception e) { + throw e; + } + return result; + } + + static GFF3Record parseRecord(final String line) throws Exception { + String[] params = TabTokenizer.tokenize(line); + if (8 > params.length) { + throw new Exception("Bad GFF3 format. 
Insufficient columns: '" + line + "'"); + } + GFF3Record result = new GFF3Record(); + result.setRawData(line); + result.setSeqId(params[0]); + result.setSource(params[1]); + result.setType(params[2]); + result.setStart(Integer.parseInt(params[3])); + result.setEnd(Integer.parseInt(params[4])); + result.setScore(params[5]); + result.setStrand(params[6]); + result.setPhase(params[7]); + if (8 < params.length) { + result.setAttributes(params[8]); + } + return result; + } + + public static GFF3Record duplicate(final GFF3Record record) { + GFF3Record result = new GFF3Record(); + result.setSeqId(record.getSeqId()); + result.setSource(record.getSource()); + result.setType(record.getType()); + result.setStart(record.getStart()); + result.setEnd(record.getEnd()); + result.setScore(record.getScore()); + result.setStrand(record.getStrand()); + result.setPhase(record.getPhase()); + result.setAttributes(record.getAttributes()); + return result; + } + + public static String serialise(final GFF3Record record) { + StringBuilder result = new StringBuilder(record.getSeqId()).append(Constants.TAB); + result.append(record.getSource()).append(Constants.TAB); + result.append(record.getType()).append(Constants.TAB); + result.append(record.getStart()).append(Constants.TAB); + result.append(record.getEnd()).append(Constants.TAB); + result.append(record.getScore()).append(Constants.TAB); + result.append(record.getStrand()).append(Constants.TAB); + result.append(record.getPhase()).append(Constants.TAB); + if (null != record.getAttributes()) { + result.append(record.getAttributes()); + } + return result.toString(); + } + +} diff --git a/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java b/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java new file mode 100644 index 000000000..e84fff5ae --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java @@ -0,0 +1,47 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.record; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicLong; + +import org.qcmg.unused.exception.RecordIteratorException; + +public abstract class AbstractRecordIterator implements Iterator { + + protected final BufferedReader reader; + private final AtomicLong counter; + protected Record next; + + public AbstractRecordIterator(final InputStream stream) throws Exception { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + counter = new AtomicLong(0); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public Record next() { + counter.incrementAndGet(); + Record result = next; + try { + readNext(); + } catch (Exception e) { + throw new RecordIteratorException(e.getMessage() + " [Record count: " + counter.get() +"]", e); + } + return result; + } + + protected abstract void readNext() throws Exception; + + public void remove() { + } + +} diff --git a/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java-- b/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java-- new file mode 100644 index 000000000..7f100f601 --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java-- @@ -0,0 +1,47 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
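
A minimal usage sketch of the GFF3 classes relocated above (editor's illustration; the wrapper class name and input path are invented, and it assumes GFF3FileReader iterates GFF3Record instances as its Iterable signature implies):

import java.io.File;

import org.qcmg.qmule.gff3.GFF3FileReader;
import org.qcmg.qmule.gff3.GFF3Record;

public class GFF3Example {
    public static void main(String[] args) throws Exception {
        // GFF3FileReader is Closeable and Iterable, so it works with try-with-resources and for-each;
        // header lines starting with '#' are skipped by GFF3Serializer.nextRecord().
        try (GFF3FileReader reader = new GFF3FileReader(new File("features.gff3"))) {
            for (GFF3Record rec : reader) {
                System.out.println(rec.getSeqId() + "\t" + rec.getStart() + "\t" + rec.getEnd() + "\t" + rec.getType());
                // GFF3Serializer.serialise(rec) would rebuild a tab-delimited line from the record fields.
            }
        }
    }
}

GFF3Serializer.parseRecord() requires at least eight tab-separated columns and stores the ninth (attributes) only when present, so malformed lines surface as exceptions rather than partial records.
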
+ */ +package org.qcmg.unused.record; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicLong; + +import org.qcmg.exception.RecordIteratorException; + +public abstract class AbstractRecordIterator implements Iterator { + + protected final BufferedReader reader; + private final AtomicLong counter; + protected Record next; + + public AbstractRecordIterator(final InputStream stream) throws Exception { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + counter = new AtomicLong(0); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public Record next() { + counter.incrementAndGet(); + Record result = next; + try { + readNext(); + } catch (Exception e) { + throw new RecordIteratorException(e.getMessage() + " [Record count: " + counter.get() +"]", e); + } + return result; + } + + protected abstract void readNext() throws Exception; + + public void remove() { + } + +} diff --git a/qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java b/qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java new file mode 100644 index 000000000..eea2f06f7 --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java @@ -0,0 +1,52 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.record; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicLong; + +public final class ExtendedRecordIterator implements + Iterator { + private final Serializer serializer; + private final BufferedReader reader; + private final AtomicLong counter; + private RecordType next; + + public ExtendedRecordIterator(final InputStream stream, + final Serializer serializer, + final Serializer headerSerializer) throws Exception { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + counter = new AtomicLong(0); + this.serializer = serializer; + headerSerializer.nextRecord(reader); // skip header + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public RecordType next() { + counter.incrementAndGet(); + RecordType result = next; + try { + readNext(); + } catch (Exception e) { + throw new RuntimeException(e.getMessage() + " [Record count: " + + counter.get() + "]", e); + } + return result; + } + + private void readNext() throws Exception { + next = serializer.nextRecord(reader); + } + + public void remove() { + } +} diff --git a/qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java-- b/qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java-- new file mode 100644 index 000000000..be668b779 --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java-- @@ -0,0 +1,52 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.record; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicLong; + +public final class ExtendedRecordIterator implements + Iterator { + private final Serializer serializer; + private final BufferedReader reader; + private final AtomicLong counter; + private RecordType next; + + public ExtendedRecordIterator(final InputStream stream, + final Serializer serializer, + final Serializer headerSerializer) throws Exception { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + counter = new AtomicLong(0); + this.serializer = serializer; + headerSerializer.nextRecord(reader); // skip header + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public RecordType next() { + counter.incrementAndGet(); + RecordType result = next; + try { + readNext(); + } catch (Exception e) { + throw new RuntimeException(e.getMessage() + " [Record count: " + + counter.get() + "]", e); + } + return result; + } + + private void readNext() throws Exception { + next = serializer.nextRecord(reader); + } + + public void remove() { + } +} diff --git a/qio/src/org/qcmg/qmule/record/Record.java b/qio/src/org/qcmg/qmule/record/Record.java new file mode 100644 index 000000000..df32154be --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/Record.java @@ -0,0 +1,8 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.record; + +public interface Record { + public static final String TAB_DELIMITER = "\t"; +} diff --git a/qio/src/org/qcmg/qmule/record/Record.java-- b/qio/src/org/qcmg/qmule/record/Record.java-- new file mode 100644 index 000000000..60e2f037c --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/Record.java-- @@ -0,0 +1,8 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.record; + +public interface Record { + public static final String TAB_DELIMITER = "\t"; +} diff --git a/qio/src/org/qcmg/qmule/record/RecordIterator.java b/qio/src/org/qcmg/qmule/record/RecordIterator.java new file mode 100644 index 000000000..02cb439ea --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/RecordIterator.java @@ -0,0 +1,47 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.record; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicLong; + +public final class RecordIterator implements Iterator { + private final Serializer serializer; + private final BufferedReader reader; + private final AtomicLong counter; + private RecordType next; + + public RecordIterator(final InputStream stream, final Serializer serializer) throws Exception { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + counter = new AtomicLong(0); + this.serializer = serializer; + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public RecordType next() { + counter.incrementAndGet(); + RecordType result = next; + try { + readNext(); + } catch (Exception e) { + throw new RuntimeException(e.getMessage() + " [Record count: " + counter.get() +"]", e); + } + return result; + } + + private void readNext() throws Exception { + next = serializer.nextRecord(reader); + } + + public void remove() { + } +} diff --git a/qio/src/org/qcmg/qmule/record/RecordIterator.java-- b/qio/src/org/qcmg/qmule/record/RecordIterator.java-- new file mode 100644 index 000000000..3c2f89503 --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/RecordIterator.java-- @@ -0,0 +1,47 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.record; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicLong; + +public final class RecordIterator implements Iterator { + private final Serializer serializer; + private final BufferedReader reader; + private final AtomicLong counter; + private RecordType next; + + public RecordIterator(final InputStream stream, final Serializer serializer) throws Exception { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + counter = new AtomicLong(0); + this.serializer = serializer; + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public RecordType next() { + counter.incrementAndGet(); + RecordType result = next; + try { + readNext(); + } catch (Exception e) { + throw new RuntimeException(e.getMessage() + " [Record count: " + counter.get() +"]", e); + } + return result; + } + + private void readNext() throws Exception { + next = serializer.nextRecord(reader); + } + + public void remove() { + } +} diff --git a/qio/src/org/qcmg/qmule/record/Serializer.java b/qio/src/org/qcmg/qmule/record/Serializer.java new file mode 100644 index 000000000..c0e427be4 --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/Serializer.java @@ -0,0 +1,39 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.record; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +public abstract class Serializer { + public static final String HASH = "#"; + public static final String NEWLINE = "\n"; + public static final String EQUALS = "="; + public static final Pattern tabbedPattern = Pattern.compile("[\\t]+"); + public static final Pattern colonPattern = Pattern.compile("[:]+"); + public static final Pattern hyphenPattern = Pattern.compile("[-]+"); + public static final Pattern equalsPattern = Pattern.compile("[=]+"); + public static final Pattern commaPattern = Pattern.compile("[,]+"); + + public RecordType nextRecord(final BufferedReader reader) throws Exception { + RecordType result = null; + try { + result = parseRecord(reader); + } catch (IOException e) { + throw e; + } catch (Exception e) { + throw e; + } + return result; + } + + public abstract String serialise(final RecordType record) throws Exception; + + public abstract RecordType parseRecord(BufferedReader reader) + throws Exception; +} diff --git a/qio/src/org/qcmg/qmule/record/Serializer.java-- b/qio/src/org/qcmg/qmule/record/Serializer.java-- new file mode 100644 index 000000000..c0c5c2fc8 --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/Serializer.java-- @@ -0,0 +1,39 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.record; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.regex.Pattern; + +public abstract class Serializer { + public static final String HASH = "#"; + public static final String NEWLINE = "\n"; + public static final String EQUALS = "="; + public static final Pattern tabbedPattern = Pattern.compile("[\\t]+"); + public static final Pattern colonPattern = Pattern.compile("[:]+"); + public static final Pattern hyphenPattern = Pattern.compile("[-]+"); + public static final Pattern equalsPattern = Pattern.compile("[=]+"); + public static final Pattern commaPattern = Pattern.compile("[,]+"); + + public RecordType nextRecord(final BufferedReader reader) throws Exception { + RecordType result = null; + try { + result = parseRecord(reader); + } catch (IOException e) { + throw e; + } catch (Exception e) { + throw e; + } + return result; + } + + public abstract String serialise(final RecordType record) throws Exception; + + public abstract RecordType parseRecord(BufferedReader reader) + throws Exception; +} diff --git a/qio/src/org/qcmg/qmule/record/SimpleRecord.java b/qio/src/org/qcmg/qmule/record/SimpleRecord.java new file mode 100644 index 000000000..80002e538 --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/SimpleRecord.java @@ -0,0 +1,40 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.record; + +/** + * Simple data container class for records that have an id, and some data + *
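
The record framework above (a Serializer parsing one record at a time from a BufferedReader, a RecordIterator iterating until parseRecord returns null, and FileReader wiring both to a File) is easiest to see with a concrete subclass. The following is a hypothetical sketch: the LineSerializer/LineFileReader names are invented, and the generic type arguments assume the RecordType parameter used in the method signatures above.

import java.io.BufferedReader;
import java.io.File;

import org.qcmg.qmule.record.Serializer;
import org.qcmg.unused.reader.FileReader;

// One String record per line; returning null at end-of-file stops the RecordIterator.
class LineSerializer extends Serializer<String> {
    @Override
    public String parseRecord(BufferedReader reader) throws Exception {
        return reader.readLine();
    }
    @Override
    public String serialise(String record) {
        return record + NEWLINE;   // NEWLINE is inherited from Serializer
    }
}

// FileReader already supplies iterator() and close(), so the subclass only wires in its serializer.
class LineFileReader extends FileReader<String> {
    LineFileReader(File file) throws Exception {
        super(file, new LineSerializer());
    }
}

Iterating (and closing) a LineFileReader then yields one String per line; the ExtendedFileReader variant adds a typed header, parsed by a second Serializer before record iteration starts.
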
+ * eg. the .csfasta format from SOLiD sequence alignment files. + * Each record is split over two lines. The first line starts with '>' followed by the ID, + * the subsequent line contains the colour space sequence + * + * @author oholmes + */ +public class SimpleRecord implements Record { + + private String id; + private String data; + + public SimpleRecord() {} + + public SimpleRecord(String id, String data) { + this.id = id; + this.data = data; + } + + public void setId(String id) { + this.id = id; + } + public String getId() { + return id; + } + + public void setData(String data) { + this.data = data; + } + public String getData() { + return data; + } +} diff --git a/qio/src/org/qcmg/qmule/record/SimpleRecord.java-- b/qio/src/org/qcmg/qmule/record/SimpleRecord.java-- new file mode 100644 index 000000000..5c1f75e56 --- /dev/null +++ b/qio/src/org/qcmg/qmule/record/SimpleRecord.java-- @@ -0,0 +1,40 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.record; + +/** + * Simple data container class for records that have an id, and some data + *
+ * eg. the .csfasta format from SOLiD sequence alignment files. + * Each record is split over two lines. The first line starts with '>' followed by the ID, + * the subsequent line contains the colour space sequence + * + * @author oholmes + */ +public class SimpleRecord implements Record { + + private String id; + private String data; + + public SimpleRecord() {} + + public SimpleRecord(String id, String data) { + this.id = id; + this.data = data; + } + + public void setId(String id) { + this.id = id; + } + public String getId() { + return id; + } + + public void setData(String data) { + this.data = data; + } + public String getData() { + return data; + } +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedFileReader.java b/qio/src/org/qcmg/qmule/tab/TabbedFileReader.java new file mode 100644 index 000000000..42c282784 --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedFileReader.java @@ -0,0 +1,61 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.tab; + +import java.io.BufferedReader; +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.zip.GZIPInputStream; + +import org.qcmg.Utils.IOStreamUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.vcf.VCFSerializer; + +public final class TabbedFileReader implements Closeable, Iterable { + private final File file; + private final InputStream inputStream; + private final TabbedHeader header; + + public TabbedFileReader(final File file) throws IOException { + this.file = file; + boolean isGzip = FileUtils.isInputGZip( file); + try(InputStream stream = (isGzip) ? new GZIPInputStream(new FileInputStream(file), 65536) : new FileInputStream(file);) { + BufferedReader in = new BufferedReader(new InputStreamReader(stream)); + header = TabbedSerializer.readHeader(in); + } + + // create a new stream rather a closed one + inputStream = (isGzip) ? new GZIPInputStream(new FileInputStream(file), 65536) : new FileInputStream(file); + } + + public TabbedHeader getHeader() { + return header; + } + + @Override + public Iterator iterator() { + return getRecordIterator(); + } + + public TabbedRecordIterator getRecordIterator() { + return new TabbedRecordIterator(inputStream); + } + + @Override + public void close() throws IOException { + inputStream.close(); + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedFileReader.java-- b/qio/src/org/qcmg/qmule/tab/TabbedFileReader.java-- new file mode 100644 index 000000000..d3f9f31d8 --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedFileReader.java-- @@ -0,0 +1,61 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.unused.tab; + +import java.io.BufferedReader; +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.zip.GZIPInputStream; + +import org.qcmg.Utils.IOStreamUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.vcf.VCFSerializer; + +public final class TabbedFileReader implements Closeable, Iterable { + private final File file; + private final InputStream inputStream; + private final TabbedHeader header; + + public TabbedFileReader(final File file) throws IOException { + this.file = file; + boolean isGzip = FileUtils.isInputGZip( file); + try(InputStream stream = (isGzip) ? new GZIPInputStream(new FileInputStream(file), 65536) : new FileInputStream(file);) { + BufferedReader in = new BufferedReader(new InputStreamReader(stream)); + header = TabbedSerializer.readHeader(in); + } + + // create a new stream rather a closed one + inputStream = (isGzip) ? new GZIPInputStream(new FileInputStream(file), 65536) : new FileInputStream(file); + } + + public TabbedHeader getHeader() { + return header; + } + + @Override + public Iterator iterator() { + return getRecordIterator(); + } + + public TabbedRecordIterator getRecordIterator() { + return new TabbedRecordIterator(inputStream); + } + + @Override + public void close() throws IOException { + inputStream.close(); + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java b/qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java new file mode 100644 index 000000000..61d83e006 --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java @@ -0,0 +1,46 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.tab; + +import java.io.Closeable; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +public final class TabbedFileWriter implements Closeable { + private final File file; + private final OutputStream outputStream; + + public TabbedFileWriter(final File file) throws Exception { + this.file = file; + OutputStream stream = new FileOutputStream(file); + outputStream = stream; + } + + public void addHeader(final TabbedHeader header) throws Exception { + for (String headerLine : header) { + String encoded = headerLine + "\n"; + outputStream.write(encoded.getBytes()); + } + outputStream.flush(); + } + + public void add(final TabbedRecord record) throws Exception { + String encoded = record.getData() + "\n"; + outputStream.write(encoded.getBytes()); + outputStream.flush(); + } + + @Override + public void close() throws IOException { + // flush anything outstanding and then close + outputStream.flush(); + outputStream.close(); + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java-- b/qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java-- new file mode 100644 index 000000000..ef8352ac4 --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java-- @@ -0,0 +1,46 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.tab; + +import java.io.Closeable; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +public final class TabbedFileWriter implements Closeable { + private final File file; + private final OutputStream outputStream; + + public TabbedFileWriter(final File file) throws Exception { + this.file = file; + OutputStream stream = new FileOutputStream(file); + outputStream = stream; + } + + public void addHeader(final TabbedHeader header) throws Exception { + for (String headerLine : header) { + String encoded = headerLine + "\n"; + outputStream.write(encoded.getBytes()); + } + outputStream.flush(); + } + + public void add(final TabbedRecord record) throws Exception { + String encoded = record.getData() + "\n"; + outputStream.write(encoded.getBytes()); + outputStream.flush(); + } + + @Override + public void close() throws IOException { + // flush anything outstanding and then close + outputStream.flush(); + outputStream.close(); + } + + public File getFile() { + return file; + } +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedHeader.java b/qio/src/org/qcmg/qmule/tab/TabbedHeader.java new file mode 100644 index 000000000..1c3fe0745 --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedHeader.java @@ -0,0 +1,24 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.tab; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +public final class TabbedHeader implements Iterable { + private final List records = new ArrayList<>(); + + public TabbedHeader(final List headerRecords) { + if (null != headerRecords) { + records.addAll(headerRecords); + } + } + + @Override + public Iterator iterator() { + return records.iterator(); + } + +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedHeader.java-- b/qio/src/org/qcmg/qmule/tab/TabbedHeader.java-- new file mode 100644 index 000000000..28bd5406e --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedHeader.java-- @@ -0,0 +1,24 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.tab; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +public final class TabbedHeader implements Iterable { + private final List records = new ArrayList<>(); + + public TabbedHeader(final List headerRecords) { + if (null != headerRecords) { + records.addAll(headerRecords); + } + } + + @Override + public Iterator iterator() { + return records.iterator(); + } + +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedRecord.java b/qio/src/org/qcmg/qmule/tab/TabbedRecord.java new file mode 100644 index 000000000..fd4b84c50 --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedRecord.java @@ -0,0 +1,19 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.tab; + + +public class TabbedRecord { + private String data; + + public String getData() { + return data; + } + public String[] getDataArray() { + return data.replace("\"", "").split("\t"); + } + public void setData(String data) { + this.data = data; + } +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedRecord.java-- b/qio/src/org/qcmg/qmule/tab/TabbedRecord.java-- new file mode 100644 index 000000000..8cc7d08ac --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedRecord.java-- @@ -0,0 +1,19 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.unused.tab; + + +public class TabbedRecord { + private String data; + + public String getData() { + return data; + } + public String[] getDataArray() { + return data.replace("\"", "").split("\t"); + } + public void setData(String data) { + this.data = data; + } +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java b/qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java new file mode 100644 index 000000000..741cad0b9 --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java @@ -0,0 +1,47 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.tab; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.NoSuchElementException; + +public final class TabbedRecordIterator implements Iterator { + private final BufferedReader reader; + private TabbedRecord next; + + public TabbedRecordIterator(final InputStream stream) { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public TabbedRecord next() { + if (!hasNext()) throw new NoSuchElementException(); + + TabbedRecord result = next; + readNext(); + return result; + } + + private void readNext() { + try { + next = TabbedSerializer.nextRecord(reader); + } catch (NoSuchElementException e) { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex.getMessage(), ex); + } + } + + public void remove() { + throw new UnsupportedOperationException(); + } +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java-- b/qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java-- new file mode 100644 index 000000000..b531e4112 --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java-- @@ -0,0 +1,47 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.tab; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.NoSuchElementException; + +public final class TabbedRecordIterator implements Iterator { + private final BufferedReader reader; + private TabbedRecord next; + + public TabbedRecordIterator(final InputStream stream) { + InputStreamReader streamReader = new InputStreamReader(stream); + reader = new BufferedReader(streamReader); + readNext(); + } + + public boolean hasNext() { + return null != next; + } + + public TabbedRecord next() { + if (!hasNext()) throw new NoSuchElementException(); + + TabbedRecord result = next; + readNext(); + return result; + } + + private void readNext() { + try { + next = TabbedSerializer.nextRecord(reader); + } catch (NoSuchElementException e) { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex.getMessage(), ex); + } + } + + public void remove() { + throw new UnsupportedOperationException(); + } +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedSerializer.java b/qio/src/org/qcmg/qmule/tab/TabbedSerializer.java new file mode 100644 index 000000000..b89741e5a --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedSerializer.java @@ -0,0 +1,51 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.tab; + +import java.io.BufferedReader; + +import static org.qcmg.common.util.Constants.HASH_STRING; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public final class TabbedSerializer { + + public static TabbedHeader readHeader(final BufferedReader reader) throws IOException { + List headerLines = new ArrayList<>(); + String line = reader.readLine(); + while (null != line && line.startsWith(HASH_STRING)) { + headerLines.add(line); + line = reader.readLine(); + } + return new TabbedHeader(headerLines); + } + + private static String nextNonheaderLine(final BufferedReader reader) throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(HASH_STRING)) { + line = reader.readLine(); + } + return line; + } + + public static TabbedRecord nextRecord(final BufferedReader reader) throws IOException { + TabbedRecord result = null; + String line = nextNonheaderLine(reader); + if (null != line) { + result = parseRecord(line); + } + return result; + } + + static TabbedRecord parseRecord(final String line) { + TabbedRecord result = new TabbedRecord(); + result.setData(line); + return result; + } +} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedSerializer.java-- b/qio/src/org/qcmg/qmule/tab/TabbedSerializer.java-- new file mode 100644 index 000000000..58a56fb21 --- /dev/null +++ b/qio/src/org/qcmg/qmule/tab/TabbedSerializer.java-- @@ -0,0 +1,51 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.unused.tab; + +import java.io.BufferedReader; + +import static org.qcmg.common.util.Constants.HASH_STRING; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public final class TabbedSerializer { + + public static TabbedHeader readHeader(final BufferedReader reader) throws IOException { + List headerLines = new ArrayList<>(); + String line = reader.readLine(); + while (null != line && line.startsWith(HASH_STRING)) { + headerLines.add(line); + line = reader.readLine(); + } + return new TabbedHeader(headerLines); + } + + private static String nextNonheaderLine(final BufferedReader reader) throws IOException { + String line = reader.readLine(); + while (null != line && line.startsWith(HASH_STRING)) { + line = reader.readLine(); + } + return line; + } + + public static TabbedRecord nextRecord(final BufferedReader reader) throws IOException { + TabbedRecord result = null; + String line = nextNonheaderLine(reader); + if (null != line) { + result = parseRecord(line); + } + return result; + } + + static TabbedRecord parseRecord(final String line) { + TabbedRecord result = new TabbedRecord(); + result.setData(line); + return result; + } +} From 885a4bb4ee50ad1391f63d12a859333644ce822a Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 18:56:20 +1000 Subject: [PATCH 16/73] mv qmule only package --- qio/src/org/qcmg/{ => qmule}/chrconv/ChrConvFileReader.java | 2 +- qio/src/org/qcmg/{ => qmule}/chrconv/ChrConvRecordIterator.java | 2 +- qio/src/org/qcmg/{ => qmule}/chrconv/ChrConvSerializer.java | 2 +- .../qcmg/{ => qmule}/chrconv/ChromosomeConversionRecord.java | 2 +- qio/src/org/qcmg/{ => qmule}/dbsnp/Dbsnp130Record.java | 2 +- qio/src/org/qcmg/{ => qmule}/dbsnp/DbsnpFileReader.java | 2 +- qio/src/org/qcmg/{ => qmule}/dbsnp/DbsnpRecordIterator.java | 2 +- qio/src/org/qcmg/{ => qmule}/dbsnp/DbsnpSerializer.java | 2 +- .../org/qcmg/{ => qmule}/germlinedb/GermlineDBFileReader.java | 2 +- .../org/qcmg/{ => qmule}/germlinedb/GermlineDBFileWriter.java | 2 +- qio/src/org/qcmg/{ => qmule}/germlinedb/GermlineDBRecord.java | 2 +- .../qcmg/{ => qmule}/germlinedb/GermlineDBRecordIterator.java | 2 +- .../org/qcmg/{ => qmule}/germlinedb/GermlineDBSerializer.java | 2 +- qio/src/org/qcmg/{ => qmule}/pileup/VerifiedSnpFileReader.java | 2 +- qio/src/org/qcmg/{ => qmule}/pileup/VerifiedSnpRecord.java | 2 +- .../org/qcmg/{ => qmule}/pileup/VerifiedSnpRecordIterator.java | 2 +- qio/src/org/qcmg/{ => qmule}/pileup/VerifiedSnpSerializer.java | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) rename qio/src/org/qcmg/{ => qmule}/chrconv/ChrConvFileReader.java (97%) rename qio/src/org/qcmg/{ => qmule}/chrconv/ChrConvRecordIterator.java (97%) rename qio/src/org/qcmg/{ => qmule}/chrconv/ChrConvSerializer.java (97%) rename qio/src/org/qcmg/{ => qmule}/chrconv/ChromosomeConversionRecord.java (97%) rename qio/src/org/qcmg/{ => qmule}/dbsnp/Dbsnp130Record.java (99%) rename qio/src/org/qcmg/{ => qmule}/dbsnp/DbsnpFileReader.java (97%) rename qio/src/org/qcmg/{ => qmule}/dbsnp/DbsnpRecordIterator.java (97%) rename qio/src/org/qcmg/{ => qmule}/dbsnp/DbsnpSerializer.java (97%) rename qio/src/org/qcmg/{ => qmule}/germlinedb/GermlineDBFileReader.java (98%) rename qio/src/org/qcmg/{ => qmule}/germlinedb/GermlineDBFileWriter.java (97%) rename qio/src/org/qcmg/{ => qmule}/germlinedb/GermlineDBRecord.java (96%) rename qio/src/org/qcmg/{ => qmule}/germlinedb/GermlineDBRecordIterator.java (97%) rename qio/src/org/qcmg/{ => 
qmule}/germlinedb/GermlineDBSerializer.java (97%) rename qio/src/org/qcmg/{ => qmule}/pileup/VerifiedSnpFileReader.java (97%) rename qio/src/org/qcmg/{ => qmule}/pileup/VerifiedSnpRecord.java (98%) rename qio/src/org/qcmg/{ => qmule}/pileup/VerifiedSnpRecordIterator.java (97%) rename qio/src/org/qcmg/{ => qmule}/pileup/VerifiedSnpSerializer.java (98%) diff --git a/qio/src/org/qcmg/chrconv/ChrConvFileReader.java b/qio/src/org/qcmg/qmule/chrconv/ChrConvFileReader.java similarity index 97% rename from qio/src/org/qcmg/chrconv/ChrConvFileReader.java rename to qio/src/org/qcmg/qmule/chrconv/ChrConvFileReader.java index 749eba7d6..523907105 100644 --- a/qio/src/org/qcmg/chrconv/ChrConvFileReader.java +++ b/qio/src/org/qcmg/qmule/chrconv/ChrConvFileReader.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.chrconv; +package org.qcmg.qmule.chrconv; import java.io.Closeable; import java.io.File; diff --git a/qio/src/org/qcmg/chrconv/ChrConvRecordIterator.java b/qio/src/org/qcmg/qmule/chrconv/ChrConvRecordIterator.java similarity index 97% rename from qio/src/org/qcmg/chrconv/ChrConvRecordIterator.java rename to qio/src/org/qcmg/qmule/chrconv/ChrConvRecordIterator.java index c7b0d624e..7cda497a6 100644 --- a/qio/src/org/qcmg/chrconv/ChrConvRecordIterator.java +++ b/qio/src/org/qcmg/qmule/chrconv/ChrConvRecordIterator.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.chrconv; +package org.qcmg.qmule.chrconv; import java.io.BufferedReader; import java.io.InputStream; diff --git a/qio/src/org/qcmg/chrconv/ChrConvSerializer.java b/qio/src/org/qcmg/qmule/chrconv/ChrConvSerializer.java similarity index 97% rename from qio/src/org/qcmg/chrconv/ChrConvSerializer.java rename to qio/src/org/qcmg/qmule/chrconv/ChrConvSerializer.java index 425f76cdc..f78664e50 100644 --- a/qio/src/org/qcmg/chrconv/ChrConvSerializer.java +++ b/qio/src/org/qcmg/qmule/chrconv/ChrConvSerializer.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.chrconv; +package org.qcmg.qmule.chrconv; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/src/org/qcmg/chrconv/ChromosomeConversionRecord.java b/qio/src/org/qcmg/qmule/chrconv/ChromosomeConversionRecord.java similarity index 97% rename from qio/src/org/qcmg/chrconv/ChromosomeConversionRecord.java rename to qio/src/org/qcmg/qmule/chrconv/ChromosomeConversionRecord.java index 1a4535fc0..245f14b52 100644 --- a/qio/src/org/qcmg/chrconv/ChromosomeConversionRecord.java +++ b/qio/src/org/qcmg/qmule/chrconv/ChromosomeConversionRecord.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
*/ -package org.qcmg.chrconv; +package org.qcmg.qmule.chrconv; public class ChromosomeConversionRecord { diff --git a/qio/src/org/qcmg/dbsnp/Dbsnp130Record.java b/qio/src/org/qcmg/qmule/dbsnp/Dbsnp130Record.java similarity index 99% rename from qio/src/org/qcmg/dbsnp/Dbsnp130Record.java rename to qio/src/org/qcmg/qmule/dbsnp/Dbsnp130Record.java index 09e19ee52..d2787e400 100644 --- a/qio/src/org/qcmg/dbsnp/Dbsnp130Record.java +++ b/qio/src/org/qcmg/qmule/dbsnp/Dbsnp130Record.java @@ -9,7 +9,7 @@ // -package org.qcmg.dbsnp; +package org.qcmg.qmule.dbsnp; diff --git a/qio/src/org/qcmg/dbsnp/DbsnpFileReader.java b/qio/src/org/qcmg/qmule/dbsnp/DbsnpFileReader.java similarity index 97% rename from qio/src/org/qcmg/dbsnp/DbsnpFileReader.java rename to qio/src/org/qcmg/qmule/dbsnp/DbsnpFileReader.java index 1974d3a80..33facf9fa 100644 --- a/qio/src/org/qcmg/dbsnp/DbsnpFileReader.java +++ b/qio/src/org/qcmg/qmule/dbsnp/DbsnpFileReader.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.dbsnp; +package org.qcmg.qmule.dbsnp; import java.io.Closeable; import java.io.File; diff --git a/qio/src/org/qcmg/dbsnp/DbsnpRecordIterator.java b/qio/src/org/qcmg/qmule/dbsnp/DbsnpRecordIterator.java similarity index 97% rename from qio/src/org/qcmg/dbsnp/DbsnpRecordIterator.java rename to qio/src/org/qcmg/qmule/dbsnp/DbsnpRecordIterator.java index 2bd3b84b5..6e8fe36f1 100644 --- a/qio/src/org/qcmg/dbsnp/DbsnpRecordIterator.java +++ b/qio/src/org/qcmg/qmule/dbsnp/DbsnpRecordIterator.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.dbsnp; +package org.qcmg.qmule.dbsnp; import java.io.BufferedReader; import java.io.InputStream; diff --git a/qio/src/org/qcmg/dbsnp/DbsnpSerializer.java b/qio/src/org/qcmg/qmule/dbsnp/DbsnpSerializer.java similarity index 97% rename from qio/src/org/qcmg/dbsnp/DbsnpSerializer.java rename to qio/src/org/qcmg/qmule/dbsnp/DbsnpSerializer.java index 3fb02eb72..a2b528ca8 100644 --- a/qio/src/org/qcmg/dbsnp/DbsnpSerializer.java +++ b/qio/src/org/qcmg/qmule/dbsnp/DbsnpSerializer.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.dbsnp; +package org.qcmg.qmule.dbsnp; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/src/org/qcmg/germlinedb/GermlineDBFileReader.java b/qio/src/org/qcmg/qmule/germlinedb/GermlineDBFileReader.java similarity index 98% rename from qio/src/org/qcmg/germlinedb/GermlineDBFileReader.java rename to qio/src/org/qcmg/qmule/germlinedb/GermlineDBFileReader.java index 0d187716e..063e6d10a 100644 --- a/qio/src/org/qcmg/germlinedb/GermlineDBFileReader.java +++ b/qio/src/org/qcmg/qmule/germlinedb/GermlineDBFileReader.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
*/ -package org.qcmg.germlinedb; +package org.qcmg.qmule.germlinedb; import java.io.Closeable; import java.io.File; diff --git a/qio/src/org/qcmg/germlinedb/GermlineDBFileWriter.java b/qio/src/org/qcmg/qmule/germlinedb/GermlineDBFileWriter.java similarity index 97% rename from qio/src/org/qcmg/germlinedb/GermlineDBFileWriter.java rename to qio/src/org/qcmg/qmule/germlinedb/GermlineDBFileWriter.java index 6f3baac9f..92cf426c9 100644 --- a/qio/src/org/qcmg/germlinedb/GermlineDBFileWriter.java +++ b/qio/src/org/qcmg/qmule/germlinedb/GermlineDBFileWriter.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.germlinedb; +package org.qcmg.qmule.germlinedb; import java.io.Closeable; import java.io.File; diff --git a/qio/src/org/qcmg/germlinedb/GermlineDBRecord.java b/qio/src/org/qcmg/qmule/germlinedb/GermlineDBRecord.java similarity index 96% rename from qio/src/org/qcmg/germlinedb/GermlineDBRecord.java rename to qio/src/org/qcmg/qmule/germlinedb/GermlineDBRecord.java index 43eb003e5..d32793c49 100644 --- a/qio/src/org/qcmg/germlinedb/GermlineDBRecord.java +++ b/qio/src/org/qcmg/qmule/germlinedb/GermlineDBRecord.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.germlinedb; +package org.qcmg.qmule.germlinedb; public class GermlineDBRecord { diff --git a/qio/src/org/qcmg/germlinedb/GermlineDBRecordIterator.java b/qio/src/org/qcmg/qmule/germlinedb/GermlineDBRecordIterator.java similarity index 97% rename from qio/src/org/qcmg/germlinedb/GermlineDBRecordIterator.java rename to qio/src/org/qcmg/qmule/germlinedb/GermlineDBRecordIterator.java index c0dc029e1..b603331c2 100644 --- a/qio/src/org/qcmg/germlinedb/GermlineDBRecordIterator.java +++ b/qio/src/org/qcmg/qmule/germlinedb/GermlineDBRecordIterator.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.germlinedb; +package org.qcmg.qmule.germlinedb; import java.io.BufferedReader; import java.io.InputStream; diff --git a/qio/src/org/qcmg/germlinedb/GermlineDBSerializer.java b/qio/src/org/qcmg/qmule/germlinedb/GermlineDBSerializer.java similarity index 97% rename from qio/src/org/qcmg/germlinedb/GermlineDBSerializer.java rename to qio/src/org/qcmg/qmule/germlinedb/GermlineDBSerializer.java index 1bc4d911f..28b0bc647 100644 --- a/qio/src/org/qcmg/germlinedb/GermlineDBSerializer.java +++ b/qio/src/org/qcmg/qmule/germlinedb/GermlineDBSerializer.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.germlinedb; +package org.qcmg.qmule.germlinedb; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/src/org/qcmg/pileup/VerifiedSnpFileReader.java b/qio/src/org/qcmg/qmule/pileup/VerifiedSnpFileReader.java similarity index 97% rename from qio/src/org/qcmg/pileup/VerifiedSnpFileReader.java rename to qio/src/org/qcmg/qmule/pileup/VerifiedSnpFileReader.java index 97d8d824a..72e23f230 100644 --- a/qio/src/org/qcmg/pileup/VerifiedSnpFileReader.java +++ b/qio/src/org/qcmg/qmule/pileup/VerifiedSnpFileReader.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. 
This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.pileup; +package org.qcmg.qmule.pileup; import java.io.Closeable; import java.io.File; diff --git a/qio/src/org/qcmg/pileup/VerifiedSnpRecord.java b/qio/src/org/qcmg/qmule/pileup/VerifiedSnpRecord.java similarity index 98% rename from qio/src/org/qcmg/pileup/VerifiedSnpRecord.java rename to qio/src/org/qcmg/qmule/pileup/VerifiedSnpRecord.java index 05890eccc..d10a126ba 100644 --- a/qio/src/org/qcmg/pileup/VerifiedSnpRecord.java +++ b/qio/src/org/qcmg/qmule/pileup/VerifiedSnpRecord.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.pileup; +package org.qcmg.qmule.pileup; import org.qcmg.common.model.Classification; diff --git a/qio/src/org/qcmg/pileup/VerifiedSnpRecordIterator.java b/qio/src/org/qcmg/qmule/pileup/VerifiedSnpRecordIterator.java similarity index 97% rename from qio/src/org/qcmg/pileup/VerifiedSnpRecordIterator.java rename to qio/src/org/qcmg/qmule/pileup/VerifiedSnpRecordIterator.java index 5a8a2d435..67b20f0dd 100644 --- a/qio/src/org/qcmg/pileup/VerifiedSnpRecordIterator.java +++ b/qio/src/org/qcmg/qmule/pileup/VerifiedSnpRecordIterator.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.pileup; +package org.qcmg.qmule.pileup; import java.io.BufferedReader; import java.io.InputStream; diff --git a/qio/src/org/qcmg/pileup/VerifiedSnpSerializer.java b/qio/src/org/qcmg/qmule/pileup/VerifiedSnpSerializer.java similarity index 98% rename from qio/src/org/qcmg/pileup/VerifiedSnpSerializer.java rename to qio/src/org/qcmg/qmule/pileup/VerifiedSnpSerializer.java index 230f20f22..5c4d07ae9 100644 --- a/qio/src/org/qcmg/pileup/VerifiedSnpSerializer.java +++ b/qio/src/org/qcmg/qmule/pileup/VerifiedSnpSerializer.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
*/ -package org.qcmg.pileup; +package org.qcmg.qmule.pileup; import java.io.BufferedReader; import java.io.IOException; From 2e5187bfe65ce334a8468aba67ff06bfcb45f9e1 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 18:59:15 +1000 Subject: [PATCH 17/73] delete qmule only package --- qio/src/org/qcmg/string/StringFileReader.java | 98 ------------------- qio/src/org/qcmg/string/StringFileWriter.java | 46 --------- qio/src/org/qcmg/string/StringHeader.java | 24 ----- .../org/qcmg/string/StringRecordIterator.java | 48 --------- qio/src/org/qcmg/string/StringSerializer.java | 37 ------- qio/src/org/qcmg/tab/TabbedFileReader.java | 61 ------------ qio/src/org/qcmg/tab/TabbedFileWriter.java | 46 --------- qio/src/org/qcmg/tab/TabbedHeader.java | 24 ----- qio/src/org/qcmg/tab/TabbedRecord.java | 19 ---- .../org/qcmg/tab/TabbedRecordIterator.java | 47 --------- qio/src/org/qcmg/tab/TabbedSerializer.java | 51 ---------- 11 files changed, 501 deletions(-) delete mode 100644 qio/src/org/qcmg/string/StringFileReader.java delete mode 100644 qio/src/org/qcmg/string/StringFileWriter.java delete mode 100644 qio/src/org/qcmg/string/StringHeader.java delete mode 100644 qio/src/org/qcmg/string/StringRecordIterator.java delete mode 100644 qio/src/org/qcmg/string/StringSerializer.java delete mode 100644 qio/src/org/qcmg/tab/TabbedFileReader.java delete mode 100644 qio/src/org/qcmg/tab/TabbedFileWriter.java delete mode 100644 qio/src/org/qcmg/tab/TabbedHeader.java delete mode 100644 qio/src/org/qcmg/tab/TabbedRecord.java delete mode 100644 qio/src/org/qcmg/tab/TabbedRecordIterator.java delete mode 100644 qio/src/org/qcmg/tab/TabbedSerializer.java diff --git a/qio/src/org/qcmg/string/StringFileReader.java b/qio/src/org/qcmg/string/StringFileReader.java deleted file mode 100644 index 509defff8..000000000 --- a/qio/src/org/qcmg/string/StringFileReader.java +++ /dev/null @@ -1,98 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.string; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.file.Files; -import java.util.Iterator; -import java.util.zip.GZIPInputStream; - -import org.qcmg.common.util.Constants; -import org.qcmg.common.util.FileUtils; - -public final class StringFileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - private final InputStream fin; - private final BufferedInputStream in; - private final StringHeader header; - private final CharSequence headerDiscriminator; - public static final int DEFAULT_BUFFER_SIZE = 65536; - - public StringFileReader(final File file, int bufferSize) throws IOException { - this(file, bufferSize, Constants.HASH_STRING); - } - public StringFileReader(final File file, int bufferSize, CharSequence headerDiscriminator) throws IOException { - this.file = file; - this.headerDiscriminator = headerDiscriminator; - boolean isGzip = FileUtils.isInputGZip( file); - - try (InputStream fin = Files.newInputStream(file.toPath()); - BufferedInputStream in = new BufferedInputStream(fin, DEFAULT_BUFFER_SIZE); - InputStream stream = (isGzip) ? 
new GZIPInputStream(in, bufferSize) : in; - BufferedReader bin = new BufferedReader(new InputStreamReader(stream)); ) { - - header = StringSerializer.readHeader(bin, headerDiscriminator.toString()); - } - - // create a new stream rather a closed one - fin = Files.newInputStream(file.toPath()); - in = new BufferedInputStream(fin, DEFAULT_BUFFER_SIZE); - inputStream = (isGzip) ? new GZIPInputStream(in, bufferSize) : in; - } - - public StringFileReader(final File file) throws IOException { - this(file, DEFAULT_BUFFER_SIZE); - } - - public StringHeader getHeader() { - return header; - } - - @Override - public Iterator iterator() { - return getRecordIterator(); - } - - public StringRecordIterator getRecordIterator() { - return new StringRecordIterator(inputStream, headerDiscriminator.toString()); - } - - @Override - public void close() throws IOException { - try { - inputStream.close(); - } catch (IOException ioe) { - // TODO Auto-generated catch block - ioe.printStackTrace(); - } finally { - try { - in.close(); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } finally { - try { - fin.close(); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - } - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/string/StringFileWriter.java b/qio/src/org/qcmg/string/StringFileWriter.java deleted file mode 100644 index b865f68f2..000000000 --- a/qio/src/org/qcmg/string/StringFileWriter.java +++ /dev/null @@ -1,46 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.string; - -import java.io.Closeable; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -public final class StringFileWriter implements Closeable { - private final File file; - private final OutputStream outputStream; - - public StringFileWriter(final File file) throws Exception { - this.file = file; - OutputStream stream = new FileOutputStream(file); - outputStream = stream; - } - - public void addHeader(final StringHeader header) throws Exception { - for (String headerLine : header) { - String encoded = headerLine + "\n"; - outputStream.write(encoded.getBytes()); - } - outputStream.flush(); - } - - public void add(final String record) throws Exception { - String encoded = record + "\n"; - outputStream.write(encoded.getBytes()); - outputStream.flush(); - } - - @Override - public void close() throws IOException { - // flush anything outstanding and then close - outputStream.flush(); - outputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/string/StringHeader.java b/qio/src/org/qcmg/string/StringHeader.java deleted file mode 100644 index 1a2a53d9e..000000000 --- a/qio/src/org/qcmg/string/StringHeader.java +++ /dev/null @@ -1,24 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.string; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -public final class StringHeader implements Iterable { - private final List records = new ArrayList<>(); - - public StringHeader(final List headerRecords) { - if (null != headerRecords) { - records.addAll(headerRecords); - } - } - - @Override - public Iterator iterator() { - return records.iterator(); - } - -} diff --git a/qio/src/org/qcmg/string/StringRecordIterator.java b/qio/src/org/qcmg/string/StringRecordIterator.java deleted file mode 100644 index ed13264dc..000000000 --- a/qio/src/org/qcmg/string/StringRecordIterator.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.string; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class StringRecordIterator implements Iterator { - private final BufferedReader reader; - private String next; - private final String headerDiscriminator; - - public StringRecordIterator(final InputStream stream, String headerDiscriminator) { - this.headerDiscriminator = headerDiscriminator; - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public String next() { - if (!hasNext()) throw new NoSuchElementException(); - String result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = StringSerializer.nextRecord(reader, headerDiscriminator); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/string/StringSerializer.java b/qio/src/org/qcmg/string/StringSerializer.java deleted file mode 100644 index 1b69948dc..000000000 --- a/qio/src/org/qcmg/string/StringSerializer.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.string; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -public final class StringSerializer { - - public static StringHeader readHeader(final BufferedReader reader, String headerDiscriminator) throws IOException { - List headerLines = new ArrayList<>(); - String line = reader.readLine(); - while (null != line && line.startsWith(headerDiscriminator)) { - headerLines.add(line); - line = reader.readLine(); - } - return new StringHeader(headerLines); - } - - private static String nextNonheaderLine(final BufferedReader reader, String headerDiscriminator) throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(headerDiscriminator)) { - line = reader.readLine(); - } - return line; - } - - public static String nextRecord(final BufferedReader reader, String headerDiscriminator) throws IOException { - return nextNonheaderLine(reader, headerDiscriminator); - } -} diff --git a/qio/src/org/qcmg/tab/TabbedFileReader.java b/qio/src/org/qcmg/tab/TabbedFileReader.java deleted file mode 100644 index dc2406af0..000000000 --- a/qio/src/org/qcmg/tab/TabbedFileReader.java +++ /dev/null @@ -1,61 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.tab; - -import java.io.BufferedReader; -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.zip.GZIPInputStream; - -import org.qcmg.Utils.IOStreamUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.vcf.VCFSerializer; - -public final class TabbedFileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - private final TabbedHeader header; - - public TabbedFileReader(final File file) throws IOException { - this.file = file; - boolean isGzip = FileUtils.isInputGZip( file); - try(InputStream stream = (isGzip) ? new GZIPInputStream(new FileInputStream(file), 65536) : new FileInputStream(file);) { - BufferedReader in = new BufferedReader(new InputStreamReader(stream)); - header = TabbedSerializer.readHeader(in); - } - - // create a new stream rather a closed one - inputStream = (isGzip) ? new GZIPInputStream(new FileInputStream(file), 65536) : new FileInputStream(file); - } - - public TabbedHeader getHeader() { - return header; - } - - @Override - public Iterator iterator() { - return getRecordIterator(); - } - - public TabbedRecordIterator getRecordIterator() { - return new TabbedRecordIterator(inputStream); - } - - @Override - public void close() throws IOException { - inputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/tab/TabbedFileWriter.java b/qio/src/org/qcmg/tab/TabbedFileWriter.java deleted file mode 100644 index 1356e1693..000000000 --- a/qio/src/org/qcmg/tab/TabbedFileWriter.java +++ /dev/null @@ -1,46 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.tab; - -import java.io.Closeable; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -public final class TabbedFileWriter implements Closeable { - private final File file; - private final OutputStream outputStream; - - public TabbedFileWriter(final File file) throws Exception { - this.file = file; - OutputStream stream = new FileOutputStream(file); - outputStream = stream; - } - - public void addHeader(final TabbedHeader header) throws Exception { - for (String headerLine : header) { - String encoded = headerLine + "\n"; - outputStream.write(encoded.getBytes()); - } - outputStream.flush(); - } - - public void add(final TabbedRecord record) throws Exception { - String encoded = record.getData() + "\n"; - outputStream.write(encoded.getBytes()); - outputStream.flush(); - } - - @Override - public void close() throws IOException { - // flush anything outstanding and then close - outputStream.flush(); - outputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/tab/TabbedHeader.java b/qio/src/org/qcmg/tab/TabbedHeader.java deleted file mode 100644 index e9555e7f8..000000000 --- a/qio/src/org/qcmg/tab/TabbedHeader.java +++ /dev/null @@ -1,24 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.tab; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -public final class TabbedHeader implements Iterable { - private final List records = new ArrayList<>(); - - public TabbedHeader(final List headerRecords) { - if (null != headerRecords) { - records.addAll(headerRecords); - } - } - - @Override - public Iterator iterator() { - return records.iterator(); - } - -} diff --git a/qio/src/org/qcmg/tab/TabbedRecord.java b/qio/src/org/qcmg/tab/TabbedRecord.java deleted file mode 100644 index d30920b58..000000000 --- a/qio/src/org/qcmg/tab/TabbedRecord.java +++ /dev/null @@ -1,19 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.tab; - - -public class TabbedRecord { - private String data; - - public String getData() { - return data; - } - public String[] getDataArray() { - return data.replace("\"", "").split("\t"); - } - public void setData(String data) { - this.data = data; - } -} diff --git a/qio/src/org/qcmg/tab/TabbedRecordIterator.java b/qio/src/org/qcmg/tab/TabbedRecordIterator.java deleted file mode 100644 index 482e30fdc..000000000 --- a/qio/src/org/qcmg/tab/TabbedRecordIterator.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.tab; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class TabbedRecordIterator implements Iterator { - private final BufferedReader reader; - private TabbedRecord next; - - public TabbedRecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public TabbedRecord next() { - if (!hasNext()) throw new NoSuchElementException(); - - TabbedRecord result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = TabbedSerializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/tab/TabbedSerializer.java b/qio/src/org/qcmg/tab/TabbedSerializer.java deleted file mode 100644 index 9e23dd36f..000000000 --- a/qio/src/org/qcmg/tab/TabbedSerializer.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.tab; - -import java.io.BufferedReader; - -import static org.qcmg.common.util.Constants.HASH_STRING; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -public final class TabbedSerializer { - - public static TabbedHeader readHeader(final BufferedReader reader) throws IOException { - List headerLines = new ArrayList<>(); - String line = reader.readLine(); - while (null != line && line.startsWith(HASH_STRING)) { - headerLines.add(line); - line = reader.readLine(); - } - return new TabbedHeader(headerLines); - } - - private static String nextNonheaderLine(final BufferedReader reader) throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(HASH_STRING)) { - line = reader.readLine(); - } - return line; - } - - public static TabbedRecord nextRecord(final BufferedReader reader) throws IOException { - TabbedRecord result = null; - String line = nextNonheaderLine(reader); - if (null != line) { - result = parseRecord(line); - } - return result; - } - - static TabbedRecord parseRecord(final String line) { - TabbedRecord result = new TabbedRecord(); - result.setData(line); - return result; - } -} From 49d60f4cc5375f0380ec25055289552def2bb03b Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 19:00:30 +1000 Subject: [PATCH 18/73] mv primer* to unused --- .../PrimerDesignFileReader.java | 17 - .../PrimerDesignRecord.java | 135 ----- .../PrimerDesignRecordSerializer.java | 80 --- .../primerdesignsummary/PrimerPosition.java | 68 --- .../primerinput/PrimerInputFileReader.java | 17 - .../primerinput/PrimerInputFileWriter.java | 33 - .../qcmg/primerinput/PrimerInputRecord.java | 331 ---------- .../PrimerInputRecordSerializer.java | 201 ------- .../primerinput/PrimerSequenceTarget.java | 43 -- .../org/qcmg/primerinput/PrimerSizeRange.java | 44 -- .../primeroutput/PrimerOutputFileReader.java | 22 - .../primeroutput/PrimerOutputFileWriter.java | 35 -- .../qcmg/primeroutput/PrimerOutputHeader.java | 563 ------------------ 
.../PrimerOutputHeaderSerializer.java | 128 ---- .../qcmg/primeroutput/PrimerOutputRecord.java | 517 ---------------- .../PrimerOutputRecordSerializer.java | 136 ----- 16 files changed, 2370 deletions(-) delete mode 100644 qio/src/org/qcmg/primerdesignsummary/PrimerDesignFileReader.java delete mode 100644 qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecord.java delete mode 100644 qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecordSerializer.java delete mode 100644 qio/src/org/qcmg/primerdesignsummary/PrimerPosition.java delete mode 100644 qio/src/org/qcmg/primerinput/PrimerInputFileReader.java delete mode 100644 qio/src/org/qcmg/primerinput/PrimerInputFileWriter.java delete mode 100644 qio/src/org/qcmg/primerinput/PrimerInputRecord.java delete mode 100644 qio/src/org/qcmg/primerinput/PrimerInputRecordSerializer.java delete mode 100644 qio/src/org/qcmg/primerinput/PrimerSequenceTarget.java delete mode 100644 qio/src/org/qcmg/primerinput/PrimerSizeRange.java delete mode 100644 qio/src/org/qcmg/primeroutput/PrimerOutputFileReader.java delete mode 100644 qio/src/org/qcmg/primeroutput/PrimerOutputFileWriter.java delete mode 100644 qio/src/org/qcmg/primeroutput/PrimerOutputHeader.java delete mode 100644 qio/src/org/qcmg/primeroutput/PrimerOutputHeaderSerializer.java delete mode 100644 qio/src/org/qcmg/primeroutput/PrimerOutputRecord.java delete mode 100644 qio/src/org/qcmg/primeroutput/PrimerOutputRecordSerializer.java diff --git a/qio/src/org/qcmg/primerdesignsummary/PrimerDesignFileReader.java b/qio/src/org/qcmg/primerdesignsummary/PrimerDesignFileReader.java deleted file mode 100644 index 0cacc654b..000000000 --- a/qio/src/org/qcmg/primerdesignsummary/PrimerDesignFileReader.java +++ /dev/null @@ -1,17 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primerdesignsummary; - -import java.io.File; - -import org.qcmg.reader.FileReader; - -public class PrimerDesignFileReader extends FileReader { - private final static PrimerDesignRecordSerializer serializer = - new PrimerDesignRecordSerializer(); - - public PrimerDesignFileReader(final File file) throws Exception { - super(file, serializer); - } -} diff --git a/qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecord.java b/qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecord.java deleted file mode 100644 index d76846657..000000000 --- a/qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecord.java +++ /dev/null @@ -1,135 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ - -package org.qcmg.primerdesignsummary; - -public class PrimerDesignRecord { - - protected String snpId; - protected String gene; - protected PrimerPosition position; - protected String baseChange; - protected String snpClass; - - /** - * Gets the value of the snpId property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSnpId() { - return snpId; - } - - /** - * Sets the value of the snpId property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSnpId(String value) { - this.snpId = value; - } - - /** - * Gets the value of the gene property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getGene() { - return gene; - } - - /** - * Sets the value of the gene property. 
- * - * @param value - * allowed object is - * {@link String } - * - */ - public void setGene(String value) { - this.gene = value; - } - - /** - * Gets the value of the position property. - * - * @return - * possible object is - * {@link PrimerPosition } - * - */ - public PrimerPosition getPosition() { - return position; - } - - /** - * Sets the value of the position property. - * - * @param value - * allowed object is - * {@link PrimerPosition } - * - */ - public void setPosition(PrimerPosition value) { - this.position = value; - } - - /** - * Gets the value of the baseChange property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getBaseChange() { - return baseChange; - } - - /** - * Sets the value of the baseChange property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setBaseChange(String value) { - this.baseChange = value; - } - - /** - * Gets the value of the snpClass property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSnpClass() { - return snpClass; - } - - /** - * Sets the value of the snpClass property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSnpClass(String value) { - this.snpClass = value; - } - -} diff --git a/qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecordSerializer.java b/qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecordSerializer.java deleted file mode 100644 index 3b0708578..000000000 --- a/qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecordSerializer.java +++ /dev/null @@ -1,80 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primerdesignsummary; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -import org.qcmg.record.Serializer; - -public final class PrimerDesignRecordSerializer extends Serializer { - public PrimerDesignRecord parseRecord(final String line) throws Exception { - String[] params = tabbedPattern.split(line); - if (5 > params.length) { - throw new Exception("Bad primer design record format: '" + line - + "'"); - } - String encodedPosition = params[2].trim(); - PrimerPosition primerPosition = parsePrimerPosition(encodedPosition); - - PrimerDesignRecord result = new PrimerDesignRecord(); - result.setSnpId(params[0].trim()); - result.setGene(params[1].trim()); - result.setPosition(primerPosition); - result.setBaseChange(params[3].trim()); - result.setSnpClass(params[4].trim()); - return result; - } - - public PrimerPosition parsePrimerPosition(String encodedPosition) - throws Exception { - String[] positionParams = colonPattern.split(encodedPosition); - if (2 != positionParams.length) { - throw new Exception("Bad primer design record position format: '" - + encodedPosition + "'"); - } - String chromosome = positionParams[0].trim(); - String positionRange = positionParams[1].trim(); - - String[] positions = hyphenPattern.split(positionRange); - if (2 != positions.length) { - throw new Exception("Bad primer design record position format: '" - + encodedPosition + "'"); - } - int start = Integer.parseInt(positions[0]); - int end = Integer.parseInt(positions[1]); - - PrimerPosition primerPosition = new PrimerPosition(); - primerPosition.setChromosome(chromosome); - primerPosition.setStart(start); - primerPosition.setEnd(end); - return primerPosition; - } - - public String nextNonheaderLine(final BufferedReader 
reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(HASH)) { - line = reader.readLine(); - } - return line; - } - - public String serialise(PrimerDesignRecord record) throws Exception { - // TODO Auto-generated method stub - return null; - } - - @Override - public PrimerDesignRecord parseRecord(BufferedReader reader) - throws Exception { - String line = nextNonheaderLine(reader); - PrimerDesignRecord result = null; - if (null != line) { - result = parseRecord(line); - } - return result; - } -} diff --git a/qio/src/org/qcmg/primerdesignsummary/PrimerPosition.java b/qio/src/org/qcmg/primerdesignsummary/PrimerPosition.java deleted file mode 100644 index 813ada995..000000000 --- a/qio/src/org/qcmg/primerdesignsummary/PrimerPosition.java +++ /dev/null @@ -1,68 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primerdesignsummary; - -public class PrimerPosition { - - protected String chromosome; - protected int start; - protected int end; - - /** - * Gets the value of the chromosome property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getChromosome() { - return chromosome; - } - - /** - * Sets the value of the chromosome property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setChromosome(String value) { - this.chromosome = value; - } - - /** - * Gets the value of the start property. - * - */ - public int getStart() { - return start; - } - - /** - * Sets the value of the start property. - * - */ - public void setStart(int value) { - this.start = value; - } - - /** - * Gets the value of the end property. - * - */ - public int getEnd() { - return end; - } - - /** - * Sets the value of the end property. - * - */ - public void setEnd(int value) { - this.end = value; - } - -} diff --git a/qio/src/org/qcmg/primerinput/PrimerInputFileReader.java b/qio/src/org/qcmg/primerinput/PrimerInputFileReader.java deleted file mode 100644 index 5a9336258..000000000 --- a/qio/src/org/qcmg/primerinput/PrimerInputFileReader.java +++ /dev/null @@ -1,17 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primerinput; - -import java.io.File; - -import org.qcmg.reader.FileReader; - -public class PrimerInputFileReader extends FileReader { - private final static PrimerInputRecordSerializer serializer = - new PrimerInputRecordSerializer(); - - public PrimerInputFileReader(final File file) throws Exception { - super(file, serializer); - } -} diff --git a/qio/src/org/qcmg/primerinput/PrimerInputFileWriter.java b/qio/src/org/qcmg/primerinput/PrimerInputFileWriter.java deleted file mode 100644 index 5af197a91..000000000 --- a/qio/src/org/qcmg/primerinput/PrimerInputFileWriter.java +++ /dev/null @@ -1,33 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.primerinput; - -import java.io.Closeable; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -public final class PrimerInputFileWriter implements Closeable { - private static final String EQUALS = "="; - private static final PrimerInputRecordSerializer serializer = new PrimerInputRecordSerializer(); - private final OutputStream outputStream; - - public PrimerInputFileWriter(final File file) throws Exception { - OutputStream stream = new FileOutputStream(file); - outputStream = stream; - } - - public void add(final PrimerInputRecord record) throws Exception { - String encoded = serializer.serialise(record); - outputStream.write(encoded.getBytes()); - outputStream.flush(); - } - - public void close() throws IOException { - outputStream.write(EQUALS.getBytes()); - outputStream.flush(); - outputStream.close(); - } -} diff --git a/qio/src/org/qcmg/primerinput/PrimerInputRecord.java b/qio/src/org/qcmg/primerinput/PrimerInputRecord.java deleted file mode 100644 index c631b37e8..000000000 --- a/qio/src/org/qcmg/primerinput/PrimerInputRecord.java +++ /dev/null @@ -1,331 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primerinput; - - -public class PrimerInputRecord { - - protected String sequenceId; - protected String sequenceTemplate; - protected PrimerSequenceTarget sequenceTarget; - protected int primerProductMinTm; - protected int primerProductMaxTm; - protected double primerDnaConc; - protected double primerSaltConc; - protected int primerMinTm; - protected int primerOptTm; - protected int primerMaxTm; - protected int primerMinSize; - protected int primerOptSize; - protected int primerMaxSize; - protected PrimerSizeRange primerProductSizeRange; - protected boolean primerExplainFlag; - protected int primerNumReturn; - protected boolean primerNumNsAccepted; - - /** - * Gets the value of the sequenceId property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSequenceId() { - return sequenceId; - } - - /** - * Sets the value of the sequenceId property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSequenceId(String value) { - this.sequenceId = value; - } - - /** - * Gets the value of the sequenceTemplate property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSequenceTemplate() { - return sequenceTemplate; - } - - /** - * Sets the value of the sequenceTemplate property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSequenceTemplate(String value) { - this.sequenceTemplate = value; - } - - /** - * Gets the value of the sequenceTarget property. - * - * @return - * possible object is - * {@link PrimerSequenceTarget } - * - */ - public PrimerSequenceTarget getSequenceTarget() { - return sequenceTarget; - } - - /** - * Sets the value of the sequenceTarget property. - * - * @param value - * allowed object is - * {@link PrimerSequenceTarget } - * - */ - public void setSequenceTarget(PrimerSequenceTarget value) { - this.sequenceTarget = value; - } - - /** - * Gets the value of the primerProductMinTm property. - * - */ - public int getPrimerProductMinTm() { - return primerProductMinTm; - } - - /** - * Sets the value of the primerProductMinTm property. 
- * - */ - public void setPrimerProductMinTm(int value) { - this.primerProductMinTm = value; - } - - /** - * Gets the value of the primerProductMaxTm property. - * - */ - public int getPrimerProductMaxTm() { - return primerProductMaxTm; - } - - /** - * Sets the value of the primerProductMaxTm property. - * - */ - public void setPrimerProductMaxTm(int value) { - this.primerProductMaxTm = value; - } - - /** - * Gets the value of the primerDnaConc property. - * - */ - public double getPrimerDnaConc() { - return primerDnaConc; - } - - /** - * Sets the value of the primerDnaConc property. - * - */ - public void setPrimerDnaConc(double value) { - this.primerDnaConc = value; - } - - /** - * Gets the value of the primerSaltConc property. - * - */ - public double getPrimerSaltConc() { - return primerSaltConc; - } - - /** - * Sets the value of the primerSaltConc property. - * - */ - public void setPrimerSaltConc(double value) { - this.primerSaltConc = value; - } - - /** - * Gets the value of the primerMinTm property. - * - */ - public int getPrimerMinTm() { - return primerMinTm; - } - - /** - * Sets the value of the primerMinTm property. - * - */ - public void setPrimerMinTm(int value) { - this.primerMinTm = value; - } - - /** - * Gets the value of the primerOptTm property. - * - */ - public int getPrimerOptTm() { - return primerOptTm; - } - - /** - * Sets the value of the primerOptTm property. - * - */ - public void setPrimerOptTm(int value) { - this.primerOptTm = value; - } - - /** - * Gets the value of the primerMaxTm property. - * - */ - public int getPrimerMaxTm() { - return primerMaxTm; - } - - /** - * Sets the value of the primerMaxTm property. - * - */ - public void setPrimerMaxTm(int value) { - this.primerMaxTm = value; - } - - /** - * Gets the value of the primerMinSize property. - * - */ - public int getPrimerMinSize() { - return primerMinSize; - } - - /** - * Sets the value of the primerMinSize property. - * - */ - public void setPrimerMinSize(int value) { - this.primerMinSize = value; - } - - /** - * Gets the value of the primerOptSize property. - * - */ - public int getPrimerOptSize() { - return primerOptSize; - } - - /** - * Sets the value of the primerOptSize property. - * - */ - public void setPrimerOptSize(int value) { - this.primerOptSize = value; - } - - /** - * Gets the value of the primerMaxSize property. - * - */ - public int getPrimerMaxSize() { - return primerMaxSize; - } - - /** - * Sets the value of the primerMaxSize property. - * - */ - public void setPrimerMaxSize(int value) { - this.primerMaxSize = value; - } - - /** - * Gets the value of the primerProductSizeRange property. - * - * @return - * possible object is - * {@link PrimerSizeRange } - * - */ - public PrimerSizeRange getPrimerProductSizeRange() { - return primerProductSizeRange; - } - - /** - * Sets the value of the primerProductSizeRange property. - * - * @param value - * allowed object is - * {@link PrimerSizeRange } - * - */ - public void setPrimerProductSizeRange(PrimerSizeRange value) { - this.primerProductSizeRange = value; - } - - /** - * Gets the value of the primerExplainFlag property. - * - */ - public boolean isPrimerExplainFlag() { - return primerExplainFlag; - } - - /** - * Sets the value of the primerExplainFlag property. - * - */ - public void setPrimerExplainFlag(boolean value) { - this.primerExplainFlag = value; - } - - /** - * Gets the value of the primerNumReturn property. 
- * - */ - public int getPrimerNumReturn() { - return primerNumReturn; - } - - /** - * Sets the value of the primerNumReturn property. - * - */ - public void setPrimerNumReturn(int value) { - this.primerNumReturn = value; - } - - /** - * Gets the value of the primerNumNsAccepted property. - * - */ - public boolean isPrimerNumNsAccepted() { - return primerNumNsAccepted; - } - - /** - * Sets the value of the primerNumNsAccepted property. - * - */ - public void setPrimerNumNsAccepted(boolean value) { - this.primerNumNsAccepted = value; - } - -} diff --git a/qio/src/org/qcmg/primerinput/PrimerInputRecordSerializer.java b/qio/src/org/qcmg/primerinput/PrimerInputRecordSerializer.java deleted file mode 100644 index fc6c9044f..000000000 --- a/qio/src/org/qcmg/primerinput/PrimerInputRecordSerializer.java +++ /dev/null @@ -1,201 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primerinput; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -import org.qcmg.gff3.GFF3Record; -import org.qcmg.record.Serializer; - -public final class PrimerInputRecordSerializer extends - Serializer { - private final static String SEQUENCE_ID = "SEQUENCE_ID"; - private final static String SEQUENCE_TEMPLATE = "SEQUENCE_TEMPLATE"; - private final static String SEQUENCE_TARGET = "SEQUENCE_TARGET"; - private final static String PRIMER_PRODUCT_MIN_TM = "PRIMER_PRODUCT_MIN_TM"; - private final static String PRIMER_PRODUCT_MAX_TM = "PRIMER_PRODUCT_MAX_TM"; - private final static String PRIMER_DNA_CONC = "PRIMER_DNA_CONC"; - private final static String PRIMER_SALT_CONC = "PRIMER_SALT_CONC"; - private final static String PRIMER_MIN_TM = "PRIMER_MIN_TM"; - private final static String PRIMER_OPT_TM = "PRIMER_OPT_TM"; - private final static String PRIMER_MAX_TM = "PRIMER_MAX_TM"; - private final static String PRIMER_MIN_SIZE = "PRIMER_MIN_SIZE"; - private final static String PRIMER_OPT_SIZE = "PRIMER_OPT_SIZE"; - private final static String PRIMER_MAX_SIZE = "PRIMER_MAX_SIZE"; - private final static String PRIMER_PRODUCT_SIZE_RANGE = "PRIMER_PRODUCT_SIZE_RANGE"; - private final static String PRIMER_EXPLAIN_FLAG = "PRIMER_EXPLAIN_FLAG"; - private final static String PRIMER_NUM_RETURN = "PRIMER_NUM_RETURN"; - private final static String PRIMER_NUM_NS_ACCEPTED = "PRIMER_NUM_NS_ACCEPTED"; - private final static String EQUALS = "="; - - public PrimerInputRecord parseRecord(final BufferedReader reader) - throws Exception { - String nextLine = nextStringValue(reader); - if (nextLine.equals("=")) { - return null; - } - PrimerInputRecord result = new PrimerInputRecord(); - result.setSequenceId(nextLine); - result.setSequenceTemplate(nextStringValue(reader)); - result.setSequenceTarget(nextTargetValue(reader)); - result.setPrimerProductMinTm(nextIntegerValue(reader)); - result.setPrimerProductMaxTm(nextIntegerValue(reader)); - result.setPrimerDnaConc(nextDoubleValue(reader)); - result.setPrimerSaltConc(nextDoubleValue(reader)); - result.setPrimerMinTm(nextIntegerValue(reader)); - result.setPrimerOptTm(nextIntegerValue(reader)); - result.setPrimerMaxTm(nextIntegerValue(reader)); - result.setPrimerMinSize(nextIntegerValue(reader)); - result.setPrimerOptSize(nextIntegerValue(reader)); - result.setPrimerMaxSize(nextIntegerValue(reader)); - result.setPrimerProductSizeRange(nextPrimerSizeRangeValue(reader)); - result.setPrimerExplainFlag(nextBooleanValue(reader)); - 
result.setPrimerNumReturn(nextIntegerValue(reader)); - result.setPrimerNumNsAccepted(nextBooleanValue(reader)); - return result; - } - - private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { - return Double.parseDouble(nextStringValue(reader)); - } - - private PrimerSizeRange nextPrimerSizeRangeValue(BufferedReader reader) - throws Exception { - String targetValue = nextStringValue(reader); - final String[] params = hyphenPattern.split(targetValue, -1); - if (2 != params.length) { - throw new Exception("Bad format. Insufficient numbered values: '" - + targetValue + "'"); - } - PrimerSizeRange range = new PrimerSizeRange(); - range.setLowerLimit(Integer.parseInt(params[0])); - range.setUpperLimit(Integer.parseInt(params[1])); - return range; - } - - private boolean nextBooleanValue(BufferedReader reader) throws Exception { - return Boolean.parseBoolean(nextStringValue(reader)); - } - - private String nextStringValue(final BufferedReader reader) - throws Exception { - final String line = reader.readLine(); - final String[] params = equalsPattern.split(line, -1); - if (2 != params.length) { - throw new Exception("Bad format. Insufficient columns: '" + line - + "'"); - } - return params[1].trim(); - } - - private int nextIntegerValue(final BufferedReader reader) throws Exception { - return Integer.parseInt(nextStringValue(reader)); - } - - private PrimerSequenceTarget nextTargetValue(final BufferedReader reader) - throws Exception { - String targetValue = nextStringValue(reader); - final String[] params = commaPattern.split(targetValue, -1); - if (2 != params.length) { - throw new Exception("Bad format. Insufficient numbered values: '" - + targetValue + "'"); - } - PrimerSequenceTarget target = new PrimerSequenceTarget(); - target.setLeftValue(Integer.parseInt(params[0])); - target.setRightValue(Integer.parseInt(params[1])); - return target; - } - - public String serialise(final PrimerInputRecord record) throws Exception { - String result = addLine("", SEQUENCE_ID, record.getSequenceId()); - result = addLine(result, SEQUENCE_TEMPLATE, record - .getSequenceTemplate()); - result = addLine(result, SEQUENCE_TARGET, record.getSequenceTarget()); - result = addLine(result, PRIMER_PRODUCT_MIN_TM, record - .getPrimerProductMinTm()); - result = addLine(result, PRIMER_PRODUCT_MAX_TM, record - .getPrimerProductMaxTm()); - result = addLine(result, PRIMER_DNA_CONC, record.getPrimerDnaConc()); - result = addLine(result, PRIMER_SALT_CONC, record.getPrimerSaltConc()); - result = addLine(result, PRIMER_MIN_TM, record.getPrimerMinTm()); - result = addLine(result, PRIMER_OPT_TM, record.getPrimerOptTm()); - result = addLine(result, PRIMER_MAX_TM, record.getPrimerMaxTm()); - result = addLine(result, PRIMER_MIN_SIZE, record.getPrimerMinSize()); - result = addLine(result, PRIMER_OPT_SIZE, record.getPrimerOptSize()); - result = addLine(result, PRIMER_MAX_SIZE, record.getPrimerMaxSize()); - result = addLine(result, PRIMER_PRODUCT_SIZE_RANGE, record - .getPrimerProductSizeRange()); - result = addLine(result, PRIMER_EXPLAIN_FLAG, record - .isPrimerExplainFlag()); - result = addLine(result, PRIMER_NUM_RETURN, record.getPrimerNumReturn()); - result = addLine(result, PRIMER_NUM_NS_ACCEPTED, record - .isPrimerNumNsAccepted()); - return result; - } - - public static void initialise(PrimerInputRecord record) { - record.setPrimerDnaConc(120); - record.setPrimerSaltConc(50); - record.setPrimerExplainFlag(false); - record.setPrimerMaxSize(25); - record.setPrimerExplainFlag(true); - 
record.setPrimerMaxSize(25); - record.setPrimerMaxTm(75); - record.setPrimerMinSize(18); - record.setPrimerMinTm(55); - record.setPrimerNumNsAccepted(true); - record.setPrimerNumReturn(10000); - record.setPrimerOptSize(20); - record.setPrimerOptTm(65); - record.setPrimerProductMaxTm(85); - record.setPrimerProductMinTm(65); - PrimerSizeRange range = new PrimerSizeRange(); - range.setLowerLimit(50); - range.setUpperLimit(120); - record.setPrimerProductSizeRange(range); - record.setSequenceId(""); - record.setSequenceTemplate(""); - PrimerSequenceTarget target = new PrimerSequenceTarget(); - target.setLeftValue(249); - target.setRightValue(3); - record.setSequenceTarget(target); - } - - private static String addLine(final String result, final String lhs, - final String rhs) { - return result + lhs + EQUALS + rhs + NEWLINE; - } - - private static String addLine(String result, final String lhs, - final double rhs) { - return result + lhs + EQUALS + Double.toString(rhs) + NEWLINE; - } - - private static String addLine(String result, final String lhs, final int rhs) { - return result + lhs + EQUALS + Integer.toString(rhs) + NEWLINE; - } - - private static String addLine(String result, String lhs, - final PrimerSequenceTarget rhs) { - return result + lhs + EQUALS + rhs.getLeftValue() + "," - + rhs.getRightValue() + NEWLINE; - } - - private static String addLine(String result, String lhs, final boolean rhs) { - if (rhs) { - return result + lhs + EQUALS + "1" + NEWLINE; - } else { - return result + lhs + EQUALS + "0" + NEWLINE; - } - } - - private static String addLine(String result, String lhs, - final PrimerSizeRange rhs) { - return result + lhs + EQUALS + rhs.getLowerLimit() + "-" - + rhs.getUpperLimit() + NEWLINE; - } - -} diff --git a/qio/src/org/qcmg/primerinput/PrimerSequenceTarget.java b/qio/src/org/qcmg/primerinput/PrimerSequenceTarget.java deleted file mode 100644 index 92683ae07..000000000 --- a/qio/src/org/qcmg/primerinput/PrimerSequenceTarget.java +++ /dev/null @@ -1,43 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primerinput; - -public class PrimerSequenceTarget { - - protected int leftValue; - protected int rightValue; - - /** - * Gets the value of the leftValue property. - * - */ - public int getLeftValue() { - return leftValue; - } - - /** - * Sets the value of the leftValue property. - * - */ - public void setLeftValue(int value) { - this.leftValue = value; - } - - /** - * Gets the value of the rightValue property. - * - */ - public int getRightValue() { - return rightValue; - } - - /** - * Sets the value of the rightValue property. - * - */ - public void setRightValue(int value) { - this.rightValue = value; - } - -} diff --git a/qio/src/org/qcmg/primerinput/PrimerSizeRange.java b/qio/src/org/qcmg/primerinput/PrimerSizeRange.java deleted file mode 100644 index 57799f363..000000000 --- a/qio/src/org/qcmg/primerinput/PrimerSizeRange.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primerinput; - - -public class PrimerSizeRange { - - protected int lowerLimit; - protected int upperLimit; - - /** - * Gets the value of the lowerLimit property. - * - */ - public int getLowerLimit() { - return lowerLimit; - } - - /** - * Sets the value of the lowerLimit property. 
- * - */ - public void setLowerLimit(int value) { - this.lowerLimit = value; - } - - /** - * Gets the value of the upperLimit property. - * - */ - public int getUpperLimit() { - return upperLimit; - } - - /** - * Sets the value of the upperLimit property. - * - */ - public void setUpperLimit(int value) { - this.upperLimit = value; - } - -} diff --git a/qio/src/org/qcmg/primeroutput/PrimerOutputFileReader.java b/qio/src/org/qcmg/primeroutput/PrimerOutputFileReader.java deleted file mode 100644 index 52d479366..000000000 --- a/qio/src/org/qcmg/primeroutput/PrimerOutputFileReader.java +++ /dev/null @@ -1,22 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primeroutput; - -import java.io.File; - -import org.qcmg.primeroutput.PrimerOutputHeader; -import org.qcmg.primeroutput.PrimerOutputRecord; -import org.qcmg.reader.ExtendedFileReader; -import org.qcmg.reader.FileReader; - -public class PrimerOutputFileReader extends ExtendedFileReader { - private final static PrimerOutputHeaderSerializer headerSerializer = - new PrimerOutputHeaderSerializer(); - private final static PrimerOutputRecordSerializer recordSerializer = - new PrimerOutputRecordSerializer(); - - public PrimerOutputFileReader(final File file) throws Exception { - super(file, recordSerializer, headerSerializer); - } -} diff --git a/qio/src/org/qcmg/primeroutput/PrimerOutputFileWriter.java b/qio/src/org/qcmg/primeroutput/PrimerOutputFileWriter.java deleted file mode 100644 index 175270918..000000000 --- a/qio/src/org/qcmg/primeroutput/PrimerOutputFileWriter.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primeroutput; - -import java.io.Closeable; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -import org.qcmg.primeroutput.PrimerOutputRecord; - -public final class PrimerOutputFileWriter implements Closeable { - private static final String EQUALS = "="; - private static final PrimerOutputRecordSerializer serializer = new PrimerOutputRecordSerializer(); - private final OutputStream outputStream; - - public PrimerOutputFileWriter(final File file) throws Exception { - OutputStream stream = new FileOutputStream(file); - outputStream = stream; - } - - public void add(final PrimerOutputRecord record) throws Exception { - String encoded = serializer.serialise(record); - outputStream.write(encoded.getBytes()); - outputStream.flush(); - } - - public void close() throws IOException { - outputStream.write(EQUALS.getBytes()); - outputStream.flush(); - outputStream.close(); - } -} diff --git a/qio/src/org/qcmg/primeroutput/PrimerOutputHeader.java b/qio/src/org/qcmg/primeroutput/PrimerOutputHeader.java deleted file mode 100644 index d499b8a93..000000000 --- a/qio/src/org/qcmg/primeroutput/PrimerOutputHeader.java +++ /dev/null @@ -1,563 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -// -// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 -// See http://java.sun.com/xml/jaxb -// Any modifications to this file will be lost upon recompilation of the source schema. 
-// Generated on: 2013.10.25 at 10:52:22 AM EST -// - - -package org.qcmg.primeroutput; - -import javax.xml.bind.annotation.XmlAccessType; -import javax.xml.bind.annotation.XmlAccessorType; -import javax.xml.bind.annotation.XmlElement; -import javax.xml.bind.annotation.XmlType; - - -/** - *
Java class for primerOutputHeader complex type.
- * 
- * The following schema fragment specifies the expected content contained within this class.
- * 
- * <complexType name="primerOutputHeader">
- *   <complexContent>
- *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
- *       <sequence>
- *         <element name="sequenceId" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="sequenceTemplate" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="sequenceTarget" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="productMinTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="productMaxTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="dnaConc" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="saltConc" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="minTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="optTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="maxTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="minSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="optSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="maxSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="productSizeRange" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="explainFlag" type="{http://www.w3.org/2001/XMLSchema}boolean"/>
- *         <element name="numReturn" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="numNsAccepted" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="leftExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="rightExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="pairExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="leftNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="rightNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="internalNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="pairNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *       </sequence>
- *     </restriction>
- *   </complexContent>
- * </complexType>
- * 
- * - * - */ -@XmlAccessorType(XmlAccessType.FIELD) -@XmlType(name = "primerOutputHeader", propOrder = { - "sequenceId", - "sequenceTemplate", - "sequenceTarget", - "productMinTm", - "productMaxTm", - "dnaConc", - "saltConc", - "minTm", - "optTm", - "maxTm", - "minSize", - "optSize", - "maxSize", - "productSizeRange", - "explainFlag", - "numReturn", - "numNsAccepted", - "leftExplain", - "rightExplain", - "pairExplain", - "leftNumReturned", - "rightNumReturned", - "internalNumReturned", - "pairNumReturned" -}) -public class PrimerOutputHeader { - - @XmlElement(required = true) - protected String sequenceId; - @XmlElement(required = true) - protected String sequenceTemplate; - @XmlElement(required = true) - protected String sequenceTarget; - protected int productMinTm; - protected int productMaxTm; - protected double dnaConc; - protected double saltConc; - protected int minTm; - protected int optTm; - protected int maxTm; - protected int minSize; - protected int optSize; - protected int maxSize; - @XmlElement(required = true) - protected String productSizeRange; - protected boolean explainFlag; - protected int numReturn; - protected int numNsAccepted; - @XmlElement(required = true) - protected String leftExplain; - @XmlElement(required = true) - protected String rightExplain; - @XmlElement(required = true) - protected String pairExplain; - protected int leftNumReturned; - protected int rightNumReturned; - protected int internalNumReturned; - protected int pairNumReturned; - - /** - * Gets the value of the sequenceId property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSequenceId() { - return sequenceId; - } - - /** - * Sets the value of the sequenceId property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSequenceId(String value) { - this.sequenceId = value; - } - - /** - * Gets the value of the sequenceTemplate property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSequenceTemplate() { - return sequenceTemplate; - } - - /** - * Sets the value of the sequenceTemplate property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSequenceTemplate(String value) { - this.sequenceTemplate = value; - } - - /** - * Gets the value of the sequenceTarget property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSequenceTarget() { - return sequenceTarget; - } - - /** - * Sets the value of the sequenceTarget property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSequenceTarget(String value) { - this.sequenceTarget = value; - } - - /** - * Gets the value of the productMinTm property. - * - */ - public int getProductMinTm() { - return productMinTm; - } - - /** - * Sets the value of the productMinTm property. - * - */ - public void setProductMinTm(int value) { - this.productMinTm = value; - } - - /** - * Gets the value of the productMaxTm property. - * - */ - public int getProductMaxTm() { - return productMaxTm; - } - - /** - * Sets the value of the productMaxTm property. - * - */ - public void setProductMaxTm(int value) { - this.productMaxTm = value; - } - - /** - * Gets the value of the dnaConc property. - * - */ - public double getDnaConc() { - return dnaConc; - } - - /** - * Sets the value of the dnaConc property. - * - */ - public void setDnaConc(double value) { - this.dnaConc = value; - } - - /** - * Gets the value of the saltConc property. 
- * - */ - public double getSaltConc() { - return saltConc; - } - - /** - * Sets the value of the saltConc property. - * - */ - public void setSaltConc(double value) { - this.saltConc = value; - } - - /** - * Gets the value of the minTm property. - * - */ - public int getMinTm() { - return minTm; - } - - /** - * Sets the value of the minTm property. - * - */ - public void setMinTm(int value) { - this.minTm = value; - } - - /** - * Gets the value of the optTm property. - * - */ - public int getOptTm() { - return optTm; - } - - /** - * Sets the value of the optTm property. - * - */ - public void setOptTm(int value) { - this.optTm = value; - } - - /** - * Gets the value of the maxTm property. - * - */ - public int getMaxTm() { - return maxTm; - } - - /** - * Sets the value of the maxTm property. - * - */ - public void setMaxTm(int value) { - this.maxTm = value; - } - - /** - * Gets the value of the minSize property. - * - */ - public int getMinSize() { - return minSize; - } - - /** - * Sets the value of the minSize property. - * - */ - public void setMinSize(int value) { - this.minSize = value; - } - - /** - * Gets the value of the optSize property. - * - */ - public int getOptSize() { - return optSize; - } - - /** - * Sets the value of the optSize property. - * - */ - public void setOptSize(int value) { - this.optSize = value; - } - - /** - * Gets the value of the maxSize property. - * - */ - public int getMaxSize() { - return maxSize; - } - - /** - * Sets the value of the maxSize property. - * - */ - public void setMaxSize(int value) { - this.maxSize = value; - } - - /** - * Gets the value of the productSizeRange property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getProductSizeRange() { - return productSizeRange; - } - - /** - * Sets the value of the productSizeRange property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setProductSizeRange(String value) { - this.productSizeRange = value; - } - - /** - * Gets the value of the explainFlag property. - * - */ - public boolean isExplainFlag() { - return explainFlag; - } - - /** - * Sets the value of the explainFlag property. - * - */ - public void setExplainFlag(boolean value) { - this.explainFlag = value; - } - - /** - * Gets the value of the numReturn property. - * - */ - public int getNumReturn() { - return numReturn; - } - - /** - * Sets the value of the numReturn property. - * - */ - public void setNumReturn(int value) { - this.numReturn = value; - } - - /** - * Gets the value of the numNsAccepted property. - * - */ - public int getNumNsAccepted() { - return numNsAccepted; - } - - /** - * Sets the value of the numNsAccepted property. - * - */ - public void setNumNsAccepted(int value) { - this.numNsAccepted = value; - } - - /** - * Gets the value of the leftExplain property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getLeftExplain() { - return leftExplain; - } - - /** - * Sets the value of the leftExplain property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setLeftExplain(String value) { - this.leftExplain = value; - } - - /** - * Gets the value of the rightExplain property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getRightExplain() { - return rightExplain; - } - - /** - * Sets the value of the rightExplain property. 
- * - * @param value - * allowed object is - * {@link String } - * - */ - public void setRightExplain(String value) { - this.rightExplain = value; - } - - /** - * Gets the value of the pairExplain property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getPairExplain() { - return pairExplain; - } - - /** - * Sets the value of the pairExplain property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setPairExplain(String value) { - this.pairExplain = value; - } - - /** - * Gets the value of the leftNumReturned property. - * - */ - public int getLeftNumReturned() { - return leftNumReturned; - } - - /** - * Sets the value of the leftNumReturned property. - * - */ - public void setLeftNumReturned(int value) { - this.leftNumReturned = value; - } - - /** - * Gets the value of the rightNumReturned property. - * - */ - public int getRightNumReturned() { - return rightNumReturned; - } - - /** - * Sets the value of the rightNumReturned property. - * - */ - public void setRightNumReturned(int value) { - this.rightNumReturned = value; - } - - /** - * Gets the value of the internalNumReturned property. - * - */ - public int getInternalNumReturned() { - return internalNumReturned; - } - - /** - * Sets the value of the internalNumReturned property. - * - */ - public void setInternalNumReturned(int value) { - this.internalNumReturned = value; - } - - /** - * Gets the value of the pairNumReturned property. - * - */ - public int getPairNumReturned() { - return pairNumReturned; - } - - /** - * Sets the value of the pairNumReturned property. - * - */ - public void setPairNumReturned(int value) { - this.pairNumReturned = value; - } - -} diff --git a/qio/src/org/qcmg/primeroutput/PrimerOutputHeaderSerializer.java b/qio/src/org/qcmg/primeroutput/PrimerOutputHeaderSerializer.java deleted file mode 100644 index 633729039..000000000 --- a/qio/src/org/qcmg/primeroutput/PrimerOutputHeaderSerializer.java +++ /dev/null @@ -1,128 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.primeroutput; - -import java.io.BufferedReader; - -import org.qcmg.record.Serializer; - -public class PrimerOutputHeaderSerializer extends - Serializer { - private final static String[] FIELDS = { - "SEQUENCE_ID", - "SEQUENCE_TEMPLATE", - "SEQUENCE_TARGET", - "PRIMER_PRODUCT_MIN_TM", - "PRIMER_PRODUCT_MAX_TM", - "PRIMER_DNA_CONC", - "PRIMER_SALT_CONC", - "PRIMER_MIN_TM", - "PRIMER_OPT_TM", - "PRIMER_MAX_TM", - "PRIMER_MIN_SIZE", - "PRIMER_OPT_SIZE", - "PRIMER_MAX_SIZE", - "PRIMER_PRODUCT_SIZE_RANGE", - "PRIMER_EXPLAIN_FLAG", - "PRIMER_NUM_RETURN", - "PRIMER_NUM_NS_ACCEPTED", - "PRIMER_LEFT_EXPLAIN", - "PRIMER_RIGHT_EXPLAIN", - "PRIMER_PAIR_EXPLAIN", - "PRIMER_LEFT_NUM_RETURNED", - "PRIMER_RIGHT_NUM_RETURNED", - "PRIMER_INTERNAL_NUM_RETURNED", - "PRIMER_PAIR_NUM_RETURNED"}; - - static { - assert(24 == FIELDS.length); - } - - @Override - public PrimerOutputHeader parseRecord(final BufferedReader reader) - throws Exception { - PrimerOutputHeader result = new PrimerOutputHeader(); - result.setSequenceId(nextStringValue(reader)); - result.setSequenceTemplate(nextStringValue(reader)); - result.setSequenceTarget(nextStringValue(reader)); - result.setProductMinTm(nextIntegerValue(reader)); - result.setProductMaxTm(nextIntegerValue(reader)); - result.setDnaConc(nextDoubleValue(reader)); - result.setSaltConc(nextDoubleValue(reader)); - result.setMinTm(nextIntegerValue(reader)); - result.setOptTm(nextIntegerValue(reader)); - result.setMaxTm(nextIntegerValue(reader)); - result.setMinSize(nextIntegerValue(reader)); - result.setOptSize(nextIntegerValue(reader)); - result.setMaxSize(nextIntegerValue(reader)); - result.setProductSizeRange(nextStringValue(reader)); - result.setExplainFlag(nextBooleanValue(reader)); - result.setNumReturn(nextIntegerValue(reader)); - result.setNumNsAccepted(nextIntegerValue(reader)); - result.setLeftExplain(nextStringValue(reader)); - result.setRightExplain(nextStringValue(reader)); - result.setPairExplain(nextStringValue(reader)); - result.setLeftNumReturned(nextIntegerValue(reader)); - result.setRightNumReturned(nextIntegerValue(reader)); - result.setInternalNumReturned(nextIntegerValue(reader)); - result.setPairNumReturned(nextIntegerValue(reader)); - return result; - } - - @Override - public String serialise(PrimerOutputHeader record) throws Exception { - String result = createLine(FIELDS[0], record.getSequenceId()); - result += FIELDS[1] + EQUALS + record.getSequenceTemplate() + NEWLINE; - result += FIELDS[2] + EQUALS + record.getSequenceTarget() + NEWLINE; - result += FIELDS[3] + EQUALS + record.getProductMinTm() + NEWLINE; - result += FIELDS[4] + EQUALS + record.getProductMaxTm() + NEWLINE; - result += FIELDS[5] + EQUALS + record.getDnaConc() + NEWLINE; - result += FIELDS[6] + EQUALS + record.getSaltConc() + NEWLINE; - result += FIELDS[7] + EQUALS + record.getMinTm() + NEWLINE; - result += FIELDS[8] + EQUALS + record.getOptTm() + NEWLINE; - result += FIELDS[9] + EQUALS + record.getMaxTm() + NEWLINE; - result += FIELDS[10] + EQUALS + record.getMinSize() + NEWLINE; - result += FIELDS[11] + EQUALS + record.getOptSize() + NEWLINE; - result += FIELDS[12] + EQUALS + record.getMaxSize() + NEWLINE; - result += FIELDS[13] + EQUALS + record.getProductSizeRange() + NEWLINE; - result += FIELDS[14] + EQUALS + record.isExplainFlag() + NEWLINE; - result += FIELDS[15] + EQUALS + record.getNumReturn() + NEWLINE; - result += FIELDS[16] + EQUALS + record.getNumNsAccepted() + NEWLINE; - result += FIELDS[17] + EQUALS + record.getLeftExplain() + NEWLINE; - result += 
FIELDS[18] + EQUALS + record.getRightExplain() + NEWLINE; - result += FIELDS[19] + EQUALS + record.getPairExplain() + NEWLINE; - result += FIELDS[20] + EQUALS + record.getLeftNumReturned() + NEWLINE; - result += FIELDS[21] + EQUALS + record.getRightNumReturned() + NEWLINE; - result += FIELDS[22] + EQUALS + record.getInternalNumReturned() + NEWLINE; - result += FIELDS[23] + EQUALS + record.getPairNumReturned() + NEWLINE; - return result; - } - - private String createLine(final String fieldName, final String fieldValue) { - return fieldName + EQUALS + fieldValue + NEWLINE; - } - - private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { - return Double.parseDouble(nextStringValue(reader)); - } - - private boolean nextBooleanValue(BufferedReader reader) throws Exception { - return Boolean.parseBoolean(nextStringValue(reader)); - } - - private String nextStringValue(final BufferedReader reader) - throws Exception { - final String line = reader.readLine(); - final String[] params = equalsPattern.split(line); - if (2 != params.length) { - throw new Exception("Bad format. Insufficient columns: '" + line - + "'"); - } - return params[1].trim(); - } - - private int nextIntegerValue(final BufferedReader reader) throws Exception { - return Integer.parseInt(nextStringValue(reader)); - } -} diff --git a/qio/src/org/qcmg/primeroutput/PrimerOutputRecord.java b/qio/src/org/qcmg/primeroutput/PrimerOutputRecord.java deleted file mode 100644 index af173dea7..000000000 --- a/qio/src/org/qcmg/primeroutput/PrimerOutputRecord.java +++ /dev/null @@ -1,517 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -// -// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 -// See http://java.sun.com/xml/jaxb -// Any modifications to this file will be lost upon recompilation of the source schema. -// Generated on: 2013.10.25 at 10:52:22 AM EST -// - - -package org.qcmg.primeroutput; - -import javax.xml.bind.annotation.XmlAccessType; -import javax.xml.bind.annotation.XmlAccessorType; -import javax.xml.bind.annotation.XmlElement; -import javax.xml.bind.annotation.XmlType; - - -/** - *
Java class for primerOutputRecord complex type.
- * 
- * The following schema fragment specifies the expected content contained within this class.
- * 
- * <complexType name="primerOutputRecord">
- *   <complexContent>
- *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
- *       <sequence>
- *         <element name="pairPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftSequence" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="rightSequence" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="left" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="right" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="leftTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftGcPercent" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightGcPercent" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftSelfAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightSelfAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftSelfEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightSelfEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftEndStability" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightEndStability" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="pairComplAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="pairComplEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="pairProductSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="pairProductTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="pairProductTmOligoTmDiff" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="pairTOptA" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *       </sequence>
- *     </restriction>
- *   </complexContent>
- * </complexType>
- * 
- * - * - */ -@XmlAccessorType(XmlAccessType.FIELD) -@XmlType(name = "primerOutputRecord", propOrder = { - "pairPenalty", - "leftPenalty", - "rightPenalty", - "leftSequence", - "rightSequence", - "left", - "right", - "leftTm", - "rightTm", - "leftGcPercent", - "rightGcPercent", - "leftSelfAny", - "rightSelfAny", - "leftSelfEnd", - "rightSelfEnd", - "leftEndStability", - "rightEndStability", - "pairComplAny", - "pairComplEnd", - "pairProductSize", - "pairProductTm", - "pairProductTmOligoTmDiff", - "pairTOptA" -}) -public class PrimerOutputRecord { - - protected double pairPenalty; - protected double leftPenalty; - protected double rightPenalty; - @XmlElement(required = true) - protected String leftSequence; - @XmlElement(required = true) - protected String rightSequence; - @XmlElement(required = true) - protected String left; - @XmlElement(required = true) - protected String right; - protected double leftTm; - protected double rightTm; - protected double leftGcPercent; - protected double rightGcPercent; - protected double leftSelfAny; - protected double rightSelfAny; - protected double leftSelfEnd; - protected double rightSelfEnd; - protected double leftEndStability; - protected double rightEndStability; - protected double pairComplAny; - protected double pairComplEnd; - protected int pairProductSize; - protected double pairProductTm; - protected double pairProductTmOligoTmDiff; - protected double pairTOptA; - - /** - * Gets the value of the pairPenalty property. - * - */ - public double getPairPenalty() { - return pairPenalty; - } - - /** - * Sets the value of the pairPenalty property. - * - */ - public void setPairPenalty(double value) { - this.pairPenalty = value; - } - - /** - * Gets the value of the leftPenalty property. - * - */ - public double getLeftPenalty() { - return leftPenalty; - } - - /** - * Sets the value of the leftPenalty property. - * - */ - public void setLeftPenalty(double value) { - this.leftPenalty = value; - } - - /** - * Gets the value of the rightPenalty property. - * - */ - public double getRightPenalty() { - return rightPenalty; - } - - /** - * Sets the value of the rightPenalty property. - * - */ - public void setRightPenalty(double value) { - this.rightPenalty = value; - } - - /** - * Gets the value of the leftSequence property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getLeftSequence() { - return leftSequence; - } - - /** - * Sets the value of the leftSequence property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setLeftSequence(String value) { - this.leftSequence = value; - } - - /** - * Gets the value of the rightSequence property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getRightSequence() { - return rightSequence; - } - - /** - * Sets the value of the rightSequence property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setRightSequence(String value) { - this.rightSequence = value; - } - - /** - * Gets the value of the left property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getLeft() { - return left; - } - - /** - * Sets the value of the left property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setLeft(String value) { - this.left = value; - } - - /** - * Gets the value of the right property. 
- * - * @return - * possible object is - * {@link String } - * - */ - public String getRight() { - return right; - } - - /** - * Sets the value of the right property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setRight(String value) { - this.right = value; - } - - /** - * Gets the value of the leftTm property. - * - */ - public double getLeftTm() { - return leftTm; - } - - /** - * Sets the value of the leftTm property. - * - */ - public void setLeftTm(double value) { - this.leftTm = value; - } - - /** - * Gets the value of the rightTm property. - * - */ - public double getRightTm() { - return rightTm; - } - - /** - * Sets the value of the rightTm property. - * - */ - public void setRightTm(double value) { - this.rightTm = value; - } - - /** - * Gets the value of the leftGcPercent property. - * - */ - public double getLeftGcPercent() { - return leftGcPercent; - } - - /** - * Sets the value of the leftGcPercent property. - * - */ - public void setLeftGcPercent(double value) { - this.leftGcPercent = value; - } - - /** - * Gets the value of the rightGcPercent property. - * - */ - public double getRightGcPercent() { - return rightGcPercent; - } - - /** - * Sets the value of the rightGcPercent property. - * - */ - public void setRightGcPercent(double value) { - this.rightGcPercent = value; - } - - /** - * Gets the value of the leftSelfAny property. - * - */ - public double getLeftSelfAny() { - return leftSelfAny; - } - - /** - * Sets the value of the leftSelfAny property. - * - */ - public void setLeftSelfAny(double value) { - this.leftSelfAny = value; - } - - /** - * Gets the value of the rightSelfAny property. - * - */ - public double getRightSelfAny() { - return rightSelfAny; - } - - /** - * Sets the value of the rightSelfAny property. - * - */ - public void setRightSelfAny(double value) { - this.rightSelfAny = value; - } - - /** - * Gets the value of the leftSelfEnd property. - * - */ - public double getLeftSelfEnd() { - return leftSelfEnd; - } - - /** - * Sets the value of the leftSelfEnd property. - * - */ - public void setLeftSelfEnd(double value) { - this.leftSelfEnd = value; - } - - /** - * Gets the value of the rightSelfEnd property. - * - */ - public double getRightSelfEnd() { - return rightSelfEnd; - } - - /** - * Sets the value of the rightSelfEnd property. - * - */ - public void setRightSelfEnd(double value) { - this.rightSelfEnd = value; - } - - /** - * Gets the value of the leftEndStability property. - * - */ - public double getLeftEndStability() { - return leftEndStability; - } - - /** - * Sets the value of the leftEndStability property. - * - */ - public void setLeftEndStability(double value) { - this.leftEndStability = value; - } - - /** - * Gets the value of the rightEndStability property. - * - */ - public double getRightEndStability() { - return rightEndStability; - } - - /** - * Sets the value of the rightEndStability property. - * - */ - public void setRightEndStability(double value) { - this.rightEndStability = value; - } - - /** - * Gets the value of the pairComplAny property. - * - */ - public double getPairComplAny() { - return pairComplAny; - } - - /** - * Sets the value of the pairComplAny property. - * - */ - public void setPairComplAny(double value) { - this.pairComplAny = value; - } - - /** - * Gets the value of the pairComplEnd property. - * - */ - public double getPairComplEnd() { - return pairComplEnd; - } - - /** - * Sets the value of the pairComplEnd property. 
- * - */ - public void setPairComplEnd(double value) { - this.pairComplEnd = value; - } - - /** - * Gets the value of the pairProductSize property. - * - */ - public int getPairProductSize() { - return pairProductSize; - } - - /** - * Sets the value of the pairProductSize property. - * - */ - public void setPairProductSize(int value) { - this.pairProductSize = value; - } - - /** - * Gets the value of the pairProductTm property. - * - */ - public double getPairProductTm() { - return pairProductTm; - } - - /** - * Sets the value of the pairProductTm property. - * - */ - public void setPairProductTm(double value) { - this.pairProductTm = value; - } - - /** - * Gets the value of the pairProductTmOligoTmDiff property. - * - */ - public double getPairProductTmOligoTmDiff() { - return pairProductTmOligoTmDiff; - } - - /** - * Sets the value of the pairProductTmOligoTmDiff property. - * - */ - public void setPairProductTmOligoTmDiff(double value) { - this.pairProductTmOligoTmDiff = value; - } - - /** - * Gets the value of the pairTOptA property. - * - */ - public double getPairTOptA() { - return pairTOptA; - } - - /** - * Sets the value of the pairTOptA property. - * - */ - public void setPairTOptA(double value) { - this.pairTOptA = value; - } - -} diff --git a/qio/src/org/qcmg/primeroutput/PrimerOutputRecordSerializer.java b/qio/src/org/qcmg/primeroutput/PrimerOutputRecordSerializer.java deleted file mode 100644 index f23ba63a4..000000000 --- a/qio/src/org/qcmg/primeroutput/PrimerOutputRecordSerializer.java +++ /dev/null @@ -1,136 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.primeroutput; - -import java.io.BufferedReader; -import java.io.IOException; - -import org.qcmg.record.Serializer; - -public final class PrimerOutputRecordSerializer extends - Serializer { - private final static String[] FIELD_PREFIXES = { - "PRIMER_PAIR_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_PAIR_", - "PRIMER_PAIR_", - "PRIMER_PAIR_", - "PRIMER_PAIR_", - "PRIMER_PAIR_", - "PRIMER_PAIR_"}; - - private final static String[] FIELD_SUFFIXES = { - "_PENALTY", - "_PENALTY", - "_PENALTY", - "_SEQUENCE", - "_SEQUENCE", - "", - "", - "_TM", - "_TM", - "_GC_PERCENT", - "_GC_PERCENT", - "_SELF_ANY", - "_SELF_ANY", - "_SELF_END", - "_SELF_END", - "_END_STABILITY", - "_END_STABILITY", - "_COMPL_ANY", - "_COMPL_END", - "_PRODUCT_SIZE", - "_PRODUCT_TM", - "_PRODUCT_TM_OLIGO_TM_DIFF", - "_T_OPT_A"}; - - static { - assert(FIELD_PREFIXES.length == FIELD_SUFFIXES.length); - } - - public PrimerOutputRecord parseRecord(final BufferedReader reader) - throws Exception { - String nextLine = reader.readLine(); - if (null == nextLine) { - return null; - } - PrimerOutputRecord result = new PrimerOutputRecord(); - result.setPairPenalty(doubleValue(nextLine)); - result.setLeftPenalty(nextDoubleValue(reader)); - result.setRightPenalty(nextDoubleValue(reader)); - result.setLeftSequence(nextStringValue(reader)); - result.setRightSequence(nextStringValue(reader)); - result.setLeft(nextStringValue(reader)); - result.setRight(nextStringValue(reader)); - result.setLeftTm(nextDoubleValue(reader)); - result.setRightTm(nextDoubleValue(reader)); - 
result.setLeftGcPercent(nextDoubleValue(reader)); - result.setRightGcPercent(nextDoubleValue(reader)); - result.setLeftSelfAny(nextDoubleValue(reader)); - result.setRightSelfAny(nextDoubleValue(reader)); - result.setLeftSelfEnd(nextDoubleValue(reader)); - result.setRightSelfEnd(nextDoubleValue(reader)); - result.setLeftEndStability(nextDoubleValue(reader)); - result.setRightEndStability(nextDoubleValue(reader)); - result.setPairComplAny(nextDoubleValue(reader)); - result.setPairComplEnd(nextDoubleValue(reader)); - result.setPairProductSize(nextIntegerValue(reader)); - result.setPairProductTm(nextDoubleValue(reader)); - result.setPairProductTmOligoTmDiff(nextDoubleValue(reader)); - result.setPairTOptA(nextDoubleValue(reader)); - return result; - } - - public String serialise(final PrimerOutputRecord record) throws Exception { - String result = null; - return result; - } - - private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { - return Double.parseDouble(nextStringValue(reader)); - } - - private double doubleValue(final String line) throws Exception { - final String[] params = equalsPattern.split(line); - if (2 != params.length) { - throw new Exception("Bad format. Insufficient columns: '" + line - + "'"); - } - return Double.parseDouble(params[1].trim()); - } - - private String nextStringValue(final BufferedReader reader) - throws Exception { - final String line = reader.readLine(); - return stringValue(line); - } - - private String stringValue(final String line) throws Exception { - final String[] params = equalsPattern.split(line); - if (2 != params.length) { - throw new Exception("Bad format. Insufficient columns: '" + line - + "'"); - } - return params[1].trim(); - } - - private int nextIntegerValue(final BufferedReader reader) throws Exception { - return Integer.parseInt(nextStringValue(reader)); - } -} From 6b2ad0a84da37176dfabc35d3e24e6900e42636b Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 19:02:31 +1000 Subject: [PATCH 19/73] mv unused package --- qio/src/org/qcmg/reader/AbstractReader.java | 40 ------------- .../org/qcmg/reader/ExtendedFileReader.java | 41 ------------- qio/src/org/qcmg/reader/FileReader.java | 57 ------------------- qio/src/org/qcmg/reader/Reader.java | 10 ---- qio/src/org/qcmg/simple/SimpleFileReader.java | 23 -------- .../org/qcmg/simple/SimpleRecordIterator.java | 26 --------- qio/src/org/qcmg/simple/SimpleSerializer.java | 57 ------------------- 7 files changed, 254 deletions(-) delete mode 100644 qio/src/org/qcmg/reader/AbstractReader.java delete mode 100644 qio/src/org/qcmg/reader/ExtendedFileReader.java delete mode 100644 qio/src/org/qcmg/reader/FileReader.java delete mode 100644 qio/src/org/qcmg/reader/Reader.java delete mode 100644 qio/src/org/qcmg/simple/SimpleFileReader.java delete mode 100644 qio/src/org/qcmg/simple/SimpleRecordIterator.java delete mode 100644 qio/src/org/qcmg/simple/SimpleSerializer.java diff --git a/qio/src/org/qcmg/reader/AbstractReader.java b/qio/src/org/qcmg/reader/AbstractReader.java deleted file mode 100644 index ecdfbd6af..000000000 --- a/qio/src/org/qcmg/reader/AbstractReader.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.reader; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -import org.qcmg.exception.RecordIteratorException; -import org.qcmg.record.AbstractRecordIterator; -import org.qcmg.record.Record; - -public abstract class AbstractReader implements Reader, Iterable { - - protected final InputStream inputStream; - - public AbstractReader(final File file) throws IOException { - FileInputStream stream = new FileInputStream(file); - inputStream = stream; - } - - @Override - public Iterator iterator() { - try { - return getRecordIterator(); - } catch (Exception e) { - throw new RecordIteratorException(e); - } - } - - public abstract AbstractRecordIterator getRecordIterator() throws Exception; - - @Override - public void close() throws IOException { - inputStream.close(); - } -} diff --git a/qio/src/org/qcmg/reader/ExtendedFileReader.java b/qio/src/org/qcmg/reader/ExtendedFileReader.java deleted file mode 100644 index e02cf8dfd..000000000 --- a/qio/src/org/qcmg/reader/ExtendedFileReader.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.reader; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStreamReader; -import java.util.Iterator; - -import org.qcmg.record.ExtendedRecordIterator; -import org.qcmg.record.Serializer; - -public abstract class ExtendedFileReader extends - FileReader { - private final Serializer headerSerializer; - private final HeaderType header; - - public ExtendedFileReader(final File file, - final Serializer recordSerializer, - final Serializer headerSerializer) throws Exception { - super(file, recordSerializer); - FileInputStream inputStream = new FileInputStream(file); - InputStreamReader inputStreamReader = new InputStreamReader(inputStream); - BufferedReader reader = new BufferedReader(inputStreamReader); - this.headerSerializer = headerSerializer; - header = headerSerializer.parseRecord(reader); - } - - public HeaderType getHeader() { - return header; - } - - @Override - public Iterator getIterator() throws Exception { - return new ExtendedRecordIterator( - getInputStream(), getSerializer(), headerSerializer); - } - -} diff --git a/qio/src/org/qcmg/reader/FileReader.java b/qio/src/org/qcmg/reader/FileReader.java deleted file mode 100644 index 01d3633b5..000000000 --- a/qio/src/org/qcmg/reader/FileReader.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.reader; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.Iterator; - -import org.qcmg.record.RecordIterator; -import org.qcmg.record.Serializer; - -public abstract class FileReader implements Closeable, - Iterable { - private final Serializer serializer; - private final File file; - private final FileInputStream inputStream; - - public FileReader(final File file, final Serializer serializer) - throws Exception { - this.file = file; - this.serializer = serializer; - inputStream = new FileInputStream(file); - } - - public Iterator iterator() { - try { - return getIterator(); - } catch (Exception e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - public Iterator getIterator() throws Exception { - return new RecordIterator(inputStream, serializer); - } - - public void close() throws IOException { - inputStream.close(); - } - - public File getFile() { - return file; - } - - public FileInputStream getInputStream() { - return inputStream; - } - - public Serializer getSerializer() { - return serializer; - } - -} diff --git a/qio/src/org/qcmg/reader/Reader.java b/qio/src/org/qcmg/reader/Reader.java deleted file mode 100644 index 97849294e..000000000 --- a/qio/src/org/qcmg/reader/Reader.java +++ /dev/null @@ -1,10 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.reader; - -import java.io.Closeable; - -public interface Reader extends Closeable { - -} diff --git a/qio/src/org/qcmg/simple/SimpleFileReader.java b/qio/src/org/qcmg/simple/SimpleFileReader.java deleted file mode 100644 index 40421ed12..000000000 --- a/qio/src/org/qcmg/simple/SimpleFileReader.java +++ /dev/null @@ -1,23 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.simple; - -import java.io.File; -import java.io.IOException; - -import org.qcmg.reader.AbstractReader; -import org.qcmg.record.AbstractRecordIterator; - -public class SimpleFileReader extends AbstractReader { - - public SimpleFileReader(File file) throws IOException { - super(file); - } - - @Override - public AbstractRecordIterator getRecordIterator() throws Exception{ - return new SimpleRecordIterator(inputStream); - } - -} diff --git a/qio/src/org/qcmg/simple/SimpleRecordIterator.java b/qio/src/org/qcmg/simple/SimpleRecordIterator.java deleted file mode 100644 index 4e0713aee..000000000 --- a/qio/src/org/qcmg/simple/SimpleRecordIterator.java +++ /dev/null @@ -1,26 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.simple; - -import java.io.InputStream; - -import org.qcmg.record.AbstractRecordIterator; - -public class SimpleRecordIterator extends AbstractRecordIterator { - - public SimpleRecordIterator(InputStream stream) throws Exception{ - super(stream); - } - - @Override - protected void readNext() throws Exception { -// try { - next = SimpleSerializer.nextRecord(reader); -// } catch (Exception ex) { -// next = null; -// throw ex; -// } - } - -} diff --git a/qio/src/org/qcmg/simple/SimpleSerializer.java b/qio/src/org/qcmg/simple/SimpleSerializer.java deleted file mode 100644 index e8e65e6eb..000000000 --- a/qio/src/org/qcmg/simple/SimpleSerializer.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.simple; - -import java.io.BufferedReader; -import java.io.IOException; - -import org.qcmg.record.SimpleRecord; - -public final class SimpleSerializer { - - private static final String DEFAULT_ID_PREFIX = ">"; - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static SimpleRecord nextRecord(final BufferedReader reader) throws Exception { - SimpleRecord result = null; - - String id = nextNonheaderLine(reader); - String sequence = reader.readLine(); -// return parseRecord(id, sequence); - if (null != id && null != sequence) { - result = parseRecord(id, sequence); - } - - return result; - } - - static String parseID(final String value) throws Exception { - if ( ! value.startsWith(DEFAULT_ID_PREFIX)) { - throw new Exception("Bad id format: " + value); - } - return value; - } - - static String parseSequence(final String sequence) throws Exception { - if (sequence.startsWith(DEFAULT_ID_PREFIX)) { - throw new Exception("Bad sequence format: " + sequence); - } - return sequence; - } - - static SimpleRecord parseRecord(final String id, final String sequence) - throws Exception { - return new SimpleRecord(parseID(id), parseSequence(sequence)); - } - -} From f7b9e04feee1b31504f3a97c3eb8bf901f192ba3 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 19:04:29 +1000 Subject: [PATCH 20/73] mv maf to unused --- qio/src/org/qcmg/maf/MAFFileReader.java | 42 ------------- qio/src/org/qcmg/maf/MAFRecordIterator.java | 54 ---------------- qio/src/org/qcmg/maf/MAFSerializer.java | 68 --------------------- 3 files changed, 164 deletions(-) delete mode 100644 qio/src/org/qcmg/maf/MAFFileReader.java delete mode 100644 qio/src/org/qcmg/maf/MAFRecordIterator.java delete mode 100644 qio/src/org/qcmg/maf/MAFSerializer.java diff --git a/qio/src/org/qcmg/maf/MAFFileReader.java b/qio/src/org/qcmg/maf/MAFFileReader.java deleted file mode 100644 index b9c001228..000000000 --- a/qio/src/org/qcmg/maf/MAFFileReader.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.maf; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -import org.qcmg.common.maf.MAFRecord; - -public final class MAFFileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - - public MAFFileReader(final File file) throws IOException { - this.file = file; - FileInputStream fileStream = new FileInputStream(file); - inputStream = fileStream; - } - - public Iterator iterator() { - return getRecordIterator(); - } - - public MAFRecordIterator getRecordIterator() { - return new MAFRecordIterator(inputStream); - } - - public void close() throws IOException { - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/maf/MAFRecordIterator.java b/qio/src/org/qcmg/maf/MAFRecordIterator.java deleted file mode 100644 index eb618d7e9..000000000 --- a/qio/src/org/qcmg/maf/MAFRecordIterator.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.maf; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -import org.qcmg.common.maf.MAFRecord; - -public final class MAFRecordIterator implements Iterator { - private final BufferedReader reader; - private MAFRecord next; - - public MAFRecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public MAFRecord next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - MAFRecord result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = MAFSerializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - @SuppressWarnings("unchecked") - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/maf/MAFSerializer.java b/qio/src/org/qcmg/maf/MAFSerializer.java deleted file mode 100644 index 5f1b6549c..000000000 --- a/qio/src/org/qcmg/maf/MAFSerializer.java +++ /dev/null @@ -1,68 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.maf; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -import org.qcmg.common.maf.MAFRecord; - -public final class MAFSerializer { - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static MAFRecord nextRecord(final BufferedReader reader) - throws IOException , Exception { - MAFRecord result = null; - String line = nextNonheaderLine(reader); - if (null != line) { - result = parseRecord(line); - } - return result; - } - - static MAFRecord parseRecord(final String line) throws Exception { - String[] params = tabbedPattern.split(line, -1); - if (8 > params.length) { - throw new Exception("Bad VCF format. Insufficient columns: '" + line + "'"); - } - MAFRecord result = new MAFRecord(); -// result.setChromosome(params[0]); -// result.setPosition(Integer.parseInt(params[1])); -// result.setRef(params[3].charAt(0)); -// result.setAlt(params[4].charAt(0)); -// result.setGenotype(params[9]); -// calculateGenotypeEnum(result); - return result; - } - - private static void calculateGenotypeEnum(MAFRecord record) { - -// String genotypeString = record.getGenotype().substring(0, 3); -// -// if ("0/1".equals(genotypeString)) { -// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getRef(), record.getAlt())); -// } else if ("1/1".equals(genotypeString)) { -// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getAlt(), record.getAlt())); -// } else if ("0/0".equals(genotypeString)) { -// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getRef(), record.getRef())); -// } else { -// System.out.println("unhandled genotype string: " + genotypeString); -// } - - } -} From baa13b87c95c57b416adbbe891b76250a72fca80 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 19:08:32 +1000 Subject: [PATCH 21/73] mv old record to unused --- .../qcmg/record/AbstractRecordIterator.java | 47 ----------------- .../qcmg/record/ExtendedRecordIterator.java | 52 ------------------- qio/src/org/qcmg/record/Record.java | 8 --- qio/src/org/qcmg/record/RecordIterator.java | 47 ----------------- qio/src/org/qcmg/record/Serializer.java | 39 -------------- qio/src/org/qcmg/record/SimpleRecord.java | 40 -------------- 6 files changed, 233 deletions(-) delete mode 100644 qio/src/org/qcmg/record/AbstractRecordIterator.java delete mode 100644 qio/src/org/qcmg/record/ExtendedRecordIterator.java delete mode 100644 qio/src/org/qcmg/record/Record.java delete mode 100644 qio/src/org/qcmg/record/RecordIterator.java delete mode 100644 qio/src/org/qcmg/record/Serializer.java delete mode 100644 qio/src/org/qcmg/record/SimpleRecord.java diff --git a/qio/src/org/qcmg/record/AbstractRecordIterator.java b/qio/src/org/qcmg/record/AbstractRecordIterator.java deleted file mode 100644 index c4e5c39bd..000000000 --- a/qio/src/org/qcmg/record/AbstractRecordIterator.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.record; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.concurrent.atomic.AtomicLong; - -import org.qcmg.exception.RecordIteratorException; - -public abstract class AbstractRecordIterator implements Iterator { - - protected final BufferedReader reader; - private final AtomicLong counter; - protected Record next; - - public AbstractRecordIterator(final InputStream stream) throws Exception { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - counter = new AtomicLong(0); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public Record next() { - counter.incrementAndGet(); - Record result = next; - try { - readNext(); - } catch (Exception e) { - throw new RecordIteratorException(e.getMessage() + " [Record count: " + counter.get() +"]", e); - } - return result; - } - - protected abstract void readNext() throws Exception; - - public void remove() { - } - -} diff --git a/qio/src/org/qcmg/record/ExtendedRecordIterator.java b/qio/src/org/qcmg/record/ExtendedRecordIterator.java deleted file mode 100644 index 4e13f26db..000000000 --- a/qio/src/org/qcmg/record/ExtendedRecordIterator.java +++ /dev/null @@ -1,52 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.record; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.concurrent.atomic.AtomicLong; - -public final class ExtendedRecordIterator implements - Iterator { - private final Serializer serializer; - private final BufferedReader reader; - private final AtomicLong counter; - private RecordType next; - - public ExtendedRecordIterator(final InputStream stream, - final Serializer serializer, - final Serializer headerSerializer) throws Exception { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - counter = new AtomicLong(0); - this.serializer = serializer; - headerSerializer.nextRecord(reader); // skip header - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public RecordType next() { - counter.incrementAndGet(); - RecordType result = next; - try { - readNext(); - } catch (Exception e) { - throw new RuntimeException(e.getMessage() + " [Record count: " - + counter.get() + "]", e); - } - return result; - } - - private void readNext() throws Exception { - next = serializer.nextRecord(reader); - } - - public void remove() { - } -} diff --git a/qio/src/org/qcmg/record/Record.java b/qio/src/org/qcmg/record/Record.java deleted file mode 100644 index 95dcb7fd4..000000000 --- a/qio/src/org/qcmg/record/Record.java +++ /dev/null @@ -1,8 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.record; - -public interface Record { - public static final String TAB_DELIMITER = "\t"; -} diff --git a/qio/src/org/qcmg/record/RecordIterator.java b/qio/src/org/qcmg/record/RecordIterator.java deleted file mode 100644 index 39ec69505..000000000 --- a/qio/src/org/qcmg/record/RecordIterator.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.record; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.concurrent.atomic.AtomicLong; - -public final class RecordIterator implements Iterator { - private final Serializer serializer; - private final BufferedReader reader; - private final AtomicLong counter; - private RecordType next; - - public RecordIterator(final InputStream stream, final Serializer serializer) throws Exception { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - counter = new AtomicLong(0); - this.serializer = serializer; - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public RecordType next() { - counter.incrementAndGet(); - RecordType result = next; - try { - readNext(); - } catch (Exception e) { - throw new RuntimeException(e.getMessage() + " [Record count: " + counter.get() +"]", e); - } - return result; - } - - private void readNext() throws Exception { - next = serializer.nextRecord(reader); - } - - public void remove() { - } -} diff --git a/qio/src/org/qcmg/record/Serializer.java b/qio/src/org/qcmg/record/Serializer.java deleted file mode 100644 index 142d28eb5..000000000 --- a/qio/src/org/qcmg/record/Serializer.java +++ /dev/null @@ -1,39 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.record; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -public abstract class Serializer { - public static final String HASH = "#"; - public static final String NEWLINE = "\n"; - public static final String EQUALS = "="; - public static final Pattern tabbedPattern = Pattern.compile("[\\t]+"); - public static final Pattern colonPattern = Pattern.compile("[:]+"); - public static final Pattern hyphenPattern = Pattern.compile("[-]+"); - public static final Pattern equalsPattern = Pattern.compile("[=]+"); - public static final Pattern commaPattern = Pattern.compile("[,]+"); - - public RecordType nextRecord(final BufferedReader reader) throws Exception { - RecordType result = null; - try { - result = parseRecord(reader); - } catch (IOException e) { - throw e; - } catch (Exception e) { - throw e; - } - return result; - } - - public abstract String serialise(final RecordType record) throws Exception; - - public abstract RecordType parseRecord(BufferedReader reader) - throws Exception; -} diff --git a/qio/src/org/qcmg/record/SimpleRecord.java b/qio/src/org/qcmg/record/SimpleRecord.java deleted file mode 100644 index 763346035..000000000 --- a/qio/src/org/qcmg/record/SimpleRecord.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.record; - -/** - * Simple data container class for records that have an id, and some data - *
- * eg. the .csfasta format from SOLiD sequence alignment files. - * Each record is split over two lines. The first line starts with '>' followed by the ID, - * the subsequent line contains the colour space sequence - * - * @author oholmes - */ -public class SimpleRecord implements Record { - - private String id; - private String data; - - public SimpleRecord() {} - - public SimpleRecord(String id, String data) { - this.id = id; - this.data = data; - } - - public void setId(String id) { - this.id = id; - } - public String getId() { - return id; - } - - public void setData(String data) { - this.data = data; - } - public String getData() { - return data; - } -} From d8a55416c49fc5b45ef1313a0eec102abac5adc8 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 19:26:02 +1000 Subject: [PATCH 22/73] recovery qmule src --- qmule/src/org/qcmg/qmule/AlignerCompare.java | 272 ++++++ .../src/org/qcmg/qmule/AlignerCompare.java-- | 272 ++++++ .../qcmg/qmule/AnnotateDCCWithGFFRegions.java | 710 ++++++++++++++++ .../qmule/AnnotateDCCWithGFFRegions.java-- | 710 ++++++++++++++++ qmule/src/org/qcmg/qmule/BAM2CS.java | 183 ++++ qmule/src/org/qcmg/qmule/BAM2CS.java-- | 183 ++++ qmule/src/org/qcmg/qmule/BAMCompress.java | 156 ++++ qmule/src/org/qcmg/qmule/BAMCompress.java-- | 156 ++++ .../src/org/qcmg/qmule/BAMHeaderChecker.java | 250 ++++++ .../org/qcmg/qmule/BAMHeaderChecker.java-- | 250 ++++++ qmule/src/org/qcmg/qmule/BAMPileupUtil.java | 124 +++ qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- | 124 +++ .../src/org/qcmg/qmule/BamMismatchCounts.java | 160 ++++ .../org/qcmg/qmule/BamMismatchCounts.java-- | 160 ++++ .../src/org/qcmg/qmule/BamRecordCounter.java | 44 + .../org/qcmg/qmule/BamRecordCounter.java-- | 44 + .../qcmg/qmule/CompareReferenceRegions.java | 676 +++++++++++++++ .../qcmg/qmule/CompareReferenceRegions.java-- | 676 +++++++++++++++ .../src/org/qcmg/qmule/DbSnpChrLiftover.java | 86 ++ .../org/qcmg/qmule/DbSnpChrLiftover.java-- | 86 ++ .../org/qcmg/qmule/GermlineDBStripper.java | 46 + .../org/qcmg/qmule/GermlineDBStripper.java-- | 47 + qmule/src/org/qcmg/qmule/GetBamRecords.java | 226 +++++ qmule/src/org/qcmg/qmule/GetBamRecords.java-- | 226 +++++ qmule/src/org/qcmg/qmule/GetInsetSize.java | 35 + qmule/src/org/qcmg/qmule/GetInsetSize.java-- | 35 + qmule/src/org/qcmg/qmule/IndelDCCHeader.java | 395 +++++++++ .../src/org/qcmg/qmule/IndelDCCHeader.java-- | 395 +++++++++ qmule/src/org/qcmg/qmule/MAF2DCC1.java | 418 +++++++++ qmule/src/org/qcmg/qmule/MAF2DCC1.java-- | 418 +++++++++ qmule/src/org/qcmg/qmule/Main.java | 100 +++ qmule/src/org/qcmg/qmule/Main.java-- | 100 +++ qmule/src/org/qcmg/qmule/Messages.java | 132 +++ qmule/src/org/qcmg/qmule/Messages.java-- | 132 +++ qmule/src/org/qcmg/qmule/Options.java | 512 +++++++++++ qmule/src/org/qcmg/qmule/Options.java-- | 512 +++++++++++ qmule/src/org/qcmg/qmule/Pileup.java | 101 +++ qmule/src/org/qcmg/qmule/Pileup.java-- | 101 +++ qmule/src/org/qcmg/qmule/PileupStats.java | 254 ++++++ qmule/src/org/qcmg/qmule/PileupStats.java-- | 254 ++++++ qmule/src/org/qcmg/qmule/QMuleException.java | 28 + .../src/org/qcmg/qmule/QMuleException.java-- | 28 + qmule/src/org/qcmg/qmule/QueryCADDLib.java | 187 ++++ qmule/src/org/qcmg/qmule/QueryCADDLib.java-- | 187 ++++ .../qcmg/qmule/ReAnnotateDccWithDbSNP.java | 280 ++++++ .../qcmg/qmule/ReAnnotateDccWithDbSNP.java-- | 280 ++++++ qmule/src/org/qcmg/qmule/ReadPartGZFile.java | 152 ++++ .../src/org/qcmg/qmule/ReadPartGZFile.java-- | 152 ++++ qmule/src/org/qcmg/qmule/ReadsAppend.java | 95 +++ 
qmule/src/org/qcmg/qmule/ReadsAppend.java-- | 95 +++ qmule/src/org/qcmg/qmule/RunGatk.java | 141 +++ qmule/src/org/qcmg/qmule/RunGatk.java-- | 141 +++ .../org/qcmg/qmule/SmithWatermanGotoh.java | 368 ++++++++ .../org/qcmg/qmule/SmithWatermanGotoh.java-- | 368 ++++++++ .../qmule/SnpToReferenceRegionFilter.java | 647 ++++++++++++++ .../qmule/SnpToReferenceRegionFilter.java-- | 647 ++++++++++++++ qmule/src/org/qcmg/qmule/SubSample.java | 165 ++++ qmule/src/org/qcmg/qmule/SubSample.java-- | 165 ++++ qmule/src/org/qcmg/qmule/TestFileFinder.java | 23 + .../src/org/qcmg/qmule/TestFileFinder.java-- | 23 + qmule/src/org/qcmg/qmule/TestJarUpdate.java | 191 +++++ qmule/src/org/qcmg/qmule/TestJarUpdate.java-- | 191 +++++ qmule/src/org/qcmg/qmule/TestSort.java | 109 +++ qmule/src/org/qcmg/qmule/TestSort.java-- | 109 +++ .../src/org/qcmg/qmule/TranscriptomeMule.java | 192 +++++ .../org/qcmg/qmule/TranscriptomeMule.java-- | 192 +++++ .../src/org/qcmg/qmule/WiggleFromPileup.java | 302 +++++++ .../org/qcmg/qmule/WiggleFromPileup.java-- | 302 +++++++ .../qcmg/qmule/WiggleFromPileupTakeTwo.java | 307 +++++++ .../qcmg/qmule/WiggleFromPileupTakeTwo.java-- | 307 +++++++ qmule/src/org/qcmg/qmule/XCvsZP.java | 117 +++ qmule/src/org/qcmg/qmule/XCvsZP.java-- | 117 +++ qmule/src/org/qcmg/qmule/bam/CheckBam.java | 339 ++++++++ qmule/src/org/qcmg/qmule/bam/CheckBam.java-- | 339 ++++++++ .../qcmg/qmule/bam/GetContigsFromHeader.java | 127 +++ .../qmule/bam/GetContigsFromHeader.java-- | 127 +++ qmule/src/org/qcmg/qmule/messages.properties | 107 +++ qmule/src/org/qcmg/qmule/qcnv/CNVseq.java | 226 +++++ qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- | 226 +++++ qmule/src/org/qcmg/qmule/qcnv/Main.java | 57 ++ qmule/src/org/qcmg/qmule/qcnv/Main.java-- | 57 ++ qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java | 152 ++++ qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- | 152 ++++ qmule/src/org/qcmg/qmule/qcnv/Options.java | 169 ++++ qmule/src/org/qcmg/qmule/qcnv/Options.java-- | 169 ++++ qmule/src/org/qcmg/qmule/queryChrMT.java | 68 ++ qmule/src/org/qcmg/qmule/queryChrMT.java-- | 68 ++ .../org/qcmg/qmule/snppicker/CompareSnps.java | 205 +++++ .../qcmg/qmule/snppicker/CompareSnps.java-- | 205 +++++ .../qmule/snppicker/ExamineVerifiedSnps.java | 237 ++++++ .../snppicker/ExamineVerifiedSnps.java-- | 237 ++++++ .../qcmg/qmule/snppicker/GatkUniqueSnps.java | 488 +++++++++++ .../qmule/snppicker/GatkUniqueSnps.java-- | 488 +++++++++++ qmule/src/org/qcmg/qmule/snppicker/Mule.java | 85 ++ .../src/org/qcmg/qmule/snppicker/Mule.java-- | 85 ++ .../org/qcmg/qmule/snppicker/SnpPicker.java | 802 ++++++++++++++++++ .../org/qcmg/qmule/snppicker/SnpPicker.java-- | 802 ++++++++++++++++++ .../org/qcmg/qmule/snppicker/UniqueQSnps.java | 200 +++++ .../qcmg/qmule/snppicker/UniqueQSnps.java-- | 200 +++++ .../org/qcmg/qmule/snppicker/UniqueSnps.java | 263 ++++++ .../qcmg/qmule/snppicker/UniqueSnps.java-- | 263 ++++++ .../qcmg/qmule/snppicker/VariantRecord.java | 193 +++++ .../qcmg/qmule/snppicker/VariantRecord.java-- | 193 +++++ .../qmule/util/IGVBatchFileGenerator.java | 78 ++ .../qmule/util/IGVBatchFileGenerator.java-- | 78 ++ .../org/qcmg/qmule/util/TabbedDataLoader.java | 61 ++ .../qcmg/qmule/util/TabbedDataLoader.java-- | 61 ++ qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java | 269 ++++++ .../src/org/qcmg/qmule/vcf/CompareVCFs.java-- | 269 ++++++ .../src/org/qcmg/qmule/vcf/ConvertVcfChr.java | 116 +++ .../org/qcmg/qmule/vcf/ConvertVcfChr.java-- | 116 +++ .../qcmg/qmule/vcf/RefAndMultiGenotype.java | 101 +++ .../qcmg/qmule/vcf/RefAndMultiGenotype.java-- | 101 
+++ 113 files changed, 24948 insertions(+) create mode 100644 qmule/src/org/qcmg/qmule/AlignerCompare.java create mode 100644 qmule/src/org/qcmg/qmule/AlignerCompare.java-- create mode 100644 qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java create mode 100644 qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- create mode 100644 qmule/src/org/qcmg/qmule/BAM2CS.java create mode 100644 qmule/src/org/qcmg/qmule/BAM2CS.java-- create mode 100644 qmule/src/org/qcmg/qmule/BAMCompress.java create mode 100644 qmule/src/org/qcmg/qmule/BAMCompress.java-- create mode 100644 qmule/src/org/qcmg/qmule/BAMHeaderChecker.java create mode 100644 qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- create mode 100644 qmule/src/org/qcmg/qmule/BAMPileupUtil.java create mode 100644 qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- create mode 100644 qmule/src/org/qcmg/qmule/BamMismatchCounts.java create mode 100644 qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- create mode 100644 qmule/src/org/qcmg/qmule/BamRecordCounter.java create mode 100644 qmule/src/org/qcmg/qmule/BamRecordCounter.java-- create mode 100644 qmule/src/org/qcmg/qmule/CompareReferenceRegions.java create mode 100644 qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- create mode 100644 qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java create mode 100644 qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- create mode 100644 qmule/src/org/qcmg/qmule/GermlineDBStripper.java create mode 100644 qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- create mode 100644 qmule/src/org/qcmg/qmule/GetBamRecords.java create mode 100644 qmule/src/org/qcmg/qmule/GetBamRecords.java-- create mode 100644 qmule/src/org/qcmg/qmule/GetInsetSize.java create mode 100644 qmule/src/org/qcmg/qmule/GetInsetSize.java-- create mode 100644 qmule/src/org/qcmg/qmule/IndelDCCHeader.java create mode 100644 qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- create mode 100644 qmule/src/org/qcmg/qmule/MAF2DCC1.java create mode 100644 qmule/src/org/qcmg/qmule/MAF2DCC1.java-- create mode 100644 qmule/src/org/qcmg/qmule/Main.java create mode 100644 qmule/src/org/qcmg/qmule/Main.java-- create mode 100644 qmule/src/org/qcmg/qmule/Messages.java create mode 100644 qmule/src/org/qcmg/qmule/Messages.java-- create mode 100644 qmule/src/org/qcmg/qmule/Options.java create mode 100644 qmule/src/org/qcmg/qmule/Options.java-- create mode 100644 qmule/src/org/qcmg/qmule/Pileup.java create mode 100644 qmule/src/org/qcmg/qmule/Pileup.java-- create mode 100644 qmule/src/org/qcmg/qmule/PileupStats.java create mode 100644 qmule/src/org/qcmg/qmule/PileupStats.java-- create mode 100644 qmule/src/org/qcmg/qmule/QMuleException.java create mode 100644 qmule/src/org/qcmg/qmule/QMuleException.java-- create mode 100644 qmule/src/org/qcmg/qmule/QueryCADDLib.java create mode 100644 qmule/src/org/qcmg/qmule/QueryCADDLib.java-- create mode 100644 qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java create mode 100644 qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- create mode 100644 qmule/src/org/qcmg/qmule/ReadPartGZFile.java create mode 100644 qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- create mode 100644 qmule/src/org/qcmg/qmule/ReadsAppend.java create mode 100644 qmule/src/org/qcmg/qmule/ReadsAppend.java-- create mode 100644 qmule/src/org/qcmg/qmule/RunGatk.java create mode 100644 qmule/src/org/qcmg/qmule/RunGatk.java-- create mode 100644 qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java create mode 100644 qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java-- create mode 100644 
qmule/src/org/qcmg/qmule/SnpToReferenceRegionFilter.java create mode 100644 qmule/src/org/qcmg/qmule/SnpToReferenceRegionFilter.java-- create mode 100644 qmule/src/org/qcmg/qmule/SubSample.java create mode 100644 qmule/src/org/qcmg/qmule/SubSample.java-- create mode 100644 qmule/src/org/qcmg/qmule/TestFileFinder.java create mode 100644 qmule/src/org/qcmg/qmule/TestFileFinder.java-- create mode 100644 qmule/src/org/qcmg/qmule/TestJarUpdate.java create mode 100644 qmule/src/org/qcmg/qmule/TestJarUpdate.java-- create mode 100644 qmule/src/org/qcmg/qmule/TestSort.java create mode 100644 qmule/src/org/qcmg/qmule/TestSort.java-- create mode 100644 qmule/src/org/qcmg/qmule/TranscriptomeMule.java create mode 100644 qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- create mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileup.java create mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- create mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java create mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- create mode 100644 qmule/src/org/qcmg/qmule/XCvsZP.java create mode 100644 qmule/src/org/qcmg/qmule/XCvsZP.java-- create mode 100644 qmule/src/org/qcmg/qmule/bam/CheckBam.java create mode 100644 qmule/src/org/qcmg/qmule/bam/CheckBam.java-- create mode 100644 qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java create mode 100644 qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- create mode 100644 qmule/src/org/qcmg/qmule/messages.properties create mode 100644 qmule/src/org/qcmg/qmule/qcnv/CNVseq.java create mode 100644 qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- create mode 100644 qmule/src/org/qcmg/qmule/qcnv/Main.java create mode 100644 qmule/src/org/qcmg/qmule/qcnv/Main.java-- create mode 100644 qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java create mode 100644 qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- create mode 100644 qmule/src/org/qcmg/qmule/qcnv/Options.java create mode 100644 qmule/src/org/qcmg/qmule/qcnv/Options.java-- create mode 100644 qmule/src/org/qcmg/qmule/queryChrMT.java create mode 100644 qmule/src/org/qcmg/qmule/queryChrMT.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java create mode 100644 qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java create mode 100644 qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java create mode 100644 qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/Mule.java create mode 100644 qmule/src/org/qcmg/qmule/snppicker/Mule.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java create mode 100644 qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java create mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java create mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- create mode 100644 qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java create mode 100644 qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- create mode 100644 qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java create mode 100644 qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- create mode 100644 qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java create mode 100644 
qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- create mode 100644 qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java create mode 100644 qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- create mode 100644 qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java create mode 100644 qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- create mode 100644 qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java create mode 100644 qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- diff --git a/qmule/src/org/qcmg/qmule/AlignerCompare.java b/qmule/src/org/qcmg/qmule/AlignerCompare.java new file mode 100644 index 000000000..5c8538a93 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/AlignerCompare.java @@ -0,0 +1,272 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; +import java.util.Objects; + +import htsjdk.samtools.SAMFileHeader.SortOrder; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordIterator; +import htsjdk.samtools.ValidationStringency; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + +public class AlignerCompare { + static QLogger logger = QLoggerFactory.getLogger(AlignerCompare.class); + boolean discardNonPrimary; + SamReader firReader; + SamReader secReader; + + SAMOrBAMWriterFactory sameWriter; + SAMOrBAMWriterFactory diffWriter_first; + SAMOrBAMWriterFactory diffWriter_second; + + SAMOrBAMWriterFactory unsureWriter_first; + SAMOrBAMWriterFactory unsureWriter_second; + + + long total_bam1 = 0; + long total_bam2 = 0; + long total_same = 0; + long noDiff_bam1 = 0; + long noDiff_bam2 = 0; + long noSecondary_bam1 = 0; + long nosupplementary_bam1 = 0; + long noSecondary_bam2 = 0; + long nosupplementary_bam2 = 0; + long nounsureAlignment = 0; + + + AlignerCompare(File firBam, File secBam, String prefix, boolean flag) throws Exception{ + //check inputs: sort by query name + firReader = SAMFileReaderFactory.createSAMFileReader(firBam, ValidationStringency.SILENT); + secReader = SAMFileReaderFactory.createSAMFileReader(secBam, ValidationStringency.SILENT); + discardNonPrimary = flag; + + if(! firReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) + throw new Exception("Please sort the input BAM by queryname: " + firBam.getAbsolutePath()); + + if(! secReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) + throw new Exception("Please sort the input BAM by queryname: " + secBam.getAbsolutePath()); + + + logger.info("input BAM1: " + firBam.getAbsolutePath()); + logger.info("input BAM2: " + secBam.getAbsolutePath()); + logger.info("discard secondary or supplementary alignments: " + String.valueOf(discardNonPrimary)); + + //create outputs + File outsame = new File(prefix + ".identical.bam" ); + File outdiff_first = new File(prefix + ".different.first.bam" ); + File outdiff_second = new File(prefix + ".different.second.bam" ); + + if(! firBam.getName().equals(secBam.getName())){ + outdiff_first = new File( prefix + ".different." + firBam.getName() ); + outdiff_second = new File( prefix + ".different." 
+ secBam.getName() ); + } + + sameWriter = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outsame); + diffWriter_first = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outdiff_first ); + diffWriter_second = new SAMOrBAMWriterFactory(secReader.getFileHeader(), true, outdiff_second ); + + logger.info("output of identical alignments: " + outsame.getAbsolutePath()); + logger.info("output of different alignments from BAM1: " + outdiff_first.getAbsolutePath()); + logger.info("output of different alignments from BAM2: " + outdiff_second.getAbsolutePath()); + + //execute comparison + compareExecutor(); + + + //close IOs + firReader.close(); + secReader.close(); + sameWriter.closeWriter(); + diffWriter_first.closeWriter(); + diffWriter_second.closeWriter(); + + } + + void compareExecutor() throws Exception{ + ArrayList from1 = new ArrayList (); + ArrayList from2 = new ArrayList (); + SAMRecordIterator it1 = firReader.iterator(); + SAMRecordIterator it2 = secReader.iterator(); + //stats + long noRead = 0; + long noAlign1 = 1; + long noAlign2 = 1; + long noSame = 0; + + //initialize + SAMRecord record1 = it1.next(); + SAMRecord record2 = it2.next(); + String Id = record1.getReadName(); + from1.add(record1); + from2.add(record2); + + //get all aligner from same read + while( it1.hasNext() || it2.hasNext()){ + while(it1.hasNext()){ + noAlign1 ++; + record1 = it1.next() ; + if(record1.getReadName().equals(Id)){ + from1.add(record1); + }else //if not equals(Id) + break; + } //end while + + while( it2.hasNext() ){ + noAlign2 ++; + record2 = it2.next(); + if(record2.getReadName().equals(Id)){ + from2.add(record2); + }else + break; //exit while, record2 is read for next loop + } + //compare alignment in arraylist which filtered out secondary or supplenmentary alignments + noSame += classifyReads( AlignerFilter(from1, unsureWriter_first) , AlignerFilter(from2, unsureWriter_second) ); + + //clear arraylist and store current reads into arraylist for next loop + noRead ++; + from1.clear(); + from2.clear(); + from1.add(record1); + from2.add(record2); + Id = record1.getReadName(); + } + + logger.info(String.format("There are %d reads with %d alignments from BAM1", noRead, noAlign1)); + logger.info(String.format("There are %d reads with %d alignments from BAM2", noRead, noAlign2)); + logger.info(String.format("There are %d alignments are identical from both BAM", noSame)); + logger.info(String.format("Different alignments from BAM1 are %d, from BAM2 are %d", noDiff_bam1, noDiff_bam2)); + logger.info( String.format("discard %d secondary alignments and %d supplementary alignments from BAM1",noSecondary_bam1,nosupplementary_bam1)); + logger.info(String.format("discard %d secondary alignments and %d supplementary alignments from BAM2",noSecondary_bam2,nosupplementary_bam2)); + + + } + + /** + * + * @param from: an input alignments with same read id + * @return ArrayList : cleaned alignments excluding secondary and supplementary alignments + */ + ArrayList AlignerFilter(ArrayList from, SAMOrBAMWriterFactory factory) throws Exception{ + ArrayList cleaned = new ArrayList(); + + for(SAMRecord record : from) + if( discardNonPrimary && record.isSecondaryOrSupplementary()){ + if( record.getNotPrimaryAlignmentFlag()) + noSecondary_bam1 ++; + else if( record.getSupplementaryAlignmentFlag()) + nosupplementary_bam1 ++; + else + throw new Exception(record.getReadName() + " record flag error: record.isSecondaryOrSupplementary but not (secondary or supplementary) : " + record.getFlags()); + }else + 
cleaned.add(record); + +/* //record these multi alignments for further investigation + if(cleaned.size() != 2){ + for(SAMRecord record : cleaned){ + factory.getWriter().addAlignment(record); + nounsureAlignment ++; + + } + } +*/ + return cleaned; + } + + + int classifyReads(ArrayList from1, ArrayList from2) throws Exception{ + ArrayList toremove1 = new ArrayList(); + ArrayList toremove2 = new ArrayList(); + + for(SAMRecord record1 : from1){ + for(SAMRecord record2: from2){ + if(!record1.getReadName().equals(record2.getReadName())) + throw new Exception("error during process: reads with different name are store in arrayList for comparison: " + + record1.getReadName() + " != " + record2.getReadName() ) ; + if (record1.getFlags() == record2.getFlags() && + record1.getReferenceName().equals(record2.getReferenceName()) && + record1.getAlignmentStart() == record2.getAlignmentStart() && + record1.getAlignmentEnd() == record2.getAlignmentEnd() && + record1.getMappingQuality() == record2.getMappingQuality() && + record1.getCigarString().equals(record2.getCigarString()) && + Objects.equals(record1.getAttribute("MD") , record2.getAttribute("MD"))){ + sameWriter.getWriter().addAlignment(record1); + toremove1.add(record1); + toremove2.add(record2); + } + } + } + + //record the left differnt aligner + from1.removeAll(toremove1); + for(SAMRecord record1 : from1) + diffWriter_first.getWriter().addAlignment(record1); + + from2.removeAll(toremove2); + for(SAMRecord record2: from2) + diffWriter_second.getWriter().addAlignment(record2); + + //count unique alignment number + noDiff_bam1 += from1.size(); + noDiff_bam2 += from2.size(); + + return toremove1.size(); + } + + public static void main(String[] args) throws Exception{ + + Options op = new Options(AlignerCompare.class, args); + if(op.hasHelpOption()){ + System.out.println(Messages.getMessage("USAGE_AlignerCompare")); + op.displayHelp(); + System.exit(0); + } + + if( op.getInputFileNames().length != 2 + || op.getOutputFileNames().length != 1 ){ + System.err.println("improper parameters passed to command line, please refer to"); + System.out.println(Messages.getMessage("USAGE_AlignerCompare")); + op.displayHelp(); + System.exit(1); + } + + File f1 = new File(op.getInputFileNames()[0]); + File f2 = new File(op.getInputFileNames()[1]); + if(! f1.exists() || ! f2.exists()) + throw new Exception("input not exists: " + args[0] + " or " + args[1]); + + //assign to true if no "compareAll" option + boolean flag = ! 
op.hasCompareAllOption(); + + if(op.hasLogOption()) + logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getLogFile(), op.getLogLevel()); + else + logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); + + String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + logger.logInitialExecutionStats( "qmule " + AlignerCompare.class.getName(), version,args); + + long startTime = System.currentTimeMillis(); + AlignerCompare compare = new AlignerCompare( f1, f2, op.getOutputFileNames()[0], flag ); + + logger.info( String.format("It took %d hours, %d minutes to perform the comparison", + (int) (System.currentTimeMillis() - startTime) / (1000*60*60), + (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) ); + logger.logFinalExecutionStats(0); + + } + + +} diff --git a/qmule/src/org/qcmg/qmule/AlignerCompare.java-- b/qmule/src/org/qcmg/qmule/AlignerCompare.java-- new file mode 100644 index 000000000..5c8538a93 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/AlignerCompare.java-- @@ -0,0 +1,272 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; +import java.util.Objects; + +import htsjdk.samtools.SAMFileHeader.SortOrder; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordIterator; +import htsjdk.samtools.ValidationStringency; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + +public class AlignerCompare { + static QLogger logger = QLoggerFactory.getLogger(AlignerCompare.class); + boolean discardNonPrimary; + SamReader firReader; + SamReader secReader; + + SAMOrBAMWriterFactory sameWriter; + SAMOrBAMWriterFactory diffWriter_first; + SAMOrBAMWriterFactory diffWriter_second; + + SAMOrBAMWriterFactory unsureWriter_first; + SAMOrBAMWriterFactory unsureWriter_second; + + + long total_bam1 = 0; + long total_bam2 = 0; + long total_same = 0; + long noDiff_bam1 = 0; + long noDiff_bam2 = 0; + long noSecondary_bam1 = 0; + long nosupplementary_bam1 = 0; + long noSecondary_bam2 = 0; + long nosupplementary_bam2 = 0; + long nounsureAlignment = 0; + + + AlignerCompare(File firBam, File secBam, String prefix, boolean flag) throws Exception{ + //check inputs: sort by query name + firReader = SAMFileReaderFactory.createSAMFileReader(firBam, ValidationStringency.SILENT); + secReader = SAMFileReaderFactory.createSAMFileReader(secBam, ValidationStringency.SILENT); + discardNonPrimary = flag; + + if(! firReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) + throw new Exception("Please sort the input BAM by queryname: " + firBam.getAbsolutePath()); + + if(! 
secReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) + throw new Exception("Please sort the input BAM by queryname: " + secBam.getAbsolutePath()); + + + logger.info("input BAM1: " + firBam.getAbsolutePath()); + logger.info("input BAM2: " + secBam.getAbsolutePath()); + logger.info("discard secondary or supplementary alignments: " + String.valueOf(discardNonPrimary)); + + //create outputs + File outsame = new File(prefix + ".identical.bam" ); + File outdiff_first = new File(prefix + ".different.first.bam" ); + File outdiff_second = new File(prefix + ".different.second.bam" ); + + if(! firBam.getName().equals(secBam.getName())){ + outdiff_first = new File( prefix + ".different." + firBam.getName() ); + outdiff_second = new File( prefix + ".different." + secBam.getName() ); + } + + sameWriter = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outsame); + diffWriter_first = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outdiff_first ); + diffWriter_second = new SAMOrBAMWriterFactory(secReader.getFileHeader(), true, outdiff_second ); + + logger.info("output of identical alignments: " + outsame.getAbsolutePath()); + logger.info("output of different alignments from BAM1: " + outdiff_first.getAbsolutePath()); + logger.info("output of different alignments from BAM2: " + outdiff_second.getAbsolutePath()); + + //execute comparison + compareExecutor(); + + + //close IOs + firReader.close(); + secReader.close(); + sameWriter.closeWriter(); + diffWriter_first.closeWriter(); + diffWriter_second.closeWriter(); + + } + + void compareExecutor() throws Exception{ + ArrayList from1 = new ArrayList (); + ArrayList from2 = new ArrayList (); + SAMRecordIterator it1 = firReader.iterator(); + SAMRecordIterator it2 = secReader.iterator(); + //stats + long noRead = 0; + long noAlign1 = 1; + long noAlign2 = 1; + long noSame = 0; + + //initialize + SAMRecord record1 = it1.next(); + SAMRecord record2 = it2.next(); + String Id = record1.getReadName(); + from1.add(record1); + from2.add(record2); + + //get all aligner from same read + while( it1.hasNext() || it2.hasNext()){ + while(it1.hasNext()){ + noAlign1 ++; + record1 = it1.next() ; + if(record1.getReadName().equals(Id)){ + from1.add(record1); + }else //if not equals(Id) + break; + } //end while + + while( it2.hasNext() ){ + noAlign2 ++; + record2 = it2.next(); + if(record2.getReadName().equals(Id)){ + from2.add(record2); + }else + break; //exit while, record2 is read for next loop + } + //compare alignment in arraylist which filtered out secondary or supplenmentary alignments + noSame += classifyReads( AlignerFilter(from1, unsureWriter_first) , AlignerFilter(from2, unsureWriter_second) ); + + //clear arraylist and store current reads into arraylist for next loop + noRead ++; + from1.clear(); + from2.clear(); + from1.add(record1); + from2.add(record2); + Id = record1.getReadName(); + } + + logger.info(String.format("There are %d reads with %d alignments from BAM1", noRead, noAlign1)); + logger.info(String.format("There are %d reads with %d alignments from BAM2", noRead, noAlign2)); + logger.info(String.format("There are %d alignments are identical from both BAM", noSame)); + logger.info(String.format("Different alignments from BAM1 are %d, from BAM2 are %d", noDiff_bam1, noDiff_bam2)); + logger.info( String.format("discard %d secondary alignments and %d supplementary alignments from BAM1",noSecondary_bam1,nosupplementary_bam1)); + logger.info(String.format("discard %d secondary alignments and %d supplementary alignments from 
BAM2",noSecondary_bam2,nosupplementary_bam2)); + + + } + + /** + * + * @param from: an input alignments with same read id + * @return ArrayList : cleaned alignments excluding secondary and supplementary alignments + */ + ArrayList AlignerFilter(ArrayList from, SAMOrBAMWriterFactory factory) throws Exception{ + ArrayList cleaned = new ArrayList(); + + for(SAMRecord record : from) + if( discardNonPrimary && record.isSecondaryOrSupplementary()){ + if( record.getNotPrimaryAlignmentFlag()) + noSecondary_bam1 ++; + else if( record.getSupplementaryAlignmentFlag()) + nosupplementary_bam1 ++; + else + throw new Exception(record.getReadName() + " record flag error: record.isSecondaryOrSupplementary but not (secondary or supplementary) : " + record.getFlags()); + }else + cleaned.add(record); + +/* //record these multi alignments for further investigation + if(cleaned.size() != 2){ + for(SAMRecord record : cleaned){ + factory.getWriter().addAlignment(record); + nounsureAlignment ++; + + } + } +*/ + return cleaned; + } + + + int classifyReads(ArrayList from1, ArrayList from2) throws Exception{ + ArrayList toremove1 = new ArrayList(); + ArrayList toremove2 = new ArrayList(); + + for(SAMRecord record1 : from1){ + for(SAMRecord record2: from2){ + if(!record1.getReadName().equals(record2.getReadName())) + throw new Exception("error during process: reads with different name are store in arrayList for comparison: " + + record1.getReadName() + " != " + record2.getReadName() ) ; + if (record1.getFlags() == record2.getFlags() && + record1.getReferenceName().equals(record2.getReferenceName()) && + record1.getAlignmentStart() == record2.getAlignmentStart() && + record1.getAlignmentEnd() == record2.getAlignmentEnd() && + record1.getMappingQuality() == record2.getMappingQuality() && + record1.getCigarString().equals(record2.getCigarString()) && + Objects.equals(record1.getAttribute("MD") , record2.getAttribute("MD"))){ + sameWriter.getWriter().addAlignment(record1); + toremove1.add(record1); + toremove2.add(record2); + } + } + } + + //record the left differnt aligner + from1.removeAll(toremove1); + for(SAMRecord record1 : from1) + diffWriter_first.getWriter().addAlignment(record1); + + from2.removeAll(toremove2); + for(SAMRecord record2: from2) + diffWriter_second.getWriter().addAlignment(record2); + + //count unique alignment number + noDiff_bam1 += from1.size(); + noDiff_bam2 += from2.size(); + + return toremove1.size(); + } + + public static void main(String[] args) throws Exception{ + + Options op = new Options(AlignerCompare.class, args); + if(op.hasHelpOption()){ + System.out.println(Messages.getMessage("USAGE_AlignerCompare")); + op.displayHelp(); + System.exit(0); + } + + if( op.getInputFileNames().length != 2 + || op.getOutputFileNames().length != 1 ){ + System.err.println("improper parameters passed to command line, please refer to"); + System.out.println(Messages.getMessage("USAGE_AlignerCompare")); + op.displayHelp(); + System.exit(1); + } + + File f1 = new File(op.getInputFileNames()[0]); + File f2 = new File(op.getInputFileNames()[1]); + if(! f1.exists() || ! f2.exists()) + throw new Exception("input not exists: " + args[0] + " or " + args[1]); + + //assign to true if no "compareAll" option + boolean flag = ! 
op.hasCompareAllOption(); + + if(op.hasLogOption()) + logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getLogFile(), op.getLogLevel()); + else + logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); + + String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + logger.logInitialExecutionStats( "qmule " + AlignerCompare.class.getName(), version,args); + + long startTime = System.currentTimeMillis(); + AlignerCompare compare = new AlignerCompare( f1, f2, op.getOutputFileNames()[0], flag ); + + logger.info( String.format("It took %d hours, %d minutes to perform the comparison", + (int) (System.currentTimeMillis() - startTime) / (1000*60*60), + (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) ); + logger.logFinalExecutionStats(0); + + } + + +} diff --git a/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java b/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java new file mode 100644 index 000000000..ee7a1eb00 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java @@ -0,0 +1,710 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; +import java.util.Vector; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionName; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + + +public class AnnotateDCCWithGFFRegions { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private List chromosomes = new ArrayList(); + private final int exitStatus = 0; + private Map> inputRecords = new HashMap>(); + private final Map> compareRecords = new HashMap>(); + private int overlapCount = 0; + private int notOverlappingCount = 0; + private int recordCount; + private Vector inputFileHeader = new Vector(); + private String inputFileType; + private String compareFileType; + private static QLogger logger; + private static final String MAF = "maf"; + private static final String GFF3 = "gff3"; + private static final String BED = "bed"; + private static final String VCF = "vcf"; + private static final String TAB = "txt"; + private static final String DCC1 = "dcc1"; + private static final String DCCQ = "dccq"; + private BufferedWriter outputFileWriter; + private File outputFile; + private String[] features; + private boolean stranded; + private final int GFF_STRAND_INDEX = 6; + private int DCC_STRAND_INDEX = -1; + private int QCMGFLAG_COLUMN_INDEX = -1; + private int REFERENCE_ALLELE_INDEX = -1; + private int TUMOUR_ALLELE_INDEX = -1; + private String annotation; + private int MUTATION_TYPE_INDEX; + //private static final int PATIENT_MIN = 5; + + public int engage() throws Exception { + + loadGFFFile(cmdLineInputFiles[1], compareRecords); + if 
(compareRecords.isEmpty()) { + logger.info("No positions loaded from gff file"); + } + + logger.info("Starting to process DCC records."); + + outputFile = new File(cmdLineOutputFiles[0]); + + outputFileWriter = new BufferedWriter(new FileWriter(outputFile)); + + inputFileType = null; + inputFileType = getFileType(cmdLineInputFiles[0]); + recordCount = loadDCCFile(cmdLineInputFiles[0], inputFileHeader, inputFileType); + logger.info("Finished processing DCC records."); + outputFileWriter.close(); + logger.info("SUMMARY"); + logger.info("Total DCC Records: " + recordCount); + logger.info("Total Records in supplied reference regions: " + overlapCount); + logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); + return exitStatus; + } + + private String getFileType(String fileName) throws QMuleException { + int index = fileName.lastIndexOf(".") + 1; + String name = fileName.substring(index, fileName.length()); + + if (name.equals("dcc")) { + return "dcc1"; + } + + if (!name.equals(DCC1) && !name.equals(DCCQ)) { + throw new QMuleException("FILE_TYPE_ERROR"); + } + + return name; + } + + private int loadGFFFile(String file, Map> records) throws Exception { + TabbedFileReader reader = new TabbedFileReader(new File(file)); + int recordCount = 0; + try { + + Iterator iterator = reader.getRecordIterator(); + + while (iterator.hasNext()) { + + TabbedRecord tab = iterator.next(); + + if (tab.getData().startsWith("#")) { + continue; + } + recordCount++; + ChrPosition chrPos = getChrPosition(GFF3, tab, Integer.toString(recordCount)); + String key = chrPos.getChromosome().replace("chr", ""); + if (records.containsKey(key)) { + records.get(key).put(chrPos, tab); + } else { + TreeMap map = new TreeMap(); + map.put(chrPos, tab); + records.put(key,map); + } + if (!chromosomes.contains(key)) { + chromosomes.add(key); + } + } + } finally { + reader.close(); + } + + logger.info("loaded gff file, total records: " + recordCount); + return recordCount; + } + + private int loadDCCFile(String file, Vector header, String fileType) throws Exception { + TabbedFileReader reader = new TabbedFileReader(new File(file)); + + int recordCount = 0; + try { + + Iterator iterator = reader.getRecordIterator(); + + if (reader.getHeader() != null) { + Iterator iter = reader.getHeader().iterator(); + while (iter.hasNext()) { + header.add(iter.next()); + } + } + while (iterator.hasNext()) { + + TabbedRecord inputRecord = iterator.next(); + if (inputRecord.getData().startsWith("#") || inputRecord.getData().startsWith("Hugo") || inputRecord.getData().startsWith("analysis") || + inputRecord.getData().startsWith("mutation")) { + header.add(inputRecord.getData()); + continue; + } + + if (header.size() > 0) { + parseDCCHeader(header, fileType); + logger.info("Column of DCC file to annotate: " + QCMGFLAG_COLUMN_INDEX); + writeHeader(fileType, header); + header.clear(); + } + + recordCount++; + ChrPosition chrPos = getChrPosition(fileType, inputRecord, null); + String key = chrPos.getChromosome().replace("chr", ""); + TreeMap compareMap = compareRecords.get(key); + boolean isOverlapping = false; + if (compareMap != null) { + //check to see if it is overlapping with the comparison reference region + for (Entry compareEntry : compareMap.entrySet()) { + ChrPosition comparePos = compareEntry.getKey(); + if (comparePos.getEndPosition() < chrPos.getStartPosition()) { + continue; + } else if (comparePos.getStartPosition() > chrPos.getEndPosition()) { + break; + } else { + String[] vals = inputRecord.getDataArray(); + 
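+ // this GFF region overlaps the DCC record's range:
+ // if an annotation string was supplied, records matching the region's exact position and motif (and not already flagged GERM) get that annotation appended to the QCMG flag column;
+ // otherwise the overlapping region's GFF feature names are appended to the QCMG flag column instead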
+ if (annotation != null) { + String oldInfo = vals[QCMGFLAG_COLUMN_INDEX]; + if (!oldInfo.contains("GERM") && tabbedRecordMatchesCompareRecord(chrPos, inputRecord, compareEntry)) { + if (annotation != null && !oldInfo.contains("GERM")) { + if (annotateWithGermline(vals, compareEntry.getValue().getDataArray())) { + isOverlapping = true; + if (!oldInfo.equals("") && !oldInfo.endsWith(";")) { + oldInfo += ";"; + } + oldInfo += annotation; + inputRecord = buildOutputString(inputRecord, vals, oldInfo); + } + } + } + } else { + if (tabbedRecordFallsInCompareRecord(chrPos, inputRecord, compareEntry)) { + isOverlapping = true; + String oldInfo = vals[QCMGFLAG_COLUMN_INDEX]; + //annotate with gff feature + String feature = getFeatures(compareEntry.getValue()); + if (!oldInfo.equals("") && !oldInfo.endsWith(";") && !feature.equals("")) { + oldInfo += ";"; + } + oldInfo += feature; + inputRecord = buildOutputString(inputRecord, vals, oldInfo); + } + } + + } + } + } + + if (isOverlapping) { + overlapCount++; + } else { + notOverlappingCount++; + } + + writeRecord(inputRecord); + + if (recordCount % 50000 == 0) { + logger.info("Processed records: " + recordCount); + } + } + } finally { + reader.close(); + } + return recordCount; + } + + private TabbedRecord buildOutputString(TabbedRecord inputRecord, String[] vals, + String oldInfo) { + vals[QCMGFLAG_COLUMN_INDEX] = oldInfo; + String data= ""; + for (String s: vals) { + data += s + "\t"; + } + inputRecord.setData(data); + return inputRecord; + } + + private boolean annotateWithGermline(String[] inputValues, String[] gffValues) throws QMuleException { + String[] attribs = gffValues[getFeatureIndex("attribs")].split(";"); + String gffMotif = getGFF3Motif(attribs); + //int patientCount = getPatientCount(attribs); + if (gffMotif == null) { + String position = gffValues[0] + ":" + gffValues[3] + "-" + gffValues[4]; + throw new QMuleException("NULL_GFF_MOTIF", position); + } + String dccMotif = getDCCMotif(inputValues); + if ((dccMotif == null || gffMotif.equals(dccMotif))) { + return true; + } + + return false; + } + + private int getPatientCount(String[] attribs) { + for (String s: attribs) { + if (s.startsWith("PatientCount")) { + return new Integer(s.split("=")[1]); + } + } + return 0; + } + + private String getGFF3Motif(String[] attribs) { + + String referenceAllele = null; + String tumourAllele = null; + for (String s: attribs) { + if (s.startsWith("ReferenceAllele")) { + referenceAllele = s.split("=")[1]; + } + if (s.startsWith("TumourAllele")) { + tumourAllele = s.split("=")[1]; + } + } + + if (referenceAllele.contains("-") && !tumourAllele.contains("-")) { + return tumourAllele; + } + if (!referenceAllele.contains("-") && tumourAllele.contains("-")) { + return referenceAllele; + } + return null; + } + + private String getDCCMotif(String[] inputValues) { + String mutationType = inputValues[MUTATION_TYPE_INDEX]; + String refAllele = inputValues[REFERENCE_ALLELE_INDEX]; + String tumourAllele = inputValues[TUMOUR_ALLELE_INDEX]; + + if (mutationType.equals("2")) { + return tumourAllele; + } else if (mutationType.equals("3")) { + return refAllele; + } + return null; + } + + public void parseDCCHeader(List headers, String inputFileType) throws QMuleException { + + for (String header: headers) { + String[] values = header.split("\t"); + if (values.length == 28 && inputFileType.equals(DCC1) + || values.length == 39 && inputFileType.equals(DCCQ)) { + //check dcc header + for (int i=0; i compareEntry) { + if (compareEntry != null) { + ChrPosition 
compareChrPos = compareEntry.getKey(); + if ((inputChrPos.getStartPosition() == compareChrPos.getStartPosition() + && inputChrPos.getEndPosition() == compareChrPos.getEndPosition())) { + //check strand if this option is provided + if (stranded) { + String inputStrand = inputRecord.getDataArray()[DCC_STRAND_INDEX]; + String compareStrand = compareEntry.getValue().getDataArray()[GFF_STRAND_INDEX]; + if (inputStrand.equals(compareStrand)) { + return true; + } + } else { + return true; + } + } + } + return false; + } + + private boolean tabbedRecordFallsInCompareRecord(ChrPosition inputChrPos, TabbedRecord inputRecord, Entry entry) { + if (entry != null) { + ChrPosition compareChrPos = entry.getKey(); + if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() && inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || + (inputChrPos.getEndPosition() >= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() <= compareChrPos.getEndPosition()) + || (inputChrPos.getStartPosition() <= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() >= compareChrPos.getEndPosition())) { + //check strand if this option is provided + if (stranded) { + String inputStrand = inputRecord.getDataArray()[DCC_STRAND_INDEX]; + String compareStrand = entry.getValue().getDataArray()[GFF_STRAND_INDEX]; + if (inputStrand.equals(compareStrand)) { + return true; + } + } else { + return true; + } + } + } + return false; + } + + public String[] getCmdLineInputFiles() { + return cmdLineInputFiles; + } + + public void setCmdLineInputFiles(String[] cmdLineInputFiles) { + this.cmdLineInputFiles = cmdLineInputFiles; + } + + + private void writeHeader(String file, Vector header) throws IOException { + + for (String h: header) { + outputFileWriter.write(h + "\n"); + } + } + + public List getChromosomes() { + return chromosomes; + } + + public void setChromosomes(List chromosomes) { + this.chromosomes = chromosomes; + } + + + public int getOverlapCount() { + return overlapCount; + } + + public void setOverlapCount(int overlapCount) { + this.overlapCount = overlapCount; + } + + public int getNotOverlappingCount() { + return notOverlappingCount; + } + + public void setNotOverlappingCount(int notOverlappingCount) { + this.notOverlappingCount = notOverlappingCount; + } + + public int getMafCount() { + return recordCount; + } + + public void setMafCount(int mafCount) { + this.recordCount = mafCount; + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(AnnotateDCCWithGFFRegions.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("AnnotateDCCWithGFFRegions", AnnotateDCCWithGFFRegions.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 2) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + cmdLineOutputFiles = options.getOutputFileNames(); + if ( ! FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + + for (String file : cmdLineOutputFiles) { + if (new File(file).exists() && !new File(file).isDirectory()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + features = options.getFeature(); + annotation = options.getAnnotation(); + if (features == null && annotation == null) { + logger.info("Features to annotate: " + "feature"); + } else if (features != null){ + String featureString = new String(); + for (String f : features) { + featureString += f; + } + logger.info("Features to annotate: " + featureString); + } + logger.info("Annotation is : " + annotation); + stranded = options.hasStrandedOption(); + if (options.getColumn() != null) { + this.QCMGFLAG_COLUMN_INDEX = new Integer(options.getColumn()) - 1; + } + + + + logger.info("Require matching strand: " + stranded); + logger.info("DCC file: " + cmdLineInputFiles[0]); + logger.info("GFF file: " + cmdLineInputFiles[1]); + + } + + return returnStatus; + } + + public static void main(String[] args) throws Exception { + AnnotateDCCWithGFFRegions sp = new AnnotateDCCWithGFFRegions(); + LoadReferencedClasses.loadClasses(AnnotateDCCWithGFFRegions.class); + sp.setup(args); + int exitStatus = sp.engage(); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + public String[] getCmdLineOutputFiles() { + return cmdLineOutputFiles; + } + + public void setCmdLineOutputFiles(String[] cmdLineOutputFiles) { + this.cmdLineOutputFiles = cmdLineOutputFiles; + } + + public Map> getInputRecords() { + return inputRecords; + } + + public void setInputRecords( + Map> inputRecords) { + this.inputRecords = inputRecords; + } + + public Vector getInputFileHeader() { + return inputFileHeader; + } + + public void setInputFileHeader(Vector inputFileHeader) { + this.inputFileHeader = inputFileHeader; + } + + public File getOutputFile() { + return outputFile; + } + + public int getREFERENCE_ALLELE_INDEX() { + return REFERENCE_ALLELE_INDEX; + } + + public void setREFERENCE_ALLELE_INDEX(int rEFERENCE_ALLELE_INDEX) { + REFERENCE_ALLELE_INDEX = rEFERENCE_ALLELE_INDEX; + } + + public int getTUMOUR_ALLELE_INDEX() { + return TUMOUR_ALLELE_INDEX; + } + + public void setTUMOUR_ALLELE_INDEX(int tUMOUR_ALLELE_INDEX) { + TUMOUR_ALLELE_INDEX = tUMOUR_ALLELE_INDEX; + } + + public int getMUTATION_TYPE_INDEX() { + return MUTATION_TYPE_INDEX; + } + + public void setMUTATION_TYPE_INDEX(int mUTATION_TYPE_INDEX) { + MUTATION_TYPE_INDEX = mUTATION_TYPE_INDEX; + } + + public void setOutputFile(File outputFile) { + 
this.outputFile = outputFile; + } + + public String getAnnotation() { + return this.annotation; + } + +} diff --git a/qmule/src/org/qcmg/qmule/BAM2CS.java b/qmule/src/org/qcmg/qmule/BAM2CS.java new file mode 100644 index 000000000..13d4d21f5 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BAM2CS.java @@ -0,0 +1,183 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file.
+ */ +package org.qcmg.qmule; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; + +import java.io.*; +import java.net.InetAddress; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.HashMap; +import java.util.Iterator; + +import org.qcmg.common.string.StringUtils; + + +public class BAM2CS { + File inBAM; + File outDir; + HashMap<Integer, PrintWriter> outFast = new HashMap<Integer, PrintWriter>(); + HashMap<Integer, PrintWriter> outQual = new HashMap<Integer, PrintWriter>(); + + + BAM2CS(final String[] args) throws Exception{ + inBAM = new File(args[0]); + outDir = new File(args[1]); + printHeader(null); + } + + /** + * retrieve the CS and CQ values from each BAM record and write them to the csfasta and qual output files + * @throws Exception + */ + void CreateCSfile() throws Exception{ + + SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault(); + SamReader reader = samReaderFactory.open(inBAM); + int num = 0; + for (SAMRecord record : reader) { + String id = ">" + record.getReadName(); + Add2Fasta(id, record.getAttribute("CS").toString()); + add2Qual(id, record.getAttribute("CQ").toString()); + num ++; + } + + reader.close(); + closeWriters(); + + System.out.println(getTime() + " total output records " + num); + System.exit(0); + } + + /** + * Add header information to the Writer. If the Writer is null, print to standard output + * @param Writer + * @throws Exception + */ + private void printHeader(PrintWriter Writer) throws Exception{ + if(Writer == null){ + System.out.println(getTime() + " tool name: qmule org.qcmg.qmule.BAM2CS"); + System.out.println(getTime() + " host: " + InetAddress.getLocalHost().getHostName()); + System.out.println(getTime() + " input: " + inBAM.getAbsolutePath()); + System.out.println(getTime() + " output directory: " + outDir.getAbsolutePath()); + }else{ + Writer.println("#" + getTime() + " tool name: qmule org.qcmg.qmule.BAM2CS"); + Writer.println("#" + getTime() + " host: " + InetAddress.getLocalHost().getHostName()); + Writer.println("#" + getTime() + " input: " + inBAM.getAbsolutePath()); + } + } + + private void closeWriters(){ + //close all csfasta files + Iterator<PrintWriter> itr = outFast.values().iterator(); + while(itr.hasNext()){ + PrintWriter Writer = itr.next(); + Writer.close(); + } + + //close all qual files + itr = outQual.values().iterator(); + while(itr.hasNext()){ + PrintWriter Writer = itr.next(); + Writer.close(); + } + } + + /** + * Add the raw color sequence to the output csfasta; if the output file doesn't exist, create a new one with header lines + * @param id + * @param seq + * @throws Exception + */ + private void Add2Fasta(String id, String seq) throws Exception{ + //sequence length is seq.length() - 1 since the colour string starts with a leading 'T' or 'G' base + int len = seq.length() - 1; + PrintWriter Writer; + + //get writer or create a new one + if(outFast.containsKey(len)){ + Writer = outFast.get(len); + }else{ + String fname = inBAM.getName(); + int index = fname.lastIndexOf('.'); + fname = fname.substring(0,index) + "." + len + ".csfasta"; + File csFile = new File(outDir, fname); + Writer = new PrintWriter(new FileWriter(csFile)); + outFast.put(len, Writer); + printHeader(Writer); + System.out.println(getTime() + " creating output: " + csFile.getAbsolutePath() ); + } + + Writer.println(id); + Writer.println(seq); + } + /** + * convert the CQ value into a raw qual sequence and add it to the output qual; + * If the output file doesn't exist, create a new one with header lines.
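+ * For example (illustrative values only): a CQ string of "!5+" is written out as "33 53 43",
+ * i.e. the raw ASCII code of each quality character, separated by spaces.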
+ * @param id + * @param seq + * @throws Exception + */ + void add2Qual(String id, String seq) throws Exception{ + int len = seq.length(); + PrintWriter writer; + + //get writer or create an new one + if(outQual.containsKey(len)){ + writer = outQual.get(len); + }else{ + String fname = inBAM.getName(); + int index = fname.lastIndexOf('.'); + fname = fname.substring(0,index) + "." + len + ".qual"; + File csFile = new File(outDir, fname); + writer = new PrintWriter(new FileWriter(csFile)); + outQual.put(len, writer); + printHeader(writer); + System.out.println(getTime() + " creating output: " + csFile.getAbsolutePath() ); + } + + //convert ascii to int + String qual = ""; + for(int i = 0; i < len; i ++){ + char c = seq.charAt(i); + int j = c; + + if(StringUtils.isNullOrEmpty(qual)){ + qual += j; + } else { + qual += " " + j; + } + } + + writer.println(id); + writer.println(qual); + + } + + private String getTime(){ + Calendar currentDate = Calendar.getInstance(); + SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); + return "[" + formatter.format(currentDate.getTime()) + "]"; + } + public static void main(final String[] args) throws IOException, InterruptedException { + + try{ + BAM2CS myCS = new BAM2CS(args); + myCS.CreateCSfile(); + System.exit(0); + }catch(Exception e){ + System.err.println(e.toString()); + Thread.sleep(1); + System.out.println("usage: qmule org.qcmg.qmule.BAM2CS "); + System.exit(1); + } + + } +} diff --git a/qmule/src/org/qcmg/qmule/BAMCompress.java b/qmule/src/org/qcmg/qmule/BAMCompress.java new file mode 100644 index 000000000..7ae4254a3 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BAMCompress.java @@ -0,0 +1,156 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.List; + +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.ValidationStringency; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + +public class BAMCompress { + static QLogger logger = QLoggerFactory.getLogger(BAMCompress.class); + private static File input; + private static File output; + private static int level; + + BAMCompress(File input, File output, int level) throws Exception{ + this.input = input; + this.output = output; + this.level = level; + + logger.info("input file: " + input.getAbsolutePath()); + logger.info("output file name: " + output.getAbsolutePath()); + logger.info("compress level for output BAM: " + level); + } + + public void replaceSeq() throws Exception{ + + SamReader reader = SAMFileReaderFactory.createSAMFileReader( input, ValidationStringency.SILENT); + SAMFileWriter writer = new SAMFileWriterFactory() .makeBAMWriter(reader.getFileHeader(), false, output, level); + + for( SAMRecord record : reader){ + //only replace fully mapped reads, that is no clipping, indels and pading + if( seekFullMppaed(record) && seekMismatch(record) ){ + byte[] base = record.getReadBases(); + for(int i = 0; i < base.length; i++) + base[i] = 'N'; + record.setReadBases(base); + } + + if(record.isValid() == null) // if valid + writer.addAlignment( record ); + } + + reader.close(); + writer.close(); + + logger.info( "input " + reportFileSize(input) ); + logger.info( "output " + reportFileSize(output) ); + + } + + public String reportFileSize(File f){ + + double bytes_in = f.length(); + double kilobytes = (bytes_in / 1024); + double megabytes = (kilobytes / 1024);
+ double gigabytes = (megabytes / 1024); + + return String.format("file size is %.2fG or %.2fK", gigabytes, kilobytes); + } + + + private boolean seekMismatch(SAMRecord r) { + String attribute = (String)r.getAttribute("MD"); + if (null != attribute) { + for (int i = 0, size = attribute.length() ; i < size ; ) { + char c = attribute.charAt(i); + if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { + return false; + } else if ( c == '^') { + //skip the insertion base + while (++i < size && Character.isLetter(attribute.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... + } + return true; + } + return false; + } + + private boolean seekFullMppaed(SAMRecord r){ + + if(r.getReadUnmappedFlag()) + return false; + + //reads with clips or indel, skips, pads + List ele = r.getCigar().getCigarElements(); + for (CigarElement element : r.getCigar().getCigarElements()){ + if( element.getLength() > 0){ + if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) { + return false; + }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){ + return false; + }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){ + return false; + } + } + } + + return true; + } + + + public static void main(String[] args) throws Exception{ + Options op = new Options(BAMCompress.class, args); + if(op.hasHelpOption()){ + System.out.println(Messages.getMessage("USAGE_BAMCompress")); + op.displayHelp(); + System.exit(0); + } + + String output = op.getOutputFileNames()[0]; + String input = op.getInputFileNames()[0]; + if(! new File(input).exists() ) + throw new Exception("input file not exists: " + args[0]); + + if(op.hasLogOption()) + logger = QLoggerFactory.getLogger(BAMCompress.class, op.getLogFile(), op.getLogLevel()); + else + logger = QLoggerFactory.getLogger(BAMCompress.class, output + ".log", op.getLogLevel()); + + String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + logger.logInitialExecutionStats( "qmule " + BAMCompress.class.getName(), version,args); + + int level = op.getcompressLevel(); //default compress level + + logger.logInitialExecutionStats( "qmule " + BAMCompress.class.getName(), null,args); + + long startTime = System.currentTimeMillis(); + BAMCompress compress = new BAMCompress(new File(input), new File(output) , level ); + compress.replaceSeq(); + + logger.info( String.format("It took %d hours, %d seconds to perform the compression", + (int) (System.currentTimeMillis() - startTime) / (1000*60*60), + (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) ); + logger.logFinalExecutionStats(0); + + } + + +} diff --git a/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java b/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java new file mode 100644 index 000000000..363f5ccbc --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java @@ -0,0 +1,250 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file.
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.sql.ResultSet; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMReadGroupRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +//import org.qcmg.db.ConnectionType; +//import org.qcmg.db.GeneusDBConnection; +import org.qcmg.picard.SAMFileReaderFactory; + +public class BAMHeaderChecker { + /* + private static final String SEPERATOR = "&"; + + private static QLogger logger; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + + private final List bamFiles = new ArrayList(); + private List bamDirectories = new ArrayList(); + + private final Map results = new HashMap(); + + private int exitStatus; + + private int engage() throws Exception { + + bamDirectories = Arrays.asList(FileUtils.findDirectories(cmdLineInputFiles[0], "seq_final", true)); + + logger.info("Will check the following directories for bam files:"); + for (File f : bamDirectories) { + logger.info(f.getAbsolutePath()); + bamFiles.addAll(Arrays.asList(FileUtils.findFilesEndingWithFilter(f.getAbsolutePath(), ".bam"))); + } + + // only operates on seq_final bams +// bamFiles = Arrays.asList(FileUtils.findFiles(cmdLineInputFiles[0], ".bam")); + + // loop through each file and get patient, experiment and input_type + String patient = null; + String experiment = null; + String input = null; + + GeneusDBConnection conn = new GeneusDBConnection(ConnectionType.QCMG_MAPSET); + + try { + for (File bamFile : bamFiles) { + String bamFileName = bamFile.getAbsolutePath(); + logger.info("examining bam file: " + bamFileName); + String bamFileSmallName = bamFileName.substring(bamFileName.lastIndexOf(System.getProperty("file.separator")) + 1 , bamFileName.indexOf(".bam")); + + patient = bamFileSmallName.substring(0, 9); //APGI_1234 + experiment = bamFileSmallName.substring(10, bamFileSmallName.lastIndexOf(".")); //APGI_1234 + input = bamFileSmallName.substring(bamFileSmallName.lastIndexOf(".") + 1); //APGI_1234 + logger.info("patient: " + patient + ", experiment: " + experiment + ", input: " + input); + + // get details from bam header + List constituentFiles = getConstituentBamFiles(bamFile); + List trackliteConstituentFiles = getTrackliteBamFiles(patient, experiment, input, conn); + + //loop through tracklite constituentFiles and check that they all have an entry in bam header ConstituentFiles + for (String trackliteBam : trackliteConstituentFiles) { + String [] params = trackliteBam.split(SEPERATOR); + + String result = "OK"; + boolean trackliteMatch = false; + + for (String headerFileBam : constituentFiles) { + if (headerFileBam.contains(params[0]) && headerFileBam.contains(params[1])) { + trackliteMatch = true; + break; + } + } + + if ( ! 
trackliteMatch) { + result = "no corresponding entry in bam file header for tracklite details: " + params[0] + ":" + params[1]; + logger.warn(result); + } + results.put(bamFileSmallName, result); + } + } + } finally { + conn.closeConnection(); + } + + logger.info(""); + logger.info(""); + logger.info("SUMMARY:"); + for (Entry resultsEntry : results.entrySet()) { + logger.info(resultsEntry.getKey() + " : " + resultsEntry.getValue()); + } + logger.info("DONE"); + + return exitStatus; + } + + private List getTrackliteBamFiles(String patient, String experiment, String input, GeneusDBConnection conn) throws Exception { + List trackliteResults = new ArrayList (); + + String sql = "SELECT patient_id, run_name, barcode FROM tracklite_run tr, tracklite_sample ts" + + " WHERE tr.sample_id = ts.processing_id" + + " AND ts.patient_id = '" + patient.replace('_', '-') + "'" + + " AND tr.experiment_type = '" + experiment + "'" + + " AND tr.input_type = '" + input + "'" + + "AND tr.run_status = 'complete'"; + + ResultSet rs = null; + try { + rs = conn.executeSelectQuery(sql); + + while (rs.next()) { + String runName = rs.getString(2); + String barCode = rs.getString(3); + logger.debug("runName: " + runName + ", barCode: " + barCode); + trackliteResults.add(runName + SEPERATOR + barCode); + } + + } finally { + try { + if (null != rs && null != rs.getStatement() ) { + rs.getStatement().close(); + } + } finally { + if (null != rs) rs.close(); + } + } + + return trackliteResults; + } + + private List getConstituentBamFiles(File bamFile) { + List results = new ArrayList(); + SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile); + try { + + SAMFileHeader header = reader.getFileHeader(); + // get the read groups + for (SAMReadGroupRecord readGroup : header.getReadGroups()) { + String constituentBamFile = readGroup.getAttribute("zc"); + if (null == constituentBamFile) + constituentBamFile = readGroup.getAttribute("ZC"); + + if (null != constituentBamFile) { + constituentBamFile = constituentBamFile.substring(2); + logger.debug("read group ZC attribute: " + constituentBamFile); + results.add(constituentBamFile); + } else { + logger.debug("null ZC attribute in file: " + bamFile.getAbsolutePath()); + } + } + + } finally { + reader.close(); + } + return results; + } + + + + + public static void main(String[] args) throws Exception { + BAMHeaderChecker sp = new BAMHeaderChecker(); + int exitStatus = 0; + try { + exitStatus = sp.setup(args); + } catch (Exception e) { + exitStatus = 1; + if (null != logger) + logger.error("Exception caught whilst running BAMHeaderChecker:", e); + else System.err.println("Exception caught whilst running BAMHeaderChecker"); + } + + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(BAMHeaderChecker.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("BAMHeaderChecker", BAMHeaderChecker.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + */ +} diff --git a/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- b/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- new file mode 100644 index 000000000..363f5ccbc --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- @@ -0,0 +1,250 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.sql.ResultSet; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMReadGroupRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +//import org.qcmg.db.ConnectionType; +//import org.qcmg.db.GeneusDBConnection; +import org.qcmg.picard.SAMFileReaderFactory; + +public class BAMHeaderChecker { + /* + private static final String SEPERATOR = "&"; + + private static QLogger logger; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + + private final List bamFiles = new ArrayList(); + private List bamDirectories = new ArrayList(); + + private final Map results = new HashMap(); + + private int exitStatus; + + private int engage() throws Exception { + + bamDirectories = Arrays.asList(FileUtils.findDirectories(cmdLineInputFiles[0], "seq_final", true)); + + logger.info("Will check the following directories for bam files:"); + for (File f : bamDirectories) { + logger.info(f.getAbsolutePath()); + bamFiles.addAll(Arrays.asList(FileUtils.findFilesEndingWithFilter(f.getAbsolutePath(), ".bam"))); + } + + // only operates on seq_final bams +// bamFiles = Arrays.asList(FileUtils.findFiles(cmdLineInputFiles[0], ".bam")); + + // loop through each file and get patient, experiment and input_type + String patient = null; + String experiment = null; + String input = null; + + GeneusDBConnection conn = new GeneusDBConnection(ConnectionType.QCMG_MAPSET); + + try { + for (File bamFile : bamFiles) { + String bamFileName = bamFile.getAbsolutePath(); + logger.info("examining bam file: " + bamFileName); + 
String bamFileSmallName = bamFileName.substring(bamFileName.lastIndexOf(System.getProperty("file.separator")) + 1 , bamFileName.indexOf(".bam")); + + patient = bamFileSmallName.substring(0, 9); //APGI_1234 + experiment = bamFileSmallName.substring(10, bamFileSmallName.lastIndexOf(".")); //APGI_1234 + input = bamFileSmallName.substring(bamFileSmallName.lastIndexOf(".") + 1); //APGI_1234 + logger.info("patient: " + patient + ", experiment: " + experiment + ", input: " + input); + + // get details from bam header + List constituentFiles = getConstituentBamFiles(bamFile); + List trackliteConstituentFiles = getTrackliteBamFiles(patient, experiment, input, conn); + + //loop through tracklite constituentFiles and check that they all have an entry in bam header ConstituentFiles + for (String trackliteBam : trackliteConstituentFiles) { + String [] params = trackliteBam.split(SEPERATOR); + + String result = "OK"; + boolean trackliteMatch = false; + + for (String headerFileBam : constituentFiles) { + if (headerFileBam.contains(params[0]) && headerFileBam.contains(params[1])) { + trackliteMatch = true; + break; + } + } + + if ( ! trackliteMatch) { + result = "no corresponding entry in bam file header for tracklite details: " + params[0] + ":" + params[1]; + logger.warn(result); + } + results.put(bamFileSmallName, result); + } + } + } finally { + conn.closeConnection(); + } + + logger.info(""); + logger.info(""); + logger.info("SUMMARY:"); + for (Entry resultsEntry : results.entrySet()) { + logger.info(resultsEntry.getKey() + " : " + resultsEntry.getValue()); + } + logger.info("DONE"); + + return exitStatus; + } + + private List getTrackliteBamFiles(String patient, String experiment, String input, GeneusDBConnection conn) throws Exception { + List trackliteResults = new ArrayList (); + + String sql = "SELECT patient_id, run_name, barcode FROM tracklite_run tr, tracklite_sample ts" + + " WHERE tr.sample_id = ts.processing_id" + + " AND ts.patient_id = '" + patient.replace('_', '-') + "'" + + " AND tr.experiment_type = '" + experiment + "'" + + " AND tr.input_type = '" + input + "'" + + "AND tr.run_status = 'complete'"; + + ResultSet rs = null; + try { + rs = conn.executeSelectQuery(sql); + + while (rs.next()) { + String runName = rs.getString(2); + String barCode = rs.getString(3); + logger.debug("runName: " + runName + ", barCode: " + barCode); + trackliteResults.add(runName + SEPERATOR + barCode); + } + + } finally { + try { + if (null != rs && null != rs.getStatement() ) { + rs.getStatement().close(); + } + } finally { + if (null != rs) rs.close(); + } + } + + return trackliteResults; + } + + private List getConstituentBamFiles(File bamFile) { + List results = new ArrayList(); + SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile); + try { + + SAMFileHeader header = reader.getFileHeader(); + // get the read groups + for (SAMReadGroupRecord readGroup : header.getReadGroups()) { + String constituentBamFile = readGroup.getAttribute("zc"); + if (null == constituentBamFile) + constituentBamFile = readGroup.getAttribute("ZC"); + + if (null != constituentBamFile) { + constituentBamFile = constituentBamFile.substring(2); + logger.debug("read group ZC attribute: " + constituentBamFile); + results.add(constituentBamFile); + } else { + logger.debug("null ZC attribute in file: " + bamFile.getAbsolutePath()); + } + } + + } finally { + reader.close(); + } + return results; + } + + + + + public static void main(String[] args) throws Exception { + BAMHeaderChecker sp = new BAMHeaderChecker(); + int 
exitStatus = 0; + try { + exitStatus = sp.setup(args); + } catch (Exception e) { + exitStatus = 1; + if (null != logger) + logger.error("Exception caught whilst running BAMHeaderChecker:", e); + else System.err.println("Exception caught whilst running BAMHeaderChecker"); + } + + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(BAMHeaderChecker.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("BAMHeaderChecker", BAMHeaderChecker.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + */ +} diff --git a/qmule/src/org/qcmg/qmule/BAMPileupUtil.java b/qmule/src/org/qcmg/qmule/BAMPileupUtil.java new file mode 100644 index 000000000..b8646c1ee --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BAMPileupUtil.java @@ -0,0 +1,124 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
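// --- Illustrative sketch (hypothetical, not added by this commit) ---
// getConstituentBamFiles() above reads a custom "ZC"/"zc" read-group attribute and
// drops a two-character prefix to recover the constituent BAM path. A minimal sketch
// of the same idea using stock htsjdk SamReaderFactory rather than the project's
// SAMFileReaderFactory wrapper; the file path is hypothetical, and the "two-character
// prefix + path" layout of the ZC value is an assumption taken from the substring(2)
// call in the code above.
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import java.io.File;
import java.io.IOException;

public class ZcAttributeSketch {
    public static void main(String[] args) throws IOException {
        File bam = new File("example.seq_final.bam");                 // hypothetical input
        try (SamReader reader = SamReaderFactory.makeDefault().open(bam)) {
            for (SAMReadGroupRecord rg : reader.getFileHeader().getReadGroups()) {
                String zc = rg.getAttribute("zc");
                if (zc == null) zc = rg.getAttribute("ZC");
                if (zc != null && zc.length() > 2) {
                    // drop the assumed two-character prefix, as the patch does
                    System.out.println("constituent bam: " + zc.substring(2));
                }
            }
        }
    }
}
// --- end sketch ---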
+ */ +package org.qcmg.qmule; + +import htsjdk.samtools.Cigar; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; + +public class BAMPileupUtil { + + public static int SM_CUTOFF = 14; + public static int MD_CUTOFF = 3; + public static int CIGAR_CUTOFF = 34; + + public static int readLengthMatchCounter = 0; + public static int posiitonInDeletionCounter = 0; + + private static final QLogger logger = QLoggerFactory.getLogger(BAMPileupUtil.class); + + +// public static void examinePileup(List sams, VCFRecord record) { +//// int normalCoverage = 0; +// String pileup = ""; +// String qualities = ""; +// for (SAMRecord sam : sams ) { +// +// if ( eligibleSamRecord(sam)) { +//// ++normalCoverage; +// +// int offset = getReadPosition(sam, record.getPosition()); +// +// if (offset < 0) { +// logger.info("invalid offset position - position falls within deletion?? position: "+ record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", alignment end: " + sam.getAlignmentEnd() + ", read length: " + sam.getReadLength() + " cigar: "+ sam.getCigarString()); +// continue; +// } +// +// if (offset >= sam.getReadLength()) { +//// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); +// // set to last entry in sequence +//// logger.info("adjusting offset to read length -1"); +//// String read = sam.getReadString(); +//// int refPosition = sam.getReferencePositionAtReadPosition(offset); +// logger.info("offset: " + offset + ", position: " + record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", unclipped alignment start: " + sam.getUnclippedStart() + ", alignment end: " + sam.getAlignmentEnd()); +// logger.info( sam.format()); +//// offset = sam.getReadLength() -1; +//// logger.info("char at adjusted offset: " + read.charAt(offset)); +//// logger.info("md tag: " + sam.getStringAttribute("MD")); +// continue; +// } +// +// char c = sam.getReadString().charAt(offset); +// pileup += sam.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c; +// qualities += sam.getBaseQualityString().charAt(offset); +// } +// } +// +// +// if (pileup.length() > 0) +// record.setPileup(PileupUtil.getPileupCounts(pileup, qualities)); +// +// } + + /** + * Determines whether a sam record is eligible by applying some filtering criteria. + * Currently filters on the SM tag value, some of the flags, and the Cigar string + * + *

NOTE that we should also be filtering on MD tag, but GATK removes this + * tag when it does its local realignment, so there is no need to include this check for the time being + * + * @param record SAMRecord that is being put through the filter check + * @return boolean indicating if the record has passed the filter + */ + public static boolean eligibleSamRecord(SAMRecord record) { + if (null == record) return false; + Integer sm = record.getIntegerAttribute("SM"); + return ! record.getDuplicateReadFlag() + && (null == sm ? false : sm.intValue() > SM_CUTOFF) +// && tallyMDMismatches(record.getStringAttribute("MD")) < MD_CUTOFF // + && ((record.getReadPairedFlag() && record.getSecondOfPairFlag() && record.getProperPairFlag()) + || tallyCigarMatchMismatches(record.getCigar()) > CIGAR_CUTOFF); + + } + + public static int tallyCigarMatchMismatches(Cigar cigar) { + int tally = 0; + if (null != cigar) { + for (CigarElement element : cigar.getCigarElements()) { + if (CigarOperator.M == element.getOperator()) { + tally += element.getLength(); + } + } + } + return tally; + } + + public static int tallyMDMismatches(String mdData) { + int count = 0; + if (null != mdData) { + for (int i = 0, size = mdData.length() ; i < size ; ) { + + if (isValidMismatch(mdData.charAt(i))) { + count++; + i++; + } else if ('^' == mdData.charAt(i)) { + while (++i < size && Character.isLetter(mdData.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... + } + } + return count; + } + + private static boolean isValidMismatch(char c) { + return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; + } + +} diff --git a/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- b/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- new file mode 100644 index 000000000..b8646c1ee --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- @@ -0,0 +1,124 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import htsjdk.samtools.Cigar; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; + +public class BAMPileupUtil { + + public static int SM_CUTOFF = 14; + public static int MD_CUTOFF = 3; + public static int CIGAR_CUTOFF = 34; + + public static int readLengthMatchCounter = 0; + public static int posiitonInDeletionCounter = 0; + + private static final QLogger logger = QLoggerFactory.getLogger(BAMPileupUtil.class); + + +// public static void examinePileup(List sams, VCFRecord record) { +//// int normalCoverage = 0; +// String pileup = ""; +// String qualities = ""; +// for (SAMRecord sam : sams ) { +// +// if ( eligibleSamRecord(sam)) { +//// ++normalCoverage; +// +// int offset = getReadPosition(sam, record.getPosition()); +// +// if (offset < 0) { +// logger.info("invalid offset position - position falls within deletion?? 
position: "+ record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", alignment end: " + sam.getAlignmentEnd() + ", read length: " + sam.getReadLength() + " cigar: "+ sam.getCigarString()); +// continue; +// } +// +// if (offset >= sam.getReadLength()) { +//// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); +// // set to last entry in sequence +//// logger.info("adjusting offset to read length -1"); +//// String read = sam.getReadString(); +//// int refPosition = sam.getReferencePositionAtReadPosition(offset); +// logger.info("offset: " + offset + ", position: " + record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", unclipped alignment start: " + sam.getUnclippedStart() + ", alignment end: " + sam.getAlignmentEnd()); +// logger.info( sam.format()); +//// offset = sam.getReadLength() -1; +//// logger.info("char at adjusted offset: " + read.charAt(offset)); +//// logger.info("md tag: " + sam.getStringAttribute("MD")); +// continue; +// } +// +// char c = sam.getReadString().charAt(offset); +// pileup += sam.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c; +// qualities += sam.getBaseQualityString().charAt(offset); +// } +// } +// +// +// if (pileup.length() > 0) +// record.setPileup(PileupUtil.getPileupCounts(pileup, qualities)); +// +// } + + /** + * Determines whether a sam record is eligible by applying some filtering criteria. + * Currently filters on the SM tag value, some of the flags, and the Cigar string + * + *

NOTE that we should also be filtering on MD tag, but GATK removes this + * tag when it does its local realignment, so there is no need to include this check for the time being + * + * @param record SAMRecord that is being put through the filter check + * @return boolean indicating if the record has passed the filter + */ + public static boolean eligibleSamRecord(SAMRecord record) { + if (null == record) return false; + Integer sm = record.getIntegerAttribute("SM"); + return ! record.getDuplicateReadFlag() + && (null == sm ? false : sm.intValue() > SM_CUTOFF) +// && tallyMDMismatches(record.getStringAttribute("MD")) < MD_CUTOFF // + && ((record.getReadPairedFlag() && record.getSecondOfPairFlag() && record.getProperPairFlag()) + || tallyCigarMatchMismatches(record.getCigar()) > CIGAR_CUTOFF); + + } + + public static int tallyCigarMatchMismatches(Cigar cigar) { + int tally = 0; + if (null != cigar) { + for (CigarElement element : cigar.getCigarElements()) { + if (CigarOperator.M == element.getOperator()) { + tally += element.getLength(); + } + } + } + return tally; + } + + public static int tallyMDMismatches(String mdData) { + int count = 0; + if (null != mdData) { + for (int i = 0, size = mdData.length() ; i < size ; ) { + + if (isValidMismatch(mdData.charAt(i))) { + count++; + i++; + } else if ('^' == mdData.charAt(i)) { + while (++i < size && Character.isLetter(mdData.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... + } + } + return count; + } + + private static boolean isValidMismatch(char c) { + return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; + } + +} diff --git a/qmule/src/org/qcmg/qmule/BamMismatchCounts.java b/qmule/src/org/qcmg/qmule/BamMismatchCounts.java new file mode 100644 index 000000000..4501a5994 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BamMismatchCounts.java @@ -0,0 +1,160 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
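// --- Illustrative sketch (hypothetical, not added by this commit) ---
// tallyMDMismatches() in BAMPileupUtil above counts substituted bases in an MD tag
// while skipping deleted reference bases (the run of letters after '^'); the same
// parsing reappears in BamMismatchCounts.countMismatch() below. A few worked
// examples calling the method added in this commit:
import org.qcmg.qmule.BAMPileupUtil;

public class MdTallySketch {
    public static void main(String[] args) {
        System.out.println(BAMPileupUtil.tallyMDMismatches("25"));        // 0 - perfect match
        System.out.println(BAMPileupUtil.tallyMDMismatches("10A5^AC6"));  // 1 - the deleted "AC" is skipped
        System.out.println(BAMPileupUtil.tallyMDMismatches("3C0T41"));    // 2 - two substitutions
    }
}
// --- end sketch ---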
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.ValidationStringency; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; + +public class BamMismatchCounts { + static QLogger logger = QLoggerFactory.getLogger(BamMismatchCounts.class); + static long[] mismatch = new long[100]; + + static HashMap counts = new HashMap(); + static long total = 0; + static long unmapped = 0; + static long clipped = 0; + static long indel = 0; + static long skipPad = 0; + static long fullMapped = 0; + static long noMDreads = 0; + + /** + * count the mismatch base number based on the MD field + * @param r: samrecord + */ + private static void countMismatch(SAMRecord r) { + String attribute = (String)r.getAttribute("MD"); + if (null != attribute) { + int count = 0; + for (int i = 0, size = attribute.length() ; i < size ; ) { + char c = attribute.charAt(i); + if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { + count++; + i++; + } else if ( c == '^') { + //skip the insertion base + while (++i < size && Character.isLetter(attribute.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... + } + mismatch[count] ++; + + }else + noMDreads ++; + + + } + + /** + * + * @param r: sam record + * @return true if this read is full length mapped without any indels, skips and pads + */ + static private Boolean seekFullMapped(SAMRecord r){ + + if(r.getReadUnmappedFlag()){ + unmapped ++; + return false; + } + //reads with clips or indel, skips, pads + else{ + List ele = r.getCigar().getCigarElements(); + for (CigarElement element : r.getCigar().getCigarElements()){ + if( element.getLength() > 0){ + if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) { + clipped ++; + return false; + }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){ + indel ++; + return false; + }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){ + skipPad ++; + return false; + } + } + } + //count mismatch after the for loop + return true; + } + } + + /** + * survey the mismatch stats on full length mapped reads + * @param args: SAM/BAM file with full path, log file with full path + * @throws Exception + */ + public static void main(final String[] args) throws Exception { + Options op = new Options(BamMismatchCounts.class, args); + if(op.hasHelpOption()){ + System.out.println(Messages.getMessage("USAGE_BamMismatchCounts")); + op.displayHelp(); + System.exit(0); + } + + + if(op.hasLogOption()) + logger = QLoggerFactory.getLogger(BamMismatchCounts.class, op.getLogFile(), op.getLogLevel()); + else + logger = QLoggerFactory.getLogger(BamMismatchCounts.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); + + String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + logger.logInitialExecutionStats( "qmule " + BamMismatchCounts.class.getName(), version,args); + + String output = op.getOutputFileNames()[0]; + String input = op.getInputFileNames()[0]; + SamReader reader = 
SAMFileReaderFactory.createSAMFileReader(new File(input), + ValidationStringency.SILENT); + + for(int i = 0; i < 100; i++) mismatch[i] = 0; + for (SAMRecord r : reader){ + total ++; + if(seekFullMapped( r)){ + fullMapped ++; + countMismatch(r); + } + } + reader.close(); + + //report mismatch + String S_mismatch = "mismatch matrix for fully mapped reads is below:\nmismatch\treads_number\tratio_to_(fullmapped,total)\n"; + for(int i = 0; i < 100; i++) + if(mismatch[i] > 0){ + int p1 = Math.round(mismatch[i] * 100 / fullMapped); + int p2 = Math.round(mismatch[i] * 100 / total); + S_mismatch += String.format("%d\t%d\t(%d%%,%d%%)\n", i,mismatch[i],p1, p2); + } + + Files.write(Paths.get(output), S_mismatch.getBytes() ); + + logger.info("total records in file: " + total ); + logger.info("unmapped records: " + unmapped); + logger.info("records with clipping (CIGAR S,H): " + clipped); + logger.info("records with indel (CIGAR I,D) : " + indel); + logger.info("records with skipping or padding (CIGAR N,P) : " + skipPad); + logger.info("records mapped full-length: " + fullMapped); + logger.info("records mapped full-length but missing MD field: " + noMDreads); + logger.info("the mismatch counts matrix is outputed to " + args[1]); + logger.logFinalExecutionStats(0); + + } + +} diff --git a/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- b/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- new file mode 100644 index 000000000..4501a5994 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- @@ -0,0 +1,160 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.ValidationStringency; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; + +public class BamMismatchCounts { + static QLogger logger = QLoggerFactory.getLogger(BamMismatchCounts.class); + static long[] mismatch = new long[100]; + + static HashMap counts = new HashMap(); + static long total = 0; + static long unmapped = 0; + static long clipped = 0; + static long indel = 0; + static long skipPad = 0; + static long fullMapped = 0; + static long noMDreads = 0; + + /** + * count the mismatch base number based on the MD field + * @param r: samrecord + */ + private static void countMismatch(SAMRecord r) { + String attribute = (String)r.getAttribute("MD"); + if (null != attribute) { + int count = 0; + for (int i = 0, size = attribute.length() ; i < size ; ) { + char c = attribute.charAt(i); + if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { + count++; + i++; + } else if ( c == '^') { + //skip the insertion base + while (++i < size && Character.isLetter(attribute.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... 
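// --- Illustrative sketch (hypothetical, not added by this commit) ---
// seekFullMapped() above keeps only reads whose CIGAR is free of clipping (S/H),
// indels (I/D) and skips/pads (N/P); everything else is counted separately and
// excluded from the mismatch matrix. A standalone htsjdk sketch of the same
// classification, using hand-built CIGARs instead of real SAMRecords:
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import java.util.Arrays;

public class CigarClassifySketch {

    static String classify(Cigar cigar) {
        for (CigarElement e : cigar.getCigarElements()) {
            CigarOperator op = e.getOperator();
            if (e.getLength() > 0) {
                if (op == CigarOperator.S || op == CigarOperator.H) return "clipped";
                if (op == CigarOperator.I || op == CigarOperator.D) return "indel";
                if (op == CigarOperator.N || op == CigarOperator.P) return "skip/pad";
            }
        }
        return "full-length match";
    }

    public static void main(String[] args) {
        Cigar full = new Cigar(Arrays.asList(new CigarElement(100, CigarOperator.M)));
        Cigar clip = new Cigar(Arrays.asList(
                new CigarElement(10, CigarOperator.S), new CigarElement(90, CigarOperator.M)));
        System.out.println(classify(full));  // full-length match
        System.out.println(classify(clip));  // clipped
    }
}
// --- end sketch ---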
+ } + mismatch[count] ++; + + }else + noMDreads ++; + + + } + + /** + * + * @param r: sam record + * @return true if this read is full length mapped without any indels, skips and pads + */ + static private Boolean seekFullMapped(SAMRecord r){ + + if(r.getReadUnmappedFlag()){ + unmapped ++; + return false; + } + //reads with clips or indel, skips, pads + else{ + List ele = r.getCigar().getCigarElements(); + for (CigarElement element : r.getCigar().getCigarElements()){ + if( element.getLength() > 0){ + if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) { + clipped ++; + return false; + }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){ + indel ++; + return false; + }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){ + skipPad ++; + return false; + } + } + } + //count mismatch after the for loop + return true; + } + } + + /** + * survey the mismatch stats on full length mapped reads + * @param args: SAM/BAM file with full path, log file with full path + * @throws Exception + */ + public static void main(final String[] args) throws Exception { + Options op = new Options(BamMismatchCounts.class, args); + if(op.hasHelpOption()){ + System.out.println(Messages.getMessage("USAGE_BamMismatchCounts")); + op.displayHelp(); + System.exit(0); + } + + + if(op.hasLogOption()) + logger = QLoggerFactory.getLogger(BamMismatchCounts.class, op.getLogFile(), op.getLogLevel()); + else + logger = QLoggerFactory.getLogger(BamMismatchCounts.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); + + String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + logger.logInitialExecutionStats( "qmule " + BamMismatchCounts.class.getName(), version,args); + + String output = op.getOutputFileNames()[0]; + String input = op.getInputFileNames()[0]; + SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(input), + ValidationStringency.SILENT); + + for(int i = 0; i < 100; i++) mismatch[i] = 0; + for (SAMRecord r : reader){ + total ++; + if(seekFullMapped( r)){ + fullMapped ++; + countMismatch(r); + } + } + reader.close(); + + //report mismatch + String S_mismatch = "mismatch matrix for fully mapped reads is below:\nmismatch\treads_number\tratio_to_(fullmapped,total)\n"; + for(int i = 0; i < 100; i++) + if(mismatch[i] > 0){ + int p1 = Math.round(mismatch[i] * 100 / fullMapped); + int p2 = Math.round(mismatch[i] * 100 / total); + S_mismatch += String.format("%d\t%d\t(%d%%,%d%%)\n", i,mismatch[i],p1, p2); + } + + Files.write(Paths.get(output), S_mismatch.getBytes() ); + + logger.info("total records in file: " + total ); + logger.info("unmapped records: " + unmapped); + logger.info("records with clipping (CIGAR S,H): " + clipped); + logger.info("records with indel (CIGAR I,D) : " + indel); + logger.info("records with skipping or padding (CIGAR N,P) : " + skipPad); + logger.info("records mapped full-length: " + fullMapped); + logger.info("records mapped full-length but missing MD field: " + noMDreads); + logger.info("the mismatch counts matrix is outputed to " + args[1]); + logger.logFinalExecutionStats(0); + + } + +} diff --git a/qmule/src/org/qcmg/qmule/BamRecordCounter.java b/qmule/src/org/qcmg/qmule/BamRecordCounter.java new file mode 100644 index 000000000..d81e01a9c --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BamRecordCounter.java @@ -0,0 +1,44 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
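// --- Illustrative sketch (hypothetical, not added by this commit) ---
// The percentage columns in the BamMismatchCounts report above are computed as
// Math.round(mismatch[i] * 100 / fullMapped), where the division runs in long
// arithmetic first, so the value is truncated before Math.round sees it
// (e.g. 2 of 3 reads prints 66%, not 67%). A standalone comparison of the forms:
public class RatioSketch {
    public static void main(String[] args) {
        long hits = 2, total = 3;
        System.out.println(Math.round(hits * 100 / total));    // 66 - integer division, then round
        System.out.println(Math.round(hits * 100.0 / total));  // 67 - floating-point division
        System.out.printf("%.1f%%%n", hits * 100.0 / total);   // 66.7%
    }
}
// --- end sketch ---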
+ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; + +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; + +public class BamRecordCounter { + + private static final QLogger logger = QLoggerFactory.getLogger(BamRecordCounter.class); + + public static void main(String args[]) { + + if (null != args && args.length > 0) { + for (String filename : args) { + SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(filename)); + long count = 0; + long duplicates = 0; + long startTime = System.currentTimeMillis(); + for (SAMRecord r : reader) { + count++; + if (r.getDuplicateReadFlag()) + duplicates++; + } + logger.info("no of records in file [" + filename + "] is: " + count); + logger.info("no of duplicate records: " + duplicates); + logger.info("It took " + (System.currentTimeMillis() - startTime) + "ms to perform the count."); + } + } else { + logger.info("USAGE: qmule " + BamRecordCounter.class.getName() + " "); + } + } + +} diff --git a/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- b/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- new file mode 100644 index 000000000..d81e01a9c --- /dev/null +++ b/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- @@ -0,0 +1,44 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; + +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.SAMFileReaderFactory; + +public class BamRecordCounter { + + private static final QLogger logger = QLoggerFactory.getLogger(BamRecordCounter.class); + + public static void main(String args[]) { + + if (null != args && args.length > 0) { + for (String filename : args) { + SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(filename)); + long count = 0; + long duplicates = 0; + long startTime = System.currentTimeMillis(); + for (SAMRecord r : reader) { + count++; + if (r.getDuplicateReadFlag()) + duplicates++; + } + logger.info("no of records in file [" + filename + "] is: " + count); + logger.info("no of duplicate records: " + duplicates); + logger.info("It took " + (System.currentTimeMillis() - startTime) + "ms to perform the count."); + } + } else { + logger.info("USAGE: qmule " + BamRecordCounter.class.getName() + " "); + } + } + +} diff --git a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java new file mode 100644 index 000000000..3b3fbc798 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java @@ -0,0 +1,676 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.Map.Entry; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionName; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + + +public class CompareReferenceRegions { + + private static final String MODE_ONEWAY = "oneway"; + private static final String MODE_ANNOTATE = "annotate"; + private static final String MODE_TWOWAY = "twoway"; + private static final String MODE_INTERSECT = "intersect"; + private static final String MODE_UNIQUE = "unique"; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private List chromosomes = new ArrayList(); + private int overlapCount = 0; + private int notOverlappingCount = 0; + private int recordCount; + private String mode; + private int column; + private String annotation; + private static QLogger logger; + private static final String MAF = "maf"; + private static final String GFF3 = "gff3"; + private static final String GTF = "gtf"; + private static final String BED = "bed"; + private static final String VCF = "vcf"; + private static final String TAB = "txt"; + private static final String DCC1 = "dcc1"; + + private void runOnewayComparison(File inputFile, File comparisonFile, + File outputOverlapFile, File outputNoOverlapFile) throws Exception { + + if (mode.equals(MODE_ANNOTATE)) { + logger.info("If overlapping, will annotate column: " + column+1 +" of file with the annotation " + annotation); + } + + //get a list of the chromosomes + setUp(inputFile, outputOverlapFile, outputNoOverlapFile); + + logger.info("Input file: " + inputFile.getAbsolutePath()); + logger.info("Comparison file: " + comparisonFile.getAbsolutePath()); + + logger.info("Chromosomes to analyze: " + chromosomes.size()); + + for (String c: chromosomes) { + logger.info("Getting records for chromosome: " + c); + Map inputRecords = readRecords(inputFile, c); + Map compareRecords = readRecords(comparisonFile, c); + compareRecords(inputRecords, compareRecords, outputOverlapFile, outputNoOverlapFile); + } + logSummary(); + clear(); + } + + private void logSummary() { + logger.info("SUMMARY"); + logger.info("Total Records: " + recordCount); + logger.info("Total Records in supplied reference regions: " + overlapCount); + logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); + } + + private void runAnnotateComparison(File inputFile, File comparisonFile, + File outputOverlapFile) throws Exception { + + //get a list of the chromosomes + setUp(inputFile, outputOverlapFile, null); + + logger.info("Input file: " + inputFile.getAbsolutePath()); + logger.info("Comparison file: " + comparisonFile.getAbsolutePath()); + + logger.info("Chromosomes to analyze: " + chromosomes.size()); + + for (String c: chromosomes) { + logger.info("Getting records for chromosome: " + c); + Map inputRecords = readRecords(inputFile, c); + Map compareRecords = readRecords(comparisonFile, c); + compareRecordsAndAnnotate(inputRecords, compareRecords, outputOverlapFile); + } + logSummary(); + clear(); + } + + private void 
runIntersectComparison() throws Exception { + //Set first input file as primary + File primaryInputFile = new File(cmdLineInputFiles[0]); + //Single output file + File outputFile = new File(cmdLineOutputFiles[0]); + + int[] counts = new int[cmdLineInputFiles.length]; + counts[0] = 0; + + setUp(primaryInputFile, outputFile, null); + + //logging + logger.info("Input file 1: " + primaryInputFile.getAbsolutePath()); + for (int i=1; i inputRecords = readRecords(primaryInputFile, c); + counts[0] += inputRecords.size(); + for (int i=1; i compareRecords = readRecords(compareFile, c); + counts[i] += compareRecords.size(); + compareOverlapRecords(c, inputRecords, compareRecords, getFileType(primaryInputFile)); + } + overlapCount += inputRecords.size(); + //any input records left at the end are intersecting + writeRecords(inputRecords, outputFile); + } + for (int i=0; i inputRecords = readRecords(primaryInputFile, c); + Map compareRecords = new TreeMap(); + counts[f] += inputRecords.size(); + for (int i=0; i currentRecords = readRecords(compareFile, c); + counts[i] = counts[i] + currentRecords.size(); + compareRecords.putAll(currentRecords); + } + } + compareOverlapRecords(c, inputRecords, compareRecords, getFileType(primaryInputFile)); + notOverlappingCount += inputRecords.size(); + //any input records left at the end are unique + writeRecords(inputRecords, outputFile); + logger.info(counts[f] + " total records for file " +cmdLineInputFiles[f]); + for (int i=0; i inputRecords, Map compareRecords, String inputFileType) throws Exception { + + Iterator> entries = inputRecords.entrySet().iterator(); + while (entries.hasNext()) { + Entry entry = entries.next(); + + boolean isOverlapping = compareRecord(entry, compareRecords, inputFileType); + + if (mode.equals(MODE_INTERSECT) && !isOverlapping) { + //remove input record if it isn't overlapping and won't intersect with all records + entries.remove(); + } + if (mode.equals(MODE_UNIQUE) && isOverlapping) { + entries.remove(); + } + } + } + + private void compareRecordsAndAnnotate(Map inputRecords, + Map compareRecords, + File outputOverlapFile) throws Exception { + BufferedWriter overlapWriter = new BufferedWriter(new FileWriter(outputOverlapFile, true)); + + try { + for (Entry entry : inputRecords.entrySet()) { + recordCount++; + boolean isOverlapping = compareRecord(entry, compareRecords, null); + + if (isOverlapping) { + overlapCount++; + } else { + notOverlappingCount++; + } + writeRecord(overlapWriter, entry.getValue()); + } + } finally { + overlapWriter.close(); + } + } + + private void compareRecords(Map inputRecords, + Map compareRecords, + File outputOverlapFile, File outputNoOverlapFile) throws Exception { + BufferedWriter overlapWriter = new BufferedWriter(new FileWriter(outputOverlapFile, true)); + BufferedWriter noOverlapWriter = new BufferedWriter(new FileWriter(outputNoOverlapFile, true)); + + try { + for (Entry entry : inputRecords.entrySet()) { + + recordCount++; + + boolean isOverlapping = compareRecord(entry, compareRecords, null); + + if (isOverlapping) { + overlapCount++; + writeRecord(overlapWriter, entry.getValue()); + } else { + notOverlappingCount++; + if (mode.equals(MODE_ANNOTATE)) { + + } else { + writeRecord(noOverlapWriter, entry.getValue()); + } + } + } + } finally { + overlapWriter.close(); + noOverlapWriter.close(); + } + } + + private boolean compareRecord(Entry entry, Map compareRecords, String inputFileType) throws Exception { + ChrPosition inputChrPos = entry.getKey(); + TabbedRecord inputRecord = entry.getValue(); + 
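// --- Illustrative sketch (hypothetical, not added by this commit) ---
// The annotate/intersect branches below rebuild the tab-delimited record by
// looping with  data += s + "\t", which leaves a trailing tab on every rewritten
// line. A standalone equivalent without the trailing tab (String.join is available
// since Java 8); shown commented out so it does not alter the method it sits in:
//
//     String[] values = inputRecord.getDataArray();
//     // ... modify values[column] or the start/end columns as the code below does ...
//     inputRecord.setData(String.join("\t", values));   // no trailing tab
//
// --- end sketch ---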
boolean isOverlapping = false; + //check to see if it is overlapping with the comparison reference region + for (Entry compareEntry : compareRecords.entrySet()) { + ChrPosition comparePos = compareEntry.getKey(); + if (comparePos.getEndPosition() < inputChrPos.getStartPosition()) { + continue; + } else if (comparePos.getStartPosition() > inputChrPos.getEndPosition()) { + break; + } else { + if (tabbedRecordFallsInCompareRecord(inputChrPos, inputRecord, compareEntry)) { + isOverlapping = true; + if (mode.equals(MODE_ANNOTATE)) { + String[] values = inputRecord.getDataArray(); + String oldVal = values[column]; + if (oldVal.equals("")) { + values[column] = annotation; + } else { + if (oldVal.endsWith(";")) { + values[column] = oldVal + annotation; + } else { + values[column] = oldVal + ";" + annotation; + } + } + String data = ""; + for (String s: values) { + data += s + "\t"; + } + inputRecord.setData(data); + } + if (mode.equals(MODE_INTERSECT)) { + //change the ends?? + int[] indexes = getChrIndex(inputFileType, entry.getValue().getData().split("\t")); + String[] array = inputRecord.getDataArray(); + + if (inputChrPos.getStartPosition() > compareEntry.getKey().getStartPosition()) { + array[indexes[1]] = Integer.toString(compareEntry.getKey().getStartPosition()); + } + if (inputChrPos.getEndPosition() < compareEntry.getKey().getEndPosition()) { + array[indexes[2]] = Integer.toString(compareEntry.getKey().getEndPosition()); + } + String data = ""; + for (String s: array) { + data += s + "\t"; + } + inputRecord.setData(data); + entry.setValue(inputRecord); + } + } + } + } + return isOverlapping; + } + + + private void writeRecords(Map records, File outputFile) throws IOException { + BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile, true)); + + for (Entry entry: records.entrySet()) { + writeRecord(writer, entry.getValue()); + } + writer.close(); + } + + private void writeRecord(BufferedWriter writer, TabbedRecord record) throws IOException { + if (!record.getData().endsWith("\n")) { + record.setData(record.getData() + "\n"); + } + writer.write(record.getData()); + } + + private TreeMap readRecords(File inputFile, String chromosome) throws Exception { + + TabbedFileReader reader = new TabbedFileReader(inputFile); + TreeMap records = new TreeMap(); + String fileType = getFileType(inputFile); + try { + + Iterator iterator = reader.getRecordIterator(); + + while (iterator.hasNext()) { + + TabbedRecord tab = iterator.next(); + if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis") || tab.getData().startsWith("Chr") ) { + continue; + } + ChrPosition chrPos = getChrPosition(fileType, tab); + if (chrPos.getChromosome().equals(chromosome)) { + records.put(chrPos, tab); + } + } + + } finally { + reader.close(); + } + + return records; + } + + private String getFileType(File inputFile) { + int index = inputFile.getName().lastIndexOf(".") + 1; + String name = inputFile.getName().substring(index, inputFile.getName().length()); + + if (name.equals("dcc")) { + return "dcc1"; + } + + return name; + } + + private void setUp(File file, File outputFileOne, File outputFileTwo) throws Exception { + TabbedFileReader reader = new TabbedFileReader(file); + Iterator iterator = reader.getRecordIterator(); + + String fileType = getFileType(file); + List header = new ArrayList(); + if (reader.getHeader() != null) { + Iterator iter = reader.getHeader().iterator(); + while (iter.hasNext()) { + header.add(iter.next()); + } + } + + while 
(iterator.hasNext()) { + + TabbedRecord tab = iterator.next(); + + if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis") || tab.getData().startsWith("Chr") ) { + header.add(tab.getData()); + continue; + } + + ChrPosition chrPos = getChrPosition(fileType, tab); + + if (!chromosomes.contains(chrPos.getChromosome())) { + chromosomes.add(chrPos.getChromosome()); + } + } + reader.close(); + + if (outputFileOne != null) { + writeHeader(header, outputFileOne); + } + if (outputFileTwo != null) { + writeHeader(header, outputFileTwo); + } + + } + + private int[] getChrIndex(String inputFileType, String[] values) throws Exception { + + int chrIndex = 0; + int startIndex = 0; + int endIndex = 0; + + if (inputFileType.equals(MAF)) { + chrIndex = 4; + startIndex = 5; + endIndex = 6; + } else if (inputFileType.equals(DCC1)) { + chrIndex = 4; + startIndex = 5; + endIndex = 6; + } else if (inputFileType.equals(BED)) { + chrIndex = 0; + startIndex = 1; + endIndex = 2; + } else if (inputFileType.equals(GFF3) || inputFileType.equals(GTF)) { + chrIndex = 0; + startIndex = 3; + endIndex = 4; + } else if (inputFileType.equals(VCF)) { + chrIndex = 0; + startIndex = 1; + endIndex = 1; + if (values.length >= 8) { + String[] infos = values[7].split("\t"); + + for (String info : infos) { + String[] params = info.split("="); + if (params.length == 2) { + if (params[0].equals("END")) { + endIndex = 2; + values[2] = params[1]; + } + } + } + } + //NEED TO CHANGE FOR INDELS + } else if (inputFileType.equals(TAB)) { + chrIndex = 0; + startIndex = 1; + endIndex = 2; + } else { + throw new Exception("Input file type is not recognized"); + } + int[] arr = {chrIndex, startIndex, endIndex}; + return arr; + } + + private ChrPosition getChrPosition(String inputFileType, TabbedRecord tab) throws Exception { + String[] values = tab.getData().split("\t"); + ChrPosition chr = null; + + int[] indexes = getChrIndex(inputFileType, values); + int chrIndex = indexes[0]; + int startIndex = indexes[1]; + int endIndex = indexes[2]; + + if (inputFileType.equals(BED)) { + chr = new ChrRangePosition(values[chrIndex], new Integer(values[startIndex])+1, new Integer(values[endIndex])+1); + } else { + String chromosome = values[chrIndex]; + if (!chromosome.contains("GL") && !chromosome.startsWith("chr")) { + chromosome = "chr" + chromosome; + } + if (chromosome.equals("chrM")) { + chromosome = "chrMT"; + } + if (inputFileType.equals(MAF)) { + chr = new ChrPositionName(chromosome, new Integer(values[startIndex]), new Integer(values[endIndex]), values[0]); + } else { + chr = new ChrRangePosition(chromosome, new Integer(values[startIndex]), new Integer(values[endIndex])); + } + } + return chr; + } + + private boolean tabbedRecordFallsInCompareRecord(ChrPosition inputChrPos, TabbedRecord inputRecord, Entry entry) { + if (entry != null) { + ChrPosition compareChrPos = entry.getKey(); + if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() && inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || + (inputChrPos.getEndPosition() >= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() <= compareChrPos.getEndPosition()) + || (inputChrPos.getStartPosition() <= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() >= compareChrPos.getEndPosition())) { + return true; + } + } + return false; + } + + public String[] getCmdLineInputFiles() { + return cmdLineInputFiles; + } + + public void setCmdLineInputFiles(String[] cmdLineInputFiles) { + 
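// --- Illustrative sketch (hypothetical, not added by this commit) ---
// tabbedRecordFallsInCompareRecord() above tests three start/end cases; for
// well-formed closed ranges the same result comes from the single standard
// predicate "start1 <= end2 && start2 <= end1". A minimal standalone check:
public class OverlapSketch {
    static boolean overlaps(int start1, int end1, int start2, int end2) {
        return start1 <= end2 && start2 <= end1;   // covers partial overlap and full containment
    }
    public static void main(String[] args) {
        System.out.println(overlaps(100, 200, 150, 300)); // true  - partial overlap
        System.out.println(overlaps(100, 200, 201, 300)); // false - adjacent, no overlap
        System.out.println(overlaps(100, 400, 150, 300)); // true  - full containment
    }
}
// --- end sketch ---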
this.cmdLineInputFiles = cmdLineInputFiles; + } + + + private void writeHeader(List header, File outputOverlapFile) throws IOException { + BufferedWriter writer = new BufferedWriter(new FileWriter(outputOverlapFile, true)); + + for (String h: header) { + + writer.write(h + "\n"); + } + writer.close(); + } + + public List getChromosomes() { + return chromosomes; + } + + public void setChromosomes(List chromosomes) { + this.chromosomes = chromosomes; + } + + + public int getOverlapCount() { + return overlapCount; + } + + public void setOverlapCount(int overlapCount) { + this.overlapCount = overlapCount; + } + + public int getNotOverlappingCount() { + return notOverlappingCount; + } + + public void setNotOverlappingCount(int notOverlappingCount) { + this.notOverlappingCount = notOverlappingCount; + } + + public int getMafCount() { + return recordCount; + } + + public void setMafCount(int mafCount) { + this.recordCount = mafCount; + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(CompareReferenceRegions.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareReferenceRegions", CompareReferenceRegions.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + //output files + cmdLineOutputFiles = options.getOutputFileNames(); + + if (cmdLineOutputFiles.length >= 1) { + if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + + for (String file : cmdLineOutputFiles) { + if (new File(file).exists() && !new File(file).isDirectory()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + } + mode = options.getMode(); + if (mode == null) { + mode = MODE_ONEWAY; + } + logger.info("Mode: " + mode); + + if (mode.equals(MODE_ANNOTATE)) { + //take away 1 to get index of column rather than column number + column = new Integer(options.getColumn()) -1; + annotation = options.getAnnotation(); + } + + return engage(); + } + + return returnStatus; + } + + + private int engage() throws Exception { + + if (mode.equals(MODE_ONEWAY) || mode.equals(MODE_TWOWAY)) { + runOnewayComparison(new File(cmdLineInputFiles[0]), new File(cmdLineInputFiles[1]), new File(cmdLineOutputFiles[0]), new File(cmdLineOutputFiles[1])); + if (mode.equals(MODE_TWOWAY)) { + runOnewayComparison(new File(cmdLineInputFiles[1]), new File(cmdLineInputFiles[0]), new File(cmdLineOutputFiles[2]), new File(cmdLineOutputFiles[3])); + } + } else if (mode.equals(MODE_ANNOTATE)) { + runAnnotateComparison(new File(cmdLineInputFiles[0]), new File(cmdLineInputFiles[1]), new File(cmdLineOutputFiles[0])); + } else if (mode.equals(MODE_INTERSECT)) { + runIntersectComparison(); + } else if (mode.equals(MODE_UNIQUE)) { + runUniqueComparison(); + } else { + throw new QMuleException("MODE_ERROR", mode); + } + return 0; + } + + + private void clear() { + recordCount = 0; + overlapCount = 0; + notOverlappingCount = 0; + } + + public static void main(String[] args) throws Exception { + CompareReferenceRegions sp = new CompareReferenceRegions(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + +} diff --git a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- new file mode 100644 index 000000000..3b3fbc798 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- @@ -0,0 +1,676 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.Map.Entry; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionName; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + + +public class CompareReferenceRegions { + + private static final String MODE_ONEWAY = "oneway"; + private static final String MODE_ANNOTATE = "annotate"; + private static final String MODE_TWOWAY = "twoway"; + private static final String MODE_INTERSECT = "intersect"; + private static final String MODE_UNIQUE = "unique"; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private List chromosomes = new ArrayList(); + private int overlapCount = 0; + private int notOverlappingCount = 0; + private int recordCount; + private String mode; + private int column; + private String annotation; + private static QLogger logger; + private static final String MAF = "maf"; + private static final String GFF3 = "gff3"; + private static final String GTF = "gtf"; + private static final String BED = "bed"; + private static final String VCF = "vcf"; + private static final String TAB = "txt"; + private static final String DCC1 = "dcc1"; + + private void runOnewayComparison(File inputFile, File comparisonFile, + File outputOverlapFile, File outputNoOverlapFile) throws Exception { + + if (mode.equals(MODE_ANNOTATE)) { + logger.info("If overlapping, will annotate column: " + column+1 +" of file with the annotation " + annotation); + } + + //get a list of the chromosomes + setUp(inputFile, outputOverlapFile, outputNoOverlapFile); + + logger.info("Input file: " + inputFile.getAbsolutePath()); + logger.info("Comparison file: " + comparisonFile.getAbsolutePath()); + + logger.info("Chromosomes to analyze: " + chromosomes.size()); + + for (String c: chromosomes) { + logger.info("Getting records for chromosome: " + c); + Map inputRecords = readRecords(inputFile, c); + Map compareRecords = readRecords(comparisonFile, c); + compareRecords(inputRecords, compareRecords, outputOverlapFile, outputNoOverlapFile); + } + logSummary(); + clear(); + } + + private void logSummary() { + logger.info("SUMMARY"); + logger.info("Total Records: " + recordCount); + logger.info("Total Records in supplied reference regions: " + overlapCount); + logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); + } + + private void runAnnotateComparison(File inputFile, File comparisonFile, + File outputOverlapFile) throws Exception { + + //get a list of the chromosomes + setUp(inputFile, outputOverlapFile, null); + + logger.info("Input file: " + inputFile.getAbsolutePath()); + logger.info("Comparison file: " + comparisonFile.getAbsolutePath()); + + logger.info("Chromosomes to analyze: " + chromosomes.size()); + + for (String c: chromosomes) { + logger.info("Getting records for chromosome: " + c); + Map inputRecords = readRecords(inputFile, c); + Map compareRecords = readRecords(comparisonFile, c); + compareRecordsAndAnnotate(inputRecords, compareRecords, outputOverlapFile); + } + logSummary(); + clear(); + } + + private void 
runIntersectComparison() throws Exception { + //Set first input file as primary + File primaryInputFile = new File(cmdLineInputFiles[0]); + //Single output file + File outputFile = new File(cmdLineOutputFiles[0]); + + int[] counts = new int[cmdLineInputFiles.length]; + counts[0] = 0; + + setUp(primaryInputFile, outputFile, null); + + //logging + logger.info("Input file 1: " + primaryInputFile.getAbsolutePath()); + for (int i=1; i inputRecords = readRecords(primaryInputFile, c); + counts[0] += inputRecords.size(); + for (int i=1; i compareRecords = readRecords(compareFile, c); + counts[i] += compareRecords.size(); + compareOverlapRecords(c, inputRecords, compareRecords, getFileType(primaryInputFile)); + } + overlapCount += inputRecords.size(); + //any input records left at the end are intersecting + writeRecords(inputRecords, outputFile); + } + for (int i=0; i inputRecords = readRecords(primaryInputFile, c); + Map compareRecords = new TreeMap(); + counts[f] += inputRecords.size(); + for (int i=0; i currentRecords = readRecords(compareFile, c); + counts[i] = counts[i] + currentRecords.size(); + compareRecords.putAll(currentRecords); + } + } + compareOverlapRecords(c, inputRecords, compareRecords, getFileType(primaryInputFile)); + notOverlappingCount += inputRecords.size(); + //any input records left at the end are unique + writeRecords(inputRecords, outputFile); + logger.info(counts[f] + " total records for file " +cmdLineInputFiles[f]); + for (int i=0; i inputRecords, Map compareRecords, String inputFileType) throws Exception { + + Iterator> entries = inputRecords.entrySet().iterator(); + while (entries.hasNext()) { + Entry entry = entries.next(); + + boolean isOverlapping = compareRecord(entry, compareRecords, inputFileType); + + if (mode.equals(MODE_INTERSECT) && !isOverlapping) { + //remove input record if it isn't overlapping and won't intersect with all records + entries.remove(); + } + if (mode.equals(MODE_UNIQUE) && isOverlapping) { + entries.remove(); + } + } + } + + private void compareRecordsAndAnnotate(Map inputRecords, + Map compareRecords, + File outputOverlapFile) throws Exception { + BufferedWriter overlapWriter = new BufferedWriter(new FileWriter(outputOverlapFile, true)); + + try { + for (Entry entry : inputRecords.entrySet()) { + recordCount++; + boolean isOverlapping = compareRecord(entry, compareRecords, null); + + if (isOverlapping) { + overlapCount++; + } else { + notOverlappingCount++; + } + writeRecord(overlapWriter, entry.getValue()); + } + } finally { + overlapWriter.close(); + } + } + + private void compareRecords(Map inputRecords, + Map compareRecords, + File outputOverlapFile, File outputNoOverlapFile) throws Exception { + BufferedWriter overlapWriter = new BufferedWriter(new FileWriter(outputOverlapFile, true)); + BufferedWriter noOverlapWriter = new BufferedWriter(new FileWriter(outputNoOverlapFile, true)); + + try { + for (Entry entry : inputRecords.entrySet()) { + + recordCount++; + + boolean isOverlapping = compareRecord(entry, compareRecords, null); + + if (isOverlapping) { + overlapCount++; + writeRecord(overlapWriter, entry.getValue()); + } else { + notOverlappingCount++; + if (mode.equals(MODE_ANNOTATE)) { + + } else { + writeRecord(noOverlapWriter, entry.getValue()); + } + } + } + } finally { + overlapWriter.close(); + noOverlapWriter.close(); + } + } + + private boolean compareRecord(Entry entry, Map compareRecords, String inputFileType) throws Exception { + ChrPosition inputChrPos = entry.getKey(); + TabbedRecord inputRecord = entry.getValue(); + 
boolean isOverlapping = false; + //check to see if it is overlapping with the comparison reference region + for (Entry compareEntry : compareRecords.entrySet()) { + ChrPosition comparePos = compareEntry.getKey(); + if (comparePos.getEndPosition() < inputChrPos.getStartPosition()) { + continue; + } else if (comparePos.getStartPosition() > inputChrPos.getEndPosition()) { + break; + } else { + if (tabbedRecordFallsInCompareRecord(inputChrPos, inputRecord, compareEntry)) { + isOverlapping = true; + if (mode.equals(MODE_ANNOTATE)) { + String[] values = inputRecord.getDataArray(); + String oldVal = values[column]; + if (oldVal.equals("")) { + values[column] = annotation; + } else { + if (oldVal.endsWith(";")) { + values[column] = oldVal + annotation; + } else { + values[column] = oldVal + ";" + annotation; + } + } + String data = ""; + for (String s: values) { + data += s + "\t"; + } + inputRecord.setData(data); + } + if (mode.equals(MODE_INTERSECT)) { + //change the ends?? + int[] indexes = getChrIndex(inputFileType, entry.getValue().getData().split("\t")); + String[] array = inputRecord.getDataArray(); + + if (inputChrPos.getStartPosition() > compareEntry.getKey().getStartPosition()) { + array[indexes[1]] = Integer.toString(compareEntry.getKey().getStartPosition()); + } + if (inputChrPos.getEndPosition() < compareEntry.getKey().getEndPosition()) { + array[indexes[2]] = Integer.toString(compareEntry.getKey().getEndPosition()); + } + String data = ""; + for (String s: array) { + data += s + "\t"; + } + inputRecord.setData(data); + entry.setValue(inputRecord); + } + } + } + } + return isOverlapping; + } + + + private void writeRecords(Map records, File outputFile) throws IOException { + BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile, true)); + + for (Entry entry: records.entrySet()) { + writeRecord(writer, entry.getValue()); + } + writer.close(); + } + + private void writeRecord(BufferedWriter writer, TabbedRecord record) throws IOException { + if (!record.getData().endsWith("\n")) { + record.setData(record.getData() + "\n"); + } + writer.write(record.getData()); + } + + private TreeMap readRecords(File inputFile, String chromosome) throws Exception { + + TabbedFileReader reader = new TabbedFileReader(inputFile); + TreeMap records = new TreeMap(); + String fileType = getFileType(inputFile); + try { + + Iterator iterator = reader.getRecordIterator(); + + while (iterator.hasNext()) { + + TabbedRecord tab = iterator.next(); + if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis") || tab.getData().startsWith("Chr") ) { + continue; + } + ChrPosition chrPos = getChrPosition(fileType, tab); + if (chrPos.getChromosome().equals(chromosome)) { + records.put(chrPos, tab); + } + } + + } finally { + reader.close(); + } + + return records; + } + + private String getFileType(File inputFile) { + int index = inputFile.getName().lastIndexOf(".") + 1; + String name = inputFile.getName().substring(index, inputFile.getName().length()); + + if (name.equals("dcc")) { + return "dcc1"; + } + + return name; + } + + private void setUp(File file, File outputFileOne, File outputFileTwo) throws Exception { + TabbedFileReader reader = new TabbedFileReader(file); + Iterator iterator = reader.getRecordIterator(); + + String fileType = getFileType(file); + List header = new ArrayList(); + if (reader.getHeader() != null) { + Iterator iter = reader.getHeader().iterator(); + while (iter.hasNext()) { + header.add(iter.next()); + } + } + + while 
(iterator.hasNext()) { + + TabbedRecord tab = iterator.next(); + + if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis") || tab.getData().startsWith("Chr") ) { + header.add(tab.getData()); + continue; + } + + ChrPosition chrPos = getChrPosition(fileType, tab); + + if (!chromosomes.contains(chrPos.getChromosome())) { + chromosomes.add(chrPos.getChromosome()); + } + } + reader.close(); + + if (outputFileOne != null) { + writeHeader(header, outputFileOne); + } + if (outputFileTwo != null) { + writeHeader(header, outputFileTwo); + } + + } + + private int[] getChrIndex(String inputFileType, String[] values) throws Exception { + + int chrIndex = 0; + int startIndex = 0; + int endIndex = 0; + + if (inputFileType.equals(MAF)) { + chrIndex = 4; + startIndex = 5; + endIndex = 6; + } else if (inputFileType.equals(DCC1)) { + chrIndex = 4; + startIndex = 5; + endIndex = 6; + } else if (inputFileType.equals(BED)) { + chrIndex = 0; + startIndex = 1; + endIndex = 2; + } else if (inputFileType.equals(GFF3) || inputFileType.equals(GTF)) { + chrIndex = 0; + startIndex = 3; + endIndex = 4; + } else if (inputFileType.equals(VCF)) { + chrIndex = 0; + startIndex = 1; + endIndex = 1; + if (values.length >= 8) { + String[] infos = values[7].split("\t"); + + for (String info : infos) { + String[] params = info.split("="); + if (params.length == 2) { + if (params[0].equals("END")) { + endIndex = 2; + values[2] = params[1]; + } + } + } + } + //NEED TO CHANGE FOR INDELS + } else if (inputFileType.equals(TAB)) { + chrIndex = 0; + startIndex = 1; + endIndex = 2; + } else { + throw new Exception("Input file type is not recognized"); + } + int[] arr = {chrIndex, startIndex, endIndex}; + return arr; + } + + private ChrPosition getChrPosition(String inputFileType, TabbedRecord tab) throws Exception { + String[] values = tab.getData().split("\t"); + ChrPosition chr = null; + + int[] indexes = getChrIndex(inputFileType, values); + int chrIndex = indexes[0]; + int startIndex = indexes[1]; + int endIndex = indexes[2]; + + if (inputFileType.equals(BED)) { + chr = new ChrRangePosition(values[chrIndex], new Integer(values[startIndex])+1, new Integer(values[endIndex])+1); + } else { + String chromosome = values[chrIndex]; + if (!chromosome.contains("GL") && !chromosome.startsWith("chr")) { + chromosome = "chr" + chromosome; + } + if (chromosome.equals("chrM")) { + chromosome = "chrMT"; + } + if (inputFileType.equals(MAF)) { + chr = new ChrPositionName(chromosome, new Integer(values[startIndex]), new Integer(values[endIndex]), values[0]); + } else { + chr = new ChrRangePosition(chromosome, new Integer(values[startIndex]), new Integer(values[endIndex])); + } + } + return chr; + } + + private boolean tabbedRecordFallsInCompareRecord(ChrPosition inputChrPos, TabbedRecord inputRecord, Entry entry) { + if (entry != null) { + ChrPosition compareChrPos = entry.getKey(); + if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() && inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || + (inputChrPos.getEndPosition() >= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() <= compareChrPos.getEndPosition()) + || (inputChrPos.getStartPosition() <= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() >= compareChrPos.getEndPosition())) { + return true; + } + } + return false; + } + + public String[] getCmdLineInputFiles() { + return cmdLineInputFiles; + } + + public void setCmdLineInputFiles(String[] cmdLineInputFiles) { + 
this.cmdLineInputFiles = cmdLineInputFiles; + } + + + private void writeHeader(List header, File outputOverlapFile) throws IOException { + BufferedWriter writer = new BufferedWriter(new FileWriter(outputOverlapFile, true)); + + for (String h: header) { + + writer.write(h + "\n"); + } + writer.close(); + } + + public List getChromosomes() { + return chromosomes; + } + + public void setChromosomes(List chromosomes) { + this.chromosomes = chromosomes; + } + + + public int getOverlapCount() { + return overlapCount; + } + + public void setOverlapCount(int overlapCount) { + this.overlapCount = overlapCount; + } + + public int getNotOverlappingCount() { + return notOverlappingCount; + } + + public void setNotOverlappingCount(int notOverlappingCount) { + this.notOverlappingCount = notOverlappingCount; + } + + public int getMafCount() { + return recordCount; + } + + public void setMafCount(int mafCount) { + this.recordCount = mafCount; + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(CompareReferenceRegions.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareReferenceRegions", CompareReferenceRegions.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + //output files + cmdLineOutputFiles = options.getOutputFileNames(); + + if (cmdLineOutputFiles.length >= 1) { + if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + + for (String file : cmdLineOutputFiles) { + if (new File(file).exists() && !new File(file).isDirectory()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + } + mode = options.getMode(); + if (mode == null) { + mode = MODE_ONEWAY; + } + logger.info("Mode: " + mode); + + if (mode.equals(MODE_ANNOTATE)) { + //take away 1 to get index of column rather than column number + column = new Integer(options.getColumn()) -1; + annotation = options.getAnnotation(); + } + + return engage(); + } + + return returnStatus; + } + + + private int engage() throws Exception { + + if (mode.equals(MODE_ONEWAY) || mode.equals(MODE_TWOWAY)) { + runOnewayComparison(new File(cmdLineInputFiles[0]), new File(cmdLineInputFiles[1]), new File(cmdLineOutputFiles[0]), new File(cmdLineOutputFiles[1])); + if (mode.equals(MODE_TWOWAY)) { + runOnewayComparison(new File(cmdLineInputFiles[1]), new File(cmdLineInputFiles[0]), new File(cmdLineOutputFiles[2]), new File(cmdLineOutputFiles[3])); + } + } else if (mode.equals(MODE_ANNOTATE)) { + runAnnotateComparison(new File(cmdLineInputFiles[0]), new File(cmdLineInputFiles[1]), new File(cmdLineOutputFiles[0])); + } else if (mode.equals(MODE_INTERSECT)) { + runIntersectComparison(); + } else if (mode.equals(MODE_UNIQUE)) { + runUniqueComparison(); + } else { + throw new QMuleException("MODE_ERROR", mode); + } + return 0; + } + + + private void clear() { + recordCount = 0; + overlapCount = 0; + notOverlappingCount = 0; + } + + public static void main(String[] args) throws Exception { + CompareReferenceRegions sp = new CompareReferenceRegions(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + +} diff --git a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java new file mode 100644 index 000000000..84fc72a32 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java @@ -0,0 +1,86 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.Set; +import java.util.TreeSet; + +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class DbSnpChrLiftover { + + private static char TAB = '\t'; + + String inputVCF; + String outputVCF; + + + private final Set uniqueChrNames = new TreeSet(); + + public DbSnpChrLiftover() {} + + private void getUniqueChrNames() throws Exception { + TabbedFileReader reader = new TabbedFileReader(new File(inputVCF)); + TabbedFileWriter writer = new TabbedFileWriter(new File(outputVCF)); + try { + + TabbedHeader header = reader.getHeader(); + + // writer out header + writer.addHeader(header); + + for (TabbedRecord record : reader) { + String [] params = TabTokenizer.tokenize(record.getData()); + String chr = params[0]; + uniqueChrNames.add(chr); + + // switch the chr + params[0] = "chr" + chr; + + StringBuilder sb = new StringBuilder(); + for (int i = 0, len = params.length ; i < len ; i ++) { + sb.append(params[i]); + if (i < len-1) sb.append(TAB); + } + + record.setData(sb.toString()); + + writer.add(record); + } + + } finally { + try { + writer.close(); + } finally { + reader.close(); + } + } + + + for (String chr : uniqueChrNames) { + System.out.println("chr: " + chr); + } + } + + + public static void main(String[] args) throws Exception { + if (args.length < 2) + throw new IllegalArgumentException("USAGE: DbSnpChrLiftover "); + + DbSnpChrLiftover dcl = new DbSnpChrLiftover(); + + + dcl.inputVCF = args[0]; + dcl.outputVCF = args[1]; + + dcl.getUniqueChrNames(); + + } + +} diff --git a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- new file mode 100644 index 000000000..84fc72a32 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- @@ -0,0 +1,86 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.Set; +import java.util.TreeSet; + +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class DbSnpChrLiftover { + + private static char TAB = '\t'; + + String inputVCF; + String outputVCF; + + + private final Set uniqueChrNames = new TreeSet(); + + public DbSnpChrLiftover() {} + + private void getUniqueChrNames() throws Exception { + TabbedFileReader reader = new TabbedFileReader(new File(inputVCF)); + TabbedFileWriter writer = new TabbedFileWriter(new File(outputVCF)); + try { + + TabbedHeader header = reader.getHeader(); + + // writer out header + writer.addHeader(header); + + for (TabbedRecord record : reader) { + String [] params = TabTokenizer.tokenize(record.getData()); + String chr = params[0]; + uniqueChrNames.add(chr); + + // switch the chr + params[0] = "chr" + chr; + + StringBuilder sb = new StringBuilder(); + for (int i = 0, len = params.length ; i < len ; i ++) { + sb.append(params[i]); + if (i < len-1) sb.append(TAB); + } + + record.setData(sb.toString()); + + writer.add(record); + } + + } finally { + try { + writer.close(); + } finally { + reader.close(); + } + } + + + for (String chr : uniqueChrNames) { + System.out.println("chr: " + chr); + } + } + + + public static void main(String[] args) throws Exception { + if (args.length < 2) + throw new IllegalArgumentException("USAGE: DbSnpChrLiftover "); + + DbSnpChrLiftover dcl = new DbSnpChrLiftover(); + + + dcl.inputVCF = args[0]; + dcl.outputVCF = args[1]; + + dcl.getUniqueChrNames(); + + } + +} diff --git a/qmule/src/org/qcmg/qmule/GermlineDBStripper.java b/qmule/src/org/qcmg/qmule/GermlineDBStripper.java new file mode 100644 index 000000000..88ceefb1b --- /dev/null +++ b/qmule/src/org/qcmg/qmule/GermlineDBStripper.java @@ -0,0 +1,46 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.IOException; + +import org.qcmg.qmule.germlinedb.GermlineDBFileReader; +import org.qcmg.qmule.germlinedb.GermlineDBFileWriter; +import org.qcmg.qmule.germlinedb.GermlineDBRecord; + +public class GermlineDBStripper { + + public static void main(String[] args) throws IOException { + + String germlineDB = args[0]; + String germlineDBClassA = args[1]; + String header = "analysis_id\tcontrol_sample_id\tvariation_id\tvariation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tflag"; + + GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDB)); + GermlineDBFileWriter writer = new GermlineDBFileWriter(new File(germlineDBClassA)); + + try { + writer.add(header+"\n"); + + // strip out all non-classA entities from Germline_DB + int totalCount = 0, classACount = 0; + for (GermlineDBRecord record : reader) { + ++totalCount; + if ("--".equals(record.getFlag())) { + ++classACount; + writer.add(record.getData() + "\n"); + } + } + System.out.println("total count: " + totalCount + ", classA count: " + classACount); + + } finally { + try { + reader.close(); + } finally { + writer.close(); + } + } + } +} diff --git a/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- b/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- new file mode 100644 index 000000000..71bd5e9cf --- /dev/null +++ b/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- @@ -0,0 +1,47 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.IOException; + +import org.qcmg.germlinedb.GermlineDBFileReader; +import org.qcmg.germlinedb.GermlineDBFileWriter; +import org.qcmg.germlinedb.GermlineDBRecord; + +public class GermlineDBStripper { + + + public static void main(String[] args) throws IOException { + + String germlineDB = args[0]; + String germlineDBClassA = args[1]; + String header = "analysis_id\tcontrol_sample_id\tvariation_id\tvariation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tflag"; + + GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDB)); + GermlineDBFileWriter writer = new GermlineDBFileWriter(new File(germlineDBClassA)); + + try { + writer.add(header+"\n"); + + // strip out all non-classA entities from Germline_DB + int totalCount = 0, classACount = 0; + for (GermlineDBRecord record : reader) { + ++totalCount; + if ("--".equals(record.getFlag())) { + ++classACount; + writer.add(record.getData() + "\n"); + } + } + System.out.println("total count: " + totalCount + ", classA count: " + classACount); + + } finally { + try { + reader.close(); + } finally { + writer.close(); + } + } + } +} diff --git a/qmule/src/org/qcmg/qmule/GetBamRecords.java b/qmule/src/org/qcmg/qmule/GetBamRecords.java new file mode 100644 index 000000000..114351d71 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/GetBamRecords.java @@ -0,0 +1,226 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
+ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMUtils; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.picard.QJumper; + +public class GetBamRecords { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + List records; + + private String position; + + private int exitStatus; + private static QLogger logger; + + + public int engage() throws Exception { + + logger.info("Setting up the QJumper"); + QJumper jumper = new QJumper(); + jumper.setupReader(cmdLineInputFiles[0]); + + String contig = position.substring(0, position.indexOf(":")); + int start = Integer.parseInt(position.substring(position.indexOf(":")+1)); + + logger.info("config: " + contig); + logger.info("start: " + start); + + records = jumper.getOverlappingRecordsAtPosition(contig, start, start); + + logger.info("unfiltered read count: " + records.size()+ ""); + + int filteredCount = 0, readsWithBaseAtPosition = 0, duplicateCount = 0, properlyPaired = 0,properlyPairedAll = 0, pairedAll = 0, paired = 0, notPrimaryAlignment = 0, unmapped = 0; + String qualityString = "", qualityPhredString = ""; + String baseString = ""; + int unmappedSecondaryDuplicates = 0, unmappedSecondaryDuplicatesProperly = 0; + + char[] novelStartBases = new char[1024]; // hmmmmm + Set forwardStrand = new HashSet(); + Set reverseStrand = new HashSet(); + int j = 0; + + for (SAMRecord rec : records) { + int readPosition = org.qcmg.picard.util.SAMUtils.getIndexInReadFromPosition(rec, start); + if (readPosition >= 0 && readPosition < rec.getReadLength()) { + char c = rec.getReadString().charAt(readPosition); + if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { + readsWithBaseAtPosition++; + if (rec.getDuplicateReadFlag()) { + duplicateCount++; + } else { + byte [] baseQuals = SAMUtils.fastqToPhred(rec.getBaseQualityString()); + qualityPhredString +=baseQuals[readPosition] + ","; + baseString += (rec.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c) + ""; +// baseString += c + ","; + qualityString +=rec.getBaseQualityString().charAt(readPosition) + ""; + + if (rec.getMappingQuality() >= 10 && rec.getBaseQualities()[readPosition] >= 10) { + if (rec.getReadNegativeStrandFlag()) { + if (reverseStrand.add(rec.getAlignmentStart())) { + novelStartBases[j++] = c; + } + } else { + if (forwardStrand.add(rec.getAlignmentStart())) { + novelStartBases[j++] = c; + } + } + } + } + } + + if (rec.getReadPairedFlag()) { + paired++; + if ( rec.getProperPairFlag()) properlyPaired++; + + } + if (rec.getReadUnmappedFlag()) unmapped++; + if (rec.getReadUnmappedFlag()) unmapped++; + if (rec.getNotPrimaryAlignmentFlag()) notPrimaryAlignment++; + + + if ( ! rec.getDuplicateReadFlag() && ! rec.getNotPrimaryAlignmentFlag() && ! rec.getReadUnmappedFlag()) + unmappedSecondaryDuplicates++; + if ( ! rec.getDuplicateReadFlag() && ! rec.getNotPrimaryAlignmentFlag() && ! rec.getReadUnmappedFlag() + && (rec.getReadPairedFlag() ? 
rec.getProperPairFlag() : true)) +// && (rec.getReadPairedFlag() && rec.getProperPairFlag())) + unmappedSecondaryDuplicatesProperly++; + } + + if (rec.getReadPairedFlag()) { + pairedAll++; + if (rec.getProperPairFlag()) properlyPairedAll++; + } + + if (BAMPileupUtil.eligibleSamRecord(rec)) { + ++filteredCount; + logger.info("***" + rec.getSAMString()); + } else logger.info(rec.getSAMString()); + + + + } + + + logger.info("SUMMARY: "); + logger.info("Total no of records: " + records.size() ); + logger.info("No of records with a base at position: " + readsWithBaseAtPosition); + logger.info("No of duplicate records (that have a base at position): " + duplicateCount); + logger.info("No of unique records (that have a base at position): " + (readsWithBaseAtPosition-duplicateCount)); + logger.info("No of unique paired records (that have a base at position): " + paired); + logger.info("No of unique properly paired records (that have a base at position): " + properlyPaired); + logger.info("No of records not primary aligned (that have a base at position): " + notPrimaryAlignment); + logger.info("No of records not mapped (that have a base at position): " + unmapped); + logger.info("unmappedSecondaryDuplicates (that have a base at position): " + unmappedSecondaryDuplicates); + logger.info("unmappedSecondaryDuplicatesProperly (that have a base at position): " + unmappedSecondaryDuplicatesProperly); + logger.info("No of paired records (all): " + pairedAll); + logger.info("No of properly paired records (all): " + properlyPairedAll); + logger.info("Unique record bases: " + baseString.substring(0,baseString.length() > 0 ? baseString.length() : 0)); + logger.info("Unique record base qualities: " + qualityString.substring(0,qualityString.length() > 0 ? qualityString.length() : 0)); + logger.info("Unique record base qualities (phred): " + qualityPhredString.substring(0,qualityPhredString.length() > 0 ? qualityPhredString.length() : 0)); + logger.info("filtered read count: " + filteredCount + " out of " + records.size() ); + logger.info("Novel start bases: " + new String(novelStartBases)); + + jumper.closeReader(); + + writeToFile(); + + return exitStatus; + } + + private void writeToFile() { + if (null != cmdLineOutputFiles && cmdLineOutputFiles.length == 1) { + try (FileWriter writer = new FileWriter(cmdLineOutputFiles[0]);){ + for (SAMRecord rec : records) { + writer.write(rec.getSAMString()); + } + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + } + + + + public static void main(String[] args) throws Exception { + GetBamRecords sp = new GetBamRecords(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("GetBamRecords", GetBamRecords.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + position = options.getPosition(); + position = options.getPosition(); + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/GetBamRecords.java-- b/qmule/src/org/qcmg/qmule/GetBamRecords.java-- new file mode 100644 index 000000000..114351d71 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/GetBamRecords.java-- @@ -0,0 +1,226 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMUtils; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.picard.QJumper; + +public class GetBamRecords { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + List records; + + private String position; + + private int exitStatus; + private static QLogger logger; + + + public int engage() throws Exception { + + logger.info("Setting up the QJumper"); + QJumper jumper = new QJumper(); + jumper.setupReader(cmdLineInputFiles[0]); + + String contig = position.substring(0, position.indexOf(":")); + int start = Integer.parseInt(position.substring(position.indexOf(":")+1)); + + logger.info("config: " + contig); + logger.info("start: " + start); + + records = jumper.getOverlappingRecordsAtPosition(contig, start, start); + + logger.info("unfiltered read count: " + records.size()+ ""); + + int filteredCount = 0, readsWithBaseAtPosition = 0, duplicateCount = 0, properlyPaired = 0,properlyPairedAll = 0, pairedAll = 0, paired = 0, notPrimaryAlignment = 0, unmapped = 0; + String qualityString = "", qualityPhredString = ""; + String baseString = ""; + int unmappedSecondaryDuplicates = 0, unmappedSecondaryDuplicatesProperly = 0; + + char[] novelStartBases = new char[1024]; // hmmmmm + Set forwardStrand = new HashSet(); + Set reverseStrand = new HashSet(); + int j = 0; + + for (SAMRecord rec : records) { + int readPosition = org.qcmg.picard.util.SAMUtils.getIndexInReadFromPosition(rec, start); + if (readPosition >= 0 && readPosition < rec.getReadLength()) { + char c = 
rec.getReadString().charAt(readPosition); + if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { + readsWithBaseAtPosition++; + if (rec.getDuplicateReadFlag()) { + duplicateCount++; + } else { + byte [] baseQuals = SAMUtils.fastqToPhred(rec.getBaseQualityString()); + qualityPhredString +=baseQuals[readPosition] + ","; + baseString += (rec.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c) + ""; +// baseString += c + ","; + qualityString +=rec.getBaseQualityString().charAt(readPosition) + ""; + + if (rec.getMappingQuality() >= 10 && rec.getBaseQualities()[readPosition] >= 10) { + if (rec.getReadNegativeStrandFlag()) { + if (reverseStrand.add(rec.getAlignmentStart())) { + novelStartBases[j++] = c; + } + } else { + if (forwardStrand.add(rec.getAlignmentStart())) { + novelStartBases[j++] = c; + } + } + } + } + } + + if (rec.getReadPairedFlag()) { + paired++; + if ( rec.getProperPairFlag()) properlyPaired++; + + } + if (rec.getReadUnmappedFlag()) unmapped++; + if (rec.getReadUnmappedFlag()) unmapped++; + if (rec.getNotPrimaryAlignmentFlag()) notPrimaryAlignment++; + + + if ( ! rec.getDuplicateReadFlag() && ! rec.getNotPrimaryAlignmentFlag() && ! rec.getReadUnmappedFlag()) + unmappedSecondaryDuplicates++; + if ( ! rec.getDuplicateReadFlag() && ! rec.getNotPrimaryAlignmentFlag() && ! rec.getReadUnmappedFlag() + && (rec.getReadPairedFlag() ? rec.getProperPairFlag() : true)) +// && (rec.getReadPairedFlag() && rec.getProperPairFlag())) + unmappedSecondaryDuplicatesProperly++; + } + + if (rec.getReadPairedFlag()) { + pairedAll++; + if (rec.getProperPairFlag()) properlyPairedAll++; + } + + if (BAMPileupUtil.eligibleSamRecord(rec)) { + ++filteredCount; + logger.info("***" + rec.getSAMString()); + } else logger.info(rec.getSAMString()); + + + + } + + + logger.info("SUMMARY: "); + logger.info("Total no of records: " + records.size() ); + logger.info("No of records with a base at position: " + readsWithBaseAtPosition); + logger.info("No of duplicate records (that have a base at position): " + duplicateCount); + logger.info("No of unique records (that have a base at position): " + (readsWithBaseAtPosition-duplicateCount)); + logger.info("No of unique paired records (that have a base at position): " + paired); + logger.info("No of unique properly paired records (that have a base at position): " + properlyPaired); + logger.info("No of records not primary aligned (that have a base at position): " + notPrimaryAlignment); + logger.info("No of records not mapped (that have a base at position): " + unmapped); + logger.info("unmappedSecondaryDuplicates (that have a base at position): " + unmappedSecondaryDuplicates); + logger.info("unmappedSecondaryDuplicatesProperly (that have a base at position): " + unmappedSecondaryDuplicatesProperly); + logger.info("No of paired records (all): " + pairedAll); + logger.info("No of properly paired records (all): " + properlyPairedAll); + logger.info("Unique record bases: " + baseString.substring(0,baseString.length() > 0 ? baseString.length() : 0)); + logger.info("Unique record base qualities: " + qualityString.substring(0,qualityString.length() > 0 ? qualityString.length() : 0)); + logger.info("Unique record base qualities (phred): " + qualityPhredString.substring(0,qualityPhredString.length() > 0 ? 
qualityPhredString.length() : 0)); + logger.info("filtered read count: " + filteredCount + " out of " + records.size() ); + logger.info("Novel start bases: " + new String(novelStartBases)); + + jumper.closeReader(); + + writeToFile(); + + return exitStatus; + } + + private void writeToFile() { + if (null != cmdLineOutputFiles && cmdLineOutputFiles.length == 1) { + try (FileWriter writer = new FileWriter(cmdLineOutputFiles[0]);){ + for (SAMRecord rec : records) { + writer.write(rec.getSAMString()); + } + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + } + + + + public static void main(String[] args) throws Exception { + GetBamRecords sp = new GetBamRecords(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("GetBamRecords", GetBamRecords.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + position = options.getPosition(); + position = options.getPosition(); + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/GetInsetSize.java b/qmule/src/org/qcmg/qmule/GetInsetSize.java new file mode 100644 index 000000000..44d5cc8c6 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/GetInsetSize.java @@ -0,0 +1,35 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; + +import org.qcmg.picard.SAMFileReaderFactory; + +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +public class GetInsetSize { + public static void main(String[] args) throws Exception{ + + File input = new File(args[0]); + SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); + int min =3000; + int max = 0; + String aaa = "AAA"; + for( SAMRecord record : reader){ + + if(record.getAttribute("XC").equals(aaa)){ + int size = Math.abs( record.getInferredInsertSize()); + if(size > max) max = size; + if(size < min) min = size; + } + } + reader.close(); + System.out.println(String.format("Insert range %d-%d\n", min, max)); + } + +} diff --git a/qmule/src/org/qcmg/qmule/GetInsetSize.java-- b/qmule/src/org/qcmg/qmule/GetInsetSize.java-- new file mode 100644 index 000000000..44d5cc8c6 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/GetInsetSize.java-- @@ -0,0 +1,35 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; + +import org.qcmg.picard.SAMFileReaderFactory; + +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +public class GetInsetSize { + public static void main(String[] args) throws Exception{ + + File input = new File(args[0]); + SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); + int min =3000; + int max = 0; + String aaa = "AAA"; + for( SAMRecord record : reader){ + + if(record.getAttribute("XC").equals(aaa)){ + int size = Math.abs( record.getInferredInsertSize()); + if(size > max) max = size; + if(size < min) min = size; + } + } + reader.close(); + System.out.println(String.format("Insert range %d-%d\n", min, max)); + } + +} diff --git a/qmule/src/org/qcmg/qmule/IndelDCCHeader.java b/qmule/src/org/qcmg/qmule/IndelDCCHeader.java new file mode 100644 index 000000000..408ef9027 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/IndelDCCHeader.java @@ -0,0 +1,395 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; + +import htsjdk.samtools.SAMFileHeader; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.meta.QDccMeta; +import org.qcmg.common.meta.QLimsMeta; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.util.QDccMetaFactory; +import org.qcmg.picard.util.QLimsMetaFactory; + +public class IndelDCCHeader { + + private String logFile; + private File somaticOutputFile; + private File germlineOutputFile; + private String mode; + private File normalBam; + private File tumourBam; + private String uuid; + private boolean qexecPresent = false; + private ArrayList qexec = new ArrayList(); + private boolean completeHeaderPresent = false; + private File somaticFile; + private File germlineFile; + private String tumourSampleId; + private String normalSampleId; + private static QLogger logger; + + public void setup(String args[]) throws Exception{ + + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(IndelDCCHeader.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("IndelDCCHeader", IndelDCCHeader.class.getPackage().getImplementationVersion(), args); + + // get list of file names + String[] cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 2) { + throw new QMuleException("INSUFFICIENT_INPUT_FILES"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + somaticFile = new File(cmdLineInputFiles[0]); + germlineFile = new File(cmdLineInputFiles[1]); + tumourBam = new File(options.getTumour()); + normalBam = new File(options.getNormal()); + + if ( ! FileUtils.canFileBeRead(tumourBam)) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); + } + if ( ! FileUtils.canFileBeRead(normalBam)) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); + } + + String[] cmdLineOutputFiles = options.getOutputFileNames(); + + somaticOutputFile = new File(cmdLineOutputFiles[0]); + germlineOutputFile = new File(cmdLineOutputFiles[1]); + + if (cmdLineOutputFiles.length != 2) { + throw new QMuleException("TOO_MANY_OUTPUTFILE"); + } + if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + for (String file : cmdLineOutputFiles) { + if (new File(file).exists()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + + mode = options.getMode(); + + if (mode == null || (!mode.equals("pindel") && !mode.equals("gatk"))) { + throw new QMuleException("MODE_ERROR", mode); + } + + logger.info("Somatic input DCC: " + somaticFile.getAbsolutePath()); + logger.info("Germline input DCC: " + germlineFile.getAbsolutePath()); + logger.info("Output DCC: " + somaticOutputFile.getAbsolutePath()); + logger.info("Output DCC: " + germlineOutputFile.getAbsolutePath()); + logger.info("Tumour bam: " + tumourBam.getAbsolutePath()); + logger.info("Normal bam: " + normalBam.getAbsolutePath()); + logger.info("Mode: " + mode); + + } + } + + public int annotate() throws Exception { + //double check to make sure that uuid isn't already present + checkForUUid(); + + StringBuilder header = new StringBuilder(); + if (completeHeaderPresent) { + logger.info("UUid already present in header. No annotation is taking place"); + } else if (qexecPresent){ + StringBuilder sb = new StringBuilder(); + for (String s: qexec) { + sb.append(s + "\n"); + } + header.append(sb.toString()); + header.append(getDCCMeta()); + QLimsMeta tumour = QLimsMetaFactory.getLimsMeta("TEST", tumourBam.getAbsolutePath()); + tumourSampleId = tumour.getSample(); + header.append(tumour.getLimsMetaDataToString()); + QLimsMeta normal = QLimsMetaFactory.getLimsMeta("CONTROL", normalBam.getAbsolutePath()); + normalSampleId = normal.getSample(); + header.append(normal.getLimsMetaDataToString()); + //write somatic + writeOutputFile(header.toString(), somaticFile, somaticOutputFile, false); + //write germline + writeOutputFile(header.toString(), germlineFile, germlineOutputFile, true); + } + + return 0; + } + + public File getSomaticOutputFile() { + return somaticOutputFile; + } + + public void setSomaticOutputFile(File somaticOutputFile) { + this.somaticOutputFile = somaticOutputFile; + } + + public File getGermlineOutputFile() { + return germlineOutputFile; + } + + public void setGermlineOutputFile(File germlineOutputFile) { + this.germlineOutputFile = germlineOutputFile; + } + + public File getSomaticFile() { + return somaticFile; + } + + public void setSomaticFile(File somaticFile) { + this.somaticFile = somaticFile; + } + + public File getGermlineFile() { + return germlineFile; + } + + public void setGermlineFile(File germlineFile) { + this.germlineFile = germlineFile; + } + + public boolean isQexecPresent() { + return qexecPresent; + } + + public void setQexecPresent(boolean qexecPresent) { + this.qexecPresent = qexecPresent; + } + + public ArrayList getQexec() { + return qexec; + } + + public void setQexec(ArrayList qexec) { + this.qexec = qexec; + } + + public boolean isCompleteHeaderPresent() { + return completeHeaderPresent; + } + + public void setCompleteHeaderPresent(boolean completeHeaderPresent) { + this.completeHeaderPresent = completeHeaderPresent; + } + + public void checkForUUid() throws IOException, QMuleException { + BufferedReader reader = new BufferedReader(new FileReader(somaticFile)); + + String line; + boolean ddcMeta = false; + boolean uuidHere = false; + boolean uuidInResults = false; + qexec = new ArrayList(); + while((line = reader.readLine()) != null) { + if (line.startsWith("#") || line.startsWith("analysis")) { + if (line.contains("Uuid") || line.contains("uuid")) { + 
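+ // a header line already mentions a UUID; record its presence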
uuidHere = true; + } + if (line.startsWith("#Q_EXEC")) { + qexec.add(line); + } + if (line.startsWith("#Q_DCCMETA")) { + ddcMeta = true; + } + } else { + String[] values = line.split("\t"); + if (isCorrectUuidFormat(values[0])) { + uuidInResults = true; + } + } + } + reader.close(); + if (ddcMeta && uuidHere && uuidInResults) { + logger.info("Complete header already present."); + completeHeaderPresent = true; + } else if (uuidHere && qexec.size() == 14) { + qexecPresent = true; + logger.info("QExec header and uuid present."); + String q = ""; + for (String s: qexec) { + if (s.contains("Uuid")) { + q = s.replace("-", "_"); + String potentialUuid = s.split("\t")[2].replace("-", "_"); + if (isCorrectUuidFormat(potentialUuid)) { + uuid = potentialUuid; + } else { + logger.info("UUid was not correct format: " + potentialUuid); + throw new QMuleException("UUID_ERROR"); + } + } + } + qexec.remove(0); + qexec.add(0, q); + } else { + logger.info("Could not determine if UUid and DCC header is present"); + throw new QMuleException("UUID_ERROR"); + } + } + + public boolean isCorrectUuidFormat(String potentialUuid) { + if (potentialUuid.length() == 36 && potentialUuid.split("_").length == 5) { + return true; + } + return false; + } + + public String getDCCMeta() throws Exception { + SAMFileHeader tHeader = SAMFileReaderFactory.createSAMFileReader(tumourBam).getFileHeader(); + SAMFileHeader nHeader = SAMFileReaderFactory.createSAMFileReader(normalBam).getFileHeader(); + QDccMeta meta; + + meta = QDccMetaFactory.getDccMeta(uuid, nHeader, tHeader, mode); + return meta.getDCCMetaDataToString(); + } + + public void writeOutputFile(String header, File inputFile, File outputFile, boolean isGermline) throws IOException { + BufferedReader reader = new BufferedReader(new FileReader(inputFile)); + BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); + + if (!completeHeaderPresent) { + writer.write(header); + } + + String line; + while((line = reader.readLine()) != null) { + if (!line.startsWith("#") && !line.startsWith("analysis") && !completeHeaderPresent) { + writer.write(replaceIdsInLine(line, isGermline) + "\n"); + } else { + if (qexecPresent && !line.startsWith("#Q_EXEC")) { + writer.write(line + "\n"); + } + } + } + reader.close(); + writer.close(); + } + + public String getTumourSampleId() { + return tumourSampleId; + } + + public void setTumourSampleId(String tumourSampleId) { + this.tumourSampleId = tumourSampleId; + } + + public String getNormalSampleId() { + return normalSampleId; + } + + public void setNormalSampleId(String normalSampleId) { + this.normalSampleId = normalSampleId; + } + + public String replaceIdsInLine(String line, boolean isGermline) { + String[] values = line.split("\t"); + + StringBuilder sb = new StringBuilder(); + for (int i=0; i< values.length; i++) { + if (i==0 && !completeHeaderPresent) { + sb.append(uuid + "\t"); + } else if (i==1 && !completeHeaderPresent){ + if (isGermline) { + sb.append(normalSampleId + "\t"); + } else { + sb.append(tumourSampleId + "\t"); + } + } else if (i==2 && !completeHeaderPresent) { + String[] mutationStrs = values[i].split("_"); + String count = "_" + mutationStrs[mutationStrs.length-1]; + if (isGermline) { + sb.append(uuid + "_" + normalSampleId + count + "\t"); + } else { + sb.append(uuid + "_"+ tumourSampleId + count + "\t"); + } + } else { + sb.append(values[i] + "\t"); + } + } + return sb.toString(); + } + + public String getUuid() { + return uuid; + } + + public void setUuid(String uuid) { + this.uuid = uuid; + } + + public 
String getMode() { + return mode; + } + + public void setMode(String mode) { + this.mode = mode; + } + + public File getNormalBam() { + return normalBam; + } + + public void setNormalBam(File normalBam) { + this.normalBam = normalBam; + } + + public File getTumourBam() { + return tumourBam; + } + + public void setTumourBam(File tumourBam) { + this.tumourBam = tumourBam; + } + + public static void main(String[] args) throws Exception { + IndelDCCHeader sp = new IndelDCCHeader(); + LoadReferencedClasses.loadClasses(IndelDCCHeader.class); + sp.setup(args); + int exitStatus = sp.annotate(); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + +} diff --git a/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- b/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- new file mode 100644 index 000000000..408ef9027 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- @@ -0,0 +1,395 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; + +import htsjdk.samtools.SAMFileHeader; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.meta.QDccMeta; +import org.qcmg.common.meta.QLimsMeta; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.util.QDccMetaFactory; +import org.qcmg.picard.util.QLimsMetaFactory; + +public class IndelDCCHeader { + + private String logFile; + private File somaticOutputFile; + private File germlineOutputFile; + private String mode; + private File normalBam; + private File tumourBam; + private String uuid; + private boolean qexecPresent = false; + private ArrayList qexec = new ArrayList(); + private boolean completeHeaderPresent = false; + private File somaticFile; + private File germlineFile; + private String tumourSampleId; + private String normalSampleId; + private static QLogger logger; + + public void setup(String args[]) throws Exception{ + + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(IndelDCCHeader.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("IndelDCCHeader", IndelDCCHeader.class.getPackage().getImplementationVersion(), args); + + // get list of file names + String[] cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 2) { + throw new QMuleException("INSUFFICIENT_INPUT_FILES"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + somaticFile = new File(cmdLineInputFiles[0]); + germlineFile = new File(cmdLineInputFiles[1]); + tumourBam = new File(options.getTumour()); + normalBam = new File(options.getNormal()); + + if ( ! FileUtils.canFileBeRead(tumourBam)) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); + } + if ( ! FileUtils.canFileBeRead(normalBam)) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); + } + + String[] cmdLineOutputFiles = options.getOutputFileNames(); + + somaticOutputFile = new File(cmdLineOutputFiles[0]); + germlineOutputFile = new File(cmdLineOutputFiles[1]); + + if (cmdLineOutputFiles.length != 2) { + throw new QMuleException("TOO_MANY_OUTPUTFILE"); + } + if ( ! FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + for (String file : cmdLineOutputFiles) { + if (new File(file).exists()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + + mode = options.getMode(); + + if (mode == null || (!mode.equals("pindel") && !mode.equals("gatk"))) { + throw new QMuleException("MODE_ERROR", mode); + } + + logger.info("Somatic input DCC: " + somaticFile.getAbsolutePath()); + logger.info("Germline input DCC: " + germlineFile.getAbsolutePath()); + logger.info("Output DCC: " + somaticOutputFile.getAbsolutePath()); + logger.info("Output DCC: " + germlineOutputFile.getAbsolutePath()); + logger.info("Tumour bam: " + tumourBam.getAbsolutePath()); + logger.info("Normal bam: " + normalBam.getAbsolutePath()); + logger.info("Mode: " + mode); + + } + } + + public int annotate() throws Exception { + //double check to make sure that uuid isn't already present + checkForUUid(); + + StringBuilder header = new StringBuilder(); + if (completeHeaderPresent) { + logger.info("UUid already present in header. 
No annotation is taking place"); + } else if (qexecPresent){ + StringBuilder sb = new StringBuilder(); + for (String s: qexec) { + sb.append(s + "\n"); + } + header.append(sb.toString()); + header.append(getDCCMeta()); + QLimsMeta tumour = QLimsMetaFactory.getLimsMeta("TEST", tumourBam.getAbsolutePath()); + tumourSampleId = tumour.getSample(); + header.append(tumour.getLimsMetaDataToString()); + QLimsMeta normal = QLimsMetaFactory.getLimsMeta("CONTROL", normalBam.getAbsolutePath()); + normalSampleId = normal.getSample(); + header.append(normal.getLimsMetaDataToString()); + //write somatic + writeOutputFile(header.toString(), somaticFile, somaticOutputFile, false); + //write germline + writeOutputFile(header.toString(), germlineFile, germlineOutputFile, true); + } + + return 0; + } + + public File getSomaticOutputFile() { + return somaticOutputFile; + } + + public void setSomaticOutputFile(File somaticOutputFile) { + this.somaticOutputFile = somaticOutputFile; + } + + public File getGermlineOutputFile() { + return germlineOutputFile; + } + + public void setGermlineOutputFile(File germlineOutputFile) { + this.germlineOutputFile = germlineOutputFile; + } + + public File getSomaticFile() { + return somaticFile; + } + + public void setSomaticFile(File somaticFile) { + this.somaticFile = somaticFile; + } + + public File getGermlineFile() { + return germlineFile; + } + + public void setGermlineFile(File germlineFile) { + this.germlineFile = germlineFile; + } + + public boolean isQexecPresent() { + return qexecPresent; + } + + public void setQexecPresent(boolean qexecPresent) { + this.qexecPresent = qexecPresent; + } + + public ArrayList getQexec() { + return qexec; + } + + public void setQexec(ArrayList qexec) { + this.qexec = qexec; + } + + public boolean isCompleteHeaderPresent() { + return completeHeaderPresent; + } + + public void setCompleteHeaderPresent(boolean completeHeaderPresent) { + this.completeHeaderPresent = completeHeaderPresent; + } + + public void checkForUUid() throws IOException, QMuleException { + BufferedReader reader = new BufferedReader(new FileReader(somaticFile)); + + String line; + boolean ddcMeta = false; + boolean uuidHere = false; + boolean uuidInResults = false; + qexec = new ArrayList(); + while((line = reader.readLine()) != null) { + if (line.startsWith("#") || line.startsWith("analysis")) { + if (line.contains("Uuid") || line.contains("uuid")) { + uuidHere = true; + } + if (line.startsWith("#Q_EXEC")) { + qexec.add(line); + } + if (line.startsWith("#Q_DCCMETA")) { + ddcMeta = true; + } + } else { + String[] values = line.split("\t"); + if (isCorrectUuidFormat(values[0])) { + uuidInResults = true; + } + } + } + reader.close(); + if (ddcMeta && uuidHere && uuidInResults) { + logger.info("Complete header already present."); + completeHeaderPresent = true; + } else if (uuidHere && qexec.size() == 14) { + qexecPresent = true; + logger.info("QExec header and uuid present."); + String q = ""; + for (String s: qexec) { + if (s.contains("Uuid")) { + q = s.replace("-", "_"); + String potentialUuid = s.split("\t")[2].replace("-", "_"); + if (isCorrectUuidFormat(potentialUuid)) { + uuid = potentialUuid; + } else { + logger.info("UUid was not correct format: " + potentialUuid); + throw new QMuleException("UUID_ERROR"); + } + } + } + qexec.remove(0); + qexec.add(0, q); + } else { + logger.info("Could not determine if UUid and DCC header is present"); + throw new QMuleException("UUID_ERROR"); + } + } + + public boolean isCorrectUuidFormat(String potentialUuid) { + if 
(potentialUuid.length() == 36 && potentialUuid.split("_").length == 5) { + return true; + } + return false; + } + + public String getDCCMeta() throws Exception { + SAMFileHeader tHeader = SAMFileReaderFactory.createSAMFileReader(tumourBam).getFileHeader(); + SAMFileHeader nHeader = SAMFileReaderFactory.createSAMFileReader(normalBam).getFileHeader(); + QDccMeta meta; + + meta = QDccMetaFactory.getDccMeta(uuid, nHeader, tHeader, mode); + return meta.getDCCMetaDataToString(); + } + + public void writeOutputFile(String header, File inputFile, File outputFile, boolean isGermline) throws IOException { + BufferedReader reader = new BufferedReader(new FileReader(inputFile)); + BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); + + if (!completeHeaderPresent) { + writer.write(header); + } + + String line; + while((line = reader.readLine()) != null) { + if (!line.startsWith("#") && !line.startsWith("analysis") && !completeHeaderPresent) { + writer.write(replaceIdsInLine(line, isGermline) + "\n"); + } else { + if (qexecPresent && !line.startsWith("#Q_EXEC")) { + writer.write(line + "\n"); + } + } + } + reader.close(); + writer.close(); + } + + public String getTumourSampleId() { + return tumourSampleId; + } + + public void setTumourSampleId(String tumourSampleId) { + this.tumourSampleId = tumourSampleId; + } + + public String getNormalSampleId() { + return normalSampleId; + } + + public void setNormalSampleId(String normalSampleId) { + this.normalSampleId = normalSampleId; + } + + public String replaceIdsInLine(String line, boolean isGermline) { + String[] values = line.split("\t"); + + StringBuilder sb = new StringBuilder(); + for (int i=0; i< values.length; i++) { + if (i==0 && !completeHeaderPresent) { + sb.append(uuid + "\t"); + } else if (i==1 && !completeHeaderPresent){ + if (isGermline) { + sb.append(normalSampleId + "\t"); + } else { + sb.append(tumourSampleId + "\t"); + } + } else if (i==2 && !completeHeaderPresent) { + String[] mutationStrs = values[i].split("_"); + String count = "_" + mutationStrs[mutationStrs.length-1]; + if (isGermline) { + sb.append(uuid + "_" + normalSampleId + count + "\t"); + } else { + sb.append(uuid + "_"+ tumourSampleId + count + "\t"); + } + } else { + sb.append(values[i] + "\t"); + } + } + return sb.toString(); + } + + public String getUuid() { + return uuid; + } + + public void setUuid(String uuid) { + this.uuid = uuid; + } + + public String getMode() { + return mode; + } + + public void setMode(String mode) { + this.mode = mode; + } + + public File getNormalBam() { + return normalBam; + } + + public void setNormalBam(File normalBam) { + this.normalBam = normalBam; + } + + public File getTumourBam() { + return tumourBam; + } + + public void setTumourBam(File tumourBam) { + this.tumourBam = tumourBam; + } + + public static void main(String[] args) throws Exception { + IndelDCCHeader sp = new IndelDCCHeader(); + LoadReferencedClasses.loadClasses(IndelDCCHeader.class); + sp.setup(args); + int exitStatus = sp.annotate(); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + +} diff --git a/qmule/src/org/qcmg/qmule/MAF2DCC1.java b/qmule/src/org/qcmg/qmule/MAF2DCC1.java new file mode 100644 index 000000000..998a34a10 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/MAF2DCC1.java @@ -0,0 +1,418 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. 
+ * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class MAF2DCC1 { + + private String logFile; + private File mafFile; + private final List dccFiles = new ArrayList(); + private File outputDccFile; + private static QLogger logger; + private Map> mafRecords = new HashMap<>(); + private int inputMafRecordCount; + private int[] mafColumnIndexes; + private int[] dccColumnIndexes; + private String mode; + + + public String getLogFile() { + return logFile; + } + + public File getMafFile() { + return mafFile; + } + + public File getOutputDccFile() { + return outputDccFile; + } + + public Map> getMafRecords() { + return mafRecords; + } + + public void setMafRecords(Map> mafRecords) { + this.mafRecords = mafRecords; + } + + public int[] getMafColumnIndexes() { + return mafColumnIndexes; + } + + public void setMafColumnIndexes(int[] mafColumnIndexes) { + this.mafColumnIndexes = mafColumnIndexes; + } + + public int[] getDccColumnIndexes() { + return dccColumnIndexes; + } + + public void setDccColumnIndexes(int[] dccColumnIndexes) { + this.dccColumnIndexes = dccColumnIndexes; + } + + public String getMode() { + return mode; + } + + public void setMode(String mode) { + this.mode = mode; + } + + public int getInputMafRecordCount() { + return inputMafRecordCount; + } + + public List getDccFiles() { + return dccFiles; + } + + public void setup(String args[]) throws Exception{ + + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(MAF2DCC1.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("MAF2DCC1", MAF2DCC1.class.getPackage().getImplementationVersion(), args); + + // get list of file names + String[] cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 2) { + throw new QMuleException("INSUFFICIENT_INPUT_FILES"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + mafFile = new File(cmdLineInputFiles[0]); + + for (int i=1; i 0) { + logger.warn("Could not find matches for the following records: "); + for (ChrPosition key : mafRecords.keySet()) { + logger.info("Missing at positions: " + key.toString()); + } + throw new QMuleException("MISSING_DCC_RECORDS", Integer.toString(mafRecords.size())); + } + + if (countInMaf != inputMafRecordCount || mafRecords.size() > 0) { + throw new QMuleException("COUNT_ERROR", Integer.toString(countInMaf), Integer.toString(inputMafRecordCount)); + } + + logger.info("Added " + countInMaf + " records to the dcc1 output file"); + + return 0; + } + + private void readMafFile() throws Exception { + TabbedFileReader reader = new TabbedFileReader(mafFile); + try { + int count = 0; + boolean checkForMissingColumnIndex = true; + for (TabbedRecord rec : reader) { + count++; + //header + if (rec.getData().startsWith("Hugo")) { + mafColumnIndexes = findColumnIndexesFromHeader(rec); + } else { + // only need to do this once + if (checkForMissingColumnIndex) { + if (missingColumnIndex(mafColumnIndexes)) { + throw new QMuleException("NO_COLUMN_INDEX", mafFile.getAbsolutePath()); + } + checkForMissingColumnIndex = false; + } + addToMafRecordMap(rec, count); + inputMafRecordCount++; + } + } + + logger.info("Number of input maf records: " + inputMafRecordCount); + + } finally { + reader.close(); + } + } + + private int compare(File dccFile, int count, TabbedFileWriter writer) throws Exception { + logger.info("Looking in dcc file: " + dccFile.getAbsolutePath()); + int countInMaf = 0; + int total = 0; + boolean checkForMissingColumnIndex = true; + + try (TabbedFileReader reader = new TabbedFileReader(dccFile);) { + if (count == 1) { + TabbedHeader header = reader.getHeader(); + writer.addHeader(header); + } + for (TabbedRecord rec : reader) { + //header + + if (rec.getData().startsWith("analysis_id")) { + //mutation id column + dccColumnIndexes = findColumnIndexesFromHeader(rec); + if (count == 1) { + writer.add(rec); + } + } else { + total++; + if (total % 10000 == 0) { + logger.info("Processed: " + total + " dcc records" ); + } + if (checkForMissingColumnIndex) { + if (missingColumnIndex(mafColumnIndexes)) { + throw new QMuleException("NO_MUTATION_ID", dccFile.getAbsolutePath()); + } + checkForMissingColumnIndex = false; + } + String[] strArray = rec.getDataArray(); + String chr = strArray[dccColumnIndexes[0]].replace("chr", ""); + if (chr.equals("M")) { + chr += "T"; + } + ChrPosition chrPos = new ChrRangePosition(chr, Integer.valueOf(strArray[dccColumnIndexes[1]]), Integer.valueOf(strArray[dccColumnIndexes[2]])); + if (recordInMaf(chrPos, rec)) { + writer.add(rec); + countInMaf++; + } + } + } + } + logger.info("Finished looking in dcc file: " + dccFile.getAbsolutePath() + " found " + countInMaf + " maf record/s." 
); + return countInMaf; + } + + public void addToMafRecordMap(TabbedRecord rec, int count) throws QMuleException { + String[] strArray = rec.getDataArray(); + + //need to screw around with chr1 vs 1 vs chrMT vs chrM + String chr = strArray[mafColumnIndexes[0]].replace("chr", ""); + + if (chr.equals("M")) { + chr += "T"; + } + ChrPosition chrPos = new ChrRangePosition(chr, Integer.valueOf(strArray[mafColumnIndexes[1]]), Integer.valueOf(strArray[mafColumnIndexes[2]])); + + List recordsAtThisPosition = mafRecords.get(chrPos); + if (null == recordsAtThisPosition) { + recordsAtThisPosition = new ArrayList(2); + mafRecords.put(chrPos, recordsAtThisPosition); + } + recordsAtThisPosition.add(rec); + + } + + public boolean missingColumnIndex(int[] columnIndexes) throws QMuleException { + for (int i =0; i< columnIndexes.length; i++) { + if (columnIndexes[i] == -1) { + throw new QMuleException("NO_COLUMN_INDEX"); + } + } + return false; + } + + public int[] findColumnIndexesFromHeader(TabbedRecord rec) { + int[] mutationColumns = {-1, -1, -1, -1, -1, -1}; + String[] strArray = rec.getDataArray(); + for (int i=0; i recordsAtThisPosition = mafRecords.get(dccChrPos); + if (null != recordsAtThisPosition && ! recordsAtThisPosition.isEmpty()) { + + if (recordsAtThisPosition.size() > 1) { + logger.info("more than 1 record for position: " + dccChrPos); + } + + // check to see if any of the records match our dccRec + List recordsToRemove = new ArrayList<>(2); + + for (TabbedRecord tr : recordsAtThisPosition) { + if (matchOtherColumns(tr, dccRec)) { + matches++; + if (matches > 1) { + throw new QMuleException("T0O_MANY_MATCHES", dccChrPos.toString()); + } + + // remove record from array + recordsToRemove.add(tr); + matchFound = true; + } + } + + // remove records that have been matched + recordsAtThisPosition.removeAll(recordsToRemove); + + // check to see if there are any records left, if not, remove entry from map + if (recordsAtThisPosition.isEmpty()) { + mafRecords.remove(dccChrPos); + } + } + + return matchFound; + } + + public boolean matchOtherColumns(TabbedRecord mafRec, TabbedRecord dccRec) { + String[] mafValues = mafRec.getDataArray(); + String[] dccValues = dccRec.getDataArray(); + + if (mode.equals("snp")) { + if (matchingMutation(mafValues[mafColumnIndexes[3]], dccValues[dccColumnIndexes[3]])) { + return true; + } + } + if (mode.equals("indel")) { + if (matchingMutation(mafValues[mafColumnIndexes[3]], dccValues[dccColumnIndexes[3]]) && + mafValues[mafColumnIndexes[4]].equals(dccValues[dccColumnIndexes[4]]) && + mafValues[mafColumnIndexes[5]].equals(dccValues[dccColumnIndexes[5]])) { + return true; + } + } + + + return false; + } + + public boolean matchingMutation(String mafMutation, String dccMutation) { + if ((mafMutation.equals("SNP") && dccMutation.equals("1")) || + (mafMutation.equals("INS") && dccMutation.equals("2")) || + (mafMutation.equals("DEL") && dccMutation.equals("3"))) { + return true; + } + return false; + } + + public boolean match(ChrPosition mafChrPos, ChrPosition dccChrPos) { + if (mafChrPos.getChromosome().equals(dccChrPos.getChromosome()) + && mafChrPos.getStartPosition() == dccChrPos.getStartPosition() + && mafChrPos.getEndPosition() == dccChrPos.getEndPosition()) { + return true; + } + return false; + } + + + public static void main(String[] args) throws Exception { + MAF2DCC1 sp = new MAF2DCC1(); + LoadReferencedClasses.loadClasses(MAF2DCC1.class); + sp.setup(args); + + int exitStatus = sp.annotate(); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + 
System.exit(exitStatus); + } + +}
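Editor's note: the sketch below is not part of the patch. It restates the core matching idea in MAF2DCC1 using only JDK types: chromosome names are normalised ("chr" stripped, "M" mapped to "MT"), MAF rows are keyed by chromosome/start/end, and a DCC1 row is kept when a MAF row with the same key and a compatible mutation code (SNP=1, INS=2, DEL=3) exists. The string key and the example values are assumptions made for the demo.

// Editor's illustrative sketch only; mirrors the matching logic, not the real classes.
import java.util.HashMap;
import java.util.Map;

public class MafDccMatchSketch {

	// normalise chromosome names the same way addToMafRecordMap/compare do
	static String key(String chr, int start, int end) {
		String c = chr.replace("chr", "");
		if (c.equals("M")) {
			c = "MT";                                  // treat M and MT as the same contig
		}
		return c + ":" + start + "-" + end;
	}

	// MAF uses SNP/INS/DEL, DCC1 uses the codes 1/2/3
	static boolean mutationMatches(String mafType, String dccCode) {
		return (mafType.equals("SNP") && dccCode.equals("1"))
				|| (mafType.equals("INS") && dccCode.equals("2"))
				|| (mafType.equals("DEL") && dccCode.equals("3"));
	}

	public static void main(String[] args) {
		Map<String, String> mafByPosition = new HashMap<>();
		mafByPosition.put(key("chrM", 100, 100), "SNP");   // one MAF row

		String dccKey = key("MT", 100, 100);               // one DCC1 row at the same position
		boolean keep = mafByPosition.containsKey(dccKey)
				&& mutationMatches(mafByPosition.get(dccKey), "1");
		System.out.println(keep);                          // true -> this DCC1 row would be written out
	}
}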
diff --git a/qmule/src/org/qcmg/qmule/Main.java b/qmule/src/org/qcmg/qmule/Main.java new file mode 100644 index 000000000..fc7560b17 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/Main.java @@ -0,0 +1,100 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + + + +/** + * The entry point for the command-line SAM/BAM merging tool. + */ +public final class Main { + +// enum Tool { +// GetBamRecords("org.qcmg.qmule.GetBamRecords"); +//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"), +//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"), +//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"); +// +//// private final String name; +// private final String fullyQualifiedName; +// +// private Tool(String fullyQualifiedName) { +//// this.name = name; +// this.fullyQualifiedName = fullyQualifiedName; +// } +// +// public String getFullyQualifiedName() { +// return fullyQualifiedName; +// } +// public static Tool getTool(String name) { +// for (Tool t : Tool.values()) { +// if (name.equals(t.name())) return t; +// } +// throw new IllegalArgumentException("Tool not found: " + name); +// } +// } + + /** + * Performs a single merge based on the supplied arguments. Errors will + * terminate the merge and display error and usage messages. + * + * @param args + * the command-line arguments. + * @throws ClassNotFoundException + */ + public static void main(final String[] args) throws ClassNotFoundException { + Options options = null; + try { + options = new Options(args); + } catch (Exception e) { + e.printStackTrace(); + } + System.out.println(Messages.USAGE); + try { + options.displayHelp(); + } catch (Exception e) { + e.printStackTrace(); + } + +// String toolName = options.getToolName(); +// Tool t = Tool.getTool(toolName); +// Class tool = Class.forName(t.getFullyQualifiedName()); +// System.out.println("Class: " + tool.getCanonicalName()); +// // Create the array of Argument Types +// Class[] argTypes = { args.getClass()}; // array is Object! +// // Now find the method +// Method m = null; +// try { +// m = tool.getMethod("main", argTypes); +// } catch (SecurityException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } catch (NoSuchMethodException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } +// System.out.println(m); +// +// // Create the actual argument array +// Object passedArgv[] = { args }; +// +// // Now invoke the method. +// try { +// m.invoke(null, passedArgv); +// } catch (IllegalArgumentException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } catch (IllegalAccessException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } catch (InvocationTargetException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } + +//) Method m = tool.getMethod("main", Object.class); +// m.iinvoke(args); + System.exit(0); + } +}
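Editor's note: the sketch below is not part of the patch. The commented-out block in Main.java appears to aim at dispatching to a tool class by reflection; a minimal, hypothetical version of that idea is shown here. The "first argument names the tool" convention and the package prefix are assumptions for the demo, not the tool's actual behaviour.

// Editor's illustrative sketch only; reflective dispatch to a tool's main method.
import java.lang.reflect.Method;
import java.util.Arrays;

public class ReflectiveDispatchSketch {
	public static void main(String[] args) throws Exception {
		Class<?> tool = Class.forName("org.qcmg.qmule." + args[0]);   // e.g. "GetBamRecords", as in the commented enum
		Method main = tool.getMethod("main", String[].class);
		String[] rest = Arrays.copyOfRange(args, 1, args.length);
		main.invoke(null, (Object) rest);   // cast so the array is passed as a single varargs argument
	}
}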
diff --git a/qmule/src/org/qcmg/qmule/Messages.java b/qmule/src/org/qcmg/qmule/Messages.java new file mode 100644 index 000000000..302f166f1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/Messages.java @@ -0,0 +1,132 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.text.MessageFormat; +import java.util.ResourceBundle; + +/** + * Class used to lookup messages from this package's message bundles. + */ +public final class Messages { + + /** The Constant messages. */ + static final ResourceBundle messages = ResourceBundle + .getBundle("org.qcmg.qmule.messages"); + + /** The Constant ERROR_PREFIX. */ + static final String ERROR_PREFIX = getProgramName() + ": "; + + /** The Constant USAGE. */ + public static final String USAGE = getMessage("USAGE"); + + /** + * Gets the message.
+ * + * @param identifier the identifier + * @return the message + */ + public static String getMessage(final String identifier) { + return messages.getString(identifier); + } + + /** + * Gets the message. + * + * @param identifier the identifier + * @param argument the argument + * @return the message + */ + public static String getMessage(final String identifier, final String argument) { + final String message = Messages.getMessage(identifier); + Object[] arguments = { argument }; + return MessageFormat.format(message, arguments); + } + + /** + * Gets the message. + * + * @param identifier the identifier + * @param arg1 the arg1 + * @param arg2 the arg2 + * @return the message + */ + public static String getMessage(final String identifier, final String arg1, + final String arg2) { + final String message = Messages.getMessage(identifier); + Object[] arguments = { arg1, arg2 }; + return MessageFormat.format(message, arguments); + } + + /** + * Gets the message. + * + * @param identifier the identifier + * @param arg1 the arg1 + * @param arg2 the arg2 + * @param arg3 the arg3 + * @return the message + */ + public static String getMessage(final String identifier, final String arg1, + final String arg2, final String arg3) { + final String message = Messages.getMessage(identifier); + Object[] arguments = { arg1, arg2, arg3 }; + return MessageFormat.format(message, arguments); + } + + /** + * Gets the message. + * + * @param identifier the identifier + * @param arguments the arguments + * @return the message + */ + public static String getMessage(final String identifier, final Object[] arguments) { + final String message = Messages.getMessage(identifier); + return MessageFormat.format(message, arguments); + } + + /** + * Gets the program name. + * + * @return the program name + */ + static String getProgramName() { + return Messages.class.getPackage().getImplementationTitle(); + } + + /** + * Gets the program version. + * + * @return the program version + */ + static String getProgramVersion() { + return Messages.class.getPackage().getImplementationVersion(); + } + + /** + * Gets the version message. + * + * @return the version message + * @throws Exception the exception + */ + public static String getVersionMessage() throws Exception { + return getProgramName() + ", version " + getProgramVersion(); + } + + /** + * Reconstruct command line. + * + * @param args the args + * @return the string + */ + public static String reconstructCommandLine(final String[] args) { + String result = getProgramName() + " "; + for (final String arg : args) { + result += arg + " "; + } + return result; + } + +}
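Editor's note: the sketch below is not part of the patch. It shows how a Messages.getMessage(identifier, argument) call resolves via MessageFormat; the key and pattern are invented for the demo, the real entries live in the org.qcmg.qmule.messages resource bundle.

// Editor's illustrative sketch only; standalone MessageFormat behaviour.
import java.text.MessageFormat;

public class MessagesFormatSketch {
	public static void main(String[] args) {
		String pattern = "Cannot read input file {0}";   // assumed bundle value for a key such as INPUT_FILE_READ_ERROR
		Object[] arguments = { "/path/to/input.maf" };
		System.out.println(MessageFormat.format(pattern, arguments));
	}
}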
diff --git a/qmule/src/org/qcmg/qmule/Options.java b/qmule/src/org/qcmg/qmule/Options.java new file mode 100644 index 000000000..c83f4812d --- /dev/null +++ b/qmule/src/org/qcmg/qmule/Options.java @@ -0,0 +1,512 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import static java.util.Arrays.asList; + +import java.io.IOException; +import java.util.List; +import java.util.Properties; + +import joptsimple.OptionParser; +import joptsimple.OptionSet; + +/** + * The Class Options.
+ */ +public final class Options { + + public enum Ids{ + PATIENT, + SOMATIC_ANALYSIS, + GEMLINE_ANALYSIS, + TUMOUR_SAMPLE, + NORMAL_SAMPLE; + } + + /** The Constant HELP_DESCRIPTION. */ + private static final String HELP_DESCRIPTION = Messages + .getMessage("HELP_OPTION_DESCRIPTION"); + + /** The Constant VERSION_DESCRIPTION. */ + private static final String VERSION_DESCRIPTION = Messages + .getMessage("VERSION_OPTION_DESCRIPTION"); + + /** The Constant INPUT_DESCRIPTION. */ + private static final String INPUT_DESCRIPTION = Messages + .getMessage("INPUT_OPTION_DESCRIPTION"); + + /** The Constant OUTPUT_DESCRIPTION. */ + private static final String OUTPUT_DESCRIPTION = Messages + .getMessage("OUTPUT_OPTION_DESCRIPTION"); + + /** The parser. */ + private final OptionParser parser = new OptionParser(); + + /** The options. */ + private final OptionSet options; + + /** The command line. */ + private final String commandLine; + + /** The input file names. */ + private final String[] inputFileNames; + + /** The output file names. */ + private final String[] outputFileNames; + + /** The log file */ + private String logFile; + + /** The log level */ + private String logLevel; + + private String patientId; + private String somaticAnalysisId; + private String germlineAnalysisId; + private String normalSampleId; + private String tumourSampleId; + private String position; + private String pileupFormat; + private int normalCoverage; + private int numberOfThreads; + private int tumourCoverage; + private int minCoverage; + private String mafMode; + private String gff; + private String fasta; + private String[] gffRegions; + private int noOfBases; + private String mode; + + + private String column; + + private String annotation; + + private String features; + + private String tumour; + + private String normal; + + private String analysis; + + /** + * Instantiates a new options. 
+ * + * @param args the args + * @throws Exception the exception + */ + @SuppressWarnings("unchecked") + public Options(final String[] args) throws Exception { + commandLine = Messages.reconstructCommandLine(args); + +// parser.accepts("qmule", "Tool").withRequiredArg().ofType(String.class).describedAs("tool name"); + parser.accepts("output", OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); + parser.accepts("input", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("inputfile"); + parser.accepts("log", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("logfile"); + parser.accepts("loglevel", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("loglevel"); + parser.accepts("help", HELP_DESCRIPTION); + parser.accepts("version", VERSION_DESCRIPTION); + parser.accepts("patientId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("patientId"); + parser.accepts("somaticAnalysisId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("somaticAnalysisId"); + parser.accepts("germlineAnalysisId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("germlineAnalysisId"); + parser.accepts("normalSampleId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("normalSampleId"); + parser.accepts("tumourSampleId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("tumourSampleId"); + parser.accepts("position", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("position"); + parser.accepts("pileupFormat", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("pileupFormat"); + parser.accepts("normalCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) + .describedAs("normalCoverage"); + parser.accepts("numberOfThreads", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) + .describedAs("numberOfThreads"); + parser.accepts("tumourCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) + .describedAs("tumourCoverage"); + parser.accepts("minCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) + .describedAs("minCoverage"); + parser.accepts("mafMode", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("mafMode"); + parser.accepts("mode", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("mode"); + parser.accepts("column", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("column"); + parser.accepts("annotation", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("annotation"); + parser.accepts("gffFile", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("gffFile"); + parser.accepts("fasta", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("fasta"); + parser.accepts("feature", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("feature"); + parser.accepts("tumour", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("tumour"); + parser.accepts("normal", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("normal"); + parser.accepts("analysis", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) + .describedAs("analysis"); + parser.accepts("verifiedInvalid", INPUT_DESCRIPTION); + parser.accepts("gffRegions", 
INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).withValuesSeparatedBy(',').describedAs("gffRegions"); + parser.accepts("noOfBases", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class).describedAs("noOfBases"); + parser.accepts("proportion", Messages + .getMessage("PROPORTION_OPTION_DESCRIPTION")).withRequiredArg().ofType(Double.class); + parser.accepts("stranded", Messages + .getMessage("STRANDED_OPTION_DESCRIPTION")); + parser.accepts("compareAll",Messages.getMessage("COMPAREALL_OPTION")); + + parser.posixlyCorrect(true); + options = parser.parse(args); + + List inputList = options.valuesOf("input"); + inputFileNames = new String[inputList.size()]; + inputList.toArray(inputFileNames); + + List outputList = options.valuesOf("output"); + outputFileNames = new String[outputList.size()]; + outputList.toArray(outputFileNames); + + logFile = (String) options.valueOf("log"); + logLevel = (String) options.valueOf("loglevel"); + + patientId = (String) options.valueOf("patientId"); + somaticAnalysisId = (String) options.valueOf("somaticAnalysisId"); + germlineAnalysisId = (String) options.valueOf("germlineAnalysisId"); + normalSampleId = (String) options.valueOf("normalSampleId"); + tumourSampleId = (String) options.valueOf("tumourSampleId"); + + // WiggleFromPileup specific options + pileupFormat = (String) options.valueOf("pileupFormat"); + if (null != options.valueOf("normalCoverage")) + normalCoverage = (Integer) options.valueOf("normalCoverage"); + if (null != options.valueOf("tumourCoverage")) + tumourCoverage = (Integer) options.valueOf("tumourCoverage"); + // end of WiggleFromPileup specific options + + //compareReferenceRegions + mode = (String) options.valueOf("mode"); + column = (String) options.valueOf("column"); + annotation = (String) options.valueOf("annotation"); + features = (String) options.valueOf("feature"); + position = (String) options.valueOf("position"); + mafMode = (String) options.valueOf("mafMode"); + + gff = (String) options.valueOf("gffFile"); + fasta = (String) options.valueOf("fasta"); + + tumour = (String) options.valueOf("tumour"); + normal = (String) options.valueOf("normal"); + analysis = (String) options.valueOf("analysis"); + + // gffRegions + List gffRegionsArgs = (List) options.valuesOf("gffRegions"); + gffRegions = new String[gffRegionsArgs.size()]; + gffRegionsArgs.toArray(gffRegions); + + // MafAddCPG specific + if (null != options.valueOf("noOfBases")) + noOfBases = (Integer) options.valueOf("noOfBases"); + + // qsignature + if (null != options.valueOf("minCoverage")) + minCoverage = (Integer) options.valueOf("minCoverage"); + + if (null != options.valueOf("numberOfThreads")) + numberOfThreads = (Integer) options.valueOf("numberOfThreads"); + + } + + /** + * + * @param className + * @param args + * @throws Exception + */ + public Options( final Class myclass, final String[] args) throws Exception { + commandLine = Messages.reconstructCommandLine(args); + + parser.acceptsAll( asList("h", "help"), HELP_DESCRIPTION ); +// parser.acceptsAll( asList("v", "version"), VERSION_DESCRIPTION); + parser.acceptsAll( asList("i", "input"), INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("input"); + parser.acceptsAll(asList("o", "output"), OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); + parser.accepts("log", Messages.getMessage("LOG_OPTION_DESCRIPTION")).withRequiredArg().ofType(String.class).describedAs("logfile"); + parser.accepts("loglevel", 
Messages.getMessage("LOGLEVEL_OPTION_DESCRIPTION")).withRequiredArg().ofType(String.class).describedAs("loglevel"); + + if( myclass.equals(AlignerCompare.class) ){ + parser.accepts("compareAll",Messages.getMessage("COMPAREALL_OPTION")); + parser.acceptsAll( asList("o", "output"), Messages.getMessage("OUTPUT_AlignerCompare")).withRequiredArg().ofType(String.class).describedAs("output"); + }else if(myclass.equals(SubSample.class)) { + parser.accepts("proportion",Messages.getMessage("PROPORTION_OPTION_DESCRIPTION")).withRequiredArg().ofType(Double.class).describedAs("[0,1]"); + }else if(myclass.equals(BAMCompress.class)){ + parser.accepts("compressLevel",Messages.getMessage("COMPRESS_LEVEL_DESCRIPTION") ).withRequiredArg().ofType(Integer.class).describedAs("[0,9]"); + } + + + //else if( myclass.equals(BamMismatchCounts.class)){} + + options = parser.parse(args); + + List inputList = options.valuesOf("input"); + inputFileNames = new String[inputList.size()]; + inputList.toArray(inputFileNames); + + List outputList = options.valuesOf("output"); + outputFileNames = new String[outputList.size()]; + outputList.toArray(outputFileNames); + + } + + public String getTumour() { + return tumour; + } + + public void setTumour(String tumour) { + this.tumour = tumour; + } + + public String getNormal() { + return normal; + } + + public void setNormal(String normal) { + this.normal = normal; + } + + public String getAnalysis() { + return analysis; + } + + public void setAnalysis(String analysis) { + this.analysis = analysis; + } + + /** + * Checks for input option. + * + * @return true, if successful + */ + public boolean hasInputOption() { + return options.has("input"); + } + + /** + * Checks for output option. + * + * @return true, if successful + */ + public boolean hasOutputOption() { + return options.has("o") || options.has("output"); + } + + /** + * Checks for version option. + * + * @return true, if successful + */ + public boolean hasVersionOption() { + return options.has("version"); + } + + public boolean getIncludeInvalid() { + return options.has("verifiedInvalid"); + } + + /** + * Checks for help option. + * + * @return true, if successful + */ + public boolean hasHelpOption() { + return options.has("help"); + } + + public boolean hasCompareAllOption() { + return options.has("compareAll"); + } + + /** + * Checks for log option. + * + * @return true, if successful + */ + public boolean hasLogOption() { + return options.has("log"); + } + + /** + * Checks for non options. + * + * @return true, if successful + */ + public boolean hasNonOptions() { + return 0 != options.nonOptionArguments().size(); + } + + /** + * Gets the input file names. + * + * @return the input file names + */ + public String[] getInputFileNames() { + return inputFileNames; + } + + /** + * Gets the output file names. + * + * @return the output file names + */ + public String[] getOutputFileNames() { + return outputFileNames; + } + + /** + * Gets the command line. 
+ * + * @return the command line + */ + public String getCommandLine() { + return commandLine; + } + + public boolean hasStrandedOption() { + return options.has("stranded"); + } + + public String getPosition() { + return position; + } + public String getPileupFormat() { + return pileupFormat; + } + public int getNormalCoverage() { + return normalCoverage; + } + public int getTumourCoverage() { + return tumourCoverage; + } + public int getMinCoverage() { + return minCoverage; + } + public int getNumberOfThreads() { + return numberOfThreads; + } + public String getMafMode() { + return mafMode; + } + public String getGffFile() { + return gff; + } + public String getFastaFile() { + return fasta; + } + + public String getMode() { + return mode; + } + + public int getcompressLevel() throws Exception{ + if(options.has("compressLevel")){ + int l = (int) options.valueOf("compressLevel"); + if(l >= 0 && l <= 9) + return l; + else + throw new Exception("compressLevel must between [0,9]"); + } + + return 5; + } + //subSample + public double getPROPORTION() throws Exception{ + if(options.has("proportion")){ + + double prop = (double) options.valueOf("proportion"); +// double prop = Double.parseDouble( (String) options.valueOf("proportion") ); + if(prop > 0 && prop <= 1){ + return prop; + + } + } + throw new Exception("no proportion are specified"); + } + + + /** + * Display help. + * + * @throws Exception the exception + */ + public void displayHelp() throws IOException { + parser.printHelpOn(System.out); + } + + /** + * Detect bad options. + * + * @throws Exception the exception + */ + public void detectBadOptions() throws Exception { + if (hasNonOptions()) { + throw new Exception("ALL_ARGUMENTS_MUST_BE_OPTIONS"); + } + if (hasOutputOption() && 1 != getOutputFileNames().length) { + throw new Exception("MULTIPLE_OUTPUT_FILES_SPECIFIED"); + } + if (!hasInputOption()) { + throw new Exception("MISSING_INPUT_OPTIONS"); + } + } + + public String getLogFile(){ + return logFile; + } + + public String getLogLevel(){ + return logLevel; + } + + public Properties getIds() { + Properties props = new Properties(); + props.put(Ids.PATIENT, patientId); + props.put(Ids.SOMATIC_ANALYSIS, somaticAnalysisId); + props.put(Ids.GEMLINE_ANALYSIS, germlineAnalysisId); + props.put(Ids.NORMAL_SAMPLE, normalSampleId); + props.put(Ids.TUMOUR_SAMPLE, tumourSampleId); + return props; + } + + public String[] getGffRegions() { + + return gffRegions; + } + + public int getNoOfBases() { + + return noOfBases; + } + + public String getColumn() { + return column; + } + + public String getAnnotation() { + return annotation; + } + + public String[] getFeature() { + if (features != null) { + return features.split(","); + } + return null; + } + +}
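Editor's note: the sketch below is not part of the patch. It is a cut-down version of the jopt-simple parsing performed in the Options constructor above; only a handful of the registered options are shown, and the remaining ones follow the same accepts/withRequiredArg/ofType pattern.

// Editor's illustrative sketch only; minimal jopt-simple parsing in the same style as Options.
import java.util.List;
import joptsimple.OptionParser;
import joptsimple.OptionSet;

public class OptionsParsingSketch {
	public static void main(String[] args) {
		OptionParser parser = new OptionParser();
		parser.accepts("input").withRequiredArg().ofType(String.class).describedAs("inputfile");
		parser.accepts("output").withRequiredArg().ofType(String.class).describedAs("outputfile");
		parser.accepts("log").withRequiredArg().ofType(String.class).describedAs("logfile");
		parser.accepts("mode").withRequiredArg().ofType(String.class).describedAs("mode");

		OptionSet options = parser.parse(args);
		List<?> inputs = options.valuesOf("input");      // repeated --input flags collect into a list
		String mode = (String) options.valueOf("mode");  // null when --mode is not supplied
		System.out.println("inputs=" + inputs + " mode=" + mode);
	}
}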
+ * + * @return the command line + */ + public String getCommandLine() { + return commandLine; + } + + public boolean hasStrandedOption() { + return options.has("stranded"); + } + + public String getPosition() { + return position; + } + public String getPileupFormat() { + return pileupFormat; + } + public int getNormalCoverage() { + return normalCoverage; + } + public int getTumourCoverage() { + return tumourCoverage; + } + public int getMinCoverage() { + return minCoverage; + } + public int getNumberOfThreads() { + return numberOfThreads; + } + public String getMafMode() { + return mafMode; + } + public String getGffFile() { + return gff; + } + public String getFastaFile() { + return fasta; + } + + public String getMode() { + return mode; + } + + public int getcompressLevel() throws Exception{ + if(options.has("compressLevel")){ + int l = (int) options.valueOf("compressLevel"); + if(l >= 0 && l <= 9) + return l; + else + throw new Exception("compressLevel must between [0,9]"); + } + + return 5; + } + //subSample + public double getPROPORTION() throws Exception{ + if(options.has("proportion")){ + + double prop = (double) options.valueOf("proportion"); +// double prop = Double.parseDouble( (String) options.valueOf("proportion") ); + if(prop > 0 && prop <= 1){ + return prop; + + } + } + throw new Exception("no proportion are specified"); + } + + + /** + * Display help. + * + * @throws Exception the exception + */ + public void displayHelp() throws IOException { + parser.printHelpOn(System.out); + } + + /** + * Detect bad options. + * + * @throws Exception the exception + */ + public void detectBadOptions() throws Exception { + if (hasNonOptions()) { + throw new Exception("ALL_ARGUMENTS_MUST_BE_OPTIONS"); + } + if (hasOutputOption() && 1 != getOutputFileNames().length) { + throw new Exception("MULTIPLE_OUTPUT_FILES_SPECIFIED"); + } + if (!hasInputOption()) { + throw new Exception("MISSING_INPUT_OPTIONS"); + } + } + + public String getLogFile(){ + return logFile; + } + + public String getLogLevel(){ + return logLevel; + } + + public Properties getIds() { + Properties props = new Properties(); + props.put(Ids.PATIENT, patientId); + props.put(Ids.SOMATIC_ANALYSIS, somaticAnalysisId); + props.put(Ids.GEMLINE_ANALYSIS, germlineAnalysisId); + props.put(Ids.NORMAL_SAMPLE, normalSampleId); + props.put(Ids.TUMOUR_SAMPLE, tumourSampleId); + return props; + } + + public String[] getGffRegions() { + + return gffRegions; + } + + public int getNoOfBases() { + + return noOfBases; + } + + public String getColumn() { + return column; + } + + public String getAnnotation() { + return annotation; + } + + public String[] getFeature() { + if (features != null) { + return features.split(","); + } + return null; + } + +} diff --git a/qmule/src/org/qcmg/qmule/Pileup.java b/qmule/src/org/qcmg/qmule/Pileup.java new file mode 100644 index 000000000..c1503ab6a --- /dev/null +++ b/qmule/src/org/qcmg/qmule/Pileup.java @@ -0,0 +1,101 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */
+package org.qcmg.qmule;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SAMRecord;
+
+import org.qcmg.common.log.QLogger;
+import org.qcmg.common.log.QLoggerFactory;
+import org.qcmg.common.model.ChrPointPosition;
+import org.qcmg.common.model.ChrPosition;
+import org.qcmg.common.model.ChrPositionComparator;
+import org.qcmg.common.model.QPileupSimpleRecord;
+import org.qcmg.picard.SAMFileReaderFactory;
+
+
+public class Pileup {
+    private static final Comparator<ChrPosition> COMPARATOR = new ChrPositionComparator();
+    private static QLogger logger = QLoggerFactory.getLogger(Pileup.class);
+
+    Map<ChrPosition, QPileupSimpleRecord> pileup = new TreeMap<>();
+//  Map<ChrPosition, QPileupSimpleRecord> pileup = new HashMap<>(10000000, 0.99f);
+
+    private void engage(String args[]) throws IOException {
+
+        SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(args[0]));
+        FileWriter writer = new FileWriter(new File(args[1]));
+
+        int counter = 0;
+        for (SAMRecord sr : reader) {
+            parseRecord(sr);
+            if (++counter % 100000 == 0) {
+                logger.info("hit " + counter + " reads in bam file, size of pileup map is: " + pileup.size());
+
+                // output contents of pileup to file to clear memory
+                // get current chromosome and position and write out
+                // all records a couple of hundred bases prior to that position
+                writePileup(writer, sr.getReferenceName(), sr.getAlignmentStart() - 500);
+            }
+        }
+        logger.info("Done!! No of reads in file: " + counter + ", size of pileup map is: " + pileup.size() );
+    }
+
+    private void writePileup(FileWriter writer, String chromosome, int position) throws IOException {
+        ChrPosition chrPos = ChrPointPosition.valueOf(chromosome, position);
+
+        Iterator<Entry<ChrPosition, QPileupSimpleRecord>> iter = pileup.entrySet().iterator();
+
+        while (iter.hasNext()) {
+            Map.Entry<ChrPosition, QPileupSimpleRecord> entry = iter.next();
+            if (0 < COMPARATOR.compare(chrPos, entry.getKey())) {
+
+                writer.write(entry.getKey().getChromosome() + "\t" +
+                        entry.getKey().getStartPosition() + "\t" +
+                        entry.getValue().getFormattedString());
+
+                iter.remove();
+            }
+        }
+
+    }
+
+    private void parseRecord(SAMRecord sr) {
+
+        ChrPosition chrPos;
+        QPileupSimpleRecord pileupRec;
+        int position = 0;
+
+        for (byte b : sr.getReadBases()) {
+            chrPos = ChrPointPosition.valueOf(sr.getReferenceName(), sr.getAlignmentStart() + position++);
+            pileupRec = pileup.get(chrPos);
+            if (null == pileupRec) {
+                pileupRec = new QPileupSimpleRecord();
+                pileup.put(chrPos, pileupRec);
+            }
+            pileupRec.incrementBase(b);
+        }
+
+
+    }
+
+
+
+    public static void main(String[] args) throws IOException {
+        Pileup p = new Pileup();
+        p.engage(args);
+    }
+}
diff --git a/qmule/src/org/qcmg/qmule/Pileup.java-- b/qmule/src/org/qcmg/qmule/Pileup.java--
new file mode 100644
index 000000000..c1503ab6a
--- /dev/null
+++ b/qmule/src/org/qcmg/qmule/Pileup.java--
@@ -0,0 +1,101 @@
+/**
+ * © Copyright The University of Queensland 2010-2014.
+ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
+ *
+ * This code is released under the terms outlined in the included LICENSE file.
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; +import java.util.Map.Entry; + +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionComparator; +import org.qcmg.common.model.QPileupSimpleRecord; +import org.qcmg.picard.SAMFileReaderFactory; + + +public class Pileup { + private static final Comparator COMPARATOR = new ChrPositionComparator(); + private static QLogger logger = QLoggerFactory.getLogger(Pileup.class); + + Map pileup = new TreeMap(); +// Map pileup = new HashMap(10000000, 0.99f); + + private void engage(String args[]) throws IOException { + + SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(args[0])); + FileWriter writer = new FileWriter(new File(args[1])); + + int counter = 0; + for (SAMRecord sr : reader) { + parseRecord(sr); + if (++counter % 100000 == 0) { + logger.info("hit " + counter + " reads in bam file, size of pileup map is: " + pileup.size()); + + // output contents of pileup to file to clear memory + // get current chromosome and position an write out + //all records a couple of hundred bases prior to that position + writePileup(writer, sr.getReferenceName(), sr.getAlignmentStart() - 500); + } + } + logger.info("Done!! No of reads in file: " + counter + ", size of pileup map is: " + pileup.size() ); + } + + private void writePileup(FileWriter writer, String chromosome, int position) throws IOException { + ChrPosition chrPos = ChrPointPosition.valueOf(chromosome, position); + + Iterator> iter = pileup.entrySet().iterator(); + + while (iter.hasNext()) { + Map.Entry entry = iter.next(); + if (0 < COMPARATOR.compare(chrPos, entry.getKey())) { + + writer.write(entry.getKey().getChromosome() + "\t" + + entry.getKey().getStartPosition() + "\t" + + entry.getValue().getFormattedString()); + + iter.remove(); + } + } + + } + + private void parseRecord(SAMRecord sr) { + + ChrPosition chrPos; + QPileupSimpleRecord pileupRec; + int position = 0; + + for (byte b : sr.getReadBases()) { + chrPos = ChrPointPosition.valueOf(sr.getReferenceName(), sr.getAlignmentStart() + position++); + pileupRec = pileup.get(chrPos); + if (null == pileupRec) { + pileupRec = new QPileupSimpleRecord(); + pileup.put(chrPos, pileupRec); + } + pileupRec.incrementBase(b); + } + + + } + + + + public static void main(String[] args) throws IOException { + Pileup p = new Pileup(); + p.engage(args); + } +} diff --git a/qmule/src/org/qcmg/qmule/PileupStats.java b/qmule/src/org/qcmg/qmule/PileupStats.java new file mode 100644 index 000000000..e2ea6d844 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/PileupStats.java @@ -0,0 +1,254 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Map.Entry; +import java.util.TreeMap; + +import htsjdk.samtools.Cigar; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordIterator; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.picard.SAMFileReaderFactory; + +public class PileupStats { + + private String logFile; + private File inputFile; + private File outputFile; + private File bamFile; + private static QLogger logger; + + public int engage() throws Exception { + + BufferedReader reader = new BufferedReader(new FileReader(inputFile)); + BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); + + writer.write(getHeader()); + String line; + int count = 0; + while ((line = reader.readLine()) != null) { + String[] values = line.split("\t"); + + String result = pileup(values[0], new Integer(values[1]), new Integer(values[2])); + + writer.write(line + "\t" + result + "\n"); + //System.out.println(line + "\t " + result); + if (count++ % 1000 == 0) { + logger.info("Number processed: " + count); + } + } + logger.info("Total processed: " + count); + reader.close(); + writer.close(); + + return 0; + } + + private String getHeader() { + return "chr\tposition\tposition\tbed\tbed\tbed\ttotal reads\ttotal unmapped" + + "\ttotal mates unmapped\ttotal indels\ttotal mismatch reads\ttotal soft clips" + + "\ttotal hard clips\ttotal spliced reads\ttotal duplicates\tmismatch counts\tsplice lengths\n"; + } + + private String pileup(String chromosome, int start, int end) throws IOException { + SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile, "silent"); + + SAMRecordIterator iterator = reader.queryOverlapping(chromosome, start, end); + + int totalReads = 0; + int totalMatesUnmapped = 0; + int totalUnmapped = 0; + int totalDuplicates = 0; + int totalMismatches = 0; + int totalSpliced = 0; + int totalSoftClips = 0; + int totalHardClips = 0; + int totalIndels = 0; + TreeMap spliceMap = new TreeMap(); + TreeMap mismatchMap = new TreeMap(); + + while (iterator.hasNext()) { + SAMRecord record = iterator.next(); + if (record.getReadUnmappedFlag()) { + totalUnmapped++; + } else { + totalReads++; + if (record.getDuplicateReadFlag()) { + totalDuplicates++; + } else { + + if (record.getMateUnmappedFlag()) { + totalMatesUnmapped++; + } + + //cigars + Cigar cigar = record.getCigar(); + + for (CigarElement ce : cigar.getCigarElements()) { + if (ce.getOperator().equals(CigarOperator.DELETION) || ce.getOperator().equals(CigarOperator.INSERTION)) { + totalIndels++; + } + + if (ce.getOperator().equals(CigarOperator.SOFT_CLIP)) { + totalSoftClips++; + } + if (ce.getOperator().equals(CigarOperator.HARD_CLIP)) { + totalHardClips++; + } + if (ce.getOperator().equals(CigarOperator.N)) { + totalSpliced++; + Integer length = new Integer(ce.getLength()); + int count = 1; + if (spliceMap.containsKey(length)) { + count += spliceMap.get(length); + } + spliceMap.put(length, count); + } + } + + //MD tag + String mdData = (String) record.getAttribute("MD"); + int matches = tallyMDMismatches(mdData); + if (matches > 0) { + totalMismatches++; + } + int count = 1; + if (mismatchMap.containsKey(matches)) { + count += 
mismatchMap.get(matches); + } + mismatchMap.put(matches, count); + + } + } + + } + + iterator.close(); + reader.close(); + + String spliceCounts = getMapString(spliceMap); + String mismatchCounts = getMapString(mismatchMap); + + String result = totalReads + "\t" + totalUnmapped + "\t" + totalMatesUnmapped + "\t" + totalIndels + "\t" + + totalMismatches + "\t" + totalSoftClips + "\t" + totalHardClips + "\t" + totalSpliced + "\t" + totalDuplicates + + "\t" + mismatchCounts + "\t" + spliceCounts; + return result; + } + + private String getMapString(TreeMap map) { + StringBuilder sb = new StringBuilder(); + + for (Entry entry: map.entrySet()) { + sb.append(entry.getKey() + ":" + entry.getValue() + ";"); + } + + return sb.toString(); + } + + public int tallyMDMismatches(String mdData) { + int count = 0; + if (null != mdData) { + for (int i = 0, size = mdData.length() ; i < size ; ) { + char c = mdData.charAt(i); + if (isValidMismatch(c)) { + count++; + i++; + } else if ('^' == c) { + while (++i < size && Character.isLetter(mdData.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... + } + } + return count; + } + + private boolean isValidMismatch(char c) { + return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(PileupStats.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("PileupStats", PileupStats.class.getPackage().getImplementationVersion(), args); + + // get list of file names + String[] cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 2) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + String[] cmdLineOutputFiles = options.getOutputFileNames(); + if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + + for (String file : cmdLineOutputFiles) { + if (new File(file).exists() && !new File(file).isDirectory()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + + bamFile = new File(cmdLineInputFiles[0]); + inputFile = new File(cmdLineInputFiles[1]); + outputFile = new File(cmdLineOutputFiles[0]); + logger.info("Bam file: " + bamFile); + logger.info("Input file: " + inputFile); + logger.info("Output file: " + outputFile); + + } + + return returnStatus; + } + + public static void main(String[] args) throws Exception { + PileupStats sp = new PileupStats(); + sp.setup(args); + int exitStatus = sp.engage(); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } +} diff --git a/qmule/src/org/qcmg/qmule/PileupStats.java-- b/qmule/src/org/qcmg/qmule/PileupStats.java-- new file mode 100644 index 000000000..e2ea6d844 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/PileupStats.java-- @@ -0,0 +1,254 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Map.Entry; +import java.util.TreeMap; + +import htsjdk.samtools.Cigar; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordIterator; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.picard.SAMFileReaderFactory; + +public class PileupStats { + + private String logFile; + private File inputFile; + private File outputFile; + private File bamFile; + private static QLogger logger; + + public int engage() throws Exception { + + BufferedReader reader = new BufferedReader(new FileReader(inputFile)); + BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); + + writer.write(getHeader()); + String line; + int count = 0; + while ((line = reader.readLine()) != null) { + String[] values = line.split("\t"); + + String result = pileup(values[0], new Integer(values[1]), new Integer(values[2])); + + writer.write(line + "\t" + result + "\n"); + //System.out.println(line + "\t " + result); + if (count++ % 1000 == 0) { + logger.info("Number processed: " + count); + } + } + logger.info("Total processed: " + count); + reader.close(); + writer.close(); + + return 0; + } + + private String getHeader() { + return "chr\tposition\tposition\tbed\tbed\tbed\ttotal reads\ttotal unmapped" + + "\ttotal mates unmapped\ttotal indels\ttotal mismatch reads\ttotal soft clips" + + "\ttotal hard clips\ttotal spliced reads\ttotal duplicates\tmismatch counts\tsplice lengths\n"; + } + + private String pileup(String chromosome, int start, int end) throws IOException { + SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile, "silent"); + + SAMRecordIterator iterator = reader.queryOverlapping(chromosome, start, end); + + int totalReads = 0; + int totalMatesUnmapped = 0; + int totalUnmapped = 0; + int totalDuplicates = 0; + int totalMismatches = 0; + int totalSpliced = 0; + int 
totalSoftClips = 0; + int totalHardClips = 0; + int totalIndels = 0; + TreeMap spliceMap = new TreeMap(); + TreeMap mismatchMap = new TreeMap(); + + while (iterator.hasNext()) { + SAMRecord record = iterator.next(); + if (record.getReadUnmappedFlag()) { + totalUnmapped++; + } else { + totalReads++; + if (record.getDuplicateReadFlag()) { + totalDuplicates++; + } else { + + if (record.getMateUnmappedFlag()) { + totalMatesUnmapped++; + } + + //cigars + Cigar cigar = record.getCigar(); + + for (CigarElement ce : cigar.getCigarElements()) { + if (ce.getOperator().equals(CigarOperator.DELETION) || ce.getOperator().equals(CigarOperator.INSERTION)) { + totalIndels++; + } + + if (ce.getOperator().equals(CigarOperator.SOFT_CLIP)) { + totalSoftClips++; + } + if (ce.getOperator().equals(CigarOperator.HARD_CLIP)) { + totalHardClips++; + } + if (ce.getOperator().equals(CigarOperator.N)) { + totalSpliced++; + Integer length = new Integer(ce.getLength()); + int count = 1; + if (spliceMap.containsKey(length)) { + count += spliceMap.get(length); + } + spliceMap.put(length, count); + } + } + + //MD tag + String mdData = (String) record.getAttribute("MD"); + int matches = tallyMDMismatches(mdData); + if (matches > 0) { + totalMismatches++; + } + int count = 1; + if (mismatchMap.containsKey(matches)) { + count += mismatchMap.get(matches); + } + mismatchMap.put(matches, count); + + } + } + + } + + iterator.close(); + reader.close(); + + String spliceCounts = getMapString(spliceMap); + String mismatchCounts = getMapString(mismatchMap); + + String result = totalReads + "\t" + totalUnmapped + "\t" + totalMatesUnmapped + "\t" + totalIndels + "\t" + + totalMismatches + "\t" + totalSoftClips + "\t" + totalHardClips + "\t" + totalSpliced + "\t" + totalDuplicates + + "\t" + mismatchCounts + "\t" + spliceCounts; + return result; + } + + private String getMapString(TreeMap map) { + StringBuilder sb = new StringBuilder(); + + for (Entry entry: map.entrySet()) { + sb.append(entry.getKey() + ":" + entry.getValue() + ";"); + } + + return sb.toString(); + } + + public int tallyMDMismatches(String mdData) { + int count = 0; + if (null != mdData) { + for (int i = 0, size = mdData.length() ; i < size ; ) { + char c = mdData.charAt(i); + if (isValidMismatch(c)) { + count++; + i++; + } else if ('^' == c) { + while (++i < size && Character.isLetter(mdData.charAt(i))) {} + } else i++; // need to increment this or could end up with infinite loop... + } + } + return count; + } + + private boolean isValidMismatch(char c) { + return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(PileupStats.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("PileupStats", PileupStats.class.getPackage().getImplementationVersion(), args); + + // get list of file names + String[] cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 2) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + String[] cmdLineOutputFiles = options.getOutputFileNames(); + if ( ! FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); + } + + for (String file : cmdLineOutputFiles) { + if (new File(file).exists() && !new File(file).isDirectory()) { + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); + } + } + + bamFile = new File(cmdLineInputFiles[0]); + inputFile = new File(cmdLineInputFiles[1]); + outputFile = new File(cmdLineOutputFiles[0]); + logger.info("Bam file: " + bamFile); + logger.info("Input file: " + inputFile); + logger.info("Output file: " + outputFile); + + } + + return returnStatus; + } + + public static void main(String[] args) throws Exception { + PileupStats sp = new PileupStats(); + sp.setup(args); + int exitStatus = sp.engage(); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } +} diff --git a/qmule/src/org/qcmg/qmule/QMuleException.java b/qmule/src/org/qcmg/qmule/QMuleException.java new file mode 100644 index 000000000..2e85e03f0 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/QMuleException.java @@ -0,0 +1,28 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +public final class QMuleException extends Exception { + private static final long serialVersionUID = -4575755996356751582L; + + public QMuleException(final String identifier) { + super(Messages.getMessage(identifier)); + } + + public QMuleException(final String identifier, final String argument) { + super(Messages.getMessage(identifier, argument)); + } + + public QMuleException(final String identifier, final String arg1, final String arg2) { + super(Messages.getMessage(identifier, arg1, arg2)); + } + + public QMuleException(final String identifier, final String arg1, final String arg2, final String arg3) { + super(Messages.getMessage(identifier, arg1, arg2, arg3)); + } + + public QMuleException(final String identifier, final Object[] arguments) { + super(Messages.getMessage(identifier, arguments)); + } +} diff --git a/qmule/src/org/qcmg/qmule/QMuleException.java-- b/qmule/src/org/qcmg/qmule/QMuleException.java-- new file mode 100644 index 000000000..2e85e03f0 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/QMuleException.java-- @@ -0,0 +1,28 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */
+package org.qcmg.qmule;
+
+public final class QMuleException extends Exception {
+    private static final long serialVersionUID = -4575755996356751582L;
+
+    public QMuleException(final String identifier) {
+        super(Messages.getMessage(identifier));
+    }
+
+    public QMuleException(final String identifier, final String argument) {
+        super(Messages.getMessage(identifier, argument));
+    }
+
+    public QMuleException(final String identifier, final String arg1, final String arg2) {
+        super(Messages.getMessage(identifier, arg1, arg2));
+    }
+
+    public QMuleException(final String identifier, final String arg1, final String arg2, final String arg3) {
+        super(Messages.getMessage(identifier, arg1, arg2, arg3));
+    }
+
+    public QMuleException(final String identifier, final Object[] arguments) {
+        super(Messages.getMessage(identifier, arguments));
+    }
+}
diff --git a/qmule/src/org/qcmg/qmule/QueryCADDLib.java b/qmule/src/org/qcmg/qmule/QueryCADDLib.java
new file mode 100644
index 000000000..eece05fe3
--- /dev/null
+++ b/qmule/src/org/qcmg/qmule/QueryCADDLib.java
@@ -0,0 +1,187 @@
+/**
+ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
+ *
+ * This code is released under the terms outlined in the included LICENSE file.
+*/
+package org.qcmg.qmule;
+
+
+import htsjdk.tribble.readers.TabixReader;
+
+import org.qcmg.common.util.TabTokenizer;
+import org.qcmg.common.vcf.VcfRecord;
+import org.qcmg.common.vcf.header.VcfHeader;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.qcmg.common.log.QLogger;
+import org.qcmg.common.model.ChrPosition;
+import org.qcmg.common.model.ChrRangePosition;
+import org.qcmg.vcf.VCFFileReader;
+
+
+public class QueryCADDLib {
+//  protected final static ArrayList libBlocks = new ArrayList<>();
+//  protected final static ArrayList inputBlocks = new ArrayList<>();
+//  protected final static ArrayList outputBlocks = new ArrayList<>();
+
+    protected final static Map<ChrPosition, VcfRecord> positionRecordMap = new HashMap<>();
+    protected static long outputNo = 0;
+    protected static long blockNo = 0;
+    protected static long inputNo = 0;
+    final String CADD = "CADD";
+
+    public QueryCADDLib(final String input_gzip_file, final String vcf, final String output, final int gap) throws IOException{
+
+        TabixReader tabix = new TabixReader( input_gzip_file);
+        String chr = null;
+        int pos = 0;
+        int start = -1;
+
+        System.out.println("Below are the stats for each queried block, in the format \norder: query(ref,start,end) [CADDLibBlockSize, inputVariantNo, outputVariantNo, runtime]");
+
+        try (VCFFileReader reader = new VCFFileReader(vcf);
+                FileWriter writer = new FileWriter(new File(output))) {
+            for (final VcfRecord re : reader){
+                if(re.getChromosome().equals(chr) &&
+                        (re.getPosition() - pos) < gap ){
+                    pos = re.getPosition();
+                    add2Map(re);
+                }else{
+                    //s1: query(chr:start:pos), and output
+                    if(chr != null){
+                        if(chr.startsWith("chr")) chr = chr.substring(3);
+                        TabixReader.Iterator it = tabix.query(chr, start-1, pos);
+                        //debug
+                        System.out.print(String.format("%8d: query(%s, %8d, %8d) ", blockNo++, chr, start, pos));
+                        query( it, writer );
+
+                    }
+                    //s2: reset
+//                  //debug bf clear
+//                  for( Entry entry: positionRecordMap.entrySet()){
+//                      if(entry.getValue().getFilter() == null)
+//                          System.out.println(entry.getValue().toString());
+//                  }
+
+                    positionRecordMap.clear();
+                    chr = re.getChromosome();
+                    start = re.getPosition();
+                    pos = re.getPosition();
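+                    // the current record starts a new query block: it is cached below and queried later,
+                    // either when a subsequent variant falls outside the gap window or by the final "last block" query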
+                    add2Map(re);
+                }
+            }
+            //last block
+            if(chr != null){
+                if(chr.startsWith("chr")) chr = chr.substring(3);
+                TabixReader.Iterator it = tabix.query(chr, start, pos);
+                query( it, writer );
+            }
+
+        }//end try
+
+        System.out.println("total input variants is " + inputNo);
+        System.out.println("total output and annotated variants is " + outputNo);
+        System.out.println("total number of queried CADD library blocks is " + blockNo);
+
+    }
+
+    /**
+     * removes the "chr" prefix from the reference name if it is present
+     * @param re input vcf record
+     */
+    private void add2Map(VcfRecord re){
+        ChrPosition chr = re.getChrPosition();
+        if(chr.getChromosome().startsWith("chr"))
+            chr = new ChrRangePosition(re.getChromosome().substring(3), re.getChrPosition().getStartPosition(), re.getChrPosition().getEndPosition()); // orig.getChromosome().substring(3);
+
+
+        re.setFilter(null); //for debug
+        positionRecordMap.put(chr, re);
+    }
+
+
+    private void query(TabixReader.Iterator it,FileWriter writer ) throws IOException{
+        long startTime = System.currentTimeMillis();
+
+        String line;
+        String[] eles;
+        String last = null;
+
+        int blockSize = 0;
+        int outputSize = 0;
+
+        while(( line = it.next())!= null){
+            blockSize ++;
+            eles = TabTokenizer.tokenize(line, '\t');
+            int s = Integer.parseInt(eles[1]); //start position = second column
+            int e = s + eles[2].length() - 1; //start position + length -1
+
+            //only retrieve the first annotation entry from the CADD library
+            String entry = eles[0] + ":" + eles[1] + ":" +eles[2]+ ":" + eles[4];
+            if(entry.equals(last)) continue;
+            else last = entry;
+
+            VcfRecord inputVcf = positionRecordMap.get(new ChrRangePosition(eles[0], s, e ));
+
+            if ( (null == inputVcf) || !inputVcf.getRef().equalsIgnoreCase(eles[2])) continue;
+
+            String[] allels = {inputVcf.getAlt()};
+            if(inputVcf.getAlt().contains(","))
+                allels = TabTokenizer.tokenize(inputVcf.getAlt(), ',');
+
+            String cadd = "";
+
+            //append a CADD annotation for each alt allele that matches this library entry
+            for(String al : allels)
+                if(al.equalsIgnoreCase(eles[4])){
+                    cadd = String.format("(%s=>%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s)", eles[2],eles[4],eles[8],eles[10],eles[11],eles[12],eles[17],
+                            eles[21],eles[26],eles[35],eles[39],eles[72],eles[82],eles[83],eles[86],eles[92],eles[92],eles[93],eles[96]);
+                    String info = inputVcf.getInfoRecord().getField(CADD);
+                    info = (info == null)? 
CADD + "=" + cadd : CADD + "=" + info + "," + cadd; + inputVcf.appendInfo( info); + + writer.append(inputVcf.toString() + "\n"); + outputSize ++; + } + } + + //get stats + long endTime = System.currentTimeMillis(); + String time = QLogger.getRunTime(startTime, endTime); + System.out.println(String.format("[ %8d,%8d,%8d, %s ] ", blockSize, positionRecordMap.size(), outputSize, time)); + inputNo += positionRecordMap.size(); + outputNo += outputSize; + } + + + public static void main(String[] args) { + + long startTime = System.currentTimeMillis(); + try{ + String gzlib = args[0]; + String input = args[1]; + String output = args[2]; + int gap = 1000; + if(args.length > 3) + gap = Integer.parseInt(args[3]); + + new QueryCADDLib(gzlib, input, output, gap); + + }catch(Exception e){ + e.printStackTrace(); + System.err.println("Usage: java -cp qmule-0.1pre.jar QueryCADDLib "); + } + + long endTime = System.currentTimeMillis(); + String time = QLogger.getRunTime(startTime, endTime); + System.out.println("run Time is " + time); + } +} + diff --git a/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- b/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- new file mode 100644 index 000000000..eece05fe3 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- @@ -0,0 +1,187 @@ +/** + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. +*/ +package org.qcmg.qmule; + + +import htsjdk.tribble.readers.TabixReader; + +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.header.VcfHeader; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.vcf.VCFFileReader; + + +public class QueryCADDLib { +// protected final static ArrayList libBlocks = new ArrayList<>(); +// protected final static ArrayList inputBlocks = new ArrayList<>(); +// protected final static ArrayList outputBlocks = new ArrayList<>(); + + protected final static Map positionRecordMap = new HashMap(); + protected static long outputNo = 0; + protected static long blockNo = 0; + protected static long inputNo = 0; + final String CADD = "CADD"; + + public QueryCADDLib(final String input_gzip_file, final String vcf, final String output, final int gap) throws IOException{ + + TabixReader tabix = new TabixReader( input_gzip_file); + String chr = null; + int pos = 0; + int start = -1; + + System.out.println("Below is the stats for each queried block, follow the format \norder: query(ref,start,end) [CADDLibBlockSize, inputVariantNo, outputVariantNo, runtime]"); + + try (VCFFileReader reader = new VCFFileReader(vcf); + FileWriter writer = new FileWriter(new File(output))) { + for (final VcfRecord re : reader){ + if(re.getChromosome().equals(chr) && + (re.getPosition() - pos) < gap ){ + pos = re.getPosition(); + add2Map(re); + }else{ + //s1: query(chr:start:pos), and output + if(chr != null){ + if(chr.startsWith("chr")) chr = chr.substring(3); + TabixReader.Iterator it = tabix.query(chr, start-1, pos); + //debug + System.out.print(String.format("%8d: query(%s, %8d, %8d) ", blockNo++, chr, start, pos)); + query( it, writer ); + + } + //s2: reset +// //debug bf clear +// for( Entry entry: positionRecordMap.entrySet()){ +// 
if(entry.getValue().getFilter() == null) +// System.out.println(entry.getValue().toString()); +// } + + positionRecordMap.clear(); + chr = re.getChromosome(); + start = re.getPosition(); + pos = re.getPosition(); + add2Map(re); + } + } + //last block + if(chr != null){ + if(chr.startsWith("chr")) chr = chr.substring(3); + TabixReader.Iterator it = tabix.query(chr, start, pos); + query( it, writer ); + } + + }//end try + + System.out.println("total input variants is " + inputNo); + System.out.println("total outputed and annotated variants is " + outputNo); + System.out.println("total query CADD library time is " + blockNo); + + } + + /** + * it remove "chr" string from reference name if exists + * @param re input vcf record + */ + private void add2Map(VcfRecord re){ + ChrPosition chr = re.getChrPosition(); + if(chr.getChromosome().startsWith("chr")) + chr = new ChrRangePosition(re.getChromosome().substring(3), re.getChrPosition().getStartPosition(), re.getChrPosition().getEndPosition()); // orig.getChromosome().substring(3); + + + re.setFilter(null); //for debug + positionRecordMap.put(chr, re); + } + + + private void query(TabixReader.Iterator it,FileWriter writer ) throws IOException{ + long startTime = System.currentTimeMillis(); + + String line; + String[] eles; + String last = null; + + int blockSize = 0; + int outputSize = 0; + + while(( line = it.next())!= null){ + blockSize ++; + eles = TabTokenizer.tokenize(line, '\t'); + int s = Integer.parseInt(eles[1]); //start position = second column + int e = s + eles[2].length() - 1; //start position + length -1 + + //only retrive the first annotation entry from CADD library + String entry = eles[0] + ":" + eles[1] + ":" +eles[2]+ ":" + eles[4]; + if(entry.equals(last)) continue; + else last = entry; + + VcfRecord inputVcf = positionRecordMap.get(new ChrRangePosition(eles[0], s, e )); + + if ( (null == inputVcf) || !inputVcf.getRef().equalsIgnoreCase(eles[2])) continue; + + String[] allels = {inputVcf.getAlt()}; + if(inputVcf.getAlt().contains(",")) + allels = TabTokenizer.tokenize(inputVcf.getAlt(), ','); + + String cadd = ""; + + //it will exit loop once find the matched allele + for(String al : allels) + if(al.equalsIgnoreCase(eles[4])){ + cadd = String.format("(%s=>%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s)", eles[2],eles[4],eles[8],eles[10],eles[11],eles[12],eles[17], + eles[21],eles[26],eles[35],eles[39],eles[72],eles[82],eles[83],eles[86],eles[92],eles[92],eles[93],eles[96]); + String info = inputVcf.getInfoRecord().getField(CADD); + info = (info == null)? 
CADD + "=" + cadd : CADD + "=" + info + "," + cadd; + inputVcf.appendInfo( info); + + writer.append(inputVcf.toString() + "\n"); + outputSize ++; + } + } + + //get stats + long endTime = System.currentTimeMillis(); + String time = QLogger.getRunTime(startTime, endTime); + System.out.println(String.format("[ %8d,%8d,%8d, %s ] ", blockSize, positionRecordMap.size(), outputSize, time)); + inputNo += positionRecordMap.size(); + outputNo += outputSize; + } + + + public static void main(String[] args) { + + long startTime = System.currentTimeMillis(); + try{ + String gzlib = args[0]; + String input = args[1]; + String output = args[2]; + int gap = 1000; + if(args.length > 3) + gap = Integer.parseInt(args[3]); + + new QueryCADDLib(gzlib, input, output, gap); + + }catch(Exception e){ + e.printStackTrace(); + System.err.println("Usage: java -cp qmule-0.1pre.jar QueryCADDLib "); + } + + long endTime = System.currentTimeMillis(); + String time = QLogger.getRunTime(startTime, endTime); + System.out.println("run Time is " + time); + } +} + diff --git a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java new file mode 100644 index 000000000..86499809c --- /dev/null +++ b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java @@ -0,0 +1,280 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionComparator; +import org.qcmg.common.string.StringUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; +import org.qcmg.vcf.VCFFileReader; + +public class ReAnnotateDccWithDbSNP { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + private String header; + + private static QLogger logger; + + private Map dccs = new HashMap(); + + + public int engage() throws Exception { + + loadDccFile(); + + updateDBSnpData(); + + writeDCCOutput(); + + + return exitStatus; + } + + private void writeDCCOutput() throws Exception { + if ( ! 
StringUtils.isNullOrEmpty(cmdLineOutputFiles[0])) { + FileWriter writer = new FileWriter(new File(cmdLineOutputFiles[0])); + try { + //sort + List data = new ArrayList(dccs.keySet()); + Collections.sort(data, new ChrPositionComparator()); + + + writer.write(header + "\tdbSnpVer\n"); + + for (ChrPosition cp : data) { + String[] dcc = dccs.get(cp); + StringBuilder sb = new StringBuilder(); + for (String s : dcc) { + if (sb.length() > 0) sb.append('\t'); + sb.append(s); + } + writer.write(sb.toString() + '\n'); + } + + } finally { + writer.close(); + } + } + } + + + private void loadDccFile() throws Exception { + logger.info("Attempting to load dcc data"); + TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); + int count = 0; + try { + for (TabbedRecord rec : reader) { + if (++count == 1) { // header line + header = rec.getData(); + continue; + } + String[] params = TabTokenizer.tokenize(rec.getData()); + ChrPosition cp = ChrPointPosition.valueOf(params[4], Integer.parseInt(params[5])); + + // reset dbsnpid + params[20] = null; +// StringBuilder sb = new StringBuilder(); +// for (String s : params) { +// if (sb.length() > 0) sb.append('\t'); +// sb.append(s); +// } +// rec.setData(sb.toString()); + dccs.put(cp, params); + } + } finally { + reader.close(); + } + logger.info("Attempting to load dcc data - DONE with " + dccs.size() + " entries"); + } + + private void updateDBSnpData() throws Exception { + + VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[1])); + + int count = 0, multipleVersions = 0; + int pre30 = 0, thirty = 0, thirtyOne = 0, thirtyTwo = 0, thirtyThree = 0, thirtyFour = 0, thirtyFive = 0; + try { + for (VcfRecord dbSNPVcf : reader) { + if (++count % 1000000 == 0) + logger.info("hit " + count + " dbsnp records"); + + if ( ! StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "VC=SNV", false)) continue; + // vcf dbSNP record chromosome does not contain "chr", whereas the positionRecordMap does - add + String[] params = dccs.get(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition())); + if (null == params) continue; + + // if no dbsnp data - continue + String previousDBSnpValue = params[20]; + if ( ! StringUtils.isNullOrEmpty(previousDBSnpValue)) { + multipleVersions++; + continue; + } + +// logger.info("Resetting previousDBSnpValue of: " + previousDBSnpValue + " to " + dbSNPVcf.getId()); + + // only proceed if we have a SNP variant record + int startIndex = dbSNPVcf.getInfo().indexOf("dbSNPBuildID=") + 13; + int endIndex = dbSNPVcf.getInfo().indexOf(";" , startIndex); + String dbSnpVersion = dbSNPVcf.getInfo().substring(startIndex, endIndex); +// logger.info("dbsnp version = " + dbSnpVersion); + + int dbSnpVersionInt = Integer.parseInt(dbSnpVersion); + if (dbSnpVersionInt < 130) pre30++; + else if (dbSnpVersionInt == 130) thirty++; + else if (dbSnpVersionInt == 131) thirtyOne++; + else if (dbSnpVersionInt == 132) thirtyTwo++; + else if (dbSnpVersionInt == 133) thirtyThree++; + else if (dbSnpVersionInt == 134) thirtyFour++; + else if (dbSnpVersionInt == 135) thirtyFive++; + else if (dbSnpVersionInt > 135) logger.info("hmmm: " + dbSnpVersionInt); + + params[20] = dbSNPVcf.getId(); + params = Arrays.copyOf(params, params.length + 1); + params[params.length -1] = dbSnpVersion; + dccs.put(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition()), params); + + +// GenotypeEnum tumour = snpRecord.getTumourGenotype(); +// //TODO should we continue if the tumour Genotype is null?? 
+// if (null == tumour) continue; +// +// // multiple dbSNP entries can exist for a position. +// // if we already have dbSNP info for this snp, check to see if the dbSNP alt is shorter than the existing dbSNP record +// // if so, proceed, and re-write dbSNP details (if applicable). +// int dbSNPAltLengh = dbSNPVcf.getAlt().length(); +// if (snpRecord.getDbSnpAltLength() > 0 && dbSNPAltLengh > snpRecord.getDbSnpAltLength()) { +// continue; +// } +// +// // deal with multiple alt bases +// String [] alts = null; +// if (dbSNPAltLengh == 1) { +// alts = new String[] {dbSNPVcf.getAlt()}; +// } else if (dbSNPAltLengh > 1){ +// alts = TabTokenizer.tokenize(dbSNPVcf.getAlt(), ','); +// } +// +// if (null != alts) { +// for (String alt : alts) { +// +// GenotypeEnum dbSnpGenotype = BaseUtils.getGenotypeEnum(dbSNPVcf.getRef() + alt); +// if (null == dbSnpGenotype) { +// logger.warn("Couldn't get Genotype from dbSNP position with variant: " + alt); +// continue; +// } +//// // no longer flip the genotype as dbSNP is reporting on the +ve strand +////// if (reverseStrand) { +////// dbSnpGenotype = dbSnpGenotype.getComplement(); +////// } +// if (tumour == dbSnpGenotype || (tumour.isHomozygous() && dbSnpGenotype.containsAllele(tumour.getFirstAllele()))) { +// boolean reverseStrand = StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "RV", false); +//// boolean reverseStrand = VcfUtils.isDbSNPVcfRecordOnReverseStrand(dbSNPVcf.getInfo()); +// snpRecord.setDbSnpStrand(reverseStrand ? '-' : '+'); +// snpRecord.setDbSnpId(dbSNPVcf.getId()); +// snpRecord.setDbSnpGenotype(dbSnpGenotype); +// snpRecord.setDbSnpAltLength(dbSNPAltLengh); +// break; +// } +// } +// } + } + } finally { + reader.close(); + } + logger.info("STATS:"); + logger.info("No of dcc records with dbSNP version of pre 130: " + pre30); + logger.info("No of dcc records with dbSNP version of 130: " + thirty); + logger.info("No of dcc records with dbSNP version of 131: " + thirtyOne); + logger.info("No of dcc records with dbSNP version of 132: " + thirtyTwo); + logger.info("No of dcc records with dbSNP version of 133: " + thirtyThree); + logger.info("No of dcc records with dbSNP version of 134: " + thirtyFour); + logger.info("No of dcc records with dbSNP version of 135: " + thirtyFive); + logger.info("No of dcc records with duplicate dbSNP versions : " + multipleVersions); + logger.info("Total no of dcc records with dbSNP data : " + (pre30 + thirty + thirtyOne + thirtyTwo + thirtyThree + thirtyFour + thirtyFive)); + } + + public static void main(String[] args) throws Exception { + ReAnnotateDccWithDbSNP sp = new ReAnnotateDccWithDbSNP(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(ReAnnotateDccWithDbSNP.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("ReAnnotateDccWithDbSNP", ReAnnotateDccWithDbSNP.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- new file mode 100644 index 000000000..86499809c --- /dev/null +++ b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- @@ -0,0 +1,280 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionComparator; +import org.qcmg.common.string.StringUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; +import org.qcmg.vcf.VCFFileReader; + +public class ReAnnotateDccWithDbSNP { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + private String header; + + private static QLogger logger; + + private Map dccs = new HashMap(); + + + public int engage() throws Exception { + + loadDccFile(); + + updateDBSnpData(); + + writeDCCOutput(); + + + return exitStatus; + } + + private void writeDCCOutput() throws Exception { + if ( ! 
StringUtils.isNullOrEmpty(cmdLineOutputFiles[0])) { + FileWriter writer = new FileWriter(new File(cmdLineOutputFiles[0])); + try { + //sort + List data = new ArrayList(dccs.keySet()); + Collections.sort(data, new ChrPositionComparator()); + + + writer.write(header + "\tdbSnpVer\n"); + + for (ChrPosition cp : data) { + String[] dcc = dccs.get(cp); + StringBuilder sb = new StringBuilder(); + for (String s : dcc) { + if (sb.length() > 0) sb.append('\t'); + sb.append(s); + } + writer.write(sb.toString() + '\n'); + } + + } finally { + writer.close(); + } + } + } + + + private void loadDccFile() throws Exception { + logger.info("Attempting to load dcc data"); + TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); + int count = 0; + try { + for (TabbedRecord rec : reader) { + if (++count == 1) { // header line + header = rec.getData(); + continue; + } + String[] params = TabTokenizer.tokenize(rec.getData()); + ChrPosition cp = ChrPointPosition.valueOf(params[4], Integer.parseInt(params[5])); + + // reset dbsnpid + params[20] = null; +// StringBuilder sb = new StringBuilder(); +// for (String s : params) { +// if (sb.length() > 0) sb.append('\t'); +// sb.append(s); +// } +// rec.setData(sb.toString()); + dccs.put(cp, params); + } + } finally { + reader.close(); + } + logger.info("Attempting to load dcc data - DONE with " + dccs.size() + " entries"); + } + + private void updateDBSnpData() throws Exception { + + VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[1])); + + int count = 0, multipleVersions = 0; + int pre30 = 0, thirty = 0, thirtyOne = 0, thirtyTwo = 0, thirtyThree = 0, thirtyFour = 0, thirtyFive = 0; + try { + for (VcfRecord dbSNPVcf : reader) { + if (++count % 1000000 == 0) + logger.info("hit " + count + " dbsnp records"); + + if ( ! StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "VC=SNV", false)) continue; + // vcf dbSNP record chromosome does not contain "chr", whereas the positionRecordMap does - add + String[] params = dccs.get(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition())); + if (null == params) continue; + + // if no dbsnp data - continue + String previousDBSnpValue = params[20]; + if ( ! StringUtils.isNullOrEmpty(previousDBSnpValue)) { + multipleVersions++; + continue; + } + +// logger.info("Resetting previousDBSnpValue of: " + previousDBSnpValue + " to " + dbSNPVcf.getId()); + + // only proceed if we have a SNP variant record + int startIndex = dbSNPVcf.getInfo().indexOf("dbSNPBuildID=") + 13; + int endIndex = dbSNPVcf.getInfo().indexOf(";" , startIndex); + String dbSnpVersion = dbSNPVcf.getInfo().substring(startIndex, endIndex); +// logger.info("dbsnp version = " + dbSnpVersion); + + int dbSnpVersionInt = Integer.parseInt(dbSnpVersion); + if (dbSnpVersionInt < 130) pre30++; + else if (dbSnpVersionInt == 130) thirty++; + else if (dbSnpVersionInt == 131) thirtyOne++; + else if (dbSnpVersionInt == 132) thirtyTwo++; + else if (dbSnpVersionInt == 133) thirtyThree++; + else if (dbSnpVersionInt == 134) thirtyFour++; + else if (dbSnpVersionInt == 135) thirtyFive++; + else if (dbSnpVersionInt > 135) logger.info("hmmm: " + dbSnpVersionInt); + + params[20] = dbSNPVcf.getId(); + params = Arrays.copyOf(params, params.length + 1); + params[params.length -1] = dbSnpVersion; + dccs.put(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition()), params); + + +// GenotypeEnum tumour = snpRecord.getTumourGenotype(); +// //TODO should we continue if the tumour Genotype is null?? 
+// if (null == tumour) continue; +// +// // multiple dbSNP entries can exist for a position. +// // if we already have dbSNP info for this snp, check to see if the dbSNP alt is shorter than the existing dbSNP record +// // if so, proceed, and re-write dbSNP details (if applicable). +// int dbSNPAltLengh = dbSNPVcf.getAlt().length(); +// if (snpRecord.getDbSnpAltLength() > 0 && dbSNPAltLengh > snpRecord.getDbSnpAltLength()) { +// continue; +// } +// +// // deal with multiple alt bases +// String [] alts = null; +// if (dbSNPAltLengh == 1) { +// alts = new String[] {dbSNPVcf.getAlt()}; +// } else if (dbSNPAltLengh > 1){ +// alts = TabTokenizer.tokenize(dbSNPVcf.getAlt(), ','); +// } +// +// if (null != alts) { +// for (String alt : alts) { +// +// GenotypeEnum dbSnpGenotype = BaseUtils.getGenotypeEnum(dbSNPVcf.getRef() + alt); +// if (null == dbSnpGenotype) { +// logger.warn("Couldn't get Genotype from dbSNP position with variant: " + alt); +// continue; +// } +//// // no longer flip the genotype as dbSNP is reporting on the +ve strand +////// if (reverseStrand) { +////// dbSnpGenotype = dbSnpGenotype.getComplement(); +////// } +// if (tumour == dbSnpGenotype || (tumour.isHomozygous() && dbSnpGenotype.containsAllele(tumour.getFirstAllele()))) { +// boolean reverseStrand = StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "RV", false); +//// boolean reverseStrand = VcfUtils.isDbSNPVcfRecordOnReverseStrand(dbSNPVcf.getInfo()); +// snpRecord.setDbSnpStrand(reverseStrand ? '-' : '+'); +// snpRecord.setDbSnpId(dbSNPVcf.getId()); +// snpRecord.setDbSnpGenotype(dbSnpGenotype); +// snpRecord.setDbSnpAltLength(dbSNPAltLengh); +// break; +// } +// } +// } + } + } finally { + reader.close(); + } + logger.info("STATS:"); + logger.info("No of dcc records with dbSNP version of pre 130: " + pre30); + logger.info("No of dcc records with dbSNP version of 130: " + thirty); + logger.info("No of dcc records with dbSNP version of 131: " + thirtyOne); + logger.info("No of dcc records with dbSNP version of 132: " + thirtyTwo); + logger.info("No of dcc records with dbSNP version of 133: " + thirtyThree); + logger.info("No of dcc records with dbSNP version of 134: " + thirtyFour); + logger.info("No of dcc records with dbSNP version of 135: " + thirtyFive); + logger.info("No of dcc records with duplicate dbSNP versions : " + multipleVersions); + logger.info("Total no of dcc records with dbSNP data : " + (pre30 + thirty + thirtyOne + thirtyTwo + thirtyThree + thirtyFour + thirtyFive)); + } + + public static void main(String[] args) throws Exception { + ReAnnotateDccWithDbSNP sp = new ReAnnotateDccWithDbSNP(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(ReAnnotateDccWithDbSNP.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("ReAnnotateDccWithDbSNP", ReAnnotateDccWithDbSNP.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/ReadPartGZFile.java b/qmule/src/org/qcmg/qmule/ReadPartGZFile.java new file mode 100644 index 000000000..ee8018ccc --- /dev/null +++ b/qmule/src/org/qcmg/qmule/ReadPartGZFile.java @@ -0,0 +1,152 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import htsjdk.tribble.readers.TabixReader; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.HashSet; +import java.util.Set; +import java.util.zip.GZIPInputStream; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.util.FileUtils; +import org.qcmg.vcf.VCFSerializer; + + +public class ReadPartGZFile { + +// static InputStream getInputStream(File input_gzip_file) throws FileNotFoundException, IOException{ +// InputStream inputStream; +// // if (FileUtils.isFileGZip(input_gzip_file)) { +// if (FileUtils.isInputGZip(input_gzip_file)) { +// GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(input_gzip_file)); +// try(InputStreamReader streamReader = new InputStreamReader(gzis)){ +// inputStream = new GZIPInputStream(new FileInputStream(input_gzip_file)); +// } +// } else { +// FileInputStream stream = new FileInputStream(input_gzip_file); +// try(InputStreamReader streamReader = new InputStreamReader(stream)){ +// BufferedReader in = new BufferedReader(streamReader); +// inputStream = new FileInputStream(input_gzip_file); +// } +// } +// return inputStream; +// } + + + + + ReadPartGZFile(File input_gzip_file, int no) throws Exception{ + + //get a new stream rather than a closed one + InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? 
+ new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); + + try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ + int num = 0; + String line; + while( (line = reader.readLine() ) != null){ + if( ++num > no) break; + System.out.println(line); + } + } + + + } + static void countLines(File input_gzip_file) throws FileNotFoundException, IOException, InterruptedException{ + HashSet uniqRef = new HashSet(); + + long startTime = System.currentTimeMillis(); + long num = 0; +// InputStream inputStream = getInputStream(input_gzip_file); + InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? + new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); + + try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ + String line; + while( (line = reader.readLine() ) != null){ + uniqRef.add(line.split("\\t")[0]); + num ++; + } + } + + System.out.println(String.format("Read file: %s\nLine number: %d", input_gzip_file.getAbsoluteFile(), num)); + System.out.println("Uniq reference name are " + uniqRef ); + + + } + + static void countUniqPosition(String input_gzip_file, String indexFile) throws IOException{ + // TabixReader tabix = new TabixReader( input_gzip_file, indexFile); + TabixReader tabix = new TabixReader( input_gzip_file); + Set chrs = tabix.getChromosomes(); + HashSet uniqPos = new HashSet(); + long total_uniq = 0; + long num = 0; + System.out.println("total reference number is " + chrs.size() + " from " + input_gzip_file); + for(String str : chrs){ + + uniqPos.clear(); + TabixReader.Iterator it = tabix.query(str); + + + String line; + while(( line = it.next())!= null){ + // String[] eles = TabTokenizer.tokenize(line, '\t'); + // uniqPos.add(eles[1]); + // uniqPos.add(line.split("\\t")[1]); + num ++; + } + + //debug + System.out.println("There are " + num+ " position recorded in reference " + str); + num ++; + + + // total_uniq += uniqPos.size(); + // System.out.println("There are " + uniqPos.size() + " uniq position recorded in reference " + str); + + } + +// System.out.println("Total uniq position recorded in all reference is " + total_uniq); +// System.out.println("Total records in whole file is " + num); + + } + + public static void main(String[] args) { + try{ + long startTime = System.currentTimeMillis(); + File input = new File(args[0]); + int no = Integer.parseInt(args[1]); + + if(no > 0) + new ReadPartGZFile(input, no ); + else if (no == 0) + countUniqPosition(args[0], null); + else + countLines(input); + + long endTime = System.currentTimeMillis(); + String time = QLogger.getRunTime(startTime, endTime); + System.out.println("run Time is " + time); + + }catch(Exception e){ + e.printStackTrace(); + //System.out.println(e.printStackTrace();); + System.err.println("Usage: java -cp qmule-0.1pre.jar ReadPartGZFile "); + + } + + } +} diff --git a/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- b/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- new file mode 100644 index 000000000..ee8018ccc --- /dev/null +++ b/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- @@ -0,0 +1,152 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import htsjdk.tribble.readers.TabixReader; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.HashSet; +import java.util.Set; +import java.util.zip.GZIPInputStream; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.util.FileUtils; +import org.qcmg.vcf.VCFSerializer; + + +public class ReadPartGZFile { + +// static InputStream getInputStream(File input_gzip_file) throws FileNotFoundException, IOException{ +// InputStream inputStream; +// // if (FileUtils.isFileGZip(input_gzip_file)) { +// if (FileUtils.isInputGZip(input_gzip_file)) { +// GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(input_gzip_file)); +// try(InputStreamReader streamReader = new InputStreamReader(gzis)){ +// inputStream = new GZIPInputStream(new FileInputStream(input_gzip_file)); +// } +// } else { +// FileInputStream stream = new FileInputStream(input_gzip_file); +// try(InputStreamReader streamReader = new InputStreamReader(stream)){ +// BufferedReader in = new BufferedReader(streamReader); +// inputStream = new FileInputStream(input_gzip_file); +// } +// } +// return inputStream; +// } + + + + + ReadPartGZFile(File input_gzip_file, int no) throws Exception{ + + //get a new stream rather than a closed one + InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? + new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); + + try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ + int num = 0; + String line; + while( (line = reader.readLine() ) != null){ + if( ++num > no) break; + System.out.println(line); + } + } + + + } + static void countLines(File input_gzip_file) throws FileNotFoundException, IOException, InterruptedException{ + HashSet uniqRef = new HashSet(); + + long startTime = System.currentTimeMillis(); + long num = 0; +// InputStream inputStream = getInputStream(input_gzip_file); + InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? 
+ new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); + + try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ + String line; + while( (line = reader.readLine() ) != null){ + uniqRef.add(line.split("\\t")[0]); + num ++; + } + } + + System.out.println(String.format("Read file: %s\nLine number: %d", input_gzip_file.getAbsoluteFile(), num)); + System.out.println("Uniq reference name are " + uniqRef ); + + + } + + static void countUniqPosition(String input_gzip_file, String indexFile) throws IOException{ + // TabixReader tabix = new TabixReader( input_gzip_file, indexFile); + TabixReader tabix = new TabixReader( input_gzip_file); + Set chrs = tabix.getChromosomes(); + HashSet uniqPos = new HashSet(); + long total_uniq = 0; + long num = 0; + System.out.println("total reference number is " + chrs.size() + " from " + input_gzip_file); + for(String str : chrs){ + + uniqPos.clear(); + TabixReader.Iterator it = tabix.query(str); + + + String line; + while(( line = it.next())!= null){ + // String[] eles = TabTokenizer.tokenize(line, '\t'); + // uniqPos.add(eles[1]); + // uniqPos.add(line.split("\\t")[1]); + num ++; + } + + //debug + System.out.println("There are " + num+ " position recorded in reference " + str); + num ++; + + + // total_uniq += uniqPos.size(); + // System.out.println("There are " + uniqPos.size() + " uniq position recorded in reference " + str); + + } + +// System.out.println("Total uniq position recorded in all reference is " + total_uniq); +// System.out.println("Total records in whole file is " + num); + + } + + public static void main(String[] args) { + try{ + long startTime = System.currentTimeMillis(); + File input = new File(args[0]); + int no = Integer.parseInt(args[1]); + + if(no > 0) + new ReadPartGZFile(input, no ); + else if (no == 0) + countUniqPosition(args[0], null); + else + countLines(input); + + long endTime = System.currentTimeMillis(); + String time = QLogger.getRunTime(startTime, endTime); + System.out.println("run Time is " + time); + + }catch(Exception e){ + e.printStackTrace(); + //System.out.println(e.printStackTrace();); + System.err.println("Usage: java -cp qmule-0.1pre.jar ReadPartGZFile "); + + } + + } +} diff --git a/qmule/src/org/qcmg/qmule/ReadsAppend.java b/qmule/src/org/qcmg/qmule/ReadsAppend.java new file mode 100644 index 000000000..4c2ce5fab --- /dev/null +++ b/qmule/src/org/qcmg/qmule/ReadsAppend.java @@ -0,0 +1,95 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.IOException; + +import htsjdk.samtools.*; + +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; + +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + + +public class ReadsAppend { + File[] inputs; + File output; + + + ReadsAppend(File output, File[] inputs ) throws Exception{ + this.output = output; + this.inputs = inputs; + merging(); + } + + /** + * retrive the CS and CQ value from BAM record to output csfasta or qual file + * @throws Exception + */ + void merging() throws Exception{ + System.out.println("start time : " + getTime()); + + List readers = new ArrayList<>(); + for (File f: inputs) { + readers.add( SAMFileReaderFactory.createSAMFileReader(f)); + } + + SAMFileHeader header = readers.get(0).getFileHeader().clone(); + + SAMOrBAMWriterFactory factory = new SAMOrBAMWriterFactory(header, true, output,2000000 ); + SAMFileWriter writer = factory.getWriter(); + + for( SamReader reader : readers){ + for( SAMRecord record : reader) { + writer.addAlignment(record); + } + reader.close(); + } + + factory.closeWriter(); + System.out.println("end time : " + getTime()); + System.exit(0); + } + + + private String getTime(){ + Calendar currentDate = Calendar.getInstance(); + SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); + return "[" + formatter.format(currentDate.getTime()) + "]"; + } + public static void main(final String[] args) throws IOException, InterruptedException { + + try{ + if(args.length < 2) + throw new Exception("missing inputs or outputs name"); + + File output = new File(args[0]); + File[] inputs = new File[args.length-1]; + for (int i = 1; i < args.length; i++) { + inputs[i-1] = new File(args[i]) ; + + System.out.println(inputs[i-1].toString()); + } + + + new ReadsAppend(output, inputs ); + + System.exit(0); + }catch(Exception e){ + System.err.println(e.toString()); + Thread.sleep(1); + System.out.println("usage: qmule org.qcmg.qmule.ReadsAppend "); + System.exit(1); + } + + } +} diff --git a/qmule/src/org/qcmg/qmule/ReadsAppend.java-- b/qmule/src/org/qcmg/qmule/ReadsAppend.java-- new file mode 100644 index 000000000..4c2ce5fab --- /dev/null +++ b/qmule/src/org/qcmg/qmule/ReadsAppend.java-- @@ -0,0 +1,95 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.IOException; + +import htsjdk.samtools.*; + +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; + +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + + +public class ReadsAppend { + File[] inputs; + File output; + + + ReadsAppend(File output, File[] inputs ) throws Exception{ + this.output = output; + this.inputs = inputs; + merging(); + } + + /** + * retrive the CS and CQ value from BAM record to output csfasta or qual file + * @throws Exception + */ + void merging() throws Exception{ + System.out.println("start time : " + getTime()); + + List readers = new ArrayList<>(); + for (File f: inputs) { + readers.add( SAMFileReaderFactory.createSAMFileReader(f)); + } + + SAMFileHeader header = readers.get(0).getFileHeader().clone(); + + SAMOrBAMWriterFactory factory = new SAMOrBAMWriterFactory(header, true, output,2000000 ); + SAMFileWriter writer = factory.getWriter(); + + for( SamReader reader : readers){ + for( SAMRecord record : reader) { + writer.addAlignment(record); + } + reader.close(); + } + + factory.closeWriter(); + System.out.println("end time : " + getTime()); + System.exit(0); + } + + + private String getTime(){ + Calendar currentDate = Calendar.getInstance(); + SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); + return "[" + formatter.format(currentDate.getTime()) + "]"; + } + public static void main(final String[] args) throws IOException, InterruptedException { + + try{ + if(args.length < 2) + throw new Exception("missing inputs or outputs name"); + + File output = new File(args[0]); + File[] inputs = new File[args.length-1]; + for (int i = 1; i < args.length; i++) { + inputs[i-1] = new File(args[i]) ; + + System.out.println(inputs[i-1].toString()); + } + + + new ReadsAppend(output, inputs ); + + System.exit(0); + }catch(Exception e){ + System.err.println(e.toString()); + Thread.sleep(1); + System.out.println("usage: qmule org.qcmg.qmule.ReadsAppend "); + System.exit(1); + } + + } +} diff --git a/qmule/src/org/qcmg/qmule/RunGatk.java b/qmule/src/org/qcmg/qmule/RunGatk.java new file mode 100644 index 000000000..b2e13458d --- /dev/null +++ b/qmule/src/org/qcmg/qmule/RunGatk.java @@ -0,0 +1,141 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + + +public class RunGatk { + +// public static String PATH="/panfs/home/oholmes/devel/QCMGScripts/o.holmes/gatk/pbs4java/"; +// public static final String PARAMS=" -l walltime=124:00:00 -v patient="; +// public static int jobCounter = 1; +// +// // inputs +// public static String patientId; +// public static String mixture; +// public static String normalBamFile; +// public static String tumourBamFile; +// public static String outputDir; +// +// public static String patientParams; +// public static String nodeName; +// public static String startPoint; +// +// public static void main(String[] args) throws IOException, InterruptedException, Exception { +// +// if (args.length < 5) throw new IllegalArgumentException("USAGE: RunGatk []"); +// +// patientId = args[0]; +// mixture = args[1]; +// normalBamFile = args[2]; +// tumourBamFile = args[3]; +// outputDir = args[4]; +// if (args.length == 6) { +// PATH = args[5]; +// } +// if (args.length == 7) { +// PATH = args[6]; +// } +// +// patientParams = PARAMS + patientId + ",mixture=" + mixture; +// +// String mergeParams = patientParams + ",normalBam=" + normalBamFile + ",tumourBam=" + tumourBamFile; +// +// +// String jobName = jobCounter++ + "RG_" + mixture; +// System.out.println("About to submit merge job"); +// +// Job merge = new Job(jobName, PATH + "run_gatk_merge_1.sh" + mergeParams); +//// merge.setQueue(queue); +// merge.queue(); +// String status = merge.getStatus(); +// System.out.println("1st job status: " + status); +// while ("N/A".equals(status)) { +// Thread.sleep(1500); +// String [] jobs = Job.SearchJobsByName(jobName, true); +// System.out.println("Sleeping till job status changes..." + status + ", id: " + merge.getId() + " no of jobs: " + jobs.length); +// +// for (int i = 0 ; i < jobs.length ; i++) { +// System.out.println("jobs[" + i + "] : " + jobs[i]); +// merge = Job.getJobById(jobs[i]); +// status = merge.getStatus(); +// System.out.println("job.getJobStatus: " + Job.getJobStatus(jobs[i])); +// +// } +// } +// nodeName = merge.getExecuteNode().substring(0, merge.getExecuteNode().indexOf('/')); +// +// +// +// System.out.println("About to submit clean 1 job"); +// // clean 1 +// String script = PATH + "run_gatk_clean_1.sh" + patientParams; +// Job clean1 = submitDependantJob(merge, "1", script, true); +// +// +// System.out.println("About to submit clean 2 job"); +// // clean 2 +// script = PATH + "run_gatk_clean_2.sh" + patientParams; +// Job clean2 = submitDependantJob(clean1, "1", script, true); +// +// // clean 3 +// script = PATH + "run_gatk_clean_3.sh" + patientParams; +// Job clean3 = submitDependantJob(clean2, "6", script, true); +// +//// String scriptToRun = PATH + "run_gatk_clean_4.sh" + patientParams; +// +// System.out.println("About to submit clean 4 job"); +// script = PATH + "run_gatk_clean_4.sh" + patientParams; +// Job clean4 = submitDependantJob(clean3, "1", script, true); +// +// // split +// System.out.println("About to submit split job"); +// script = PATH + "run_gatk_split.sh" + patientParams; +// Job split = submitDependantJob(clean4, "1", script, true); +// +// runMergeDelUG(split, "ND"); +// runMergeDelUG(split, "TD"); +// } +// +// private static void runMergeDelUG(Job splitJob, String type) throws IOException, InterruptedException, Exception { +// String script = PATH + "run_gatk_merge_2.sh" + patientParams + ",type=" + type; +// Job mergeJob = submitDependantJob(splitJob, "1", script, true); +// +// // delete +// script = PATH + "run_gatk_del_split_files.sh" + 
patientParams + ",type=" + type; +// Job deleteJob = submitDependantJob(mergeJob, "1", script, true); +// +// +// // UG +// script = PATH + "run_gatk_UG.sh" + patientParams + ",type=" + type; +// Job unifiedGenotyperJob = submitDependantJob(mergeJob, "4", script, false); +// +// } +// +// private static Job submitDependantJob(Job depJob, String ppn, String script, boolean onNode) throws IOException, InterruptedException, Exception { +// +// String jobName; +// ArrayList dependantJobs; +// String[] jobs; +// jobName = jobCounter++ + "RG_" + mixture; +// Job newJob = new Job(jobName, script); +//// Job newJob = new Job(jobName, PATH + script + patientParams + ",type=" + type); +//// newJob.setQueue(queue); +// if (onNode) { +// newJob.setExecuteNode(nodeName); +// newJob.setNodes(nodeName); +// } +// newJob.setPpn(ppn); +// dependantJobs = new ArrayList(); +// dependantJobs.add(depJob.getId() + " "); +// newJob.setAfterOK(dependantJobs); +// newJob.queue(); +// // sleep to allow job to make it to the queue +// Thread.sleep(1000); +// +// jobs = Job.SearchJobsByName(jobName, true); +// newJob = Job.getJobById(jobs[0]); +// return newJob; +// } + +} diff --git a/qmule/src/org/qcmg/qmule/RunGatk.java-- b/qmule/src/org/qcmg/qmule/RunGatk.java-- new file mode 100644 index 000000000..b2e13458d --- /dev/null +++ b/qmule/src/org/qcmg/qmule/RunGatk.java-- @@ -0,0 +1,141 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + + +public class RunGatk { + +// public static String PATH="/panfs/home/oholmes/devel/QCMGScripts/o.holmes/gatk/pbs4java/"; +// public static final String PARAMS=" -l walltime=124:00:00 -v patient="; +// public static int jobCounter = 1; +// +// // inputs +// public static String patientId; +// public static String mixture; +// public static String normalBamFile; +// public static String tumourBamFile; +// public static String outputDir; +// +// public static String patientParams; +// public static String nodeName; +// public static String startPoint; +// +// public static void main(String[] args) throws IOException, InterruptedException, Exception { +// +// if (args.length < 5) throw new IllegalArgumentException("USAGE: RunGatk []"); +// +// patientId = args[0]; +// mixture = args[1]; +// normalBamFile = args[2]; +// tumourBamFile = args[3]; +// outputDir = args[4]; +// if (args.length == 6) { +// PATH = args[5]; +// } +// if (args.length == 7) { +// PATH = args[6]; +// } +// +// patientParams = PARAMS + patientId + ",mixture=" + mixture; +// +// String mergeParams = patientParams + ",normalBam=" + normalBamFile + ",tumourBam=" + tumourBamFile; +// +// +// String jobName = jobCounter++ + "RG_" + mixture; +// System.out.println("About to submit merge job"); +// +// Job merge = new Job(jobName, PATH + "run_gatk_merge_1.sh" + mergeParams); +//// merge.setQueue(queue); +// merge.queue(); +// String status = merge.getStatus(); +// System.out.println("1st job status: " + status); +// while ("N/A".equals(status)) { +// Thread.sleep(1500); +// String [] jobs = Job.SearchJobsByName(jobName, true); +// System.out.println("Sleeping till job status changes..." 
+ status + ", id: " + merge.getId() + " no of jobs: " + jobs.length); +// +// for (int i = 0 ; i < jobs.length ; i++) { +// System.out.println("jobs[" + i + "] : " + jobs[i]); +// merge = Job.getJobById(jobs[i]); +// status = merge.getStatus(); +// System.out.println("job.getJobStatus: " + Job.getJobStatus(jobs[i])); +// +// } +// } +// nodeName = merge.getExecuteNode().substring(0, merge.getExecuteNode().indexOf('/')); +// +// +// +// System.out.println("About to submit clean 1 job"); +// // clean 1 +// String script = PATH + "run_gatk_clean_1.sh" + patientParams; +// Job clean1 = submitDependantJob(merge, "1", script, true); +// +// +// System.out.println("About to submit clean 2 job"); +// // clean 2 +// script = PATH + "run_gatk_clean_2.sh" + patientParams; +// Job clean2 = submitDependantJob(clean1, "1", script, true); +// +// // clean 3 +// script = PATH + "run_gatk_clean_3.sh" + patientParams; +// Job clean3 = submitDependantJob(clean2, "6", script, true); +// +//// String scriptToRun = PATH + "run_gatk_clean_4.sh" + patientParams; +// +// System.out.println("About to submit clean 4 job"); +// script = PATH + "run_gatk_clean_4.sh" + patientParams; +// Job clean4 = submitDependantJob(clean3, "1", script, true); +// +// // split +// System.out.println("About to submit split job"); +// script = PATH + "run_gatk_split.sh" + patientParams; +// Job split = submitDependantJob(clean4, "1", script, true); +// +// runMergeDelUG(split, "ND"); +// runMergeDelUG(split, "TD"); +// } +// +// private static void runMergeDelUG(Job splitJob, String type) throws IOException, InterruptedException, Exception { +// String script = PATH + "run_gatk_merge_2.sh" + patientParams + ",type=" + type; +// Job mergeJob = submitDependantJob(splitJob, "1", script, true); +// +// // delete +// script = PATH + "run_gatk_del_split_files.sh" + patientParams + ",type=" + type; +// Job deleteJob = submitDependantJob(mergeJob, "1", script, true); +// +// +// // UG +// script = PATH + "run_gatk_UG.sh" + patientParams + ",type=" + type; +// Job unifiedGenotyperJob = submitDependantJob(mergeJob, "4", script, false); +// +// } +// +// private static Job submitDependantJob(Job depJob, String ppn, String script, boolean onNode) throws IOException, InterruptedException, Exception { +// +// String jobName; +// ArrayList dependantJobs; +// String[] jobs; +// jobName = jobCounter++ + "RG_" + mixture; +// Job newJob = new Job(jobName, script); +//// Job newJob = new Job(jobName, PATH + script + patientParams + ",type=" + type); +//// newJob.setQueue(queue); +// if (onNode) { +// newJob.setExecuteNode(nodeName); +// newJob.setNodes(nodeName); +// } +// newJob.setPpn(ppn); +// dependantJobs = new ArrayList(); +// dependantJobs.add(depJob.getId() + " "); +// newJob.setAfterOK(dependantJobs); +// newJob.queue(); +// // sleep to allow job to make it to the queue +// Thread.sleep(1000); +// +// jobs = Job.SearchJobsByName(jobName, true); +// newJob = Job.getJobById(jobs[0]); +// return newJob; +// } + +} diff --git a/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java b/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java new file mode 100644 index 000000000..6730aa5ff --- /dev/null +++ b/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java @@ -0,0 +1,368 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +public class SmithWatermanGotoh { + + private final float gapOpen; + private final float gapExtend; + private final int matchScore; + private final int mismatchScore; + private final String sequenceA; + private final String sequenceB; + private final int rows; + private final int columns; + private int[][] pointerMatrix; + private short[][] verticalGaps; + private short[][] horizontalGaps; + private int bestRow; + private int bestColumn; + private float bestScore; + private static final int STOP = 0; + private static final int LEFT = 1; + private static final int DIAGONAL = 2; + private static final int UP = 3; + private static final String GAP = "-"; + private static final String EMPTY = " "; + private static final String MISMATCH = "."; + private static final String MATCH = "|"; + private static final String TAB = ""; + + public SmithWatermanGotoh(File fileA, File fileB, int matchScore, int mismatchScore, float gapOpen, float gapExtend) throws IOException { + + this.sequenceA = readFastaFile(fileA); + this.sequenceB = readFastaFile(fileB); + this.gapOpen = gapOpen; + this.gapExtend = gapExtend; + this.matchScore = matchScore; + this.mismatchScore = mismatchScore; + this.rows = sequenceA.length() + 1;//i + this.columns = sequenceB.length() + 1;//j + align(); + } + + public SmithWatermanGotoh(String a, String b, int matchScore, int mismatchScore, float gapOpen, float gapExtend) { + + this.sequenceA = a; + this.sequenceB = b; + this.gapOpen = gapOpen; + this.gapExtend = gapExtend; + this.matchScore = matchScore; + this.mismatchScore = mismatchScore; + this.rows = sequenceA.length() + 1;//i + this.columns = sequenceB.length() + 1;//j + align(); + } + + private String readFastaFile(File file) throws IOException { + + try (FileInputStream inputStream = new FileInputStream(file)) { + StringBuilder buffer = new StringBuilder(); + int ch; + while ((ch = inputStream.read()) != -1) { + buffer.append((char)ch); + } + inputStream.close(); + + String seq = buffer.toString(); + + if (seq.startsWith(">")) { + int index = seq.indexOf("\n"); + return seq.substring(index, seq.length()).replace("\n", "").toUpperCase(); + } else { + return seq.replace("\n", "").toUpperCase(); + } + } + } + + private void align() { + fillMatrix(); + traceback(); +// System.out.println(getDiffs()); + } + + private void fillMatrix() { + //etutorials.org/Misc/blast/Part+II+Theory/Chapter+3.+Sequence+Alignment/3.2+Local+Alignment+Smith-Waterman/ + //Gotoh: http://www.cse.msu.edu/~cse891/Sect001/notes_alignment.pdf + //https://github.com/ekg/smithwaterman/blob/master/SmithWatermanGotoh.cpp + //http://cci.lbl.gov/cctbx_sources/mmtbx/alignment.py + // + //The dynamic programming algorithm was improved in performance by Gotoh (1982) by using the linear +// relationship for a gap weight wx = g + rx, where the weight for a gap of length x is the sum of a gap +// opening penalty (g) and a gap extension penalty (r) times the gap length (x), and by simplifying +// the dynamic programming algorithm. He reasoned that two of the terms that are maximized in the +// dynamic programming algorithm and designated here Pij and Qij depend only on the values in the +// current and previous row and column, as indicated below. 
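+// In that notation, the affine-gap recurrences evaluated in the loop below are (a sketch; the mapping to
+// the local variables is inferred from the update logic here rather than taken from an external reference):
+//   P(i,j) = max( H(i-1,j) - gapOpen, P(i-1,j) - gapExtend )   // a sequenceA base against a gap; run lengths kept in verticalGaps
+//   Q(i,j) = max( H(i,j-1) - gapOpen, Q(i,j-1) - gapExtend )   // a sequenceB base against a gap; run lengths kept in horizontalGaps
+//   H(i,j) = max( H(i-1,j-1) + s(ai,bj), P(i,j), Q(i,j) )      // s() is matchScore or mismatchScore; floored at zero (the STOP case) for local alignment
+// Only one previous row is needed at a time, so bestScores[] carries H, queryGapScores[] carries P,
+// and currentAnchorGapScore carries Q across the current row.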
+ + initialize(); + + //storage for current calculations + float[] bestScores = new float[columns];//score if xi aligns to gap after yi + float[] queryGapScores = new float[columns];//best score of alignment x1..xi to y1..yi + + for (int i=0; i queryGapOpenScore) { + //add extend score + queryGapScores[column] = queryGapExtendScore; + //increase size of gap + int gapLength = verticalGaps[row-1][column] + 1; + verticalGaps[row][column] = (short) gapLength; + } else { + //add open score + queryGapScores[column] = queryGapOpenScore; + } + + //calculate horizontal gaps + referenceGapExtendScore = currentAnchorGapScore - gapExtend; + referenceGapOpenScore = bestScores[column-1] - gapOpen; + + if (referenceGapExtendScore > referenceGapOpenScore) { + //add extend score + currentAnchorGapScore = referenceGapExtendScore; + //increase size of gap + short gapLength = (short) (horizontalGaps[row][column-1] + 1); + horizontalGaps[row][column] = gapLength; + } else { + //add open score + currentAnchorGapScore = referenceGapOpenScore; + } + + //test scores + bestScoreDiagonal = bestScores[column]; + bestScores[column] = findMaximum(totalSimilarityScore, queryGapScores[column], currentAnchorGapScore); + + //determine trackback direction + float score = bestScores[column]; + if (score == 0) { + pointerMatrix[row][column] = STOP; + } else if (score == totalSimilarityScore) { + pointerMatrix[row][column] = DIAGONAL; + } else if (score == queryGapScores[column]) { + pointerMatrix[row][column] = UP; + } else { + pointerMatrix[row][column] = LEFT; + } + + //set current cell if this is the best score + if (score > bestScore) { + bestRow = row; + bestColumn = column; + bestScore = score; + } + } + } + } + + + private void initialize() { + pointerMatrix = new int[rows][columns]; + verticalGaps = new short[rows][columns]; + horizontalGaps = new short[rows][columns]; + for (int i=0; i")) { + int index = seq.indexOf("\n"); + return seq.substring(index, seq.length()).replace("\n", "").toUpperCase(); + } else { + return seq.replace("\n", "").toUpperCase(); + } + } + } + + private void align() { + fillMatrix(); + traceback(); +// System.out.println(getDiffs()); + } + + private void fillMatrix() { + //etutorials.org/Misc/blast/Part+II+Theory/Chapter+3.+Sequence+Alignment/3.2+Local+Alignment+Smith-Waterman/ + //Gotoh: http://www.cse.msu.edu/~cse891/Sect001/notes_alignment.pdf + //https://github.com/ekg/smithwaterman/blob/master/SmithWatermanGotoh.cpp + //http://cci.lbl.gov/cctbx_sources/mmtbx/alignment.py + // + //The dynamic programming algorithm was improved in performance by Gotoh (1982) by using the linear +// relationship for a gap weight wx = g + rx, where the weight for a gap of length x is the sum of a gap +// opening penalty (g) and a gap extension penalty (r) times the gap length (x), and by simplifying +// the dynamic programming algorithm. He reasoned that two of the terms that are maximized in the +// dynamic programming algorithm and designated here Pij and Qij depend only on the values in the +// current and previous row and column, as indicated below. 
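+// Reading the cell update below: pointerMatrix records which term produced each cell's score
+// (DIAGONAL for the match/mismatch term, UP for the vertical-gap term held in queryGapScores,
+// LEFT for the horizontal-gap term held in currentAnchorGapScore, STOP for the zero floor), and
+// bestRow/bestColumn/bestScore remember the highest-scoring cell so that traceback() can presumably
+// start there and walk back until it reaches a STOP cell. verticalGaps and horizontalGaps only record
+// the running length of the gap ending at each cell; it is the open-versus-extend comparisons that
+// charge gapOpen once and then gapExtend for each additional gapped base.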
+ + initialize(); + + //storage for current calculations + float[] bestScores = new float[columns];//score if xi aligns to gap after yi + float[] queryGapScores = new float[columns];//best score of alignment x1..xi to y1..yi + + for (int i=0; i queryGapOpenScore) { + //add extend score + queryGapScores[column] = queryGapExtendScore; + //increase size of gap + int gapLength = verticalGaps[row-1][column] + 1; + verticalGaps[row][column] = (short) gapLength; + } else { + //add open score + queryGapScores[column] = queryGapOpenScore; + } + + //calculate horizontal gaps + referenceGapExtendScore = currentAnchorGapScore - gapExtend; + referenceGapOpenScore = bestScores[column-1] - gapOpen; + + if (referenceGapExtendScore > referenceGapOpenScore) { + //add extend score + currentAnchorGapScore = referenceGapExtendScore; + //increase size of gap + short gapLength = (short) (horizontalGaps[row][column-1] + 1); + horizontalGaps[row][column] = gapLength; + } else { + //add open score + currentAnchorGapScore = referenceGapOpenScore; + } + + //test scores + bestScoreDiagonal = bestScores[column]; + bestScores[column] = findMaximum(totalSimilarityScore, queryGapScores[column], currentAnchorGapScore); + + //determine trackback direction + float score = bestScores[column]; + if (score == 0) { + pointerMatrix[row][column] = STOP; + } else if (score == totalSimilarityScore) { + pointerMatrix[row][column] = DIAGONAL; + } else if (score == queryGapScores[column]) { + pointerMatrix[row][column] = UP; + } else { + pointerMatrix[row][column] = LEFT; + } + + //set current cell if this is the best score + if (score > bestScore) { + bestRow = row; + bestColumn = column; + bestScore = score; + } + } + } + } + + + private void initialize() { + pointerMatrix = new int[rows][columns]; + verticalGaps = new short[rows][columns]; + horizontalGaps = new short[rows][columns]; + for (int i=0; i chromosomes = new ArrayList(); +// private int exitStatus; +// private Map> vcfRecords = new HashMap>(); +// private Map> mafRecords = new HashMap>(); +// private Map> gffRecords = new HashMap>(); +// private Map> bedRecords = new HashMap>(); +// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); +// private List overlappingMafRecords = new ArrayList(); +// private List notOverlappingMafRecords = new ArrayList(); +// private List overlappingVcfRecords = new ArrayList(); +// private List notOverlappingVcfRecords = new ArrayList(); +// private int overlapCount = 0; +// private int notOverlappingCount = 0; +// private int snpCount; +// private Vector header = new Vector(); +// private String inputSnpType; +// +// private static QLogger logger; +// +// public int engage() throws Exception { +// inputSnpType = null; +// if (cmdLineInputFiles[0].endsWith("maf")) { +// logger.info("MAF File: " + cmdLineInputFiles[0]); +// loadMafFile(); +// inputSnpType = "MAF"; +// if (mafRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from maf file"); +// } +// if (cmdLineInputFiles[0].endsWith("vcf")) { +// logger.info("VCF File: " + cmdLineInputFiles[0]); +// loadVCFFile(); +// inputSnpType = "VCF"; +// if (vcfRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from vcf file"); +// } +// if (cmdLineInputFiles[1].endsWith("bed")) { +// logger.info("BED File: " + cmdLineInputFiles[1]); +// } else if (cmdLineInputFiles[1].endsWith("gff3")) { +// logger.info("GFF3 File: " + cmdLineInputFiles[1]); +// } +// logger.info("Output file: " + cmdLineOutputFiles[0]); +// 
logger.info("Output file: " + cmdLineOutputFiles[1]); +// +// String fileType = null; +// if (cmdLineInputFiles[1].endsWith("bed")) { +// loadBedFile(); +// fileType = "bed"; +// } else if (cmdLineInputFiles[1].endsWith("gff3")) { +// fileType = "gff3"; +// loadGffFile(); +// } else { +// throw new IllegalArgumentException("File type for reference regions is not recognized. Must be bed or gff3"); +// } +// +// Collections.sort(chromosomes,COMPARATOR); +// +// writeHeader(); +// +// if (fileType.equals("bed")) { +// if (bedRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from bed file"); +// for (String c: chromosomes) { +// if (inputSnpType.equals("MAF")) { +// +// compareBedRecordsToMaf(c, bedRecords.get(c)); +// } +// if (inputSnpType.equals("VCF")) { +// compareBedRecordsToVcf(c, bedRecords.get(c)); +// } +// } +// } +// +// if (fileType.equals("gff3")) { +// if (gffRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); +// for (String c: chromosomes) { +// logger.info("Chromosome: " + c); +// if (inputSnpType.equals("MAF")) { +// compareGFFRecordsToMaf(c, gffRecords.get(c)); +// } +// if (inputSnpType.equals("VCF")) { +// compareGFFRecordsToVcf(c, gffRecords.get(c)); +// } +// } +// } +// +// logger.info("SUMMARY"); +// logger.info("Total Records in " +inputSnpType+ ": " + snpCount); +// logger.info("Total Records in supplied reference regions: " + overlapCount); +// logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); +// return exitStatus; +// } +// +// private void loadVCFFile() throws Exception { +// VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[0])); +// try { +// header = new Vector(); +// Iterator iterator = reader.getRecordIterator(); +// snpCount = 0; +// if (reader.getHeader() != null) { +// Iterator iter = reader.getHeader().iterator(); +// while (iter.hasNext()) { +// header.add(iter.next()); +// } +// } +// while (iterator.hasNext()) { +// +// VCFRecord vcfRec = iterator.next(); +// +// snpCount++; +// if (vcfRecords.containsKey(vcfRec.getChromosome())) { +// vcfRecords.get(vcfRec.getChromosome()).add(vcfRec); +// } else { +// List list = new ArrayList(); +// list.add(vcfRec); +// vcfRecords.put(vcfRec.getChromosome(),list); +// } +// if (!chromosomes.contains(vcfRec.getChromosome())) { +// chromosomes.add(vcfRec.getChromosome()); +// } +// } +// logger.info("loaded maf file, total records: " + snpCount); +// } finally { +// reader.close(); +// } +// } +// +// private void loadMafFile() throws Exception { +// TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); +// try { +// header = new Vector(); +// Iterator iterator = reader.getRecordIterator(); +// snpCount = 0; +// if (reader.getHeader() != null) { +// Iterator iter = reader.getHeader().iterator(); +// while (iter.hasNext()) { +// header.add(iter.next()); +// } +// } +// while (iterator.hasNext()) { +// +// TabbedRecord tab = iterator.next(); +// +// if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo")) { +// header.add(tab.getData()); +// continue; +// } +// snpCount++; +// MAFRecord mafRec = convertToMafRecord(tab.getData().split("\t")); +// mafRec.setData(tab.getData()); +// if (mafRecords.containsKey(mafRec.getChromosome())) { +// mafRecords.get(mafRec.getChromosome()).add(mafRec); +// } else { +// List list = new ArrayList(); +// list.add(mafRec); +// mafRecords.put(mafRec.getChromosome(),list); +// } +// if 
(!chromosomes.contains(mafRec.getChromosome())) { +// chromosomes.add(mafRec.getChromosome()); +// } +// } +// logger.info("loaded maf file, total records: " + snpCount); +// } finally { +// reader.close(); +// } +// } +// +// private void loadBedFile() throws IOException { +// BEDFileReader reader = new BEDFileReader(new File(cmdLineInputFiles[1])); +// try { +// int count = 0; +// for (BEDRecord record : reader) { +// count++; +// String chr = record.getChrom(); +// if (inputSnpType.equals("MAF")) { +// chr = record.getChrom().replace("chr", ""); +// } +// if (bedRecords.containsKey(chr)) { +// bedRecords.get(chr).put(record.getChromStart(), record); +// } else { +// TreeMap map = new TreeMap(); +// map.put(record.getChromStart(), record); +// bedRecords.put(chr,map); +// } +// } +// logger.info("loaded bed file, total record: " + count); +// } finally { +// reader.close(); +// } +// +// } +// +// private void loadGffFile() throws Exception { +// GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); +// try { +// int count = 0; +// for (GFF3Record record : reader) { +// count++; +// String chr = record.getSeqId(); +// if (inputSnpType.equals("MAF")) { +// chr = record.getSeqId().replace("chr", ""); +// } +// if (gffRecords.containsKey(chr)) { +// gffRecords.get(chr).put(record.getStart(), record); +// } else { +// TreeMap map = new TreeMap(); +// map.put(record.getStart(), record); +// gffRecords.put(chr,map); +// } +// } +// +// logger.info("loaded gff3 file, total record: " + count); +// } finally { +// reader.close(); +// } +// } +// +// public void compareBedRecordsToVcf(String chromosome, TreeMap map) throws IOException { +// List vcfList = vcfRecords.get(chromosome); +// +// //bed positions are zero based +// if (map != null) { +// +// for (VCFRecord snp : vcfList) { +// Entry floor = map.floorEntry(new Integer(snp.getPosition())); +// Entry ceiling = map.ceilingEntry(new Integer(snp.getPosition())); +// +// if (vcfRecordFallsInBEDRecord(snp, floor) || vcfRecordFallsInBEDRecord(snp, ceiling)) { +// overlapCount++; +// overlappingVcfRecords.add(snp); +// } else { +// notOverlappingCount++; +// notOverlappingVcfRecords.add(snp); +// if (notOverlappingCount % 10000 == 0) { +// logger.info("Processed records: " + notOverlappingCount); +// } +// } +// } +// } else { +// notOverlappingVcfRecords.addAll(vcfList); +// notOverlappingCount += vcfList.size(); +// } +// writeParsedVcfRecords(); +// } +// +// public void compareBedRecordsToMaf(String chromosome, TreeMap map) throws IOException { +// List mafList = mafRecords.get(chromosome); +// +// //bed positions are zero based +// if (map != null) { +// +// for (MAFRecord snp : mafList) { +// +// Entry floor = map.floorEntry(new Integer(snp.getStartPosition())); +// Entry ceiling = map.ceilingEntry(new Integer(snp.getStartPosition())); +// +// if (mafRecordFallsInBEDRecord(snp, floor) || mafRecordFallsInBEDRecord(snp, ceiling)) { +// overlapCount++; +// overlappingMafRecords.add(snp); +// } else { +// notOverlappingCount++; +// notOverlappingMafRecords.add(snp); +// if (notOverlappingCount % 10000 == 0) { +// logger.info("Processed records: " + notOverlappingCount); +// } +// } +// +// } +// } else { +// notOverlappingMafRecords.addAll(mafList); +// notOverlappingCount += mafList.size(); +// } +// writeParsedMafRecords(); +// } +// +// public void compareGFFRecordsToVcf(String chromosome, TreeMap map) throws IOException { +// List vcfList = vcfRecords.get(chromosome); +// +// if (map != null) { +// +// logger.info("List 
size: " + vcfList.size()); +// for (VCFRecord snp : vcfList) { +// Entry floor = map.floorEntry(new Integer(snp.getPosition())); +// Entry ceiling = map.ceilingEntry(new Integer(snp.getPosition())); +// +// if (vcfRecordFallsInGFF3Record(snp, floor) || vcfRecordFallsInGFF3Record(snp, ceiling)) { +// overlapCount++; +// overlappingVcfRecords.add(snp); +// } else { +// notOverlappingCount++; +// notOverlappingVcfRecords.add(snp); +// if (notOverlappingCount % 10000 == 0) { +// logger.info("Processed records: " + notOverlappingCount); +// } +// } +// } +// } else { +// notOverlappingVcfRecords.addAll(vcfList); +// notOverlappingCount += vcfList.size(); +// } +// writeParsedVcfRecords(); +// } +// +// public void compareGFFRecordsToMaf(String chromosome, TreeMap map) throws IOException { +// List mafList = mafRecords.get(chromosome); +// +// if (map != null) { +// +// for (MAFRecord snp : mafList) { +// +// Entry floor = map.floorEntry(new Integer(snp.getStartPosition())); +// Entry ceiling = map.ceilingEntry(new Integer(snp.getStartPosition())); +// +// if (mafRecordFallsInGFF3Record(snp, floor) || mafRecordFallsInGFF3Record(snp, ceiling)) { +// overlapCount++; +// overlappingMafRecords.add(snp); +// } else { +// notOverlappingCount++; +// notOverlappingMafRecords.add(snp); +// if (notOverlappingCount % 10000 == 0) { +// logger.info("Processed records: " + notOverlappingCount); +// } +// } +// } +// } else { +// notOverlappingMafRecords.addAll(mafList); +// notOverlappingCount += mafList.size(); +// } +// writeParsedMafRecords(); +// } +// +// +// private boolean mafRecordFallsInGFF3Record(MAFRecord snp, Entry entry) { +// if (entry != null) { +// if (snp.getStartPosition() >= entry.getValue().getStart() && snp.getStartPosition() <= entry.getValue().getEnd() || +// snp.getEndPosition() >= entry.getValue().getStart() && snp.getEndPosition() <= entry.getValue().getEnd()) { +// return true; +// } +// } +// return false; +// } +// +// private boolean mafRecordFallsInBEDRecord(MAFRecord snp, Entry entry) { +// if (entry != null) { +// if (snp.getStartPosition() >= entry.getValue().getChromStart()+1 && snp.getStartPosition() <= entry.getValue().getChromEnd() || +// snp.getEndPosition() >= entry.getValue().getChromStart()+1 && snp.getEndPosition() <= entry.getValue().getChromEnd()) { +// return true; +// } +// } +// return false; +// } +// +// private boolean vcfRecordFallsInGFF3Record(VCFRecord snp, Entry entry) { +// if (entry != null) { +// if (snp.getPosition() >= entry.getValue().getStart() && snp.getPosition() <= entry.getValue().getEnd()) { +// return true; +// } +// } +// return false; +// } +// +// private boolean vcfRecordFallsInBEDRecord(VCFRecord snp, Entry entry) { +// if (entry != null) { +// if (snp.getPosition() >= entry.getValue().getChromStart()+1 && snp.getPosition() <= entry.getValue().getChromEnd()) { +// return true; +// } +// } +// return false; +// } +// +// public String[] getCmdLineInputFiles() { +// return cmdLineInputFiles; +// } +// +// public void setCmdLineInputFiles(String[] cmdLineInputFiles) { +// this.cmdLineInputFiles = cmdLineInputFiles; +// } +// +// public String[] getCmdLineOutputFiles() { +// return cmdLineOutputFiles; +// } +// +// public void setCmdLineOutputFiles(String[] cmdLineOutputFiles) { +// this.cmdLineOutputFiles = cmdLineOutputFiles; +// } +// +// private void writeParsedMafRecords() throws IOException { +// writeMafRecordsToFile(cmdLineOutputFiles[0], overlappingMafRecords); +// writeMafRecordsToFile(cmdLineOutputFiles[1], 
notOverlappingMafRecords); +// } +// +// private void writeParsedVcfRecords() throws IOException { +// writeVcfRecordsToFile(cmdLineOutputFiles[0], overlappingVcfRecords); +// writeVcfRecordsToFile(cmdLineOutputFiles[1], notOverlappingVcfRecords); +// } +// +// private void writeHeader() throws IOException { +// writeHeader(cmdLineOutputFiles[0]); +// writeHeader(cmdLineOutputFiles[1]); +// } +// +// private void writeHeader(String fileName) throws IOException { +// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); +// +// for (String h: header) { +// writer.write(h + "\n"); +// } +// writer.close(); +// } +// +// private void writeMafRecordsToFile(String fileName, +// List outputRecords) throws IOException { +// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); +// +// for (MAFRecord r: outputRecords) { +// writer.write(r.getData() + "\n"); +// } +// +// writer.close(); +// outputRecords.clear(); +// } +// +// private void writeVcfRecordsToFile(String fileName, +// List outputRecords) throws IOException { +// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); +// +// for (VCFRecord r: outputRecords) { +// writer.write(r.toString() + "\n"); +// } +// +// writer.close(); +// outputRecords.clear(); +// } +// +// +// protected int setup(String args[]) throws Exception{ +// int returnStatus = 1; +// if (null == args || args.length == 0) { +// System.err.println(Messages.USAGE); +// System.exit(1); +// } +// Options options = new Options(args); +// +// if (options.hasHelpOption()) { +// System.err.println(Messages.USAGE); +// options.displayHelp(); +// returnStatus = 0; +// } else if (options.hasVersionOption()) { +// System.err.println(Messages.getVersionMessage()); +// returnStatus = 0; +// } else if (options.getInputFileNames().length < 1) { +// System.err.println(Messages.USAGE); +// } else if ( ! options.hasLogOption()) { +// System.err.println(Messages.USAGE); +// } else { +// // configure logging +// logFile = options.getLogFile(); +// logger = QLoggerFactory.getLogger(SnpToReferenceRegionFilter.class, logFile, options.getLogLevel()); +// logger.logInitialExecutionStats("SnpMafBedFileComparison", SnpToReferenceRegionFilter.class.getPackage().getImplementationVersion(), args); +// +// // get list of file names +// cmdLineInputFiles = options.getInputFileNames(); +// if (cmdLineInputFiles.length < 1) { +// throw new QMuleException("INSUFFICIENT_ARGUMENTS"); +// } else { +// // loop through supplied files - check they can be read +// for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { +// if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { +// throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); +// } +// } +// } +// +// // check supplied output files can be written to +// if (null != options.getOutputFileNames()) { +// cmdLineOutputFiles = options.getOutputFileNames(); +// for (String outputFile : cmdLineOutputFiles) { +// if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) +// throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); +// } +// } +// return engage(); +// } +// return returnStatus; +// } +// +// +// public static void main(String[] args) throws Exception { +// SnpToReferenceRegionFilter sp = new SnpToReferenceRegionFilter(); +// int exitStatus = sp.setup(args); +// if (null != logger) +// logger.logFinalExecutionStats(exitStatus); +// +// System.exit(exitStatus); +// } +// +// public static MAFRecord convertToMafRecord(String[] params) { +// MAFRecord maf = new MAFRecord(); +//// System.out.println(params[0]); +//// maf.setHugoSymbol(params[0]); +//// maf.setEntrezGeneId(params[1]); +//// maf.setCenter(params[2]); +//// maf.setNcbiBuild(Integer.parseInt(params[3])); +// maf.setChromosome(params[0]); +// maf.setStartPosition(Integer.parseInt(params[1])); +// maf.setEndPosition(Integer.parseInt(params[1])); +//// maf.setStrand(params[7].charAt(0)); +//// maf.setVariantClassification(params[8]); +//// maf.setVariantType(params[9]); +//// maf.setRef(params[10]); +//// maf.setTumourAllele1(params[11]); +//// maf.setTumourAllele2(params[12]); +//// maf.setDbSnpId(params[13]); +//// maf.setDbSnpValStatus(params[14]); +//// maf.setTumourSampleBarcode(params[15]); +//// maf.setNormalSampleBarcode(params[16]); +//// maf.setNormalAllele1(params[17]); +//// maf.setNormalAllele2(params[18]); +//// maf.setTumourValidationAllele1(params[19]); +//// maf.setTumourValidationAllele2(params[20]); +//// maf.setNormalValidationAllele1(params[21]); +//// maf.setNormalValidationAllele2(params[22]); +//// maf.setVerificationStatus(params[23]); +//// maf.setValidationStatus(params[24]); +//// maf.setMutationStatus(params[25]); +//// maf.setSequencingPhase(params[26]); +//// maf.setSequencingSource(params[27]); +//// maf.setValidationMethod(params[28]); +//// maf.setScore(params[29]); +//// maf.setBamFile(params[30]); +//// maf.setSequencer(params[31]); +//// // QCMG +//// if (params.length > 32) +//// maf.setFlag(params[32]); +//// if (params.length > 33) +//// maf.setNd(params[33]); +//// if (params.length > 34) +//// maf.setTd(params[34]); +//// if (params.length > 35) +//// maf.setCanonicalTranscriptId(params[35]); +//// if (params.length > 36) +//// maf.setCanonicalAAChange(params[36]); +//// if (params.length > 37) +//// maf.setCanonicalBaseChange(params[37]); +//// if (params.length > 38) +//// maf.setAlternateTranscriptId(params[38]); +//// if (params.length > 39) +//// maf.setAlternateAAChange(params[39]); +//// if (params.length > 40) +//// maf.setAlternateBaseChange(params[40]); +// +// return maf; +// } +// +// public List getChromosomes() { +// return chromosomes; +// } +// +// public void setChromosomes(List chromosomes) { +// this.chromosomes = chromosomes; +// } +// +// public Map> getMafRecords() { +// return mafRecords; +// } +// +// public void setMafRecords(Map> mafRecords) { +// this.mafRecords = mafRecords; +// } +// +// public List getOverlappingRecords() { +// return overlappingMafRecords; +// } +// +// public void setOverlappingRecords(List overlappingRecords) { +// this.overlappingMafRecords = overlappingRecords; +// } +// +// public List getNotOverlappingRecords() { +// return notOverlappingMafRecords; +// } +// +// public void setNotOverlappingRecords(List notOverlappingRecords) { +// this.notOverlappingMafRecords = notOverlappingRecords; +// } +// +// public int getOverlapCount() { +// return overlapCount; +// } +// +// public void setOverlapCount(int overlapCount) { +// this.overlapCount = 
overlapCount; +// } +// +// public int getNotOverlappingCount() { +// return notOverlappingCount; +// } +// +// public void setNotOverlappingCount(int notOverlappingCount) { +// this.notOverlappingCount = notOverlappingCount; +// } +// +// public int getMafCount() { +// return snpCount; +// } +// +// public void setMafCount(int mafCount) { +// this.snpCount = mafCount; +// } +// +// +// +//} diff --git a/qmule/src/org/qcmg/qmule/SubSample.java b/qmule/src/org/qcmg/qmule/SubSample.java new file mode 100644 index 000000000..63f71a737 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/SubSample.java @@ -0,0 +1,165 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.HeaderUtils; +import org.qcmg.picard.SAMFileReaderFactory; + +import htsjdk.samtools.*; + +public class SubSample { + SamReader reader; + SAMFileWriter writer; + double proportion; + QLogger logger; + + SubSample(Options op, QLogger log) throws Exception{ + + proportion = op.getPROPORTION(); + logger = log; + + String[] inputs =op.getInputFileNames(); + String[] outputs =op.getOutputFileNames(); + if(inputs.length == 0 || outputs.length == 0) + throw new Exception("please specify input/output"); + + //get initialized logger + File input = new File(inputs[0]); + File output = new File(outputs[0]); + if(!input.canRead()) + throw new Exception("unreadable input: " + input.getAbsolutePath()); + + reader = SAMFileReaderFactory.createSAMFileReader(input,ValidationStringency.LENIENT); + SAMFileHeader header = reader.getFileHeader(); + if(header.getSortOrder() != SAMFileHeader.SortOrder.queryname){ + throw new Exception("the input BAM is not sorted by queryname"); + } + SAMFileWriterFactory writeFactory = new SAMFileWriterFactory(); + HeaderUtils.addProgramRecord(header, op.getCommandLine(), null ); + + writer = writeFactory.makeSAMOrBAMWriter(header, false, output ); + + + } + 
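+	// run: streams the queryname-sorted input, grouping consecutive records that share a read name; each group is handed to selectPair, and singletons are kept with probability 'proportion'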
+	void run() throws Exception{ + int numPair = 0; + int numSingle = 0; + int numtotal = 0; + SAMRecordIterator ie = reader.iterator(); + ArrayList adjacents = new ArrayList(); + adjacents.add(ie.next()); + + while(ie.hasNext()){ + numtotal ++; + SAMRecord record = ie.next(); + + //select reads + if(! record.getReadName().equals(adjacents.get(0).getReadName())){ + //select pairs + if(adjacents.size() > 1) + numPair += selectPair( adjacents); + //select single + else if(Math.random() < proportion ){ + writer.addAlignment(adjacents.get(0)); + numSingle ++; + } + //after reporting clear the arraylist + adjacents.clear(); + } + adjacents.add(record); + + } + + //select last records + if(adjacents.size() > 1) + selectPair( adjacents); + else if(Math.random() < proportion ) + writer.addAlignment(adjacents.get(0)); + + reader.close(); + writer.close(); + + logger.info("total reads in input is " + numtotal); + logger.info("select paired reads is " + numPair); + logger.info("select single reads is " + numSingle); + logger.info("the rate of selected reads is "+ ((double)(numPair + numSingle)) / numtotal); + + } + 
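+	// selectPair: all records passed in share a read name; each record is matched to a mate from the same read group, and each matched pair is written out with probability 'proportion' (a missing mate is logged as an error)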
"); + return 0; + } + if(pairs.size() == 1 ){ + logger.error("program Error: single read in paired arraylist -- " + pairs.get(0).getReadName()); + return 0; + } + + int num = 0; + while(pairs.size() >= 2){ + //seek pair one by one + SAMRecord first = pairs.get(0); + SAMRecord mate = null; + pairs.remove(first); + + for(int i = 0; i < pairs.size(); i ++){ + if(first.getReadGroup().getId().equals(pairs.get(i).getReadGroup().getId())){ + mate = pairs.get(i); + pairs.remove(mate); + break; + } + } + + + if(Math.random() < proportion ){ + num ++; //number of selected paired reads + writer.addAlignment(first); + if(mate != null){ + num ++; + writer.addAlignment(mate); + }else{ + logger.error("paired reads missing mate -- " + pairs.get(0).getReadName()); + } + } + } + + return num; + } + + public static void main(String[] args) throws Exception{ + Options op = new Options(SubSample.class, args); + if(op.hasHelpOption()){ + System.out.println(Messages.getMessage("USAGE_SUBSAMPLE")); + op.displayHelp(); + System.exit(0); + } + + String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + QLogger logger = QLoggerFactory.getLogger(SubSample.class, op.getLogFile(), op.getLogLevel()); + try{ + logger.logInitialExecutionStats(SubSample.class.toString(), version, args); + logger.exec("Porportion " + op.getPROPORTION()); + SubSample mySample = new SubSample(op, logger); + mySample.run(); + logger.logFinalExecutionStats(0); + System.exit(0); + }catch(Exception e){ + System.err.println( e.getMessage() + e.toString()); + logger.logFinalExecutionStats(-1); + System.exit(1); + } + } + +} diff --git a/qmule/src/org/qcmg/qmule/SubSample.java-- b/qmule/src/org/qcmg/qmule/SubSample.java-- new file mode 100644 index 000000000..63f71a737 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/SubSample.java-- @@ -0,0 +1,165 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.picard.HeaderUtils; +import org.qcmg.picard.SAMFileReaderFactory; + +import htsjdk.samtools.*; + +public class SubSample { + SamReader reader; + SAMFileWriter writer; + double proportion; + QLogger logger; + + SubSample(Options op, QLogger log) throws Exception{ + + proportion = op.getPROPORTION(); + logger = log; + + String[] inputs =op.getInputFileNames(); + String[] outputs =op.getOutputFileNames(); + if(inputs.length == 0 || outputs.length == 0) + throw new Exception("please specify input/output"); + + //get initialized logger + File input = new File(inputs[0]); + File output = new File(outputs[0]); + if(!input.canRead()) + throw new Exception("unreadable input: " + input.getAbsolutePath()); + + reader = SAMFileReaderFactory.createSAMFileReader(input,ValidationStringency.LENIENT); + SAMFileHeader header = reader.getFileHeader(); + if(header.getSortOrder() != SAMFileHeader.SortOrder.queryname){ + throw new Exception("the input BAM is not sorted by queryname"); + } + SAMFileWriterFactory writeFactory = new SAMFileWriterFactory(); + HeaderUtils.addProgramRecord(header, op.getCommandLine(), null ); + + writer = writeFactory.makeSAMOrBAMWriter(header, false, output ); + + + } + + void run() throws Exception{ + int numPair = 0; + int numSingle = 0; + int numtotal = 0; + SAMRecordIterator ie = reader.iterator(); + ArrayList adjacents = new ArrayList(); + adjacents.add(ie.next()); + + while(ie.hasNext()){ + numtotal ++; + SAMRecord record = ie.next(); + + //select reads + if(! record.getReadName().equals(adjacents.get(0).getReadName())){ + //select pairs + if(adjacents.size() > 1) + numPair += selectPair( adjacents); + //select single + else if(Math.random() < proportion ){ + writer.addAlignment(adjacents.get(0)); + numSingle ++; + } + //after reporting clear the arraylist + adjacents.clear(); + } + adjacents.add(record); + + } + + //select last records + if(adjacents.size() > 1) + selectPair( adjacents); + else if(Math.random() < proportion ) + writer.addAlignment(adjacents.get(0)); + + reader.close(); + writer.close(); + + logger.info("total reads in input is " + numtotal); + logger.info("select paired reads is " + numPair); + logger.info("select single reads is " + numSingle); + logger.info("the rate of selected reads is "+ ((double)(numPair + numSingle)) / numtotal); + + } + + private int selectPair(ArrayList pairs) { + + if(pairs.size() == 0 ){ + logger.error("Program Error: select reads from empty arraylist! 
"); + return 0; + } + if(pairs.size() == 1 ){ + logger.error("program Error: single read in paired arraylist -- " + pairs.get(0).getReadName()); + return 0; + } + + int num = 0; + while(pairs.size() >= 2){ + //seek pair one by one + SAMRecord first = pairs.get(0); + SAMRecord mate = null; + pairs.remove(first); + + for(int i = 0; i < pairs.size(); i ++){ + if(first.getReadGroup().getId().equals(pairs.get(i).getReadGroup().getId())){ + mate = pairs.get(i); + pairs.remove(mate); + break; + } + } + + + if(Math.random() < proportion ){ + num ++; //number of selected paired reads + writer.addAlignment(first); + if(mate != null){ + num ++; + writer.addAlignment(mate); + }else{ + logger.error("paired reads missing mate -- " + pairs.get(0).getReadName()); + } + } + } + + return num; + } + + public static void main(String[] args) throws Exception{ + Options op = new Options(SubSample.class, args); + if(op.hasHelpOption()){ + System.out.println(Messages.getMessage("USAGE_SUBSAMPLE")); + op.displayHelp(); + System.exit(0); + } + + String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + QLogger logger = QLoggerFactory.getLogger(SubSample.class, op.getLogFile(), op.getLogLevel()); + try{ + logger.logInitialExecutionStats(SubSample.class.toString(), version, args); + logger.exec("Porportion " + op.getPROPORTION()); + SubSample mySample = new SubSample(op, logger); + mySample.run(); + logger.logFinalExecutionStats(0); + System.exit(0); + }catch(Exception e){ + System.err.println( e.getMessage() + e.toString()); + logger.logFinalExecutionStats(-1); + System.exit(1); + } + } + +} diff --git a/qmule/src/org/qcmg/qmule/TestFileFinder.java b/qmule/src/org/qcmg/qmule/TestFileFinder.java new file mode 100644 index 000000000..28da0aa08 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/TestFileFinder.java @@ -0,0 +1,23 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; + +import org.qcmg.common.util.FileUtils; + +public class TestFileFinder { + public static void main(String[] args) { + File [] files = FileUtils.findDirectories(args[0], "seq_final", true); + System.out.println("no of files: " + files.length); + for (File f : files) { + System.out.println("file found: " + f.getAbsolutePath()); + } +// File [] files = FileUtils.findFiles(args[0], "java", true); +// System.out.println("no of files: " + files.length); +// for (File f : files) { +// System.out.println("file found: " + f.getAbsolutePath()); +// } + } +} diff --git a/qmule/src/org/qcmg/qmule/TestFileFinder.java-- b/qmule/src/org/qcmg/qmule/TestFileFinder.java-- new file mode 100644 index 000000000..28da0aa08 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/TestFileFinder.java-- @@ -0,0 +1,23 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
diff --git a/qmule/src/org/qcmg/qmule/TestJarUpdate.java b/qmule/src/org/qcmg/qmule/TestJarUpdate.java new file mode 100644 index 000000000..c1937f55a --- /dev/null +++ b/qmule/src/org/qcmg/qmule/TestJarUpdate.java @@ -0,0 +1,191 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.picard.SAMFileReaderFactory; + +public class TestJarUpdate { + + private SAMFileWriter writer; + private SamReader reader; + + private void doWork() throws Exception{ + try { + + LoadReferencedClasses.loadClasses(getClass()); + +// URL className = getClass().getResource(TestJarUpdate.class.getName()); +// if (null != className) +// System.out.println("url: " + className.getFile()); +// else +// System.out.println("url: " + null); +// +// File jarFile = new File(TestJarUpdate.class.getProtectionDomain().getCodeSource().getLocation().toURI()); +// if (null != jarFile) +// System.out.println("jarFile: " + jarFile.getName()); +// else +// System.out.println("jarFile: " + null); +// +// System.out.println("is file type valid jar: " + FileUtils.isFileTypeValid(jarFile, "jar")); +// +// System.out.println("BEFORE: no of loaded packages: " + Package.getPackages().length); +// +// if (FileUtils.isFileTypeValid(jarFile, "jar")) { +// +// // got jar file - load and +// JarFile jf = new JarFile(jarFile); +// Attributes att = jf.getManifest().getMainAttributes(); +// System.out.println("att.size" + att.size()); +// String classpath = att.getValue("Class-Path"); +// System.out.println("classpath: " + classpath); +// +// String [] jars = classpath.split(" "); +// for (String jar : jars) { +// JarFile internalJarFile = new JarFile(jar); +// Enumeration enums = internalJarFile.entries(); +// while (enums.hasMoreElements()) { +// JarEntry je = enums.nextElement(); +// if (FileUtils.isFileTypeValid(je.getName(), "class")) { +// String blah = je.getName().replace(".class", ""); +// blah = blah.replaceAll(System.getProperty("file.separator"), "."); +// System.out.println("about to load class: " + blah); +// this.getClass().getClassLoader().loadClass(blah); +// } +// } +// } +// +// } +// +// System.out.println("AFTER: no of loaded packages: " + Package.getPackages().length); + + + // write to bam file + // sleep for a few mins to allow the sam jar file to be removed/replaced + // close bam file + // tinker with 
class loader + File inputFile = File.createTempFile("testJarUpdateInput", ".sam"); + inputFile.deleteOnExit(); + File outputFile = File.createTempFile("testJarUpdateOutput", ".bam"); +// outputFile.deleteOnExit(); + + createCoverageSam(inputFile); + + reader = SAMFileReaderFactory.createSAMFileReader(inputFile); + + SAMFileHeader header = reader.getFileHeader(); + List recs = new ArrayList(); + + for( SAMRecord rec : reader) { + recs.add(rec); + } + + + SAMFileWriterFactory factory = new SAMFileWriterFactory(); + + writer = factory.makeSAMOrBAMWriter(header, true, outputFile); + +// for (int i = 0 ; i < 100 ; i++) + for( SAMRecord rec : recs) { + for (int i = 0 ; i < 100 ; i++) + writer.addAlignment(rec); + } + + System.out.println("About to sleep!"); + System.gc(); + Thread.sleep(60000); + System.out.println("Am awake now"); + + close(); + System.out.println("DONE!!!"); + } finally { + System.out.println("about to run close quietly"); + closeQuietly(); + System.out.println("DONE!!! again"); + } + } + + + public static void main(String[] args) throws Exception { + TestJarUpdate tju = new TestJarUpdate(); + tju.doWork(); + } + + + private void close() throws Exception { + try { + writer.close(); + reader.close(); + } catch (Exception e) { + System.out.println("Exception caught in close(): "); +// e.printStackTrace(); + throw new Exception("CANNOT_CLOSE_FILES"); + } + } + + private void closeQuietly() { + try { + close(); + } catch (Exception e) { +// e.printStackTrace(); + } + } + + public static final void createCoverageSam(final File fileName) throws Exception { + + OutputStream os = new FileOutputStream(fileName); + PrintStream ps = new PrintStream(os); + + ps.println("@HD VN:1.0 SO:coordinate"); + ps.println("@RG ID:ZZ SM:ES DS:rl=50 "); + ps.println("@RG ID:ZZZ SM:ES DS:rl=50 "); + ps.println("@PG ID:SOLID-GffToSam VN:1.4.3"); + ps.println("@SQ SN:chr1 LN:249250621"); + ps.println("@SQ SN:chr2 LN:243199373"); + ps.println("@SQ SN:chr3 LN:198022430"); + ps.println("@SQ SN:chr4 LN:191154276"); + ps.println("@SQ SN:chr5 LN:180915260"); + ps.println("@SQ SN:chr6 LN:171115067"); + ps.println("@SQ SN:chr7 LN:159138663"); + ps.println("@SQ SN:chr8 LN:146364022"); + ps.println("@SQ SN:chr9 LN:141213431"); + ps.println("@SQ SN:chr10 LN:135534747"); + ps.println("@SQ SN:chr11 LN:135006516"); + ps.println("@SQ SN:chr12 LN:133851895"); + ps.println("@SQ SN:chr13 LN:115169878"); + ps.println("@SQ SN:chr14 LN:107349540"); + ps.println("@SQ SN:chr15 LN:102531392"); + ps.println("@SQ SN:chr16 LN:90354753"); + ps.println("@SQ SN:chr17 LN:81195210"); + ps.println("@SQ SN:chr18 LN:78077248"); + ps.println("@SQ SN:chr19 LN:59128983"); + ps.println("@SQ SN:chr20 LN:63025520"); + ps.println("@SQ SN:chr21 LN:48129895"); + ps.println("@SQ SN:chr22 LN:51304566"); + ps.println("@SQ SN:chrX LN:155270560"); + ps.println("@SQ SN:chrY LN:59373566"); + ps.println("@SQ SN:chrM LN:16571"); + ps.println("1290_738_1025 0 chr1 54026 255 45M5H * 0 0 AACATTCCAAAAGTCAACCATCCAAGTTTATTCTAAATAGATGTG !DDDDDDDDDDDDDDDD''DDDDDD9DDDDDDDDD:<3B''DDD! RG:Z:ZZ CS:Z:T301130201000212101113201021003302230033233111 CQ:Z:BBB=B:@5?>B9A5?>B?'A49<475%@;6<+;9@'4)+8'1?:>"); + ps.println("2333_755_492 16 chr2 10103 255 10H40M * 0 0 CACACCACACCCACACACCACACACCACACCCACACCCAC !=DD?%+DD<)=DDD<@9)9C:DA.:DD>%%,?+%;<-1"); + ps.println("1879_282_595 0 chr3 60775 255 40M10H * 0 0 TCTAAATTTGTTTGATCACATACTCCTTTTCTGGCTAACA !DD,*@DDD''DD>5:DD>;DDDD=CDD8%%DA9-DDC0! 
RG:Z:ZZ CS:Z:T0223303001200123211133122020003210323011 CQ:Z:=><=,*7685'970/'437(4<:54*:84%%;/3''?;)("); + ps.close(); + os.close(); + } +} diff --git a/qmule/src/org/qcmg/qmule/TestSort.java b/qmule/src/org/qcmg/qmule/TestSort.java new file mode 100644 index 000000000..cf9faddb6 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/TestSort.java @@ -0,0 +1,109 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. 
+ * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.IOException; +import java.net.InetAddress; +import java.text.SimpleDateFormat; +import java.util.Calendar; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SAMRecord; + +import org.qcmg.picard.SAMFileReaderFactory; + +public class TestSort { + private final File input; + private final File output; + private final int maxRecordsInRam; + private SAMFileHeader.SortOrder sort = SAMFileHeader.SortOrder.unsorted; + + TestSort(final String[] args) throws Exception{ + input = new File(args[0]); + output = new File(args[1]); + maxRecordsInRam = Integer.parseInt(args[2]); + + String sortOrder = args[3]; + if(sortOrder.equalsIgnoreCase("coordinate")) + sort = SAMFileHeader.SortOrder.coordinate; + else if(sortOrder.equalsIgnoreCase("queryname")) + sort = SAMFileHeader.SortOrder.queryname; + else if(! sortOrder.equalsIgnoreCase("unsorted")) + throw new Exception( sortOrder + " isn't valid SAMFileHeader sort order!"); + + System.out.println(getTime() + " host: " + InetAddress.getLocalHost().getHostName()); + System.out.println(getTime() + " input: " + input.getAbsolutePath()); + System.out.println(getTime() + " output: " + output.getAbsolutePath()); + System.out.println(getTime() + " sort order: " + sortOrder); + System.out.println(getTime() + " max Records In RAM: " + maxRecordsInRam); + } + + public void Sorting() throws Exception{ + SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); + SAMFileHeader header = reader.getFileHeader(); + + SAMFileWriterFactory writeFactory = new SAMFileWriterFactory(); + htsjdk.samtools.SAMFileWriterImpl.setDefaultMaxRecordsInRam(maxRecordsInRam ); + header.setSortOrder(sort); + if(sort.equals(SAMFileHeader.SortOrder.coordinate)) + writeFactory.setCreateIndex(true); + final SAMFileWriter writer = writeFactory.makeSAMOrBAMWriter(header, false, output); + + int num = 0; + for (SAMRecord record : reader) { + if(num % maxRecordsInRam == 0) + printRunInfo(num); + + writer.addAlignment(record); + num ++; + } + +// System.out.println(getTime() + " Merging tmp into output BAM, tmp location are " + htsjdk.samtools.util.IOUtil.getDefaultTmpDir()); + reader.close(); + writer.close(); + + System.out.println(getTime() + " created output: " + output.getAbsolutePath()); + } + + private void printRunInfo(int number) throws IOException{ + Runtime runtime = Runtime.getRuntime(); + int mb = 1024 * 1024; + long totalRAM = runtime.totalMemory() / mb; + long usedRAM = (runtime.totalMemory() - runtime.freeMemory()) / mb; + + String dateNow = getTime(); + + String info = String.format("%s read %d record. 
Total memory: %dM, used memory: %dM", + dateNow, number, totalRAM, usedRAM); + + System.out.println(info); + } + + private String getTime(){ + Calendar currentDate = Calendar.getInstance(); + SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); + return "[" + formatter.format(currentDate.getTime()) + "]"; + } + 
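+	// expected arguments: input BAM, output BAM, maxRecordsInRam, sort order (queryname/coordinate/unsorted), matching the constructor above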
+	public static void main(final String[] args) { + try{ + TestSort mysort = new TestSort(args); + mysort.Sorting(); + System.exit(0); + }catch(Exception e){ + System.err.println("usage:qmule.TestSort [queryname/coordinate/unsorted]"); + System.err.println(e.toString()); + System.exit(1); + } + + + } +} diff --git a/qmule/src/org/qcmg/qmule/TranscriptomeMule.java b/qmule/src/org/qcmg/qmule/TranscriptomeMule.java new file mode 100644 index 000000000..c9b4f95f5 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/TranscriptomeMule.java @@ -0,0 +1,192 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.PileupElement; +import org.qcmg.common.string.StringUtils; +import org.qcmg.common.util.Constants; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.PileupUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.picard.util.PileupElementUtil; +import org.qcmg.pileup.PileupFileReader; + +public class TranscriptomeMule { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private final static int MIN_COVERAGE = 3; + // assuming all the tumours have been merged together, and we only have a single entry +// private static int[] tumourStartPositions = null; + private int[] tumourStartPositions = null; + + private final List positions = new ArrayList<>(100000); + + private static QLogger logger; + + public int engage() throws Exception { + logger.info("loading samtools mpileup data"); + walkPileup(cmdLineInputFiles[0]); + logger.info("loading samtools mpileup data - DONE [" + positions.size() + "]"); + + logger.info("outputting data"); + writeOutput(cmdLineOutputFiles[0]); + logger.info("outputting data - DONE"); + + return exitStatus; + } + + private void writeOutput(String outputFile) throws IOException { + FileWriter writer = new FileWriter(outputFile); + String header = "chr\tposition\tref\tpileup"; + + try { + writer.write(header + "\n"); + for (StringBuilder sb : positions) { + writer.write(sb.toString() + Constants.NEW_LINE); + } +// for (QSnpRecord record : positions) +// writer.write(record.getChromosome() + "\t" +// + record.getPosition() + "\t" +// + record.getRef() + "\t" +// + record.getTumourNucleotides() + "\n"); + } finally { + writer.close(); + } + } + + private void parsePileup(String record) { +// private void parsePileup(PileupRecord record) { + String[] params = TabTokenizer.tokenize(record); +// String[] params = tabbedPattern.split(record.getPileup(), -1); + if (null == tumourStartPositions) { + // set up the number of tumour start positions + // dependent on the number of columns in the input + // HUGE assumption that the mpileup data only contains tumour data here... + + //TODO is this right? 
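+			// samtools mpileup emits 3 fixed columns (chrom, position, ref base) followed by 3 columns per sample (depth, read bases, base qualities), hence the (columns - 3) / 3 below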
+ // first 3 columns are chr pos ref + int noOfSamples = (params.length -3) /3; + tumourStartPositions = new int[noOfSamples]; + for (int i = 0 ; i < noOfSamples ; i++) { + tumourStartPositions[i] = (i+1) * 3; + } + } + + // get coverage for both normal and tumour + int tumourCoverage = PileupUtils.getCoverageCount(params, tumourStartPositions); + if (tumourCoverage < MIN_COVERAGE) return; + + String tumourBases = PileupUtils.getBases(params, tumourStartPositions); + + // means there is an indel at this position - ignore + if (tumourBases.contains("+") || tumourBases.contains("-")) return; + String tumourBaseQualities = PileupUtils.getQualities(params, tumourStartPositions); + + // get bases as PileupElement collections + List tumourBaseCounts = PileupElementUtil.getPileupCounts(tumourBases, tumourBaseQualities); + + // get variant count for both + int tumourVariantCount = PileupElementUtil.getLargestVariantCount(tumourBaseCounts); + + if (tumourVariantCount >= 3) { + // keeper + StringBuilder sb = new StringBuilder(params[0]); + StringUtils.updateStringBuilder(sb, params[1], Constants.TAB); + StringUtils.updateStringBuilder(sb, params[2], Constants.TAB); + StringUtils.updateStringBuilder(sb, PileupElementUtil.getOABS(tumourBaseCounts, params[2].charAt(0)), Constants.TAB); + +// QSnpRecord rec = new QSnpRecord(params[0], Integer.parseInt(params[1]), params[2]); +// rec.setTumourOABS(PileupElementUtil.getOABS(tumourBaseCounts, rec.getRef().charAt(0))); + positions.add(sb); + } + + } + + private void walkPileup(String pileupFileName) throws Exception { + PileupFileReader reader = new PileupFileReader(new File(pileupFileName)); + int count = 0; + try { + for (String record : reader) { +// for (PileupRecord record : reader) { + parsePileup(record); + if (++count % 1000000 == 0) + logger.info("hit " + count + " pileup records, with " + positions.size() + " keepers."); + } + } finally { + reader.close(); + } + } + + public static void main(String[] args) throws Exception { + TranscriptomeMule sp = new TranscriptomeMule(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(TranscriptomeMule.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("Example", TranscriptomeMule.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- b/qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- new file mode 100644 index 000000000..c9b4f95f5 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- @@ -0,0 +1,192 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.PileupElement; +import org.qcmg.common.string.StringUtils; +import org.qcmg.common.util.Constants; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.PileupUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.picard.util.PileupElementUtil; +import org.qcmg.pileup.PileupFileReader; + +public class TranscriptomeMule { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private final static int MIN_COVERAGE = 3; + // assuming all the tumours have been merged together, and we only have a single entry +// private static int[] tumourStartPositions = null; + private int[] tumourStartPositions = null; + + private final List positions = new ArrayList<>(100000); + + private static QLogger logger; + + public int engage() throws Exception { + logger.info("loading samtools mpileup data"); + walkPileup(cmdLineInputFiles[0]); + logger.info("loading samtools mpileup data - DONE [" + positions.size() + "]"); + + logger.info("outputting data"); + writeOutput(cmdLineOutputFiles[0]); + logger.info("outputting data - DONE"); + + return exitStatus; + } + + private void writeOutput(String outputFile) throws IOException { + FileWriter writer = new FileWriter(outputFile); + String header = "chr\tposition\tref\tpileup"; + + try { + writer.write(header + "\n"); + for (StringBuilder sb : positions) { + writer.write(sb.toString() + Constants.NEW_LINE); + } +// for (QSnpRecord record : positions) +// writer.write(record.getChromosome() + "\t" +// + record.getPosition() + "\t" +// + record.getRef() + "\t" +// + record.getTumourNucleotides() + "\n"); + } finally { + writer.close(); + } + } + + private void parsePileup(String record) { +// private void parsePileup(PileupRecord record) { + String[] params = TabTokenizer.tokenize(record); +// String[] params = tabbedPattern.split(record.getPileup(), -1); + if (null == tumourStartPositions) { + // set up the number of tumour start positions + // dependent on the number of columns in the input + // HUGE assumption that the mpileup data only contains tumour data here... + + //TODO is this right? 
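+			// Assuming the standard samtools mpileup layout: after the chr, pos and ref columns,
+			// each sample contributes a (depth, read bases, base qualities) triplet, e.g.
+			//   chr1  12345  A  11  .,,....,.,.  IIIIHIIIIDI
+			// so the depth column of sample i (0-based) sits at index 3 * (i + 1),
+			// which is the value stored in tumourStartPositions below.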
+ // first 3 columns are chr pos ref + int noOfSamples = (params.length -3) /3; + tumourStartPositions = new int[noOfSamples]; + for (int i = 0 ; i < noOfSamples ; i++) { + tumourStartPositions[i] = (i+1) * 3; + } + } + + // get coverage for both normal and tumour + int tumourCoverage = PileupUtils.getCoverageCount(params, tumourStartPositions); + if (tumourCoverage < MIN_COVERAGE) return; + + String tumourBases = PileupUtils.getBases(params, tumourStartPositions); + + // means there is an indel at this position - ignore + if (tumourBases.contains("+") || tumourBases.contains("-")) return; + String tumourBaseQualities = PileupUtils.getQualities(params, tumourStartPositions); + + // get bases as PileupElement collections + List tumourBaseCounts = PileupElementUtil.getPileupCounts(tumourBases, tumourBaseQualities); + + // get variant count for both + int tumourVariantCount = PileupElementUtil.getLargestVariantCount(tumourBaseCounts); + + if (tumourVariantCount >= 3) { + // keeper + StringBuilder sb = new StringBuilder(params[0]); + StringUtils.updateStringBuilder(sb, params[1], Constants.TAB); + StringUtils.updateStringBuilder(sb, params[2], Constants.TAB); + StringUtils.updateStringBuilder(sb, PileupElementUtil.getOABS(tumourBaseCounts, params[2].charAt(0)), Constants.TAB); + +// QSnpRecord rec = new QSnpRecord(params[0], Integer.parseInt(params[1]), params[2]); +// rec.setTumourOABS(PileupElementUtil.getOABS(tumourBaseCounts, rec.getRef().charAt(0))); + positions.add(sb); + } + + } + + private void walkPileup(String pileupFileName) throws Exception { + PileupFileReader reader = new PileupFileReader(new File(pileupFileName)); + int count = 0; + try { + for (String record : reader) { +// for (PileupRecord record : reader) { + parsePileup(record); + if (++count % 1000000 == 0) + logger.info("hit " + count + " pileup records, with " + positions.size() + " keepers."); + } + } finally { + reader.close(); + } + } + + public static void main(String[] args) throws Exception { + TranscriptomeMule sp = new TranscriptomeMule(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(TranscriptomeMule.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("Example", TranscriptomeMule.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java new file mode 100644 index 000000000..222727290 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java @@ -0,0 +1,302 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Pattern; +import java.util.zip.GZIPOutputStream; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ReferenceNameComparator; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.PileupUtils; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.qmule.gff3.GFF3RecordChromosomeAndPositionComparator; +import org.qcmg.pileup.PileupFileReader; + +public class WiggleFromPileup { + + private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); + private boolean compressOutput; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + private String pileupFormat; + private int normalCoverage, tumourCoverage; + private int noOfNormalFiles, noOfTumourFiles; + private long covGood, covBad, totalCov; + private int[] normalStartPositions, tumourStartPositions; + private String currentChromosome = "chr1"; + + private int lastPosition; + + private final List gffs = new ArrayList(); + + private static GFF3Record gffRecord; + private static Iterator iter; + + private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); + private final static GFF3RecordChromosomeAndPositionComparator CHR_POS_COMP = new GFF3RecordChromosomeAndPositionComparator(); + + + private static QLogger logger; + + public int engage() throws Exception { + + // setup + initialise(); + + loadGffFile(); + + Collections.sort(gffs, CHR_POS_COMP); + + if (gffs.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); + + // parse pileup file + parsePileup(); + + logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); + + return exitStatus; + } + + private void loadGffFile() throws Exception { + GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); + try { + int totalNoOfbaits = 0, ignoredBaits = 0; + for (GFF3Record record : reader) { + totalNoOfbaits++; + if (isGff3RecordBait(record.getType())) { + gffs.add(record); + } else ignoredBaits++; + } + + logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + gffs.size() + ", entries 
that didn't make it: " + ignoredBaits); + } finally { + reader.close(); + } + } + + protected static boolean isGff3RecordBait(String type) { + return "exon".equals(type); + } +// protected static boolean isGff3RecordBait(String type) { +// return "bait_1_100".equals(type) +// || "bait".equals(type) +// || "highbait".equals(type) +// || "lowbait".equals(type); +// } + + private void initialise() { + noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); + noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); + normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); + tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); + +// logger.info("start positions: " + Arrays.deepToString(normalStartPositions) + ", " + Arrays.deepToString(tumourStartPositions)); + } + + private void parsePileup() throws Exception { + Writer writer = getWriter(cmdLineOutputFiles[0]); + + iter = gffs.iterator(); + if (iter.hasNext()) { + setGffRecord(iter.next()); + } else { + throw new RuntimeException("Unable to set next Gff record"); + } + + PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); + StringBuilder sb = new StringBuilder(); + try { + for (String pr : reader) { +// for (PileupRecord pr : reader) { + addWiggleData(pr, sb); +// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); + if (++totalCov % 100000 == 0 && sb.length() > 0) { + writer.write(sb.toString()); + sb = new StringBuilder(); + + if (totalCov % 10000000 == 0) + logger.info("hit " + totalCov + " pileup records"); + } + } + + // empty contents of StringBuilder to writer + if (sb.length() > 0) writer.write(sb.toString()); + + } finally { + writer.close(); + reader.close(); + } + } + + private Writer getWriter(String fileName) throws IOException { + Writer writer = null; + if (compressOutput) { + writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); + } else { + writer = new FileWriter(new File(fileName)); + } + return writer; + } + + protected static boolean isPositionInBait(String chromosome, int position, Iterator iter, GFF3Record currentRecord) { + + if (chromosome.equals(currentRecord.getSeqId())) { + + if (position < currentRecord.getStart()) { + return false; + } else if (position <= currentRecord.getEnd()) { + return true; + } else { + return advanceGff3Record(chromosome, position, iter); + } + } else if (COMPARATOR.compare(chromosome, currentRecord.getSeqId()) < 0) { + // pileup position is in lower chromosome than gffRecord + return false; + } else { + // pileup position is in higher chromosome than gffRecord + // advance iterator + return advanceGff3Record(chromosome, position, iter); + } + } + + private static boolean advanceGff3Record(String chromosome, int position, + Iterator iter) { + if ( ! iter.hasNext()) { + // no more entries in gffs + return false; + } else { + setGffRecord(iter.next()); + return isPositionInBait(chromosome, position, iter, getGffRecord()); + } + } + + private void addWiggleData(String paramString, StringBuilder sb) { + int firstTabIndex = paramString.indexOf('\t'); + String chromosome = paramString.substring(0, firstTabIndex); + int position = Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); + + if ( ! isPositionInBait(chromosome, position, iter, getGffRecord())) return; +// if ( ! 
isPositionInBait(chromosome, position)) return; + + if (position != lastPosition +1 || ! currentChromosome.equalsIgnoreCase(chromosome)) { + // add new header to the StringBuilder + String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; + sb.append(wiggleHeader); + + // update last position and current chromosome + currentChromosome = chromosome; + } + lastPosition = position; + String [] params = tabbedPattern.split(paramString, -1); + + if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { + sb.append("0\n"); + ++covBad; + } else { + if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { + sb.append("1\n"); + ++covGood; + } else { + sb.append("0\n"); + ++covBad; + } + } + } + + public static void main(String[] args) throws Exception { + WiggleFromPileup sp = new WiggleFromPileup(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(WiggleFromPileup.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileup.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + // get app specific options + pileupFormat = options.getPileupFormat(); + normalCoverage = options.getNormalCoverage(); + tumourCoverage = options.getTumourCoverage(); + compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); + + if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. NNTT)"); + if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); + if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 
20)"); + + logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput); + + return engage(); + } + return returnStatus; + } + + protected static void setGffRecord(GFF3Record gffRecord) { + WiggleFromPileup.gffRecord = gffRecord; + } + + protected static GFF3Record getGffRecord() { + return gffRecord; + } +} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- new file mode 100644 index 000000000..222727290 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- @@ -0,0 +1,302 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Pattern; +import java.util.zip.GZIPOutputStream; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ReferenceNameComparator; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.PileupUtils; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.qmule.gff3.GFF3RecordChromosomeAndPositionComparator; +import org.qcmg.pileup.PileupFileReader; + +public class WiggleFromPileup { + + private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); + private boolean compressOutput; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + private String pileupFormat; + private int normalCoverage, tumourCoverage; + private int noOfNormalFiles, noOfTumourFiles; + private long covGood, covBad, totalCov; + private int[] normalStartPositions, tumourStartPositions; + private String currentChromosome = "chr1"; + + private int lastPosition; + + private final List gffs = new ArrayList(); + + private static GFF3Record gffRecord; + private static Iterator iter; + + private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); + private final static GFF3RecordChromosomeAndPositionComparator CHR_POS_COMP = new GFF3RecordChromosomeAndPositionComparator(); + + + private static QLogger logger; + + public int engage() throws Exception { + + // setup + initialise(); + + loadGffFile(); + + Collections.sort(gffs, CHR_POS_COMP); + + if (gffs.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); + + // parse pileup file + parsePileup(); + + logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); + + return exitStatus; + } + + private void loadGffFile() throws Exception { + GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); + try { + int totalNoOfbaits = 0, ignoredBaits = 0; + for (GFF3Record record : reader) { + totalNoOfbaits++; + if (isGff3RecordBait(record.getType())) { + gffs.add(record); + } else ignoredBaits++; + } + + logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + gffs.size() + ", entries that didn't make it: " + ignoredBaits); + } finally { + reader.close(); + } + } + + 
protected static boolean isGff3RecordBait(String type) { + return "exon".equals(type); + } +// protected static boolean isGff3RecordBait(String type) { +// return "bait_1_100".equals(type) +// || "bait".equals(type) +// || "highbait".equals(type) +// || "lowbait".equals(type); +// } + + private void initialise() { + noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); + noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); + normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); + tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); + +// logger.info("start positions: " + Arrays.deepToString(normalStartPositions) + ", " + Arrays.deepToString(tumourStartPositions)); + } + + private void parsePileup() throws Exception { + Writer writer = getWriter(cmdLineOutputFiles[0]); + + iter = gffs.iterator(); + if (iter.hasNext()) { + setGffRecord(iter.next()); + } else { + throw new RuntimeException("Unable to set next Gff record"); + } + + PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); + StringBuilder sb = new StringBuilder(); + try { + for (String pr : reader) { +// for (PileupRecord pr : reader) { + addWiggleData(pr, sb); +// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); + if (++totalCov % 100000 == 0 && sb.length() > 0) { + writer.write(sb.toString()); + sb = new StringBuilder(); + + if (totalCov % 10000000 == 0) + logger.info("hit " + totalCov + " pileup records"); + } + } + + // empty contents of StringBuilder to writer + if (sb.length() > 0) writer.write(sb.toString()); + + } finally { + writer.close(); + reader.close(); + } + } + + private Writer getWriter(String fileName) throws IOException { + Writer writer = null; + if (compressOutput) { + writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); + } else { + writer = new FileWriter(new File(fileName)); + } + return writer; + } + + protected static boolean isPositionInBait(String chromosome, int position, Iterator iter, GFF3Record currentRecord) { + + if (chromosome.equals(currentRecord.getSeqId())) { + + if (position < currentRecord.getStart()) { + return false; + } else if (position <= currentRecord.getEnd()) { + return true; + } else { + return advanceGff3Record(chromosome, position, iter); + } + } else if (COMPARATOR.compare(chromosome, currentRecord.getSeqId()) < 0) { + // pileup position is in lower chromosome than gffRecord + return false; + } else { + // pileup position is in higher chromosome than gffRecord + // advance iterator + return advanceGff3Record(chromosome, position, iter); + } + } + + private static boolean advanceGff3Record(String chromosome, int position, + Iterator iter) { + if ( ! iter.hasNext()) { + // no more entries in gffs + return false; + } else { + setGffRecord(iter.next()); + return isPositionInBait(chromosome, position, iter, getGffRecord()); + } + } + + private void addWiggleData(String paramString, StringBuilder sb) { + int firstTabIndex = paramString.indexOf('\t'); + String chromosome = paramString.substring(0, firstTabIndex); + int position = Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); + + if ( ! isPositionInBait(chromosome, position, iter, getGffRecord())) return; +// if ( ! isPositionInBait(chromosome, position)) return; + + if (position != lastPosition +1 || ! 
currentChromosome.equalsIgnoreCase(chromosome)) { + // add new header to the StringBuilder + String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; + sb.append(wiggleHeader); + + // update last position and current chromosome + currentChromosome = chromosome; + } + lastPosition = position; + String [] params = tabbedPattern.split(paramString, -1); + + if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { + sb.append("0\n"); + ++covBad; + } else { + if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { + sb.append("1\n"); + ++covGood; + } else { + sb.append("0\n"); + ++covBad; + } + } + } + + public static void main(String[] args) throws Exception { + WiggleFromPileup sp = new WiggleFromPileup(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(WiggleFromPileup.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileup.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + // get app specific options + pileupFormat = options.getPileupFormat(); + normalCoverage = options.getNormalCoverage(); + tumourCoverage = options.getTumourCoverage(); + compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); + + if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. NNTT)"); + if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); + if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 
20)"); + + logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput); + + return engage(); + } + return returnStatus; + } + + protected static void setGffRecord(GFF3Record gffRecord) { + WiggleFromPileup.gffRecord = gffRecord; + } + + protected static GFF3Record getGffRecord() { + return gffRecord; + } +} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java new file mode 100644 index 000000000..36c6a7a8f --- /dev/null +++ b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java @@ -0,0 +1,307 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.zip.GZIPOutputStream; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.PositionRange; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.common.util.PileupUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.pileup.PileupFileReader; + +public class WiggleFromPileupTakeTwo { + + private static QLogger logger; +// private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); +// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); + + private boolean compressOutput; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + private String pileupFormat; + private int normalCoverage, tumourCoverage; + private int noOfNormalFiles, noOfTumourFiles; + private long covGood, covBad, totalCov; + private int[] normalStartPositions, tumourStartPositions; + private String currentChromosome; + private String[] gffRegions; + private PriorityQueue currentQueue; + private PositionRange currentRange; + private int lastPosition; + private final Map> regionsOfInterest = new HashMap>(); + + + public int engage() throws Exception { + // setup + initialise(); + + loadGffFile(); + + logger.info("no of entries in regionsOfInterest: " + regionsOfInterest.size()); + + long baseCount = 0; + for (PriorityQueue ranges : regionsOfInterest.values()) { + for (PositionRange pr : ranges) { + baseCount += (pr.getEnd() - pr.getStart()); + } + } + logger.info("total no of bases covered by gff regions of interest: " + baseCount); + + + if (regionsOfInterest.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); + + // parse pileup file + parsePileup(); + + logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); + + return exitStatus; + } + + private void loadGffFile() throws Exception { + GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); + try { + int totalNoOfbaits = 0, ignoredBaits = 0; + for (GFF3Record record : reader) { + totalNoOfbaits++; + if (isGff3RecordCorrectType(record.getType())) { + populateRegionsOfInterest(record); + } 
else ignoredBaits++; + } + + logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + (totalNoOfbaits - ignoredBaits) + ", entries that didn't make it: " + ignoredBaits); + } finally { + reader.close(); + } + } + + private void populateRegionsOfInterest(GFF3Record record) { + // get collection corresponding to chromosome + PriorityQueue ranges = regionsOfInterest.get(record.getSeqId()); + if (null == ranges) { + ranges = new PriorityQueue(); + ranges.add(new PositionRange(record.getStart(), record.getEnd())); + regionsOfInterest.put(record.getSeqId(), ranges); + } else { + // loop through PositionRanges and see if any are adjacent + // not very efficient, but will do for now + boolean rangeExtended = false; + for (PositionRange pr : ranges) { + if (pr.isAdjacentToEnd(record.getStart())) { + pr.extendRange(record.getEnd()); + rangeExtended = true; + break; + } + } + if ( ! rangeExtended) { + // add new PositionRange + ranges.add(new PositionRange(record.getStart(), record.getEnd())); + } + } + } + + protected boolean isGff3RecordCorrectType(String type) { + for (String regionName : gffRegions) { + if (type.equals(regionName)) return true; + } + return false; + } + + private void initialise() { + noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); + noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); + normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); + tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); + } + + private void parsePileup() throws Exception { + Writer writer = getWriter(cmdLineOutputFiles[0]); + + PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); + StringBuilder sb = new StringBuilder(); + try { + for (String pr : reader) { +// for (PileupRecord pr : reader) { + addWiggleData(pr, sb); +// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); + if (++totalCov % 100000 == 0 && sb.length() > 0) { + writer.write(sb.toString()); + sb = new StringBuilder(); + + if (totalCov % 10000000 == 0) + logger.info("hit " + totalCov + " pileup records"); + } + } + + // empty contents of StringBuilder to writer + if (sb.length() > 0) writer.write(sb.toString()); + + } finally { + writer.close(); + reader.close(); + } + } + + private Writer getWriter(String fileName) throws IOException { + Writer writer = null; + if (compressOutput) { + writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); + } else { + writer = new FileWriter(new File(fileName)); + } + return writer; + } + + protected boolean isPositionInRegionOfInterest(int position, PriorityQueue ranges) { + if (null == currentRange) return false; + + if (position < currentRange.getStart()) { + return false; + } else if (position <= currentRange.getEnd()) { + return true; + } else { + // advance queue + currentRange = ranges.poll(); + return isPositionInRegionOfInterest(position, ranges); + } + } + + private void addWiggleData(String paramString, StringBuilder sb) { + int firstTabIndex = paramString.indexOf('\t'); + String chromosome = paramString.substring(0, firstTabIndex); + int position = Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); + boolean chromosomeUpdated = false; + if ( ! 
chromosome.equalsIgnoreCase(currentChromosome)) { + // update last position and current chromosome + currentChromosome = chromosome; + chromosomeUpdated = true; + currentQueue = regionsOfInterest.get(chromosome); + if (null == currentQueue) { + logger.warn("no ranges found for chr: " + chromosome); + currentRange = null; + } else { + currentRange = currentQueue.poll(); + } + } + + if ( ! isPositionInRegionOfInterest(position, currentQueue)) return; + + if (position != lastPosition +1 || chromosomeUpdated) { + String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; + sb.append(wiggleHeader); + } + lastPosition = position; + String [] params = TabTokenizer.tokenize(paramString); +// String [] params = tabbedPattern.split(paramString, -1); + + if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { + sb.append("0\n"); + ++covBad; + } else { + if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { + sb.append("1\n"); + ++covGood; + } else { + sb.append("0\n"); + ++covBad; + } + } + } + + public static void main(String[] args) throws Exception { + LoadReferencedClasses.loadClasses(WiggleFromPileupTakeTwo.class); + WiggleFromPileupTakeTwo sp = new WiggleFromPileupTakeTwo(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(WiggleFromPileupTakeTwo.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileupTakeTwo.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + // get app specific options + pileupFormat = options.getPileupFormat(); + normalCoverage = options.getNormalCoverage(); + tumourCoverage = options.getTumourCoverage(); + compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); + gffRegions = options.getGffRegions(); + + + if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. 
NNTT)"); + if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); + if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 20)"); + if (gffRegions.length == 0) throw new IllegalArgumentException("Please specify the region names within the gff3 file you are interested in"); + + logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput + ", gff regions: " + Arrays.deepToString(gffRegions)); + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- new file mode 100644 index 000000000..36c6a7a8f --- /dev/null +++ b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- @@ -0,0 +1,307 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.zip.GZIPOutputStream; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.PositionRange; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.LoadReferencedClasses; +import org.qcmg.common.util.PileupUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.pileup.PileupFileReader; + +public class WiggleFromPileupTakeTwo { + + private static QLogger logger; +// private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); +// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); + + private boolean compressOutput; + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + private String pileupFormat; + private int normalCoverage, tumourCoverage; + private int noOfNormalFiles, noOfTumourFiles; + private long covGood, covBad, totalCov; + private int[] normalStartPositions, tumourStartPositions; + private String currentChromosome; + private String[] gffRegions; + private PriorityQueue currentQueue; + private PositionRange currentRange; + private int lastPosition; + private final Map> regionsOfInterest = new HashMap>(); + + + public int engage() throws Exception { + // setup + initialise(); + + loadGffFile(); + + logger.info("no of entries in regionsOfInterest: " + regionsOfInterest.size()); + + long baseCount = 0; + for (PriorityQueue ranges : regionsOfInterest.values()) { + for (PositionRange pr : ranges) { + baseCount += (pr.getEnd() - pr.getStart()); + } + } + logger.info("total no of bases covered by gff regions of interest: " + baseCount); + + + if (regionsOfInterest.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); + + // parse pileup file + parsePileup(); + + logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); + + return exitStatus; + } + + private void loadGffFile() throws Exception { + GFF3FileReader reader = 
new GFF3FileReader(new File(cmdLineInputFiles[1])); + try { + int totalNoOfbaits = 0, ignoredBaits = 0; + for (GFF3Record record : reader) { + totalNoOfbaits++; + if (isGff3RecordCorrectType(record.getType())) { + populateRegionsOfInterest(record); + } else ignoredBaits++; + } + + logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + (totalNoOfbaits - ignoredBaits) + ", entries that didn't make it: " + ignoredBaits); + } finally { + reader.close(); + } + } + + private void populateRegionsOfInterest(GFF3Record record) { + // get collection corresponding to chromosome + PriorityQueue ranges = regionsOfInterest.get(record.getSeqId()); + if (null == ranges) { + ranges = new PriorityQueue(); + ranges.add(new PositionRange(record.getStart(), record.getEnd())); + regionsOfInterest.put(record.getSeqId(), ranges); + } else { + // loop through PositionRanges and see if any are adjacent + // not very efficient, but will do for now + boolean rangeExtended = false; + for (PositionRange pr : ranges) { + if (pr.isAdjacentToEnd(record.getStart())) { + pr.extendRange(record.getEnd()); + rangeExtended = true; + break; + } + } + if ( ! rangeExtended) { + // add new PositionRange + ranges.add(new PositionRange(record.getStart(), record.getEnd())); + } + } + } + + protected boolean isGff3RecordCorrectType(String type) { + for (String regionName : gffRegions) { + if (type.equals(regionName)) return true; + } + return false; + } + + private void initialise() { + noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); + noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); + normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); + tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); + } + + private void parsePileup() throws Exception { + Writer writer = getWriter(cmdLineOutputFiles[0]); + + PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); + StringBuilder sb = new StringBuilder(); + try { + for (String pr : reader) { +// for (PileupRecord pr : reader) { + addWiggleData(pr, sb); +// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); + if (++totalCov % 100000 == 0 && sb.length() > 0) { + writer.write(sb.toString()); + sb = new StringBuilder(); + + if (totalCov % 10000000 == 0) + logger.info("hit " + totalCov + " pileup records"); + } + } + + // empty contents of StringBuilder to writer + if (sb.length() > 0) writer.write(sb.toString()); + + } finally { + writer.close(); + reader.close(); + } + } + + private Writer getWriter(String fileName) throws IOException { + Writer writer = null; + if (compressOutput) { + writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); + } else { + writer = new FileWriter(new File(fileName)); + } + return writer; + } + + protected boolean isPositionInRegionOfInterest(int position, PriorityQueue ranges) { + if (null == currentRange) return false; + + if (position < currentRange.getStart()) { + return false; + } else if (position <= currentRange.getEnd()) { + return true; + } else { + // advance queue + currentRange = ranges.poll(); + return isPositionInRegionOfInterest(position, ranges); + } + } + + private void addWiggleData(String paramString, StringBuilder sb) { + int firstTabIndex = paramString.indexOf('\t'); + String chromosome = paramString.substring(0, firstTabIndex); + int position = 
Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); + boolean chromosomeUpdated = false; + if ( ! chromosome.equalsIgnoreCase(currentChromosome)) { + // update last position and current chromosome + currentChromosome = chromosome; + chromosomeUpdated = true; + currentQueue = regionsOfInterest.get(chromosome); + if (null == currentQueue) { + logger.warn("no ranges found for chr: " + chromosome); + currentRange = null; + } else { + currentRange = currentQueue.poll(); + } + } + + if ( ! isPositionInRegionOfInterest(position, currentQueue)) return; + + if (position != lastPosition +1 || chromosomeUpdated) { + String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; + sb.append(wiggleHeader); + } + lastPosition = position; + String [] params = TabTokenizer.tokenize(paramString); +// String [] params = tabbedPattern.split(paramString, -1); + + if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { + sb.append("0\n"); + ++covBad; + } else { + if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { + sb.append("1\n"); + ++covGood; + } else { + sb.append("0\n"); + ++covBad; + } + } + } + + public static void main(String[] args) throws Exception { + LoadReferencedClasses.loadClasses(WiggleFromPileupTakeTwo.class); + WiggleFromPileupTakeTwo sp = new WiggleFromPileupTakeTwo(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = 1; + if (null == args || args.length == 0) { + System.err.println(Messages.USAGE); + System.exit(1); + } + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(WiggleFromPileupTakeTwo.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileupTakeTwo.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + // get app specific options + pileupFormat = options.getPileupFormat(); + normalCoverage = options.getNormalCoverage(); + tumourCoverage = options.getTumourCoverage(); + compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); + gffRegions = options.getGffRegions(); + + + if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. NNTT)"); + if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); + if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 20)"); + if (gffRegions.length == 0) throw new IllegalArgumentException("Please specify the region names within the gff3 file you are interested in"); + + logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput + ", gff regions: " + Arrays.deepToString(gffRegions)); + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/XCvsZP.java b/qmule/src/org/qcmg/qmule/XCvsZP.java new file mode 100644 index 000000000..e7973ade9 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/XCvsZP.java @@ -0,0 +1,117 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.lang.Math; + +import org.qcmg.picard.SAMFileReaderFactory; + +import htsjdk.samtools.BAMIndex; +import htsjdk.samtools.BAMIndexMetaData; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; + +public class XCvsZP { + + + XCvsZP(File input) throws Exception{ + SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); + + HashMap matric = countToMatric( reader ); + + ArrayList keys = getKeys(matric ); + printMatric(matric, keys); + + reader.close(); + + } + + ArrayList getKeys( HashMap matric ){ + Set myset = new HashSet(); + + Iterator itr = matric.keySet().iterator(); + while( itr.hasNext()){ + String key = itr.next().toString(); + String[] zpxc = key.split("_"); + myset.add(zpxc[0]); + myset.add(zpxc[1]); + } + ArrayList mylist = new ArrayList(myset); + Collections.sort(mylist); + + + return mylist; + } + + + void printMatric( HashMap matric, ArrayList keys ){ + System.out.print("\t\tZP \t(reads_Number/total_number)\n"); + System.out.print("-------------------------------------------------------------------------------------------------------------------------------------------------\n XC\t|" ); + for(int i = 0; i < keys.size(); i ++) + System.out.print( "\t " + keys.get(i) + " "); + + for(int i = 0; i < keys.size(); i ++){ + System.out.print( "\n\t|" + keys.get(i) + "|\t"); + for(int j = 0; j < keys.size(); j ++){ + String xc_zp = keys.get(i) + "_" + keys.get(j); + if(matric.containsKey(xc_zp)) + System.out.print(String.format("%.4f\t", matric.get(xc_zp)) ); + else + System.out.print("-----\t"); + } + } + } + + + HashMap countToMatric( SamReader reader) 
throws Exception{ + + HashMap matric = new HashMap(); + HashMap rateMatric = new HashMap(); + + long numRead = 0; + for( SAMRecord record : reader){ + String xc = record.getAttribute("XC").toString(); + String zp = record.getAttribute("ZP").toString(); + String key = xc + "_" + zp; + + long value = 1; + if( matric.containsKey(key)) + value = matric.get(key) + 1; + + matric.put(key, value); + numRead ++; + } + + System.out.println("Total number of reads is " + numRead + "\n"); + + //convert to float with %.4f formart + for(Map.Entry set: matric.entrySet()){ + String key = set.getKey(); + int value = Math.round((set.getValue() * 10000 )/ numRead ); + rateMatric.put(key, ((float) value/10000 )); + } + + return rateMatric; + } + + + + public static void main(String[] args) throws Exception{ + + XCvsZP vs = new XCvsZP(new File(args[0]) ); + + } +} diff --git a/qmule/src/org/qcmg/qmule/XCvsZP.java-- b/qmule/src/org/qcmg/qmule/XCvsZP.java-- new file mode 100644 index 000000000..e7973ade9 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/XCvsZP.java-- @@ -0,0 +1,117 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.lang.Math; + +import org.qcmg.picard.SAMFileReaderFactory; + +import htsjdk.samtools.BAMIndex; +import htsjdk.samtools.BAMIndexMetaData; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecord; + +public class XCvsZP { + + + XCvsZP(File input) throws Exception{ + SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); + + HashMap matric = countToMatric( reader ); + + ArrayList keys = getKeys(matric ); + printMatric(matric, keys); + + reader.close(); + + } + + ArrayList getKeys( HashMap matric ){ + Set myset = new HashSet(); + + Iterator itr = matric.keySet().iterator(); + while( itr.hasNext()){ + String key = itr.next().toString(); + String[] zpxc = key.split("_"); + myset.add(zpxc[0]); + myset.add(zpxc[1]); + } + ArrayList mylist = new ArrayList(myset); + Collections.sort(mylist); + + + return mylist; + } + + + void printMatric( HashMap matric, ArrayList keys ){ + System.out.print("\t\tZP \t(reads_Number/total_number)\n"); + System.out.print("-------------------------------------------------------------------------------------------------------------------------------------------------\n XC\t|" ); + for(int i = 0; i < keys.size(); i ++) + System.out.print( "\t " + keys.get(i) + " "); + + for(int i = 0; i < keys.size(); i ++){ + System.out.print( "\n\t|" + keys.get(i) + "|\t"); + for(int j = 0; j < keys.size(); j ++){ + String xc_zp = keys.get(i) + "_" + keys.get(j); + if(matric.containsKey(xc_zp)) + System.out.print(String.format("%.4f\t", matric.get(xc_zp)) ); + else + System.out.print("-----\t"); + } + } + } + + + HashMap countToMatric( SamReader reader) throws Exception{ + + HashMap matric = new HashMap(); + HashMap rateMatric = new HashMap(); + + long numRead = 0; + for( SAMRecord record : reader){ + String xc = record.getAttribute("XC").toString(); + String zp = record.getAttribute("ZP").toString(); + String key = xc + "_" + zp; + + long value = 1; + if( matric.containsKey(key)) + value = 
matric.get(key) + 1; + + matric.put(key, value); + numRead ++; + } + + System.out.println("Total number of reads is " + numRead + "\n"); + + //convert to float with %.4f formart + for(Map.Entry set: matric.entrySet()){ + String key = set.getKey(); + int value = Math.round((set.getValue() * 10000 )/ numRead ); + rateMatric.put(key, ((float) value/10000 )); + } + + return rateMatric; + } + + + + public static void main(String[] args) throws Exception{ + + XCvsZP vs = new XCvsZP(new File(args[0]) ); + + } +} diff --git a/qmule/src/org/qcmg/qmule/bam/CheckBam.java b/qmule/src/org/qcmg/qmule/bam/CheckBam.java new file mode 100644 index 000000000..3154595c1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/bam/CheckBam.java @@ -0,0 +1,339 @@ +/** + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. +*/ +package org.qcmg.qmule.bam; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordIterator; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.SamReader; + +import java.io.File; +import java.util.AbstractQueue; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicLongArray; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.qmule.GetBamRecords; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; + + +public class CheckBam { + + private final static String UNMAPPED_READS = "Unmapped"; + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private File bamFIle; + private int numberOfThreads = 1; + + + private static final int READ_PAIRED_FLAG = 0x1; + private static final int PROPER_PAIR_FLAG = 0x2; + private static final int READ_UNMAPPED_FLAG = 0x4; + private static final int MATE_UNMAPPED_FLAG = 0x8; + private static final int READ_STRAND_FLAG = 0x10; + private static final int MATE_STRAND_FLAG = 0x20; + private static final int FIRST_OF_PAIR_FLAG = 0x40; + private static final int SECOND_OF_PAIR_FLAG = 0x80; + private static final int NOT_PRIMARY_ALIGNMENT_FLAG = 0x100; + private static final int READ_FAILS_VENDOR_QUALITY_CHECK_FLAG = 0x200; + private static final int DUPLICATE_READ_FLAG = 0x400; + private static final int SUPPLEMENTARY_ALIGNMENT_FLAG = 0x800; + + + private int exitStatus; + private static QLogger logger; + + private final AtomicLong counter = new AtomicLong(); + +// long [] flagCounter = new long[5000]; + AtomicLongArray flags = new AtomicLongArray(5000); + + + public int engage() throws Exception { + + logger.info("Get reference contigs from bam header"); + bamFIle = new File(cmdLineInputFiles[0]); + + final AbstractQueue sequences = new ConcurrentLinkedQueue(); + + try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { + if ( ! 
reader.hasIndex() && numberOfThreads > 1) { + logger.warn("Using 1 producer thread - no index found for bam file: " + bamFIle.getAbsolutePath()); + numberOfThreads = 1; + } + + SAMFileHeader header = reader.getFileHeader(); + List samSequences = header.getSequenceDictionary().getSequences(); + List orderedSamSequences = new ArrayList(); + orderedSamSequences.addAll(samSequences); + Collections.sort(orderedSamSequences, new Comparator(){ + @Override + public int compare(SAMSequenceRecord o1, SAMSequenceRecord o2) { + return o2.getSequenceLength() - o1.getSequenceLength(); + } + }); + // add the unmapped reads marker + sequences.add(UNMAPPED_READS); + + for (SAMSequenceRecord rec : orderedSamSequences) { + sequences.add(rec.getSequenceName()); + } + } + + + logger.info("will create " + numberOfThreads + " threads"); + + final CountDownLatch pLatch = new CountDownLatch(numberOfThreads); +// setpup and kick-off single Producer thread + ExecutorService producerThreads = Executors.newFixedThreadPool(numberOfThreads); + if (1 == numberOfThreads) { + producerThreads.execute(new SingleProducer(Thread.currentThread(), pLatch)); + } else { + for (int i = 0 ; i < numberOfThreads ; i++) { + producerThreads.execute(new Producer(Thread.currentThread(), pLatch, sequences)); + } + } + + // don't allow any new threads to start + producerThreads.shutdown(); + + logger.info("waiting for Producer thread to finish"); + pLatch.await(); + logger.info("Producer thread finished, counter size: " + counter.longValue()); + // output flag stats too + long dups = 0; + long sups = 0; + long mapped = 0; + long paired = 0; + long properPair = 0; + long r1 = 0; + long r2 = 0; + for (int i = 0 ; i < flags.length() ; i++) { + long l = flags.get(i); + if (l > 0) { + + if ((i & READ_PAIRED_FLAG) != 0) { + paired += l; + } + if ((i & PROPER_PAIR_FLAG) != 0) { + properPair += l; + } + if ((i & READ_UNMAPPED_FLAG) == 0) { + mapped += l; + } + if ((i & FIRST_OF_PAIR_FLAG) != 0) { + r1 += l; + } + if ((i & SECOND_OF_PAIR_FLAG) != 0) { + r2 += l; + } + if ((i & DUPLICATE_READ_FLAG) != 0) { + dups += l; + } + if ((i & SUPPLEMENTARY_ALIGNMENT_FLAG) != 0) { + sups += l; + } + logger.info("flag: " + i + " : " + l + " hits"); + } + } + logger.info("total read count: " + counter.longValue()); + logger.info("dups: " + dups + " (" + (((double) dups / counter.longValue()) * 100) + "%)"); + logger.info("sups: " + sups + " (" + (((double) sups / counter.longValue()) * 100) + "%)"); + logger.info("mapped: " + mapped + " (" + (((double) mapped / counter.longValue()) * 100) + "%)"); + logger.info("paired: " + paired + " (" + (((double) paired / counter.longValue()) * 100) + "%)"); + logger.info("properPair: " + properPair + " (" + (((double)properPair / counter.longValue()) * 100) + "%)"); + logger.info("r1: " + r1 + " (" + (((double) r1 / counter.longValue()) * 100) + "%)"); + logger.info("r2: " + r2 + " (" + (((double) r2 / counter.longValue()) * 100) + "%)"); + + return exitStatus; + } + + + + public class Producer implements Runnable { + private final Thread mainThread; + private final CountDownLatch pLatch; + private final AbstractQueue sequences; + private final QLogger log = QLoggerFactory.getLogger(Producer.class); + + private final long [] flagCounter = new long[5000]; + + Producer(Thread mainThread, CountDownLatch pLatch, AbstractQueue sequences) { + this.mainThread = mainThread; + this.pLatch = pLatch; + this.sequences = sequences; + } + + @Override + public void run() { + log.debug("Start Producer "); + + long count = 0; + + try 
(SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { + + while (true) { + String sequence = sequences.poll(); + if (null == sequence) break; + SAMRecordIterator iter = UNMAPPED_READS.equals(sequence) ? reader.queryUnmapped() : reader.query(sequence, 0, 0, false) ; + log.info("retrieving records for sequence: " + sequence); + while (iter.hasNext()) { + int flag = iter.next().getFlags(); + flagCounter[flag] ++ ; + // update count for this flag + if (++count % 2000000 == 0) { + log.info("added " + count/1000000 + "M"); + } + } + iter.close(); + } + + } catch (Exception e) { + log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); + mainThread.interrupt(); + } finally { + pLatch.countDown(); + } + // update the shared counter + counter.addAndGet(count); + //update the flag Counter + int i = 0 ; + for (long l : flagCounter) { + if (l > 0) { + flags.addAndGet(i, l); + } + i++; + } + } + } + + public class SingleProducer implements Runnable { + private final Thread mainThread; + private final QLogger log = QLoggerFactory.getLogger(SingleProducer.class); + private final CountDownLatch pLatch; + private final long [] flagCounter = new long[5000]; + + SingleProducer(Thread mainThread, CountDownLatch pLatch) { + this.mainThread = mainThread; + this.pLatch = pLatch; + } + + @Override + public void run() { + log.debug("Start SingleProducer "); + + long count = 0; + + try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { + + for (SAMRecord r : reader) { + int flag = r.getFlags(); + flagCounter[flag] ++ ; + if (++count % 2000000 == 0) { + log.info("added " + count/1000000 + "M"); + } + } + + } catch (Exception e) { + log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); + mainThread.interrupt(); + } finally { + pLatch.countDown(); + } + // update the shared counter + counter.addAndGet(count); + //update the flag Counter + int i = 0 ; + for (long l : flagCounter) { + if (l > 0) { + flags.addAndGet(i, l); + } + i++; + } + } + } + + public static void main(String[] args) throws Exception { + CheckBam sp = new CheckBam(); + int exitStatus = sp.setup(args); + if (null != logger) { + logger.logFinalExecutionStats(exitStatus); + } + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CheckBam", CheckBam.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + int nt = options.getNumberOfThreads(); + if (nt > 0) { + numberOfThreads = nt; + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- b/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- new file mode 100644 index 000000000..3154595c1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- @@ -0,0 +1,339 @@ +/** + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. +*/ +package org.qcmg.qmule.bam; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordIterator; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.SamReader; + +import java.io.File; +import java.util.AbstractQueue; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicLongArray; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.qmule.GetBamRecords; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; + + +public class CheckBam { + + private final static String UNMAPPED_READS = "Unmapped"; + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private File bamFIle; + private int numberOfThreads = 1; + + + private static final int READ_PAIRED_FLAG = 0x1; + private static final int PROPER_PAIR_FLAG = 0x2; + private static final int READ_UNMAPPED_FLAG = 0x4; + private static final int MATE_UNMAPPED_FLAG = 0x8; + private static final int READ_STRAND_FLAG = 0x10; + private static final int MATE_STRAND_FLAG = 0x20; + private static final int FIRST_OF_PAIR_FLAG = 0x40; + private static final int SECOND_OF_PAIR_FLAG = 0x80; + private static final int NOT_PRIMARY_ALIGNMENT_FLAG = 0x100; + private static final int READ_FAILS_VENDOR_QUALITY_CHECK_FLAG = 0x200; + private static final int DUPLICATE_READ_FLAG = 0x400; + private static final int SUPPLEMENTARY_ALIGNMENT_FLAG = 0x800; + + + private int exitStatus; + private static QLogger logger; + + private final AtomicLong counter = new AtomicLong(); + +// long [] flagCounter = new long[5000]; + AtomicLongArray flags = new AtomicLongArray(5000); + + + public int engage() throws Exception { + + logger.info("Get reference contigs from bam header"); + bamFIle = new File(cmdLineInputFiles[0]); + + final AbstractQueue sequences = new ConcurrentLinkedQueue(); + + try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { + if ( ! 
reader.hasIndex() && numberOfThreads > 1) { + logger.warn("Using 1 producer thread - no index found for bam file: " + bamFIle.getAbsolutePath()); + numberOfThreads = 1; + } + + SAMFileHeader header = reader.getFileHeader(); + List samSequences = header.getSequenceDictionary().getSequences(); + List orderedSamSequences = new ArrayList(); + orderedSamSequences.addAll(samSequences); + Collections.sort(orderedSamSequences, new Comparator(){ + @Override + public int compare(SAMSequenceRecord o1, SAMSequenceRecord o2) { + return o2.getSequenceLength() - o1.getSequenceLength(); + } + }); + // add the unmapped reads marker + sequences.add(UNMAPPED_READS); + + for (SAMSequenceRecord rec : orderedSamSequences) { + sequences.add(rec.getSequenceName()); + } + } + + + logger.info("will create " + numberOfThreads + " threads"); + + final CountDownLatch pLatch = new CountDownLatch(numberOfThreads); +// setpup and kick-off single Producer thread + ExecutorService producerThreads = Executors.newFixedThreadPool(numberOfThreads); + if (1 == numberOfThreads) { + producerThreads.execute(new SingleProducer(Thread.currentThread(), pLatch)); + } else { + for (int i = 0 ; i < numberOfThreads ; i++) { + producerThreads.execute(new Producer(Thread.currentThread(), pLatch, sequences)); + } + } + + // don't allow any new threads to start + producerThreads.shutdown(); + + logger.info("waiting for Producer thread to finish"); + pLatch.await(); + logger.info("Producer thread finished, counter size: " + counter.longValue()); + // output flag stats too + long dups = 0; + long sups = 0; + long mapped = 0; + long paired = 0; + long properPair = 0; + long r1 = 0; + long r2 = 0; + for (int i = 0 ; i < flags.length() ; i++) { + long l = flags.get(i); + if (l > 0) { + + if ((i & READ_PAIRED_FLAG) != 0) { + paired += l; + } + if ((i & PROPER_PAIR_FLAG) != 0) { + properPair += l; + } + if ((i & READ_UNMAPPED_FLAG) == 0) { + mapped += l; + } + if ((i & FIRST_OF_PAIR_FLAG) != 0) { + r1 += l; + } + if ((i & SECOND_OF_PAIR_FLAG) != 0) { + r2 += l; + } + if ((i & DUPLICATE_READ_FLAG) != 0) { + dups += l; + } + if ((i & SUPPLEMENTARY_ALIGNMENT_FLAG) != 0) { + sups += l; + } + logger.info("flag: " + i + " : " + l + " hits"); + } + } + logger.info("total read count: " + counter.longValue()); + logger.info("dups: " + dups + " (" + (((double) dups / counter.longValue()) * 100) + "%)"); + logger.info("sups: " + sups + " (" + (((double) sups / counter.longValue()) * 100) + "%)"); + logger.info("mapped: " + mapped + " (" + (((double) mapped / counter.longValue()) * 100) + "%)"); + logger.info("paired: " + paired + " (" + (((double) paired / counter.longValue()) * 100) + "%)"); + logger.info("properPair: " + properPair + " (" + (((double)properPair / counter.longValue()) * 100) + "%)"); + logger.info("r1: " + r1 + " (" + (((double) r1 / counter.longValue()) * 100) + "%)"); + logger.info("r2: " + r2 + " (" + (((double) r2 / counter.longValue()) * 100) + "%)"); + + return exitStatus; + } + + + + public class Producer implements Runnable { + private final Thread mainThread; + private final CountDownLatch pLatch; + private final AbstractQueue sequences; + private final QLogger log = QLoggerFactory.getLogger(Producer.class); + + private final long [] flagCounter = new long[5000]; + + Producer(Thread mainThread, CountDownLatch pLatch, AbstractQueue sequences) { + this.mainThread = mainThread; + this.pLatch = pLatch; + this.sequences = sequences; + } + + @Override + public void run() { + log.debug("Start Producer "); + + long count = 0; + + try 
(SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { + + while (true) { + String sequence = sequences.poll(); + if (null == sequence) break; + SAMRecordIterator iter = UNMAPPED_READS.equals(sequence) ? reader.queryUnmapped() : reader.query(sequence, 0, 0, false) ; + log.info("retrieving records for sequence: " + sequence); + while (iter.hasNext()) { + int flag = iter.next().getFlags(); + flagCounter[flag] ++ ; + // update count for this flag + if (++count % 2000000 == 0) { + log.info("added " + count/1000000 + "M"); + } + } + iter.close(); + } + + } catch (Exception e) { + log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); + mainThread.interrupt(); + } finally { + pLatch.countDown(); + } + // update the shared counter + counter.addAndGet(count); + //update the flag Counter + int i = 0 ; + for (long l : flagCounter) { + if (l > 0) { + flags.addAndGet(i, l); + } + i++; + } + } + } + + public class SingleProducer implements Runnable { + private final Thread mainThread; + private final QLogger log = QLoggerFactory.getLogger(SingleProducer.class); + private final CountDownLatch pLatch; + private final long [] flagCounter = new long[5000]; + + SingleProducer(Thread mainThread, CountDownLatch pLatch) { + this.mainThread = mainThread; + this.pLatch = pLatch; + } + + @Override + public void run() { + log.debug("Start SingleProducer "); + + long count = 0; + + try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { + + for (SAMRecord r : reader) { + int flag = r.getFlags(); + flagCounter[flag] ++ ; + if (++count % 2000000 == 0) { + log.info("added " + count/1000000 + "M"); + } + } + + } catch (Exception e) { + log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); + mainThread.interrupt(); + } finally { + pLatch.countDown(); + } + // update the shared counter + counter.addAndGet(count); + //update the flag Counter + int i = 0 ; + for (long l : flagCounter) { + if (l > 0) { + flags.addAndGet(i, l); + } + i++; + } + } + } + + public static void main(String[] args) throws Exception { + CheckBam sp = new CheckBam(); + int exitStatus = sp.setup(args); + if (null != logger) { + logger.logFinalExecutionStats(exitStatus); + } + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CheckBam", CheckBam.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + int nt = options.getNumberOfThreads(); + if (nt > 0) { + numberOfThreads = nt; + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java b/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java new file mode 100644 index 000000000..b480f21f4 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java @@ -0,0 +1,127 @@ +package org.qcmg.qmule.bam; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +import org.qcmg.common.log.QLogger; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; + +public class GetContigsFromHeader { + + private static QLogger logger; + + private int setup(String [] args) throws IOException { + /* + * first arg should be the header, + * second arg (if present) should be how many times the genome should be diviied up + */ + + SamReaderFactory factory = SamReaderFactory.make(); + SamReader reader = factory.open(new File(args[0])); + SAMFileHeader header = reader.getFileHeader(); + + SAMSequenceDictionary dict = header.getSequenceDictionary(); + Map map = dict.getSequences().stream().collect(Collectors.groupingBy(SAMSequenceRecord::getSequenceName, Collectors.summingInt(SAMSequenceRecord::getSequenceLength))); + + + + if (args.length > 1 && null != args[1]) { + int numberOfContigs = map.keySet().size(); + long length = map.values().stream().mapToLong(Integer::longValue).sum(); + int numberOfEntries = Integer.parseInt(args[1]) - 1; + + long noOFBasesPerEntry = length / numberOfEntries; + + System.out.println("genome length: " + length + ", numberOfEntries: " + numberOfEntries + ", noOFBasesPerEntry: " + noOFBasesPerEntry + ", numberOfContigs: " + numberOfContigs); + + + Map results = new HashMap<>(); + Set contigs = new HashSet<>(); + + List sortedContigs = map.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).map(e -> e.getKey()).collect(Collectors.toList()); + + + for (String contig : sortedContigs) { + System.out.println("looking at contig: " + contig); + Integer contigLength = map.get(contig); + if ( ! contigs.contains(contig)) { + if (contigLength >= noOFBasesPerEntry) { + results.put(contig, contigLength); + contigs.add(contig); + } else { + AtomicLong basesToMakeUp = new AtomicLong(noOFBasesPerEntry - contigLength); +// long basesToMakeUp = noOFBasesPerEntry - e.getValue(); + StringBuilder key = new StringBuilder(); + key.append(contig); + contigs.add(contig); + while (basesToMakeUp.longValue() > 1000000) { + Optional> e1 = map.entrySet().stream().filter(en -> ! 
contigs.contains(en.getKey())).filter(en -> en.getValue() < basesToMakeUp.longValue()).max((en1, en2) -> en2.getValue() - en1.getValue()); + if (e1.isPresent()) { + key.append(" -L "); + key.append(e1.get().getKey()); + basesToMakeUp.addAndGet( - e1.get().getValue()); + contigs.add(e1.get().getKey()); + } else { + break; + } + } + results.put(key.toString(), (int)noOFBasesPerEntry - basesToMakeUp.intValue()); + } + } + } + + results.forEach((k,v) -> System.out.println("contigs: " + k + ", size: " + v)); + System.out.println("contigs.size(): " + contigs.size()); + + /* + * write file + */ + if (args.length > 2 && null != args[2]) { + try (Writer writer = new FileWriter(args[2]);) { + + /* + * sort according to number of bases + */ + results.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).forEach(e -> { + try { + writer.write(e.getKey() + "\n"); + } catch (IOException e3) { + // TODO Auto-generated catch block + e3.printStackTrace(); + } + }); + } + } + } + + return 0; + } + + public static void main(String[] args) throws Exception { + GetContigsFromHeader sp = new GetContigsFromHeader(); + int exitStatus = sp.setup(args); + if (null != logger) { + logger.logFinalExecutionStats(exitStatus); + } + + System.exit(exitStatus); + } + +} + diff --git a/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- b/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- new file mode 100644 index 000000000..b480f21f4 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- @@ -0,0 +1,127 @@ +package org.qcmg.qmule.bam; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +import org.qcmg.common.log.QLogger; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; + +public class GetContigsFromHeader { + + private static QLogger logger; + + private int setup(String [] args) throws IOException { + /* + * first arg should be the header, + * second arg (if present) should be how many times the genome should be diviied up + */ + + SamReaderFactory factory = SamReaderFactory.make(); + SamReader reader = factory.open(new File(args[0])); + SAMFileHeader header = reader.getFileHeader(); + + SAMSequenceDictionary dict = header.getSequenceDictionary(); + Map map = dict.getSequences().stream().collect(Collectors.groupingBy(SAMSequenceRecord::getSequenceName, Collectors.summingInt(SAMSequenceRecord::getSequenceLength))); + + + + if (args.length > 1 && null != args[1]) { + int numberOfContigs = map.keySet().size(); + long length = map.values().stream().mapToLong(Integer::longValue).sum(); + int numberOfEntries = Integer.parseInt(args[1]) - 1; + + long noOFBasesPerEntry = length / numberOfEntries; + + System.out.println("genome length: " + length + ", numberOfEntries: " + numberOfEntries + ", noOFBasesPerEntry: " + noOFBasesPerEntry + ", numberOfContigs: " + numberOfContigs); + + + Map results = new HashMap<>(); + Set contigs = new HashSet<>(); + + List sortedContigs = map.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).map(e -> e.getKey()).collect(Collectors.toList()); + + + for (String 
contig : sortedContigs) { + System.out.println("looking at contig: " + contig); + Integer contigLength = map.get(contig); + if ( ! contigs.contains(contig)) { + if (contigLength >= noOFBasesPerEntry) { + results.put(contig, contigLength); + contigs.add(contig); + } else { + AtomicLong basesToMakeUp = new AtomicLong(noOFBasesPerEntry - contigLength); +// long basesToMakeUp = noOFBasesPerEntry - e.getValue(); + StringBuilder key = new StringBuilder(); + key.append(contig); + contigs.add(contig); + while (basesToMakeUp.longValue() > 1000000) { + Optional> e1 = map.entrySet().stream().filter(en -> ! contigs.contains(en.getKey())).filter(en -> en.getValue() < basesToMakeUp.longValue()).max((en1, en2) -> en2.getValue() - en1.getValue()); + if (e1.isPresent()) { + key.append(" -L "); + key.append(e1.get().getKey()); + basesToMakeUp.addAndGet( - e1.get().getValue()); + contigs.add(e1.get().getKey()); + } else { + break; + } + } + results.put(key.toString(), (int)noOFBasesPerEntry - basesToMakeUp.intValue()); + } + } + } + + results.forEach((k,v) -> System.out.println("contigs: " + k + ", size: " + v)); + System.out.println("contigs.size(): " + contigs.size()); + + /* + * write file + */ + if (args.length > 2 && null != args[2]) { + try (Writer writer = new FileWriter(args[2]);) { + + /* + * sort according to number of bases + */ + results.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).forEach(e -> { + try { + writer.write(e.getKey() + "\n"); + } catch (IOException e3) { + // TODO Auto-generated catch block + e3.printStackTrace(); + } + }); + } + } + } + + return 0; + } + + public static void main(String[] args) throws Exception { + GetContigsFromHeader sp = new GetContigsFromHeader(); + int exitStatus = sp.setup(args); + if (null != logger) { + logger.logFinalExecutionStats(exitStatus); + } + + System.exit(exitStatus); + } + +} + diff --git a/qmule/src/org/qcmg/qmule/messages.properties b/qmule/src/org/qcmg/qmule/messages.properties new file mode 100644 index 000000000..4ec3962f0 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/messages.properties @@ -0,0 +1,107 @@ +#general usage message +USAGE = usage: qmule ToolName [-options] +HELP_OPTION_DESCRIPTION = Shows this help message. +VERSION_OPTION_DESCRIPTION = Print version info. +UNKNOWN_ERROR = An unknown error has occurred +ALL_ARGUMENTS_MUST_BE_OPTIONS = All arguments must be supplied as option values +LOG_OPTION_DESCRIPTION = specify the log file +LOGLEVEL_OPTION_DESCRIPTION = specify the log levle, eg: EXEC,TOOL,DEBUG,INFO. By default, it is INFO + +#IO warning message +INPUT_OPTION_DESCRIPTION = Specifies an input file. +INPUT_FILE_READ_ERROR = Cannot read input file {0} +INSUFFICIENT_INPUT_FILES = Insufficient input files +NONEXISTENT_INPUT_FILE = Nonexistent input file {0} +MISSING_INPUT_OPTIONS = You must specify at least one -i option +MISSING_OUTPUT_OPTIONS = You must specify an -o option +SAME_INPUT_FILE = Input file {0} supplied more than once +OUTPUT_OPTION_DESCRIPTION = Specifies the output file. 
+OUTPUT_FILE_WRITE_ERROR = Cannot write to output file {0}
+CANNOT_CLOSE_FILES = Cannot close file(s)
+SAME_FILES = {0} and {1} are the same file
+FILE_USED_AS_INPUT_AND_OUTPUT = File {0} used both as input and output
+TOO_MANY_OUTPUTFILE = Too many output files
+
+#for snppicker
+MISSING_OUTPUT_MERGE_OPTIONS = You must specify either the -o or -m option
+MERGE_AND_OUTPUT_ERROR = Either -o or -m may be specified, not both
+READ_GROUP_OVERLAP = Read group overlap
+BLANK_FILE_NAME_IN_READGROUP = Blank file name in replacement {0}
+BLANK_REPLACEMENT_GROUP = Cannot use an empty value for a group replacement
+NULL_REPLACEMENT_GROUP = Cannot use a null value for a group replacement
+OVERLAPPING_READ_GROUPS = Overlapping read group(s) in files {0} and {1}
+IDENTICAL_GROUP_FOR_REPLACEMENT = Identical group specified for replacement {0}
+MULTIPLE_NUMBER_RECORDS_SPECIFIED = Too many number-of-records options specified
+MULTIPLE_OUTPUT_FILES_SPECIFIED = Too many output files specified
+BAD_REPLACEMENT_FORMAT = {0} does not follow pattern file:oldgroup:newgroup
+BAD_GROUP_REPLACEMENT_FILENAME = Group replacement {0} specifies unknown input file {1}
+CLASHING_NEW_REPLACEMENT_GROUP = Identical new group {0} for replacements {1} and {2}
+CLASHING_OLD_REPLACEMENT_GROUP = Identical old group {0} for replacements {1} and {2}
+CANNOT_OVERWRITE_EXISTING_OUTPUT = Cannot overwrite existing output file
+CANNOT_DELETE_TEMPORARY_FILE = Unable to delete the temporary file during merge
+UNSUITABLE_MERGE_FILE = Unsuitable merge file
+FILE_NOT_DIRECTORY = Output must be a file, not a directory
+BAD_RECORD_ZC = Record ZC does not correspond to available ZCs in source file
+FORCE_OPTION_DESCRIPTION = Forces the merge regardless of overlapping groups.
+MERGE_OPTION_DESCRIPTION = Specifies the merge file.
+NUMBER_RECORDS_DESCRIPTION = The number of records to merge into the final file (from start of file).
+REPLACE_OPTION_DESCRIPTION = Replace the oldgroup in the input file with a newgroup in the output file.
+
+#message for replaceReadGroup
+ID = specify read group identifier, with which the RG line will be replaced.
+CN = specify name of the sequencing center producing the read, which will be added or replaced into the CN field.
+DS = specify description, which will be added or replaced into the DS field.
+DT = specify date the run was produced (ISO8601 date or date/time), which will be added or replaced into the DT field.
+LB = specify read group library value, which will be added or replaced into the LB field.
+PI = specify predicted median insert size, which will be added or replaced into the PI field.
+PL = specify platform/technology used to produce the reads. Valid values: CAPILLARY, LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO.
+PU = specify platform unit (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD). Unique identifier.
+SM = specify sample. Use pool name where a pool is being sequenced; the value will be added or replaced into the SM field.
+AB = specify attributes. Format should be tag:value, eg. --AB "ZC:modify bioscope BAM header" "PG:qmule.replaceReadGroup", which will be added into the specified field.
+SAMHEADER = output SAM file header only if this option is specified.
+ATTRIBUTE_ERR = Format error in attribute: {0}. See usage in help message. 
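(The {0}, {1} tokens in the values above are java.text.MessageFormat placeholders. The org.qcmg.qmule.Messages class that consumes this catalogue is not included in this patch, so the following is only a minimal sketch of the usual ResourceBundle + MessageFormat lookup pattern; the class and method names here are illustrative, not the project's actual implementation.)

import java.text.MessageFormat;
import java.util.ResourceBundle;

// Sketch only: assumes messages.properties is on the classpath as org/qcmg/qmule/messages.properties
final class MessagesSketch {
    private static final ResourceBundle BUNDLE = ResourceBundle.getBundle("org.qcmg.qmule.messages");

    // Look up a key such as INPUT_FILE_READ_ERROR and substitute the {0}, {1}, ... placeholders
    static String getMessage(String key, Object... args) {
        return MessageFormat.format(BUNDLE.getString(key), args);
    }
}

// Example: MessagesSketch.getMessage("INPUT_FILE_READ_ERROR", "/data/tumour.bam")
// would produce "Cannot read input file /data/tumour.bam"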
+USAGE_SWAP_LB_DS = usage: qmule org.qcmg.qmule.replaceReadGroup.Swqp_LB_DS -i -o --LB -l +USAGE_REPLACEREADGROUP = usage: qmule org.qcmg.qmule.replaceReadGroup.ReplaceReadGroup -i -o --ID -l [options] +USAGE_FixSingleRG = usage: qmule org.qcmg.qmule.replaceReadGroup.FixSingleRG -i -o --LB -l +USAGE_Fix = usage: qmule org.qcmg.qmule.FixBAM.Fix -i -o --LB -l + +#message for qcnv +REF_OPTION_DESCRIPTION = specify the normal or germline BAM file with full path +TEST_OPTION_DESCRIPTION = specify the tumour or cell line BAM file with full path +USAGE_QCNV = usage: qmule org.qcmg.qmule.qcnv.Main --ref --test -o --log [options] +WINDOW_SIZE_DESCRIPTION = (Optional) specify the window size here. Otherwise a default window size will given based on a hard coded formula. +TMPDIR_DESCRIPTION = (Optional) the directory for temporary files. Without this option, all temporary file will created into output file directory. +THREAD_OPTION_DESCRIPTION = (Optional) specify the thread number (the number of paralleled query). by default we set it to 2 + +#message for subSample +PROPORTION_OPTION_DESCRIPTION = specify the proportion of total reads you want to output +USAGE_SUBSAMPLE = usage: qmule org.qcmg.qmule.SubSample -i -o --proportion (0,1) --log + +#message for AlignerCompare +COMPAREALL_OPTION = Without this option, this comparison will discard all non primary alignments, such as secondary, supplementary alignments. +USAGE_AlignerCompare = usage: qmule org.qcmg.qmule.AlignerCompare -i -i -o [options] +OUTPUT_AlignerCompare = specifies output file prefix with full path here + +#message for BamMismatchCounts +USAGE_BamMismatchCounts = usage: qmule org.qcmg.qmule.BamMismatchCounts -i -o + +#BAMCompress +COMPRESS_LEVEL_DESCRIPTION = specifies output BAM compress level, default is 5 +USAGE_BAMCompress = usage: qmule org.qcmg.qmule.BAMCompress -i -compressLevel [0~9] + +#for AnnotateGFF +STRANDED_OPTION_DESCRIPTION=Only features that occur on the same strand as the input record are annotated. +MODE_ERROR=Mode provided {0} is not recognized +DCC_PARSE_ERROR=Could not determine the type of the DCC header. Could not find the {0} column +DCC_HEADER_ERROR=No header present for the DCC file +FILE_TYPE_ERROR=File does not appear to be dcc1 or dccq +NULL_GFF_MOTIF=Could not determine the motif from the gff file at position: {0} +#IndelDCCHeader +UUID_ERROR=Could not determine if a UUID and DCC header was present +#MAF2DCC1 +NO_COLUMN_INDEX=Could not find all required columns in the header +MISSING_DCC_RECORDS=Could not find all maf records in the input dcc1 file +DUPLICATE_MAF=Two identical maf record positions +T0O_MANY_MATCHES=More than one match found for dcc1 record at position: {0} +COUNT_ERROR=Number of dcc records added to the output file {0} does not match number of maf records {1} + diff --git a/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java b/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java new file mode 100644 index 000000000..707c4726d --- /dev/null +++ b/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java @@ -0,0 +1,226 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.qcnv; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import htsjdk.samtools.*; + +import java.lang.Math; +import java.io.*; + +import org.qcmg.picard.SAMFileReaderFactory; + + +public class CNVseq { + + private static final boolean String = false; + //in cnv-seq.pl it call below R to get value + //echo 'options(digits=16);qnorm(1-0.5*0.001)' | R --vanilla --slave (result: 3.290526731491926) + public static double bt = 3.290526731491926; + //$echo 'options(digits=16);qnorm(0.5*0.001)' | R --vanilla --slave (result: -3.290526731491894) + public static double st = -3.290526731491894; + + public static double pvalue = 0.001; + public static int min_windoe = 4; + public static double log2 = 0.6; + public static double bigger = 1.5; + //public static int debug = 0; + //public static String Rexe = "R"; + + private final Map refSeq; + + private final long genomeSize ; + private final long numTest; + private final long numRef; + + private final double biglog2_window; + private final double smalog2_window; + private final int windowSize; + + private final File ftest; + private final File fref; + + + /** + * it caculate the window size based on genome size, TEST and REF BAM records number + * @param test: File of TEST BAM + * @param ref: File of reference BAM + * @throws Exception + */ + CNVseq(File test, File ref, int window ) throws Exception { + //open file + SamReader rtest = SAMFileReaderFactory.createSAMFileReader(test );//new SAMFileReader(test); + SamReader rref = SAMFileReaderFactory.createSAMFileReader(ref );//new SAMFileReader(ref); + + //check whether index file exist or not + if(!rtest.hasIndex()){ + throw new Exception("can't find index for: " + test.getName()); + } + if(!rref.hasIndex()){ + throw new Exception("can't find index for: " + ref.getName()); + } + ftest = test; + fref = ref; + + //check SAM header + SAMFileHeader htest = rtest.getFileHeader(); + SAMFileHeader href = rref.getFileHeader(); + + //get sequence information from both inputs + Map seqTest = new HashMap (); + Map seqRef = new HashMap (); + + List genome = htest.getSequenceDictionary().getSequences(); + for(SAMSequenceRecord re : genome){ + seqTest.put(re.getSequenceName(),re.getSequenceLength()); + } + + genome = href.getSequenceDictionary().getSequences(); + for(SAMSequenceRecord re : genome){ + seqRef.put(re.getSequenceName(),re.getSequenceLength()); + } + + // check both @SQ line are same or not + if(seqRef.size() != seqTest.size()){ + throw new Exception("the sequence size are different between two inputs: \n" + ftest.getName() + "\n" + fref.getName() ); + } + + for (String key : seqTest.keySet()){ + //first convert Integer to int + int l1 = seqTest.get(key); + int l2 = seqRef.get(key); + if(l1 != l2){ + throw new Exception("the sequence size of " + key + " are different between two inputs : \n" + ftest.getName() + "\n" + fref.getName() ); + } + } + + // assign one of the identical reference info into the hash map + refSeq = seqTest; + + //caculate the genome size based on the identail reference + long size = 0; + for(String key : refSeq.keySet()){ size += refSeq.get(key); } + genomeSize = size; +//-debug +//genomeSize = 3253037807L; + + //count mapped record number based on index file + BAMIndex tIndex = rtest.indexing().getIndex(); + BAMIndex rIndex = rref.indexing().getIndex(); + BAMIndexMetaData meta; + int tMapped = 0; + int rMapped = 0; + for(int i = 0; i < seqRef.size(); i ++ ){ + meta = 
tIndex.getMetaData(i); + tMapped += meta.getAlignedRecordCount(); + meta = rIndex.getMetaData(i); + rMapped += meta.getAlignedRecordCount(); + } + numTest = tMapped; + numRef = rMapped; + + //close files + rtest.close(); + rref.close(); + + //caculate window size + double brp = Math.pow(2, log2); + double srp = 1.0 / brp; + + + biglog2_window = (numTest * Math.pow(brp, 2) + numRef) * genomeSize * Math.pow(bt, 2) / ( Math.pow((1- brp),2 ) * numTest * numRef); + smalog2_window = (numTest * Math.pow(srp, 2) + numRef) * genomeSize * Math.pow(st, 2) / ( Math.pow((1- srp),2 ) * numTest * numRef); + if(window == 0 ){ + windowSize = (int) (Math.max(biglog2_window, smalog2_window) * bigger) ; + }else{ + windowSize = window; + } + + } + + /** + * it create an Iterator and query on each window; finally it close the iterator + * @param f: SAMFileReader + * @param chr: genoeme name + * @param start: window start postion + * @param end: window end position + * @return the totoal number of records mapped overlapped on this window region + */ + int exeQuery (SamReader reader, String chr, int start, int end){ + + SAMRecordIterator block_ite = reader.queryOverlapping(chr, start, end); + int num = 0; + while(block_ite.hasNext()){ + num ++; + block_ite.next(); + } + + block_ite.close(); + + return num; + } + + /** + * + * @return total SAM records number in Test input file + */ + long getTestReadsNumber(){return numTest;} + + /** + * + * @return total SAM records number in Ref input file + */ + long getRefReadsNumber(){return numRef;} + + /** + * + * @return a hash table list each sequence reference name and length + */ + Map getrefseq(){return refSeq;} + + /** + * + * @return return the minimum window size for detecting log2>=0.6 + */ + double getpositivelog2window(){ return biglog2_window;} + + /** + * + * @return The minimum window size for detecting log2<=-0.6 + */ + double getnegativelog2window(){return smalog2_window;} + + /** + * + * @return The window size to use is max(100138.993801, 66550.928197) * 1.500000 + */ + int getWindowSize(){ return windowSize; } + + /** + * + * @return the total length of reference sequence listed on BAM @SQ lines + */ + long getGenomeSize( ){ return genomeSize;} + + /** + * + * @return the Test File with File type + */ + File getTestFile(){return ftest;} + + /** + * + * @return the Ref File with File type + */ + File getRefFile(){return fref;} + +} diff --git a/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- b/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- new file mode 100644 index 000000000..707c4726d --- /dev/null +++ b/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- @@ -0,0 +1,226 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.qcnv; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import htsjdk.samtools.*; + +import java.lang.Math; +import java.io.*; + +import org.qcmg.picard.SAMFileReaderFactory; + + +public class CNVseq { + + private static final boolean String = false; + //in cnv-seq.pl it call below R to get value + //echo 'options(digits=16);qnorm(1-0.5*0.001)' | R --vanilla --slave (result: 3.290526731491926) + public static double bt = 3.290526731491926; + //$echo 'options(digits=16);qnorm(0.5*0.001)' | R --vanilla --slave (result: -3.290526731491894) + public static double st = -3.290526731491894; + + public static double pvalue = 0.001; + public static int min_windoe = 4; + public static double log2 = 0.6; + public static double bigger = 1.5; + //public static int debug = 0; + //public static String Rexe = "R"; + + private final Map refSeq; + + private final long genomeSize ; + private final long numTest; + private final long numRef; + + private final double biglog2_window; + private final double smalog2_window; + private final int windowSize; + + private final File ftest; + private final File fref; + + + /** + * it caculate the window size based on genome size, TEST and REF BAM records number + * @param test: File of TEST BAM + * @param ref: File of reference BAM + * @throws Exception + */ + CNVseq(File test, File ref, int window ) throws Exception { + //open file + SamReader rtest = SAMFileReaderFactory.createSAMFileReader(test );//new SAMFileReader(test); + SamReader rref = SAMFileReaderFactory.createSAMFileReader(ref );//new SAMFileReader(ref); + + //check whether index file exist or not + if(!rtest.hasIndex()){ + throw new Exception("can't find index for: " + test.getName()); + } + if(!rref.hasIndex()){ + throw new Exception("can't find index for: " + ref.getName()); + } + ftest = test; + fref = ref; + + //check SAM header + SAMFileHeader htest = rtest.getFileHeader(); + SAMFileHeader href = rref.getFileHeader(); + + //get sequence information from both inputs + Map seqTest = new HashMap (); + Map seqRef = new HashMap (); + + List genome = htest.getSequenceDictionary().getSequences(); + for(SAMSequenceRecord re : genome){ + seqTest.put(re.getSequenceName(),re.getSequenceLength()); + } + + genome = href.getSequenceDictionary().getSequences(); + for(SAMSequenceRecord re : genome){ + seqRef.put(re.getSequenceName(),re.getSequenceLength()); + } + + // check both @SQ line are same or not + if(seqRef.size() != seqTest.size()){ + throw new Exception("the sequence size are different between two inputs: \n" + ftest.getName() + "\n" + fref.getName() ); + } + + for (String key : seqTest.keySet()){ + //first convert Integer to int + int l1 = seqTest.get(key); + int l2 = seqRef.get(key); + if(l1 != l2){ + throw new Exception("the sequence size of " + key + " are different between two inputs : \n" + ftest.getName() + "\n" + fref.getName() ); + } + } + + // assign one of the identical reference info into the hash map + refSeq = seqTest; + + //caculate the genome size based on the identail reference + long size = 0; + for(String key : refSeq.keySet()){ size += refSeq.get(key); } + genomeSize = size; +//-debug +//genomeSize = 3253037807L; + + //count mapped record number based on index file + BAMIndex tIndex = rtest.indexing().getIndex(); + BAMIndex rIndex = rref.indexing().getIndex(); + BAMIndexMetaData meta; + int tMapped = 0; + int rMapped = 0; + for(int i = 0; i < seqRef.size(); i ++ ){ + meta = 
tIndex.getMetaData(i); + tMapped += meta.getAlignedRecordCount(); + meta = rIndex.getMetaData(i); + rMapped += meta.getAlignedRecordCount(); + } + numTest = tMapped; + numRef = rMapped; + + //close files + rtest.close(); + rref.close(); + + //caculate window size + double brp = Math.pow(2, log2); + double srp = 1.0 / brp; + + + biglog2_window = (numTest * Math.pow(brp, 2) + numRef) * genomeSize * Math.pow(bt, 2) / ( Math.pow((1- brp),2 ) * numTest * numRef); + smalog2_window = (numTest * Math.pow(srp, 2) + numRef) * genomeSize * Math.pow(st, 2) / ( Math.pow((1- srp),2 ) * numTest * numRef); + if(window == 0 ){ + windowSize = (int) (Math.max(biglog2_window, smalog2_window) * bigger) ; + }else{ + windowSize = window; + } + + } + + /** + * it create an Iterator and query on each window; finally it close the iterator + * @param f: SAMFileReader + * @param chr: genoeme name + * @param start: window start postion + * @param end: window end position + * @return the totoal number of records mapped overlapped on this window region + */ + int exeQuery (SamReader reader, String chr, int start, int end){ + + SAMRecordIterator block_ite = reader.queryOverlapping(chr, start, end); + int num = 0; + while(block_ite.hasNext()){ + num ++; + block_ite.next(); + } + + block_ite.close(); + + return num; + } + + /** + * + * @return total SAM records number in Test input file + */ + long getTestReadsNumber(){return numTest;} + + /** + * + * @return total SAM records number in Ref input file + */ + long getRefReadsNumber(){return numRef;} + + /** + * + * @return a hash table list each sequence reference name and length + */ + Map getrefseq(){return refSeq;} + + /** + * + * @return return the minimum window size for detecting log2>=0.6 + */ + double getpositivelog2window(){ return biglog2_window;} + + /** + * + * @return The minimum window size for detecting log2<=-0.6 + */ + double getnegativelog2window(){return smalog2_window;} + + /** + * + * @return The window size to use is max(100138.993801, 66550.928197) * 1.500000 + */ + int getWindowSize(){ return windowSize; } + + /** + * + * @return the total length of reference sequence listed on BAM @SQ lines + */ + long getGenomeSize( ){ return genomeSize;} + + /** + * + * @return the Test File with File type + */ + File getTestFile(){return ftest;} + + /** + * + * @return the Ref File with File type + */ + File getRefFile(){return fref;} + +} diff --git a/qmule/src/org/qcmg/qmule/qcnv/Main.java b/qmule/src/org/qcmg/qmule/qcnv/Main.java new file mode 100644 index 000000000..41b681505 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/qcnv/Main.java @@ -0,0 +1,57 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.qcnv; + +import org.qcmg.common.log.*; +import htsjdk.samtools.*; +import java.util.*; +import java.util.Map.Entry; +import java.io.*; + +public class Main { + + public static void main(String[] args) throws Exception{ + //check arguments + Options options = new Options( args); + if(! 
options.commandCheck()){ System.exit(1); } + + QLogger logger = options.getLogger(args); + try{ + File ftest = new File(options.getIO("test")); + File fref = new File(options.getIO("ref")); + CNVseq cnvseq = new CNVseq(ftest, fref, options.getWindowSize()); + + logger.info("genome size used for calculation is " + cnvseq.getGenomeSize()); + logger.info(ftest.getName() + "contains records number: " + cnvseq.getTestReadsNumber()); + logger.info(fref.getName() + "contains records number: " + cnvseq.getRefReadsNumber()); + if(options.getWindowSize() == 0){ + logger.info("The minimum window size for detecting log2>=" + CNVseq.log2 +" should be " + cnvseq.getpositivelog2window()); + logger.info("The minimum window size for detecting log2<=-" + CNVseq.log2 +" should be " + cnvseq.getnegativelog2window()); + logger.info(String.format("The window size to use is max(%f, %f) * %f = %d", + cnvseq.getpositivelog2window(),cnvseq.getnegativelog2window(), CNVseq.bigger, cnvseq.getWindowSize())); + }else{ + logger.info("The window size used in this run is " + options.getWindowSize()); + } + + //count reads number in each window and output + MtCNVSeq cnvThread = new MtCNVSeq(cnvseq, new File(options.getIO("output")), options.getThreadNumber(), options.getTmpDir()); + cnvThread.cnvCount(logger); + + logger.logFinalExecutionStats(0); + System.exit(0); + }catch(Exception e){ + logger.error(e.toString()); + logger.logFinalExecutionStats(1); + System.err.println(e.toString()); + System.exit(1); + } + } + + + + +} diff --git a/qmule/src/org/qcmg/qmule/qcnv/Main.java-- b/qmule/src/org/qcmg/qmule/qcnv/Main.java-- new file mode 100644 index 000000000..41b681505 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/qcnv/Main.java-- @@ -0,0 +1,57 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.qcnv; + +import org.qcmg.common.log.*; +import htsjdk.samtools.*; +import java.util.*; +import java.util.Map.Entry; +import java.io.*; + +public class Main { + + public static void main(String[] args) throws Exception{ + //check arguments + Options options = new Options( args); + if(! 
options.commandCheck()){ System.exit(1); } + + QLogger logger = options.getLogger(args); + try{ + File ftest = new File(options.getIO("test")); + File fref = new File(options.getIO("ref")); + CNVseq cnvseq = new CNVseq(ftest, fref, options.getWindowSize()); + + logger.info("genome size used for calculation is " + cnvseq.getGenomeSize()); + logger.info(ftest.getName() + "contains records number: " + cnvseq.getTestReadsNumber()); + logger.info(fref.getName() + "contains records number: " + cnvseq.getRefReadsNumber()); + if(options.getWindowSize() == 0){ + logger.info("The minimum window size for detecting log2>=" + CNVseq.log2 +" should be " + cnvseq.getpositivelog2window()); + logger.info("The minimum window size for detecting log2<=-" + CNVseq.log2 +" should be " + cnvseq.getnegativelog2window()); + logger.info(String.format("The window size to use is max(%f, %f) * %f = %d", + cnvseq.getpositivelog2window(),cnvseq.getnegativelog2window(), CNVseq.bigger, cnvseq.getWindowSize())); + }else{ + logger.info("The window size used in this run is " + options.getWindowSize()); + } + + //count reads number in each window and output + MtCNVSeq cnvThread = new MtCNVSeq(cnvseq, new File(options.getIO("output")), options.getThreadNumber(), options.getTmpDir()); + cnvThread.cnvCount(logger); + + logger.logFinalExecutionStats(0); + System.exit(0); + }catch(Exception e){ + logger.error(e.toString()); + logger.logFinalExecutionStats(1); + System.err.println(e.toString()); + System.exit(1); + } + } + + + + +} diff --git a/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java b/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java new file mode 100644 index 000000000..b8bdbfcf5 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java @@ -0,0 +1,152 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.qcnv; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.File; +import java.io.IOException; + +import java.util.HashMap; +import java.util.Map; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMRecordIterator; +import htsjdk.samtools.ValidationStringency; + +import org.qcmg.common.log.*; +import org.qcmg.common.util.Constants; +import org.qcmg.picard.SAMFileReaderFactory; + + +public class MtCNVSeq { + + final CNVseq cnvseq; + final File Output; + final int noOfThreads; + final File tmpPath; + + MtCNVSeq(CNVseq cnvseq, File output, int noOfThreads, File tmpdir) throws IOException{ + this.cnvseq = cnvseq; + this.Output = output; + this.noOfThreads = noOfThreads; + if(tmpdir == null) + tmpPath = File.createTempFile( "qcnv", "", Output.getParentFile()); + else + tmpPath = File.createTempFile( "qcnv", "",tmpdir); + } + /** + * it call threads, parallel the BAMFileReader.query for single genome + * @param logger: an instance of QLogger + * @throws IOException + * @throws InterruptedException + */ + void cnvCount(QLogger logger) throws IOException, InterruptedException{ + + Map refseq = cnvseq.getrefseq(); + Map tmpoutput = new HashMap(); + ExecutorService queryThreads = Executors.newFixedThreadPool(noOfThreads); + + logger.debug("start parallel query based on genome file name"); + + + if(!(tmpPath.delete())) + throw new IOException("Could not delete tmp file: " + tmpPath.getAbsolutePath()); + if(! tmpPath.mkdirs()) + throw new IOException("Could not create tmp directory: " + tmpPath.getAbsolutePath()); + + //parallel query by genomes and output to tmp files + for ( Map.Entry chr : refseq.entrySet()){ + File tmp = File.createTempFile(chr.getKey(), ".count", tmpPath); + tmpoutput.put(chr.getKey(), tmp); + queryThreads.execute(new ExeQuery(cnvseq,chr, tmp)); + } + //wait threads finish + queryThreads.shutdown(); + queryThreads.awaitTermination(Constants.EXECUTOR_SERVICE_AWAIT_TERMINATION, TimeUnit.HOURS); + queryThreads.shutdownNow(); + logger.debug("completed parallel query based on genome file name"); + + + //collect outputs from tmp files into + logger.debug("starting collect each genome counts into final output"); + FileWriter writer = new FileWriter(Output); + writer.write("reference\tstart\tend\ttest\tref\n"); + for( Map.Entry tmp : tmpoutput.entrySet()){ + BufferedReader input = new BufferedReader(new FileReader(tmp.getValue())); + String line = null; + while((line = input.readLine()) != null){ + writer.write(line + "\n"); + } + input.close(); + tmp.getValue().deleteOnExit(); + } + tmpPath.delete(); + writer.close(); + logger.debug("created final output"); + } + + /** + * query on Test BAM and Ref BAM records which mapped to specified gemoem + * @author q.xu + * + */ + public static class ExeQuery implements Runnable { + CNVseq cnvseq; + File Output; + File Test; + File Ref; + QLogger logger; + int chrSize; + int winSize; + String chrName; + + ExeQuery(CNVseq cnvseq, Map.Entry chr,File tmp) { + Output = tmp; + Test = cnvseq.getTestFile(); + Ref = cnvseq.getRefFile(); + chrSize = chr.getValue(); + chrName = chr.getKey(); + winSize = cnvseq.getWindowSize(); + this.cnvseq = cnvseq; + } + + public void run() { + try { + FileWriter writer = new FileWriter(Output); + SamReader rTest = SAMFileReaderFactory.createSAMFileReader(Test,ValidationStringency.SILENT); + SamReader rRef 
= SAMFileReaderFactory.createSAMFileReader(Ref,ValidationStringency.SILENT); + + int win_num = chrSize / winSize + 1; + + for (int i = 0; i < win_num; i++){ + int start = i * winSize + 1; + int end = (i + 1 ) * winSize; + int num_test = cnvseq.exeQuery(rTest, chrName, start, end); + int num_ref = cnvseq.exeQuery(rRef, chrName, start, end); + writer.write(String.format("%s\t%d\t%d\t%d\t%d\n", chrName, start, end, num_test, num_ref )); + } + + rRef.close(); + writer.close(); + rTest.close(); + + } catch (Exception e) { + System.out.println(Thread.currentThread().getName() + " " + + e.getMessage()); + Thread.currentThread().interrupt(); + } + + } + } + +}
diff --git a/qmule/src/org/qcmg/qmule/qcnv/Options.java b/qmule/src/org/qcmg/qmule/qcnv/Options.java new file mode 100644 index 000000000..3f4dc850b --- /dev/null +++ b/qmule/src/org/qcmg/qmule/qcnv/Options.java @@ -0,0 +1,169 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file.
+ */ +package org.qcmg.qmule.qcnv; + + +import java.io.File; +import java.util.List; + +import joptsimple.OptionParser; +import joptsimple.OptionSet; + +import org.qcmg.qmule.Messages; +import org.qcmg.common.log.*; + +public class Options { + private static final String HELP_DESCRIPTION = Messages.getMessage("HELP_OPTION_DESCRIPTION"); + private static final String VERSION_DESCRIPTION = Messages.getMessage("VERSION_OPTION_DESCRIPTION"); + private static final String LOG_DESCRIPTION = Messages.getMessage("LOG_OPTION_DESCRIPTION"); + private static final String LOGLEVEL_DESCRIPTION = Messages.getMessage("LOGLEVEL_OPTION_DESCRIPTION"); + + private static final String OUTPUT_DESCRIPTION = Messages.getMessage("OUTPUT_OPTION_DESCRIPTION"); + private static final String TEST_DESCRIPTION = Messages.getMessage("TEST_OPTION_DESCRIPTION"); + private static final String REF_DESCRIPTION = Messages.getMessage("REF_OPTION_DESCRIPTION"); + private static final String THREAD_DESCRIPTION = Messages.getMessage("THREAD_OPTION_DESCRIPTION"); + private static final String WINDOW_DESCRIPTION = Messages.getMessage("WINDOW_SIZE_DESCRIPTION"); + private static final String TMPDIR_DESCRIPTION = Messages.getMessage("TMPDIR_DESCRIPTION"); + private final OptionParser parser = new OptionParser(); + private final OptionSet options; + + final static int DEFAULT_THREAD = 2; + final String commandLine; + final String USAGE = Messages.getMessage("USAGE_QCNV"); + final String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); + + public Options( final String[] args) throws Exception { + parser.accepts("output", OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); + parser.accepts("ref", REF_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("Normal BAM"); + parser.accepts("test", TEST_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("Tumor BAM"); + parser.accepts("thread", THREAD_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("thread number"); + parser.accepts("window", WINDOW_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("window size"); + parser.accepts("tmpdir", TMPDIR_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("window size"); + + + parser.accepts("log", LOG_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("logfile"); + parser.accepts("loglevel", LOGLEVEL_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("loglevel"); + parser.accepts("version", VERSION_DESCRIPTION); + parser.accepts("help", HELP_DESCRIPTION); + + options = parser.parse(args); + commandLine = Messages.reconstructCommandLine(args); + } + + //IO parameters + String getIO(String io) throws Exception{ + + int size = options.valuesOf(io).size(); + if( size > 1){ + throw new Exception("multiple "+ io + " files specified" ); + } + else if( size < 1 ){ + throw new Exception(" missing or invalid IO option specified: " + io ); + } + + return options.valueOf(io).toString(); + } + + File getTmpDir() throws Exception{ + if(options.has("tmpdir")) + return new File (options.valueOf("tmpdir").toString()); + + + + return null; + + } + + int getThreadNumber(){ + + if(options.has("thread")){ + return Integer.parseInt((String) options.valueOf("thread")); + } + + return DEFAULT_THREAD; + } + + int getWindowSize(){ + + if(options.has("window")){ + return Integer.parseInt((String) options.valueOf("window")); + } + + return 0; + } + + QLogger getLogger(String[] args) throws Exception{ + + // configure logging + 
QLogger logger; + String logLevel = (String) options.valueOf("loglevel"); + String logFile; + if(options.has("log")){ + logFile = options.valueOf("log").toString(); + } + else{ + logFile = options.valueOf("output") + ".log"; + } + + logger = QLoggerFactory.getLogger( Main.class, logFile,logLevel); + logger.logInitialExecutionStats(Main.class.toString(), version, args); + return logger; + } + + boolean hasHelp() throws Exception{ + if(options.has("h") || options.has("help")){ + System.out.println(USAGE); + System.out.println(HELP_DESCRIPTION); + parser.printHelpOn(System.err); + return true; + } + return false; + } + + boolean hasVersion()throws Exception{ + if(options.has("v") || options.has("version")){ + System.out.println(VERSION_DESCRIPTION); + System.err.println(version); + return true; + } + return false; + } + + boolean commandCheck() throws Exception{ + //quit the system after providing help or version info + if( hasHelp() || hasVersion() ){ + System.exit(0); + } + + + if (options.nonOptionArguments().size() > 0) { + List<String> nonoptions = (List<String>) options.nonOptionArguments(); + + for(String str : nonoptions){ + System.err.println("INVALID OPTION: " + str); + } + return false; + } + + if(getIO("ref") == null || getIO("test") == null){ + System.err.println("Missing ref or test option"); + return false; + } + if( getIO("ref").equals(getIO("output"))){ + System.err.println(Messages.getMessage("SAME_FILES", "ref", "output")); + return false; + } + if(options.has("thread")){ + int thread = Integer.parseInt((String) options.valueOf("thread")); + if(thread < 1){ + System.err.println("THREAD NUMBER MUST BE AT LEAST ONE: " + options.valueOf("thread") ); + } + } + + return true; + } +}
diff --git a/qmule/src/org/qcmg/qmule/queryChrMT.java b/qmule/src/org/qcmg/qmule/queryChrMT.java new file mode 100644 index 000000000..d9dcad3ff --- /dev/null +++ b/qmule/src/org/qcmg/qmule/queryChrMT.java @@ -0,0 +1,68 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file.
+ */ +package org.qcmg.qmule; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; + +import htsjdk.samtools.*; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.ValidationStringency; +import htsjdk.samtools.SAMRecordIterator; + +import java.io.*; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; + +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + + +public class queryChrMT { + + public static void main(final String[] args) throws IOException, InterruptedException { + + try{ + + File inBAM = new File(args[0]); + String outputName = inBAM.getName().replace(".bam", ".chrMT.primary.bam"); + File output = new File(args[1], outputName); + + SamReader reader = SAMFileReaderFactory.createSAMFileReader(inBAM,ValidationStringency.SILENT); + SAMFileHeader he = reader.getFileHeader().clone(); + SAMOrBAMWriterFactory writeFactory = new SAMOrBAMWriterFactory(he , true, output); + SAMRecordIterator ite = reader.query("chrMT",0, 16569, false); + + SAMRecord record; + while(ite.hasNext()){ + record = ite.next(); + if(!record.getNotPrimaryAlignmentFlag()) + writeFactory.getWriter().addAlignment(record ); + + } + writeFactory.closeWriter(); + reader.close(); + + System.exit(0); + }catch(Exception e){ + System.err.println(e.toString()); + Thread.sleep(1); + System.out.println("usage: qmule org.qcmg.qmule.queryChrMT "); + System.exit(1); + } + + } + + +}
diff --git a/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java b/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java new file mode 100644 index 000000000..e405206bd --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java @@ -0,0 +1,205 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file.
+ */ +package org.qcmg.qmule.snppicker; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.SnpUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.qmule.util.IGVBatchFileGenerator; +import org.qcmg.qmule.util.TabbedDataLoader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class CompareSnps { + + private final ConcurrentMap firstSnpMap = new ConcurrentHashMap(30000); //not expecting more than 100000 + private final ConcurrentMap secondSnpMap = new ConcurrentHashMap(30000); + private final List firstList = new ArrayList(); + private final List secondList = new ArrayList(); +// private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); + + private static QLogger logger; + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + public int engage() throws Exception { + + logger.info("loading snp data from file: " + cmdLineInputFiles[0]); + TabbedDataLoader.loadTabbedData(cmdLineInputFiles[0], -2, firstSnpMap); + logger.info("loading snp data - DONE [" + firstSnpMap.size() + "]"); + logger.info("loading snp data from file: " + cmdLineInputFiles[1]); + TabbedDataLoader.loadTabbedData(cmdLineInputFiles[1], -2, secondSnpMap); + logger.info("loading snp data - DONE [" + secondSnpMap.size() + "]"); + + compare(); + + outputIGVBatchFiles(); + +// addPileupFromNormalBam(); + + return exitStatus; + } + + private void outputIGVBatchFiles() throws IOException { + IGVBatchFileGenerator.generate(firstList, cmdLineOutputFiles[0]); + IGVBatchFileGenerator.generate(secondList, cmdLineOutputFiles[1]); + } + + protected void compare() { + + // total counts + int firstMapCount = 0, secondMapCount = 0; + // count of snps unique to each input + int uniqueToFirstMap = 0, uniqueToSecondMap = 0; + int commonSnps = 0, commonAndAlsoClassABStopNonSynon = 0; + + // loop through first set + for (Entry entry : firstSnpMap.entrySet()) { + + TabbedRecord firstRecord = entry.getValue(); + + if (isClassAB(firstRecord, -1) && isStopNonSynonymous(firstRecord, 22)) { + firstMapCount++; + + TabbedRecord secondRecord = secondSnpMap.get(entry.getKey()); + if (null == secondRecord || ! (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22))) { + uniqueToFirstMap++; + firstList.add(entry.getKey()); + logger.info("Unique to first: " + entry.getKey().getChromosome() + ":" + entry.getKey().getStartPosition()); + } else { + commonSnps++; +// if (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22)) { +// commonAndAlsoClassABStopNonSynon++; +// } + } + } + + } + + // loop through second set + for (Entry entry : secondSnpMap.entrySet()) { + + TabbedRecord secondRecord = entry.getValue(); + + if (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22)) { + secondMapCount++; + + TabbedRecord firstRecord = firstSnpMap.get(entry.getKey()); + if (null == firstRecord || ! 
(isClassAB(firstRecord, -1) && isStopNonSynonymous(firstRecord, 22))) { + uniqueToSecondMap++; + secondList.add(entry.getKey()); + logger.info("Unique to second: " + entry.getKey().getChromosome() + ":" + entry.getKey().getStartPosition()); +// logger.info("IGV: " + entry.getValue().getData()); + } + } + } + + logger.info("SUMMARY:"); + logger.info("firstMapCount: " + firstMapCount); + logger.info("secondMapCount: " + secondMapCount); + logger.info("uniqueToFirstMap: " + uniqueToFirstMap); + logger.info("uniqueToSecondMap: " + uniqueToSecondMap); + logger.info("commonSnps: " + commonSnps); +// logger.info("commonAndAlsoClassABStopNonSynon: " + commonAndAlsoClassABStopNonSynon); + + } + + + + protected static boolean isClassAB(TabbedRecord record, int index) { + if (null == record || null == record.getData()) throw new IllegalArgumentException("null or empty Tabbed record"); + String [] params = TabbedDataLoader.tabbedPattern.split(record.getData()); + String qcmgFlag = TabbedDataLoader.getStringFromArray(params, index); + + return SnpUtils.isClassAorB(qcmgFlag); +// return "--".equals(qcmgFlag) || "less than 12 reads coverage in normal".equals(qcmgFlag) +// || "less than 3 reads coverage in normal".equals(qcmgFlag); + + } + + protected static boolean isStopNonSynonymous(TabbedRecord record, int index) { + if (null == record || null == record.getData()) throw new IllegalArgumentException("null or empty Tabbed record"); + String [] params = TabbedDataLoader.tabbedPattern.split(record.getData()); +// String consequenceType = params[index]; + String consequenceType = TabbedDataLoader.getStringFromArray(params, index); + + return consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS"); + } + + + + public static void main(String[] args) throws Exception { + CompareSnps sp = new CompareSnps(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(CompareSnps.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareSnps", CompareSnps.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +}
diff --git a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java new file mode 100644 index 000000000..1f914b876 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java @@ -0,0 +1,237 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.pileup.QPileupFileReader; +import org.qcmg.pileup.QSnpRecord; +import org.qcmg.qmule.pileup.VerifiedSnpFileReader; +import org.qcmg.qmule.pileup.VerifiedSnpRecord; +import org.qcmg.vcf.VCFFileReader; + +public class ExamineVerifiedSnps { + + private static final QLogger logger = QLoggerFactory.getLogger(ExamineVerifiedSnps.class); + + private static Map<ChrPosition, QSnpRecord> pileup = new HashMap<>(80000); + private static Map<ChrPosition, VcfRecord> vcfRecords = new HashMap<>(80000); + private static Map<ChrPosition, VerifiedSnpRecord> verifiedSNPs = new HashMap<>(250); + + public static void main(String[] args) throws Exception { + logger.info("hello..."); + + String filename = args[0]; + boolean runQPileup = true; + // filename type depends on whether to load qpileup or vcf + if (FileUtils.isFileTypeValid(filename, "vcf")) { + runQPileup = false; + } + + loadVerifiedSnps(args[1]); + logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); + + if (runQPileup) { + // load the existing pileup into memory + logger.info("running in pileup mode"); + loadQPileup(args[0]); + logger.info("loaded " + pileup.size() + " entries into the pileup map"); + examine(args[2]); + } else { + logger.info("running in vcf mode"); + loadGATKData(args[0]); + logger.info("loaded " + vcfRecords.size() + " entries into the vcf map"); + examineVCF(args[2]); + } + logger.info("goodbye..."); + } + + private static void examine(String outputFile) throws IOException { + if
(FileUtils.canFileBeWrittenTo(outputFile)) { + + + int verifiedYes = 0, qsnpVerifiedYes = 0; + int verifiedNo = 0, qsnpVerifiedNo = 0; + int verifiedNoGL = 0, qsnpVerifiedNoGL = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : verifiedSNPs.entrySet()) { + + QSnpRecord qpr = pileup.get(entry.getKey()); + VerifiedSnpRecord vsr = entry.getValue(); + + // only interested in exome data + if ( ! "exome".equals(vsr.getAnalysis())) continue; + + + if ("no".equals(vsr.getStatus())) { + verifiedNo++; + // if we don't have a matching qpr - good, otherwise, print details + if (null == qpr) { + qsnpVerifiedNo++; + writer.write(vsr.getFormattedString() + "\tOK - no entry in qsnp\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getClassification() + "\t" + + getAnnotationAndNote(qpr) + "\n"); +// + qpr.getMutation() + getAnnotationAndNote(qpr) + "\n"); + } + + } else if ("yes".equals(vsr.getStatus())) { + verifiedYes++; + if (null != qpr) { + qsnpVerifiedYes++; + writer.write(vsr.getFormattedString() + "\tOK - entry in qsnp\t" + qpr.getClassification() + "\t" + + getAnnotationAndNote(qpr) +"\n"); +// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\n"); + } + } else if ("no -GL".equals(vsr.getStatus())) { + verifiedNoGL++; + if (null != qpr) { + qsnpVerifiedNoGL++; + + writer.write(vsr.getFormattedString() + "\tentry in qsnp\t" + qpr.getClassification() + "\t" + + getAnnotationAndNote(qpr) +"\n"); +// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\tNo entry in qsnp\n"); + } + } + } + + writer.close(); + logger.info("verified yes: " + verifiedYes + ", in qsnp: " + qsnpVerifiedYes); + logger.info("verified no: " + verifiedNo + ", in qsnp: " + (verifiedNo-qsnpVerifiedNo)); + logger.info("verified no -GL: " + verifiedNoGL + ", in qsnp: " + qsnpVerifiedNoGL); + } + } + + private static void examineVCF(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + + int verifiedYes = 0, gatkVerifiedYes = 0; + int verifiedNo = 0, gatkVerifiedNo = 0; + int verifiedNoGL = 0, gatkVerifiedNoGL = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : verifiedSNPs.entrySet()) { + + VcfRecord qpr = vcfRecords.get(entry.getKey()); + VerifiedSnpRecord vsr = entry.getValue(); + + // only interested in exome data + if ( ! 
"exome".equals(vsr.getAnalysis())) continue; + + if ("no".equals(vsr.getStatus())) { + verifiedNo++; + // if we don't have a matching qpr - good, otherwise, print details + if (null == qpr) { + gatkVerifiedNo++; + writer.write(vsr.getFormattedString() + "\tOK - no entry in GATK\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\t" + + VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() + "\n"); +// writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getGenotype() + "\t" + qpr.getAlt() + "\n"); + } + + } else if ("yes".equals(vsr.getStatus())) { + verifiedYes++; + if (null != qpr) { + gatkVerifiedYes++; + writer.write(vsr.getFormattedString() + "\tOK - entry in GATK\t" + + VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\n"); + } + } else if ("no -GL".equals(vsr.getStatus())) { + verifiedNoGL++; + if (null != qpr) { + gatkVerifiedNoGL++; + + writer.write(vsr.getFormattedString() + "\tentry in GATK\t" + + VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\tNo entry in GATK\n"); + } + } + } + + writer.close(); + logger.info("verified yes: " + verifiedYes + ", in GATK: " + gatkVerifiedYes); + logger.info("verified no: " + verifiedNo + ", in GATK: " + (verifiedNo-gatkVerifiedNo)); + logger.info("verified no -GL: " + verifiedNoGL + ", in GATK: " + gatkVerifiedNoGL); + } + } + + private static String getAnnotationAndNote(QSnpRecord record) { + if ( isNull(record.getAnnotation())) return "\tClassA"; + else if (isNull(record.getAnnotation())) return "\tClassB"; + else return "\tClassB\t" + record.getAnnotation(); + } + + private static boolean isNull(String string) { + return null == string || "null".equals(string) || 0 == string.length(); + } + + private static void loadQPileup(String pileupFile) throws IOException { + if (FileUtils.canFileBeRead(pileupFile)) { + QPileupFileReader reader = new QPileupFileReader(new File(pileupFile)); + try { + for (QSnpRecord qpr : reader) { + pileup.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); + } + } finally { + reader.close(); + } + } + } + + private static void loadGATKData(String pileupFile) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + + VCFFileReader reader = new VCFFileReader(new File(pileupFile)); + try { + for (VcfRecord qpr : reader) { + vcfRecords.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); + } + } finally { + reader.close(); + } + } + } + private static void loadVerifiedSnps(String verifiedSnpFile) throws IOException { + if (FileUtils.canFileBeRead(verifiedSnpFile)) { + VerifiedSnpFileReader reader = new VerifiedSnpFileReader(new File(verifiedSnpFile)); + try { + for (VerifiedSnpRecord vsr : reader) { + verifiedSNPs.put(ChrPointPosition.valueOf(vsr.getChromosome(), vsr.getPosition()),vsr); + } + } finally { + reader.close(); + } + } + } +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- new file mode 100644 index 000000000..322cbd5d1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- @@ -0,0 +1,237 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.pileup.QPileupFileReader; +import org.qcmg.pileup.QSnpRecord; +import org.qcmg.pileup.VerifiedSnpFileReader; +import org.qcmg.pileup.VerifiedSnpRecord; +import org.qcmg.vcf.VCFFileReader; + +public class ExamineVerifiedSnps { + + private static final QLogger logger = QLoggerFactory.getLogger(ExamineVerifiedSnps.class); + + private static Map pileup = new HashMap<>(80000); + private static Map vcfRecords = new HashMap<>(80000); + private static Map verifiedSNPs = new HashMap<>(250); + + public static void main(String[] args) throws Exception { + logger.info("hello..."); + + String filename = args[0]; + boolean runQPileup = true; + // filename type depends on whether to load qpileup or vcf + if (FileUtils.isFileTypeValid(filename, "vcf")) { + runQPileup = false; + } + + loadVerifiedSnps(args[1]); + logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); + + if (runQPileup) { + // load the existing pileup into memory + logger.info("running in pileup mode"); + loadQPileup(args[0]); + logger.info("loaded " + pileup.size() + " entries into the pileup map"); + examine(args[2]); + } else { + logger.info("running in vcf mode"); + loadGATKData(args[0]); + logger.info("loaded " + vcfRecords.size() + " entries into the vcf map"); + examineVCF(args[2]); + } + logger.info("goodbye..."); + } + + private static void examine(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + + int verifiedYes = 0, qsnpVerifiedYes = 0; + int verifiedNo = 0, qsnpVerifiedNo = 0; + int verifiedNoGL = 0, qsnpVerifiedNoGL = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : verifiedSNPs.entrySet()) { + + QSnpRecord qpr = pileup.get(entry.getKey()); + VerifiedSnpRecord vsr = entry.getValue(); + + // only interested in exome data + if ( ! 
"exome".equals(vsr.getAnalysis())) continue; + + + if ("no".equals(vsr.getStatus())) { + verifiedNo++; + // if we don't have a matching qpr - good, otherwise, print details + if (null == qpr) { + qsnpVerifiedNo++; + writer.write(vsr.getFormattedString() + "\tOK - no entry in qsnp\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getClassification() + "\t" + + getAnnotationAndNote(qpr) + "\n"); +// + qpr.getMutation() + getAnnotationAndNote(qpr) + "\n"); + } + + } else if ("yes".equals(vsr.getStatus())) { + verifiedYes++; + if (null != qpr) { + qsnpVerifiedYes++; + writer.write(vsr.getFormattedString() + "\tOK - entry in qsnp\t" + qpr.getClassification() + "\t" + + getAnnotationAndNote(qpr) +"\n"); +// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\n"); + } + } else if ("no -GL".equals(vsr.getStatus())) { + verifiedNoGL++; + if (null != qpr) { + qsnpVerifiedNoGL++; + + writer.write(vsr.getFormattedString() + "\tentry in qsnp\t" + qpr.getClassification() + "\t" + + getAnnotationAndNote(qpr) +"\n"); +// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\tNo entry in qsnp\n"); + } + } + } + + writer.close(); + logger.info("verified yes: " + verifiedYes + ", in qsnp: " + qsnpVerifiedYes); + logger.info("verified no: " + verifiedNo + ", in qsnp: " + (verifiedNo-qsnpVerifiedNo)); + logger.info("verified no -GL: " + verifiedNoGL + ", in qsnp: " + qsnpVerifiedNoGL); + } + } + + private static void examineVCF(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + + int verifiedYes = 0, gatkVerifiedYes = 0; + int verifiedNo = 0, gatkVerifiedNo = 0; + int verifiedNoGL = 0, gatkVerifiedNoGL = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : verifiedSNPs.entrySet()) { + + VcfRecord qpr = vcfRecords.get(entry.getKey()); + VerifiedSnpRecord vsr = entry.getValue(); + + // only interested in exome data + if ( ! 
"exome".equals(vsr.getAnalysis())) continue; + + if ("no".equals(vsr.getStatus())) { + verifiedNo++; + // if we don't have a matching qpr - good, otherwise, print details + if (null == qpr) { + gatkVerifiedNo++; + writer.write(vsr.getFormattedString() + "\tOK - no entry in GATK\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\t" + + VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() + "\n"); +// writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getGenotype() + "\t" + qpr.getAlt() + "\n"); + } + + } else if ("yes".equals(vsr.getStatus())) { + verifiedYes++; + if (null != qpr) { + gatkVerifiedYes++; + writer.write(vsr.getFormattedString() + "\tOK - entry in GATK\t" + + VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\t???\n"); + } + } else if ("no -GL".equals(vsr.getStatus())) { + verifiedNoGL++; + if (null != qpr) { + gatkVerifiedNoGL++; + + writer.write(vsr.getFormattedString() + "\tentry in GATK\t" + + VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); + } else { + writer.write(vsr.getFormattedString() + "\tNo entry in GATK\n"); + } + } + } + + writer.close(); + logger.info("verified yes: " + verifiedYes + ", in GATK: " + gatkVerifiedYes); + logger.info("verified no: " + verifiedNo + ", in GATK: " + (verifiedNo-gatkVerifiedNo)); + logger.info("verified no -GL: " + verifiedNoGL + ", in GATK: " + gatkVerifiedNoGL); + } + } + + private static String getAnnotationAndNote(QSnpRecord record) { + if ( isNull(record.getAnnotation())) return "\tClassA"; + else if (isNull(record.getAnnotation())) return "\tClassB"; + else return "\tClassB\t" + record.getAnnotation(); + } + + private static boolean isNull(String string) { + return null == string || "null".equals(string) || 0 == string.length(); + } + + private static void loadQPileup(String pileupFile) throws IOException { + if (FileUtils.canFileBeRead(pileupFile)) { + QPileupFileReader reader = new QPileupFileReader(new File(pileupFile)); + try { + for (QSnpRecord qpr : reader) { + pileup.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); + } + } finally { + reader.close(); + } + } + } + + private static void loadGATKData(String pileupFile) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + + VCFFileReader reader = new VCFFileReader(new File(pileupFile)); + try { + for (VcfRecord qpr : reader) { + vcfRecords.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); + } + } finally { + reader.close(); + } + } + } + private static void loadVerifiedSnps(String verifiedSnpFile) throws IOException { + if (FileUtils.canFileBeRead(verifiedSnpFile)) { + VerifiedSnpFileReader reader = new VerifiedSnpFileReader(new File(verifiedSnpFile)); + try { + for (VerifiedSnpRecord vsr : reader) { + verifiedSNPs.put(ChrPointPosition.valueOf(vsr.getChromosome(), vsr.getPosition()),vsr); + } + } finally { + reader.close(); + } + } + } +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java new file mode 100644 index 000000000..88023e383 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java @@ -0,0 +1,488 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import htsjdk.samtools.SAMRecord; + +import org.qcmg.qmule.chrconv.ChrConvFileReader; +import org.qcmg.qmule.chrconv.ChromosomeConversionRecord; +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.Classification; +import org.qcmg.common.model.GenotypeEnum; +import org.qcmg.common.model.QSnpGATKRecord; +import org.qcmg.common.util.BaseUtils; +import org.qcmg.common.util.Constants; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.common.vcf.header.VcfHeaderUtils; +import org.qcmg.qmule.germlinedb.GermlineDBFileReader; +import org.qcmg.qmule.germlinedb.GermlineDBRecord; +import org.qcmg.picard.QJumper; +import org.qcmg.pileup.QSnpRecord; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.Options.Ids; +import org.qcmg.qmule.QMuleException; +import org.qcmg.vcf.VCFFileReader; + +public class GatkUniqueSnps { + +// private static final QLogger logger = QLoggerFactory.getLogger(GatkUniqueSnps.class); + private static QLogger logger; + + private static Map tumourRecords = new HashMap(100000); + private static Map normalRecords = new HashMap(100000); + +// private static Map classABRecords = new HashMap(100000); + private static List qPileupRecords = new ArrayList(15000); + + // map to hold chromosome conversion data + private static final Map ensembleToQCMG = new HashMap(110); + + + // constants + private String mutationIdPrefix; + private String tumourSampleId; + private String normalSampleId; + private String patientId; + private String somaticAnalysisId; + private String germlineAnalysisId; +// private String analysisId; +// private static final String mutationIdPrefix = "APGI_1992_"; +// private static final String analysisId = "qcmg_ssm_20110524_1"; +// private static final String tumourSampleId = "ICGC-ABMP-20091203-06-TD"; + + + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private Properties ids; + + private int exitStatus; + + + private static String bamFile1; + private static String bamFile1Index; +// private static String bamFile2; +// private static String bamFile2Index; + + private static QJumper jumper1; +// private static QJumper jumper2; + + public int engage() throws Exception { + + setupIds(); + + logger.info("loading normal vcf file"); + loadGATKData(cmdLineInputFiles[0], normalRecords); + logger.info("loaded " + normalRecords.size() + " normal vcf's"); + + logger.info("loading tumour vcf file"); + loadGATKData(cmdLineInputFiles[1], tumourRecords); + logger.info("loaded " + tumourRecords.size() + " tumour vcf's"); + + bamFile1 = cmdLineInputFiles[2]; + bamFile1Index = cmdLineInputFiles[3]; +// bamFile2 = args[4]; +// bamFile2Index = args[5]; + + + jumper1 = new QJumper(); + jumper1.setupReader(bamFile1, bamFile1Index); +// jumper2 = new QJumper(); +// jumper2.setupReader(bamFile2, bamFile2Index); + + + logger.info("about to call examine"); + examine(); + logger.info("about to call examine - DONE"); + + // close the qjumper + jumper1.closeReader(); + + logger.info("about to load chromosome conversion 
data"); + loadChromosomeConversionData(cmdLineInputFiles[4]); + logger.info("about to load chromosome conversion data - DONE"); + + logger.info("about to add germlineDB info"); + addGermlineDBData(cmdLineInputFiles[5]); + + int noAnnotation = 0; + for (final QSnpRecord qpr : qPileupRecords) if (null == qpr.getAnnotation()) noAnnotation++; + logger.info("class A after addition of germlinedb data: " + noAnnotation ); + + + logger.info("writing output"); + writeOutputForDCC(cmdLineOutputFiles[0]); + logger.info("DONE"); + + return exitStatus; + } + + private void setupIds() throws Exception { + if (null != ids) { + + somaticAnalysisId = (String) ids.get(Ids.SOMATIC_ANALYSIS); + germlineAnalysisId = (String) ids.get(Ids.GEMLINE_ANALYSIS); + tumourSampleId = (String) ids.get(Ids.TUMOUR_SAMPLE); + normalSampleId = (String) ids.get(Ids.NORMAL_SAMPLE); + patientId = (String) ids.get(Ids.PATIENT); + mutationIdPrefix = patientId + "_SNP_"; + + logger.tool("somaticAnalysisId: " + somaticAnalysisId); + logger.tool("germlineAnalysisId: " + germlineAnalysisId); + logger.tool("normalSampleId: " + normalSampleId); + logger.tool("tumourSampleId: " + tumourSampleId); + logger.tool("patientId: " + patientId); + logger.tool("mutationIdPrefix: " + mutationIdPrefix); + + } else { + logger.error("No ids were passed into the program"); + throw new Exception("Invalid arguments to GatkUniqueSnps"); + } + } + + private static void examine() throws Exception { + + int existsInNormalAndTumour = 0, sameGenotype = 0; + // loop through the tumour map + + for (final Entry tumourEntry : tumourRecords.entrySet()) { + + // see if a position exists in the normal map + final QSnpGATKRecord normalRecord = normalRecords.get(tumourEntry.getKey()); + if (null != normalRecord) { + existsInNormalAndTumour++; + + final GenotypeEnum normalGenotype = normalRecord.getGenotypeEnum(); + final GenotypeEnum tumourGenotype = tumourEntry.getValue().getGenotypeEnum(); + + if (normalGenotype == tumourGenotype) { + sameGenotype++; + } else { + if (tumourGenotype.containsAllele(normalRecord.getAlt().charAt(0))) { + //tumourEntry.getValue().getVCFRecord().addInfo("MIN"); + tumourEntry.getValue().getVCFRecord().appendInfo("MIN");; + } + if ( tumourGenotype.isHeterozygous() && ! 
tumourGenotype.containsAllele(tumourEntry.getValue().getRef().charAt(0))) + //tumourEntry.getValue().getVCFRecord().addInfo("tumour heterozygous for two non-reference alleles"); + tumourEntry.getValue().getVCFRecord().appendInfo("tumour heterozygous for two non-reference alleles"); +// if (null == tumourEntry.getValue().getAnnotation()) { + qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); +// } + } + } else { + // interested primarily in these fellas + qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); + } + } + + logger.info("exists in both normal and tumour: " + existsInNormalAndTumour + ", same Genotype: " + sameGenotype); + + logger.info("potential number of class A&B's before pileup: " + qPileupRecords.size() ); + + int noAnnotation = 0, count = 0; + for (final QSnpRecord qpr : qPileupRecords) { + getPileup(jumper1, qpr); + + if (++count % 100 == 0) + logger.info("hit " + count + " vcf records, " + qpr.toString()); + + if (qpr.getAnnotation() == null) + noAnnotation++; + } + + logger.info("class A after pileup: " + noAnnotation ); + + } + + private static void loadChromosomeConversionData(String chrConvFile) throws IOException { + final ChrConvFileReader reader = new ChrConvFileReader(new File(chrConvFile)); + try { + for (final ChromosomeConversionRecord record : reader) { + // add extra map inserts here as required + ensembleToQCMG.put(record.getEnsembleV55(), record.getQcmg()); + } + } finally { + reader.close(); + } + } + + private void writeOutputForDCC(String dccSomaticFile) throws IOException { + if (dccSomaticFile.contains("Germline_DB.txt")) throw new IOException("Wrong output file!!!"); + + final FileWriter somaticWriter = new FileWriter(new File(dccSomaticFile)); + + final String somaticHeader = "analysis_id\ttumour_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tQCMGflag\n"; + final int counter = 1; + try { + + somaticWriter.write(somaticHeader); + for (final QSnpRecord record : qPileupRecords) { + + String ensemblChr = null; + // get ensembl chromosome + for (final Map.Entry entry : ensembleToQCMG.entrySet()) { + if (record.getChromosome().equals(entry.getValue())) { + ensemblChr = entry.getKey(); + break; + } + } + somaticWriter.write(somaticAnalysisId + "\t" + tumourSampleId + "\t" + + "\n"); +// + record.getDCCData(mutationIdPrefix, ensemblChr) + "\n"); + } + } finally { + somaticWriter.close(); + } + } + + private static QSnpRecord getQPileupRecord(QSnpGATKRecord vcfRec) { + final QSnpRecord qpr = new QSnpRecord(vcfRec.getChromosome(), vcfRec.getPosition(), vcfRec.getRef()); + qpr.setTumourGenotype(vcfRec.getGenotypeEnum()); +// qpr.setMutation(vcfRec.getRef() + Constants.MUT_DELIM + vcfRec.getAlt()); +// qpr.getVcfRecord().setFilter(vcfRec.getAnnotation()); + qpr.setClassification(Classification.SOMATIC); + return qpr; + } + + + public static void getPileup(QJumper jumper, QSnpRecord record) throws Exception { + + final List firstSet = jumper.getRecordsAtPosition(record.getChromosome(), record.getPosition()); +// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); + + + examinePileup(firstSet, record); + + +// char mutation = record.getMutation().charAt(record.getMutation().length() -1); +// boolean mutationFoundInNormal = false; 
+// int normalCoverage = 0; +// for (SAMRecord sam : firstSet ) { +// if ( ! sam.getDuplicateReadFlag()) { +// ++normalCoverage; +// +// // need to get the base at the position +// int offset = record.getPosition() - sam.getAlignmentStart(); +// if (offset < 0) throw new Exception("invalid start position!!!"); +// +// if (sam.getReadBases()[offset] == mutation) { +// mutationFoundInNormal = true; +// break; +// } +// } +// } +// +// if (mutationFoundInNormal) { +// record.addAnnotation("mutation also found in pileup of normal"); +// } +// +// record.setNormalCount(normalCoverage); +// +// if (normalCoverage < 12) +// record.addAnnotation("less than 12 reads coverage in normal"); + + } + + + public static void examinePileup(List sams, QSnpRecord record) throws Exception { + + final char mutation = record.getAlt().charAt(0); +// final char mutation = record.getMutation().charAt(record.getMutation().length() -1); + boolean mutationFoundInNormal = false; + int normalCoverage = 0; + for (final SAMRecord sam : sams ) { + if ( ! sam.getDuplicateReadFlag()) { + ++normalCoverage; + + // need to get the base at the position +// int offset = record.getPosition() - sam.getUnclippedStart(); + int offset = record.getPosition() - sam.getAlignmentStart(); + if (offset < 0) throw new Exception("invalid start position!!!: "+ sam.format()); + + if (offset >= sam.getReadLength()) { +// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); + // set to last entry in sequence + offset = sam.getReadLength() -1; + } + + if (sam.getReadBases()[offset] == mutation) { + mutationFoundInNormal = true; +// break; + } + } + } + + if (mutationFoundInNormal) { + VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_MUTATION_IN_NORMAL); + } + +// record.setNormalCount(normalCoverage); + + if (normalCoverage < 12) { + VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_COVERAGE); + } + + + } + + +// private static void getPileup(VCFRecord record) { +// +// List firstSet = jumper1.getRecordsAtPosition(record.getChromosome(), record.getPosition()); +//// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); +// +// int normalCoverage = 0; +// for (SAMRecord sam : firstSet ) { +// if ( ! 
sam.getDuplicateReadFlag()) +// ++normalCoverage; +// } +// +// +//// int normalCoverage = firstSet.size(); +//// int normalCoverage = firstSet.size() + secondSet.size(); +// record.setNormalCoverage(normalCoverage); +// +// if (normalCoverage < 12) +// record.addAnnotation("less than 12 reads coverage in normal"); +// +// } + + + private static void addGermlineDBData(String germlineDBFile) throws IOException { + + final GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDBFile)); + // create map of SOMATIC classified SNPs + final Map somaticPileupMap = new HashMap(qPileupRecords.size(), 1); + for (final QSnpRecord pileupRecord : qPileupRecords) { + somaticPileupMap.put(ChrPointPosition.valueOf(pileupRecord.getChromosome(), pileupRecord.getPosition()), pileupRecord); + } + + int updateCount = 0, count = 0; + try { + for (final GermlineDBRecord rec : reader) { + + // get QCMG chromosome from map + final String chr = ensembleToQCMG.get(rec.getChromosome()); + final ChrPosition id = ChrPointPosition.valueOf(chr, rec.getPosition()); + + final QSnpRecord qpr = somaticPileupMap.get(id); + if (null != qpr && null != qpr.getAlt() && (null == qpr.getAnnotation() || ! qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_GERMLINE))) { + final String mutation = qpr.getAlt(); + if (mutation.length() == 3) { + final char c = mutation.charAt(2); + + final GenotypeEnum germlineDBGenotype = BaseUtils.getGenotypeEnum(rec.getNormalGenotype()); + if (germlineDBGenotype.containsAllele(c)) { + updateCount++; + + VcfUtils.updateFilter(qpr.getVcfRecord(), VcfHeaderUtils.FILTER_GERMLINE); + } + + + } else { + logger.info("mutation string length: " + mutation.length()); + } + } + + if (++count % 1000000 == 0) + logger.info("hit " + count + " germline reords"); + + } + } finally { + reader.close(); + } + logger.info("updated: " + updateCount + " somatic positions with germlineDB info"); + } + + private static void loadGATKData(String pileupFile, Map map) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + + final VCFFileReader reader = new VCFFileReader(new File(pileupFile)); + try { + for (final VcfRecord qpr : reader) { + map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()), new QSnpGATKRecord(qpr)); + } + } finally { + reader.close(); + } + } + } + + public static void main(String[] args) throws Exception { + final GatkUniqueSnps gus = new GatkUniqueSnps(); + final int exitStatus = gus.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + final Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logger = QLoggerFactory.getLogger(GatkUniqueSnps.class, options.getLogFile(), options.getLogLevel()); + logger.logInitialExecutionStats("GatkUniqueSnps", GatkUniqueSnps.class.getPackage().getImplementationVersion()); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (final String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + ids = options.getIds(); + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- new file mode 100644 index 000000000..6758eb70e --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- @@ -0,0 +1,488 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import htsjdk.samtools.SAMRecord; + +import org.qcmg.chrconv.ChrConvFileReader; +import org.qcmg.chrconv.ChromosomeConversionRecord; +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.Classification; +import org.qcmg.common.model.GenotypeEnum; +import org.qcmg.common.model.QSnpGATKRecord; +import org.qcmg.common.util.BaseUtils; +import org.qcmg.common.util.Constants; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.common.vcf.header.VcfHeaderUtils; +import org.qcmg.germlinedb.GermlineDBFileReader; +import org.qcmg.germlinedb.GermlineDBRecord; +import org.qcmg.picard.QJumper; +import org.qcmg.pileup.QSnpRecord; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.Options.Ids; +import org.qcmg.qmule.QMuleException; +import org.qcmg.vcf.VCFFileReader; + +public class GatkUniqueSnps { + +// private static final QLogger logger = QLoggerFactory.getLogger(GatkUniqueSnps.class); + private static QLogger logger; + + private static Map tumourRecords = new HashMap(100000); + private static Map normalRecords = new HashMap(100000); + +// private static Map classABRecords = new HashMap(100000); + private static List qPileupRecords = new ArrayList(15000); + + // map to hold chromosome conversion data + private static final Map ensembleToQCMG = new HashMap(110); + + + // constants + private String mutationIdPrefix; + 
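+	// the identifier fields in this block are read from the Ids properties supplied via Options
+	// (see setupIds()) and feed the DCC output rows; mutationIdPrefix is built as patientId + "_SNP_"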
private String tumourSampleId; + private String normalSampleId; + private String patientId; + private String somaticAnalysisId; + private String germlineAnalysisId; +// private String analysisId; +// private static final String mutationIdPrefix = "APGI_1992_"; +// private static final String analysisId = "qcmg_ssm_20110524_1"; +// private static final String tumourSampleId = "ICGC-ABMP-20091203-06-TD"; + + + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private Properties ids; + + private int exitStatus; + + + private static String bamFile1; + private static String bamFile1Index; +// private static String bamFile2; +// private static String bamFile2Index; + + private static QJumper jumper1; +// private static QJumper jumper2; + + public int engage() throws Exception { + + setupIds(); + + logger.info("loading normal vcf file"); + loadGATKData(cmdLineInputFiles[0], normalRecords); + logger.info("loaded " + normalRecords.size() + " normal vcf's"); + + logger.info("loading tumour vcf file"); + loadGATKData(cmdLineInputFiles[1], tumourRecords); + logger.info("loaded " + tumourRecords.size() + " tumour vcf's"); + + bamFile1 = cmdLineInputFiles[2]; + bamFile1Index = cmdLineInputFiles[3]; +// bamFile2 = args[4]; +// bamFile2Index = args[5]; + + + jumper1 = new QJumper(); + jumper1.setupReader(bamFile1, bamFile1Index); +// jumper2 = new QJumper(); +// jumper2.setupReader(bamFile2, bamFile2Index); + + + logger.info("about to call examine"); + examine(); + logger.info("about to call examine - DONE"); + + // close the qjumper + jumper1.closeReader(); + + logger.info("about to load chromosome conversion data"); + loadChromosomeConversionData(cmdLineInputFiles[4]); + logger.info("about to load chromosome conversion data - DONE"); + + logger.info("about to add germlineDB info"); + addGermlineDBData(cmdLineInputFiles[5]); + + int noAnnotation = 0; + for (final QSnpRecord qpr : qPileupRecords) if (null == qpr.getAnnotation()) noAnnotation++; + logger.info("class A after addition of germlinedb data: " + noAnnotation ); + + + logger.info("writing output"); + writeOutputForDCC(cmdLineOutputFiles[0]); + logger.info("DONE"); + + return exitStatus; + } + + private void setupIds() throws Exception { + if (null != ids) { + + somaticAnalysisId = (String) ids.get(Ids.SOMATIC_ANALYSIS); + germlineAnalysisId = (String) ids.get(Ids.GEMLINE_ANALYSIS); + tumourSampleId = (String) ids.get(Ids.TUMOUR_SAMPLE); + normalSampleId = (String) ids.get(Ids.NORMAL_SAMPLE); + patientId = (String) ids.get(Ids.PATIENT); + mutationIdPrefix = patientId + "_SNP_"; + + logger.tool("somaticAnalysisId: " + somaticAnalysisId); + logger.tool("germlineAnalysisId: " + germlineAnalysisId); + logger.tool("normalSampleId: " + normalSampleId); + logger.tool("tumourSampleId: " + tumourSampleId); + logger.tool("patientId: " + patientId); + logger.tool("mutationIdPrefix: " + mutationIdPrefix); + + } else { + logger.error("No ids were passed into the program"); + throw new Exception("Invalid arguments to GatkUniqueSnps"); + } + } + + private static void examine() throws Exception { + + int existsInNormalAndTumour = 0, sameGenotype = 0; + // loop through the tumour map + + for (final Entry tumourEntry : tumourRecords.entrySet()) { + + // see if a position exists in the normal map + final QSnpGATKRecord normalRecord = normalRecords.get(tumourEntry.getKey()); + if (null != normalRecord) { + existsInNormalAndTumour++; + + final GenotypeEnum normalGenotype = normalRecord.getGenotypeEnum(); + final GenotypeEnum tumourGenotype = 
tumourEntry.getValue().getGenotypeEnum(); + + if (normalGenotype == tumourGenotype) { + sameGenotype++; + } else { + if (tumourGenotype.containsAllele(normalRecord.getAlt().charAt(0))) { + //tumourEntry.getValue().getVCFRecord().addInfo("MIN"); + tumourEntry.getValue().getVCFRecord().appendInfo("MIN");; + } + if ( tumourGenotype.isHeterozygous() && ! tumourGenotype.containsAllele(tumourEntry.getValue().getRef().charAt(0))) + //tumourEntry.getValue().getVCFRecord().addInfo("tumour heterozygous for two non-reference alleles"); + tumourEntry.getValue().getVCFRecord().appendInfo("tumour heterozygous for two non-reference alleles"); +// if (null == tumourEntry.getValue().getAnnotation()) { + qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); +// } + } + } else { + // interested primarily in these fellas + qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); + } + } + + logger.info("exists in both normal and tumour: " + existsInNormalAndTumour + ", same Genotype: " + sameGenotype); + + logger.info("potential number of class A&B's before pileup: " + qPileupRecords.size() ); + + int noAnnotation = 0, count = 0; + for (final QSnpRecord qpr : qPileupRecords) { + getPileup(jumper1, qpr); + + if (++count % 100 == 0) + logger.info("hit " + count + " vcf records, " + qpr.toString()); + + if (qpr.getAnnotation() == null) + noAnnotation++; + } + + logger.info("class A after pileup: " + noAnnotation ); + + } + + private static void loadChromosomeConversionData(String chrConvFile) throws IOException { + final ChrConvFileReader reader = new ChrConvFileReader(new File(chrConvFile)); + try { + for (final ChromosomeConversionRecord record : reader) { + // add extra map inserts here as required + ensembleToQCMG.put(record.getEnsembleV55(), record.getQcmg()); + } + } finally { + reader.close(); + } + } + + private void writeOutputForDCC(String dccSomaticFile) throws IOException { + if (dccSomaticFile.contains("Germline_DB.txt")) throw new IOException("Wrong output file!!!"); + + final FileWriter somaticWriter = new FileWriter(new File(dccSomaticFile)); + + final String somaticHeader = "analysis_id\ttumour_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tQCMGflag\n"; + final int counter = 1; + try { + + somaticWriter.write(somaticHeader); + for (final QSnpRecord record : qPileupRecords) { + + String ensemblChr = null; + // get ensembl chromosome + for (final Map.Entry entry : ensembleToQCMG.entrySet()) { + if (record.getChromosome().equals(entry.getValue())) { + ensemblChr = entry.getKey(); + break; + } + } + somaticWriter.write(somaticAnalysisId + "\t" + tumourSampleId + "\t" + + "\n"); +// + record.getDCCData(mutationIdPrefix, ensemblChr) + "\n"); + } + } finally { + somaticWriter.close(); + } + } + + private static QSnpRecord getQPileupRecord(QSnpGATKRecord vcfRec) { + final QSnpRecord qpr = new QSnpRecord(vcfRec.getChromosome(), vcfRec.getPosition(), vcfRec.getRef()); + qpr.setTumourGenotype(vcfRec.getGenotypeEnum()); +// qpr.setMutation(vcfRec.getRef() + Constants.MUT_DELIM + vcfRec.getAlt()); +// qpr.getVcfRecord().setFilter(vcfRec.getAnnotation()); + qpr.setClassification(Classification.SOMATIC); + return qpr; + } + + + public static void getPileup(QJumper jumper, QSnpRecord record) throws Exception { + + final List 
firstSet = jumper.getRecordsAtPosition(record.getChromosome(), record.getPosition()); +// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); + + + examinePileup(firstSet, record); + + +// char mutation = record.getMutation().charAt(record.getMutation().length() -1); +// boolean mutationFoundInNormal = false; +// int normalCoverage = 0; +// for (SAMRecord sam : firstSet ) { +// if ( ! sam.getDuplicateReadFlag()) { +// ++normalCoverage; +// +// // need to get the base at the position +// int offset = record.getPosition() - sam.getAlignmentStart(); +// if (offset < 0) throw new Exception("invalid start position!!!"); +// +// if (sam.getReadBases()[offset] == mutation) { +// mutationFoundInNormal = true; +// break; +// } +// } +// } +// +// if (mutationFoundInNormal) { +// record.addAnnotation("mutation also found in pileup of normal"); +// } +// +// record.setNormalCount(normalCoverage); +// +// if (normalCoverage < 12) +// record.addAnnotation("less than 12 reads coverage in normal"); + + } + + + public static void examinePileup(List sams, QSnpRecord record) throws Exception { + + final char mutation = record.getAlt().charAt(0); +// final char mutation = record.getMutation().charAt(record.getMutation().length() -1); + boolean mutationFoundInNormal = false; + int normalCoverage = 0; + for (final SAMRecord sam : sams ) { + if ( ! sam.getDuplicateReadFlag()) { + ++normalCoverage; + + // need to get the base at the position +// int offset = record.getPosition() - sam.getUnclippedStart(); + int offset = record.getPosition() - sam.getAlignmentStart(); + if (offset < 0) throw new Exception("invalid start position!!!: "+ sam.format()); + + if (offset >= sam.getReadLength()) { +// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); + // set to last entry in sequence + offset = sam.getReadLength() -1; + } + + if (sam.getReadBases()[offset] == mutation) { + mutationFoundInNormal = true; +// break; + } + } + } + + if (mutationFoundInNormal) { + VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_MUTATION_IN_NORMAL); + } + +// record.setNormalCount(normalCoverage); + + if (normalCoverage < 12) { + VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_COVERAGE); + } + + + } + + +// private static void getPileup(VCFRecord record) { +// +// List firstSet = jumper1.getRecordsAtPosition(record.getChromosome(), record.getPosition()); +//// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); +// +// int normalCoverage = 0; +// for (SAMRecord sam : firstSet ) { +// if ( ! 
sam.getDuplicateReadFlag()) +// ++normalCoverage; +// } +// +// +//// int normalCoverage = firstSet.size(); +//// int normalCoverage = firstSet.size() + secondSet.size(); +// record.setNormalCoverage(normalCoverage); +// +// if (normalCoverage < 12) +// record.addAnnotation("less than 12 reads coverage in normal"); +// +// } + + + private static void addGermlineDBData(String germlineDBFile) throws IOException { + + final GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDBFile)); + // create map of SOMATIC classified SNPs + final Map somaticPileupMap = new HashMap(qPileupRecords.size(), 1); + for (final QSnpRecord pileupRecord : qPileupRecords) { + somaticPileupMap.put(ChrPointPosition.valueOf(pileupRecord.getChromosome(), pileupRecord.getPosition()), pileupRecord); + } + + int updateCount = 0, count = 0; + try { + for (final GermlineDBRecord rec : reader) { + + // get QCMG chromosome from map + final String chr = ensembleToQCMG.get(rec.getChromosome()); + final ChrPosition id = ChrPointPosition.valueOf(chr, rec.getPosition()); + + final QSnpRecord qpr = somaticPileupMap.get(id); + if (null != qpr && null != qpr.getAlt() && (null == qpr.getAnnotation() || ! qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_GERMLINE))) { + final String mutation = qpr.getAlt(); + if (mutation.length() == 3) { + final char c = mutation.charAt(2); + + final GenotypeEnum germlineDBGenotype = BaseUtils.getGenotypeEnum(rec.getNormalGenotype()); + if (germlineDBGenotype.containsAllele(c)) { + updateCount++; + + VcfUtils.updateFilter(qpr.getVcfRecord(), VcfHeaderUtils.FILTER_GERMLINE); + } + + + } else { + logger.info("mutation string length: " + mutation.length()); + } + } + + if (++count % 1000000 == 0) + logger.info("hit " + count + " germline reords"); + + } + } finally { + reader.close(); + } + logger.info("updated: " + updateCount + " somatic positions with germlineDB info"); + } + + private static void loadGATKData(String pileupFile, Map map) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + + final VCFFileReader reader = new VCFFileReader(new File(pileupFile)); + try { + for (final VcfRecord qpr : reader) { + map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()), new QSnpGATKRecord(qpr)); + } + } finally { + reader.close(); + } + } + } + + public static void main(String[] args) throws Exception { + final GatkUniqueSnps gus = new GatkUniqueSnps(); + final int exitStatus = gus.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + final Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! 
options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logger = QLoggerFactory.getLogger(GatkUniqueSnps.class, options.getLogFile(), options.getLogLevel()); + logger.logInitialExecutionStats("GatkUniqueSnps", GatkUniqueSnps.class.getPackage().getImplementationVersion()); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (final String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + ids = options.getIds(); + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/Mule.java b/qmule/src/org/qcmg/qmule/snppicker/Mule.java new file mode 100644 index 000000000..6b3b7f4a7 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/Mule.java @@ -0,0 +1,85 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; + +public class Mule { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + + private static QLogger logger; + + public int engage() { + return 1; + } + + + + public static void main(String[] args) throws Exception { + Mule sp = new Mule(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(Mule.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("Example", Mule.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- b/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- new file mode 100644 index 000000000..6b3b7f4a7 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- @@ -0,0 +1,85 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; + +public class Mule { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + + private static QLogger logger; + + public int engage() { + return 1; + } + + + + public static void main(String[] args) throws Exception { + Mule sp = new Mule(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(Mule.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("Example", Mule.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java new file mode 100644 index 000000000..c4a76aeaa --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java @@ -0,0 +1,802 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
+ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import htsjdk.samtools.SAMRecord; + +import org.qcmg.qmule.chrconv.ChrConvFileReader; +import org.qcmg.qmule.chrconv.ChromosomeConversionRecord; +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.Genotype; +import org.qcmg.common.util.BaseUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.qmule.dbsnp.Dbsnp130Record; +import org.qcmg.qmule.dbsnp.DbsnpFileReader; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.picard.QJumper; +import org.qcmg.pileup.PileupFileReader; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.qmule.record.Record; +import org.qcmg.unused.illumina.IlluminaFileReader; +import org.qcmg.unused.illumina.IlluminaRecord; +import org.qcmg.vcf.VCFFileReader; + +public class SnpPicker { + + private static final char DEFAULT_CHAR = '\u0000'; + private static QLogger logger; +// private static DecimalFormat df = new DecimalFormat("0.0000"); + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private static boolean isNormal; + +// private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + + Map illuminaMap = new HashMap(1000000,0.99f); // not expecting more than 1000000 + + Map variantMap = new HashMap(2000000); + + // map to hold chromosome conversion data + Map gffToQCMG = new HashMap(100, 0.99f); + +// List illuminaRecords = new ArrayList(); +// List dbSNPRecords = new ArrayList(13000000); + + private int engage() throws Exception { + + // populate the chromosome conversion map + logger.info("about to load chromosome conversion data"); + loadChromosomeConversionData(); + logger.info("about to load chromosome conversion data - DONE"); + + // we are working off the raw illumina data here - first convert it into filtered format, and use that as the input + + logger.info("About to load raw illumina data"); + loadRawIlluminaData(); +// logger.info("No of variant records: " + variantMap.size() + " in file: " + cmdLineInputFiles[0]); + + logger.info("About to load gff3 data"); + loadGff3Data(); + logger.info("No of variant records: " + variantMap.size()); + +// logger.info("About to load vcf data"); +// loadVCFData(); +// logger.info("No of variant records: " + variantMap.size()); + + logger.info("About to load qsnp data"); + loadQSnpData(); + logger.info("No of variant records: " + variantMap.size()); + + + + + logger.info("About to load dbSNP data"); + loadDbSnpData(); +// logger.info("No of variant records: " + variantMap.size()); + + // update variantMap with details from illuminaMap + logger.info("About to load filtered illumina data into variant map"); + convertIlluminaToVariant(); + logger.info("About to load filtered illumina data into variant map - DONE"); + + // get some stats + displayStats(); + + // pileup 
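+		// getPileup() below walks every variant that has gff3 or vcf/qsnp evidence and, via QJumper,
+		// records the read bases observed at that position (cmdLineInputFiles[5] and [6] are presumably
+		// the BAM and its index, matching the setupReader(bam, bamIndex) usage)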
+ logger.info("time for pileup..."); + getPileup(); + logger.info("time for pileup - DONE"); + + // more stats + displayStats2(); + + logger.info("Will now attempt to write out variant data" ); + outputVariantData(); + logger.info("Will now attempt to write out variant data - DONE"); + + return exitStatus; + } + + private void getPileup() throws Exception { + QJumper qj = new QJumper(); + qj.setupReader(cmdLineInputFiles[5], cmdLineInputFiles[6]); + + VariantRecord rec; + StringBuilder pileup = new StringBuilder(); + List reads; +// String chr; + int position; + int offset; + + int pileupCount = 0; + for (Map.Entry entry : variantMap.entrySet()) { + // only want pileup if we have gff or vcf data + rec = entry.getValue(); + if (DEFAULT_CHAR != rec.getGffRef() || null != rec.getVcfGenotype()) { +// chr = ( ! entry.getKey().getChromosome().startsWith("GL") ? "chr" : "") + entry.getKey().getChromosome(); + + reads = qj.getRecordsAtPosition(entry.getKey().getChromosome(), entry.getKey().getStartPosition()); + // do something with the reads + position = entry.getKey().getStartPosition(); + for (SAMRecord sr : reads) { + offset = position - sr.getAlignmentStart(); + pileup.append((char)sr.getReadBases()[offset]); + } + rec.setPileup(pileup.toString()); + + // reset the StringBuilder + pileup.setLength(0); + + if (++pileupCount % 1000 == 0) + logger.info("Run " + pileupCount + " pileups so far, " + reads.size() + " sam records returned from picard"); + } + } + } + + private void loadChromosomeConversionData() { + String chrConvFile = cmdLineInputFiles[4]; + ChrConvFileReader reader = null; + try { + reader = new ChrConvFileReader(new File(chrConvFile)); + } catch (Exception e) { + logger.error("Exception caught whilst trying to instantiate ChrConvFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + for (ChromosomeConversionRecord record : reader) { + // add extra map inserts here as required + // diBayes field is no longer present in chr conv file +// gffToQCMG.put(record.getDiBayes(), record.getQcmg()); + // guessing we want ensemble in here as the key + gffToQCMG.put(record.getEnsembleV55(), record.getQcmg()); + } + + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close ChrConvFileReader", e); + exitStatus = -1; + } + } + } + + private void displayStats() { + int illuminaOnly = 0; + int gff3Only = 0; + int vcfOnly = 0; + int vcfANDgff = 0; + int vcfANDillumina = 0; + int gffANDillumina = 0; + int allThree = 0; + for (VariantRecord record : variantMap.values()) { + + boolean illuminaDataPresent = null != record.getIlluminaRef(); + boolean gffDataPresent = DEFAULT_CHAR != record.getGffRef(); + boolean vcfDataPresent = DEFAULT_CHAR != record.getVcfRef(); + + if (illuminaDataPresent && gffDataPresent && vcfDataPresent) { + allThree++; + record.setPositionMatch("IGV"); + } else if (gffDataPresent && vcfDataPresent) { + vcfANDgff++; + record.setPositionMatch("GV"); + } else if (illuminaDataPresent && vcfDataPresent) { + vcfANDillumina++; + record.setPositionMatch("IV"); + } else if (illuminaDataPresent && gffDataPresent) { + gffANDillumina++; + record.setPositionMatch("IG"); + } else if ( gffDataPresent) { + gff3Only++; + record.setPositionMatch("G"); + }else if ( vcfDataPresent) { + vcfOnly++; + record.setPositionMatch("V"); + }else if ( illuminaDataPresent) { + illuminaOnly++; + record.setPositionMatch("I"); + } + + record.setGenotypeMatch(getGenotypeMatchInfo(record)); + } + + logger.info("allThree: " + allThree); + 
logger.info("illuminaOnly: " + illuminaOnly); + logger.info("gff3Only: " + gff3Only); + logger.info("vcfANDgff: " + vcfANDgff); + logger.info("vcfANDillumina: " + vcfANDillumina); + logger.info("gffANDillumina: " + gffANDillumina); + logger.info("vcfOnly: " + vcfOnly); + + int total = allThree + illuminaOnly + gff3Only + vcfANDgff + vcfANDillumina + gffANDillumina + vcfOnly; + logger.info("Sum of above numbers: " + total); + logger.info("No of records in map: " + variantMap.size()); + + } + + private void displayStats2() { + final String IGV = "IGV"; + final String IG = "IG"; + final String IV = "IV"; + final String GV = "GV"; + final String I = "I"; + final String G = "G"; + final String V = "V"; + + int positionIGV=0, positionIG=0, positionIV=0, positionGV=0, positionI=0, positionG=0, positionV = 0; + int pIGVgIGV=0, pIGVgIG=0, pIGVgIV=0, pIGVgGV=0; + int pIGgIG=0; + int pIVgIV=0; + int pGVgGV=0; + + + for (VariantRecord record : variantMap.values()) { + + String positionMatch = record.getPositionMatch(); + String genotypeMatch = record.getGenotypeMatch(); + + if (IGV.equals(positionMatch)) { + positionIGV++; + if (IGV.equals(genotypeMatch)) pIGVgIGV++; + else if (IG.equals(genotypeMatch)) pIGVgIG++; + else if (IV.equals(genotypeMatch)) pIGVgIV++; + else if (GV.equals(genotypeMatch)) pIGVgGV++; + + } else if (IG.equals(positionMatch)) { + positionIG++; + if (IG.equals(genotypeMatch)) pIGgIG++; + + } else if (IV.equals(positionMatch)) { + positionIV++; + if (IV.equals(genotypeMatch)) pIVgIV++; + + } else if (GV.equals(positionMatch)) { + positionGV++; + if (GV.equals(genotypeMatch)) pGVgGV++; + + } else if (I.equals(positionMatch)) positionI++; + else if ( G.equals(positionMatch)) positionG++; + else if ( V.equals(positionMatch)) positionV++; + } + + logger.info("position IGV: " + positionIGV + ", genotype IGV: " + pIGVgIGV + ", genotype IG: " + pIGVgIG + ", genotype IV: " + pIGVgIV + ", genotype GV: " + pIGVgGV); + logger.info("position IG: " + positionIG + ", genotype IG: " + pIGgIG); + logger.info("position IV: " + positionIV + ", genotype IV: " + pIVgIV); + logger.info("position GV: " + positionGV + ", genotype GV: " + pGVgGV); + + logger.info("position I: " + positionI); + logger.info("position G: " + positionG); + logger.info("position V: " + positionV); + + int total = positionIGV + positionIG + positionIV + positionGV + positionI + positionG + positionV; + logger.info("Sum of above numbers: " + total); + logger.info("No of records in map: " + variantMap.size()); + + } + + private String getGenotypeMatchInfo(VariantRecord record) { + Genotype illuminaGen = BaseUtils.getGenotype(record.getIllAllele1() , record.getIllAllele2()); +// String illuminaGen = record.getIlluminaRef(); + Genotype gffGen = BaseUtils.getGenotypeFromIUPACCode(record.getGffGenotype()); + Genotype vcfGen = null; + if (DEFAULT_CHAR != record.getVcfAlt()) + vcfGen = BaseUtils.getGenotypeFromVcf(record.getVcfGenotype(), record.getVcfRef(), record.getVcfAlt()); + else + vcfGen = BaseUtils.getGenotype(record.getVcfGenotype()); + + String result = null; + + if (illuminaGen.equals( gffGen) && illuminaGen.equals(vcfGen)) result = "IGV"; + else if (illuminaGen.equals(gffGen)) result = "IG"; + else if (illuminaGen.equals(vcfGen)) result = "IV"; + else if (null != gffGen && gffGen.equals(vcfGen)) result = "GV"; +// if (doStringsMatch(illuminaGen, gffGen) && doStringsMatch(illuminaGen, vcfGen)) result = "IGV"; +// else if (doStringsMatch(illuminaGen, gffGen)) result = "IG"; +// else if (doStringsMatch(illuminaGen, vcfGen)) 
result = "IV"; +// else if (doStringsMatch(gffGen, vcfGen)) result = "GV"; + + return result; + } + + private boolean doStringsMatch(String a, String b) { + return null == a ? false : a.equals(b); + } + + private void loadDbSnpData() { + // update records with dbsnp info + // should be second of the input files + String dbSNPFile = cmdLineInputFiles[3]; + DbsnpFileReader dbSNPReader = null; + try { + dbSNPReader = new DbsnpFileReader(new File(dbSNPFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate DbsnpFileReader", e); + exitStatus = -1; + } + + int updateCount = 0; + int noOfDbSnps = 0; + if (null != dbSNPReader) { + + ChrPosition varId; + VariantRecord varRec; + IlluminaRecord illRec; + int illuminaDbSnpCount = 0; + + for (Dbsnp130Record rec : dbSNPReader) { + // update illumina array with dbSNP details + illRec = illuminaMap.get(rec.getRefSnp()); + if (null != illRec) { + if (null != illRec.getChr()) { + logger.info("illumina rec: " + illRec.getChr() + ":" + illRec.getStart() + ":" + illRec.getSnpId() +" has already been updated - dbSNP: " + rec.getChromosome() + ":" + rec.getChromosomePosition() + ":" + rec.getRefSnp()); + // dbSNP id has more than 1 chr and position - create another IlluminaRecord in the variantMap + //TODO deal with multiple dbSnps for same id here!!! + } else { + updateIlluminaRecord(illRec, rec); + } + illuminaDbSnpCount++; + } + + varId = ChrPointPosition.valueOf(rec.getChromosome(), rec.getChromosomePosition()); + // lookup variant map to see if we have a matching record + varRec = variantMap.get(varId); + if (null == varRec && null != illRec && illRec.isSnp()) { + // don't have an existing record at this position, but we want to put illumina data in here if its a snp + varRec = new VariantRecord(); + variantMap.put(varId, varRec); + } + + if (null != varRec) { + // update required fields + varRec.setDbSnpID(rec.getRefSnp()); + varRec.setDbSnpStrand(rec.getStrand().charAt(0)); + varRec.setDbSnpRef_Alt(rec.getRefGenome() + "__" + rec.getVariant()); + + if (++updateCount % 100000 == 0) + logger.info("updated " + updateCount + " variant records with dbSNP ids"); + } + +// dbSNPRecords.add(rec); + if (++noOfDbSnps % 1000000 == 0) + logger.info("hit " + noOfDbSnps + " dbSnp records"); + } + + logger.info("match count for dbSnp and Illumina: " + illuminaDbSnpCount); + + try { + dbSNPReader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close DbsnpFileReader", e); + exitStatus = -1; + } + } + + logger.info("No of dbSnp records: " + noOfDbSnps + " in file: " + dbSNPFile); + logger.info("No of updated variant records: " + updateCount); + } + + private void loadVCFData() { + String vcfFile = cmdLineInputFiles[2]; + VCFFileReader reader = null; + try { + reader = new VCFFileReader(new File(vcfFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate VCFFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + int vcfCount = 0; + ChrPosition id; + VariantRecord value; + + for (VcfRecord rec : reader) { + + id = ChrPointPosition.valueOf(rec.getChromosome(), rec.getPosition()); + + value = variantMap.get(id); + if (null == value) { + value = new VariantRecord(); + variantMap.put(id, value); + } + value.setVcfRef(rec.getRefChar()); + value.setVcfAlt(rec.getAlt().charAt(0)); + value.setVcfGenotype(VcfUtils.getGenotypeFromGATKVCFRecord(rec)); + vcfCount++; + } + logger.info("there were " + vcfCount + " records in the vcf file"); + try { + reader.close(); 
+ } catch (IOException e) { + logger.error("IOException caught whilst trying to close VCFFileReader", e); + exitStatus = -1; + } + } + } + + private void loadQSnpData() { + String qSnpFile = cmdLineInputFiles[2]; + PileupFileReader reader = null; + try { + reader = new PileupFileReader(new File(qSnpFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate PileupFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + int vcfCount = 0; + ChrPosition id; + VariantRecord value; + + for (String rec : reader) { +// for (PileupRecord rec : reader) { + // got some work to do here - need to split the pileup attribute to construct the object + String [] params = TabTokenizer.tokenize(rec); +// String [] params = tabbedPattern.split(rec.getPileup(), -1); + + // skip if the tumour genotype is null + String genotype = params[params.length-(isNormal ? 2 : 1)]; + if (null != genotype && ! "null".equals(genotype)) { + + id = ChrPointPosition.valueOf(params[0], Integer.parseInt(params[1])); + + value = variantMap.get(id); + if (null == value) { + value = new VariantRecord(); + variantMap.put(id, value); + } + value.setVcfRef(params[2].charAt(0)); + // value.setVcfAlt(rec.getAlt()); + value.setVcfGenotype(genotype); + vcfCount++; + } + } + logger.info("there were " + vcfCount + " records in the qsnp file"); + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close PileupFileReader", e); + exitStatus = -1; + } + } + } + + private void loadGff3Data() { + String gff3File = cmdLineInputFiles[1]; + GFF3FileReader reader = null; + try { + reader = new GFF3FileReader(new File(gff3File)); + } catch (Exception e) { + logger.error("Exception caught whilst trying to instantiate GFF3FileReader", e); + exitStatus = -1; + } + + if (null != reader) { + int gff3Count = 0; + ChrPosition id; + VariantRecord value; + String chr; + + for (GFF3Record rec : reader) { + // get QCMG chromosome from map + chr = gffToQCMG.get(rec.getSeqId()); + + id = ChrPointPosition.valueOf(chr, rec.getStart()); + + value = variantMap.get(id); + if (null == value) { + value = new VariantRecord(); + variantMap.put(id, value); + } + String attributes = rec.getAttributes(); + char genotype = attributes.charAt(attributes.indexOf("genotype=")+9); + char reference = attributes.charAt(attributes.indexOf("reference=")+10); +// value.setGffAlt(genotype+""); + value.setGffGenotype(genotype); + value.setGffRef(reference); + gff3Count++; + } + logger.info("there were " + gff3Count + " records in the gff3 file"); + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close GFF3FileReader", e); + exitStatus = -1; + } + } + } + + private void loadRawIlluminaData() { + String illuminaFile = cmdLineInputFiles[0]; + + isNormal = illuminaFile.contains("ND_"); + + IlluminaFileReader reader = null; + try { + reader = new IlluminaFileReader(new File(illuminaFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + IlluminaRecord tempRec; + for (Record rec : reader) { + tempRec = (IlluminaRecord) rec; + illuminaMap.put(tempRec.getSnpId(), tempRec); + } + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close IlluminaFileReader", e); + exitStatus = -1; + } + } + logger.info("Loaded " + illuminaMap.size() + " entries into the illumina map"); + } + +// private 
void loadIlluminaData() { +// String illuminaFile = cmdLineInputFiles[0]; +// IlluminaFileReader reader = null; +// try { +// reader = new IlluminaFileReader(new File(illuminaFile)); +// } catch (Exception e) { +// logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); +// exitStatus = -1; +// } +// +// if (null != reader) { +// VariantID id; +// IlluminaRecord tempRec; +// +// for (Record rec : reader) { +// tempRec = (IlluminaRecord) rec; +// +// id = new VariantID(tempRec.getChr(), tempRec.getStart()); +// +// VariantRecord value = variantMap.get(id); +// if (null == value) { +// value = new VariantRecord(); +// variantMap.put(id, value); +// } +// value.setIlluminaSNP(tempRec.getSnp()); +// } +// try { +// reader.close(); +// } catch (IOException e) { +// logger.error("IOException caught whilst trying to close IlluminaFileReader", e); +// exitStatus = -1; +// } +// } +// } + + private void convertIlluminaToVariant() { + ChrPosition id; + VariantRecord value; + + // loop through the illumina map converting all entries into the variantMap + for (IlluminaRecord illuminaRec : illuminaMap.values()) { + + // TODO check this !!! + // ignore records that did not have a dbSNP + if (null != illuminaRec.getChr()) { + + id = ChrPointPosition.valueOf(illuminaRec.getChr(), illuminaRec.getStart()); + + value = variantMap.get(id); + if (null == value && illuminaRec.isSnp()) { + // only want to populate our map with illumina data that does not have a corresponding gff or vcf record + // if it contains a snp + value = new VariantRecord(); + variantMap.put(id, value); + } + + if (null != value) { + value.setDbSnpID(illuminaRec.getSnpId()); +// value.setIlluminaAlt(illuminaRec.getRefGenomeRefSNPAllele()); + value.setIlluminaRef(illuminaRec.getSnp()); + value.setIllAllele1(illuminaRec.getFirstAllele()); + value.setIllAllele2(illuminaRec.getSecondAllele()); + value.setIllGCScore(illuminaRec.getGCScore()); + value.setIllTypeHom(illuminaRec.isHom()); + } + } + } + + // clear illuminaMap - no longer required + illuminaMap.clear(); + } + + + private void updateIlluminaRecord(IlluminaRecord illuminaRec, Dbsnp130Record dbSnpRec) { + // standard value setting here... + char dbSnpStrand = dbSnpRec.getStrand().charAt(0); + illuminaRec.setChr(dbSnpRec.getChromosome()); + illuminaRec.setStart(dbSnpRec.getChromosomePosition()); +// illuminaRec.setRefGenomeRefSNPAllele(dbSnpRec.getRefGenome() + "__" + dbSnpRec.getVariant()); + + // now gets a bit more interesting + char strand; + // if illumina alleles are equal to dbsnp alleles + if (BaseUtils.areGenotypesEqual(dbSnpRec.getVariant(), illuminaRec.getSnp())) { + strand = dbSnpStrand; + } else strand = '+' == dbSnpStrand ? '-' : '+'; +// if (illuminaRec.getReference().charAt(1) == dbAlleles.charAt(0) && +// illuminaRec.getReference().charAt(3) == dbAlleles.charAt(2)) { +// strand = dbSnpStrand; +// } else strand = '+' == dbSnpStrand ? 
'-' : '+'; + + // no longer switch the illumina snp call, but the actual allele data +// if ('-' == strand) +// illuminaRec.setReference(BaseUtils.getComplementFromString(illuminaRec.getReference())); +// else +// illuminaRec.setReference(illuminaRec.getReference().substring(1, illuminaRec.getReference().length()-1)); + if ('-' == strand) { + illuminaRec.setFirstAllele(BaseUtils.getComplement(illuminaRec.getFirstAllele())); + illuminaRec.setSecondAllele(BaseUtils.getComplement(illuminaRec.getSecondAllele())); + } + // trim illumina snp + illuminaRec.setSnp(illuminaRec.getSnp().substring(1, illuminaRec.getSnp().length()-1)); + + // set snp + illuminaRec.setSnp(isSnp(dbSnpRec.getRefGenome(), illuminaRec.getFirstAllele(), illuminaRec.getSecondAllele())); + } + + private boolean isSnp(String ref, char alleleOne, char alleleTwo) { + if (null == ref || DEFAULT_CHAR == alleleOne || DEFAULT_CHAR == alleleTwo) + return false; + return ref.charAt(0) != alleleOne || ref.charAt(0) != alleleTwo; + } +// private boolean isSnp(String ref, String genotype) { +// if (null == ref || null == genotype) +// return false; +// // assume ref is of type A +// // assume genotype is of the form A/G +// return ref.charAt(0) != genotype.charAt(0) || ref.charAt(0) != genotype.charAt(2); +// } + + + private void outputVariantData() { + FileWriter allRecordsWriter = null; + FileWriter nonDbSnpwriter = null; + try { + allRecordsWriter = new FileWriter(new File(cmdLineOutputFiles[0])); // should be the first output file supplied + nonDbSnpwriter = new FileWriter(new File(cmdLineOutputFiles[1])); // should be the second output file supplied + allRecordsWriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP + "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina + "gff3_ref\talt\tgen" + //gff + "\tvfc_ref\talt\tgen\t" + //vcf + "pileup\t" + //pileup + "posMatch\tgenMatch\n"); //matching + + nonDbSnpwriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP + "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina + "gff3_ref\talt\tgen" + //gff + "\tvfc_ref\talt\tgen\t" + //vcf + "pileup\n" + //pileup + "posMatch\tgenMatch\n"); //matching + } catch (IOException ioe) { + logger.error("IOException caught whilst outputting data", ioe); + } + + //plonk the data into a TreeMap to bring some order to the proceedings.. + TreeMap sortedVariantMap = new TreeMap(variantMap); + + ChrPosition id; + VariantRecord value; +// String chr; + + for (Map.Entry entry : sortedVariantMap.entrySet()) { + id = entry.getKey(); + value = entry.getValue(); +// chr = ( ! id.getChromosome().startsWith("GL") ? 
"chr" : "") + id.getChromosome(); + + try { + allRecordsWriter.write(id.getChromosome() + "\t" + + id.getStartPosition() + "\t" + + value.formattedRecord() ); + // only want non dbSNP records + if (null == value.getDbSnpID()) { + nonDbSnpwriter.write(id.getChromosome() + "\t" + + id.getStartPosition() + "\t" + + value.formattedRecord() ); + } + } catch (IOException e) { + logger.error("IOException caught whilst outputting data", e); + } + } + + // close up + try { + allRecordsWriter.close(); + nonDbSnpwriter.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close output files", e); + } + } + + + public static void main(String[] args) throws Exception { + SnpPicker sp = new SnpPicker(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(SnpPicker.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("SnpPicker", SnpPicker.class.getPackage().getImplementationVersion()); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- new file mode 100644 index 000000000..63193c01a --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- @@ -0,0 +1,802 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import htsjdk.samtools.SAMRecord; + +import org.qcmg.chrconv.ChrConvFileReader; +import org.qcmg.chrconv.ChromosomeConversionRecord; +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.Genotype; +import org.qcmg.common.util.BaseUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.dbsnp.Dbsnp130Record; +import org.qcmg.dbsnp.DbsnpFileReader; +import org.qcmg.qmule.gff3.GFF3FileReader; +import org.qcmg.qmule.gff3.GFF3Record; +import org.qcmg.picard.QJumper; +import org.qcmg.pileup.PileupFileReader; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.record.Record; +import org.qcmg.unused.illumina.IlluminaFileReader; +import org.qcmg.unused.illumina.IlluminaRecord; +import org.qcmg.vcf.VCFFileReader; + +public class SnpPicker { + + private static final char DEFAULT_CHAR = '\u0000'; + private static QLogger logger; +// private static DecimalFormat df = new DecimalFormat("0.0000"); + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private static boolean isNormal; + +// private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + + Map illuminaMap = new HashMap(1000000,0.99f); // not expecting more than 1000000 + + Map variantMap = new HashMap(2000000); + + // map to hold chromosome conversion data + Map gffToQCMG = new HashMap(100, 0.99f); + +// List illuminaRecords = new ArrayList(); +// List dbSNPRecords = new ArrayList(13000000); + + private int engage() throws Exception { + + // populate the chromosome conversion map + logger.info("about to load chromosome conversion data"); + loadChromosomeConversionData(); + logger.info("about to load chromosome conversion data - DONE"); + + // we are working off the raw illumina data here - first convert it into filtered format, and use that as the input + + logger.info("About to load raw illumina data"); + loadRawIlluminaData(); +// logger.info("No of variant records: " + variantMap.size() + " in file: " + cmdLineInputFiles[0]); + + logger.info("About to load gff3 data"); + loadGff3Data(); + logger.info("No of variant records: " + variantMap.size()); + +// logger.info("About to load vcf data"); +// loadVCFData(); +// logger.info("No of variant records: " + variantMap.size()); + + logger.info("About to load qsnp data"); + loadQSnpData(); + logger.info("No of variant records: " + variantMap.size()); + + + + + logger.info("About to load dbSNP data"); + loadDbSnpData(); +// logger.info("No of variant records: " + variantMap.size()); + + // update variantMap with details from illuminaMap + logger.info("About to load filtered illumina data into variant map"); + convertIlluminaToVariant(); + logger.info("About to load filtered illumina data into variant map - DONE"); + + // get some stats + displayStats(); + + // pileup + logger.info("time for pileup..."); + getPileup(); + logger.info("time for pileup - DONE"); + + // more stats + displayStats2(); + + logger.info("Will now attempt to write out variant 
data" ); + outputVariantData(); + logger.info("Will now attempt to write out variant data - DONE"); + + return exitStatus; + } + + private void getPileup() throws Exception { + QJumper qj = new QJumper(); + qj.setupReader(cmdLineInputFiles[5], cmdLineInputFiles[6]); + + VariantRecord rec; + StringBuilder pileup = new StringBuilder(); + List reads; +// String chr; + int position; + int offset; + + int pileupCount = 0; + for (Map.Entry entry : variantMap.entrySet()) { + // only want pileup if we have gff or vcf data + rec = entry.getValue(); + if (DEFAULT_CHAR != rec.getGffRef() || null != rec.getVcfGenotype()) { +// chr = ( ! entry.getKey().getChromosome().startsWith("GL") ? "chr" : "") + entry.getKey().getChromosome(); + + reads = qj.getRecordsAtPosition(entry.getKey().getChromosome(), entry.getKey().getStartPosition()); + // do something with the reads + position = entry.getKey().getStartPosition(); + for (SAMRecord sr : reads) { + offset = position - sr.getAlignmentStart(); + pileup.append((char)sr.getReadBases()[offset]); + } + rec.setPileup(pileup.toString()); + + // reset the StringBuilder + pileup.setLength(0); + + if (++pileupCount % 1000 == 0) + logger.info("Run " + pileupCount + " pileups so far, " + reads.size() + " sam records returned from picard"); + } + } + } + + private void loadChromosomeConversionData() { + String chrConvFile = cmdLineInputFiles[4]; + ChrConvFileReader reader = null; + try { + reader = new ChrConvFileReader(new File(chrConvFile)); + } catch (Exception e) { + logger.error("Exception caught whilst trying to instantiate ChrConvFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + for (ChromosomeConversionRecord record : reader) { + // add extra map inserts here as required + // diBayes field is no longer present in chr conv file +// gffToQCMG.put(record.getDiBayes(), record.getQcmg()); + // guessing we want ensemble in here as the key + gffToQCMG.put(record.getEnsembleV55(), record.getQcmg()); + } + + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close ChrConvFileReader", e); + exitStatus = -1; + } + } + } + + private void displayStats() { + int illuminaOnly = 0; + int gff3Only = 0; + int vcfOnly = 0; + int vcfANDgff = 0; + int vcfANDillumina = 0; + int gffANDillumina = 0; + int allThree = 0; + for (VariantRecord record : variantMap.values()) { + + boolean illuminaDataPresent = null != record.getIlluminaRef(); + boolean gffDataPresent = DEFAULT_CHAR != record.getGffRef(); + boolean vcfDataPresent = DEFAULT_CHAR != record.getVcfRef(); + + if (illuminaDataPresent && gffDataPresent && vcfDataPresent) { + allThree++; + record.setPositionMatch("IGV"); + } else if (gffDataPresent && vcfDataPresent) { + vcfANDgff++; + record.setPositionMatch("GV"); + } else if (illuminaDataPresent && vcfDataPresent) { + vcfANDillumina++; + record.setPositionMatch("IV"); + } else if (illuminaDataPresent && gffDataPresent) { + gffANDillumina++; + record.setPositionMatch("IG"); + } else if ( gffDataPresent) { + gff3Only++; + record.setPositionMatch("G"); + }else if ( vcfDataPresent) { + vcfOnly++; + record.setPositionMatch("V"); + }else if ( illuminaDataPresent) { + illuminaOnly++; + record.setPositionMatch("I"); + } + + record.setGenotypeMatch(getGenotypeMatchInfo(record)); + } + + logger.info("allThree: " + allThree); + logger.info("illuminaOnly: " + illuminaOnly); + logger.info("gff3Only: " + gff3Only); + logger.info("vcfANDgff: " + vcfANDgff); + logger.info("vcfANDillumina: " + vcfANDillumina); + 
logger.info("gffANDillumina: " + gffANDillumina); + logger.info("vcfOnly: " + vcfOnly); + + int total = allThree + illuminaOnly + gff3Only + vcfANDgff + vcfANDillumina + gffANDillumina + vcfOnly; + logger.info("Sum of above numbers: " + total); + logger.info("No of records in map: " + variantMap.size()); + + } + + private void displayStats2() { + final String IGV = "IGV"; + final String IG = "IG"; + final String IV = "IV"; + final String GV = "GV"; + final String I = "I"; + final String G = "G"; + final String V = "V"; + + int positionIGV=0, positionIG=0, positionIV=0, positionGV=0, positionI=0, positionG=0, positionV = 0; + int pIGVgIGV=0, pIGVgIG=0, pIGVgIV=0, pIGVgGV=0; + int pIGgIG=0; + int pIVgIV=0; + int pGVgGV=0; + + + for (VariantRecord record : variantMap.values()) { + + String positionMatch = record.getPositionMatch(); + String genotypeMatch = record.getGenotypeMatch(); + + if (IGV.equals(positionMatch)) { + positionIGV++; + if (IGV.equals(genotypeMatch)) pIGVgIGV++; + else if (IG.equals(genotypeMatch)) pIGVgIG++; + else if (IV.equals(genotypeMatch)) pIGVgIV++; + else if (GV.equals(genotypeMatch)) pIGVgGV++; + + } else if (IG.equals(positionMatch)) { + positionIG++; + if (IG.equals(genotypeMatch)) pIGgIG++; + + } else if (IV.equals(positionMatch)) { + positionIV++; + if (IV.equals(genotypeMatch)) pIVgIV++; + + } else if (GV.equals(positionMatch)) { + positionGV++; + if (GV.equals(genotypeMatch)) pGVgGV++; + + } else if (I.equals(positionMatch)) positionI++; + else if ( G.equals(positionMatch)) positionG++; + else if ( V.equals(positionMatch)) positionV++; + } + + logger.info("position IGV: " + positionIGV + ", genotype IGV: " + pIGVgIGV + ", genotype IG: " + pIGVgIG + ", genotype IV: " + pIGVgIV + ", genotype GV: " + pIGVgGV); + logger.info("position IG: " + positionIG + ", genotype IG: " + pIGgIG); + logger.info("position IV: " + positionIV + ", genotype IV: " + pIVgIV); + logger.info("position GV: " + positionGV + ", genotype GV: " + pGVgGV); + + logger.info("position I: " + positionI); + logger.info("position G: " + positionG); + logger.info("position V: " + positionV); + + int total = positionIGV + positionIG + positionIV + positionGV + positionI + positionG + positionV; + logger.info("Sum of above numbers: " + total); + logger.info("No of records in map: " + variantMap.size()); + + } + + private String getGenotypeMatchInfo(VariantRecord record) { + Genotype illuminaGen = BaseUtils.getGenotype(record.getIllAllele1() , record.getIllAllele2()); +// String illuminaGen = record.getIlluminaRef(); + Genotype gffGen = BaseUtils.getGenotypeFromIUPACCode(record.getGffGenotype()); + Genotype vcfGen = null; + if (DEFAULT_CHAR != record.getVcfAlt()) + vcfGen = BaseUtils.getGenotypeFromVcf(record.getVcfGenotype(), record.getVcfRef(), record.getVcfAlt()); + else + vcfGen = BaseUtils.getGenotype(record.getVcfGenotype()); + + String result = null; + + if (illuminaGen.equals( gffGen) && illuminaGen.equals(vcfGen)) result = "IGV"; + else if (illuminaGen.equals(gffGen)) result = "IG"; + else if (illuminaGen.equals(vcfGen)) result = "IV"; + else if (null != gffGen && gffGen.equals(vcfGen)) result = "GV"; +// if (doStringsMatch(illuminaGen, gffGen) && doStringsMatch(illuminaGen, vcfGen)) result = "IGV"; +// else if (doStringsMatch(illuminaGen, gffGen)) result = "IG"; +// else if (doStringsMatch(illuminaGen, vcfGen)) result = "IV"; +// else if (doStringsMatch(gffGen, vcfGen)) result = "GV"; + + return result; + } + + private boolean doStringsMatch(String a, String b) { + return null == a ? 
false : a.equals(b); + } + + private void loadDbSnpData() { + // update records with dbsnp info + // should be second of the input files + String dbSNPFile = cmdLineInputFiles[3]; + DbsnpFileReader dbSNPReader = null; + try { + dbSNPReader = new DbsnpFileReader(new File(dbSNPFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate DbsnpFileReader", e); + exitStatus = -1; + } + + int updateCount = 0; + int noOfDbSnps = 0; + if (null != dbSNPReader) { + + ChrPosition varId; + VariantRecord varRec; + IlluminaRecord illRec; + int illuminaDbSnpCount = 0; + + for (Dbsnp130Record rec : dbSNPReader) { + // update illumina array with dbSNP details + illRec = illuminaMap.get(rec.getRefSnp()); + if (null != illRec) { + if (null != illRec.getChr()) { + logger.info("illumina rec: " + illRec.getChr() + ":" + illRec.getStart() + ":" + illRec.getSnpId() +" has already been updated - dbSNP: " + rec.getChromosome() + ":" + rec.getChromosomePosition() + ":" + rec.getRefSnp()); + // dbSNP id has more than 1 chr and position - create another IlluminaRecord in the variantMap + //TODO deal with multiple dbSnps for same id here!!! + } else { + updateIlluminaRecord(illRec, rec); + } + illuminaDbSnpCount++; + } + + varId = ChrPointPosition.valueOf(rec.getChromosome(), rec.getChromosomePosition()); + // lookup variant map to see if we have a matching record + varRec = variantMap.get(varId); + if (null == varRec && null != illRec && illRec.isSnp()) { + // don't have an existing record at this position, but we want to put illumina data in here if its a snp + varRec = new VariantRecord(); + variantMap.put(varId, varRec); + } + + if (null != varRec) { + // update required fields + varRec.setDbSnpID(rec.getRefSnp()); + varRec.setDbSnpStrand(rec.getStrand().charAt(0)); + varRec.setDbSnpRef_Alt(rec.getRefGenome() + "__" + rec.getVariant()); + + if (++updateCount % 100000 == 0) + logger.info("updated " + updateCount + " variant records with dbSNP ids"); + } + +// dbSNPRecords.add(rec); + if (++noOfDbSnps % 1000000 == 0) + logger.info("hit " + noOfDbSnps + " dbSnp records"); + } + + logger.info("match count for dbSnp and Illumina: " + illuminaDbSnpCount); + + try { + dbSNPReader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close DbsnpFileReader", e); + exitStatus = -1; + } + } + + logger.info("No of dbSnp records: " + noOfDbSnps + " in file: " + dbSNPFile); + logger.info("No of updated variant records: " + updateCount); + } + + private void loadVCFData() { + String vcfFile = cmdLineInputFiles[2]; + VCFFileReader reader = null; + try { + reader = new VCFFileReader(new File(vcfFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate VCFFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + int vcfCount = 0; + ChrPosition id; + VariantRecord value; + + for (VcfRecord rec : reader) { + + id = ChrPointPosition.valueOf(rec.getChromosome(), rec.getPosition()); + + value = variantMap.get(id); + if (null == value) { + value = new VariantRecord(); + variantMap.put(id, value); + } + value.setVcfRef(rec.getRefChar()); + value.setVcfAlt(rec.getAlt().charAt(0)); + value.setVcfGenotype(VcfUtils.getGenotypeFromGATKVCFRecord(rec)); + vcfCount++; + } + logger.info("there were " + vcfCount + " records in the vcf file"); + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close VCFFileReader", e); + exitStatus = -1; + } + } + } + + private void loadQSnpData() { + 
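// qsnp output is read line-by-line as tab-separated text: the genotype column (second-last
+ // for a normal sample, last otherwise) is inspected, lines with a "null" genotype are skipped,
+ // and the rest are keyed by chromosome/position and merged into variantMap as vcf-style entries.
+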
String qSnpFile = cmdLineInputFiles[2]; + PileupFileReader reader = null; + try { + reader = new PileupFileReader(new File(qSnpFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate PileupFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + int vcfCount = 0; + ChrPosition id; + VariantRecord value; + + for (String rec : reader) { +// for (PileupRecord rec : reader) { + // got some work to do here - need to split the pileup attribute to construct the object + String [] params = TabTokenizer.tokenize(rec); +// String [] params = tabbedPattern.split(rec.getPileup(), -1); + + // skip if the tumour genotype is null + String genotype = params[params.length-(isNormal ? 2 : 1)]; + if (null != genotype && ! "null".equals(genotype)) { + + id = ChrPointPosition.valueOf(params[0], Integer.parseInt(params[1])); + + value = variantMap.get(id); + if (null == value) { + value = new VariantRecord(); + variantMap.put(id, value); + } + value.setVcfRef(params[2].charAt(0)); + // value.setVcfAlt(rec.getAlt()); + value.setVcfGenotype(genotype); + vcfCount++; + } + } + logger.info("there were " + vcfCount + " records in the qsnp file"); + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close PileupFileReader", e); + exitStatus = -1; + } + } + } + + private void loadGff3Data() { + String gff3File = cmdLineInputFiles[1]; + GFF3FileReader reader = null; + try { + reader = new GFF3FileReader(new File(gff3File)); + } catch (Exception e) { + logger.error("Exception caught whilst trying to instantiate GFF3FileReader", e); + exitStatus = -1; + } + + if (null != reader) { + int gff3Count = 0; + ChrPosition id; + VariantRecord value; + String chr; + + for (GFF3Record rec : reader) { + // get QCMG chromosome from map + chr = gffToQCMG.get(rec.getSeqId()); + + id = ChrPointPosition.valueOf(chr, rec.getStart()); + + value = variantMap.get(id); + if (null == value) { + value = new VariantRecord(); + variantMap.put(id, value); + } + String attributes = rec.getAttributes(); + char genotype = attributes.charAt(attributes.indexOf("genotype=")+9); + char reference = attributes.charAt(attributes.indexOf("reference=")+10); +// value.setGffAlt(genotype+""); + value.setGffGenotype(genotype); + value.setGffRef(reference); + gff3Count++; + } + logger.info("there were " + gff3Count + " records in the gff3 file"); + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close GFF3FileReader", e); + exitStatus = -1; + } + } + } + + private void loadRawIlluminaData() { + String illuminaFile = cmdLineInputFiles[0]; + + isNormal = illuminaFile.contains("ND_"); + + IlluminaFileReader reader = null; + try { + reader = new IlluminaFileReader(new File(illuminaFile)); + } catch (Exception e) { + logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); + exitStatus = -1; + } + + if (null != reader) { + IlluminaRecord tempRec; + for (Record rec : reader) { + tempRec = (IlluminaRecord) rec; + illuminaMap.put(tempRec.getSnpId(), tempRec); + } + try { + reader.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close IlluminaFileReader", e); + exitStatus = -1; + } + } + logger.info("Loaded " + illuminaMap.size() + " entries into the illumina map"); + } + +// private void loadIlluminaData() { +// String illuminaFile = cmdLineInputFiles[0]; +// IlluminaFileReader reader = null; +// try { +// reader = new IlluminaFileReader(new 
File(illuminaFile)); +// } catch (Exception e) { +// logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); +// exitStatus = -1; +// } +// +// if (null != reader) { +// VariantID id; +// IlluminaRecord tempRec; +// +// for (Record rec : reader) { +// tempRec = (IlluminaRecord) rec; +// +// id = new VariantID(tempRec.getChr(), tempRec.getStart()); +// +// VariantRecord value = variantMap.get(id); +// if (null == value) { +// value = new VariantRecord(); +// variantMap.put(id, value); +// } +// value.setIlluminaSNP(tempRec.getSnp()); +// } +// try { +// reader.close(); +// } catch (IOException e) { +// logger.error("IOException caught whilst trying to close IlluminaFileReader", e); +// exitStatus = -1; +// } +// } +// } + + private void convertIlluminaToVariant() { + ChrPosition id; + VariantRecord value; + + // loop through the illumina map converting all entries into the variantMap + for (IlluminaRecord illuminaRec : illuminaMap.values()) { + + // TODO check this !!! + // ignore records that did not have a dbSNP + if (null != illuminaRec.getChr()) { + + id = ChrPointPosition.valueOf(illuminaRec.getChr(), illuminaRec.getStart()); + + value = variantMap.get(id); + if (null == value && illuminaRec.isSnp()) { + // only want to populate our map with illumina data that does not have a corresponding gff or vcf record + // if it contains a snp + value = new VariantRecord(); + variantMap.put(id, value); + } + + if (null != value) { + value.setDbSnpID(illuminaRec.getSnpId()); +// value.setIlluminaAlt(illuminaRec.getRefGenomeRefSNPAllele()); + value.setIlluminaRef(illuminaRec.getSnp()); + value.setIllAllele1(illuminaRec.getFirstAllele()); + value.setIllAllele2(illuminaRec.getSecondAllele()); + value.setIllGCScore(illuminaRec.getGCScore()); + value.setIllTypeHom(illuminaRec.isHom()); + } + } + } + + // clear illuminaMap - no longer required + illuminaMap.clear(); + } + + + private void updateIlluminaRecord(IlluminaRecord illuminaRec, Dbsnp130Record dbSnpRec) { + // standard value setting here... + char dbSnpStrand = dbSnpRec.getStrand().charAt(0); + illuminaRec.setChr(dbSnpRec.getChromosome()); + illuminaRec.setStart(dbSnpRec.getChromosomePosition()); +// illuminaRec.setRefGenomeRefSNPAllele(dbSnpRec.getRefGenome() + "__" + dbSnpRec.getVariant()); + + // now gets a bit more interesting + char strand; + // if illumina alleles are equal to dbsnp alleles + if (BaseUtils.areGenotypesEqual(dbSnpRec.getVariant(), illuminaRec.getSnp())) { + strand = dbSnpStrand; + } else strand = '+' == dbSnpStrand ? '-' : '+'; +// if (illuminaRec.getReference().charAt(1) == dbAlleles.charAt(0) && +// illuminaRec.getReference().charAt(3) == dbAlleles.charAt(2)) { +// strand = dbSnpStrand; +// } else strand = '+' == dbSnpStrand ? 
'-' : '+'; + + // no longer switch the illumina snp call, but the actual allele data +// if ('-' == strand) +// illuminaRec.setReference(BaseUtils.getComplementFromString(illuminaRec.getReference())); +// else +// illuminaRec.setReference(illuminaRec.getReference().substring(1, illuminaRec.getReference().length()-1)); + if ('-' == strand) { + illuminaRec.setFirstAllele(BaseUtils.getComplement(illuminaRec.getFirstAllele())); + illuminaRec.setSecondAllele(BaseUtils.getComplement(illuminaRec.getSecondAllele())); + } + // trim illumina snp + illuminaRec.setSnp(illuminaRec.getSnp().substring(1, illuminaRec.getSnp().length()-1)); + + // set snp + illuminaRec.setSnp(isSnp(dbSnpRec.getRefGenome(), illuminaRec.getFirstAllele(), illuminaRec.getSecondAllele())); + } + + private boolean isSnp(String ref, char alleleOne, char alleleTwo) { + if (null == ref || DEFAULT_CHAR == alleleOne || DEFAULT_CHAR == alleleTwo) + return false; + return ref.charAt(0) != alleleOne || ref.charAt(0) != alleleTwo; + } +// private boolean isSnp(String ref, String genotype) { +// if (null == ref || null == genotype) +// return false; +// // assume ref is of type A +// // assume genotype is of the form A/G +// return ref.charAt(0) != genotype.charAt(0) || ref.charAt(0) != genotype.charAt(2); +// } + + + private void outputVariantData() { + FileWriter allRecordsWriter = null; + FileWriter nonDbSnpwriter = null; + try { + allRecordsWriter = new FileWriter(new File(cmdLineOutputFiles[0])); // should be the first output file supplied + nonDbSnpwriter = new FileWriter(new File(cmdLineOutputFiles[1])); // should be the second output file supplied + allRecordsWriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP + "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina + "gff3_ref\talt\tgen" + //gff + "\tvfc_ref\talt\tgen\t" + //vcf + "pileup\t" + //pileup + "posMatch\tgenMatch\n"); //matching + + nonDbSnpwriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP + "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina + "gff3_ref\talt\tgen" + //gff + "\tvfc_ref\talt\tgen\t" + //vcf + "pileup\n" + //pileup + "posMatch\tgenMatch\n"); //matching + } catch (IOException ioe) { + logger.error("IOException caught whilst outputting data", ioe); + } + + //plonk the data into a TreeMap to bring some order to the proceedings.. + TreeMap sortedVariantMap = new TreeMap(variantMap); + + ChrPosition id; + VariantRecord value; +// String chr; + + for (Map.Entry entry : sortedVariantMap.entrySet()) { + id = entry.getKey(); + value = entry.getValue(); +// chr = ( ! id.getChromosome().startsWith("GL") ? 
"chr" : "") + id.getChromosome(); + + try { + allRecordsWriter.write(id.getChromosome() + "\t" + + id.getStartPosition() + "\t" + + value.formattedRecord() ); + // only want non dbSNP records + if (null == value.getDbSnpID()) { + nonDbSnpwriter.write(id.getChromosome() + "\t" + + id.getStartPosition() + "\t" + + value.formattedRecord() ); + } + } catch (IOException e) { + logger.error("IOException caught whilst outputting data", e); + } + } + + // close up + try { + allRecordsWriter.close(); + nonDbSnpwriter.close(); + } catch (IOException e) { + logger.error("IOException caught whilst trying to close output files", e); + } + } + + + public static void main(String[] args) throws Exception { + SnpPicker sp = new SnpPicker(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(SnpPicker.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("SnpPicker", SnpPicker.class.getPackage().getImplementationVersion()); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java new file mode 100644 index 000000000..7e6275fe1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java @@ -0,0 +1,200 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Pattern; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.pileup.PileupFileReader; + +public class UniqueQSnps { + + private static final QLogger logger = QLoggerFactory.getLogger(UniqueQSnps.class); + + private static Map qSnpPileup = new HashMap(10000); +// private static Map qSnpPileup = new HashMap(10000); + private static Map gatkVcfs = new HashMap(10000); +// private static Map gatkVcfs = new HashMap(10000); + private static Map verifiedSNPs = new HashMap(500); +// private static Map verifiedSNPs = new HashMap(500); + + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + + + public static void main(String[] args) throws Exception { + logger.info("hello..."); + + String filename = args[0]; + boolean runQPileup = true; + // filename type depends on whether to load qpileup or vcf + if (FileUtils.isFileTypeValid(filename, "vcf")) { + runQPileup = false; + } + loadVerifiedSnps(args[1]); + logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); + + + if (runQPileup) { + // load the existing pileup into memory + logger.info("running in pileup mode"); + loadQPileup(args[0]); + logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); + examine(args[2]); + } else { + logger.info("running in vcf mode"); + loadGatkData(args[0]); + logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); + examineVCFs(args[2]); + } + + + // load the existing pileup into memory + + examine(args[2]); + logger.info("goodbye..."); + } + + + private static void examine(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : qSnpPileup.entrySet()) { + ++totalCount; + String verifiedRecord = verifiedSNPs.get(entry.getKey()); +// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); + String qSnpRecord = entry.getValue(); + + if (null == verifiedRecord) { + String [] params = TabTokenizer.tokenize(qSnpRecord); +// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); + String annotation = params[params.length-1]; + if ("--".equals(annotation)) { + ++uniqueQSnpClassACount; + writer.write(qSnpRecord + "\n"); + } else if ("less than 12 reads coverage in normal".equals(annotation)) { + ++uniqueQSnpClassBCount; + writer.write(qSnpRecord + "\n"); + } + } + } + + writer.close(); + logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); + } + } + + private static void examineVCFs(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : qSnpPileup.entrySet()) { + ++totalCount; + String verifiedRecord = 
verifiedSNPs.get(entry.getKey()); +// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); + String qSnpRecord = entry.getValue(); +// PileupRecord qSnpRecord = entry.getValue(); + + if (null == verifiedRecord) { + String [] params = TabTokenizer.tokenize(qSnpRecord); +// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); + String annotation = params[params.length-1]; + if ("--".equals(annotation)) { + ++uniqueQSnpClassACount; + writer.write(qSnpRecord + "\n"); + } else if ("less than 12 reads coverage in normal".equals(annotation)) { + ++uniqueQSnpClassBCount; + writer.write(qSnpRecord + "\n"); + } + } + } + + writer.close(); + logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); + } + } + + + private static void loadQPileup(String pileupFile) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + PileupFileReader reader = new PileupFileReader(new File(pileupFile)); + for (String pr : reader) { +// for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); +// String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[params.length-2]; +// logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + qSnpPileup.put(chrPos,pr); + } + reader.close(); + } + } + + private static void loadGatkData(String pileupFile) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + PileupFileReader reader = new PileupFileReader(new File(pileupFile)); + for (String pr : reader) { +// for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); +// String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[params.length-2]; +// logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + gatkVcfs.put(chrPos,pr); + } + reader.close(); + } + } + + private static void loadVerifiedSnps(String verifiedSnpFile) throws Exception { + if (FileUtils.canFileBeRead(verifiedSnpFile)) { + + PileupFileReader reader = new PileupFileReader(new File(verifiedSnpFile)); + for (String pr : reader) { +// for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); +// String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[2]; +// logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + verifiedSNPs.put(chrPos,pr); + } + reader.close(); + } + } + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- new file mode 100644 index 000000000..7e6275fe1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- @@ -0,0 +1,200 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
+ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Pattern; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.util.TabTokenizer; +import org.qcmg.pileup.PileupFileReader; + +public class UniqueQSnps { + + private static final QLogger logger = QLoggerFactory.getLogger(UniqueQSnps.class); + + private static Map qSnpPileup = new HashMap(10000); +// private static Map qSnpPileup = new HashMap(10000); + private static Map gatkVcfs = new HashMap(10000); +// private static Map gatkVcfs = new HashMap(10000); + private static Map verifiedSNPs = new HashMap(500); +// private static Map verifiedSNPs = new HashMap(500); + + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + + + public static void main(String[] args) throws Exception { + logger.info("hello..."); + + String filename = args[0]; + boolean runQPileup = true; + // filename type depends on whether to load qpileup or vcf + if (FileUtils.isFileTypeValid(filename, "vcf")) { + runQPileup = false; + } + loadVerifiedSnps(args[1]); + logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); + + + if (runQPileup) { + // load the existing pileup into memory + logger.info("running in pileup mode"); + loadQPileup(args[0]); + logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); + examine(args[2]); + } else { + logger.info("running in vcf mode"); + loadGatkData(args[0]); + logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); + examineVCFs(args[2]); + } + + + // load the existing pileup into memory + + examine(args[2]); + logger.info("goodbye..."); + } + + + private static void examine(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + for (final Map.Entry entry : qSnpPileup.entrySet()) { + ++totalCount; + String verifiedRecord = verifiedSNPs.get(entry.getKey()); +// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); + String qSnpRecord = entry.getValue(); + + if (null == verifiedRecord) { + String [] params = TabTokenizer.tokenize(qSnpRecord); +// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); + String annotation = params[params.length-1]; + if ("--".equals(annotation)) { + ++uniqueQSnpClassACount; + writer.write(qSnpRecord + "\n"); + } else if ("less than 12 reads coverage in normal".equals(annotation)) { + ++uniqueQSnpClassBCount; + writer.write(qSnpRecord + "\n"); + } + } + } + + writer.close(); + logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); + } + } + + private static void examineVCFs(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop 
through the verified snps + + for (final Map.Entry entry : qSnpPileup.entrySet()) { + ++totalCount; + String verifiedRecord = verifiedSNPs.get(entry.getKey()); +// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); + String qSnpRecord = entry.getValue(); +// PileupRecord qSnpRecord = entry.getValue(); + + if (null == verifiedRecord) { + String [] params = TabTokenizer.tokenize(qSnpRecord); +// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); + String annotation = params[params.length-1]; + if ("--".equals(annotation)) { + ++uniqueQSnpClassACount; + writer.write(qSnpRecord + "\n"); + } else if ("less than 12 reads coverage in normal".equals(annotation)) { + ++uniqueQSnpClassBCount; + writer.write(qSnpRecord + "\n"); + } + } + } + + writer.close(); + logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); + } + } + + + private static void loadQPileup(String pileupFile) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + PileupFileReader reader = new PileupFileReader(new File(pileupFile)); + for (String pr : reader) { +// for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); +// String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[params.length-2]; +// logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + qSnpPileup.put(chrPos,pr); + } + reader.close(); + } + } + + private static void loadGatkData(String pileupFile) throws Exception { + if (FileUtils.canFileBeRead(pileupFile)) { + PileupFileReader reader = new PileupFileReader(new File(pileupFile)); + for (String pr : reader) { +// for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); +// String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[params.length-2]; +// logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + gatkVcfs.put(chrPos,pr); + } + reader.close(); + } + } + + private static void loadVerifiedSnps(String verifiedSnpFile) throws Exception { + if (FileUtils.canFileBeRead(verifiedSnpFile)) { + + PileupFileReader reader = new PileupFileReader(new File(verifiedSnpFile)); + for (String pr : reader) { +// for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); +// String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[2]; +// logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + verifiedSNPs.put(chrPos,pr); + } + reader.close(); + } + } + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java new file mode 100644 index 000000000..4ac4d5586 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java @@ -0,0 
+1,263 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Pattern; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class UniqueSnps { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + + private static QLogger logger; + +// private static Map qSnpPileup = new HashMap(10000); +// private static Map gatkVcfs = new HashMap(10000); + private static Map verifiedSNPs = new HashMap(500); + private static Map unVerifiedSNPs = new HashMap(10000); + + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + + + public int engage() throws Exception { + logger.info("hello..."); + + loadVerifiedSnps(cmdLineInputFiles[1]); + logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); + if (verifiedSNPs.isEmpty()) exitStatus = 1; + + loadUnverifiedSnps(cmdLineInputFiles[0]); + logger.info("loaded " + unVerifiedSNPs.size() + " entries into the un-verifiedSNPs map"); + if (unVerifiedSNPs.isEmpty()) exitStatus = 1; + + +// examine(args[2]); +// if (runQPileup) { +// // load the existing pileup into memory +// logger.info("running in pileup mode"); +// loadUnverifiedSnps(args[0]); +// logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); +// } else { +// logger.info("running in vcf mode"); +// loadGatkData(args[0]); +// logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); +// examineVCFs(args[2]); +// } + + + // load the existing pileup into memory + + examine(cmdLineOutputFiles[0]); + logger.info("goodbye..."); + + return exitStatus; + } + + + private static void examine(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + int totalCount = 0, uniqueClassA = 0, uniqueClassB = 0, uniqueClassC = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + try { + for (final Map.Entry unVerifiedEntry : unVerifiedSNPs.entrySet()) { + TabbedRecord unVerifiedRecord = unVerifiedEntry.getValue(); + String [] params = tabbedPattern.split(unVerifiedRecord.getData()); + String consequenceType = params[22]; + if (consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS")) { + + ++totalCount; + + TabbedRecord verifiedRecord = verifiedSNPs.get(unVerifiedEntry.getKey()); + + if (null == verifiedRecord) { + String annotation = params[params.length-1]; + if ("--".equals(annotation)) { + ++uniqueClassA; + writer.write(unVerifiedRecord.getData() + "\n"); + } else if ("less than 12 reads coverage in normal".equals(annotation) + || "less than 3 reads coverage in normal".equals(annotation)) { + ++uniqueClassB; + writer.write(unVerifiedRecord.getData() + "\n"); + } + } + } + } + } finally { + writer.close(); + } + logger.info("totalCount: " + 
totalCount + ", uniqueQSnpCount (class A): " + uniqueClassA + ", uniqueQSnpCount (class B): " + uniqueClassB ); + } + } + +// private static void examineVCFs(String outputFile) throws IOException { +// if (FileUtils.canFileBeWrittenTo(outputFile)) { +// +// int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; +// +// FileWriter writer = new FileWriter(new File(outputFile)); +// +// // loop through the verified snps +// +// for (final Map.Entry entry : qSnpPileup.entrySet()) { +// ++totalCount; +// TabbedRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); +// TabbedRecord qSnpRecord = entry.getValue(); +// +// if (null == verifiedRecord) { +// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); +// String annotation = params[params.length-1]; +// if ("--".equals(annotation)) { +// ++uniqueQSnpClassACount; +// writer.write(qSnpRecord.getPileup() + "\n"); +// } else if ("less than 12 reads coverage in normal".equals(annotation)) { +// ++uniqueQSnpClassBCount; +// writer.write(qSnpRecord.getPileup() + "\n"); +// } +// } +// } +// +// writer.close(); +// logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); +// } +// } + + + private static void loadUnverifiedSnps(String file) throws Exception { + if (FileUtils.canFileBeRead(file)) { + TabbedFileReader reader = new TabbedFileReader(new File(file)); + try { + for (TabbedRecord tr : reader) { + String [] params = tabbedPattern.split(tr.getData()); + String chrPosition = params[params.length-2]; +// logger.info("chrPosition: " + chrPosition); + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + unVerifiedSNPs.put(chrPos,tr); + } + } finally { + reader.close(); + } + } + } + +// private static void loadGatkData(String pileupFile) throws IOException { +// if (FileUtils.canFileBeRead(pileupFile)) { +// TabbedFileReader reader = new TabbedFileReader(new File(pileupFile)); +// for (TabbedRecord pr : reader) { +// String [] params = tabbedPattern.split(pr.getPileup()); +// String chrPosition = params[params.length-2]; +//// logger.info("chrPosition: " + chrPosition); +// ChrPosition chrPos = new ChrPosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-")))); +// +// gatkVcfs.put(chrPos,pr); +// } +// reader.close(); +// } +// } + + private void loadVerifiedSnps(String verifiedSnpFile) throws Exception { + if (FileUtils.canFileBeRead(verifiedSnpFile)) { + + TabbedFileReader reader = new TabbedFileReader(new File(verifiedSnpFile)); + try { + for (TabbedRecord tr : reader) { + String [] params = tabbedPattern.split(tr.getData()); + String chrPosition = params[2]; + // logger.info("chrPosition: " + chrPosition); + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1),start, start); + + verifiedSNPs.put(chrPos,tr); + } + } finally { + reader.close(); + } + } + } + + public static void main(String[] args) throws Exception { + UniqueSnps sp = new UniqueSnps(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options 
= new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(UniqueSnps.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("UniqueSnps", UniqueSnps.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- new file mode 100644 index 000000000..4ac4d5586 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- @@ -0,0 +1,263 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.snppicker; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Pattern; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class UniqueSnps { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + + private static QLogger logger; + +// private static Map qSnpPileup = new HashMap(10000); +// private static Map gatkVcfs = new HashMap(10000); + private static Map verifiedSNPs = new HashMap(500); + private static Map unVerifiedSNPs = new HashMap(10000); + + private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + + + public int engage() throws Exception { + logger.info("hello..."); + + loadVerifiedSnps(cmdLineInputFiles[1]); + logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); + if (verifiedSNPs.isEmpty()) exitStatus = 1; + + loadUnverifiedSnps(cmdLineInputFiles[0]); + logger.info("loaded " + unVerifiedSNPs.size() + " entries into the un-verifiedSNPs map"); + if (unVerifiedSNPs.isEmpty()) exitStatus = 1; + + +// examine(args[2]); +// if (runQPileup) { +// // load the existing pileup into memory +// logger.info("running in pileup mode"); +// loadUnverifiedSnps(args[0]); +// logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); +// } else { +// logger.info("running in vcf mode"); +// loadGatkData(args[0]); +// logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); +// examineVCFs(args[2]); +// } + + + // load the existing pileup into memory + + examine(cmdLineOutputFiles[0]); + logger.info("goodbye..."); + + return exitStatus; + } + + + private static void examine(String outputFile) throws IOException { + if (FileUtils.canFileBeWrittenTo(outputFile)) { + + int totalCount = 0, uniqueClassA = 0, uniqueClassB = 0, uniqueClassC = 0; + + FileWriter writer = new FileWriter(new File(outputFile)); + + // loop through the verified snps + + try { + for (final Map.Entry unVerifiedEntry : unVerifiedSNPs.entrySet()) { + TabbedRecord unVerifiedRecord = unVerifiedEntry.getValue(); + String [] params = tabbedPattern.split(unVerifiedRecord.getData()); + String consequenceType = params[22]; + if (consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS")) { + + ++totalCount; + + TabbedRecord verifiedRecord = verifiedSNPs.get(unVerifiedEntry.getKey()); + + if (null == verifiedRecord) { + String annotation = params[params.length-1]; + if ("--".equals(annotation)) { + ++uniqueClassA; + writer.write(unVerifiedRecord.getData() + "\n"); + } else if ("less than 12 reads coverage in normal".equals(annotation) + || "less than 3 reads coverage in normal".equals(annotation)) { + ++uniqueClassB; + writer.write(unVerifiedRecord.getData() + "\n"); + } + } + } + } + } finally { + writer.close(); + } + logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueClassA + ", uniqueQSnpCount (class B): " + uniqueClassB ); + } + } + +// private static void examineVCFs(String outputFile) throws IOException { +// if 
(FileUtils.canFileBeWrittenTo(outputFile)) { +// +// int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; +// +// FileWriter writer = new FileWriter(new File(outputFile)); +// +// // loop through the verified snps +// +// for (final Map.Entry entry : qSnpPileup.entrySet()) { +// ++totalCount; +// TabbedRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); +// TabbedRecord qSnpRecord = entry.getValue(); +// +// if (null == verifiedRecord) { +// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); +// String annotation = params[params.length-1]; +// if ("--".equals(annotation)) { +// ++uniqueQSnpClassACount; +// writer.write(qSnpRecord.getPileup() + "\n"); +// } else if ("less than 12 reads coverage in normal".equals(annotation)) { +// ++uniqueQSnpClassBCount; +// writer.write(qSnpRecord.getPileup() + "\n"); +// } +// } +// } +// +// writer.close(); +// logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); +// } +// } + + + private static void loadUnverifiedSnps(String file) throws Exception { + if (FileUtils.canFileBeRead(file)) { + TabbedFileReader reader = new TabbedFileReader(new File(file)); + try { + for (TabbedRecord tr : reader) { + String [] params = tabbedPattern.split(tr.getData()); + String chrPosition = params[params.length-2]; +// logger.info("chrPosition: " + chrPosition); + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + unVerifiedSNPs.put(chrPos,tr); + } + } finally { + reader.close(); + } + } + } + +// private static void loadGatkData(String pileupFile) throws IOException { +// if (FileUtils.canFileBeRead(pileupFile)) { +// TabbedFileReader reader = new TabbedFileReader(new File(pileupFile)); +// for (TabbedRecord pr : reader) { +// String [] params = tabbedPattern.split(pr.getPileup()); +// String chrPosition = params[params.length-2]; +//// logger.info("chrPosition: " + chrPosition); +// ChrPosition chrPos = new ChrPosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-")))); +// +// gatkVcfs.put(chrPos,pr); +// } +// reader.close(); +// } +// } + + private void loadVerifiedSnps(String verifiedSnpFile) throws Exception { + if (FileUtils.canFileBeRead(verifiedSnpFile)) { + + TabbedFileReader reader = new TabbedFileReader(new File(verifiedSnpFile)); + try { + for (TabbedRecord tr : reader) { + String [] params = tabbedPattern.split(tr.getData()); + String chrPosition = params[2]; + // logger.info("chrPosition: " + chrPosition); + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1),start, start); + + verifiedSNPs.put(chrPos,tr); + } + } finally { + reader.close(); + } + } + } + + public static void main(String[] args) throws Exception { + UniqueSnps sp = new UniqueSnps(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + 
System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(UniqueSnps.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("UniqueSnps", UniqueSnps.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java b/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java new file mode 100644 index 000000000..eefbdd9ed --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java @@ -0,0 +1,193 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import java.text.DecimalFormat; + +public class VariantRecord { + + private final static char DEFAULT_CHAR = '\u0000'; + private final static DecimalFormat df = new DecimalFormat("0.0000"); + + private String dbSnpID; + private char dbSnpStrand; + private String dbSnpRef_Alt; + private float illGCScore; + private char illAllele1; + private char illAllele2; + private boolean illTypeHom; + private String illuminaRef; +// private String illuminaAlt; + private String illuminaSNP; + private char gffRef; + private char gffGenotype; + private String gffAlt; + private char vcfRef; + private char vcfAlt; + private String vcfGenotype; + private String pileup; + private String positionMatch; + private String genotypeMatch; + + public String getDbSnpID() { + return dbSnpID; + } + public void setDbSnpID(String dbSnpID) { + this.dbSnpID = dbSnpID; + } + public String getIlluminaRef() { + return illuminaRef; + } + public void setIlluminaRef(String illuminaRef) { + this.illuminaRef = illuminaRef; + } +// public String getIlluminaAlt() { +// return illuminaAlt; +// } +// public void setIlluminaAlt(String illuminaAlt) { +// this.illuminaAlt = illuminaAlt; +// } + public char getGffRef() { + return gffRef; + } + public void setGffRef(char gffRef) { + this.gffRef = gffRef; + } + public char getGffGenotype() { + return gffGenotype; + } + public void setGffGenotype(char gffGenotype) { + this.gffGenotype = gffGenotype; + } + public String getGffAlt() { + return gffAlt; + } + public void setGffAlt(String gffAlt) { + this.gffAlt = gffAlt; + } + public char getVcfRef() { + return vcfRef; + } + public void setVcfRef(char vcfRef) { + this.vcfRef = vcfRef; + } + public char getVcfAlt() { + return vcfAlt; + } + public void setVcfAlt(char vcfAlt) { + this.vcfAlt = 
vcfAlt; + } + public String getVcfGenotype() { + return vcfGenotype; + } + public void setVcfGenotype(String vcfGenotype) { + this.vcfGenotype = vcfGenotype; + } + public void setIlluminaSNP(String illuminaSNP) { + this.illuminaSNP = illuminaSNP; + } + public String getIlluminaSNP() { + return illuminaSNP; + } + + public String formattedRecord() { + StringBuilder sb = new StringBuilder(); + + sb.append(null != dbSnpID ? dbSnpID : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != dbSnpStrand ? dbSnpStrand : ""); + sb.append("\t"); + sb.append(null != dbSnpRef_Alt ? dbSnpRef_Alt : ""); + sb.append("\t"); + sb.append(illGCScore != 0.0f ? df.format(illGCScore) : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != illAllele1 ? illAllele1 : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != illAllele2 ? illAllele2 : ""); + sb.append("\t"); + sb.append(null != illuminaRef ? (illTypeHom ? "hom" : "het") : ""); + sb.append("\t"); + sb.append(null != illuminaRef ? illuminaRef : ""); + sb.append("\t"); +// sb.append(null != illuminaAlt ? illuminaAlt : ""); +// sb.append("\t"); +// sb.append(null != illuminaSNP ? illuminaSNP : ""); +// sb.append("\t"); + sb.append(DEFAULT_CHAR != gffRef ? gffRef : ""); + sb.append("\t"); + sb.append(null != gffAlt ? gffAlt : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != gffGenotype ? gffGenotype : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != vcfRef ? vcfRef : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != vcfAlt ? vcfAlt: ""); + sb.append("\t"); + sb.append(null != vcfGenotype ? vcfGenotype: ""); + sb.append("\t"); + sb.append(null != pileup ? pileup: ""); + sb.append("\t"); + sb.append(null != positionMatch ? positionMatch: ""); + sb.append("\t"); + sb.append(null != genotypeMatch ? genotypeMatch: ""); + sb.append("\n"); + + return sb.toString(); + } + public float getIllGCScore() { + return illGCScore; + } + public void setIllGCScore(float illGCScore) { + this.illGCScore = illGCScore; + } + public char getIllAllele1() { + return illAllele1; + } + public void setIllAllele1(char illAllele1) { + this.illAllele1 = illAllele1; + } + public char getIllAllele2() { + return illAllele2; + } + public void setIllAllele2(char illAllele2) { + this.illAllele2 = illAllele2; + } + public boolean isIllTypeHom() { + return illTypeHom; + } + public void setIllTypeHom(boolean illTypeHom) { + this.illTypeHom = illTypeHom; + } + public char getDbSnpStrand() { + return dbSnpStrand; + } + public void setDbSnpStrand(char dbSnpStrand) { + this.dbSnpStrand = dbSnpStrand; + } + public String getDbSnpRef_Alt() { + return dbSnpRef_Alt; + } + public void setDbSnpRef_Alt(String dbSnpRefAlt) { + dbSnpRef_Alt = dbSnpRefAlt; + } + public void setPileup(String pileup) { + this.pileup = pileup; + } + public String getPileup(String pileup) { + return pileup; + } + public String getPositionMatch() { + return positionMatch; + } + public void setPositionMatch(String positionMatch) { + this.positionMatch = positionMatch; + } + public String getGenotypeMatch() { + return genotypeMatch; + } + public void setGenotypeMatch(String genotypeMatch) { + this.genotypeMatch = genotypeMatch; + } + +} diff --git a/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- b/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- new file mode 100644 index 000000000..eefbdd9ed --- /dev/null +++ b/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- @@ -0,0 +1,193 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.snppicker; + +import java.text.DecimalFormat; + +public class VariantRecord { + + private final static char DEFAULT_CHAR = '\u0000'; + private final static DecimalFormat df = new DecimalFormat("0.0000"); + + private String dbSnpID; + private char dbSnpStrand; + private String dbSnpRef_Alt; + private float illGCScore; + private char illAllele1; + private char illAllele2; + private boolean illTypeHom; + private String illuminaRef; +// private String illuminaAlt; + private String illuminaSNP; + private char gffRef; + private char gffGenotype; + private String gffAlt; + private char vcfRef; + private char vcfAlt; + private String vcfGenotype; + private String pileup; + private String positionMatch; + private String genotypeMatch; + + public String getDbSnpID() { + return dbSnpID; + } + public void setDbSnpID(String dbSnpID) { + this.dbSnpID = dbSnpID; + } + public String getIlluminaRef() { + return illuminaRef; + } + public void setIlluminaRef(String illuminaRef) { + this.illuminaRef = illuminaRef; + } +// public String getIlluminaAlt() { +// return illuminaAlt; +// } +// public void setIlluminaAlt(String illuminaAlt) { +// this.illuminaAlt = illuminaAlt; +// } + public char getGffRef() { + return gffRef; + } + public void setGffRef(char gffRef) { + this.gffRef = gffRef; + } + public char getGffGenotype() { + return gffGenotype; + } + public void setGffGenotype(char gffGenotype) { + this.gffGenotype = gffGenotype; + } + public String getGffAlt() { + return gffAlt; + } + public void setGffAlt(String gffAlt) { + this.gffAlt = gffAlt; + } + public char getVcfRef() { + return vcfRef; + } + public void setVcfRef(char vcfRef) { + this.vcfRef = vcfRef; + } + public char getVcfAlt() { + return vcfAlt; + } + public void setVcfAlt(char vcfAlt) { + this.vcfAlt = vcfAlt; + } + public String getVcfGenotype() { + return vcfGenotype; + } + public void setVcfGenotype(String vcfGenotype) { + this.vcfGenotype = vcfGenotype; + } + public void setIlluminaSNP(String illuminaSNP) { + this.illuminaSNP = illuminaSNP; + } + public String getIlluminaSNP() { + return illuminaSNP; + } + + public String formattedRecord() { + StringBuilder sb = new StringBuilder(); + + sb.append(null != dbSnpID ? dbSnpID : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != dbSnpStrand ? dbSnpStrand : ""); + sb.append("\t"); + sb.append(null != dbSnpRef_Alt ? dbSnpRef_Alt : ""); + sb.append("\t"); + sb.append(illGCScore != 0.0f ? df.format(illGCScore) : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != illAllele1 ? illAllele1 : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != illAllele2 ? illAllele2 : ""); + sb.append("\t"); + sb.append(null != illuminaRef ? (illTypeHom ? "hom" : "het") : ""); + sb.append("\t"); + sb.append(null != illuminaRef ? illuminaRef : ""); + sb.append("\t"); +// sb.append(null != illuminaAlt ? illuminaAlt : ""); +// sb.append("\t"); +// sb.append(null != illuminaSNP ? illuminaSNP : ""); +// sb.append("\t"); + sb.append(DEFAULT_CHAR != gffRef ? gffRef : ""); + sb.append("\t"); + sb.append(null != gffAlt ? gffAlt : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != gffGenotype ? gffGenotype : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != vcfRef ? vcfRef : ""); + sb.append("\t"); + sb.append(DEFAULT_CHAR != vcfAlt ? vcfAlt: ""); + sb.append("\t"); + sb.append(null != vcfGenotype ? vcfGenotype: ""); + sb.append("\t"); + sb.append(null != pileup ? 
pileup: ""); + sb.append("\t"); + sb.append(null != positionMatch ? positionMatch: ""); + sb.append("\t"); + sb.append(null != genotypeMatch ? genotypeMatch: ""); + sb.append("\n"); + + return sb.toString(); + } + public float getIllGCScore() { + return illGCScore; + } + public void setIllGCScore(float illGCScore) { + this.illGCScore = illGCScore; + } + public char getIllAllele1() { + return illAllele1; + } + public void setIllAllele1(char illAllele1) { + this.illAllele1 = illAllele1; + } + public char getIllAllele2() { + return illAllele2; + } + public void setIllAllele2(char illAllele2) { + this.illAllele2 = illAllele2; + } + public boolean isIllTypeHom() { + return illTypeHom; + } + public void setIllTypeHom(boolean illTypeHom) { + this.illTypeHom = illTypeHom; + } + public char getDbSnpStrand() { + return dbSnpStrand; + } + public void setDbSnpStrand(char dbSnpStrand) { + this.dbSnpStrand = dbSnpStrand; + } + public String getDbSnpRef_Alt() { + return dbSnpRef_Alt; + } + public void setDbSnpRef_Alt(String dbSnpRefAlt) { + dbSnpRef_Alt = dbSnpRefAlt; + } + public void setPileup(String pileup) { + this.pileup = pileup; + } + public String getPileup(String pileup) { + return pileup; + } + public String getPositionMatch() { + return positionMatch; + } + public void setPositionMatch(String positionMatch) { + this.positionMatch = positionMatch; + } + public String getGenotypeMatch() { + return genotypeMatch; + } + public void setGenotypeMatch(String genotypeMatch) { + this.genotypeMatch = genotypeMatch; + } + +} diff --git a/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java b/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java new file mode 100644 index 000000000..3a1e039aa --- /dev/null +++ b/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java @@ -0,0 +1,78 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.util; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.List; + +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.util.FileUtils; + +public class IGVBatchFileGenerator { + + public static final String GENOME = "GRCh37_ICGC_standard_v2"; + + + public static void generate(final List positions, final String outputFile) throws IOException { + // check that list is not empty + if (positions == null || positions.isEmpty()) + throw new IllegalArgumentException("Null or empty list passed to IGVBatchFileGenerator"); + + // can we write to the outputFile? + File output = new File(outputFile); + if( ! 
FileUtils.canFileBeWrittenTo(output)) + throw new IllegalArgumentException("Can't write to output file: " + outputFile); + + FileWriter writer = new FileWriter(output); + + try { + writer.write(getHeaderInfo(output)); + + for (ChrPosition position : positions) { + writer.write(getLocationString(position)); + } + + } finally { + writer.close(); + } + + } + + private static String getHeaderInfo(File output) { + String path = output.getParent(); + return "snapshotDirectory " + path + "\n" + + "genome " + GENOME + "\n"; + } + + private static String getLocationString(ChrPosition chrPos) { + return "goto " + chrPos.toIGVString() + + "\nsort base\n" + + "collapse\n" + + "snapshot " + chrPos.getChromosome() + ":" + chrPos.getStartPosition() + ".png\n"; + } + + + +// snapshotDirectory C:/IGV_sessions/exonorama/APGI_1992 +// genome GRCh37_ICGC_standard_v2 +// goto chr8:93156526-93156566 +// sort base +// collapse +// snapshot APGI_1992_SNP_35325-chr8-93156546-var-CtoT-WITHIN_NON_CODING_GENE-ENSG00000233778.png +// goto chr12:114377865-114377905 +// sort base +// collapse +// snapshot APGI_1992_SNP_50905-chr12-114377885-var-GtoC-SYNONYMOUS_CODING-RBM19.png +// goto chr1:228481880-228481920 +// sort base +// collapse +// snapshot APGI_1992_SNP_6964-chr1-228481900-var-GtoA-NON_SYNONYMOUS_CODING-OBSCN.png + + +} diff --git a/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- b/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- new file mode 100644 index 000000000..3a1e039aa --- /dev/null +++ b/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- @@ -0,0 +1,78 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.util; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.List; + +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.util.FileUtils; + +public class IGVBatchFileGenerator { + + public static final String GENOME = "GRCh37_ICGC_standard_v2"; + + + public static void generate(final List positions, final String outputFile) throws IOException { + // check that list is not empty + if (positions == null || positions.isEmpty()) + throw new IllegalArgumentException("Null or empty list passed to IGVBatchFileGenerator"); + + // can we write to the outputFile? + File output = new File(outputFile); + if( ! 
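A short, hypothetical usage sketch of IGVBatchFileGenerator; ChrPointPosition.valueOf is the factory used by CompareVCFs further down and is assumed here to supply a suitable ChrPosition:

    List<ChrPosition> positions = new ArrayList<>();
    positions.add(ChrPointPosition.valueOf("chr8", 93156546));    // hypothetical snapshot targets
    positions.add(ChrPointPosition.valueOf("chr12", 114377885));
    IGVBatchFileGenerator.generate(positions, "/tmp/igv/batch.txt");
    // The generated file starts with "snapshotDirectory /tmp/igv" and
    // "genome GRCh37_ICGC_standard_v2", then one goto / sort base / collapse / snapshot
    // block per position, matching the commented example at the bottom of the class.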
FileUtils.canFileBeWrittenTo(output)) + throw new IllegalArgumentException("Can't write to output file: " + outputFile); + + FileWriter writer = new FileWriter(output); + + try { + writer.write(getHeaderInfo(output)); + + for (ChrPosition position : positions) { + writer.write(getLocationString(position)); + } + + } finally { + writer.close(); + } + + } + + private static String getHeaderInfo(File output) { + String path = output.getParent(); + return "snapshotDirectory " + path + "\n" + + "genome " + GENOME + "\n"; + } + + private static String getLocationString(ChrPosition chrPos) { + return "goto " + chrPos.toIGVString() + + "\nsort base\n" + + "collapse\n" + + "snapshot " + chrPos.getChromosome() + ":" + chrPos.getStartPosition() + ".png\n"; + } + + + +// snapshotDirectory C:/IGV_sessions/exonorama/APGI_1992 +// genome GRCh37_ICGC_standard_v2 +// goto chr8:93156526-93156566 +// sort base +// collapse +// snapshot APGI_1992_SNP_35325-chr8-93156546-var-CtoT-WITHIN_NON_CODING_GENE-ENSG00000233778.png +// goto chr12:114377865-114377905 +// sort base +// collapse +// snapshot APGI_1992_SNP_50905-chr12-114377885-var-GtoC-SYNONYMOUS_CODING-RBM19.png +// goto chr1:228481880-228481920 +// sort base +// collapse +// snapshot APGI_1992_SNP_6964-chr1-228481900-var-GtoA-NON_SYNONYMOUS_CODING-OBSCN.png + + +} diff --git a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java new file mode 100644 index 000000000..60389d85b --- /dev/null +++ b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java @@ -0,0 +1,61 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.util; + +import java.io.File; +import java.util.Map; +import java.util.regex.Pattern; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.string.StringUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class TabbedDataLoader { + + public static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + private static final QLogger logger = QLoggerFactory.getLogger(TabbedDataLoader.class); + + + public static void loadTabbedData(String tabbedDataFile, int position, Map collection) throws Exception { + if (FileUtils.canFileBeRead(tabbedDataFile)) { + + TabbedFileReader reader = new TabbedFileReader(new File(tabbedDataFile)); + try { + for (TabbedRecord tr : reader) { + String [] params = tabbedPattern.split(tr.getData()); + String chrPosition = getStringFromArray(params, position); + + if (null != chrPosition) { + ChrPosition chrPos = StringUtils.getChrPositionFromString(chrPosition); + if (null != chrPos) collection.put(chrPos,tr); + } + } + + logger.info("Added " + collection.size() + " entries to the tabbed data collection"); + + } finally { + reader.close(); + } + } else { + throw new IllegalArgumentException("data file: " + tabbedDataFile + " could not be read"); + } + } + + public static String getStringFromArray(String[] params, int index) { + String result = null; + if (null != params && params.length > 0) { + if (index >= 0) { + result = params[(index > params.length ? params.length : index)]; + } else if (params.length + index >= 0 & params.length + index < params.length){ + result = params[params.length + index]; // adding a negative number! 
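To make the index handling in getStringFromArray() concrete, a small sketch with hypothetical values:

    String[] cols = {"chr1", "100", "A", "G"};
    TabbedDataLoader.getStringFromArray(cols, 1);   // "100"  - plain zero-based index
    TabbedDataLoader.getStringFromArray(cols, -1);  // "G"    - negative indices count back from the end
    TabbedDataLoader.getStringFromArray(cols, -4);  // "chr1" - params.length + index == 0
    TabbedDataLoader.getStringFromArray(cols, -5);  // null   - a negative index beyond the start returns null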
+ } + } + return result; + } + +} diff --git a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- new file mode 100644 index 000000000..60389d85b --- /dev/null +++ b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- @@ -0,0 +1,61 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.util; + +import java.io.File; +import java.util.Map; +import java.util.regex.Pattern; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.string.StringUtils; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class TabbedDataLoader { + + public static final Pattern tabbedPattern = Pattern.compile("[\\t]"); + private static final QLogger logger = QLoggerFactory.getLogger(TabbedDataLoader.class); + + + public static void loadTabbedData(String tabbedDataFile, int position, Map collection) throws Exception { + if (FileUtils.canFileBeRead(tabbedDataFile)) { + + TabbedFileReader reader = new TabbedFileReader(new File(tabbedDataFile)); + try { + for (TabbedRecord tr : reader) { + String [] params = tabbedPattern.split(tr.getData()); + String chrPosition = getStringFromArray(params, position); + + if (null != chrPosition) { + ChrPosition chrPos = StringUtils.getChrPositionFromString(chrPosition); + if (null != chrPos) collection.put(chrPos,tr); + } + } + + logger.info("Added " + collection.size() + " entries to the tabbed data collection"); + + } finally { + reader.close(); + } + } else { + throw new IllegalArgumentException("data file: " + tabbedDataFile + " could not be read"); + } + } + + public static String getStringFromArray(String[] params, int index) { + String result = null; + if (null != params && params.length > 0) { + if (index >= 0) { + result = params[(index > params.length ? params.length : index)]; + } else if (params.length + index >= 0 & params.length + index < params.length){ + result = params[params.length + index]; // adding a negative number! + } + } + return result; + } + +} diff --git a/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java b/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java new file mode 100644 index 000000000..03a4e2f03 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java @@ -0,0 +1,269 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qmule.vcf; + +import java.io.File; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicLong; + +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.GenotypeEnum; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.picard.QJumper; +import org.qcmg.picard.util.SAMUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.vcf.VCFFileReader; + +public class CompareVCFs { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private static QLogger logger; + + private final ConcurrentMap normalVCFMap = new ConcurrentHashMap(12500); //not expecting more than 100000 + private final ConcurrentMap tumourVCFMap = new ConcurrentHashMap(12500); + private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); + + public int engage() throws Exception { + + logger.info("loading normal vcf data"); + loadVCFData(cmdLineInputFiles[0], normalVCFMap); + logger.info("loading normal vcf data - DONE [" + normalVCFMap.size() + "]"); + + logger.info("loading tumour vcf data"); + loadVCFData(cmdLineInputFiles[1], tumourVCFMap); + logger.info("loading tumour vcf data - DONE [" + tumourVCFMap.size() + "]"); + + examine(); + + addPileupFromNormalBam(); + + return exitStatus; + } + + private void addPileupFromNormalBam() throws Exception { + // loop through each position in the unique map and get the entries in the normal GATK cleaned BAM file. + int notEnoughCoverage = 0, mutationFoundInNormal = 0; + StringBuilder sb = new StringBuilder(); + QJumper qj = new QJumper(); + qj.setupReader(cmdLineInputFiles[2]); + + for (Entry entry : uniqueTumourVCFMap.entrySet()) { + int position = entry.getKey().getStartPosition(); + boolean foundInNormal = false; + List sams = qj.getOverlappingRecordsAtPosition(entry.getKey().getChromosome(), position, position); + + for (SAMRecord sam : sams) { + int offset = SAMUtils.getIndexInReadFromPosition(sam, position); + if (offset > -1 && offset < sam.getReadLength()) { + char c = sam.getReadString().charAt(offset); + if (c == entry.getValue().getAlt().charAt(0)) { + foundInNormal = true; + mutationFoundInNormal++; + break; + } + } + } + + if ( ! foundInNormal && sams.size() < 8) + notEnoughCoverage++; + else if ( ! foundInNormal) + sb.append(entry.getKey().getChromosome() + ":" + position + "\n"); + } + + logger.info("total positions examined: " + uniqueTumourVCFMap.size()); + logger.info("positions where mutation was also found in normal (class C): " + mutationFoundInNormal); + logger.info("positions where coverage in normal was less than 8 (class B): " + notEnoughCoverage); + logger.info("Potential class A positions: "); + logger.info(sb.toString()); + } + + private void examine() { + + final Map diffGenotypes = new HashMap(); + + // we want to know the following... + // number unique to normal + // number unique to tumour + // no of common positions + int normalUnique = 0, tumourUnique = 0, normalAndTumour = 0; + + // for the common positions... 
+ // no that have the same mutation + // no that have a different mutation + // no of those that have the same genotype + + int sameMutation = 0, sameMutationSameGenotype = 0; + int diffMutation = 0, diffMutationSameGenotype = 0; + + // here we go + + for (Entry entry : normalVCFMap.entrySet()) { + + VcfRecord normalVCF = entry.getValue(); + VcfRecord tumourVCF = tumourVCFMap.get(entry.getKey()); + + if (null == tumourVCF) { + normalUnique++; + } else { + ++normalAndTumour; + + // sanity check - compare ref - if not the same - oh dear... + assert normalVCF.getRef().equals(tumourVCF.getRef()); + + // compare mutations + char normalMut = normalVCF.getAlt().charAt(0); + char tumourMut = tumourVCF.getAlt().charAt(0); + + // need to get the genotype from the VCFRecord + + GenotypeEnum normalGenotype = VcfUtils.calculateGenotypeEnum( + normalVCF.getInfo().substring(0, 3), normalVCF.getRefChar(), normalVCF.getAlt().charAt(0)); + GenotypeEnum tumourGenotype = VcfUtils.calculateGenotypeEnum( + tumourVCF.getInfo().substring(0, 3), tumourVCF.getRefChar(), tumourVCF.getAlt().charAt(0)); + + if (normalMut == tumourMut) { + sameMutation++; + if (normalGenotype == tumourGenotype) + ++sameMutationSameGenotype; + else { + RefAndMultiGenotype ramg = new RefAndMultiGenotype(normalVCF.getRefChar(), normalGenotype, tumourGenotype); + AtomicLong al = diffGenotypes.get(ramg); + if (null == al) { + al = new AtomicLong(); + diffGenotypes.put(ramg, al); + } + al.incrementAndGet(); + } + } else { + diffMutation++; + if (normalGenotype == tumourGenotype) + ++diffMutationSameGenotype; + } + } + } + + for (ChrPosition position : tumourVCFMap.keySet()) { + if (null == normalVCFMap.get(position)) { + tumourUnique++; + uniqueTumourVCFMap.put(position, tumourVCFMap.get(position)); + } + } + + // now print out some stats + StringBuilder sb = new StringBuilder("\nSTATS\n"); + sb.append("No of positions in normal map: " + normalVCFMap.size()); + sb.append("\nNo of unique positions in normal map: " + normalUnique); + sb.append("\nNo of positions in tumour map: " + tumourVCFMap.size()); + sb.append("\nNo of unique positions in tumour map: " + tumourUnique); + sb.append("\nNo of shared positions: " + normalAndTumour); + sb.append("\n"); + sb.append("\nNo of positions with same mutation: " + sameMutation); + sb.append("\nNo of positions with same mutation and same genotype: " + sameMutationSameGenotype); + + sb.append("\npositions with same mutation and diff genotype: "); + + for (Entry entry : diffGenotypes.entrySet()) { + sb.append("\n" + entry.getKey().toString() + " count: " + entry.getValue().get()); + } + sb.append("\nNo of positions with diff mutation: " + diffMutation); + sb.append("\nNo of positions with diff mutation and same genotype: " + diffMutationSameGenotype); + + logger.info(sb.toString()); + + + } + + private void loadVCFData(String vcfFile, Map map) throws Exception { + if (FileUtils.canFileBeRead(vcfFile)) { + + VCFFileReader reader = new VCFFileReader(new File(vcfFile)); + try { + for (VcfRecord qpr : reader) { + map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); + } + } finally { + reader.close(); + } + } + } + + + public static void main(String[] args) throws Exception { + CompareVCFs sp = new CompareVCFs(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if 
(options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(CompareVCFs.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareVCFs", CompareVCFs.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- b/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- new file mode 100644 index 000000000..03a4e2f03 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- @@ -0,0 +1,269 @@ +/** + * © Copyright The University of Queensland 2010-2014. + * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. + * + * This code is released under the terms outlined in the included LICENSE file. 
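For readability, a sketch that restates the triage addPileupFromNormalBam() applies to each tumour-unique position (no new behaviour, just the logic above with the class labels spelled out):

    // 'sams' holds the normal-BAM reads overlapping the position; 'foundInNormal'
    // is true when the tumour alt allele was seen in any of those reads.
    if (foundInNormal) {
        // class C: the mutation is also present in the normal reads
    } else if (sams.size() < 8) {
        // class B: fewer than 8 overlapping normal reads, too little coverage to decide
    } else {
        // potential class A: well covered in normal with no sign of the alt allele
    }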
+ */ +package org.qcmg.qmule.vcf; + +import java.io.File; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicLong; + +import htsjdk.samtools.SAMRecord; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.GenotypeEnum; +import org.qcmg.common.util.FileUtils; +import org.qcmg.common.vcf.VcfRecord; +import org.qcmg.common.vcf.VcfUtils; +import org.qcmg.picard.QJumper; +import org.qcmg.picard.util.SAMUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.vcf.VCFFileReader; + +public class CompareVCFs { + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private static QLogger logger; + + private final ConcurrentMap normalVCFMap = new ConcurrentHashMap(12500); //not expecting more than 100000 + private final ConcurrentMap tumourVCFMap = new ConcurrentHashMap(12500); + private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); + + public int engage() throws Exception { + + logger.info("loading normal vcf data"); + loadVCFData(cmdLineInputFiles[0], normalVCFMap); + logger.info("loading normal vcf data - DONE [" + normalVCFMap.size() + "]"); + + logger.info("loading tumour vcf data"); + loadVCFData(cmdLineInputFiles[1], tumourVCFMap); + logger.info("loading tumour vcf data - DONE [" + tumourVCFMap.size() + "]"); + + examine(); + + addPileupFromNormalBam(); + + return exitStatus; + } + + private void addPileupFromNormalBam() throws Exception { + // loop through each position in the unique map and get the entries in the normal GATK cleaned BAM file. + int notEnoughCoverage = 0, mutationFoundInNormal = 0; + StringBuilder sb = new StringBuilder(); + QJumper qj = new QJumper(); + qj.setupReader(cmdLineInputFiles[2]); + + for (Entry entry : uniqueTumourVCFMap.entrySet()) { + int position = entry.getKey().getStartPosition(); + boolean foundInNormal = false; + List sams = qj.getOverlappingRecordsAtPosition(entry.getKey().getChromosome(), position, position); + + for (SAMRecord sam : sams) { + int offset = SAMUtils.getIndexInReadFromPosition(sam, position); + if (offset > -1 && offset < sam.getReadLength()) { + char c = sam.getReadString().charAt(offset); + if (c == entry.getValue().getAlt().charAt(0)) { + foundInNormal = true; + mutationFoundInNormal++; + break; + } + } + } + + if ( ! foundInNormal && sams.size() < 8) + notEnoughCoverage++; + else if ( ! foundInNormal) + sb.append(entry.getKey().getChromosome() + ":" + position + "\n"); + } + + logger.info("total positions examined: " + uniqueTumourVCFMap.size()); + logger.info("positions where mutation was also found in normal (class C): " + mutationFoundInNormal); + logger.info("positions where coverage in normal was less than 8 (class B): " + notEnoughCoverage); + logger.info("Potential class A positions: "); + logger.info(sb.toString()); + } + + private void examine() { + + final Map diffGenotypes = new HashMap(); + + // we want to know the following... + // number unique to normal + // number unique to tumour + // no of common positions + int normalUnique = 0, tumourUnique = 0, normalAndTumour = 0; + + // for the common positions... 
+ // no that have the same mutation + // no that have a different mutation + // no of those that have the same genotype + + int sameMutation = 0, sameMutationSameGenotype = 0; + int diffMutation = 0, diffMutationSameGenotype = 0; + + // here we go + + for (Entry entry : normalVCFMap.entrySet()) { + + VcfRecord normalVCF = entry.getValue(); + VcfRecord tumourVCF = tumourVCFMap.get(entry.getKey()); + + if (null == tumourVCF) { + normalUnique++; + } else { + ++normalAndTumour; + + // sanity check - compare ref - if not the same - oh dear... + assert normalVCF.getRef().equals(tumourVCF.getRef()); + + // compare mutations + char normalMut = normalVCF.getAlt().charAt(0); + char tumourMut = tumourVCF.getAlt().charAt(0); + + // need to get the genotype from the VCFRecord + + GenotypeEnum normalGenotype = VcfUtils.calculateGenotypeEnum( + normalVCF.getInfo().substring(0, 3), normalVCF.getRefChar(), normalVCF.getAlt().charAt(0)); + GenotypeEnum tumourGenotype = VcfUtils.calculateGenotypeEnum( + tumourVCF.getInfo().substring(0, 3), tumourVCF.getRefChar(), tumourVCF.getAlt().charAt(0)); + + if (normalMut == tumourMut) { + sameMutation++; + if (normalGenotype == tumourGenotype) + ++sameMutationSameGenotype; + else { + RefAndMultiGenotype ramg = new RefAndMultiGenotype(normalVCF.getRefChar(), normalGenotype, tumourGenotype); + AtomicLong al = diffGenotypes.get(ramg); + if (null == al) { + al = new AtomicLong(); + diffGenotypes.put(ramg, al); + } + al.incrementAndGet(); + } + } else { + diffMutation++; + if (normalGenotype == tumourGenotype) + ++diffMutationSameGenotype; + } + } + } + + for (ChrPosition position : tumourVCFMap.keySet()) { + if (null == normalVCFMap.get(position)) { + tumourUnique++; + uniqueTumourVCFMap.put(position, tumourVCFMap.get(position)); + } + } + + // now print out some stats + StringBuilder sb = new StringBuilder("\nSTATS\n"); + sb.append("No of positions in normal map: " + normalVCFMap.size()); + sb.append("\nNo of unique positions in normal map: " + normalUnique); + sb.append("\nNo of positions in tumour map: " + tumourVCFMap.size()); + sb.append("\nNo of unique positions in tumour map: " + tumourUnique); + sb.append("\nNo of shared positions: " + normalAndTumour); + sb.append("\n"); + sb.append("\nNo of positions with same mutation: " + sameMutation); + sb.append("\nNo of positions with same mutation and same genotype: " + sameMutationSameGenotype); + + sb.append("\npositions with same mutation and diff genotype: "); + + for (Entry entry : diffGenotypes.entrySet()) { + sb.append("\n" + entry.getKey().toString() + " count: " + entry.getValue().get()); + } + sb.append("\nNo of positions with diff mutation: " + diffMutation); + sb.append("\nNo of positions with diff mutation and same genotype: " + diffMutationSameGenotype); + + logger.info(sb.toString()); + + + } + + private void loadVCFData(String vcfFile, Map map) throws Exception { + if (FileUtils.canFileBeRead(vcfFile)) { + + VCFFileReader reader = new VCFFileReader(new File(vcfFile)); + try { + for (VcfRecord qpr : reader) { + map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); + } + } finally { + reader.close(); + } + } + } + + + public static void main(String[] args) throws Exception { + CompareVCFs sp = new CompareVCFs(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if 
(options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(CompareVCFs.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareVCFs", CompareVCFs.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } +} diff --git a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java new file mode 100644 index 000000000..29bb7c4c1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java @@ -0,0 +1,116 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.vcf; + +import java.io.File; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class ConvertVcfChr { + + private static final String CHR = "chr"; + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private static QLogger logger; + + + private int engage() throws Exception { + + // load + if (FileUtils.canFileBeRead(cmdLineInputFiles[0])) { + TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); + TabbedHeader header = reader.getHeader(); + + TabbedFileWriter writer = new TabbedFileWriter(new File(cmdLineOutputFiles[0])); + writer.addHeader(header); + + try { + for (TabbedRecord tabRec : reader) { + if ( ! 
tabRec.getData().startsWith(CHR)) { + tabRec.setData(CHR + tabRec.getData()); + } + writer.add(tabRec); + } + } finally { + try { + writer.close(); + } finally { + reader.close(); + } + } + } + return exitStatus; + } + + public static void main(String[] args) throws Exception { + ConvertVcfChr sp = new ConvertVcfChr(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(ConvertVcfChr.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareVCFs", ConvertVcfChr.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- new file mode 100644 index 000000000..29bb7c4c1 --- /dev/null +++ b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- @@ -0,0 +1,116 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
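A small worked example (hypothetical line) of what the ConvertVcfChr loop does to each record:

    // input data : "1\t815337\trs123\tG\tA"        (no "chr" prefix)
    // output data: "chr1\t815337\trs123\tG\tA"     (prefix added, then writer.add(tabRec))
    // Records whose data already starts with "chr" are written through unchanged,
    // and the input header is copied to the output via writer.addHeader(header).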
+ */ +package org.qcmg.qmule.vcf; + +import java.io.File; + +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; +import org.qcmg.common.util.FileUtils; +import org.qcmg.qmule.Messages; +import org.qcmg.qmule.Options; +import org.qcmg.qmule.QMuleException; +import org.qcmg.qmule.tab.TabbedFileReader; +import org.qcmg.qmule.tab.TabbedFileWriter; +import org.qcmg.qmule.tab.TabbedHeader; +import org.qcmg.qmule.tab.TabbedRecord; + +public class ConvertVcfChr { + + private static final String CHR = "chr"; + + private String logFile; + private String[] cmdLineInputFiles; + private String[] cmdLineOutputFiles; + private int exitStatus; + + private static QLogger logger; + + + private int engage() throws Exception { + + // load + if (FileUtils.canFileBeRead(cmdLineInputFiles[0])) { + TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); + TabbedHeader header = reader.getHeader(); + + TabbedFileWriter writer = new TabbedFileWriter(new File(cmdLineOutputFiles[0])); + writer.addHeader(header); + + try { + for (TabbedRecord tabRec : reader) { + if ( ! tabRec.getData().startsWith(CHR)) { + tabRec.setData(CHR + tabRec.getData()); + } + writer.add(tabRec); + } + } finally { + try { + writer.close(); + } finally { + reader.close(); + } + } + } + return exitStatus; + } + + public static void main(String[] args) throws Exception { + ConvertVcfChr sp = new ConvertVcfChr(); + int exitStatus = sp.setup(args); + if (null != logger) + logger.logFinalExecutionStats(exitStatus); + + System.exit(exitStatus); + } + + protected int setup(String args[]) throws Exception{ + int returnStatus = -1; + Options options = new Options(args); + + if (options.hasHelpOption()) { + System.err.println(Messages.USAGE); + options.displayHelp(); + returnStatus = 0; + } else if (options.hasVersionOption()) { + System.err.println(Messages.getVersionMessage()); + returnStatus = 0; + } else if (options.getInputFileNames().length < 1) { + System.err.println(Messages.USAGE); + } else if ( ! options.hasLogOption()) { + System.err.println(Messages.USAGE); + } else { + // configure logging + logFile = options.getLogFile(); + logger = QLoggerFactory.getLogger(ConvertVcfChr.class, logFile, options.getLogLevel()); + logger.logInitialExecutionStats("CompareVCFs", ConvertVcfChr.class.getPackage().getImplementationVersion(), args); + + // get list of file names + cmdLineInputFiles = options.getInputFileNames(); + if (cmdLineInputFiles.length < 1) { + throw new QMuleException("INSUFFICIENT_ARGUMENTS"); + } else { + // loop through supplied files - check they can be read + for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { + if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { + throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); + } + } + } + + // check supplied output files can be written to + if (null != options.getOutputFileNames()) { + cmdLineOutputFiles = options.getOutputFileNames(); + for (String outputFile : cmdLineOutputFiles) { + if ( ! FileUtils.canFileBeWrittenTo(outputFile)) + throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); + } + } + + return engage(); + } + return returnStatus; + } + +} diff --git a/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java b/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java new file mode 100644 index 000000000..b0aad1b7f --- /dev/null +++ b/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java @@ -0,0 +1,101 @@ +/** + * © Copyright The University of Queensland 2010-2014. 
This code is released under the terms outlined in the included LICENSE file. + */ +package org.qcmg.qmule.vcf; + +import org.qcmg.common.model.GenotypeEnum; +import org.qcmg.common.model.Classification; + +public class RefAndMultiGenotype { + + private final char ref; + private final GenotypeEnum normal; + private final GenotypeEnum tumour; + + public RefAndMultiGenotype(char ref, GenotypeEnum normal, GenotypeEnum tumour) { + this.ref = ref; + this.normal = normal; + this.tumour = tumour; + } + + @Override + public String toString() { + return ref + " : " + normal.getDisplayString() + " : " + tumour.getDisplayString() + " : " + getClassification(); + } + + public String getClassification() { + if (normal == tumour) { + return Classification.GERMLINE.name(); + + } else if (normal.isHomozygous() && tumour.isHomozygous()) { + // not equal but both are homozygous + return Classification.SOMATIC.name(); + } else if (normal.isHeterozygous() && tumour.isHeterozygous()) { + // not equal but both are heterozygous + return Classification.SOMATIC.name(); + } + + /////////////////////////////////////////////////////// + // normal is HOM and tumour is HET + /////////////////////////////////////////////////////// + if (normal.isHomozygous() && tumour.isHeterozygous()) { + + GenotypeEnum refAndNormalGenotype = GenotypeEnum.getGenotypeEnum(ref, normal.getFirstAllele()); + + if (tumour == refAndNormalGenotype) { + return Classification.GERMLINE.name(); +// mutation = normal.getFirstAllele() + MUT_DELIM + record.getRef(); + } else { + return Classification.SOMATIC.name(); + } + } + + /////////////////////////////////////////////////////// + // normal is HET and tumour is HOM + ////////////////////////////////////////////////////// + else if (normal.isHeterozygous() && tumour.isHomozygous()){ + + if (normal.containsAllele(tumour.getFirstAllele())) { + return Classification.GERMLINE.name(); + } else { + return Classification.SOMATIC.name(); + } + } + return null; + } + + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((normal == null) ? 0 : normal.hashCode()); + result = prime * result + ref; + result = prime * result + ((tumour == null) ? 0 : tumour.hashCode()); + return result; + } + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + RefAndMultiGenotype other = (RefAndMultiGenotype) obj; + if (normal == null) { + if (other.normal != null) + return false; + } else if (!normal.equals(other.normal)) + return false; + if (ref != other.ref) + return false; + if (tumour == null) { + if (other.tumour != null) + return false; + } else if (!tumour.equals(other.tumour)) + return false; + return true; + } + +} diff --git a/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- b/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- new file mode 100644 index 000000000..b0aad1b7f --- /dev/null +++ b/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- @@ -0,0 +1,101 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
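A few hypothetical calls showing how getClassification() labels ref/normal/tumour combinations, assuming the usual GenotypeEnum constants with alphabetically ordered alleles:

    new RefAndMultiGenotype('A', GenotypeEnum.AG, GenotypeEnum.AG).getClassification(); // GERMLINE - genotypes identical
    new RefAndMultiGenotype('A', GenotypeEnum.AA, GenotypeEnum.GG).getClassification(); // SOMATIC  - both homozygous but different
    new RefAndMultiGenotype('A', GenotypeEnum.AA, GenotypeEnum.AG).getClassification(); // SOMATIC  - tumour het differs from ref + normal allele (AA)
    new RefAndMultiGenotype('A', GenotypeEnum.GG, GenotypeEnum.AG).getClassification(); // GERMLINE - tumour het equals ref + normal allele (AG)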
+ */ +package org.qcmg.qmule.vcf; + +import org.qcmg.common.model.GenotypeEnum; +import org.qcmg.common.model.Classification; + +public class RefAndMultiGenotype { + + private final char ref; + private final GenotypeEnum normal; + private final GenotypeEnum tumour; + + public RefAndMultiGenotype(char ref, GenotypeEnum normal, GenotypeEnum tumour) { + this.ref = ref; + this.normal = normal; + this.tumour = tumour; + } + + @Override + public String toString() { + return ref + " : " + normal.getDisplayString() + " : " + tumour.getDisplayString() + " : " + getClassification(); + } + + public String getClassification() { + if (normal == tumour) { + return Classification.GERMLINE.name(); + + } else if (normal.isHomozygous() && tumour.isHomozygous()) { + // not equal but both are homozygous + return Classification.SOMATIC.name(); + } else if (normal.isHeterozygous() && tumour.isHeterozygous()) { + // not equal but both are heterozygous + return Classification.SOMATIC.name(); + } + + /////////////////////////////////////////////////////// + // normal is HOM and tumour is HET + /////////////////////////////////////////////////////// + if (normal.isHomozygous() && tumour.isHeterozygous()) { + + GenotypeEnum refAndNormalGenotype = GenotypeEnum.getGenotypeEnum(ref, normal.getFirstAllele()); + + if (tumour == refAndNormalGenotype) { + return Classification.GERMLINE.name(); +// mutation = normal.getFirstAllele() + MUT_DELIM + record.getRef(); + } else { + return Classification.SOMATIC.name(); + } + } + + /////////////////////////////////////////////////////// + // normal is HET and tumour is HOM + ////////////////////////////////////////////////////// + else if (normal.isHeterozygous() && tumour.isHomozygous()){ + + if (normal.containsAllele(tumour.getFirstAllele())) { + return Classification.GERMLINE.name(); + } else { + return Classification.SOMATIC.name(); + } + } + return null; + } + + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((normal == null) ? 0 : normal.hashCode()); + result = prime * result + ref; + result = prime * result + ((tumour == null) ? 
0 : tumour.hashCode()); + return result; + } + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + RefAndMultiGenotype other = (RefAndMultiGenotype) obj; + if (normal == null) { + if (other.normal != null) + return false; + } else if (!normal.equals(other.normal)) + return false; + if (ref != other.ref) + return false; + if (tumour == null) { + if (other.tumour != null) + return false; + } else if (!tumour.equals(other.tumour)) + return false; + return true; + } + +} From c6e8948c4ae7a32495878c9a4e001e280352caa0 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 4 Nov 2020 19:29:56 +1000 Subject: [PATCH 23/73] recovery q3tiledagainer --- .../src/au/edu/qimr/tiledaligner/ReadTiledAligerFile.java | 2 +- q3tiledaligner/src/au/edu/qimr/tiledaligner/TiledAligner.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/q3tiledaligner/src/au/edu/qimr/tiledaligner/ReadTiledAligerFile.java b/q3tiledaligner/src/au/edu/qimr/tiledaligner/ReadTiledAligerFile.java index 04ce3b885..3eeeaa6eb 100644 --- a/q3tiledaligner/src/au/edu/qimr/tiledaligner/ReadTiledAligerFile.java +++ b/q3tiledaligner/src/au/edu/qimr/tiledaligner/ReadTiledAligerFile.java @@ -13,7 +13,7 @@ import org.qcmg.common.util.Constants; import org.qcmg.common.util.NumberUtils; -import org.qcmg.string.StringFileReader; +import org.qcmg.qio.record.StringFileReader; import gnu.trove.map.TIntObjectMap; import gnu.trove.map.hash.TIntObjectHashMap; diff --git a/q3tiledaligner/src/au/edu/qimr/tiledaligner/TiledAligner.java b/q3tiledaligner/src/au/edu/qimr/tiledaligner/TiledAligner.java index a4692e399..20115fbe2 100644 --- a/q3tiledaligner/src/au/edu/qimr/tiledaligner/TiledAligner.java +++ b/q3tiledaligner/src/au/edu/qimr/tiledaligner/TiledAligner.java @@ -16,7 +16,7 @@ import org.qcmg.common.model.BLATRecord; import org.qcmg.common.util.LoadReferencedClasses; import org.qcmg.common.util.NumberUtils; -import org.qcmg.string.StringFileReader; +import org.qcmg.qio.record.StringFileReader; import gnu.trove.map.TIntObjectMap; import gnu.trove.map.hash.TIntObjectHashMap; From 6d87ff1a0c00c89b8ba96f988966e525a434ecfc Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 19:48:45 +1000 Subject: [PATCH 24/73] update qsnp with new StringFileReader --- qsnp/src/org/qcmg/snp/BuildCommonSnpsVcf.java | 19 ++-- qsnp/src/org/qcmg/snp/MuTectPipeline.java | 95 ++----------------- .../test/org/qcmg/snp/PileupPipelineTest.java | 12 +-- 3 files changed, 23 insertions(+), 103 deletions(-) diff --git a/qsnp/src/org/qcmg/snp/BuildCommonSnpsVcf.java b/qsnp/src/org/qcmg/snp/BuildCommonSnpsVcf.java index 414b740ac..549315342 100644 --- a/qsnp/src/org/qcmg/snp/BuildCommonSnpsVcf.java +++ b/qsnp/src/org/qcmg/snp/BuildCommonSnpsVcf.java @@ -36,10 +36,9 @@ import org.qcmg.common.vcf.header.VcfHeader; import org.qcmg.common.vcf.header.VcfHeaderRecord; import org.qcmg.common.vcf.header.VcfHeaderUtils; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.RecordWriter; +import org.qcmg.qio.record.StringFileReader; import org.qcmg.vcf.VCFFileReader; -import org.qcmg.vcf.VCFFileWriter; public class BuildCommonSnpsVcf { private static QLogger logger; @@ -174,7 +173,7 @@ void writeVCF(String outputFileName) throws Exception { final List orderedList = new ArrayList(snpPositions.keySet()); Collections.sort(orderedList, new ChrPositionComparator()); - try (VCFFileWriter writer = new 
VCFFileWriter(new File(outputFileName));) { + try (RecordWriter writer = new RecordWriter<>(new File(outputFileName));) { final VcfHeader header = getHeaderForCommonSnps(searchString, searchDirectory, additionalSearchStrings, mapOfFilesAndIds); for(final VcfHeaderRecord re : header) writer.addHeader(re.toString()); @@ -219,13 +218,13 @@ private VcfHeader getHeaderForCommonSnps(final String [] searchString, final Str private void processDccFile(File f, Integer id) throws Exception { // read in data from file. - try (TabbedFileReader reader = new TabbedFileReader(f);) { + try (StringFileReader reader = new StringFileReader(f);) { int i = 0; - for (final TabbedRecord rec : reader) { + for (final String rec : reader) { // ignore header line if (i++ == 0) continue; - final String [] params = TabTokenizer.tokenize(rec.getData()); + final String [] params = TabTokenizer.tokenize(rec); final ChrPosition cp = ChrPointPosition.valueOf(params[4], Integer.parseInt(params[5])); final String ref = params[10]; final String alt = getAltFromMutation(params, 13); // can eventually change this to the last element in the file @@ -274,13 +273,13 @@ private void processDccFile(File f, Integer id) throws Exception { private void processMafFile(File f, Integer id) throws Exception { // read in data from file. - try (TabbedFileReader reader = new TabbedFileReader(f);) { + try (StringFileReader reader = new StringFileReader(f);) { int i = 0; - for (final TabbedRecord rec : reader) { + for (final String rec : reader) { // ignore header line if (i++ == 0) continue; - final String [] params = TabTokenizer.tokenize(rec.getData(), 15); // only need data from the first 15 columns + final String [] params = TabTokenizer.tokenize(rec, 15); // only need data from the first 15 columns final ChrPosition cp = new ChrRangePosition(params[4], Integer.parseInt(params[5]), Integer.parseInt(params[6])); final String ref = params[10]; final String alt1 = params[11]; diff --git a/qsnp/src/org/qcmg/snp/MuTectPipeline.java b/qsnp/src/org/qcmg/snp/MuTectPipeline.java index 8d1da496c..c40aaa18f 100644 --- a/qsnp/src/org/qcmg/snp/MuTectPipeline.java +++ b/qsnp/src/org/qcmg/snp/MuTectPipeline.java @@ -39,8 +39,7 @@ import org.qcmg.pileup.QSnpRecord; import org.qcmg.common.model.Classification; import org.qcmg.snp.util.IniFileUtil; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.StringFileReader; /** */ @@ -211,14 +210,7 @@ public static QSnpRecord getQSnpRecord(String [] mtData) { int tumourRefCount = Integer.parseInt(mtData[20]); int tumourAltCount = Integer.parseInt(mtData[21]); - QSnpRecord rec = new QSnpRecord(mtData[0], Integer.parseInt(mtData[1]), mtData[3], mtData[4]); -// rec.setRef(ref); -// rec.setAlt(alt); -// rec.setMutation(ref + Constants.MUT_DELIM + alt); -// rec.setNormalCount(normalRefCount + normalAltCount); -// rec.setTumourCount(tumourRefCount + tumourAltCount); -// rec.setNormalGenotype(GenotypeEnum.getGenotypeEnum(mtData[28].charAt(0), mtData[28].charAt(1))); - + QSnpRecord rec = new QSnpRecord(mtData[0], Integer.parseInt(mtData[1]), mtData[3], mtData[4]); if (tumourRefCount > 0 && tumourAltCount > 0) { rec.setTumourGenotype(GenotypeEnum.getGenotypeEnum(ref, alt)); } else if (tumourAltCount > 0) { @@ -229,13 +221,11 @@ public static QSnpRecord getQSnpRecord(String [] mtData) { // all on the forward strand... 
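The TabbedFileReader/VCFFileWriter to StringFileReader/RecordWriter substitution above recurs throughout this patch series; a minimal sketch of the new idiom, with placeholder file names and the record construction elided:

    try (StringFileReader reader = new StringFileReader(new File("input.dcc"));
         RecordWriter<VcfRecord> writer = new RecordWriter<>(new File("output.vcf"))) {
        for (VcfHeaderRecord re : header) {     // 'header' is an existing VcfHeader
            writer.addHeader(re.toString());
        }
        for (String rec : reader) {             // records are now plain lines, not TabbedRecord
            String[] params = TabTokenizer.tokenize(rec);   // replaces TabTokenizer.tokenize(rec.getData())
            // ... build a VcfRecord from 'params' and add it to the writer ...
        }
    }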
// division by zero String ND = null; -// String normalPileup = null; if (normalRefCount > 0) { double normalRefQuality = Double.parseDouble(mtData[32]); double aveQual = normalRefQuality / normalRefCount; ND = ref + normalRefCount + "[" + NF.format(aveQual) + "]0[0]"; -// normalPileup = "" + ref; } if (normalAltCount > 0) { @@ -247,49 +237,20 @@ public static QSnpRecord getQSnpRecord(String [] mtData) { } else { ND += ";" + alt + normalAltCount + "[" + NF.format(aveQual) + "]0[0]"; } - -// normalPileup = null == normalPileup ? "" + alt : "" + ref + alt; } -// rec.setNormalNucleotides(ND); -// rec.setNormalPileup(normalPileup); // hard-coding all to somatic rec.setClassification(Classification.SOMATIC); return rec; } - -// private QSnpRecord getQSnpRecord(QSnpGATKRecord normal, QSnpGATKRecord tumour) { -// QSnpRecord qpr = new QSnpRecord(); -// qpr.setId(++mutationId); -// -// if (null != normal) { -// qpr.setChromosome(normal.getChromosome()); -// qpr.setPosition(normal.getPosition()); -// qpr.setRef(normal.getRef()); -// qpr.setNormalGenotype(normal.getGenotypeEnum()); -// qpr.setAnnotation(normal.getAnnotation()); -// // tumour fields -// qpr.setTumourGenotype(null == tumour ? null : tumour.getGenotypeEnum()); -// qpr.setTumourCount(null == tumour ? 0 : VcfUtils.getDPFromFormatField(tumour.getGenotype())); -// -// } else if (null != tumour) { -// qpr.setChromosome(tumour.getChromosome()); -// qpr.setPosition(tumour.getPosition()); -// qpr.setRef(tumour.getRef()); -// qpr.setTumourGenotype(tumour.getGenotypeEnum()); -// qpr.setTumourCount(VcfUtils.getDPFromFormatField(tumour.getGenotype())); -// } -// -// return qpr; -// } - + private static void loadMuTectOutput(String muTectOutput, Map map) { - try (TabbedFileReader reader = new TabbedFileReader(new File(muTectOutput))) { + try (StringFileReader reader = new StringFileReader(new File(muTectOutput))) { int noOfRecords = 0; - for (TabbedRecord rec : reader) { + for (String rec : reader) { if (noOfRecords++ > 0) { // header line in mutect output doesn't have '#' - String [] params = TabTokenizer.tokenize(rec.getData()); + String [] params = TabTokenizer.tokenize(rec); map.put(ChrPointPosition.valueOf(params[0], Integer.parseInt(params[1])), params); } } @@ -313,19 +274,7 @@ protected void ingestIni(Ini ini) throws SnpException { logger.tool("**** OTHER CONFIG ****"); logger.tool("mutationIdPrefix: " + mutationIdPrefix); - } - -// @Override -// protected String getFormattedRecord(QSnpRecord record, final String ensemblChr) { -// return record.getDCCDataNSFlankingSeq(mutationIdPrefix, ensemblChr); -// } -// -// @Override -// protected String getOutputHeader(boolean isSomatic) { -// if (isSomatic) return HeaderUtil.DCC_SOMATIC_HEADER; -// else return HeaderUtil.DCC_GERMLINE_HEADER; -// } - + } /** * Class that reads SAMRecords from a Queue and after checking that they satisfy some criteria @@ -334,23 +283,19 @@ protected void ingestIni(Ini ini) throws SnpException { * */ public class Pileup implements Runnable { -// private final String bamFile; private final SamReader reader; - private final boolean isNormal; +// private final boolean isNormal; private final ConcurrentMap pileupMap; private int arraySize; private int arrayPosition; private ChrPosition cp; private Comparator chrComparator; -// private List snps; private final CountDownLatch latch; public Pileup(final String bamFile, final CountDownLatch latch, final boolean isNormal) { -// this.bamFile = bamFile; - this.isNormal = isNormal; +// this.isNormal = isNormal; pileupMap = isNormal 
? normalPileup : tumourPileup; reader = SAMFileReaderFactory.createSAMFileReader(new File(bamFile)); -// snps = new ArrayList(positionRecordMap.keySet()); this.latch = latch; } @@ -406,28 +351,6 @@ private void advanceCPAndPosition() { if (null != cp) { // update QSnpRecord with our findings Accumulator acc = pileupMap.remove(cp); - if (null != acc) { -// QSnpRecord rec = positionRecordMap.get(cp); - -// String refString = rec.getRef(); -// if (refString.length() > 1) { -// logger.warn("ref string: " + refString + " in MuTectPipeline.advanceCPAndPosition"); -// } -// char ref = refString.charAt(0); - -// PileupElementLite pel = acc.getLargestVariant(ref); -// if (isNormal) { -//// rec.setNormalNucleotides(acc.getPileupElementString()); -//// rec.setNormalCount(acc.getCoverage()); -//// rec.setNormalPileup(acc.getPileup()); -//// rec.setNormalNovelStartCount(null != pel ? pel.getNovelStartCount() : 0); -// } else { -// // tumour fields -//// rec.setTumourCount(acc.getCoverage()); -//// rec.setTumourNucleotides(acc.getPileupElementString()); -//// rec.setTumourNovelStartCount(null != pel ? pel.getNovelStartCount() : 0); -// } - } } cp = snps.get(arrayPosition++).getChrPosition(); } diff --git a/qsnp/test/org/qcmg/snp/PileupPipelineTest.java b/qsnp/test/org/qcmg/snp/PileupPipelineTest.java index 63ed7c79d..89d702ef9 100644 --- a/qsnp/test/org/qcmg/snp/PileupPipelineTest.java +++ b/qsnp/test/org/qcmg/snp/PileupPipelineTest.java @@ -6,8 +6,7 @@ import java.util.ArrayList; import java.util.List; -import junit.framework.Assert; - +import org.junit.Assert; import org.junit.Test; import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; @@ -15,8 +14,7 @@ import org.qcmg.common.model.PileupElement; import org.qcmg.common.model.Rule; import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.tab.TabbedFileReader; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qio.record.StringFileReader; import org.qcmg.vcf.VCFFileReader; public class PileupPipelineTest { @@ -107,9 +105,9 @@ private int noOfLinesInVCFOutputFile(File vcfOutput) throws Exception { } public static String getFileHeader(File file) throws Exception { - try (TabbedFileReader reader = new TabbedFileReader(file);) { - for (final TabbedRecord vcf : reader) { - if (vcf.getData().startsWith("analysis")) return vcf.getData(); + try (StringFileReader reader = new StringFileReader(file);) { + for (final String vcf : reader) { + if (vcf.startsWith("analysis")) return vcf; } } return "no header line found"; From ef474599de9fd5fdcbc2d1d2622a4b1a1ec6cb42 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 19:55:52 +1000 Subject: [PATCH 25/73] mv exception to qmule folder --- .../{unused => qmule}/exception/RecordIteratorException.java | 2 +- .../{unused => qmule}/exception/RecordIteratorException.java-- | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename qio/src/org/qcmg/{unused => qmule}/exception/RecordIteratorException.java (93%) rename qio/src/org/qcmg/{unused => qmule}/exception/RecordIteratorException.java-- (100%) diff --git a/qio/src/org/qcmg/unused/exception/RecordIteratorException.java b/qio/src/org/qcmg/qmule/exception/RecordIteratorException.java similarity index 93% rename from qio/src/org/qcmg/unused/exception/RecordIteratorException.java rename to qio/src/org/qcmg/qmule/exception/RecordIteratorException.java index d2df8afe7..49c862953 100644 --- a/qio/src/org/qcmg/unused/exception/RecordIteratorException.java +++ b/qio/src/org/qcmg/qmule/exception/RecordIteratorException.java @@ -1,7 +1,7 @@ 
/** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.unused.exception; +package org.qcmg.qmule.exception; public class RecordIteratorException extends RuntimeException { diff --git a/qio/src/org/qcmg/unused/exception/RecordIteratorException.java-- b/qio/src/org/qcmg/qmule/exception/RecordIteratorException.java-- similarity index 100% rename from qio/src/org/qcmg/unused/exception/RecordIteratorException.java-- rename to qio/src/org/qcmg/qmule/exception/RecordIteratorException.java-- From e715d801acc5859eca5dec19c7b1cf7b98326488 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 20:00:20 +1000 Subject: [PATCH 26/73] resolve location issue --- qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java | 2 +- qio/src/org/qcmg/unused/reader/AbstractReader.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java b/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java index e84fff5ae..0526be1ef 100644 --- a/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java +++ b/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java @@ -9,7 +9,7 @@ import java.util.Iterator; import java.util.concurrent.atomic.AtomicLong; -import org.qcmg.unused.exception.RecordIteratorException; +import org.qcmg.qmule.exception.RecordIteratorException; public abstract class AbstractRecordIterator implements Iterator { diff --git a/qio/src/org/qcmg/unused/reader/AbstractReader.java b/qio/src/org/qcmg/unused/reader/AbstractReader.java index fcf1af524..68e6e731c 100644 --- a/qio/src/org/qcmg/unused/reader/AbstractReader.java +++ b/qio/src/org/qcmg/unused/reader/AbstractReader.java @@ -9,9 +9,9 @@ import java.io.InputStream; import java.util.Iterator; +import org.qcmg.qmule.exception.RecordIteratorException; import org.qcmg.qmule.record.AbstractRecordIterator; import org.qcmg.qmule.record.Record; -import org.qcmg.unused.exception.RecordIteratorException; public abstract class AbstractReader implements Reader, Iterable { From 9724284f2cbaa03c1399804feea5ab4bc4a79190 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 4 Nov 2020 20:05:54 +1000 Subject: [PATCH 27/73] update import relocation qio --- .../org/qcmg/qmule/AlignerCompareTest.java-- | 120 +++++ .../qmule/AnnotateDCCWithGFFRegionTest.java-- | 234 ++++++++++ .../org/qcmg/qmule/BamCompressTest.java-- | 95 ++++ .../qcmg/qmule/BamMismatchCountsTest.java-- | 75 +++ .../org/qcmg/qmule/ChrPosComparatorTest.java | 4 +- .../qcmg/qmule/ChrPosComparatorTest.java-- | 35 ++ .../qmule/CompareReferenceRegionsTest.java-- | 162 +++++++ qmule/test/org/qcmg/qmule/DccToMafTest.java-- | 167 +++++++ .../org/qcmg/qmule/IndelDCCHeaderTest.java-- | 222 +++++++++ qmule/test/org/qcmg/qmule/MAF2DCC1Test.java | 2 +- qmule/test/org/qcmg/qmule/MAF2DCC1Test.java-- | 315 +++++++++++++ .../test/org/qcmg/qmule/TestThreading.java-- | 55 +++ .../qmule/WiggleFromPileupTakeTwoTest.java-- | 428 +++++++++++++++++ .../org/qcmg/qmule/WiggleFromPileupTest.java | 2 +- .../qcmg/qmule/WiggleFromPileupTest.java-- | 431 ++++++++++++++++++ .../qcmg/qmule/snppicker/CompareSnpsTest.java | 2 +- .../qmule/snppicker/CompareSnpsTest.java-- | 70 +++ .../qmule/snppicker/GatkUniqueSnpsTest.java-- | 154 +++++++ .../util/IGVBatchFileGeneratorTest.java-- | 73 +++ .../qmule/util/TabbedDataLoaderTest.java-- | 21 + .../src/org/qcmg/sig/util/SignatureUtil.java | 2 +- .../sig/SignatureGeneratorBespokeTest.java | 6 +- 
.../org/qcmg/sig/SignatureGeneratorTest.java | 2 +- 23 files changed, 2667 insertions(+), 10 deletions(-) create mode 100644 qmule/test/org/qcmg/qmule/AlignerCompareTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/AnnotateDCCWithGFFRegionTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/BamCompressTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/BamMismatchCountsTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/ChrPosComparatorTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/CompareReferenceRegionsTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/DccToMafTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/IndelDCCHeaderTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/MAF2DCC1Test.java-- create mode 100644 qmule/test/org/qcmg/qmule/TestThreading.java-- create mode 100644 qmule/test/org/qcmg/qmule/WiggleFromPileupTakeTwoTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/util/IGVBatchFileGeneratorTest.java-- create mode 100644 qmule/test/org/qcmg/qmule/util/TabbedDataLoaderTest.java-- diff --git a/qmule/test/org/qcmg/qmule/AlignerCompareTest.java-- b/qmule/test/org/qcmg/qmule/AlignerCompareTest.java-- new file mode 100644 index 000000000..7f02a58bb --- /dev/null +++ b/qmule/test/org/qcmg/qmule/AlignerCompareTest.java-- @@ -0,0 +1,120 @@ +package org.qcmg.qmule; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; + + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMRecord; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + + +public class AlignerCompareTest { + public static final String INPUT_SAM1 = "./input1.sam"; + public static final String INPUT_SAM2 = "./input2.sam"; + public static final String OUTPUT_BAM = "./output.bam"; + + @After + public void deleteFiles(){ + //delete inputs + File in1 = new File(INPUT_SAM1); + File in2 = new File(INPUT_SAM2); + in1.delete(); + in2.delete(); + + //delete output + File[] files = new File("./").listFiles(); + for(File f : files) + if(f.toString().startsWith(OUTPUT_BAM)) + f.delete(); + + } + + @Before + public void before(){ + CreateSAMs( ); + } + + + + @Test + public void mainTest() throws Exception{ + + final String[] args1 = { "-i", INPUT_SAM1, "-i", INPUT_SAM2, "-o", OUTPUT_BAM }; + AlignerCompare.main(args1); + + } + + + public static void CreateSAMs(){ + List mydata = new ArrayList(); + + //common + mydata.add("@HD VN:1.4 SO:queryname"); + mydata.add("@SQ SN:GL000196.1 LN:38914"); + + mydata.add("@RG ID:2010072264129530 LB:Library_20100413_C DS:RUNTYPE{50F} SM:S0414_20100607_2_FragBC_bcSample1_F3_bcA10_05"); + mydata.add("@PG ID:2010072264129500 PN:MANUAL"); + mydata.add("603_1107_1232 0 GL000196.1 480 1 25M25H * 0 0 AATCACTTGAACCCAGGAGGCGGAG IIIIIIIIIIIIIIIIIIIIIIII: RG:Z:2010072264129530 CS:Z:T30321120120100120220330223100133302310303131133123 AS:i:24 CQ:Z:BBBB@AAA>><>B@;9AA<:BB=@>:AB<<=@9@7'9<22>?921<:/'1 XN:i:24 NH:i:10 IH:i:2 HI:i:1 CC:Z:GL000247.1 CP:i:35405 MD:Z:25"); 
+ mydata.add("603_1107_1233 163 GL000196.1 36008 29 75M = 36083 142 GGATCTAGAATGCTGAAGGATCTAGTGTGTTGAGGGATCTAGCATGCTGAAGGATCTAGCATGTTAAGGGATCTA BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFI X0:i:1 X1:i:0 ZC:i:5 MD:Z:8G66 PG:Z:MarkDuplicates RG:Z:2010072264129530 XG:i:0 AM:i:29 NM:i:1 SM:i:29 XM:i:1 XO:i:0 XT:A:U"); + mydata.add("603_1107_1233 83 GL000196.1 36083 29 4S67M4S = 36008 -142 TCTAGCATGTCGAGAGATCTAGCATGCTGAAGGATCTAGCATGCTGAAGGATCTAGCATGTTGAGGGTTCTAGTG FFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFBBB ZC:i:5 MD:Z:63A3 PG:Z:MarkDuplicates RG:Z:2010072264129530 XG:i:0 AM:i:29 NM:i:1 SM:i:29 XM:i:1 XO:i:0 XT:A:M"); +//?? mydata.add("603_1107_1233 87 GL000196.1 36083 29 4S67M4S = 36008 -142 TCTAGCATGTCGAGAGATCTAGCATGCTGAAGGATCTAGCATGCTGAAGGATCTAGCATGTTGAGGGTTCTAGTG FFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFBBB ZC:i:5 MD:Z:63A3 PG:Z:MarkDuplicates RG:Z:2010072264129530 XG:i:0 AM:i:29 NM:i:1 SM:i:29 XM:i:1 XO:i:0 XT:A:M"); + + mydata.add("603_1108_0001 0 GL000196.1 38525 3 37M5D13H * 0 0 AGGCTGAGGTGGGCGGATCACTTGAGGTCCAGAGTTC IIIIIIIIIIIIIIIII;?IIIB@IIIBAIIIIIIII RG:Z:2010072264129530 CS:Z:T32032122011003302321120122012012221023222003301200 AS:i:30 CQ:Z: mydata1 = new ArrayList(); + mydata1.add("603_1108_0002 73 GL000196.1 319 3 50M = 319 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA" + " X0:i:1 X1:i:0 MD:Z:100 PG:Z:MarkDuplicates RG:Z:2010072264129530 XG:i:0 AM:i:0 NM:i:0 SM:i:37 XM:i:0 XO:i:0 XT:A:U"); + mydata1.add("603_1108_0002 133 GL000196.1 319 0 * = 319 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA RG:Z:2010072264129530 CS:Z:T22012220200333113323133321010013112111011113333112 AS:i:34 CQ:Z:/3:8@62B-*46?-A+B;'A'<9+-/@@6.'@B4,/;@2=+@B)>/?B@A XN:i:34 HI:i:2"); + + List mydata2 = new ArrayList(); + mydata2.add("603_1108_0002 73 GL000196.1 319 3 50M = 319 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA RG:Z:2010072264129530 CS:Z:T22012220200333113323133321010013112111011113333112 AS:i:34 CQ:Z:/3:8@62B-*46?-A+B;'A'<9+-/@@6.'@B4,/;@2=+@B)>/?B@A XN:i:34 NH:i:2 IH:i:2 HI:i:2 MD:Z:26T3CG18"); + mydata2.add("603_1108_0002 133 GL000196.1 319 0 * = 319 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA RG:Z:2010072264129530"); + + //add invalide record since mapq is not zero for unmapped reads + + try { + + BufferedWriter writer1 = new BufferedWriter(new FileWriter(INPUT_SAM1)); + BufferedWriter writer2 = new BufferedWriter(new FileWriter(INPUT_SAM2)); + + //create SAM + for (String line : mydata){ + writer1.write(line + "\n"); + writer2.write(line + "\n"); + } + + for (String line : mydata1) + writer1.write(line + "\n"); + + for (String line : mydata2) + writer2.write(line + "\n"); + + + writer1.close(); + writer2.close(); +//debug +// System.out.println(new File(INPUT_SAM).getAbsolutePath() ); + + } catch (IOException e) { + System.err.println(e.toString() + "\n\t can't write to : " + INPUT_SAM1 + " or " + INPUT_SAM2 ); + } + + } + + +} diff --git a/qmule/test/org/qcmg/qmule/AnnotateDCCWithGFFRegionTest.java-- b/qmule/test/org/qcmg/qmule/AnnotateDCCWithGFFRegionTest.java-- new file mode 100644 index 000000000..49ffbb19d --- /dev/null +++ b/qmule/test/org/qcmg/qmule/AnnotateDCCWithGFFRegionTest.java-- @@ -0,0 +1,234 @@ +package org.qcmg.qmule; + +import static 
org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Vector; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class AnnotateDCCWithGFFRegionTest { + + private File inputDCC1File; + private File inputRepeatGFF; + private File inputGermGFF; + private File outputFile; + private AnnotateDCCWithGFFRegions ann; + private File inputDCCQFile; + private static String FILE_SEPARATOR = System.getProperty("file.separator"); + + @Rule + public TemporaryFolder testFolder = new TemporaryFolder(); + + @Before + public void setUp() throws IOException { + inputDCC1File = createDCCFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "input.dcc1"); + inputDCCQFile = createDCCQFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "input.dccq"); + inputRepeatGFF = createRepeatGFFFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "input.gff3"); + inputGermGFF = createGermGFFFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "germ.gff3"); + outputFile = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "output.dccq"); + ann = new AnnotateDCCWithGFFRegions(); + } + + @After + public void tearDown() { + inputDCC1File.delete(); + outputFile.delete(); + inputDCCQFile.delete(); + outputFile.delete(); + inputRepeatGFF.delete(); + inputGermGFF.delete(); + inputDCC1File = null; + outputFile = null; + ann = null; + } + + @Test + public void testGoodOptions() throws Exception { + String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCC1File.getAbsolutePath(), "-i", inputRepeatGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath()}; + ann.setup(args); + assertEquals(inputDCC1File.getAbsolutePath(), ann.getCmdLineInputFiles()[0]); + assertEquals(inputRepeatGFF.getAbsolutePath(), ann.getCmdLineInputFiles()[1]); + assertEquals(outputFile.getAbsolutePath(), ann.getCmdLineOutputFiles()[0]); + + //with annotation + String[] args2 = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCC1File.getAbsolutePath(), "-i", inputRepeatGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath(), "--annotation", "GERM"}; + ann.setup(args2); + assertEquals("GERM", ann.getAnnotation()); + } + + @Test(expected=QMuleException.class) + public void testBadOptions() throws Exception { + outputFile = testFolder.newFile(); + String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCC1File.getAbsolutePath(), "-i", inputRepeatGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath()}; + ann.setup(args); + } + + @Test + public void testEngageWithDCC1Repeat() throws Exception { + String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCC1File.getAbsolutePath(), "-i", inputRepeatGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath()}; + ann.setup(args); + int exit = ann.engage(); + assertEquals(0, exit); + assertTrue(outputFile.exists()); + + BufferedReader reader = new BufferedReader(new FileReader(outputFile)); + + String line; + int count = 0; + while ((line = reader.readLine()) != null) { + count++; + String[] values = line.split("\t"); + if (count == 2) { + assertEquals("PASS", values[23]); + } + if (count == 3) { + 
assertEquals("PASS;Simple_repeat::(CCCTAA)n", values[23]); + } + } + reader.close(); + } + + @Test + public void testEngageWithDCC1GERM() throws Exception { + String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCC1File.getAbsolutePath(), + "-i", inputGermGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath(), "--annotation", "GERM"}; + ann.setup(args); + int exit = ann.engage(); + assertEquals(0, exit); + assertTrue(outputFile.exists()); + assertEquals(1, ann.getOverlapCount()); + assertEquals(1, ann.getNotOverlappingCount()); + BufferedReader reader = new BufferedReader(new FileReader(outputFile)); + + String line; + int count = 0; + while ((line = reader.readLine()) != null) { + count++; + String[] values = line.split("\t"); + if (count == 2) { + assertEquals("PASS", values[23]); + } + if (count == 3) { + assertEquals("PASS;GERM", values[23]); + } + } + reader.close(); + } + + @Test + public void testEngageWithDCCQGERM() throws Exception { + String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCCQFile.getAbsolutePath(), + "-i", inputGermGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath(), "--annotation", "GERM"}; + ann.setup(args); + int exit = ann.engage(); + assertEquals(0, exit); + assertTrue(outputFile.exists()); + assertEquals(3, ann.getOverlapCount()); + assertEquals(3, ann.getNotOverlappingCount()); + BufferedReader reader = new BufferedReader(new FileReader(outputFile)); + + String line; + int count = 0; + while ((line = reader.readLine()) != null) { + count++; + String[] values = line.split("\t"); + if (count == 2 || count == 3) { + assertEquals("4", values[1]); + } + if (count == 4 || count == 5) { + assertEquals("2", values[1]); + } + if (count == 6 || count == 7) { + assertEquals("3", values[1]); + } + if (count == 2 || count == 4 || count == 6) { + assertTrue(values[37].contains("GERM")); + } + if (count == 3 || count == 5 || count == 7) { + assertFalse(values[37].contains("GERM")); + } + } + reader.close(); + } + + @Test + public void testParseDCCColumnsWithDCCQ() throws QMuleException { + Vector headers = new Vector(); + headers.add("mutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation" + + "\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tND\tTD\tNNS\tconsequence_type\taa_mutation\tcds_mutation" + + "\tprotein_domain_affected\tgene_affected\ttranscript_affected\tgene_build_version\tnote_s\tgene_symbol\tAll_domains\tAll_domains_type\tAll_domains_description\tChrPosition\tQCMGflag\tFlankSeq"); + ann.parseDCCHeader(headers, "dccq"); + assertEquals(5, ann.getDCC_STRAND_INDEX()); + assertEquals(37, ann.getQCMGFLAG_COLUMN_INDEX()); + assertEquals(8, ann.getREFERENCE_ALLELE_INDEX()); + assertEquals(10, ann.getTUMOUR_ALLELE_INDEX()); + assertEquals(1, ann.getMUTATION_TYPE_INDEX()); + } + + @Test + public void testParseDCCColumnsWithDCC1() throws QMuleException { + Vector headers = new Vector(); + headers.add("analysis_id\tanalyzed_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele" + + 
"\tcontrol_genotype\ttumour_genotype\tmutation\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tQCMGflag\tND\tTD\tNNS\tFlankSeq"); + ann.parseDCCHeader(headers, "dcc1"); + assertEquals(7, ann.getDCC_STRAND_INDEX()); + assertEquals(23, ann.getQCMGFLAG_COLUMN_INDEX()); + } + + private File createDCCFile(String fileName) throws IOException { + BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); + w.write("analysis_id\tanalyzed_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tQCMGflag\tND\tTD\tNNS\tFlankSeq\n"); + w.write("id\ttest\ttest_ind1\t2\tchr1\t85\t86\t1\t-999\t-999\t-\t-999\tT\t->T\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\tPASS\t--\t--\t--\t--\n"); + w.write("id\ttest\ttest_ind1\t2\tchr1\t10001\t10002\t1\t-999\t-999\t-\t-999\tT\t->T\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\tPASS\t--\t--\t--\t--\n"); + + w.close(); + return new File(fileName); + } + + private File createRepeatGFFFile(String fileName) throws IOException { + BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); + w.write("chr1\thg19.fa.out\tSimple_repeat::(CCCTAA)n\t10001\t10468\t1504\t+\t.\tID=1;Note=(CCCTAA)n;SR_length=6;\n"); + w.close(); + return new File(fileName); + } + + private File createGermGFFFile(String fileName) throws IOException { + BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); + w.write("chr1\thg19.fa.out\t.\t10024\t10024\t1504\t+\t.\tReferenceAllele=C;TumourAllele=-;PatientCount=10\n"); + w.write("chr1\thg19.fa.out\t.\t10021\t10022\t1504\t+\t.\tReferenceAllele=-;TumourAllele=T;PatientCount=10\n"); + w.write("chr1\thg19.fa.out\t.\t10001\t10011\t1504\t+\t.\tReferenceAllele=CTAAGTCACC;TumourAllele=-;PatientCount=10\n"); + w.write("chr1\thg19.fa.out\t.\t10001\t10002\t1504\t+\t.\tReferenceAllele=-;TumourAllele=T;PatientCount=10\n"); + w.close(); + return new File(fileName); + } + + private File createDCCQFile(String fileName) throws IOException { + BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); + w.write("mutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation" + + "\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tND\tTD\tNNS\tconsequence_type\taa_mutation\tcds_mutation" + + "\tprotein_domain_affected\tgene_affected\ttranscript_affected\tgene_build_version\tnote_s\tgene_symbol\tAll_domains\tAll_domains_type\tAll_domains_description\tChrPosition\tQCMGflag\tFlankSeq\n"); + w.write("test_ind1\t4\tchr1\t10001\t10011\t1\t-999\t-999\tCTAAGTCACC\t-999\tCCTTCAAGATTCAACCTGAATAAATCGCT\tCTAAGTCACC>CCTTCAAGATTCAACCTGAATAAATCGCT\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); + 
w.write("test_ind1\t4\tchr1\t10001\t10010\t1\t-999\t-999\tCTAAGTCACC\t-999\tCCTTCAAGATTCAACCTGAATAAATCGCT\tCTAAGTCACC>CCTTCAAGATTCAACCTGAATAAATCGCT\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); + w.write("test_ind1\t2\tchr1\t10021\t10022\t1\t-999\t-999\t-\t-999\tT\t->T\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); + w.write("test_ind1\t2\tchr1\t10021\t10022\t1\t-999\t-999\t-\t-999\tC\t->C\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); + w.write("test_ind1\t3\tchr1\t10024\t10024\t1\t-999\t-999\tC\t-999\t-\tC>-\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); + w.write("test_ind1\t3\tchr1\t10024\t10024\t1\t-999\t-999\tG\t-999\t-\tG>-\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); + + w.close(); + return new File(fileName); + } + + +} diff --git a/qmule/test/org/qcmg/qmule/BamCompressTest.java-- b/qmule/test/org/qcmg/qmule/BamCompressTest.java-- new file mode 100644 index 000000000..2011ee61d --- /dev/null +++ b/qmule/test/org/qcmg/qmule/BamCompressTest.java-- @@ -0,0 +1,95 @@ +package org.qcmg.qmule; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; + + +import java.util.ArrayList; +import java.util.List; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMRecord; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.picard.SAMOrBAMWriterFactory; + + +public class BamCompressTest { + public static final String INPUT_SAM = "./input.sam"; + public static final String OUTPUT_BAM = "./output.bam"; + + @After + public void deleteFiles(){ + File in = new File(INPUT_SAM); + File out = new File(OUTPUT_BAM); + + in.delete(); + out.delete(); + + + } + + @Before + public void before(){ + CreateBAM(INPUT_SAM); + + } + + @Test + public void mainTest() throws Exception{ + final String[] args1 = { "-i", INPUT_SAM, "-o", OUTPUT_BAM, "--compressLevel", "1" }; + final String[] args2 = { "-i", INPUT_SAM, "-o", OUTPUT_BAM, "--compressLevel", "9" }; + + + BAMCompress.main(args1); + BAMCompress.main(args2); + + } + + + public static void CreateBAM(String INPUT_SAM ){ + List mydata = new ArrayList(); + + //common + mydata.add("@HD VN:1.0"); + mydata.add("@SQ SN:GL000196.1 LN:38914"); + + mydata.add("@RG ID:2010072264129530 LB:Library_20100413_C DS:RUNTYPE{50F} SM:S0414_20100607_2_FragBC_bcSample1_F3_bcA10_05"); + mydata.add("@PG ID:2010072264129500 PN:MANUAL"); + mydata.add("1035_217_1202 272 GL000196.1 319 3 50M * 0 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA RG:Z:2010072264129530 CS:Z:T22012220200333113323133321010013112111011113333112 AS:i:34 
CQ:Z:/3:8@62B-*46?-A+B;'A'<9+-/@@6.'@B4,/;@2=+@B)>/?B@A XN:i:34 NH:i:2 IH:i:2 HI:i:2 MD:Z:26T3CG18"); + mydata.add("603_1107_1232 0 GL000196.1 480 1 25M25H * 0 0 AATCACTTGAACCCAGGAGGCGGAG IIIIIIIIIIIIIIIIIIIIIIII: RG:Z:2010072264129530 CS:Z:T30321120120100120220330223100133302310303131133123 AS:i:24 CQ:Z:BBBB@AAA>><>B@;9AA<:BB=@>:AB<<=@9@7'9<22>?921<:/'1 XN:i:24 NH:i:10 IH:i:2 HI:i:1 CC:Z:GL000247.1 CP:i:35405 MD:Z:25"); + mydata.add("828_1019_1921 0 GL000196.1 38525 3 37M5D13H * 0 0 AGGCTGAGGTGGGCGGATCACTTGAGGTCCAGAGTTC IIIIIIIIIIIIIIIII;?IIIB@IIIBAIIIIIIII RG:Z:2010072264129530 CS:Z:T32032122011003302321120122012012221023222003301200 AS:i:30 CQ:Z: normaldata = new ArrayList(); + + //common + normaldata.add("@HD VN:1.0"); + normaldata.add("@SQ SN:GL000196.1 LN:38914"); + normaldata.add("@RG ID:2010072264129530 LB:Library_20100413_C DS:RUNTYPE{50F} SM:S0414_20100607_2_FragBC_bcSample1_F3_bcA10_05"); + normaldata.add("@PG ID:2010072264129500 PN:MANUAL"); + normaldata.add("1035_217_1202 272 GL000196.1 319 3 50M * 0 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA RG:Z:2010072264129530 CS:Z:T22012220200333113323133321010013112111011113333112 AS:i:34 CQ:Z:/3:8@62B-*46?-A+B;'A'<9+-/@@6.'@B4,/;@2=+@B)>/?B@A XN:i:34 NH:i:2 IH:i:2 HI:i:2 MD:Z:26T3CG18"); + normaldata.add("603_1107_1232 0 GL000196.1 480 1 25M25H * 0 0 AATCACTTGAACCCAGGAGGCGGAG IIIIIIIIIIIIIIIIIIIIIIII: RG:Z:2010072264129530 CS:Z:T30321120120100120220330223100133302310303131133123 AS:i:24 CQ:Z:BBBB@AAA>><>B@;9AA<:BB=@>:AB<<=@9@7'9<22>?921<:/'1 XN:i:24 NH:i:10 IH:i:2 HI:i:1 CC:Z:GL000247.1 CP:i:35405 MD:Z:25"); + normaldata.add("828_1019_1921 0 GL000196.1 38525 3 37M5D13H * 0 0 AGGCTGAGGTGGGCGGATCACTTGAGGTCCAGAGTTC IIIIIIIIIIIIIIIII;?IIIB@IIIBAIIIIIIII RG:Z:2010072264129530 CS:Z:T32032122011003302321120122012012221023222003301200 AS:i:30 CQ:Z: list = new ArrayList(); + list.add(r1); + list.add(r2); + + Collections.sort(list, new GFF3RecordChromosomeAndPositionComparator()); + + Assert.assertEquals(r2, list.get(0)); + } + +} diff --git a/qmule/test/org/qcmg/qmule/CompareReferenceRegionsTest.java-- b/qmule/test/org/qcmg/qmule/CompareReferenceRegionsTest.java-- new file mode 100644 index 000000000..2b80dae4c --- /dev/null +++ b/qmule/test/org/qcmg/qmule/CompareReferenceRegionsTest.java-- @@ -0,0 +1,162 @@ +package org.qcmg.qmule; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class CompareReferenceRegionsTest { + + File fileA; + File fileB; + File fileC; + String output; + private static String FILE_SEPARATOR = System.getProperty("file.separator"); + + @Rule + public TemporaryFolder testFolder = new TemporaryFolder(); + + @Before + public void setUp() throws IOException { + fileA = createFileA(); + fileB = createFileB(); + fileC = createFileC(); + } + + @After + public void tearDown() { + fileA.delete(); + fileB.delete(); + fileC.delete(); + } + + @Test + public void testOneway() throws Exception { + CompareReferenceRegions c = new CompareReferenceRegions(); + output = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + 
"output.gff3"; + String outputB = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "outputB.gff3"; + String[] args = {"--log", testFolder.newFile("test.log").getAbsolutePath(), "--mode", "oneway", "--input", fileA.getAbsolutePath(), "--input", fileB.getAbsolutePath(), "--output", output, "--output", outputB,}; + c.setup(args); + + assertOutputFile(output, 100, 110, 2); + assertOutputFile(outputB, 90, 90, 1); + } + + @Test + public void testAnnotate() throws Exception { + CompareReferenceRegions c = new CompareReferenceRegions(); + output = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "output.gff3"; + String outputB = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "outputB.gff3"; + String[] args = {"--log", testFolder.newFile("test.log").getAbsolutePath(), "--mode", "annotate", "--input", fileA.getAbsolutePath(), "--input", fileB.getAbsolutePath(), "--output", output, "--output", outputB, "--column", "9", "--annotation", "ANNOTATION"}; + c.setup(args); + BufferedReader reader = new BufferedReader(new FileReader(new File(output))); + String line; + int count = 0; + while ((line = reader.readLine()) != null) { + count++; + String[] values = line.split("\t"); + if (count == 1) { + assertEquals("90", values[3]); + assertEquals("90", values[4]); + assertFalse(values[8].contains("ANNOTATION")); + } + if (count == 2) { + assertEquals("100", values[3]); + assertEquals("110", values[4]); + assertTrue(values[8].contains("ANNOTATION")); + } + if (count == 3) { + assertEquals("200", values[3]); + assertEquals("210", values[4]); + assertTrue(values[8].contains("ANNOTATION")); + } + } + reader.close(); + } + + @Test + public void testIntersect() throws Exception { + CompareReferenceRegions c = new CompareReferenceRegions(); + output = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "output.gff3"; + String[] args = {"--log", testFolder.newFile("test.log").getAbsolutePath(), "--mode", "intersect", "--input", fileA.getAbsolutePath(), "--input", fileB.getAbsolutePath(), "--input", fileC.getAbsolutePath(), "--output", output}; + c.setup(args); + + assertOutputFile(output, 190, 220, 1); + } + + private void assertOutputFile(String file, int start, int end, int count) throws IOException { + assertTrue(new File(file).exists()); + + BufferedReader reader = new BufferedReader(new FileReader(new File(file))); + String line = reader.readLine(); + assertNotNull(line); + String[] values = line.split("\t"); + assertEquals(Integer.toString(start), values[3]); + assertEquals(Integer.toString(end), values[4]); + reader.close(); + } + + @Test + public void testUnique() throws Exception { + CompareReferenceRegions c = new CompareReferenceRegions(); + output = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "outputA.gff3"; + String outputB = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "outputB.gff3"; + String outputC = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "outputC.gff3"; + String[] args = {"--log", testFolder.newFile("test.log").getAbsolutePath(), "--mode", "unique", "--input", fileA.getAbsolutePath(), "--input", fileB.getAbsolutePath(), "--input", fileC.getAbsolutePath(), "--output", output, + "--output", outputB, "--output", outputC + }; + c.setup(args); + assertOutputFile(output, 90, 90, 1); + assertOutputFile(outputB, 80, 80, 1); + assertOutputFile(outputC, 50, 55, 1); + } + + + private File createFileA() throws IOException { + File f = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "fileA.gff3"); + BufferedWriter 
writer = new BufferedWriter(new FileWriter(f)); + + writer.write("chr1\ttest\t0\t100\t110\t1.92\t0\t0\tName=Test\n");//overlap with 2 + writer.write("chr1\ttest\t0\t90\t90\t1.92\t0\t0\tName=Test\n");//unique + writer.write("chr1\ttest\t0\t200\t210\t1.92\t0\t0\tName=Test\n");//overlap with 2 and 3 + writer.close(); + return f; + } + + private File createFileB() throws IOException { + File f = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "fileB.gff3"); + BufferedWriter writer = new BufferedWriter(new FileWriter(f)); + + writer.write("chr1\ttest\t0\t100\t105\t1.92\t0\t0\tName=Test\n");//overlap with 1 + writer.write("chr1\ttest\t0\t80\t80\t1.92\t0\t0\tName=Test\n");//unique + writer.write("chr1\ttest\t0\t190\t210\t1.92\t0\t0\tName=Test\n");//overlap with 2 and 3 + writer.close(); + return f; + } + + private File createFileC() throws IOException { + File f = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "fileC.gff3"); + BufferedWriter writer = new BufferedWriter(new FileWriter(f)); + + writer.write("chr1\ttest\t0\t50\t55\t1.92\t0\t0\tName=Test\n");//unique + writer.write("chr1\ttest\t0\t70\t70\t1.92\t0\t0\tName=Test\n");//unique + writer.write("chr1\ttest\t0\t200\t220\t1.92\t0\t0\tName=Test\n");//overlap with 2 and 3 + writer.close(); + return f; + } + + +} diff --git a/qmule/test/org/qcmg/qmule/DccToMafTest.java-- b/qmule/test/org/qcmg/qmule/DccToMafTest.java-- new file mode 100644 index 000000000..516a67a6e --- /dev/null +++ b/qmule/test/org/qcmg/qmule/DccToMafTest.java-- @@ -0,0 +1,167 @@ +package org.qcmg.qmule; + +import java.util.HashMap; +import java.util.Map; + +import junit.framework.Assert; + +import org.junit.Ignore; +import org.junit.Test; +import org.qcmg.common.dcc.DccConsequence; +import org.qcmg.common.string.StringUtils; + +public class DccToMafTest { + + + @Ignore + public void testRealLifeExample1() { + // want to test the following dcc record + /** + * APGI_2193_SNP_42944 1 9 21815432 21815432 1 -888 -888 A A/A A/G A>G + * -999 -999 30 2 2 -888 -999 -999 A:43[39.7],1[40] + * A:25[39.15],0[0],G:5[40],0[0] + * NON_SYNONYMOUS_CODING--SPLICE_SITE,NON_SYNONYMOUS_CODING--SPLICE_SITE,5PRIME_UTR--SPLICE_SITE, + * NON_SYNONYMOUS_CODING--SPLICE_SITE,NON_SYNONYMOUS_CODING--SPLICE_SITE,DOWNSTREAM,UPSTREAM + * I12V,I12V,-888,I12V,I29V,-888,-888 147A>G,147A>G,128A>G,126A>G,85A>G,-888,-888 -888,-888,-888,-888,-888,-888,-888 + * ENSG00000099810|ENSG00000233326|ENSG00000229298 + * ENST00000404796,ENST00000380172,ENST00000355696,ENST00000419385,ENST00000443256|ENST00000427788|ENST00000447235 + * 55 -999 CDKN2BAS|-888|-888 -888,TIGR01694,-888,-888,TIGR01694|-888|-888 + * -888,Tigrfam,-888,-888,Tigrfam|-888|-888 + * -888,MeThioAdo_phosphorylase,-888,-888,MeThioAdo_phosphorylase|-888|-888 A/G chr9:21815432-21815432 -- + */ + Map canonicalMap = new HashMap(); + canonicalMap.put("ENSG00000099810", "ENST00000404796"); + canonicalMap.put("ENSG00000233326", "ENST00000427788"); + canonicalMap.put("ENSG00000229298", "ENST00000447235"); + + String geneString = "ENSG00000099810|ENSG00000233326|ENSG00000229298"; + String [] genes = geneString.split("\\|"); + Assert.assertEquals(3, genes.length); + + String transcriptsString = "ENST00000404796,ENST00000380172,ENST00000355696,ENST00000419385,ENST00000443256|ENST00000427788|ENST00000447235"; + String [] transcriptIds = transcriptsString.split("\\|"); + Assert.assertEquals(3, transcriptIds.length); + + String[] consequenceResults = new String[] {"Splice_Site" , "3'Flank", "5'Flank"}; + + String consequencesString = 
"NON_SYNONYMOUS_CODING--SPLICE_SITE,NON_SYNONYMOUS_CODING--SPLICE_SITE,5PRIME_UTR--SPLICE_SITE,NON_SYNONYMOUS_CODING--SPLICE_SITE,NON_SYNONYMOUS_CODING--SPLICE_SITE,DOWNSTREAM,UPSTREAM"; + + testInputs(canonicalMap, genes, transcriptIds, consequenceResults, consequencesString); + + } + + @Ignore + public void testRealLifeExample2() { + // want to test the following dcc record + /** + * APGI_2158_SNP_61733 1 13 25068851 25068851 1 -888 -888 G G/G A/G G>A + * -999 -999 76 2 2 -888 -999 -999 G:7[40],53[38.71] G:10[39.9],58[37.33],A:8[40],0[0] + * WITHIN_NON_CODING_GENE,STOP_GAINED -888,Q201* -888,707G>A -888,-888 + * ENSG00000205822|ENSG00000102699 ENST00000445572|ENST00000381989 55 + * -999 -888|PARP4 -888|-888 -888|-888 -888|-888 G/A chr13:25068851-25068851 ��� + */ + Map canonicalMap = new HashMap(); + canonicalMap.put("ENSG00000205822", "noMatch"); + canonicalMap.put("ENSG00000102699", "ENST00000381989"); + + String geneString = "ENSG00000205822|ENSG00000102699"; + String [] genes = geneString.split("\\|"); + Assert.assertEquals(2, genes.length); + + String transcriptsString = "ENST00000445572|ENST00000381989"; + String [] transcriptIds = transcriptsString.split("\\|"); + Assert.assertEquals(2, transcriptIds.length); + + String[] consequenceResults = new String[] {null, "Nonsense_Mutation"}; + + String consequencesString = "WITHIN_NON_CODING_GENE,STOP_GAINED"; + + testInputs(canonicalMap, genes, transcriptIds, consequenceResults, consequencesString); + + } + + @Test + public void testRealLifeExample3() { + // v70 Ensembl + // AOCS exome solid data + // want to test the following dccq record + /** + * AOCS_066_SNP_3124 1 1 115256530 115256530 1 G/T -1 G + * G/G G/T G>T -999 -999 1.2420510993064712E-22 110 1 2 -888 + * rs121913254 -999 G:25[34.12],67[36.06] G:10[33.2],31[33.35],T:16[39.62],53[38.58] 44 + * missense_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant + * Q61K;Q61K;Q61K;Q61K;Q61K;Q61K;Q61K;Q61K;Q61K,-888,-888,-888,-888,-888,-888,-888,-888 + * 435G>T;435G>T;435G>T;435G>T;435G>T;435G>T;435G>T;435G>T;435G>T,-888,-888,-888,-888,-888,-888,-888,-888 + * PF00071;PF08477;PF00025;PF00009;TIGR00231;PR00449;SM00173;SM00175;SM00174 + * ENSG00000213281,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307 + * ENST00000369535,ENST00000339438,ENST00000438362,ENST00000358528,ENST00000261443,ENST00000530886,ENST00000369530,ENST00000483407,ENST00000534699 + * 70 -999 NRAS,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1 PF00071;PF08477;PF00025;PF00009;TIGR00231;PR00449;SM00173;SM00175;SM00174 + * pfam;pfam;pfam;pfam;tigrfam;prints;smart;smart;smart Small_GTPase;MIRO-like;Small_GTPase_ARF/SAR;EF_GTP-bd_dom;Small_GTP-bd_dom;Small_GTPase;Small_GTPase_Ras;Small_GTPase_Rab_type;Small_GTPase_Rho + * chr1:115256530-115256530 PASS TTCTTTTCCAG + */ + Map canonicalMap = new HashMap(); + canonicalMap.put("ENSG00000205822", "noMatch"); + canonicalMap.put("ENSG00000102699", "ENST00000381989"); + + String geneString = "ENSG00000213281,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307"; + String [] genes = geneString.split(","); + Assert.assertEquals(9, genes.length); + + String transcriptsString = 
"ENST00000369535,ENST00000339438,ENST00000438362,ENST00000358528,ENST00000261443,ENST00000530886,ENST00000369530,ENST00000483407,ENST00000534699"; + String [] transcriptIds = transcriptsString.split(","); + Assert.assertEquals(9, transcriptIds.length); + + String[] consequenceResults = new String[] {"Nonsense_Mutation", "3'Flank"}; + + String consequencesString = "missense_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant"; + + testInputs(canonicalMap, genes, transcriptIds, consequenceResults, consequencesString); + + } + + private void testInputs(Map canonicalMap, String[] genes, + String[] transcriptIds, String[] consequenceResults, + String consequencesString) { + int i = 0, allTranscriptIdCount = 0; + for (String gene : genes) { + String[] geneSpecificTranscriptIds = transcriptIds[i].split(","); + + String canonicalTranscripId = canonicalMap.get(gene); + + if (null != canonicalTranscripId) { + int positionInTranscripts = StringUtils.getPositionOfStringInArray(geneSpecificTranscriptIds, canonicalTranscripId, true); + String [] consequences = consequencesString.split(","); + if (positionInTranscripts > -1) { + // we have a matching canonical transcript + positionInTranscripts += allTranscriptIdCount; + + if (consequences.length > positionInTranscripts) { + Assert.assertEquals(consequenceResults[i], DccConsequence.getMafName(consequences[positionInTranscripts], org.qcmg.common.dcc.MutationType.SNP, -1)); +// maf.setVariantClassification(DccConsequence.getMafName(params[22], type, Integer.parseInt(params[1]))); + } else { + Assert.fail("consequences.length is <= positionInTranscripts"); + } + } + // update transcript count + allTranscriptIdCount += geneSpecificTranscriptIds.length; + + } else { + // still want to keep the transcript count up to date + allTranscriptIdCount += geneSpecificTranscriptIds.length; +// maf.setVariantClassification(DccConsequence.getMafName(params[22], type, Integer.parseInt(params[1]))); + } + + i++; + } + } + + + @Test + public void testMultipleDelimiters() { + String inputString = "ENST00000438000,ENST00000428930,ENST00000447407,ENST00000419503|ENST00000439302,ENST00000437865,ENST00000422716,ENST00000435585,ENST00000456937|ENST00000416712,ENST00000429121,ENST00000427309"; + + String [] params = inputString.split("[,|]"); + Assert.assertEquals(12, params.length); + Assert.assertEquals("ENST00000427309", params[11]); + } +} diff --git a/qmule/test/org/qcmg/qmule/IndelDCCHeaderTest.java-- b/qmule/test/org/qcmg/qmule/IndelDCCHeaderTest.java-- new file mode 100644 index 000000000..4ba0cf6ad --- /dev/null +++ b/qmule/test/org/qcmg/qmule/IndelDCCHeaderTest.java-- @@ -0,0 +1,222 @@ +package org.qcmg.qmule; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class IndelDCCHeaderTest { + + File tumourBam; + File normalBam; + File somaticFile; + File somaticOutputFile; + File germlineFile; + File germlineOutputFile; + IndelDCCHeader id; + private static String 
FILE_SEPARATOR = System.getProperty("file.separator"); + + @Rule + public TemporaryFolder testFolder = new TemporaryFolder(); + + @Before + public void setUp() throws IOException { + tumourBam = createBamFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "tumor.bam", "tumourId"); + normalBam = createBamFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "normal.bam", "normalId"); + somaticFile = createDCCFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "input.dcc1", 13, "3d9d495c-94f7-46a4-9301-7dcbad7285d1"); + somaticOutputFile = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "output.dcc1"); + germlineFile = createDCCFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "germ.input.dcc1", 13, "2d9d495c-94f7-46a4-9301-7dcbad7285d1"); + germlineOutputFile = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "germ.output.dcc1"); + id = new IndelDCCHeader(); + } + + + @After + public void tearDown() { + tumourBam.delete(); + normalBam.delete(); + germlineFile.delete(); + germlineOutputFile.delete(); + somaticOutputFile.delete(); + somaticFile.delete(); + tumourBam = null; + normalBam = null; + germlineFile = null; + germlineOutputFile = null; + somaticFile = null; + somaticOutputFile = null; + id = null; + } + + @Test + public void testGoodOptions() throws Exception { + IndelDCCHeader id = new IndelDCCHeader(); + String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", somaticFile.getAbsolutePath(), "-i", germlineFile.getAbsolutePath(), "--tumour", tumourBam.getAbsolutePath(), "--normal", normalBam.getAbsolutePath(), "--output", somaticOutputFile.getAbsolutePath(), "--output", germlineOutputFile.getAbsolutePath(), "--mode", "gatk"}; + id.setup(args); + assertEquals(tumourBam.getAbsolutePath(), id.getTumourBam().getAbsolutePath()); + assertEquals(normalBam.getAbsolutePath(), id.getNormalBam().getAbsolutePath()); + assertEquals(somaticFile.getAbsolutePath(), id.getSomaticFile().getAbsolutePath()); + assertEquals(germlineFile.getAbsolutePath(), id.getGermlineFile().getAbsolutePath()); + assertEquals(somaticOutputFile.getAbsolutePath(), id.getSomaticOutputFile().getAbsolutePath()); + assertEquals(germlineOutputFile.getAbsolutePath(), id.getGermlineOutputFile().getAbsolutePath()); + assertEquals("gatk", id.getMode()); + } + + @Test + public void testAnnotate() throws Exception { + + String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", somaticFile.getAbsolutePath(), "-i", germlineFile.getAbsolutePath(), "--tumour", tumourBam.getAbsolutePath(), "--normal", normalBam.getAbsolutePath(), "--output", somaticOutputFile.getAbsolutePath(), "--output", germlineOutputFile.getAbsolutePath(), "--mode", "gatk"}; + + id.setup(args); + assertFalse(somaticOutputFile.exists()); + assertFalse(germlineOutputFile.exists()); + id.annotate(); + assertTrue(somaticOutputFile.exists()); + assertTrue(germlineOutputFile.exists()); + + assertAnnotationCorrect(somaticOutputFile, "tumourId"); + assertAnnotationCorrect(germlineOutputFile, "normalId"); + + } + + private void assertAnnotationCorrect(File outputFile, String sampleId) throws IOException { + BufferedReader reader = new BufferedReader(new FileReader(outputFile)); + + String line; + int count = 0; + while ((line = reader.readLine()) != null) { + count++; + if (count == 1 || count == 15) { + assertTrue(line.contains("3d9d495c_94f7_46a4_9301_7dcbad7285d1")); + } + if (count == 52) { + String[] results = line.split("\t"); + 
assertEquals("3d9d495c_94f7_46a4_9301_7dcbad7285d1", results[0]); + assertEquals(sampleId, results[1]); + assertEquals("3d9d495c_94f7_46a4_9301_7dcbad7285d1_" + sampleId + "_ind1", results[2]); + } + if (count == 15) { + assertTrue(line.startsWith("#Q_DCCMETA")); + } + if (count == 25) { + assertTrue(line.startsWith("#Q_LIMSMETA_TEST")); + } + if (count == 38) { + assertTrue(line.startsWith("#Q_LIMSMETA_CONTROL")); + } + + } + reader.close(); + + } + + + @Test + public void testIsCorrectUuidFormat() { + assertTrue(id.isCorrectUuidFormat("3d9d495c_94f7_46a4_9301_7dcbad7285d1")); + assertFalse(id.isCorrectUuidFormat("3d9d495c-94f7_46a4_9301_7dcbad7285d1")); + assertFalse(id.isCorrectUuidFormat("3d9d495c_94f7_46a4_9301_7dcbad7285d")); + } + + @Test + public void testReplaceAnalysisIdInLine() { + String uuid = "3d9d495c_94f7_46a4_9301_7dcbad7285d1"; + String tumour = "tumourId_added"; + String normal = "normalId_added"; + id.setUuid(uuid); + id.setTumourSampleId(tumour); + id.setNormalSampleId(normal); + String line = "id\tsecond\tthird_ind1"; + String[] results = id.replaceIdsInLine(line, false).split("\t"); + assertEquals(uuid, results[0]); + assertEquals(tumour, results[1]); + assertEquals(uuid + "_" + tumour + "_ind1" , results[2]); + + results = id.replaceIdsInLine(line, true).split("\t"); + assertEquals(uuid, results[0]); + assertEquals(normal, results[1]); + assertEquals(uuid + "_" + normal + "_ind1" , results[2]); + } + + @Test + public void testCheckForUUid() throws Exception { + String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", somaticFile.getAbsolutePath(), "-i", germlineFile.getAbsolutePath(), "--tumour", tumourBam.getAbsolutePath(), "--normal", normalBam.getAbsolutePath(), "--output", somaticOutputFile.getAbsolutePath(), "--output", germlineOutputFile.getAbsolutePath(), "--mode", "gatk"}; + id.setup(args); + assertFalse(id.isCompleteHeaderPresent()); + assertFalse(id.isQexecPresent()); + assertEquals(0, id.getQexec().size()); + id.checkForUUid(); + assertFalse(id.isCompleteHeaderPresent()); + assertEquals(14, id.getQexec().size()); + assertTrue(id.isQexecPresent()); + } + + @Test(expected=QMuleException.class) + public void testCheckForUUidThrowsException() throws Exception { + somaticFile = createDCCFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "input.dcc1", 12, "3d9d495c-94f7-46a4-9301-7dcbad7285d1"); + String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", somaticFile.getAbsolutePath(), "-i", germlineFile.getAbsolutePath(), "--tumour", tumourBam.getAbsolutePath(), "--normal", normalBam.getAbsolutePath(), "--output", somaticOutputFile.getAbsolutePath(), "--output", germlineOutputFile.getAbsolutePath(), "--mode", "gatk"}; + + id.setup(args); + assertFalse(id.isCompleteHeaderPresent()); + assertFalse(id.isQexecPresent()); + assertEquals(0, id.getQexec().size()); + id.checkForUUid(); + assertFalse(id.isCompleteHeaderPresent()); + assertEquals(14, id.getQexec().size()); + assertTrue(id.isQexecPresent()); + } + + private File createDCCFile(String fileName, int qexecLength, String uuid) throws IOException { + BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); + w.write("#Q_EXEC Uuid "+uuid +"\n"); + for (int i=1; i<=qexecLength; i++) { + w.write("#Q_EXEC\n"); + } + 
w.write("analysis_id\tanalyzed_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tQCMGflag\tND\tTD\tNNS\tFlankSeq\n"); + w.write("id\ttest\ttest_ind1\t2\tchr1\t85\t86\t1\t-999\t-999\t-\t-999\tT\t->T\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\tPASS\t--\t--\t--\t--\n"); + w.close(); + return new File(fileName); + } + + private File createBamFile(String fileName, String sampleID) throws IOException { + final List data = new ArrayList(); + data.add("@HD VN:1.0 GO:none SO:coordinate"); + data.add("@SQ SN:chr1 LN:249250621 "); + data.add("@SQ SN:chr4 LN:191154276 "); + data.add("@SQ SN:chr7 LN:159138663 "); + data.add("@SQ SN:chrX LN:155270560 "); + data.add("@SQ SN:chrY LN:59373566 "); + data.add("@SQ SN:chr19 LN:59128983 "); + data.add("@SQ SN:GL000191.1 LN:106433 "); + data.add("@SQ SN:GL000211.1 LN:166566 "); + data.add("@SQ SN:chrMT LN:16569 "); + data.add("@RG ID:20120817075934728 PL:ILLUMINA PU:lane_7 LB:Library_20120726_B zc:6:/mnt/seq_results/icgc_pancreatic/APGI_1992/seq_mapped/120804_SN7001240_0063_AC0VM1ACXX.lane_7.nobc.bam SM:Colo-829"); + data.add("@CO CN:QCMG QN:qlimsmeta Aligner=bwa Capture Kit=NoCapture Donor=test Failed QC=0 Library Protocol=Illumina TruSEQ Multiplexed Manual Material=1:DNA Project=test_project Reference Genome File=/panfs/share/genomes/GRCh37_ICGC_standard_v2/GRCh37_ICGC_standard_v2.fa Sample="+sampleID+" Sample Code=4:Normal control (other site) Sequencing Platform=HiSeq Species Reference Genome=Homo sapiens (GRCh37_ICGC_standard_v2)"); + + BufferedWriter out; + out = new BufferedWriter(new FileWriter(fileName)); + for (final String line : data) { + out.write(line + "\n"); + } + out.close(); + return new File(fileName); + } + + + + +} diff --git a/qmule/test/org/qcmg/qmule/MAF2DCC1Test.java b/qmule/test/org/qcmg/qmule/MAF2DCC1Test.java index 00cda5c33..db9d7d1ef 100644 --- a/qmule/test/org/qcmg/qmule/MAF2DCC1Test.java +++ b/qmule/test/org/qcmg/qmule/MAF2DCC1Test.java @@ -21,7 +21,7 @@ import org.qcmg.common.model.ChrPosition; import org.qcmg.common.model.ChrPositionName; import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedRecord; public class MAF2DCC1Test { diff --git a/qmule/test/org/qcmg/qmule/MAF2DCC1Test.java-- b/qmule/test/org/qcmg/qmule/MAF2DCC1Test.java-- new file mode 100644 index 000000000..db9d7d1ef --- /dev/null +++ b/qmule/test/org/qcmg/qmule/MAF2DCC1Test.java-- @@ -0,0 +1,315 @@ +package org.qcmg.qmule; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.ChrPositionName; +import org.qcmg.common.model.ChrRangePosition; +import org.qcmg.qmule.tab.TabbedRecord; + +public class MAF2DCC1Test { + + private File snpMafFile; + private File snpDccFile; + private File indelMafFile; + private 
File indelDccFile; + private File outputFile; + private MAF2DCC1 test; + private final String DCCHEADER = "analysis_id analyzed_sample_id mutation_id mutation_type chromosome chromosome_start chromosome_end chromosome_strand refsnp_allele refsnp_strand reference_genome_allele control_genotype tumour_genotype mutation expressed_allele quality_score probability read_count is_annotated validation_status validation_platform xref_ensembl_var_id note QCMGflag ND TD NNS FlankSeq"; + private final String MAFHEADER = "Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer QCMG_Flag ND TD Canonical_Transcript_Id Canonical_AA_Change Canonical_Base_Change Alternate_Transcript_Id Alternate_AA_Change Alternate_Base_Change Confidence CPG Gff3_Bait Novel_Starts"; + + private static String FILE_SEPARATOR = System.getProperty("file.separator"); + + @Rule + public TemporaryFolder testFolder = new TemporaryFolder(); + + @Before + public void setUp() throws Exception { + snpMafFile = createMafFile("snp", testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "snp.maf"); + indelMafFile = createMafFile("indel", testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "indel.maf"); + snpDccFile = createDccFile("snp", testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "snp.dcc1"); + indelDccFile = createDccFile("indel", testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "indel.dcc1"); + outputFile = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "output.dcc1"); + String[] args = {"-i", indelMafFile.getAbsolutePath(), "-i", indelDccFile.getAbsolutePath(), "-o", outputFile.getAbsolutePath(), "-mode", "indel", "-log", testFolder.newFile().getAbsolutePath()}; + + test = new MAF2DCC1(); + test.setup(args); + } + + @After + public void tearDown() { + snpDccFile.delete(); + snpMafFile.delete(); + indelMafFile.delete(); + indelDccFile.delete(); + outputFile.delete(); + snpDccFile = null; + snpMafFile = null; + indelDccFile = null; + indelMafFile = null; + } + + @Test + public void testRunSnpMode() throws Exception { + String[] args = {"-i", snpMafFile.getAbsolutePath(), "-i", snpDccFile.getAbsolutePath(), "-o", outputFile.getAbsolutePath(), "-mode", "snp", "-log", testFolder.newFile().getAbsolutePath()}; + + MAF2DCC1 test = new MAF2DCC1(); + test.setup(args); + assertEquals(0, test.annotate()); + assertTrue(outputFile.exists()); + assertTrue(outputFile.length() > 0); + assertEquals(1, test.getInputMafRecordCount()); + } + + @Test + public void testRunIndelMode() throws Exception { + + assertEquals(0, test.annotate()); + assertTrue(outputFile.exists()); + assertTrue(outputFile.length() > 0); + assertEquals(2, test.getInputMafRecordCount()); + } + + @Test + public void testGoodOptions() throws Exception { + String log = testFolder.newFile().getAbsolutePath(); + String[] args = {"-i", indelMafFile.getAbsolutePath(), "-i", indelDccFile.getAbsolutePath(), "-o", outputFile.getAbsolutePath(), "-mode", "indel", "-log", log}; + MAF2DCC1 test = new MAF2DCC1(); + test.setup(args); + assertEquals(indelMafFile, 
test.getMafFile()); + assertEquals(1, test.getDccFiles().size()); + assertEquals(indelDccFile, test.getDccFiles().get(0)); + assertEquals(outputFile, test.getOutputDccFile()); + assertEquals("indel", test.getMode()); + assertEquals(log, test.getLogFile()); + } + + @Test(expected=QMuleException.class) + public void testOutputFileExistsThrowsException() throws Exception { + String log = testFolder.newFile().getAbsolutePath(); + outputFile = testFolder.newFile("test"); + String[] args = {"-i", indelMafFile.getAbsolutePath(), "-i", indelDccFile.getAbsolutePath(), "-o", outputFile.getAbsolutePath(), "-mode", "indel", "-log", log}; + assertTrue(outputFile.exists()); + MAF2DCC1 test = new MAF2DCC1(); + test.setup(args); + } + + @Test(expected=QMuleException.class) + public void testUnknownModeThrowsException() throws Exception { + String[] args = {"-i", indelMafFile.getAbsolutePath(), "-i", indelDccFile.getAbsolutePath(), "-o", outputFile.getAbsolutePath(), "-mode", "idel", "-log", testFolder.newFile().getAbsolutePath()}; + MAF2DCC1 test = new MAF2DCC1(); + test.setup(args); + } + + @Test + public void testMatchChrPos() { + ChrPosition maf = new ChrRangePosition("chr1", 1, 2); + ChrPosition dcc = new ChrRangePosition("chr1", 1, 2); + + assertTrue(test.match(maf, dcc)); + dcc = new ChrRangePosition("chr1", 1, 3); + assertFalse(test.match(maf, dcc)); + dcc = new ChrRangePosition("chr1", 2, 2); + assertFalse(test.match(maf, dcc)); + dcc = new ChrRangePosition("chr1", 1, 1); + assertFalse(test.match(maf, dcc)); + } + + @Test + public void testMatchingMutation() { + assertTrue(test.matchingMutation("SNP", "1")); + assertTrue(test.matchingMutation("INS", "2")); + assertTrue(test.matchingMutation("DEL", "3")); + assertFalse(test.matchingMutation("SNP", "3")); + assertFalse(test.matchingMutation("INS", "1")); + assertFalse(test.matchingMutation("DEL", "2")); + } + + @Test + public void testMatchRecordsSnpMode() { + int[] indexes = {0, 1, 2, 3, 4, 5}; + test.setMafColumnIndexes(indexes); + test.setDccColumnIndexes(indexes); + test.setMode("snp"); + TabbedRecord maf = new TabbedRecord(); + TabbedRecord dcc = new TabbedRecord(); + maf.setData("chr1\t1\t2\tSNP"); + dcc.setData("chr1\t1\t2\t1"); + assertTrue(test.matchOtherColumns(maf, dcc)); + dcc.setData("chr1\t1\t2\t4"); + assertFalse(test.matchOtherColumns(maf, dcc)); + } + + @Test + public void testMatchRecordsIndelMode() { + int[] indexes = {0, 1, 2, 3, 4, 5}; + test.setMafColumnIndexes(indexes); + test.setDccColumnIndexes(indexes); + test.setMode("indel"); + TabbedRecord maf = new TabbedRecord(); + TabbedRecord dcc = new TabbedRecord(); + maf.setData("chr1\t1\t2\tINS\t-\tA"); + dcc.setData("chr1\t1\t2\t2\t-\tA"); + assertTrue(test.matchOtherColumns(maf, dcc)); + dcc.setData("chr1\t1\t2\t1\t-\tA"); + assertFalse(test.matchOtherColumns(maf, dcc)); + } + + @Test + public void testRecordInMaf() throws QMuleException { + int[] indexes = {0, 1, 2, 3, 4, 5}; + test.setMafColumnIndexes(indexes); + test.setDccColumnIndexes(indexes); + test.setMode("indel"); + TabbedRecord maf = new TabbedRecord(); + TabbedRecord dcc = new TabbedRecord(); + maf.setData("chr1\t1\t2\tINS\t-\tA"); + dcc.setData("chr1\t1\t2\t2\t-\tA"); + List listOfRecords = new ArrayList<>(); + listOfRecords.add(maf); + Map> mafs = new HashMap<>(); + ChrPosition c = new ChrPositionName("chr1", 1, 2, "a"); + mafs.put(c, listOfRecords); + test.setMafRecords(mafs); + assertTrue(test.recordInMaf(c, dcc)); + } + + @Test(expected=QMuleException.class) + public void testRecordInMafThrowsException() throws 
QMuleException { + int[] indexes = {0, 1, 2, 3, 4, 5}; + test.setMafColumnIndexes(indexes); + test.setDccColumnIndexes(indexes); + test.setMode("indel"); + TabbedRecord maf = new TabbedRecord(); + TabbedRecord dcc = new TabbedRecord(); + maf.setData("chr1\t1\t2\tINS\t-\tA"); + dcc.setData("chr1\t1\t2\t2\t-\tA"); + List<TabbedRecord> listOfRecords = new ArrayList<>(); + listOfRecords.add(maf); + listOfRecords.add(maf); +// List<TabbedRecord> listOfRecords2 = new ArrayList<>(); +// listOfRecords2.add(maf); + Map<ChrPosition, List<TabbedRecord>> mafs = new HashMap<>(); + ChrPosition c = new ChrRangePosition("chr1", 1, 2); +// ChrPosition c2 = new ChrPosition("chr1", 1, 2); +// ChrPosition c = new ChrPosition("chr1", 1, 2, "a"); +// ChrPosition c2 = new ChrPosition("chr1", 1, 2, "b"); + mafs.put(c, listOfRecords); +// mafs.put(c2, listOfRecords2); + assertEquals(1, mafs.size()); + assertEquals(2, mafs.get( new ChrRangePosition("chr1", 1, 2)).size()); + test.setMafRecords(mafs); + test.recordInMaf(c, dcc); + } + + @Test + public void testfindColumnIndexesFromHeaderWithMaf() { + TabbedRecord rec = new TabbedRecord(); + rec.setData(MAFHEADER); + int[] cols = test.findColumnIndexesFromHeader(rec); + assertEquals(4, cols[0]); + assertEquals(5, cols[1]); + assertEquals(6, cols[2]); + assertEquals(9, cols[3]); + assertEquals(10, cols[4]); + assertEquals(11, cols[5]); + } + + @Test + public void testfindColumnIndexesFromHeaderWithDcc() { + TabbedRecord rec = new TabbedRecord(); + rec.setData(DCCHEADER); + int[] cols = test.findColumnIndexesFromHeader(rec); + assertEquals(4, cols[0]); + assertEquals(5, cols[1]); + assertEquals(6, cols[2]); + assertEquals(3, cols[3]); + assertEquals(10, cols[4]); + assertEquals(12, cols[5]); + } + + @Test(expected=QMuleException.class) + public void testMissingColumnIndexThrowsException() throws QMuleException { + int[] i = {-1}; + test.missingColumnIndex(i); + } + + @Test + public void testMissingColumnIndex() throws QMuleException { + int[] i = {1}; + assertFalse(test.missingColumnIndex(i)); + } + + @Test + public void testAddRecordToMap() throws QMuleException { + int[] indexes = {0, 1, 2, 3, 4, 5}; + test.setMafColumnIndexes(indexes); + test.setDccColumnIndexes(indexes); + test.setMode("indel"); + TabbedRecord maf = new TabbedRecord(); + maf.setData("chr1\t1\t2\tINS\t-\tA"); + test.addToMafRecordMap(maf, 1); + assertEquals(1, test.getMafRecords().size()); + assertTrue(test.getMafRecords().containsKey(new ChrRangePosition("1", 1, 2))); +// assertTrue(test.getMafRecords().containsKey(new ChrPosition("1", 1, 2, "" + 1))); + maf = new TabbedRecord(); + maf.setData("chr1\t1\t2\tINS\t-\tA"); + test.addToMafRecordMap(maf, 2); + assertEquals(1, test.getMafRecords().size()); + assertEquals(2, test.getMafRecords().get(new ChrRangePosition("1", 1, 2)).size()); +// assertEquals(2, test.getMafRecords().size()); + assertTrue(test.getMafRecords().containsKey(new ChrRangePosition("1", 1, 2))); +// assertTrue(test.getMafRecords().containsKey(new ChrPosition("1", 1, 2, "" +2))); + } + + + private File createDccFile(String type, String fileName) throws IOException { + BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); + 
//w.write("analysis_id\tanalyzed_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tQCMGflag\tND\tTD\tNNS\tFlankSeq\n"); + w.write(DCCHEADER + "\n"); + if (type.equals("indel")) { + w.write("aba9fc0c_7f03_417f_b087_2e8ab1a45e42 test test_ind716 2 chr1 4412134 4412135 1 -999 -999 -0 -999 A -/A -999 -999 -999 -999 -999 -999 -999 -999 PASS -- -- -- --\n"); + w.write("aba9fc0c_7f03_417f_b087_2e8ab1a45e42 test test_ind2740 3 chr1 12126362 12126362 1 -999 -999 T -999 -0 T/- -999 -999 -999 -999 -999 -999 -999 -999 PASS -- -- -- --\n"); + } + + if (type.equals("snp")) { + w.write("02ebc0c3_3102_4bf0_9c5b_eabcab65414d ICGC-ABMJ-20120706-01 APGI_1992_SNP_248 1 1 569492 569492 1 C/T 1 T T/T C/T T>C -999 -999 0.0119695263 106 1 2 -888 rs147253560 -999 MIN A:1[35],0[0],C:0[0],1[37],T:42[36.71],47[35.89] C:9[31.33],1[36],T:49[36.61],46[33.41] 5 ATCCCCATACT\n"); + w.write("02ebc0c3_3102_4bf0_9c5b_eabcab65414d ICGC-ABMJ-20120706-01 APGI_1992_SNP_260 1 1 604271 604271 1 -888 -888 G G/G A/G G>A -999 -999 0.3973437368 56 2 2 -888 -999 -999 MIN;MR;GERM A:0[0],1[29],G:20[34.1],19[35.79] A:2[37.5],2[32],G:30[36.43],22[38] 4 TGGAGAGGAAC"); + } + + w.close(); + return new File(fileName); + } + + private File createMafFile(String type, String fileName) throws IOException { + BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); + w.write(MAFHEADER + "\n"); + if (type.equals("indel")) { + w.write("Unknown null qcmg.uq.edu.au 37 1 4412134 4412135 0 null INS -0 A A novel null QCMG-66-APGI_1992-ICGC-ABMJ-20120706-01 QCMG-66-APGI_1992-ICGC-ABMP-20091203-10-ND -0 -0 null null null null null Unknown Somatic null Unknown null null null Unknown PASS;HOMCON_4 0;28;28;0;0;0;0 9;52;52;9;0;0;0;\"4 contiguous CTAAAAACACaAAAATTAGCT\" null null null null null null HIGH -- null 0\n"); + w.write("TNFRSF8 0 qcmg.uq.edu.au 37 1 12126362 12126362 0 Intron DEL T -0 -0 novel null QCMG-66-APGI_1992-ICGC-ABMJ-20120706-01 QCMG-66-APGI_1992-ICGC-ABMP-20091203-10-ND -0 -0 null null null null null Unknown Somatic null Unknown null null null Unknown PASS;HOMCON_3 0;67;66;0;0;2;0 15;52;49;16;0;1;0;\"3 contiguous AAGCTCGTTA_TTTAAAAAAA\" ENST00000263932 -888 -888 null null null HIGH -- fill\n"); + } + + if (type.equals("snp")) { + w.write("Unknown 0 qcmg.uq.edu.au 37 1 569492 569492 0 RNA SNP T C T rs147253560 null QCMG-66-APGI_1992-ICGC-ABMJ-20120706-01 QCMG-66-APGI_1992-ICGC-ABMP-20091203-10-ND T T null null null null null Unknown Somatic null Unknown null null null Unknown PASS A:1[35],0[0],C:0[0],1[37],T:42[36.71],47[35.89] C:9[31.33],1[36],T:49[36.61],46[33.41] ENST00000440200 -888 -888 null null null HIGH ATCCCCATACT fill\n"); + } + + w.close(); + return new File(fileName); + } +} diff --git a/qmule/test/org/qcmg/qmule/TestThreading.java-- b/qmule/test/org/qcmg/qmule/TestThreading.java-- new file mode 100644 index 000000000..a50c9d2e0 --- /dev/null +++ b/qmule/test/org/qcmg/qmule/TestThreading.java-- @@ -0,0 +1,55 @@ +package org.qcmg.qmule; + +import java.util.concurrent.atomic.AtomicLong; + +import org.junit.Ignore; + +public class TestThreading { + + private static final int testRuns = 50000000; + + @Ignore + public void testLongUpdate() { + + long counter = 0L; + + long start = System.currentTimeMillis(); + + for (int i = 0 ; 
i < testRuns ; i++) counter++; + + long end = System.currentTimeMillis(); + System.out.println("counter: " + counter); + System.out.println("Time taken: " + (end - start) + "ms"); + + } + + @Ignore + public void testLongUpdateSynchronised() { + + long counter = 0L; + + long start = System.currentTimeMillis(); + + for (int i = 0 ; i < testRuns ; i++) synchronized(this){counter++;} + + long end = System.currentTimeMillis(); + System.out.println("counter: " + counter); + System.out.println("Time taken (synchronised): " + (end - start) + "ms"); + + } + + @Ignore + public void testAtomicLongUpdate() { + + AtomicLong counter = new AtomicLong(); + + long start = System.currentTimeMillis(); + + for (int i = 0 ; i < testRuns ; i++) counter.getAndIncrement(); + + long end = System.currentTimeMillis(); + System.out.println("counter: " + counter.longValue()); + System.out.println("Time taken (Atomic): " + (end - start) + "ms"); + + } +} diff --git a/qmule/test/org/qcmg/qmule/WiggleFromPileupTakeTwoTest.java-- b/qmule/test/org/qcmg/qmule/WiggleFromPileupTakeTwoTest.java-- new file mode 100644 index 000000000..d645bb287 --- /dev/null +++ b/qmule/test/org/qcmg/qmule/WiggleFromPileupTakeTwoTest.java-- @@ -0,0 +1,428 @@ +package org.qcmg.qmule; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; +import org.qcmg.common.commandline.Executor; +import org.qcmg.common.util.FileUtils; + +public class WiggleFromPileupTakeTwoTest { + @Rule + public TemporaryFolder tempFolder = new TemporaryFolder(); + @Rule + public ExpectedException thrown = ExpectedException.none(); + + private File pileupFile; + private File gff3File; + private File wiggleFile; + private File pileupFileGZIP; + private File wiggleFileGZIP; + + @Before + public final void before() { + try { + pileupFile = tempFolder.newFile("wigglePileupTest.pileup"); + wiggleFile = tempFolder.newFile("wigglePileupTest.wiggle"); + gff3File = tempFolder.newFile("wigglePileupTest.gff3"); + pileupFileGZIP = tempFolder.newFile("wigglePileupTest.pileup.gz"); + wiggleFileGZIP = tempFolder.newFile("wigglePileupTest.wiggle.gz"); + createPileupFile(pileupFile); + createPileupFile(pileupFileGZIP); + createGFF3File(gff3File); + assertTrue(pileupFile.exists()); + assertTrue(gff3File.exists()); + assertTrue(pileupFileGZIP.exists()); + } catch (Exception e) { + System.err.println("File creation error in test harness: " + e.getMessage()); + } + } + + +// @Test +// public void testIsPositionInBaitSingleGff() { +// GFF3Record gff = new GFF3Record(); +// gff.setSeqId("chr1"); +// gff.setStart(1); +// gff.setEnd(10); +// +// List gffs = new ArrayList(); +// gffs.add(gff); +// Iterator iter = gffs.iterator(); +//// WiggleFromPileup.setGffRecord(gff); +// +// Assert.assertEquals(false, WiggleFromPileupTakeTwo.isPositionInBait("chr0", 0, iter, iter.next())); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, gff)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff)); +// 
Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff)); +// +// gff.setSeqId("chrX"); +// gff.setStart(1000123); +// gff.setEnd(1000223); +// +//// WiggleFromPileup.setGffRecord(gff); +// +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 0, iter, gff)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 1, iter, gff)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrx", 1000124, iter, gff)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrx", 11, iter, gff)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 11, iter, gff)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000123, iter, gff)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000124, iter, gff)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000223, iter, gff)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 1000224, iter, gff)); +// } + +// @Test +// public void testIsPositionInBaitMultipleGff() { +// GFF3Record gff1 = new GFF3Record(); +// gff1.setSeqId("chr1"); +// gff1.setStart(1); +// gff1.setEnd(10); +// GFF3Record gff2 = new GFF3Record(); +// gff2.setSeqId("chr1"); +// gff2.setStart(11); +// gff2.setEnd(20); +// GFF3Record gff3 = new GFF3Record(); +// gff3.setSeqId("chr1"); +// gff3.setStart(31); +// gff3.setEnd(40); +// +// List gffs = new ArrayList(); +// gffs.add(gff1); +// gffs.add(gff2); +// gffs.add(gff3); +// Iterator iter = gffs.iterator(); +// +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, iter.next())); +// +//// Assert.assertEquals(gff1, WiggleFromPileup.getGffRecord()); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff1)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff1)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff1)); +// // iterator should have been advanced +// Assert.assertEquals(gff2, WiggleFromPileup.getGffRecord()); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 20, iter, gff2)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 21, iter, gff2)); +// // iterator should have been advanced +// Assert.assertEquals(gff3, WiggleFromPileup.getGffRecord()); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 29, iter, gff3)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 31, iter, gff3)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 40, iter, gff3)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 41, iter, gff3)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 141, iter, gff3)); +// } + +// @Test +// public void testIsPositionInBaitMultipleGffMultipleChromosomes() { +// GFF3Record gff1 = new GFF3Record(); +// gff1.setSeqId("chr1"); +// gff1.setStart(1); +// gff1.setEnd(10); +// GFF3Record gff2 = new GFF3Record(); +// gff2.setSeqId("chr1"); +// gff2.setStart(11); +// gff2.setEnd(20); +// GFF3Record gff3 = new GFF3Record(); +// gff3.setSeqId("chr1"); +// gff3.setStart(31); +// gff3.setEnd(40); +// GFF3Record gff4 = new GFF3Record(); +// gff4.setSeqId("chr2"); +// gff4.setStart(15); +// gff4.setEnd(25); +// GFF3Record gff5 = new GFF3Record(); +// 
gff5.setSeqId("chr2"); +// gff5.setStart(26); +// gff5.setEnd(40); +// GFF3Record gff6 = new GFF3Record(); +// gff6.setSeqId("chrX"); +// gff6.setStart(100026); +// gff6.setEnd(100040); +// +// List gffs = new ArrayList(); +// gffs.add(gff1); +// gffs.add(gff2); +// gffs.add(gff3); +// gffs.add(gff4); +// gffs.add(gff5); +// gffs.add(gff6); +// Iterator iter = gffs.iterator(); +// +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr0", 0, iter, iter.next())); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, gff1)); +// +//// Assert.assertEquals(gff1, WiggleFromPileup.getGffRecord()); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff1)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff1)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff1)); +// // iterator should have been advanced +// Assert.assertEquals(gff2, WiggleFromPileup.getGffRecord()); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 20, iter, gff2)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 21, iter, gff2)); +// // iterator should have been advanced +// Assert.assertEquals(gff3, WiggleFromPileup.getGffRecord()); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 29, iter, gff3)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 31, iter, gff3)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 40, iter, gff3)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 41, iter, gff3)); +// // iterator should have been advanced +// Assert.assertEquals(gff4, WiggleFromPileup.getGffRecord()); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 141, iter, gff4)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 142, iter, gff4)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 1000142, iter, gff4)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 1, iter, gff4)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 2, iter, gff4)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 15, iter, gff4)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 25, iter, gff4)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 26, iter, gff4)); +// // iterator should have been advanced +// Assert.assertEquals(gff5, WiggleFromPileup.getGffRecord()); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 40, iter, gff5)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 41, iter, gff5)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr3", 15, iter, gff5)); +// // iterator should have been advanced +// Assert.assertEquals(gff6, WiggleFromPileup.getGffRecord()); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr3", 10015, iter, gff6)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr4", 10015, iter, gff6)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr5", 10015, iter, gff6)); +// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr15", 10015, iter, gff6)); +// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 100026, iter, gff6)); +// +// } + + @Test + public final void callWithNoArgs() throws Exception { + String command = ""; 
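+		// No arguments supplied: the tool should exit with code 1, write nothing to stdout and report the problem on stderr.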
+ Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); + assertTrue(1 == exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); + } + + @Test + public final void callWithNoInputFile() throws Exception { + String command = "-log ./logfile -o " + tempFolder.getRoot().getAbsolutePath(); + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); + assertTrue(1 == exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); + } + + @Test + public final void callWithMissingArgs() throws Exception { + String command = "-log ./logfile -o blah.wiggle -i " + pileupFile.getAbsolutePath(); + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); + assertTrue(1 == exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); + } + + @Test + public final void callWithValidArguments() throws Exception { + ExpectedException.none(); + String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 1 -tumourCoverage 1 -i " + pileupFile.getAbsolutePath() + + " -i " + gff3File.getAbsolutePath() + + " -gffRegions exon" + + " -o " + wiggleFile.getAbsolutePath(); + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); + assertEquals(0, exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + + // check the wiggle file + InputStream reader = new FileInputStream(wiggleFile); + assertEquals(29, examineWiggle(reader)); + } + + @Test + public final void callWithValidArgumentsLargeCoverage() throws Exception { + ExpectedException.none(); + String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 50 -tumourCoverage 50 -i " + pileupFile.getAbsolutePath() + + " -i " + gff3File.getAbsolutePath() + + " -gffRegions exon" + + " -o " + wiggleFile.getAbsolutePath(); + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); + assertEquals(0, exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + + // check the wiggle file + InputStream reader = new FileInputStream(wiggleFile); + assertEquals(0, examineWiggle(reader)); + } + + @Test + public final void callWithZippedFiles() throws Exception { + ExpectedException.none(); + String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 20 -tumourCoverage 20 -i " + pileupFileGZIP.getAbsolutePath() + + " -i " + gff3File.getAbsolutePath() + + " -gffRegions exon" + + " -o " + wiggleFileGZIP.getAbsolutePath(); + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); + assertEquals(0, exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + + // check the wiggle file + InputStream reader = new GZIPInputStream(new FileInputStream(wiggleFileGZIP)); + assertEquals(14, examineWiggle(reader)); + } + + private int examineWiggle(InputStream reader) throws IOException { + int count = 0; + BufferedReader fr = new BufferedReader(new InputStreamReader(reader)); + String line = fr.readLine(); // first line has the header + while ((line = fr.readLine()) != null) { + if (line.startsWith("fixedStep")) continue; + count += Integer.parseInt(line); + } + return count; + } + + private void createPileupFile(File pileupFile) throws IOException { + 
+ OutputStream os = FileUtils.isFileNameGZip(pileupFile) ? new GZIPOutputStream( new FileOutputStream(pileupFile)) + : new FileOutputStream(pileupFile); + +// OutputStream os = new FileOutputStream(pileupFile); + PrintStream ps = new PrintStream(os); + + ps.println("chr1\t14923\tG\t8\t.......^!.\tIIIIIIIE\t7\t,.....^!.\t5IIIIIE\t10\t.........^T.\t0IIIIIIIIE\t7\t...,...\tIIIIIII"); + ps.println("chr1\t14924\tA\t9\t........^!.\tEI@III?IB\t7\t,......\t@IIIIII\t10\t..........\t-IIIIIIIII\t8\t...,...^!.\tIIII/IIB"); + ps.println("chr1\t14925\tA\t11\t.........^!.^P.\tIIDIIIHIEEE\t8\t,......^N.\tBIIIIIIE\t10\t..........\t)IIIIIIIII\t8\t...,....\tIII:4IIE"); + ps.println("chr1\t14926\tT\t11\t...........\tDIIIIIIIIII\t8\t,.......\t9IIIIIII\t10\t..........\t-IIIIIIIII\t8\t...,....\tIIH;DIII"); + ps.println("chr1\t14927\tT\t11\t...........\tDIIIIIIIIII\t8\t,.......\t8IIIIIII\t11\t..........^O.\t&FIIIIIIIIE\t8\t...,....\tII:>IIII"); + ps.println("chr1\t14928\tA\t11\t...........\tIIIIIIIIIII\t9\t,.......^(.\tGAIIIIIIE\t12\t...........^G.\t&CIBIIII9IIE\t8\t...,....\tII;0DIII"); + ps.println("chr1\t14929\tC\t11\t...........\tIIII\t9\t,........\tB37%I7III\t12\t............\t9FI77IIIIIII\t8\t...,....\t?I;>4I7I"); + ps.println("chr1\t14932\tG\t11\t...........\tI=IIIIIIIII\t9\t,........\t?@IIIIIII\t12\t............\t>IIIIIIIIIII\t8\t...,....\t?ICI@III"); + ps.println("chr1\t14933\tG\t11\t...........\tEAIIIIDIIII\t9\t,........\tD8III?III\t12\t............\t3EIIIIIIIIII\t9\t...,....^L.\t8I9HIIIIE"); + ps.println("chr1\t14934\tT\t11\t...........\t9I>IIIIIIFIIIIE\t9\t,........\tHCIIIIIII\t12\t............\t*IIIIIIIIIII\t9\t...,.....\tIII7IIIII"); + ps.println("chr1\t14936\tC\t12\t............\tI@IIIIIIIIII\t9\t,........\tBIIDIIIII\t12\t............\t8GIIIIIIIIII\t9\t...,.....\tIII,BIIII"); + ps.println("chr1\t14937\tT\t12\t............\tIIIIIIIIIIII\t9\t,........\t8IIIIFIII\t12\t............\t:IIIIIIIIIII\t9\t...,.....\tBII?)IIII"); + ps.println("chr1\t14938\tG\t12\t....$........\t%=I1II6IFIII\t9\t,........\tD%IIB/IHI\t12\t............\t3II>IIIIIIHI\t9\t...,.....\t0IAI/I?II"); + ps.println("chr1\t14939\tG\t11\t...........\t%@IHI:IIIHI\t9\t,........\tI%II@CIDI\t12\t............\t7IICIIIIII9A\t9\t...,.....\t1IAI;I9II"); + ps.println("chr1\t14940\tC\t11\t...........\t:IF?I-IIIII\t9\t,........\tF+II+IIII\t12\t......$......\t2%I%A>I>IIIA\t9\t...,.....\t3?)G:III"); + ps.println("chr1\t14944\tG\t11\t.....C.....\t(//AI%IIIFI\t9\t,$........\tI=II%ICIII\t8\t.$.......\t2II@6IBI\t9\t.........\t?:16IIB=,\t8\t..,.....\t9/%&>CI0"); + ps.println("chr1\t14946\tG\t11\t...........\t3I>II%I@I(I\t7\t.......\tIICIIII\t9\t.........\t4ID?II@GD\t8\t..,.....\tI@%;HIII"); + ps.println("chr1\t14947\tC\t11\t...$........\tDI?IIAIDI(I\t7\t.......\tIIIIIII\t9\t.$.....N$..\tEI58II!(B\t8\t..,.....\tI@C?IIII"); + ps.println("chr1\t14948\tG\t10\t.$.$........\t=;-%3I6I"); + ps.println("chr1\t14949\tG\t8\t.......$.\t5%6I>I%D\t7\t.......\tBI:%I;B\t6\t......\t*1,:0%\t7\t.$.,....\t'1I59;'"); + ps.println("chr1\t14950\tG\t7\t.$......\t?H3B+B7\t7\t.$......\t:+%%D7@\t6\t......\t%-%50%\t6\t.,....\t-I3'C'"); + ps.println("chr1\t14951\tC\t6\t......\tG2=+95\t6\t......\t)%%A6C\t6\t......\t%9%C89\t6\t.,....\t8H6(=%"); + + ps.close(); + os.close(); + } + + private void createGFF3File(File pileupFile) throws IOException { + + OutputStream os = FileUtils.isFileNameGZip( pileupFile) ? 
new GZIPOutputStream( new FileOutputStream(pileupFile)) + : new FileOutputStream(pileupFile); + +// OutputStream os = new FileOutputStream(pileupFile); + PrintStream ps = new PrintStream(os); + + + ps.println("##gff-version 3"); + ps.println("# Created by: simple_segmenter.pl[v2940]"); + ps.println("# Created on: Tue May 24 01:48:54 2011"); + ps.println("# Commandline: -v -g -l -i SureSelect_All_Exon_50mb_filtered_exons_1-200_20110524.gff3 -o SureSelect_All_Exon_50mb_filtered_exons_1-200_20110524_shoulders.gff3 -f exon,100,100,100 -f highexon,300 -f lowexon"); + ps.println("chr1 simple_segmenter.pl[v2940] fill 1 14166 . . . ID=gnl|fill"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 14167 14266 . + . ID=gnl|exon_3_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 14267 14366 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 14367 14466 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14467 14587 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 14588 14638 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14639 14883 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon 14884 14942 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14943 15064 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15065 15164 . + . ID=gnl|exon_1_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 15165 15264 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 15265 15364 . + . ID=gnl|exon_3_100"); + ps.println("chr1 simple_segmenter.pl[v2940] fill 15365 15370 . . . ID=gnl|fill"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 15371 15470 . + . ID=gnl|exon_3_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 15471 15570 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15571 15670 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 15671 15990 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15991 16090 . + . ID=gnl|exon_1_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 16091 16190 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 16191 16390 . + . ID=gnl|exon_3_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 16391 16490 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 16491 16590 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 16591 16719 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 16720 16749 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 16750 17074 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 17075 17177 . + . 
ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 17178 17420 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 17421 17442 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 17443 18108 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 18109 18202 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 18203 18448 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 18449 18548 . + . ID=gnl|exon_1_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 18549 18648 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 18649 18848 . + . ID=gnl|exon_3_100"); +// ps.println("##gff-version 3"); +// ps.println("# Created by: simple_segmenter.pl[v2940]"); +// ps.println("# Created on: Tue May 24 01:48:54 2011"); +// ps.println("# Commandline: -v -g -l -i SureSelect_All_Exon_50mb_filtered_baits_1-200_20110524.gff3 -o SureSelect_All_Exon_50mb_filtered_baits_1-200_20110524_shoulders.gff3 -f bait,100,100,100 -f highbait,300 -f lowbait"); +// ps.println("chr1 simple_segmenter.pl[v2940] fill 1 14166 . . . ID=gnl|fill"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 14167 14266 . + . ID=gnl|bait_3_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 14267 14366 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14367 14466 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14467 14587 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14588 14638 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14639 14883 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14884 14942 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14943 15064 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15065 15164 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 15165 15264 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 15265 15364 . + . ID=gnl|bait_3_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] fill 15365 15370 . . . ID=gnl|fill"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 15371 15470 . + . ID=gnl|bait_3_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 15471 15570 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15571 15670 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 15671 15990 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15991 16090 . + . 
ID=gnl|bait_1_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 16091 16190 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 16191 16390 . + . ID=gnl|bait_3_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 16391 16490 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 16491 16590 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 16591 16719 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 16720 16749 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 16750 17074 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 17075 17177 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 17178 17420 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 17421 17442 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 17443 18108 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 18109 18202 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 18203 18448 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 18449 18548 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 18549 18648 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 18649 18848 . + . 
ID=gnl|bait_3_100"); + + ps.close(); + os.close(); + } +} diff --git a/qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java b/qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java index 4cd748f37..291428f54 100644 --- a/qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java +++ b/qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java @@ -27,7 +27,7 @@ import org.junit.rules.TemporaryFolder; import org.qcmg.common.commandline.Executor; import org.qcmg.common.util.FileUtils; -import org.qcmg.gff3.GFF3Record; +import org.qcmg.qmule.gff3.GFF3Record; public class WiggleFromPileupTest { @Rule diff --git a/qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java-- b/qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java-- new file mode 100644 index 000000000..4cd748f37 --- /dev/null +++ b/qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java-- @@ -0,0 +1,431 @@ +package org.qcmg.qmule; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +import junit.framework.Assert; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; +import org.qcmg.common.commandline.Executor; +import org.qcmg.common.util.FileUtils; +import org.qcmg.gff3.GFF3Record; + +public class WiggleFromPileupTest { + @Rule + public TemporaryFolder tempFolder = new TemporaryFolder(); + @Rule + public ExpectedException thrown = ExpectedException.none(); + + private File pileupFile; + private File gff3File; + private File wiggleFile; + private File pileupFileGZIP; + private File wiggleFileGZIP; + + @Before + public final void before() { + try { + pileupFile = tempFolder.newFile("wigglePileupTest.pileup"); + wiggleFile = tempFolder.newFile("wigglePileupTest.wiggle"); + gff3File = tempFolder.newFile("wigglePileupTest.gff3"); + pileupFileGZIP = tempFolder.newFile("wigglePileupTest.pileup.gz"); + wiggleFileGZIP = tempFolder.newFile("wigglePileupTest.wiggle.gz"); + createPileupFile(pileupFile); + createPileupFile(pileupFileGZIP); + createGFF3File(gff3File); + assertTrue(pileupFile.exists()); + assertTrue(gff3File.exists()); + assertTrue(pileupFileGZIP.exists()); + } catch (Exception e) { + System.err.println("File creation error in test harness: " + e.getMessage()); + } + } + + + @Test + public void testIsPositionInBaitSingleGff() { + GFF3Record gff = new GFF3Record(); + gff.setSeqId("chr1"); + gff.setStart(1); + gff.setEnd(10); + + List gffs = new ArrayList(); + gffs.add(gff); + Iterator iter = gffs.iterator(); +// WiggleFromPileup.setGffRecord(gff); + + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr0", 0, iter, iter.next())); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, gff)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff)); + + gff.setSeqId("chrX"); + gff.setStart(1000123); + gff.setEnd(1000223); + +// 
WiggleFromPileup.setGffRecord(gff); + + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 0, iter, gff)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 1, iter, gff)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrx", 1000124, iter, gff)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrx", 11, iter, gff)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 11, iter, gff)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000123, iter, gff)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000124, iter, gff)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000223, iter, gff)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 1000224, iter, gff)); + } + + @Test + public void testIsPositionInBaitMultipleGff() { + GFF3Record gff1 = new GFF3Record(); + gff1.setSeqId("chr1"); + gff1.setStart(1); + gff1.setEnd(10); + GFF3Record gff2 = new GFF3Record(); + gff2.setSeqId("chr1"); + gff2.setStart(11); + gff2.setEnd(20); + GFF3Record gff3 = new GFF3Record(); + gff3.setSeqId("chr1"); + gff3.setStart(31); + gff3.setEnd(40); + + List gffs = new ArrayList(); + gffs.add(gff1); + gffs.add(gff2); + gffs.add(gff3); + Iterator iter = gffs.iterator(); + + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, iter.next())); + +// Assert.assertEquals(gff1, WiggleFromPileup.getGffRecord()); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff1)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff1)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff1)); + // iterator should have been advanced + Assert.assertEquals(gff2, WiggleFromPileup.getGffRecord()); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 20, iter, gff2)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 21, iter, gff2)); + // iterator should have been advanced + Assert.assertEquals(gff3, WiggleFromPileup.getGffRecord()); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 29, iter, gff3)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 31, iter, gff3)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 40, iter, gff3)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 41, iter, gff3)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 141, iter, gff3)); + } + + @Test + public void testIsPositionInBaitMultipleGffMultipleChromosomes() { + GFF3Record gff1 = new GFF3Record(); + gff1.setSeqId("chr1"); + gff1.setStart(1); + gff1.setEnd(10); + GFF3Record gff2 = new GFF3Record(); + gff2.setSeqId("chr1"); + gff2.setStart(11); + gff2.setEnd(20); + GFF3Record gff3 = new GFF3Record(); + gff3.setSeqId("chr1"); + gff3.setStart(31); + gff3.setEnd(40); + GFF3Record gff4 = new GFF3Record(); + gff4.setSeqId("chr2"); + gff4.setStart(15); + gff4.setEnd(25); + GFF3Record gff5 = new GFF3Record(); + gff5.setSeqId("chr2"); + gff5.setStart(26); + gff5.setEnd(40); + GFF3Record gff6 = new GFF3Record(); + gff6.setSeqId("chrX"); + gff6.setStart(100026); + gff6.setEnd(100040); + + List gffs = new ArrayList(); + gffs.add(gff1); + gffs.add(gff2); + gffs.add(gff3); + gffs.add(gff4); + gffs.add(gff5); + gffs.add(gff6); + Iterator iter = gffs.iterator(); + + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr0", 0, 
iter, iter.next())); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, gff1)); + +// Assert.assertEquals(gff1, WiggleFromPileup.getGffRecord()); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff1)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff1)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff1)); + // iterator should have been advanced + Assert.assertEquals(gff2, WiggleFromPileup.getGffRecord()); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 20, iter, gff2)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 21, iter, gff2)); + // iterator should have been advanced + Assert.assertEquals(gff3, WiggleFromPileup.getGffRecord()); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 29, iter, gff3)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 31, iter, gff3)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 40, iter, gff3)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 41, iter, gff3)); + // iterator should have been advanced + Assert.assertEquals(gff4, WiggleFromPileup.getGffRecord()); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 141, iter, gff4)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 142, iter, gff4)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 1000142, iter, gff4)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 1, iter, gff4)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 2, iter, gff4)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 15, iter, gff4)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 25, iter, gff4)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 26, iter, gff4)); + // iterator should have been advanced + Assert.assertEquals(gff5, WiggleFromPileup.getGffRecord()); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 40, iter, gff5)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 41, iter, gff5)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr3", 15, iter, gff5)); + // iterator should have been advanced + Assert.assertEquals(gff6, WiggleFromPileup.getGffRecord()); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr3", 10015, iter, gff6)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr4", 10015, iter, gff6)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr5", 10015, iter, gff6)); + Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr15", 10015, iter, gff6)); + Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 100026, iter, gff6)); + + } + + @Test + public final void callWithNoArgs() throws Exception { + String command = ""; + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); + assertTrue(1 == exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); + } + + @Test + public final void callWithNoInputFile() throws Exception { + String command = "-log ./logfile -o " + tempFolder.getRoot().getAbsolutePath(); + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); + assertTrue(1 == exec.getErrCode()); + assertTrue(0 == 
exec.getOutputStreamConsumer().getLines().length); + assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); + } + + @Test + public final void callWithMissingArgs() throws Exception { + String command = "-log ./logfile -o blah.wiggle -i " + pileupFile.getAbsolutePath(); + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); + assertTrue(1 == exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); + } + + @Test + public final void callWithValidArguments() throws Exception { + ExpectedException.none(); + String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 1 -tumourCoverage 1 -i " + pileupFile.getAbsolutePath() + + " -i " + gff3File.getAbsolutePath() + + " -o " + wiggleFile.getAbsolutePath(); + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); + assertEquals(0, exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + + // check the wiggle file + InputStream reader = new FileInputStream(wiggleFile); + assertEquals(29, examineWiggle(reader)); + } + + @Test + public final void callWithValidArgumentsLargeCoverage() throws Exception { + ExpectedException.none(); + String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 50 -tumourCoverage 50 -i " + pileupFile.getAbsolutePath() + + " -i " + gff3File.getAbsolutePath() + + " -o " + wiggleFile.getAbsolutePath(); + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); + assertEquals(0, exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + + // check the wiggle file + InputStream reader = new FileInputStream(wiggleFile); + assertEquals(0, examineWiggle(reader)); + } + + @Test + public final void callWithZippedFiles() throws Exception { + ExpectedException.none(); + String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 20 -tumourCoverage 20 -i " + pileupFileGZIP.getAbsolutePath() + + " -i " + gff3File.getAbsolutePath() + + " -o " + wiggleFileGZIP.getAbsolutePath(); + Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); + assertEquals(0, exec.getErrCode()); + assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); + + // check the wiggle file + InputStream reader = new GZIPInputStream(new FileInputStream(wiggleFileGZIP)); + assertEquals(14, examineWiggle(reader)); + } + + private int examineWiggle(InputStream reader) throws IOException { + int count = 0; + BufferedReader fr = new BufferedReader(new InputStreamReader(reader)); + String line = fr.readLine(); // first line has the header + while ((line = fr.readLine()) != null) { + if (line.startsWith("fixedStep")) continue; + count += Integer.parseInt(line); + } + return count; + } + + private void createPileupFile(File pileupFile) throws IOException { + + OutputStream os = FileUtils.isFileNameGZip(pileupFile) ? 
new GZIPOutputStream( new FileOutputStream(pileupFile)) + : new FileOutputStream(pileupFile); + +// OutputStream os = new FileOutputStream(pileupFile); + PrintStream ps = new PrintStream(os); + + ps.println("chr1\t14923\tG\t8\t.......^!.\tIIIIIIIE\t7\t,.....^!.\t5IIIIIE\t10\t.........^T.\t0IIIIIIIIE\t7\t...,...\tIIIIIII"); + ps.println("chr1\t14924\tA\t9\t........^!.\tEI@III?IB\t7\t,......\t@IIIIII\t10\t..........\t-IIIIIIIII\t8\t...,...^!.\tIIII/IIB"); + ps.println("chr1\t14925\tA\t11\t.........^!.^P.\tIIDIIIHIEEE\t8\t,......^N.\tBIIIIIIE\t10\t..........\t)IIIIIIIII\t8\t...,....\tIII:4IIE"); + ps.println("chr1\t14926\tT\t11\t...........\tDIIIIIIIIII\t8\t,.......\t9IIIIIII\t10\t..........\t-IIIIIIIII\t8\t...,....\tIIH;DIII"); + ps.println("chr1\t14927\tT\t11\t...........\tDIIIIIIIIII\t8\t,.......\t8IIIIIII\t11\t..........^O.\t&FIIIIIIIIE\t8\t...,....\tII:>IIII"); + ps.println("chr1\t14928\tA\t11\t...........\tIIIIIIIIIII\t9\t,.......^(.\tGAIIIIIIE\t12\t...........^G.\t&CIBIIII9IIE\t8\t...,....\tII;0DIII"); + ps.println("chr1\t14929\tC\t11\t...........\tIIII\t9\t,........\tB37%I7III\t12\t............\t9FI77IIIIIII\t8\t...,....\t?I;>4I7I"); + ps.println("chr1\t14932\tG\t11\t...........\tI=IIIIIIIII\t9\t,........\t?@IIIIIII\t12\t............\t>IIIIIIIIIII\t8\t...,....\t?ICI@III"); + ps.println("chr1\t14933\tG\t11\t...........\tEAIIIIDIIII\t9\t,........\tD8III?III\t12\t............\t3EIIIIIIIIII\t9\t...,....^L.\t8I9HIIIIE"); + ps.println("chr1\t14934\tT\t11\t...........\t9I>IIIIIIFIIIIE\t9\t,........\tHCIIIIIII\t12\t............\t*IIIIIIIIIII\t9\t...,.....\tIII7IIIII"); + ps.println("chr1\t14936\tC\t12\t............\tI@IIIIIIIIII\t9\t,........\tBIIDIIIII\t12\t............\t8GIIIIIIIIII\t9\t...,.....\tIII,BIIII"); + ps.println("chr1\t14937\tT\t12\t............\tIIIIIIIIIIII\t9\t,........\t8IIIIFIII\t12\t............\t:IIIIIIIIIII\t9\t...,.....\tBII?)IIII"); + ps.println("chr1\t14938\tG\t12\t....$........\t%=I1II6IFIII\t9\t,........\tD%IIB/IHI\t12\t............\t3II>IIIIIIHI\t9\t...,.....\t0IAI/I?II"); + ps.println("chr1\t14939\tG\t11\t...........\t%@IHI:IIIHI\t9\t,........\tI%II@CIDI\t12\t............\t7IICIIIIII9A\t9\t...,.....\t1IAI;I9II"); + ps.println("chr1\t14940\tC\t11\t...........\t:IF?I-IIIII\t9\t,........\tF+II+IIII\t12\t......$......\t2%I%A>I>IIIA\t9\t...,.....\t3?)G:III"); + ps.println("chr1\t14944\tG\t11\t.....C.....\t(//AI%IIIFI\t9\t,$........\tI=II%ICIII\t8\t.$.......\t2II@6IBI\t9\t.........\t?:16IIB=,\t8\t..,.....\t9/%&>CI0"); + ps.println("chr1\t14946\tG\t11\t...........\t3I>II%I@I(I\t7\t.......\tIICIIII\t9\t.........\t4ID?II@GD\t8\t..,.....\tI@%;HIII"); + ps.println("chr1\t14947\tC\t11\t...$........\tDI?IIAIDI(I\t7\t.......\tIIIIIII\t9\t.$.....N$..\tEI58II!(B\t8\t..,.....\tI@C?IIII"); + ps.println("chr1\t14948\tG\t10\t.$.$........\t=;-%3I6I"); + ps.println("chr1\t14949\tG\t8\t.......$.\t5%6I>I%D\t7\t.......\tBI:%I;B\t6\t......\t*1,:0%\t7\t.$.,....\t'1I59;'"); + ps.println("chr1\t14950\tG\t7\t.$......\t?H3B+B7\t7\t.$......\t:+%%D7@\t6\t......\t%-%50%\t6\t.,....\t-I3'C'"); + ps.println("chr1\t14951\tC\t6\t......\tG2=+95\t6\t......\t)%%A6C\t6\t......\t%9%C89\t6\t.,....\t8H6(=%"); + + ps.close(); + os.close(); + } + + private void createGFF3File(File pileupFile) throws IOException { + + OutputStream os = FileUtils.isFileNameGZip(pileupFile) ? 
new GZIPOutputStream( new FileOutputStream(pileupFile)) + : new FileOutputStream(pileupFile); + +// OutputStream os = new FileOutputStream(pileupFile); + PrintStream ps = new PrintStream(os); + + + ps.println("##gff-version 3"); + ps.println("# Created by: simple_segmenter.pl[v2940]"); + ps.println("# Created on: Tue May 24 01:48:54 2011"); + ps.println("# Commandline: -v -g -l -i SureSelect_All_Exon_50mb_filtered_exons_1-200_20110524.gff3 -o SureSelect_All_Exon_50mb_filtered_exons_1-200_20110524_shoulders.gff3 -f exon,100,100,100 -f highexon,300 -f lowexon"); + ps.println("chr1 simple_segmenter.pl[v2940] fill 1 14166 . . . ID=gnl|fill"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 14167 14266 . + . ID=gnl|exon_3_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 14267 14366 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 14367 14466 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14467 14587 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 14588 14638 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14639 14883 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon 14884 14942 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14943 15064 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15065 15164 . + . ID=gnl|exon_1_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 15165 15264 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 15265 15364 . + . ID=gnl|exon_3_100"); + ps.println("chr1 simple_segmenter.pl[v2940] fill 15365 15370 . . . ID=gnl|fill"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 15371 15470 . + . ID=gnl|exon_3_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 15471 15570 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15571 15670 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 15671 15990 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15991 16090 . + . ID=gnl|exon_1_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 16091 16190 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 16191 16390 . + . ID=gnl|exon_3_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 16391 16490 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 16491 16590 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 16591 16719 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 16720 16749 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 16750 17074 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 17075 17177 . + . 
ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 17178 17420 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 17421 17442 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 17443 18108 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 18109 18202 . + . ID=gnl|exon_1_100"); + ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 18203 18448 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 18449 18548 . + . ID=gnl|exon_1_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 18549 18648 . + . ID=gnl|exon_2_100"); + ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 18649 18848 . + . ID=gnl|exon_3_100"); +// ps.println("##gff-version 3"); +// ps.println("# Created by: simple_segmenter.pl[v2940]"); +// ps.println("# Created on: Tue May 24 01:48:54 2011"); +// ps.println("# Commandline: -v -g -l -i SureSelect_All_Exon_50mb_filtered_baits_1-200_20110524.gff3 -o SureSelect_All_Exon_50mb_filtered_baits_1-200_20110524_shoulders.gff3 -f bait,100,100,100 -f highbait,300 -f lowbait"); +// ps.println("chr1 simple_segmenter.pl[v2940] fill 1 14166 . . . ID=gnl|fill"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 14167 14266 . + . ID=gnl|bait_3_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 14267 14366 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14367 14466 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14467 14587 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14588 14638 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14639 14883 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14884 14942 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14943 15064 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15065 15164 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 15165 15264 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 15265 15364 . + . ID=gnl|bait_3_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] fill 15365 15370 . . . ID=gnl|fill"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 15371 15470 . + . ID=gnl|bait_3_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 15471 15570 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15571 15670 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 15671 15990 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15991 16090 . + . 
ID=gnl|bait_1_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 16091 16190 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 16191 16390 . + . ID=gnl|bait_3_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 16391 16490 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 16491 16590 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 16591 16719 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 16720 16749 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 16750 17074 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 17075 17177 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 17178 17420 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 17421 17442 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 17443 18108 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 18109 18202 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 18203 18448 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 18449 18548 . + . ID=gnl|bait_1_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 18549 18648 . + . ID=gnl|bait_2_100"); +// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 18649 18848 . + . 
ID=gnl|bait_3_100"); + + ps.close(); + os.close(); + } +} diff --git a/qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java b/qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java index 9c89fc8ca..c8a332287 100644 --- a/qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java +++ b/qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java @@ -5,7 +5,7 @@ import org.junit.Ignore; import org.junit.Test; import org.qcmg.common.util.SnpUtils; -import org.qcmg.tab.TabbedRecord; +import org.qcmg.qmule.tab.TabbedRecord; public class CompareSnpsTest { diff --git a/qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java-- b/qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java-- new file mode 100644 index 000000000..c8a332287 --- /dev/null +++ b/qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java-- @@ -0,0 +1,70 @@ +package org.qcmg.qmule.snppicker; + + +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; +import org.qcmg.common.util.SnpUtils; +import org.qcmg.qmule.tab.TabbedRecord; + +public class CompareSnpsTest { + + @Test + public void testIsStopNonSynonymous() { + try { + CompareSnps.isStopNonSynonymous(null, -1); + Assert.fail("should have thrown a wobbly"); + } catch (IllegalArgumentException e) {} + + TabbedRecord tr = new TabbedRecord(); + try { + CompareSnps.isStopNonSynonymous(tr, -1); + Assert.fail("should have thrown a wobbly"); + } catch (IllegalArgumentException e) {} + + tr.setData(""); + Assert.assertFalse(CompareSnps.isStopNonSynonymous(tr, -1)); + tr.setData("1\t2\t3\t4\t5"); + Assert.assertFalse(CompareSnps.isStopNonSynonymous(tr, -1)); + tr.setData("1\t2\t3\t4\t5\tSTOP\t7\t8"); + Assert.assertFalse(CompareSnps.isStopNonSynonymous(tr, -1)); + Assert.assertTrue(CompareSnps.isStopNonSynonymous(tr, 5)); + tr.setData("1\t2\t3\t4\t5\t6\t7\t8\tNON_SYNONYMOUS"); + Assert.assertTrue(CompareSnps.isStopNonSynonymous(tr, -1)); + Assert.assertFalse(CompareSnps.isStopNonSynonymous(tr, 5)); + + } + + @Ignore + public void testIsClassAB() { + try { + CompareSnps.isClassAB(null, -1); + Assert.fail("should have thrown a wobbly"); + } catch (IllegalArgumentException e) {} + + TabbedRecord tr = new TabbedRecord(); + try { + CompareSnps.isClassAB(tr, -1); + Assert.fail("should have thrown a wobbly"); + } catch (IllegalArgumentException e) {} + + tr.setData(""); + Assert.assertFalse(CompareSnps.isClassAB(tr, -1)); + tr.setData("1\t2\t3\t4\t5"); + Assert.assertFalse(CompareSnps.isClassAB(tr, -1)); + tr.setData("1\t2\t3\t4\t5\tSTOP\t7\t8"); + Assert.assertFalse(CompareSnps.isClassAB(tr, -1)); + Assert.assertFalse(CompareSnps.isClassAB(tr, 5)); + tr.setData("1\t2\t3\t4\t5\t6\t7\t8\tNON_SYNONYMOUS"); + Assert.assertFalse(CompareSnps.isClassAB(tr, -1)); + Assert.assertFalse(CompareSnps.isClassAB(tr, 5)); + + tr.setData("1\t2\t3\t4\t5\t6\t7\t8\t--"); + Assert.assertTrue(CompareSnps.isClassAB(tr, -1)); + tr.setData("1\t2\t3\t" + SnpUtils.LESS_THAN_3_READS_NORMAL + "\t5\t6\t7\t8\t--"); + Assert.assertTrue(CompareSnps.isClassAB(tr, 3)); + Assert.assertFalse(CompareSnps.isClassAB(tr, 4)); + + } + +} diff --git a/qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java-- b/qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java-- new file mode 100644 index 000000000..7e4b342e7 --- /dev/null +++ b/qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java-- @@ -0,0 +1,154 @@ +package org.qcmg.qmule.snppicker; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import htsjdk.samtools.SAMRecord; + +import org.junit.Assert; +import 
org.junit.Before; +import org.junit.Test; +import org.qcmg.common.util.SnpUtils; +import org.qcmg.common.vcf.header.VcfHeaderUtils; +import org.qcmg.pileup.QSnpRecord; + +public class GatkUniqueSnpsTest { + + + private static List samRecords = new ArrayList(); + + @Before + public void setup() throws IOException { + SAMRecord record = new SAMRecord(null); + record.setAlignmentStart(100); + record.setReferenceName("chr1"); + record.setReadBases(new byte[] {'A', 'C', 'G', 'T', 'A','A','A','A','A','A','A','A','A'}); + samRecords.add(record); + + for (int i = 1 ; i < 12 ; i++) { + record = new SAMRecord(null); + record.setAlignmentStart(100+i); + record.setReferenceName("chr1"); + record.setReadBases(new byte[] {'A', 'A', 'A', 'A', 'A','A','A','A','A','A','A','A','A'}); + samRecords.add(record); + } + } + + @Test + public void testFailingRead() throws Exception { + SAMRecord record = new SAMRecord(null); + record.setReferenceName("chr1"); + record.setAlignmentStart(168512433); +// record.setAlignmentEnd(168512486); + record.setCigarString("7M4D43M"); + record.setReadString("AGCTGGTATTGCACATGGTGTGGACCCCATCAAGCTGGTTAACTTTCTGN"); + List records = new ArrayList(); + records.add(record); + + QSnpRecord qpr = new QSnpRecord("chr1", 168512486, "G"); + qpr.setAlt("C"); + + GatkUniqueSnps.examinePileup(records, qpr); + + Assert.assertNotNull(qpr.getAnnotation()); + Assert.assertFalse(qpr.getAnnotation().contains("mutation also found in pileup of normal")); + Assert.assertTrue(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); + } + + @Test + public void testFailingRead2() throws Exception{ + SAMRecord record = new SAMRecord(null); + record.setReferenceName("chr1"); + record.setAlignmentStart(55524198); + record.setCigarString("1H49M"); + record.setReadString("TGGTCAGCACACTGGGGGCCTACACGGATGGCCACAGCCATCGCCCGCT"); + List records = new ArrayList(); + records.add(record); + + record = new SAMRecord(null); + record.setReferenceName("chr1"); + record.setAlignmentStart(55524210); + record.setCigarString("13H37M"); + record.setReadString("TCGGGGCCTACACGGATGGCCACAGCCATCGCCCGCT"); + records.add(record); + + record = new SAMRecord(null); + record.setReferenceName("chr1"); + record.setAlignmentStart(55524212); + record.setCigarString("10H40M"); + record.setReadString("GGGGCCTACACGGATGGCCACAGCCATCGCCCGCTGCGCC"); + records.add(record); + + record = new SAMRecord(null); + record.setReferenceName("chr1"); + record.setAlignmentStart(55524218); + record.setCigarString("2H48M"); + record.setReadString("TACACGGATGGCCACAGCCGTCGCCCGCTGCGCCCCAGATGAGGAGCT"); + records.add(record); + + record = new SAMRecord(null); + record.setReferenceName("chr1"); + record.setAlignmentStart(55524228); + record.setCigarString("4M6D21M"); + record.setReadString("GCCATCGCCCGCTGCGCCCCAGATG"); + records.add(record); + + QSnpRecord qpr = new QSnpRecord("chr1", 55524237, "G"); + qpr.setAlt("A"); + + GatkUniqueSnps.examinePileup(records, qpr); + + Assert.assertNotNull(qpr.getAnnotation()); + Assert.assertTrue(qpr.getAnnotation().contains(SnpUtils.MUTATION_IN_NORMAL)); + Assert.assertTrue(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); + } + + + @Test + public void testExaminePileup() throws Exception { + QSnpRecord qpr = new QSnpRecord("chr1", 101, "G"); + qpr.setAlt("C"); + + GatkUniqueSnps.examinePileup(samRecords.subList(0,1), qpr); + + Assert.assertNotNull(qpr.getAnnotation()); + Assert.assertTrue(qpr.getAnnotation().contains(SnpUtils.MUTATION_IN_NORMAL)); + 
Assert.assertTrue(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); + + qpr = new QSnpRecord("chr1", 102, "G"); + qpr.setAlt("C"); + + GatkUniqueSnps.examinePileup(samRecords.subList(0, 1), qpr); + + Assert.assertNotNull(qpr.getAnnotation()); + Assert.assertFalse(qpr.getAnnotation().contains(SnpUtils.MUTATION_IN_NORMAL)); + Assert.assertTrue(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); + + qpr = new QSnpRecord("chr1", 110, "A"); + qpr.setAlt("G"); + + GatkUniqueSnps.examinePileup(samRecords.subList(0, 10), qpr); + + Assert.assertNotNull(qpr.getAnnotation()); + Assert.assertFalse(qpr.getAnnotation().contains(SnpUtils.MUTATION_IN_NORMAL)); + Assert.assertTrue(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); + + qpr = new QSnpRecord("chr1", 112, "A"); + qpr.setAlt("G"); + + GatkUniqueSnps.examinePileup(samRecords, qpr); + Assert.assertNull(qpr.getAnnotation()); + + qpr = new QSnpRecord("chr1", 112, "G"); + qpr.setAlt("A"); + + GatkUniqueSnps.examinePileup(samRecords, qpr); + Assert.assertNotNull(qpr.getAnnotation()); + Assert.assertTrue(qpr.getAnnotation().contains(SnpUtils.MUTATION_IN_NORMAL)); + Assert.assertFalse(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); + } + + + +} diff --git a/qmule/test/org/qcmg/qmule/util/IGVBatchFileGeneratorTest.java-- b/qmule/test/org/qcmg/qmule/util/IGVBatchFileGeneratorTest.java-- new file mode 100644 index 000000000..6ea4f2382 --- /dev/null +++ b/qmule/test/org/qcmg/qmule/util/IGVBatchFileGeneratorTest.java-- @@ -0,0 +1,73 @@ +package org.qcmg.qmule.util; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import junit.framework.Assert; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.qcmg.common.model.ChrPointPosition; +import org.qcmg.common.model.ChrPosition; + +public class IGVBatchFileGeneratorTest { + + @Rule + public TemporaryFolder tempFolder = new TemporaryFolder(); + + @Test + public void testGenerate() throws IOException { + try { + IGVBatchFileGenerator.generate(null, null); + Assert.fail("Should not have reached here"); + } catch (IllegalArgumentException iae) {} + try { + IGVBatchFileGenerator.generate(null, ""); + Assert.fail("Should not have reached here"); + } catch (IllegalArgumentException iae) {} + try { + IGVBatchFileGenerator.generate(Collections.EMPTY_LIST, ""); + Assert.fail("Should not have reached here"); + } catch (IllegalArgumentException iae) {} + + // create a temp File + File tmpOutput = tempFolder.newFile("testGenerate.igv.batch"); + try { + IGVBatchFileGenerator.generate(Collections.EMPTY_LIST, tmpOutput.getAbsolutePath()); + Assert.fail("Should not have reached here"); + } catch (IllegalArgumentException iae) {} + + List positions = new ArrayList(); + positions.add(ChrPointPosition.valueOf("chr1", 1)); + positions.add(ChrPointPosition.valueOf("chr2", 1234567890)); + + IGVBatchFileGenerator.generate(positions, tmpOutput.getAbsolutePath()); + + //read in contents of file + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(tmpOutput))); + List fileContents = new ArrayList(); + String line = null; + while ((line = reader.readLine()) != null) { + fileContents.add(line); + } + reader.close(); + + Assert.assertEquals("snapshotDirectory " + tmpOutput.getParent() , fileContents.get(0)); + 
Assert.assertEquals("genome " + IGVBatchFileGenerator.GENOME, fileContents.get(1)); + Assert.assertEquals("goto chr1:1-1", fileContents.get(2)); + Assert.assertEquals("sort base", fileContents.get(3)); + Assert.assertEquals("collapse", fileContents.get(4)); + Assert.assertEquals("snapshot chr1:1.png", fileContents.get(5)); + Assert.assertEquals("goto chr2:1234567890-1234567890", fileContents.get(6)); + Assert.assertEquals("snapshot chr2:1234567890.png", fileContents.get(9)); + + } + +} diff --git a/qmule/test/org/qcmg/qmule/util/TabbedDataLoaderTest.java-- b/qmule/test/org/qcmg/qmule/util/TabbedDataLoaderTest.java-- new file mode 100644 index 000000000..213d0f15c --- /dev/null +++ b/qmule/test/org/qcmg/qmule/util/TabbedDataLoaderTest.java-- @@ -0,0 +1,21 @@ +package org.qcmg.qmule.util; + +import junit.framework.Assert; + +import org.junit.Test; + +public class TabbedDataLoaderTest { + + @Test + public void testGetStringFromArray() { + Assert.assertNull(TabbedDataLoader.getStringFromArray(null, -1)); + Assert.assertNull(TabbedDataLoader.getStringFromArray(new String[] {}, -1)); + Assert.assertNull(TabbedDataLoader.getStringFromArray(new String[] {}, 0)); + Assert.assertEquals("Hello", TabbedDataLoader.getStringFromArray(new String[] {"Hello"}, 0)); + Assert.assertEquals("Hello", TabbedDataLoader.getStringFromArray(new String[] {"Hello"}, -1)); + Assert.assertNull(TabbedDataLoader.getStringFromArray(new String[] {"Hello"}, -10)); + Assert.assertEquals("there", TabbedDataLoader.getStringFromArray(new String[] {"Hello", "there"}, -1)); + Assert.assertEquals("there", TabbedDataLoader.getStringFromArray(new String[] {"Hello", "1", "2", "3", "there"}, -1)); + Assert.assertEquals("1", TabbedDataLoader.getStringFromArray(new String[] {"Hello", "1", "2", "3", "there"}, 1)); + } +} diff --git a/qsignature/src/org/qcmg/sig/util/SignatureUtil.java b/qsignature/src/org/qcmg/sig/util/SignatureUtil.java index 447ae98ca..79acdecf5 100644 --- a/qsignature/src/org/qcmg/sig/util/SignatureUtil.java +++ b/qsignature/src/org/qcmg/sig/util/SignatureUtil.java @@ -55,7 +55,7 @@ import org.qcmg.common.vcf.VcfUtils; import org.qcmg.qio.illumina.IlluminaRecord; import org.qcmg.qio.record.StringFileReader; -import org.qcmg.qio.vcf.VCFFileReader; +import org.qcmg.vcf.VCFFileReader; import org.qcmg.sig.model.Comparison; import org.qcmg.sig.model.SigMeta; import org.w3c.dom.Document; diff --git a/qsignature/test/org/qcmg/sig/SignatureGeneratorBespokeTest.java b/qsignature/test/org/qcmg/sig/SignatureGeneratorBespokeTest.java index 818bb7fd5..717f356f4 100644 --- a/qsignature/test/org/qcmg/sig/SignatureGeneratorBespokeTest.java +++ b/qsignature/test/org/qcmg/sig/SignatureGeneratorBespokeTest.java @@ -13,7 +13,7 @@ import org.qcmg.common.vcf.VcfRecord; import org.qcmg.common.vcf.header.VcfHeader; import org.qcmg.common.vcf.header.VcfHeaderRecord; -import org.qcmg.qio.vcf.VCFFileReader; +import org.qcmg.vcf.VCFFileReader; import gnu.trove.map.TObjectIntMap; import htsjdk.samtools.SAMFileHeader; @@ -206,7 +206,7 @@ public void runProcessWithHG19BamFile() throws Exception { recs.add(rec); System.out.println("rec: " + rec.toString()); } - VcfHeader header = reader.getVcfHeader(); + VcfHeader header = reader.getHeader(); // header.getAllMetaRecords().stream().forEach(System.out::println); assertEquals(true, header.getAllMetaRecords().contains(new VcfHeaderRecord("##rg0=null"))); } @@ -245,7 +245,7 @@ public void runProcessWithReadGroupsSetInHeader() throws Exception { recs.add(rec); System.out.println("rec: " + rec.toString()); 
} - VcfHeader header = reader.getVcfHeader(); + VcfHeader header = reader.getHeader(); header.getAllMetaRecords().stream().forEach(System.out::println); assertEquals(true, header.getAllMetaRecords().contains(new VcfHeaderRecord("##rg0=null"))); assertEquals(true, header.getAllMetaRecords().contains(new VcfHeaderRecord("##rg1=20130325103517169"))); diff --git a/qsignature/test/org/qcmg/sig/SignatureGeneratorTest.java b/qsignature/test/org/qcmg/sig/SignatureGeneratorTest.java index 3ea7c9923..8c1e63682 100644 --- a/qsignature/test/org/qcmg/sig/SignatureGeneratorTest.java +++ b/qsignature/test/org/qcmg/sig/SignatureGeneratorTest.java @@ -33,7 +33,7 @@ import org.qcmg.picard.SAMOrBAMWriterFactory; import org.qcmg.picard.util.SAMUtils; import org.qcmg.qio.illumina.IlluminaRecord; -import org.qcmg.qio.vcf.VCFFileReader; +import org.qcmg.vcf.VCFFileReader; import org.qcmg.sig.util.SignatureUtil; public class SignatureGeneratorTest { From 82df5701d8790644c75d127bf29dda413c6b6bce Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 25 Nov 2020 17:27:09 +1000 Subject: [PATCH 28/73] merge from master --- qio/src/org/qcmg/qio/record/RecordReader.java | 55 +++++++++++-------- qio/src/org/qcmg/qio/record/RecordWriter.java | 11 ++-- .../org/qcmg/qio/record/StringFileReader.java | 5 +- 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/qio/src/org/qcmg/qio/record/RecordReader.java b/qio/src/org/qcmg/qio/record/RecordReader.java index fdbdf042d..9b900b5ef 100644 --- a/qio/src/org/qcmg/qio/record/RecordReader.java +++ b/qio/src/org/qcmg/qio/record/RecordReader.java @@ -4,10 +4,9 @@ * * This code is released under the terms outlined in the included LICENSE file. */ + package org.qcmg.qio.record; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.io.BufferedReader; import java.io.Closeable; import java.io.File; @@ -15,6 +14,10 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.UncheckedIOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -33,7 +36,10 @@ public abstract class RecordReader implements Closeable, Iterable { protected T next; protected List headerLines = new ArrayList<>(); - public RecordReader(final File file) throws IOException { this(file, DEFAULT_BUFFER_SIZE); } + + public RecordReader(final File file) throws IOException { + this(file, DEFAULT_BUFFER_SIZE); + } public RecordReader(final File file, int bufferSize) throws IOException { this(file, bufferSize, DEFAULT_HEADER_PREFIX, DEFAULT_CHARSET); @@ -51,14 +57,10 @@ public RecordReader(final File file, int bufferSize, CharSequence headerPrefix, InputStreamReader streamReader = new InputStreamReader(inputStream, charset); bin = new BufferedReader(streamReader, bufferSize); - String nextLine = readHeader(headerPrefix);//bin.readLine(); - + String nextLine = readHeaderAndReturnFirstNonHeaderLine(headerPrefix); //get first record, set to null for empty file - try { - next = nextLine == null? null : getRecord(nextLine); - }catch(Exception e) { - throw new IOException("error during retrive first record " + e.getMessage()); - } + next = nextLine == null ? 
null : getRecord(nextLine); + } /** * this method is overridable in subclass, eg illumina file have different header patten @@ -67,16 +69,16 @@ public RecordReader(final File file, int bufferSize, CharSequence headerPrefix, * @return the first line just after header * @throws IOException */ - public String readHeader(CharSequence headerPrefix ) throws IOException{ + public String readHeaderAndReturnFirstNonHeaderLine(CharSequence headerPrefix ) throws IOException { + + String nextLine = bin.readLine(); - //empty file - if( nextLine == null ) return null; - - if(headerPrefix == null) return nextLine; + //keep empty header and return first nonHeaderline + if (headerPrefix == null) return nextLine; //reader header, hence file pointer to first line after header - while ( headerPrefix != null && null != nextLine && nextLine.startsWith(headerPrefix+"") ) { + while ( nextLine != null && nextLine.startsWith(headerPrefix + "") ) { headerLines.add(nextLine); //reset current read line nextLine = bin.readLine(); @@ -89,15 +91,21 @@ public String readHeader(CharSequence headerPrefix ) throws IOException{ * This reader can maxmum take Integer.max lines of file header. Please make other header if bigger than this. * @return a list of header lines */ - public List getHeader() { return headerLines; } + public List getHeader() { + return headerLines; + } @Override /** * Here, BufferedReader.close() calls InputStreamReader.close(), which API told us that it Closes the stream and releases any system resources associated with it. */ - public void close() throws IOException { bin.close(); } + public void close() throws IOException { + bin.close(); + } - public File getFile() { return file; } + public File getFile() { + return file; + } @Override public Iterator iterator() { @@ -116,13 +124,14 @@ public T next() { try { //get next record, it may read multi lines String line = bin.readLine(); - if(line != null ) { + if ( line != null ) { next = getRecord( line ); } return rec; - } catch (Exception e) { - throw new RuntimeException(e.getMessage()); + } catch (IOException e) { + //here we only catch IO exception + throw new UncheckedIOException(e); } } }; @@ -131,7 +140,7 @@ public T next() { } //some record cross multi lines, eg id\nseq\n, this method may call bin.readLine() inside - public abstract T getRecord(String line) throws Exception; + public abstract T getRecord(String line); } diff --git a/qio/src/org/qcmg/qio/record/RecordWriter.java b/qio/src/org/qcmg/qio/record/RecordWriter.java index 057260d6f..e76f00b73 100644 --- a/qio/src/org/qcmg/qio/record/RecordWriter.java +++ b/qio/src/org/qcmg/qio/record/RecordWriter.java @@ -1,6 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ + package org.qcmg.qio.record; import java.io.BufferedWriter; @@ -12,8 +13,8 @@ import java.io.OutputStreamWriter; import java.util.List; import java.util.zip.GZIPOutputStream; -import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.Constants; +import org.qcmg.common.util.FileUtils; public class RecordWriter implements Closeable { private final File file; @@ -36,12 +37,12 @@ public RecordWriter(final File file) throws IOException { * @throws IOException */ public void addHeader(final String header) throws IOException { - String line = header.endsWith(Constants.NL_STRING)? header : header + Constants.NL; + String line = header.endsWith(Constants.NL_STRING) ? 
header : header + Constants.NL; bos.write(line); } public void addHeader(List header) throws IOException { - for(String str : header) { + for (String str : header) { addHeader(str); } } @@ -52,8 +53,8 @@ public void addHeader(List header) throws IOException { * @throws IOException */ public void add(final T record) throws IOException { - String encoded = record instanceof String? (String) record : record.toString(); - String line = encoded.endsWith(Constants.NL_STRING)? encoded : encoded + Constants.NL; + String encoded = record instanceof String ? (String) record : record.toString(); + String line = encoded.endsWith(Constants.NL_STRING) ? encoded : encoded + Constants.NL; bos.write(line); } diff --git a/qio/src/org/qcmg/qio/record/StringFileReader.java b/qio/src/org/qcmg/qio/record/StringFileReader.java index 6ed40c9dd..8914e5707 100644 --- a/qio/src/org/qcmg/qio/record/StringFileReader.java +++ b/qio/src/org/qcmg/qio/record/StringFileReader.java @@ -21,7 +21,10 @@ public StringFileReader(File file, int bufferSize) throws IOException { } @Override - public String getRecord(String line) throws Exception { + /** + * return input self even it is null + */ + public String getRecord(String line) { return line; } } \ No newline at end of file From 2a8df9b1afbdb20d1d7c10fe0700a31cb3c7ad06 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 25 Nov 2020 18:55:40 +1000 Subject: [PATCH 29/73] update fasta, gff, gff3,illumina with new record package --- qio/src/org/qcmg/qio/fasta/FastaReader.java | 11 ++++++++--- qio/src/org/qcmg/qio/fasta/FastaRecord.java | 10 +++++----- qio/src/org/qcmg/qio/gff/GffReader.java | 4 ++-- qio/src/org/qcmg/qio/gff/GffRecord.java | 4 ++-- qio/src/org/qcmg/qio/gff3/Gff3FileReader.java | 4 ++-- qio/src/org/qcmg/qio/gff3/Gff3Record.java | 6 +++--- qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java | 9 +++++---- qio/src/org/qcmg/qio/record/RecordReader.java | 2 +- 8 files changed, 28 insertions(+), 22 deletions(-) diff --git a/qio/src/org/qcmg/qio/fasta/FastaReader.java b/qio/src/org/qcmg/qio/fasta/FastaReader.java index 88d3b2cfe..59c9e5c84 100644 --- a/qio/src/org/qcmg/qio/fasta/FastaReader.java +++ b/qio/src/org/qcmg/qio/fasta/FastaReader.java @@ -5,6 +5,7 @@ package org.qcmg.qio.fasta; import java.io.File; +import java.io.IOException; import org.qcmg.qio.record.RecordReader; @@ -24,10 +25,14 @@ public FastaReader(File file) throws Exception { /** * it has to read two line to construct one record */ - public FastaRecord getRecord(String line) throws Exception { + public FastaRecord getRecord(String line) { String id = line; - String seq = bin.readLine(); - + String seq = null; + try { + seq = bin.readLine(); + } catch (IOException e) { + e.printStackTrace(); + } return new FastaRecord(id, seq); } } diff --git a/qio/src/org/qcmg/qio/fasta/FastaRecord.java b/qio/src/org/qcmg/qio/fasta/FastaRecord.java index 8908a0994..8ddb926b5 100644 --- a/qio/src/org/qcmg/qio/fasta/FastaRecord.java +++ b/qio/src/org/qcmg/qio/fasta/FastaRecord.java @@ -18,17 +18,17 @@ public class FastaRecord { private String id; private String data; - public FastaRecord(String id, String data) throws Exception { + public FastaRecord(String id, String data) { setId(id); setData(data); } public FastaRecord() {} - public void setId(String id) throws Exception { + public void setId(String id) { //id start with < if ( ! 
id.startsWith(ID_PREFIX)) { - throw new Exception("Bad id format: " + id); + throw new IllegalArgumentException("Bad id format: " + id); } this.id = id; } @@ -37,10 +37,10 @@ public String getId() { return id; } - public void setData(String data) throws Exception { + public void setData(String data) { //seq should not start with < if (data.startsWith(ID_PREFIX)) { - throw new Exception("Bad sequence format: " + data); + throw new IllegalArgumentException("Bad sequence format: " + data); } this.data = data; } diff --git a/qio/src/org/qcmg/qio/gff/GffReader.java b/qio/src/org/qcmg/qio/gff/GffReader.java index 59dd987d1..f4e9b12c1 100644 --- a/qio/src/org/qcmg/qio/gff/GffReader.java +++ b/qio/src/org/qcmg/qio/gff/GffReader.java @@ -13,7 +13,7 @@ public GffReader(File file) throws IOException { } @Override - public GffRecord getRecord(String line) throws Exception { + public GffRecord getRecord(String line) { if (null == line) { throw new AssertionError("Record was null"); } @@ -21,7 +21,7 @@ public GffRecord getRecord(String line) throws Exception { String[] fields = line.split(TAB_DELIMITER); if (fields.length < 8) { - throw new Exception("Not enough fields in the Record"); + throw new IllegalArgumentException("Not enough fields in the Record"); } return new GffRecord(fields); diff --git a/qio/src/org/qcmg/qio/gff/GffRecord.java b/qio/src/org/qcmg/qio/gff/GffRecord.java index c74e67360..0b44e184d 100644 --- a/qio/src/org/qcmg/qio/gff/GffRecord.java +++ b/qio/src/org/qcmg/qio/gff/GffRecord.java @@ -43,7 +43,7 @@ public class GffRecord { * @throws Exception * @throws QProfilerException */ - public GffRecord(String[] fields) throws Exception { + public GffRecord(String[] fields) { // public GffRecord( String textRecord, String delimiter ) { // this(); // call constructor 0 // originalLine = textRecord; @@ -67,7 +67,7 @@ public GffRecord(String[] fields) throws Exception { for (int i = 0; i < tmpattribs.length; i++) { String[] attrFields = tmpattribs[i].split("="); if (attrFields.length < 2) { - throw new Exception("Attribute [" + tmpattribs[i] + throw new IllegalArgumentException("Attribute [" + tmpattribs[i] + "] is badly formed"); } attributes.put(attrFields[0], attrFields[1]); diff --git a/qio/src/org/qcmg/qio/gff3/Gff3FileReader.java b/qio/src/org/qcmg/qio/gff3/Gff3FileReader.java index 5c6c368e4..3505a86ac 100644 --- a/qio/src/org/qcmg/qio/gff3/Gff3FileReader.java +++ b/qio/src/org/qcmg/qio/gff3/Gff3FileReader.java @@ -20,7 +20,7 @@ public Gff3FileReader(File file) throws IOException { } @Override - public Gff3Record getRecord(String line) throws Exception { - return new Gff3Record(line); + public Gff3Record getRecord(String line) { + return new Gff3Record(line); } } diff --git a/qio/src/org/qcmg/qio/gff3/Gff3Record.java b/qio/src/org/qcmg/qio/gff3/Gff3Record.java index 7aa259d4c..eb6d7feff 100644 --- a/qio/src/org/qcmg/qio/gff3/Gff3Record.java +++ b/qio/src/org/qcmg/qio/gff3/Gff3Record.java @@ -20,10 +20,10 @@ public class Gff3Record { protected String rawData; public Gff3Record() {} - public Gff3Record(final String line) throws Exception { + public Gff3Record(final String line) { String[] params = TabTokenizer.tokenize(line); - if (8 > params.length) { - throw new Exception("Bad GFF3 format. Insufficient columns: '" + line + "'"); + if (8 > params.length) { + throw new IllegalArgumentException("Bad GFF3 format. 
Insufficient columns: '" + line + "'"); } setRawData(line); diff --git a/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java b/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java index ed4157be3..549527ea9 100644 --- a/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java +++ b/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java @@ -18,8 +18,7 @@ public IlluminaFileReader(File file) throws IOException { super(file, DEFAULT_BUFFER_SIZE, HEADER_LINE, DEFAULT_CHARSET); } - @Override - public String readHeader(CharSequence headerPrefix ) throws IOException{ + public String readHeader(CharSequence headerPrefix ) throws IOException { String nextLine = bin.readLine(); //empty file @@ -45,11 +44,13 @@ public String readHeader(CharSequence headerPrefix ) throws IOException{ } @Override - public IlluminaRecord getRecord(String line) throws Exception { + public IlluminaRecord getRecord(String line) { String[] dataArray = TabTokenizer.tokenize(line); // raw Illumina data has 32 fields... and the first one is an integer - if (dataArray.length != 32) throw new Exception("Bad Illumina data format - expecting 32 fields but saw " + dataArray.length); + if (dataArray.length != 32) { + throw new IllegalArgumentException("Bad Illumina data format - expecting 32 fields but saw " + dataArray.length); + } return new IlluminaRecord( dataArray ); } diff --git a/qio/src/org/qcmg/qio/record/RecordReader.java b/qio/src/org/qcmg/qio/record/RecordReader.java index 9b900b5ef..7c012ee2c 100644 --- a/qio/src/org/qcmg/qio/record/RecordReader.java +++ b/qio/src/org/qcmg/qio/record/RecordReader.java @@ -132,7 +132,7 @@ public T next() { } catch (IOException e) { //here we only catch IO exception throw new UncheckedIOException(e); - } + } } }; From ab3c536ccc4acc9e09c122f2aa684a4a1d860b92 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 25 Nov 2020 21:45:23 +1000 Subject: [PATCH 30/73] delete tmp files --- .../exception/RecordIteratorException.java-- | 17 - .../org/qcmg/qmule/gff3/GFF3FileReader.java-- | 44 -- .../org/qcmg/qmule/gff3/GFF3FileWriter.java-- | 36 -- qio/src/org/qcmg/qmule/gff3/GFF3Record.java-- | 244 -------- ...cordChromosomeAndPositionComparator.java-- | 32 - .../qcmg/qmule/gff3/GFF3RecordIterator.java-- | 49 -- .../gff3/GFF3RecordPositionComparator.java-- | 21 - .../org/qcmg/qmule/gff3/GFF3Serializer.java-- | 94 --- .../record/AbstractRecordIterator.java-- | 47 -- .../record/ExtendedRecordIterator.java-- | 52 -- qio/src/org/qcmg/qmule/record/Record.java-- | 8 - .../qcmg/qmule/record/RecordIterator.java-- | 47 -- .../org/qcmg/qmule/record/Serializer.java-- | 39 -- .../org/qcmg/qmule/record/SimpleRecord.java-- | 40 -- .../qcmg/qmule/tab/TabbedFileReader.java-- | 61 -- .../qcmg/qmule/tab/TabbedFileWriter.java-- | 46 -- .../org/qcmg/qmule/tab/TabbedHeader.java-- | 24 - .../org/qcmg/qmule/tab/TabbedRecord.java-- | 19 - .../qmule/tab/TabbedRecordIterator.java-- | 47 -- .../qcmg/qmule/tab/TabbedSerializer.java-- | 51 -- .../org/qcmg/unused/bed/BEDFileReader.java-- | 37 -- qio/src/org/qcmg/unused/bed/BEDRecord.java-- | 97 --- .../qcmg/unused/bed/BEDRecordIterator.java-- | 49 -- .../bed/BEDRecordPositionComparator.java-- | 21 - .../org/qcmg/unused/bed/BEDSerializer.java-- | 65 -- .../consensuscalls/ConsensusCallsFlag.java-- | 64 -- .../ConsensusCallsRecord.java-- | 377 ------------ .../ConsensusCallsSerializer.java-- | 68 --- .../genesymbol/GeneSymbolFileReader.java-- | 38 -- .../unused/genesymbol/GeneSymbolRecord.java-- | 32 - .../GeneSymbolRecordIterator.java-- | 49 -- 
.../genesymbol/GeneSymbolSerializer.java-- | 44 -- .../unused/illumina/IlluminaFileReader.java-- | 21 - .../unused/illumina/IlluminaRecord.java-- | 262 -------- .../illumina/IlluminaRecordIterator.java-- | 21 - .../unused/illumina/IlluminaSerializer.java-- | 73 --- .../org/qcmg/unused/maf/MAFFileReader.java-- | 42 -- .../qcmg/unused/maf/MAFRecordIterator.java-- | 54 -- .../org/qcmg/unused/maf/MAFSerializer.java-- | 68 --- .../PrimerDesignFileReader.java-- | 17 - .../PrimerDesignRecord.java-- | 135 ----- .../PrimerDesignRecordSerializer.java-- | 80 --- .../primerdesignsummary/PrimerPosition.java-- | 68 --- .../primerinput/PrimerInputFileReader.java-- | 17 - .../primerinput/PrimerInputFileWriter.java-- | 33 - .../primerinput/PrimerInputRecord.java-- | 331 ---------- .../PrimerInputRecordSerializer.java-- | 201 ------- .../primerinput/PrimerSequenceTarget.java-- | 43 -- .../unused/primerinput/PrimerSizeRange.java-- | 44 -- .../PrimerOutputFileReader.java-- | 22 - .../PrimerOutputFileWriter.java-- | 35 -- .../primeroutput/PrimerOutputHeader.java-- | 563 ------------------ .../PrimerOutputHeaderSerializer.java-- | 128 ---- .../primeroutput/PrimerOutputRecord.java-- | 517 ---------------- .../PrimerOutputRecordSerializer.java-- | 136 ----- .../qcmg/unused/reader/AbstractReader.java-- | 40 -- .../unused/reader/ExtendedFileReader.java-- | 41 -- .../org/qcmg/unused/reader/FileReader.java-- | 57 -- qio/src/org/qcmg/unused/reader/Reader.java-- | 10 - .../unused/simple/SimpleFileReader.java-- | 23 - .../unused/simple/SimpleRecordIterator.java-- | 26 - .../unused/simple/SimpleSerializer.java-- | 57 -- 62 files changed, 5124 deletions(-) delete mode 100644 qio/src/org/qcmg/qmule/exception/RecordIteratorException.java-- delete mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java-- delete mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java-- delete mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3Record.java-- delete mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java-- delete mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java-- delete mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java-- delete mode 100644 qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java-- delete mode 100644 qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java-- delete mode 100644 qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java-- delete mode 100644 qio/src/org/qcmg/qmule/record/Record.java-- delete mode 100644 qio/src/org/qcmg/qmule/record/RecordIterator.java-- delete mode 100644 qio/src/org/qcmg/qmule/record/Serializer.java-- delete mode 100644 qio/src/org/qcmg/qmule/record/SimpleRecord.java-- delete mode 100644 qio/src/org/qcmg/qmule/tab/TabbedFileReader.java-- delete mode 100644 qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java-- delete mode 100644 qio/src/org/qcmg/qmule/tab/TabbedHeader.java-- delete mode 100644 qio/src/org/qcmg/qmule/tab/TabbedRecord.java-- delete mode 100644 qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java-- delete mode 100644 qio/src/org/qcmg/qmule/tab/TabbedSerializer.java-- delete mode 100644 qio/src/org/qcmg/unused/bed/BEDFileReader.java-- delete mode 100644 qio/src/org/qcmg/unused/bed/BEDRecord.java-- delete mode 100644 qio/src/org/qcmg/unused/bed/BEDRecordIterator.java-- delete mode 100644 qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java-- delete mode 100644 qio/src/org/qcmg/unused/bed/BEDSerializer.java-- delete mode 100644 qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java-- delete mode 
100644 qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java-- delete mode 100644 qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java-- delete mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java-- delete mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java-- delete mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java-- delete mode 100644 qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java-- delete mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java-- delete mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaRecord.java-- delete mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java-- delete mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java-- delete mode 100644 qio/src/org/qcmg/unused/maf/MAFFileReader.java-- delete mode 100644 qio/src/org/qcmg/unused/maf/MAFRecordIterator.java-- delete mode 100644 qio/src/org/qcmg/unused/maf/MAFSerializer.java-- delete mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java-- delete mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java-- delete mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java-- delete mode 100644 qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java-- delete mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java-- delete mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java-- delete mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java-- delete mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java-- delete mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java-- delete mode 100644 qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java-- delete mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java-- delete mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java-- delete mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java-- delete mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java-- delete mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java-- delete mode 100644 qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java-- delete mode 100644 qio/src/org/qcmg/unused/reader/AbstractReader.java-- delete mode 100644 qio/src/org/qcmg/unused/reader/ExtendedFileReader.java-- delete mode 100644 qio/src/org/qcmg/unused/reader/FileReader.java-- delete mode 100644 qio/src/org/qcmg/unused/reader/Reader.java-- delete mode 100644 qio/src/org/qcmg/unused/simple/SimpleFileReader.java-- delete mode 100644 qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java-- delete mode 100644 qio/src/org/qcmg/unused/simple/SimpleSerializer.java-- diff --git a/qio/src/org/qcmg/qmule/exception/RecordIteratorException.java-- b/qio/src/org/qcmg/qmule/exception/RecordIteratorException.java-- deleted file mode 100644 index d2df8afe7..000000000 --- a/qio/src/org/qcmg/qmule/exception/RecordIteratorException.java-- +++ /dev/null @@ -1,17 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
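For reference, a minimal usage sketch (not part of any patch in this series) of the StringFileReader and RecordWriter classes introduced in the two patches above, whose old counterparts are being deleted here. The file names below are placeholders; the calls mirror the signatures shown in the diffs, and an IOException raised while iterating now surfaces as java.io.UncheckedIOException.

import java.io.File;
import java.io.IOException;
import java.util.List;
import org.qcmg.qio.record.RecordWriter;
import org.qcmg.qio.record.StringFileReader;

public class StringFileReaderUsageSketch {
    public static void main(String[] args) throws IOException {
        File input = new File("example_input.txt");   // placeholder path
        File output = new File("example_output.txt"); // placeholder path
        try (StringFileReader reader = new StringFileReader(input);
             RecordWriter<String> writer = new RecordWriter<>(output)) {
            // leading header lines are collected by the reader and exposed as a list
            List<String> header = reader.getHeader();
            writer.addHeader(header);
            // iteration yields one raw line per record; IO failures inside next()
            // are rethrown as UncheckedIOException rather than a checked exception
            for (String line : reader) {
                writer.add(line);
            }
        }
    }
}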
- */ -package org.qcmg.unused.exception; - -public class RecordIteratorException extends RuntimeException { - - private static final long serialVersionUID = 7963940971937212428L; - - public RecordIteratorException() {} // default constructor - public RecordIteratorException(Exception e) { - super(e.getMessage(), e); - } - public RecordIteratorException(String message, Exception e) { - super(message, e); - } -} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java-- deleted file mode 100644 index 112ae5f4e..000000000 --- a/qio/src/org/qcmg/qmule/gff3/GFF3FileReader.java-- +++ /dev/null @@ -1,44 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.gff3; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -public final class GFF3FileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - - public GFF3FileReader(final File file) throws FileNotFoundException { - this.file = file; - FileInputStream fileStream = new FileInputStream(file); - inputStream = fileStream; - } - - @Override - public Iterator iterator() { - return getRecordIterator(); - } - - public GFF3RecordIterator getRecordIterator() { - return new GFF3RecordIterator(inputStream); - } - - @Override - public void close() throws IOException { - inputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java-- deleted file mode 100644 index 672c4acdd..000000000 --- a/qio/src/org/qcmg/qmule/gff3/GFF3FileWriter.java-- +++ /dev/null @@ -1,36 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.gff3; - -import java.io.Closeable; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -public final class GFF3FileWriter implements Closeable { - private final File file; - private final OutputStream outputStream; - - public GFF3FileWriter(final File file) throws FileNotFoundException { - this.file = file; - OutputStream stream = new FileOutputStream(file); - outputStream = stream; - } - - public void add(final GFF3Record record) throws IOException { - String encoded = GFF3Serializer.serialise(record) + "\n"; - outputStream.write(encoded.getBytes()); - outputStream.flush(); - } - - @Override - public void close() throws IOException { - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3Record.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3Record.java-- deleted file mode 100644 index ebde722f6..000000000 --- a/qio/src/org/qcmg/qmule/gff3/GFF3Record.java-- +++ /dev/null @@ -1,244 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
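The gff3 classes deleted below duplicate functionality kept in org.qcmg.qio.gff3 and updated in the previous patch. As a rough sketch only, with a placeholder file name, the surviving reader is expected to be used like this; rows with too few columns are reported via IllegalArgumentException, per the Gff3Record change above.

import java.io.File;
import java.io.IOException;
import org.qcmg.qio.gff3.Gff3FileReader;
import org.qcmg.qio.gff3.Gff3Record;

public class Gff3ReaderSketch {
    public static void main(String[] args) throws IOException {
        // placeholder path; Gff3FileReader iterates Gff3Record objects
        try (Gff3FileReader reader = new Gff3FileReader(new File("example.gff3"))) {
            int count = 0;
            for (Gff3Record rec : reader) {
                count++;
            }
            System.out.println("records: " + count);
        }
    }
}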
- */ -package org.qcmg.unused.gff3; - - -public class GFF3Record { - - protected String seqId; - protected String source; - protected String type; - protected int start; - protected int end; - protected String score; - protected String strand; - protected String phase; - protected String attributes; - protected String rawData; - - /** - * Gets the value of the seqId property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSeqId() { - return seqId; - } - - /** - * Sets the value of the seqId property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSeqId(String value) { - this.seqId = value; - } - - /** - * Gets the value of the source property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSource() { - return source; - } - - /** - * Sets the value of the source property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSource(String value) { - this.source = value; - } - - /** - * Gets the value of the type property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getType() { - return type; - } - - /** - * Sets the value of the type property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setType(String value) { - this.type = value; - } - - /** - * Gets the value of the start property. - * - */ - public int getStart() { - return start; - } - - /** - * Sets the value of the start property. - * - */ - public void setStart(int value) { - this.start = value; - } - - /** - * Gets the value of the end property. - * - */ - public int getEnd() { - return end; - } - - /** - * Sets the value of the end property. - * - */ - public void setEnd(int value) { - this.end = value; - } - - /** - * Gets the value of the score property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getScore() { - return score; - } - - /** - * Sets the value of the score property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setScore(String value) { - this.score = value; - } - - /** - * Gets the value of the strand property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getStrand() { - return strand; - } - - /** - * Sets the value of the strand property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setStrand(String value) { - this.strand = value; - } - - /** - * Gets the value of the phase property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getPhase() { - return phase; - } - - /** - * Sets the value of the phase property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setPhase(String value) { - this.phase = value; - } - - /** - * Gets the value of the attributes property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getAttributes() { - return attributes; - } - - /** - * Sets the value of the attributes property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setAttributes(String value) { - this.attributes = value; - } - - /** - * Gets the value of the rawData property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getRawData() { - return rawData; - } - - /** - * Sets the value of the rawData property. 
- * - * @param value - * allowed object is - * {@link String } - * - */ - public void setRawData(String value) { - this.rawData = value; - } - -} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java-- deleted file mode 100644 index 0efc4c6b2..000000000 --- a/qio/src/org/qcmg/qmule/gff3/GFF3RecordChromosomeAndPositionComparator.java-- +++ /dev/null @@ -1,32 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.gff3; - -import java.util.Comparator; - -import org.qcmg.common.model.ReferenceNameComparator; - -public class GFF3RecordChromosomeAndPositionComparator implements - Comparator { - - private static final Comparator chrComp = new ReferenceNameComparator(); - - public int compare(GFF3Record recordA, GFF3Record recordB) { - - // first compare chromosome - int chrcompare = chrComp.compare(recordA.getSeqId(), recordB.getSeqId()); - - if (chrcompare != 0) return chrcompare; - - return compareStart(recordA, recordB) + compareEnd(recordA, recordB); - } - - public int compareStart(GFF3Record recordA, GFF3Record recordB) { - return recordA.getStart() - recordB.getStart(); - } - - public int compareEnd(GFF3Record recordA, GFF3Record recordB) { - return recordA.getEnd() - recordB.getEnd(); - } -} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java-- deleted file mode 100644 index a98e4b73e..000000000 --- a/qio/src/org/qcmg/qmule/gff3/GFF3RecordIterator.java-- +++ /dev/null @@ -1,49 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.gff3; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class GFF3RecordIterator implements Iterator { - private final BufferedReader reader; - private GFF3Record next; - - public GFF3RecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public GFF3Record next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - GFF3Record result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = GFF3Serializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - @SuppressWarnings("unchecked") - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java-- deleted file mode 100644 index 4f5548ceb..000000000 --- a/qio/src/org/qcmg/qmule/gff3/GFF3RecordPositionComparator.java-- +++ /dev/null @@ -1,21 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.gff3; - -import java.util.Comparator; - -public class GFF3RecordPositionComparator implements - Comparator { - public int compare(GFF3Record recordA, GFF3Record recordB) { - return compareStart(recordA, recordB) + compareEnd(recordA, recordB); - } - - public int compareStart(GFF3Record recordA, GFF3Record recordB) { - return recordA.getStart() - recordB.getStart(); - } - - public int compareEnd(GFF3Record recordA, GFF3Record recordB) { - return recordA.getEnd() - recordB.getEnd(); - } -} diff --git a/qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java-- b/qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java-- deleted file mode 100644 index a48cc461d..000000000 --- a/qio/src/org/qcmg/qmule/gff3/GFF3Serializer.java-- +++ /dev/null @@ -1,94 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.gff3; - -import java.io.BufferedReader; -import java.io.IOException; - -import org.qcmg.common.util.Constants; -import org.qcmg.common.util.TabTokenizer; - -public final class GFF3Serializer { - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static GFF3Record nextRecord(final BufferedReader reader) - throws Exception, IOException { - GFF3Record result = null; - try { - - String line = nextNonheaderLine(reader); - if (null != line) { - result = parseRecord(line); - } - } catch (IOException e) { - throw e; - } catch (Exception e) { - throw e; - } - return result; - } - - static GFF3Record parseRecord(final String line) throws Exception { - String[] params = TabTokenizer.tokenize(line); - if (8 > params.length) { - throw new Exception("Bad GFF3 format. 
Insufficient columns: '" + line + "'"); - } - GFF3Record result = new GFF3Record(); - result.setRawData(line); - result.setSeqId(params[0]); - result.setSource(params[1]); - result.setType(params[2]); - result.setStart(Integer.parseInt(params[3])); - result.setEnd(Integer.parseInt(params[4])); - result.setScore(params[5]); - result.setStrand(params[6]); - result.setPhase(params[7]); - if (8 < params.length) { - result.setAttributes(params[8]); - } - return result; - } - - public static GFF3Record duplicate(final GFF3Record record) { - GFF3Record result = new GFF3Record(); - result.setSeqId(record.getSeqId()); - result.setSource(record.getSource()); - result.setType(record.getType()); - result.setStart(record.getStart()); - result.setEnd(record.getEnd()); - result.setScore(record.getScore()); - result.setStrand(record.getStrand()); - result.setPhase(record.getPhase()); - result.setAttributes(record.getAttributes()); - return result; - } - - public static String serialise(final GFF3Record record) { - StringBuilder result = new StringBuilder(record.getSeqId()).append(Constants.TAB); - result.append(record.getSource()).append(Constants.TAB); - result.append(record.getType()).append(Constants.TAB); - result.append(record.getStart()).append(Constants.TAB); - result.append(record.getEnd()).append(Constants.TAB); - result.append(record.getScore()).append(Constants.TAB); - result.append(record.getStrand()).append(Constants.TAB); - result.append(record.getPhase()).append(Constants.TAB); - if (null != record.getAttributes()) { - result.append(record.getAttributes()); - } - return result.toString(); - } - -} diff --git a/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java-- b/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java-- deleted file mode 100644 index 7f100f601..000000000 --- a/qio/src/org/qcmg/qmule/record/AbstractRecordIterator.java-- +++ /dev/null @@ -1,47 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.record; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.concurrent.atomic.AtomicLong; - -import org.qcmg.exception.RecordIteratorException; - -public abstract class AbstractRecordIterator implements Iterator { - - protected final BufferedReader reader; - private final AtomicLong counter; - protected Record next; - - public AbstractRecordIterator(final InputStream stream) throws Exception { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - counter = new AtomicLong(0); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public Record next() { - counter.incrementAndGet(); - Record result = next; - try { - readNext(); - } catch (Exception e) { - throw new RecordIteratorException(e.getMessage() + " [Record count: " + counter.get() +"]", e); - } - return result; - } - - protected abstract void readNext() throws Exception; - - public void remove() { - } - -} diff --git a/qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java-- b/qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java-- deleted file mode 100644 index be668b779..000000000 --- a/qio/src/org/qcmg/qmule/record/ExtendedRecordIterator.java-- +++ /dev/null @@ -1,52 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.record; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.concurrent.atomic.AtomicLong; - -public final class ExtendedRecordIterator implements - Iterator { - private final Serializer serializer; - private final BufferedReader reader; - private final AtomicLong counter; - private RecordType next; - - public ExtendedRecordIterator(final InputStream stream, - final Serializer serializer, - final Serializer headerSerializer) throws Exception { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - counter = new AtomicLong(0); - this.serializer = serializer; - headerSerializer.nextRecord(reader); // skip header - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public RecordType next() { - counter.incrementAndGet(); - RecordType result = next; - try { - readNext(); - } catch (Exception e) { - throw new RuntimeException(e.getMessage() + " [Record count: " - + counter.get() + "]", e); - } - return result; - } - - private void readNext() throws Exception { - next = serializer.nextRecord(reader); - } - - public void remove() { - } -} diff --git a/qio/src/org/qcmg/qmule/record/Record.java-- b/qio/src/org/qcmg/qmule/record/Record.java-- deleted file mode 100644 index 60e2f037c..000000000 --- a/qio/src/org/qcmg/qmule/record/Record.java-- +++ /dev/null @@ -1,8 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.record; - -public interface Record { - public static final String TAB_DELIMITER = "\t"; -} diff --git a/qio/src/org/qcmg/qmule/record/RecordIterator.java-- b/qio/src/org/qcmg/qmule/record/RecordIterator.java-- deleted file mode 100644 index 3c2f89503..000000000 --- a/qio/src/org/qcmg/qmule/record/RecordIterator.java-- +++ /dev/null @@ -1,47 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
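The Serializer/RecordIterator pairing removed in these files is superseded by subclassing RecordReader directly, as the qio readers updated earlier in this series do. A hedged sketch of a custom reader under that design follows; the class name, record shape (a String array) and tab-splitting are made up for illustration.

import java.io.File;
import java.io.IOException;
import org.qcmg.qio.record.RecordReader;

public final class TabSplitReaderSketch extends RecordReader<String[]> {

    public TabSplitReaderSketch(File file) throws IOException {
        super(file);
    }

    @Override
    public String[] getRecord(String line) {
        // getRecord no longer declares checked exceptions; malformed input is
        // reported with an unchecked exception, matching GffReader and the other
        // readers converted in the previous patch
        String[] fields = line.split("\t");
        if (fields.length < 2) {
            throw new IllegalArgumentException("Expected at least 2 fields: " + line);
        }
        return fields;
    }
}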
- */ -package org.qcmg.unused.record; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.concurrent.atomic.AtomicLong; - -public final class RecordIterator implements Iterator { - private final Serializer serializer; - private final BufferedReader reader; - private final AtomicLong counter; - private RecordType next; - - public RecordIterator(final InputStream stream, final Serializer serializer) throws Exception { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - counter = new AtomicLong(0); - this.serializer = serializer; - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public RecordType next() { - counter.incrementAndGet(); - RecordType result = next; - try { - readNext(); - } catch (Exception e) { - throw new RuntimeException(e.getMessage() + " [Record count: " + counter.get() +"]", e); - } - return result; - } - - private void readNext() throws Exception { - next = serializer.nextRecord(reader); - } - - public void remove() { - } -} diff --git a/qio/src/org/qcmg/qmule/record/Serializer.java-- b/qio/src/org/qcmg/qmule/record/Serializer.java-- deleted file mode 100644 index c0c5c2fc8..000000000 --- a/qio/src/org/qcmg/qmule/record/Serializer.java-- +++ /dev/null @@ -1,39 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.record; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -public abstract class Serializer { - public static final String HASH = "#"; - public static final String NEWLINE = "\n"; - public static final String EQUALS = "="; - public static final Pattern tabbedPattern = Pattern.compile("[\\t]+"); - public static final Pattern colonPattern = Pattern.compile("[:]+"); - public static final Pattern hyphenPattern = Pattern.compile("[-]+"); - public static final Pattern equalsPattern = Pattern.compile("[=]+"); - public static final Pattern commaPattern = Pattern.compile("[,]+"); - - public RecordType nextRecord(final BufferedReader reader) throws Exception { - RecordType result = null; - try { - result = parseRecord(reader); - } catch (IOException e) { - throw e; - } catch (Exception e) { - throw e; - } - return result; - } - - public abstract String serialise(final RecordType record) throws Exception; - - public abstract RecordType parseRecord(BufferedReader reader) - throws Exception; -} diff --git a/qio/src/org/qcmg/qmule/record/SimpleRecord.java-- b/qio/src/org/qcmg/qmule/record/SimpleRecord.java-- deleted file mode 100644 index 5c1f75e56..000000000 --- a/qio/src/org/qcmg/qmule/record/SimpleRecord.java-- +++ /dev/null @@ -1,40 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.record; - -/** - * Simple data container class for records that have an id, and some data - *

- * eg. the .csfasta format from SOLiD sequence alignment files. - * Each record is split over two lines. The first line starts with '>' followed by the ID, - * the subsequent line contains the colour space sequence - * - * @author oholmes - */ -public class SimpleRecord implements Record { - - private String id; - private String data; - - public SimpleRecord() {} - - public SimpleRecord(String id, String data) { - this.id = id; - this.data = data; - } - - public void setId(String id) { - this.id = id; - } - public String getId() { - return id; - } - - public void setData(String data) { - this.data = data; - } - public String getData() { - return data; - } -} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedFileReader.java-- b/qio/src/org/qcmg/qmule/tab/TabbedFileReader.java-- deleted file mode 100644 index d3f9f31d8..000000000 --- a/qio/src/org/qcmg/qmule/tab/TabbedFileReader.java-- +++ /dev/null @@ -1,61 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.unused.tab; - -import java.io.BufferedReader; -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.zip.GZIPInputStream; - -import org.qcmg.Utils.IOStreamUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.vcf.VCFSerializer; - -public final class TabbedFileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - private final TabbedHeader header; - - public TabbedFileReader(final File file) throws IOException { - this.file = file; - boolean isGzip = FileUtils.isInputGZip( file); - try(InputStream stream = (isGzip) ? new GZIPInputStream(new FileInputStream(file), 65536) : new FileInputStream(file);) { - BufferedReader in = new BufferedReader(new InputStreamReader(stream)); - header = TabbedSerializer.readHeader(in); - } - - // create a new stream rather a closed one - inputStream = (isGzip) ? new GZIPInputStream(new FileInputStream(file), 65536) : new FileInputStream(file); - } - - public TabbedHeader getHeader() { - return header; - } - - @Override - public Iterator iterator() { - return getRecordIterator(); - } - - public TabbedRecordIterator getRecordIterator() { - return new TabbedRecordIterator(inputStream); - } - - @Override - public void close() throws IOException { - inputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java-- b/qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java-- deleted file mode 100644 index ef8352ac4..000000000 --- a/qio/src/org/qcmg/qmule/tab/TabbedFileWriter.java-- +++ /dev/null @@ -1,46 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.tab; - -import java.io.Closeable; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -public final class TabbedFileWriter implements Closeable { - private final File file; - private final OutputStream outputStream; - - public TabbedFileWriter(final File file) throws Exception { - this.file = file; - OutputStream stream = new FileOutputStream(file); - outputStream = stream; - } - - public void addHeader(final TabbedHeader header) throws Exception { - for (String headerLine : header) { - String encoded = headerLine + "\n"; - outputStream.write(encoded.getBytes()); - } - outputStream.flush(); - } - - public void add(final TabbedRecord record) throws Exception { - String encoded = record.getData() + "\n"; - outputStream.write(encoded.getBytes()); - outputStream.flush(); - } - - @Override - public void close() throws IOException { - // flush anything outstanding and then close - outputStream.flush(); - outputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedHeader.java-- b/qio/src/org/qcmg/qmule/tab/TabbedHeader.java-- deleted file mode 100644 index 28bd5406e..000000000 --- a/qio/src/org/qcmg/qmule/tab/TabbedHeader.java-- +++ /dev/null @@ -1,24 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.tab; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -public final class TabbedHeader implements Iterable { - private final List records = new ArrayList<>(); - - public TabbedHeader(final List headerRecords) { - if (null != headerRecords) { - records.addAll(headerRecords); - } - } - - @Override - public Iterator iterator() { - return records.iterator(); - } - -} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedRecord.java-- b/qio/src/org/qcmg/qmule/tab/TabbedRecord.java-- deleted file mode 100644 index 8cc7d08ac..000000000 --- a/qio/src/org/qcmg/qmule/tab/TabbedRecord.java-- +++ /dev/null @@ -1,19 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.tab; - - -public class TabbedRecord { - private String data; - - public String getData() { - return data; - } - public String[] getDataArray() { - return data.replace("\"", "").split("\t"); - } - public void setData(String data) { - this.data = data; - } -} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java-- b/qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java-- deleted file mode 100644 index b531e4112..000000000 --- a/qio/src/org/qcmg/qmule/tab/TabbedRecordIterator.java-- +++ /dev/null @@ -1,47 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.tab; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class TabbedRecordIterator implements Iterator { - private final BufferedReader reader; - private TabbedRecord next; - - public TabbedRecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public TabbedRecord next() { - if (!hasNext()) throw new NoSuchElementException(); - - TabbedRecord result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = TabbedSerializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/qmule/tab/TabbedSerializer.java-- b/qio/src/org/qcmg/qmule/tab/TabbedSerializer.java-- deleted file mode 100644 index 58a56fb21..000000000 --- a/qio/src/org/qcmg/qmule/tab/TabbedSerializer.java-- +++ /dev/null @@ -1,51 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.tab; - -import java.io.BufferedReader; - -import static org.qcmg.common.util.Constants.HASH_STRING; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -public final class TabbedSerializer { - - public static TabbedHeader readHeader(final BufferedReader reader) throws IOException { - List headerLines = new ArrayList<>(); - String line = reader.readLine(); - while (null != line && line.startsWith(HASH_STRING)) { - headerLines.add(line); - line = reader.readLine(); - } - return new TabbedHeader(headerLines); - } - - private static String nextNonheaderLine(final BufferedReader reader) throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(HASH_STRING)) { - line = reader.readLine(); - } - return line; - } - - public static TabbedRecord nextRecord(final BufferedReader reader) throws IOException { - TabbedRecord result = null; - String line = nextNonheaderLine(reader); - if (null != line) { - result = parseRecord(line); - } - return result; - } - - static TabbedRecord parseRecord(final String line) { - TabbedRecord result = new TabbedRecord(); - result.setData(line); - return result; - } -} diff --git a/qio/src/org/qcmg/unused/bed/BEDFileReader.java-- b/qio/src/org/qcmg/unused/bed/BEDFileReader.java-- deleted file mode 100644 index 39090fc6d..000000000 --- a/qio/src/org/qcmg/unused/bed/BEDFileReader.java-- +++ /dev/null @@ -1,37 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.bed; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -public final class BEDFileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - - public BEDFileReader(final File file) throws IOException { - this.file = file; - FileInputStream fileStream = new FileInputStream(file); - inputStream = fileStream; - } - - public Iterator iterator() { - return getRecordIterator(); - } - - public BEDRecordIterator getRecordIterator() { - return new BEDRecordIterator(inputStream); - } - - public void close() throws IOException { - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/unused/bed/BEDRecord.java-- b/qio/src/org/qcmg/unused/bed/BEDRecord.java-- deleted file mode 100644 index 297b66cb2..000000000 --- a/qio/src/org/qcmg/unused/bed/BEDRecord.java-- +++ /dev/null @@ -1,97 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.bed; - - - -public class BEDRecord { - - private final static char T = '\t'; - - String chrom; - int chromStart; - int chromEnd; - String name; - int score; - String strand; - int thickStart; - int thickEnd; - String itemRGB; - int blockCount; - int blockSizes; - int blockStarts; - - public String getChrom() { - return chrom; - } - public void setChrom(String chrom) { - this.chrom = chrom; - } - public int getChromStart() { - return chromStart; - } - public void setChromStart(int chromStart) { - this.chromStart = chromStart; - } - public int getChromEnd() { - return chromEnd; - } - public void setChromEnd(int chromEnd) { - this.chromEnd = chromEnd; - } - public String getName() { - return name; - } - public void setName(String name) { - this.name = name; - } - public int getScore() { - return score; - } - public void setScore(int score) { - this.score = score; - } - public String getStrand() { - return strand; - } - public void setStrand(String strand) { - this.strand = strand; - } - public int getThickStart() { - return thickStart; - } - public void setThickStart(int thickStart) { - this.thickStart = thickStart; - } - public int getThickEnd() { - return thickEnd; - } - public void setThickEnd(int thickEnd) { - this.thickEnd = thickEnd; - } - public String getItemRGB() { - return itemRGB; - } - public void setItemRGB(String itemRGB) { - this.itemRGB = itemRGB; - } - public int getBlockCount() { - return blockCount; - } - public void setBlockCount(int blockCount) { - this.blockCount = blockCount; - } - public int getBlockSizes() { - return blockSizes; - } - public void setBlockSizes(int blockSizes) { - this.blockSizes = blockSizes; - } - public int getBlockStarts() { - return blockStarts; - } - public void setBlockStarts(int blockStarts) { - this.blockStarts = blockStarts; - } -} diff --git a/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java-- b/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java-- deleted file mode 100644 index 6d8726a5d..000000000 --- a/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java-- +++ /dev/null @@ -1,49 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.bed; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class BEDRecordIterator implements Iterator { - private final BufferedReader reader; - private BEDRecord next; - - public BEDRecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public BEDRecord next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - BEDRecord result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = BEDSerializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - @SuppressWarnings("unchecked") - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java-- b/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java-- deleted file mode 100644 index a813a9afc..000000000 --- a/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java-- +++ /dev/null @@ -1,21 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.bed; - -import java.util.Comparator; - -public class BEDRecordPositionComparator implements - Comparator { - public int compare(BEDRecord recordA, BEDRecord recordB) { - return compareStart(recordA, recordB) + compareEnd(recordA, recordB); - } - - public int compareStart(BEDRecord recordA, BEDRecord recordB) { - return recordA.getChromStart() - recordB.getChromStart(); - } - - public int compareEnd(BEDRecord recordA, BEDRecord recordB) { - return recordA.getChromEnd() - recordB.getChromEnd(); - } -} diff --git a/qio/src/org/qcmg/unused/bed/BEDSerializer.java-- b/qio/src/org/qcmg/unused/bed/BEDSerializer.java-- deleted file mode 100644 index 164cf8189..000000000 --- a/qio/src/org/qcmg/unused/bed/BEDSerializer.java-- +++ /dev/null @@ -1,65 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.bed; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -public final class BEDSerializer { - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static BEDRecord nextRecord(final BufferedReader reader) - throws IOException , Exception { - BEDRecord result = null; - String line = nextNonheaderLine(reader); - if (null != line) { - result = parseRecord(line); - } - return result; - } - - static BEDRecord parseRecord(final String line) throws Exception { - String[] params = tabbedPattern.split(line, -1); - if (3 > params.length) { - throw new Exception("Bad BED format. 
Insufficient columns: '" + line + "'"); - } - BEDRecord result = new BEDRecord(); - result.setChrom(params[0]); - result.setChromStart(Integer.parseInt(params[1])); - result.setChromEnd(Integer.parseInt(params[2])); - if (params.length > 3) { - if (params.length >= 4) - result.setName(params[3]); - if (params.length >= 5) - result.setScore(Integer.parseInt(params[4])); - if (params.length >= 6) - result.setStrand(params[5]); - if (params.length >= 7) - result.setThickStart(Integer.parseInt(params[6])); - if (params.length >= 8) - result.setThickEnd(Integer.parseInt(params[7])); - if (params.length >= 9) - result.setItemRGB(params[8]); - if (params.length >= 10) - result.setBlockCount(Integer.parseInt(params[9])); - if (params.length >= 11) - result.setBlockSizes(Integer.parseInt(params[10])); - if (params.length >= 12) - result.setBlockStarts(Integer.parseInt(params[11])); - } - return result; - } - -} diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java-- b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java-- deleted file mode 100644 index 6cb38fa52..000000000 --- a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java-- +++ /dev/null @@ -1,64 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.consensuscalls; - - -public enum ConsensusCallsFlag { - - H_1("h1"), - H_2("h2"), - H_3("h3"), - H_4("h4"), - H_5("h5"), - H_6("h6"), - H_7("h7"), - H_8("h8"), - H_9("h9"), - H_10("h10"), - H_11("h11"), - H_12("h12"), - H_13("h13"), - H_14("h14"), - H_15("h15"), - H_16("h16"), - H_17("h17"), - H_18("h18"), - H_19("h19"), - H_20("h20"), - H_21("h21"), - H_22("h22"), - M_1("m1"), - M_2("m2"), - M_3("m3"), - M_4("m4"), - M_5("m5"), - M_6("m6"), - M_7("m7"), - M_8("m8"), - M_9("m9"), - M_10("m10"), - M_11("m11"), - M_12("m12"), - M_13("m13"); - - private final String value; - - ConsensusCallsFlag(String v) { - value = v; - } - - public String value() { - return value; - } - - public static ConsensusCallsFlag fromValue(String v) { - for (ConsensusCallsFlag c: ConsensusCallsFlag.values()) { - if (c.value.equals(v)) { - return c; - } - } - throw new IllegalArgumentException(v); - } - -} diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java-- b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java-- deleted file mode 100644 index c9321b571..000000000 --- a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java-- +++ /dev/null @@ -1,377 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -// -// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 -// See http://java.sun.com/xml/jaxb -// Any modifications to this file will be lost upon recompilation of the source schema. 
-// Generated on: 2013.10.25 at 10:52:20 AM EST -// - - -package org.qcmg.unused.consensuscalls; - -import java.util.ArrayList; -import java.util.List; -public class ConsensusCallsRecord { - - protected String chr; - protected int position; - protected String alleleDiColor1; - protected String alleleDiColor2; - protected String reference; - protected String genotype; - protected double pValue; - protected List flag; - protected int coverage; - protected int nCounts1StAllele; - protected int nCountsReferenceAllele; - protected int nCountsNonReferenceAllele; - protected int refAvgQV; - protected int novelAvgQV; - protected int heterozygous; - protected String algorithm; - protected String algorithmName; - - /** - * Gets the value of the chr property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getChr() { - return chr; - } - - /** - * Sets the value of the chr property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setChr(String value) { - this.chr = value; - } - - /** - * Gets the value of the position property. - * - */ - public int getPosition() { - return position; - } - - /** - * Sets the value of the position property. - * - */ - public void setPosition(int value) { - this.position = value; - } - - /** - * Gets the value of the alleleDiColor1 property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getAlleleDiColor1() { - return alleleDiColor1; - } - - /** - * Sets the value of the alleleDiColor1 property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setAlleleDiColor1(String value) { - this.alleleDiColor1 = value; - } - - /** - * Gets the value of the alleleDiColor2 property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getAlleleDiColor2() { - return alleleDiColor2; - } - - /** - * Sets the value of the alleleDiColor2 property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setAlleleDiColor2(String value) { - this.alleleDiColor2 = value; - } - - /** - * Gets the value of the reference property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getReference() { - return reference; - } - - /** - * Sets the value of the reference property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setReference(String value) { - this.reference = value; - } - - /** - * Gets the value of the genotype property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getGenotype() { - return genotype; - } - - /** - * Sets the value of the genotype property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setGenotype(String value) { - this.genotype = value; - } - - /** - * Gets the value of the pValue property. - * - */ - public double getPValue() { - return pValue; - } - - /** - * Sets the value of the pValue property. - * - */ - public void setPValue(double value) { - this.pValue = value; - } - - /** - * Gets the value of the flag property. - * - *
<p>
-     * This accessor method returns a reference to the live list,
-     * not a snapshot. Therefore any modification you make to the
-     * returned list will be present inside the JAXB object.
-     * This is why there is not a set method for the flag property.
-     * 
-     * <p>
-     * For example, to add a new item, do as follows:
-     * <pre>
-     *    getFlag().add(newItem);
-     * </pre>
-     * 
-     * 
-     * <p>
- * Objects of the following type(s) are allowed in the list - * {@link ConsensusCallsFlag } - * - * - */ - public List getFlag() { - if (flag == null) { - flag = new ArrayList(); - } - return this.flag; - } - - /** - * Gets the value of the coverage property. - * - */ - public int getCoverage() { - return coverage; - } - - /** - * Sets the value of the coverage property. - * - */ - public void setCoverage(int value) { - this.coverage = value; - } - - /** - * Gets the value of the nCounts1StAllele property. - * - */ - public int getNCounts1StAllele() { - return nCounts1StAllele; - } - - /** - * Sets the value of the nCounts1StAllele property. - * - */ - public void setNCounts1StAllele(int value) { - this.nCounts1StAllele = value; - } - - /** - * Gets the value of the nCountsReferenceAllele property. - * - */ - public int getNCountsReferenceAllele() { - return nCountsReferenceAllele; - } - - /** - * Sets the value of the nCountsReferenceAllele property. - * - */ - public void setNCountsReferenceAllele(int value) { - this.nCountsReferenceAllele = value; - } - - /** - * Gets the value of the nCountsNonReferenceAllele property. - * - */ - public int getNCountsNonReferenceAllele() { - return nCountsNonReferenceAllele; - } - - /** - * Sets the value of the nCountsNonReferenceAllele property. - * - */ - public void setNCountsNonReferenceAllele(int value) { - this.nCountsNonReferenceAllele = value; - } - - /** - * Gets the value of the refAvgQV property. - * - */ - public int getRefAvgQV() { - return refAvgQV; - } - - /** - * Sets the value of the refAvgQV property. - * - */ - public void setRefAvgQV(int value) { - this.refAvgQV = value; - } - - /** - * Gets the value of the novelAvgQV property. - * - */ - public int getNovelAvgQV() { - return novelAvgQV; - } - - /** - * Sets the value of the novelAvgQV property. - * - */ - public void setNovelAvgQV(int value) { - this.novelAvgQV = value; - } - - /** - * Gets the value of the heterozygous property. - * - */ - public int getHeterozygous() { - return heterozygous; - } - - /** - * Sets the value of the heterozygous property. - * - */ - public void setHeterozygous(int value) { - this.heterozygous = value; - } - - /** - * Gets the value of the algorithm property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getAlgorithm() { - return algorithm; - } - - /** - * Sets the value of the algorithm property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setAlgorithm(String value) { - this.algorithm = value; - } - - /** - * Gets the value of the algorithmName property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getAlgorithmName() { - return algorithmName; - } - - /** - * Sets the value of the algorithmName property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setAlgorithmName(String value) { - this.algorithmName = value; - } - -} diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java-- b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java-- deleted file mode 100644 index 7695b6486..000000000 --- a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java-- +++ /dev/null @@ -1,68 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.consensuscalls; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Pattern; - -public final class ConsensusCallsSerializer { - private static final Pattern tabbedPattern = Pattern.compile("[\\t]+"); - private static final Pattern commaPattern = Pattern.compile("[,]+"); - - public static ConsensusCallsRecord nextRecord(final BufferedReader reader) - throws Exception, IOException { - ConsensusCallsRecord result = null; - try { - String line = reader.readLine(); - if (null != line) { - result = parseRecord(line); - } - } catch (IOException e) { - throw e; - } catch (Exception e) { - throw e; - } - return result; - } - - static ConsensusCallsRecord parseRecord(final String line) throws Exception { - String[] params = tabbedPattern.split(line); - if (17 != params.length) { - throw new Exception("Bad Consensus Calls format"); - } - ConsensusCallsRecord result = new ConsensusCallsRecord(); - result.setChr(params[0]); - result.setPosition(Integer.parseInt(params[1])); - result.setAlleleDiColor1(params[2]); - result.setAlleleDiColor2(params[3]); - result.setReference(params[4]); - result.setGenotype(params[5]); - result.setPValue(Double.parseDouble(params[6])); - parseFlags(result.getFlag(), params[7]); - result.setCoverage(Integer.parseInt(params[8])); - result.setNCountsNonReferenceAllele(Integer.parseInt(params[9])); - result.setNCountsReferenceAllele(Integer.parseInt(params[10])); - result.setNCountsNonReferenceAllele(Integer.parseInt(params[11])); - result.setRefAvgQV(Integer.parseInt(params[12])); - result.setNovelAvgQV(Integer.parseInt(params[13])); - result.setHeterozygous(Integer.parseInt(params[14])); - result.setAlgorithm(params[15]); - result.setAlgorithmName(params[16]); - return result; - } - - public static void parseFlags(final List list, final String value) throws Exception { - String[] params = commaPattern.split(value); - if (1 > params.length) { - throw new Exception("Bad Consensus Calls Flag format"); - } - for (String param : params) { - list.add(ConsensusCallsFlag.fromValue(param)); - } - } -} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java-- b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java-- deleted file mode 100644 index 3ec52a69c..000000000 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java-- +++ /dev/null @@ -1,38 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.genesymbol; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -public final class GeneSymbolFileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - - public GeneSymbolFileReader(final File file) throws IOException { - this.file = file; - FileInputStream fileStream = new FileInputStream(file); - inputStream = fileStream; - } - - public Iterator iterator() { - return getRecordIterator(); - } - - public GeneSymbolRecordIterator getRecordIterator() { - return new GeneSymbolRecordIterator(inputStream); - } - - public void close() throws IOException { - inputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java-- b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java-- deleted file mode 100644 index efc505155..000000000 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java-- +++ /dev/null @@ -1,32 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.genesymbol; - - -public class GeneSymbolRecord { - - private String geneId; - private String transcriptId; - private String symbol; - - public String getGeneId() { - return geneId; - } - public void setGeneId(String geneId) { - this.geneId = geneId; - } - public String getTranscriptId() { - return transcriptId; - } - public void setTranscriptId(String transcriptId) { - this.transcriptId = transcriptId; - } - public String getSymbol() { - return symbol; - } - public void setSymbol(String symbol) { - this.symbol = symbol; - } - -} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java-- b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java-- deleted file mode 100644 index 1ad4c2505..000000000 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java-- +++ /dev/null @@ -1,49 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.genesymbol; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class GeneSymbolRecordIterator implements Iterator { - private final BufferedReader reader; - private GeneSymbolRecord next; - - public GeneSymbolRecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public GeneSymbolRecord next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - GeneSymbolRecord result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = GeneSymbolSerializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - @SuppressWarnings("unchecked") - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java-- b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java-- deleted file mode 100644 index d117b5abe..000000000 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java-- +++ /dev/null @@ -1,44 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.genesymbol; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -public final class GeneSymbolSerializer { - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static GeneSymbolRecord nextRecord(final BufferedReader reader) - throws IOException , Exception { - GeneSymbolRecord result = null; - String line = nextNonheaderLine(reader); - if (null != line) { - result = parseRecord(line); - } - return result; - } - - static GeneSymbolRecord parseRecord(final String line) throws Exception { - String[] params = tabbedPattern.split(line, -1); - if (3 > params.length) { - throw new Exception("Bad Gene Symbol format. Insufficient columns: '" + line + "'"); - } - GeneSymbolRecord result = new GeneSymbolRecord(); - result.setGeneId(params[0]); - result.setTranscriptId(params[1]); - result.setSymbol(params[2]); - return result; - } -} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java-- b/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java-- deleted file mode 100644 index 014fab77d..000000000 --- a/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java-- +++ /dev/null @@ -1,21 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.illumina; - -import java.io.File; -import java.io.IOException; - -import org.qcmg.reader.AbstractReader; - -public final class IlluminaFileReader extends AbstractReader { - - public IlluminaFileReader(final File file) throws IOException { - super(file); - } - - public IlluminaRecordIterator getRecordIterator() throws Exception { - return new IlluminaRecordIterator(inputStream); - } - -} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java-- b/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java-- deleted file mode 100644 index ec203b1b2..000000000 --- a/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java-- +++ /dev/null @@ -1,262 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.illumina; - -import org.qcmg.common.string.StringUtils; -import org.qcmg.unused.record.Record; - -public class IlluminaRecord implements Record { - - private String chr; - private int start; - private final String strand; - private String snpId; - - //TODO do we need this field? - private float GCScore; - - private char firstAllele; - private char secondAllele; - - private final char firstAlleleForward; - private final char secondAlleleForward; - - private final char firstAlleleCall; - private final char secondAlleleCall; - - //TODO do we need this field? - private boolean hom; - private boolean isSnp; - private String snp; - - private final float logRRatio; - private final float bAlleleFreq; - - private final int rawX; - private final int rawY; - - /** - * Constructor that takes in a String array, retrieving pertinent fields from the array to populate the record - * - * @param rawIlluminaData String[] representing a line in the raw Illumina data file - */ - public IlluminaRecord(String [] rawIlluminaData) { - // chromosome and position defined in the raw Illumina data file relate to an old version - // of the genome (hg18), so instead, we use the dbSNP id to get the more recent - //(hg19) chromosome and position details from the dbSNP file at a later date - int length = rawIlluminaData.length; - snpId = rawIlluminaData[0]; - GCScore = Float.parseFloat(rawIlluminaData[4]); - firstAlleleForward = rawIlluminaData[10].charAt(0); - secondAlleleForward = rawIlluminaData[11].charAt(0); - firstAllele = rawIlluminaData[12].charAt(0); - secondAllele = rawIlluminaData[13].charAt(0); - setHom(rawIlluminaData[14].equals(rawIlluminaData[15])); - chr = rawIlluminaData[16]; - start = Integer.parseInt(rawIlluminaData[17]); - snp = rawIlluminaData[20]; - rawX = Integer.parseInt(rawIlluminaData[length - 4]); - rawY = Integer.parseInt(rawIlluminaData[length - 3]); - bAlleleFreq = Float.parseFloat(rawIlluminaData[length - 2]); - String logRRatioString = rawIlluminaData[length - 1]; - if (StringUtils.isNullOrEmpty(logRRatioString)) - logRRatioString = "NaN"; - logRRatio = Float.parseFloat(logRRatioString); - firstAlleleCall = rawIlluminaData[14].charAt(0); - secondAlleleCall = rawIlluminaData[15].charAt(0); - strand = rawIlluminaData[22]; // use customer strand rather than illumina strand -// strand = rawIlluminaData[21]; - } - - - public String getChr() { - return chr; - } - public void setChr(String chr) { - this.chr = chr; - } - public int getStart() { - return start; - } - public void setStart(int start) { - this.start = start; - } - public String getSnpId() { - return snpId; - } - public void setSnpId(String snpId) { - this.snpId = snpId; - } - public float 
getGCScore() { - return GCScore; - } - public void setGCScore(float GCScore) { - this.GCScore = GCScore; - } - public char getFirstAllele() { - return firstAllele; - } - public void setFirstAllele(char firstAllele) { - this.firstAllele = firstAllele; - } - public char getSecondAllele() { - return secondAllele; - } - public void setSecondAllele(char secondAllele) { - this.secondAllele = secondAllele; - } - public String getSnp() { - return snp; - } - public void setSnp(String snp) { - this.snp = snp; - } - public void setHom(boolean hom) { - this.hom = hom; - } - public boolean isHom() { - return hom; - } - - public void setSnp(boolean isSnp) { - this.isSnp = isSnp; - } - - public boolean isSnp() { - return isSnp; - } - - public float getLogRRatio() { - return logRRatio; - } - - - public float getbAlleleFreq() { - return bAlleleFreq; - } - - - public char getFirstAlleleCall() { - return firstAlleleCall; - } - - - public char getSecondAlleleCall() { - return secondAlleleCall; - } - - public int getRawX() { - return rawX; - } - - public int getRawY() { - return rawY; - } - - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + Float.floatToIntBits(GCScore); - result = prime * result + Float.floatToIntBits(bAlleleFreq); - result = prime * result + ((chr == null) ? 0 : chr.hashCode()); - result = prime * result + firstAllele; - result = prime * result + firstAlleleCall; - result = prime * result + (hom ? 1231 : 1237); - result = prime * result + (isSnp ? 1231 : 1237); - result = prime * result + Float.floatToIntBits(logRRatio); - result = prime * result + rawX; - result = prime * result + rawY; - result = prime * result + secondAllele; - result = prime * result + secondAlleleCall; - result = prime * result + ((snp == null) ? 0 : snp.hashCode()); - result = prime * result + ((snpId == null) ? 
0 : snpId.hashCode()); - result = prime * result + start; - return result; - } - - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - IlluminaRecord other = (IlluminaRecord) obj; - if (Float.floatToIntBits(GCScore) != Float - .floatToIntBits(other.GCScore)) - return false; - if (Float.floatToIntBits(bAlleleFreq) != Float - .floatToIntBits(other.bAlleleFreq)) - return false; - if (chr == null) { - if (other.chr != null) - return false; - } else if (!chr.equals(other.chr)) - return false; - if (firstAllele != other.firstAllele) - return false; - if (firstAlleleCall != other.firstAlleleCall) - return false; - if (hom != other.hom) - return false; - if (isSnp != other.isSnp) - return false; - if (Float.floatToIntBits(logRRatio) != Float - .floatToIntBits(other.logRRatio)) - return false; - if (rawX != other.rawX) - return false; - if (rawY != other.rawY) - return false; - if (secondAllele != other.secondAllele) - return false; - if (secondAlleleCall != other.secondAlleleCall) - return false; - if (snp == null) { - if (other.snp != null) - return false; - } else if (!snp.equals(other.snp)) - return false; - if (snpId == null) { - if (other.snpId != null) - return false; - } else if (!snpId.equals(other.snpId)) - return false; - if (start != other.start) - return false; - return true; - } - - - @Override - public String toString() { - return "IlluminaRecord [GCScore=" + GCScore + ", bAlleleFreq=" - + bAlleleFreq + ", chr=" + chr + ", firstAllele=" + firstAllele - + ", firstAlleleCall=" + firstAlleleCall + ", hom=" + hom - + ", isSnp=" + isSnp + ", logRRatio=" + logRRatio + ", rawX=" - + rawX + ", rawY=" + rawY + ", secondAllele=" + secondAllele - + ", secondAlleleCall=" + secondAlleleCall + ", snp=" + snp - + ", snpId=" + snpId + ", start=" + start + "]"; - } - - - public String getStrand() { - return strand; - } - - - public char getFirstAlleleForward() { - return firstAlleleForward; - } - - public char getSecondAlleleForward() { - return secondAlleleForward; - } - - -} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java-- b/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java-- deleted file mode 100644 index 6f39a9463..000000000 --- a/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java-- +++ /dev/null @@ -1,21 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.illumina; - -import java.io.InputStream; - -import org.qcmg.unused.record.AbstractRecordIterator; - -public class IlluminaRecordIterator extends AbstractRecordIterator { - - public IlluminaRecordIterator(InputStream stream) throws Exception { - super(stream); - } - - @Override - protected void readNext() throws Exception { - next = IlluminaSerializer.nextRecord(reader); - } - -} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java-- b/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java-- deleted file mode 100644 index 2bef61139..000000000 --- a/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java-- +++ /dev/null @@ -1,73 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.illumina; - -import java.io.BufferedReader; -import java.io.IOException; - -import org.qcmg.common.util.TabTokenizer; - -public final class IlluminaSerializer { - private static final String HEADER_LINE = "[Header]"; - private static final String DATA_LINE = "[Data]"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - // header lines are as follows: - /* -[Header] -GSGT Version 1.8.4 -Processing Date 8/12/2011 8:41 PM -Content HumanOmni1-Quad_v1-0_H.bpm -Num SNPs 1134514 -Total SNPs 1134514 -Num Samples 259 -Total Samples 260 -File 77 of 259 -[Data] -SNP Name Sample ID Allele1 - Top Allele2 - Top GC Score Sample Name Sample Group Sample Index SNP Index SNP Aux Allele1 - Forward Allele2 - Forward Allele1 - Design Allele2 - Design Allele1 - AB Allele2 - AB Chr Position GT Score Cluster Sep SNP ILMN Strand Customer Strand Top Genomic Sequence Theta R X Y -X Raw Y Raw B Allele Freq Log R Ratio - */ - - String line = reader.readLine(); - if (null != line && line.startsWith(HEADER_LINE)) { - - // ignore header lines until we hit [DATA] - line = reader.readLine(); - while (null != line && ! line.startsWith(DATA_LINE)) { - line = reader.readLine(); - } - // next line is still header.... - line = reader.readLine(); - line = reader.readLine(); - } - return line; - } - - public static IlluminaRecord nextRecord(final BufferedReader reader) throws Exception { - IlluminaRecord result = null; - - String data = nextNonheaderLine(reader); - if (null != data ) { - result = parseRecord(data); - } - - return result; - } - - static String[] parseData(final String value) throws Exception { - String[] dataArray = TabTokenizer.tokenize(value); - - // raw Illumina data has 32 fields... and the first one is an integer - if (dataArray.length != 32) throw new Exception("Bad Illumina data format - expecting 32 fields but saw " + dataArray.length); - - return dataArray; - } - - static IlluminaRecord parseRecord(final String record) - throws Exception { - return new IlluminaRecord(parseData(record)); - } - -} diff --git a/qio/src/org/qcmg/unused/maf/MAFFileReader.java-- b/qio/src/org/qcmg/unused/maf/MAFFileReader.java-- deleted file mode 100644 index 9d98df780..000000000 --- a/qio/src/org/qcmg/unused/maf/MAFFileReader.java-- +++ /dev/null @@ -1,42 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.maf; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -import org.qcmg.common.maf.MAFRecord; - -public final class MAFFileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - - public MAFFileReader(final File file) throws IOException { - this.file = file; - FileInputStream fileStream = new FileInputStream(file); - inputStream = fileStream; - } - - public Iterator iterator() { - return getRecordIterator(); - } - - public MAFRecordIterator getRecordIterator() { - return new MAFRecordIterator(inputStream); - } - - public void close() throws IOException { - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java-- b/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java-- deleted file mode 100644 index 760780922..000000000 --- a/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java-- +++ /dev/null @@ -1,54 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.maf; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -import org.qcmg.common.maf.MAFRecord; - -public final class MAFRecordIterator implements Iterator { - private final BufferedReader reader; - private MAFRecord next; - - public MAFRecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public MAFRecord next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - MAFRecord result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = MAFSerializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - @SuppressWarnings("unchecked") - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/unused/maf/MAFSerializer.java-- b/qio/src/org/qcmg/unused/maf/MAFSerializer.java-- deleted file mode 100644 index bfd4b7d0a..000000000 --- a/qio/src/org/qcmg/unused/maf/MAFSerializer.java-- +++ /dev/null @@ -1,68 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.maf; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -import org.qcmg.common.maf.MAFRecord; - -public final class MAFSerializer { - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static MAFRecord nextRecord(final BufferedReader reader) - throws IOException , Exception { - MAFRecord result = null; - String line = nextNonheaderLine(reader); - if (null != line) { - result = parseRecord(line); - } - return result; - } - - static MAFRecord parseRecord(final String line) throws Exception { - String[] params = tabbedPattern.split(line, -1); - if (8 > params.length) { - throw new Exception("Bad VCF format. Insufficient columns: '" + line + "'"); - } - MAFRecord result = new MAFRecord(); -// result.setChromosome(params[0]); -// result.setPosition(Integer.parseInt(params[1])); -// result.setRef(params[3].charAt(0)); -// result.setAlt(params[4].charAt(0)); -// result.setGenotype(params[9]); -// calculateGenotypeEnum(result); - return result; - } - - private static void calculateGenotypeEnum(MAFRecord record) { - -// String genotypeString = record.getGenotype().substring(0, 3); -// -// if ("0/1".equals(genotypeString)) { -// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getRef(), record.getAlt())); -// } else if ("1/1".equals(genotypeString)) { -// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getAlt(), record.getAlt())); -// } else if ("0/0".equals(genotypeString)) { -// record.setGenotypeEnum(GenotypeEnum.getGenotypeEnum(record.getRef(), record.getRef())); -// } else { -// System.out.println("unhandled genotype string: " + genotypeString); -// } - - } -} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java-- b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java-- deleted file mode 100644 index 9ed51e589..000000000 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java-- +++ /dev/null @@ -1,17 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.primerdesignsummary; - -import java.io.File; - -import org.qcmg.unused.reader.FileReader; - -public class PrimerDesignFileReader extends FileReader { - private final static PrimerDesignRecordSerializer serializer = - new PrimerDesignRecordSerializer(); - - public PrimerDesignFileReader(final File file) throws Exception { - super(file, serializer); - } -} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java-- b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java-- deleted file mode 100644 index 289892f7e..000000000 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java-- +++ /dev/null @@ -1,135 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ - -package org.qcmg.unused.primerdesignsummary; - -public class PrimerDesignRecord { - - protected String snpId; - protected String gene; - protected PrimerPosition position; - protected String baseChange; - protected String snpClass; - - /** - * Gets the value of the snpId property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSnpId() { - return snpId; - } - - /** - * Sets the value of the snpId property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSnpId(String value) { - this.snpId = value; - } - - /** - * Gets the value of the gene property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getGene() { - return gene; - } - - /** - * Sets the value of the gene property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setGene(String value) { - this.gene = value; - } - - /** - * Gets the value of the position property. - * - * @return - * possible object is - * {@link PrimerPosition } - * - */ - public PrimerPosition getPosition() { - return position; - } - - /** - * Sets the value of the position property. - * - * @param value - * allowed object is - * {@link PrimerPosition } - * - */ - public void setPosition(PrimerPosition value) { - this.position = value; - } - - /** - * Gets the value of the baseChange property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getBaseChange() { - return baseChange; - } - - /** - * Sets the value of the baseChange property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setBaseChange(String value) { - this.baseChange = value; - } - - /** - * Gets the value of the snpClass property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSnpClass() { - return snpClass; - } - - /** - * Sets the value of the snpClass property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSnpClass(String value) { - this.snpClass = value; - } - -} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java-- b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java-- deleted file mode 100644 index c81b5bf23..000000000 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java-- +++ /dev/null @@ -1,80 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.primerdesignsummary; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -import org.qcmg.unused.record.Serializer; - -public final class PrimerDesignRecordSerializer extends Serializer { - public PrimerDesignRecord parseRecord(final String line) throws Exception { - String[] params = tabbedPattern.split(line); - if (5 > params.length) { - throw new Exception("Bad primer design record format: '" + line - + "'"); - } - String encodedPosition = params[2].trim(); - PrimerPosition primerPosition = parsePrimerPosition(encodedPosition); - - PrimerDesignRecord result = new PrimerDesignRecord(); - result.setSnpId(params[0].trim()); - result.setGene(params[1].trim()); - result.setPosition(primerPosition); - result.setBaseChange(params[3].trim()); - result.setSnpClass(params[4].trim()); - return result; - } - - public PrimerPosition parsePrimerPosition(String encodedPosition) - throws Exception { - String[] positionParams = colonPattern.split(encodedPosition); - if (2 != positionParams.length) { - throw new Exception("Bad primer design record position format: '" - + encodedPosition + "'"); - } - String chromosome = positionParams[0].trim(); - String positionRange = positionParams[1].trim(); - - String[] positions = hyphenPattern.split(positionRange); - if (2 != positions.length) { - throw new Exception("Bad primer design record position format: '" - + encodedPosition + "'"); - } - int start = Integer.parseInt(positions[0]); - int end = Integer.parseInt(positions[1]); - - PrimerPosition primerPosition = new PrimerPosition(); - primerPosition.setChromosome(chromosome); - primerPosition.setStart(start); - primerPosition.setEnd(end); - return primerPosition; - } - - public String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(HASH)) { - line = reader.readLine(); - } - return line; - } - - public String serialise(PrimerDesignRecord record) throws Exception { - // TODO Auto-generated method stub - return null; - } - - @Override - public PrimerDesignRecord parseRecord(BufferedReader reader) - throws Exception { - String line = nextNonheaderLine(reader); - PrimerDesignRecord result = null; - if (null != line) { - result = parseRecord(line); - } - return result; - } -} diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java-- b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java-- deleted file mode 100644 index 1154b4fb4..000000000 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java-- +++ /dev/null @@ -1,68 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.primerdesignsummary; - -public class PrimerPosition { - - protected String chromosome; - protected int start; - protected int end; - - /** - * Gets the value of the chromosome property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getChromosome() { - return chromosome; - } - - /** - * Sets the value of the chromosome property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setChromosome(String value) { - this.chromosome = value; - } - - /** - * Gets the value of the start property. - * - */ - public int getStart() { - return start; - } - - /** - * Sets the value of the start property. 
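The parsePrimerPosition method in the deleted PrimerDesignRecordSerializer above decodes positions written as chromosome:start-end by splitting first on the colon and then on the hyphen. A compact sketch of that parsing, using a local record type rather than the deleted PrimerPosition bean, could look like the following:

public final class PositionParseSketch {

    // Hypothetical stand-in for the deleted PrimerPosition bean.
    record Position(String chromosome, int start, int end) { }

    // Parse "chr1:100-200" style strings: chromosome before the colon,
    // start and end on either side of the hyphen.
    static Position parse(String encoded) {
        String[] chrAndRange = encoded.split(":");
        if (chrAndRange.length != 2) {
            throw new IllegalArgumentException("Bad position format: '" + encoded + "'");
        }
        String[] startAndEnd = chrAndRange[1].trim().split("-");
        if (startAndEnd.length != 2) {
            throw new IllegalArgumentException("Bad position format: '" + encoded + "'");
        }
        return new Position(chrAndRange[0].trim(),
                Integer.parseInt(startAndEnd[0].trim()),
                Integer.parseInt(startAndEnd[1].trim()));
    }

    public static void main(String[] args) {
        System.out.println(parse("chr1:100-200")); // Position[chromosome=chr1, start=100, end=200]
    }
}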
- * - */ - public void setStart(int value) { - this.start = value; - } - - /** - * Gets the value of the end property. - * - */ - public int getEnd() { - return end; - } - - /** - * Sets the value of the end property. - * - */ - public void setEnd(int value) { - this.end = value; - } - -} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java-- deleted file mode 100644 index c2a7262a4..000000000 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java-- +++ /dev/null @@ -1,17 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.primerinput; - -import java.io.File; - -import org.qcmg.unused.reader.FileReader; - -public class PrimerInputFileReader extends FileReader { - private final static PrimerInputRecordSerializer serializer = - new PrimerInputRecordSerializer(); - - public PrimerInputFileReader(final File file) throws Exception { - super(file, serializer); - } -} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java-- deleted file mode 100644 index b09623176..000000000 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java-- +++ /dev/null @@ -1,33 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.primerinput; - -import java.io.Closeable; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -public final class PrimerInputFileWriter implements Closeable { - private static final String EQUALS = "="; - private static final PrimerInputRecordSerializer serializer = new PrimerInputRecordSerializer(); - private final OutputStream outputStream; - - public PrimerInputFileWriter(final File file) throws Exception { - OutputStream stream = new FileOutputStream(file); - outputStream = stream; - } - - public void add(final PrimerInputRecord record) throws Exception { - String encoded = serializer.serialise(record); - outputStream.write(encoded.getBytes()); - outputStream.flush(); - } - - public void close() throws IOException { - outputStream.write(EQUALS.getBytes()); - outputStream.flush(); - outputStream.close(); - } -} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java-- deleted file mode 100644 index 7de08071a..000000000 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java-- +++ /dev/null @@ -1,331 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.primerinput; - - -public class PrimerInputRecord { - - protected String sequenceId; - protected String sequenceTemplate; - protected PrimerSequenceTarget sequenceTarget; - protected int primerProductMinTm; - protected int primerProductMaxTm; - protected double primerDnaConc; - protected double primerSaltConc; - protected int primerMinTm; - protected int primerOptTm; - protected int primerMaxTm; - protected int primerMinSize; - protected int primerOptSize; - protected int primerMaxSize; - protected PrimerSizeRange primerProductSizeRange; - protected boolean primerExplainFlag; - protected int primerNumReturn; - protected boolean primerNumNsAccepted; - - /** - * Gets the value of the sequenceId property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSequenceId() { - return sequenceId; - } - - /** - * Sets the value of the sequenceId property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSequenceId(String value) { - this.sequenceId = value; - } - - /** - * Gets the value of the sequenceTemplate property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSequenceTemplate() { - return sequenceTemplate; - } - - /** - * Sets the value of the sequenceTemplate property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSequenceTemplate(String value) { - this.sequenceTemplate = value; - } - - /** - * Gets the value of the sequenceTarget property. - * - * @return - * possible object is - * {@link PrimerSequenceTarget } - * - */ - public PrimerSequenceTarget getSequenceTarget() { - return sequenceTarget; - } - - /** - * Sets the value of the sequenceTarget property. - * - * @param value - * allowed object is - * {@link PrimerSequenceTarget } - * - */ - public void setSequenceTarget(PrimerSequenceTarget value) { - this.sequenceTarget = value; - } - - /** - * Gets the value of the primerProductMinTm property. - * - */ - public int getPrimerProductMinTm() { - return primerProductMinTm; - } - - /** - * Sets the value of the primerProductMinTm property. - * - */ - public void setPrimerProductMinTm(int value) { - this.primerProductMinTm = value; - } - - /** - * Gets the value of the primerProductMaxTm property. - * - */ - public int getPrimerProductMaxTm() { - return primerProductMaxTm; - } - - /** - * Sets the value of the primerProductMaxTm property. - * - */ - public void setPrimerProductMaxTm(int value) { - this.primerProductMaxTm = value; - } - - /** - * Gets the value of the primerDnaConc property. - * - */ - public double getPrimerDnaConc() { - return primerDnaConc; - } - - /** - * Sets the value of the primerDnaConc property. - * - */ - public void setPrimerDnaConc(double value) { - this.primerDnaConc = value; - } - - /** - * Gets the value of the primerSaltConc property. - * - */ - public double getPrimerSaltConc() { - return primerSaltConc; - } - - /** - * Sets the value of the primerSaltConc property. - * - */ - public void setPrimerSaltConc(double value) { - this.primerSaltConc = value; - } - - /** - * Gets the value of the primerMinTm property. - * - */ - public int getPrimerMinTm() { - return primerMinTm; - } - - /** - * Sets the value of the primerMinTm property. - * - */ - public void setPrimerMinTm(int value) { - this.primerMinTm = value; - } - - /** - * Gets the value of the primerOptTm property. 
- * - */ - public int getPrimerOptTm() { - return primerOptTm; - } - - /** - * Sets the value of the primerOptTm property. - * - */ - public void setPrimerOptTm(int value) { - this.primerOptTm = value; - } - - /** - * Gets the value of the primerMaxTm property. - * - */ - public int getPrimerMaxTm() { - return primerMaxTm; - } - - /** - * Sets the value of the primerMaxTm property. - * - */ - public void setPrimerMaxTm(int value) { - this.primerMaxTm = value; - } - - /** - * Gets the value of the primerMinSize property. - * - */ - public int getPrimerMinSize() { - return primerMinSize; - } - - /** - * Sets the value of the primerMinSize property. - * - */ - public void setPrimerMinSize(int value) { - this.primerMinSize = value; - } - - /** - * Gets the value of the primerOptSize property. - * - */ - public int getPrimerOptSize() { - return primerOptSize; - } - - /** - * Sets the value of the primerOptSize property. - * - */ - public void setPrimerOptSize(int value) { - this.primerOptSize = value; - } - - /** - * Gets the value of the primerMaxSize property. - * - */ - public int getPrimerMaxSize() { - return primerMaxSize; - } - - /** - * Sets the value of the primerMaxSize property. - * - */ - public void setPrimerMaxSize(int value) { - this.primerMaxSize = value; - } - - /** - * Gets the value of the primerProductSizeRange property. - * - * @return - * possible object is - * {@link PrimerSizeRange } - * - */ - public PrimerSizeRange getPrimerProductSizeRange() { - return primerProductSizeRange; - } - - /** - * Sets the value of the primerProductSizeRange property. - * - * @param value - * allowed object is - * {@link PrimerSizeRange } - * - */ - public void setPrimerProductSizeRange(PrimerSizeRange value) { - this.primerProductSizeRange = value; - } - - /** - * Gets the value of the primerExplainFlag property. - * - */ - public boolean isPrimerExplainFlag() { - return primerExplainFlag; - } - - /** - * Sets the value of the primerExplainFlag property. - * - */ - public void setPrimerExplainFlag(boolean value) { - this.primerExplainFlag = value; - } - - /** - * Gets the value of the primerNumReturn property. - * - */ - public int getPrimerNumReturn() { - return primerNumReturn; - } - - /** - * Sets the value of the primerNumReturn property. - * - */ - public void setPrimerNumReturn(int value) { - this.primerNumReturn = value; - } - - /** - * Gets the value of the primerNumNsAccepted property. - * - */ - public boolean isPrimerNumNsAccepted() { - return primerNumNsAccepted; - } - - /** - * Sets the value of the primerNumNsAccepted property. - * - */ - public void setPrimerNumNsAccepted(boolean value) { - this.primerNumNsAccepted = value; - } - -} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java-- deleted file mode 100644 index 5bff3cd6a..000000000 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java-- +++ /dev/null @@ -1,201 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.primerinput; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; - -import org.qcmg.unused.gff3.GFF3Record; -import org.qcmg.unused.record.Serializer; - -public final class PrimerInputRecordSerializer extends - Serializer { - private final static String SEQUENCE_ID = "SEQUENCE_ID"; - private final static String SEQUENCE_TEMPLATE = "SEQUENCE_TEMPLATE"; - private final static String SEQUENCE_TARGET = "SEQUENCE_TARGET"; - private final static String PRIMER_PRODUCT_MIN_TM = "PRIMER_PRODUCT_MIN_TM"; - private final static String PRIMER_PRODUCT_MAX_TM = "PRIMER_PRODUCT_MAX_TM"; - private final static String PRIMER_DNA_CONC = "PRIMER_DNA_CONC"; - private final static String PRIMER_SALT_CONC = "PRIMER_SALT_CONC"; - private final static String PRIMER_MIN_TM = "PRIMER_MIN_TM"; - private final static String PRIMER_OPT_TM = "PRIMER_OPT_TM"; - private final static String PRIMER_MAX_TM = "PRIMER_MAX_TM"; - private final static String PRIMER_MIN_SIZE = "PRIMER_MIN_SIZE"; - private final static String PRIMER_OPT_SIZE = "PRIMER_OPT_SIZE"; - private final static String PRIMER_MAX_SIZE = "PRIMER_MAX_SIZE"; - private final static String PRIMER_PRODUCT_SIZE_RANGE = "PRIMER_PRODUCT_SIZE_RANGE"; - private final static String PRIMER_EXPLAIN_FLAG = "PRIMER_EXPLAIN_FLAG"; - private final static String PRIMER_NUM_RETURN = "PRIMER_NUM_RETURN"; - private final static String PRIMER_NUM_NS_ACCEPTED = "PRIMER_NUM_NS_ACCEPTED"; - private final static String EQUALS = "="; - - public PrimerInputRecord parseRecord(final BufferedReader reader) - throws Exception { - String nextLine = nextStringValue(reader); - if (nextLine.equals("=")) { - return null; - } - PrimerInputRecord result = new PrimerInputRecord(); - result.setSequenceId(nextLine); - result.setSequenceTemplate(nextStringValue(reader)); - result.setSequenceTarget(nextTargetValue(reader)); - result.setPrimerProductMinTm(nextIntegerValue(reader)); - result.setPrimerProductMaxTm(nextIntegerValue(reader)); - result.setPrimerDnaConc(nextDoubleValue(reader)); - result.setPrimerSaltConc(nextDoubleValue(reader)); - result.setPrimerMinTm(nextIntegerValue(reader)); - result.setPrimerOptTm(nextIntegerValue(reader)); - result.setPrimerMaxTm(nextIntegerValue(reader)); - result.setPrimerMinSize(nextIntegerValue(reader)); - result.setPrimerOptSize(nextIntegerValue(reader)); - result.setPrimerMaxSize(nextIntegerValue(reader)); - result.setPrimerProductSizeRange(nextPrimerSizeRangeValue(reader)); - result.setPrimerExplainFlag(nextBooleanValue(reader)); - result.setPrimerNumReturn(nextIntegerValue(reader)); - result.setPrimerNumNsAccepted(nextBooleanValue(reader)); - return result; - } - - private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { - return Double.parseDouble(nextStringValue(reader)); - } - - private PrimerSizeRange nextPrimerSizeRangeValue(BufferedReader reader) - throws Exception { - String targetValue = nextStringValue(reader); - final String[] params = hyphenPattern.split(targetValue, -1); - if (2 != params.length) { - throw new Exception("Bad format. 
Insufficient numbered values: '" - + targetValue + "'"); - } - PrimerSizeRange range = new PrimerSizeRange(); - range.setLowerLimit(Integer.parseInt(params[0])); - range.setUpperLimit(Integer.parseInt(params[1])); - return range; - } - - private boolean nextBooleanValue(BufferedReader reader) throws Exception { - return Boolean.parseBoolean(nextStringValue(reader)); - } - - private String nextStringValue(final BufferedReader reader) - throws Exception { - final String line = reader.readLine(); - final String[] params = equalsPattern.split(line, -1); - if (2 != params.length) { - throw new Exception("Bad format. Insufficient columns: '" + line - + "'"); - } - return params[1].trim(); - } - - private int nextIntegerValue(final BufferedReader reader) throws Exception { - return Integer.parseInt(nextStringValue(reader)); - } - - private PrimerSequenceTarget nextTargetValue(final BufferedReader reader) - throws Exception { - String targetValue = nextStringValue(reader); - final String[] params = commaPattern.split(targetValue, -1); - if (2 != params.length) { - throw new Exception("Bad format. Insufficient numbered values: '" - + targetValue + "'"); - } - PrimerSequenceTarget target = new PrimerSequenceTarget(); - target.setLeftValue(Integer.parseInt(params[0])); - target.setRightValue(Integer.parseInt(params[1])); - return target; - } - - public String serialise(final PrimerInputRecord record) throws Exception { - String result = addLine("", SEQUENCE_ID, record.getSequenceId()); - result = addLine(result, SEQUENCE_TEMPLATE, record - .getSequenceTemplate()); - result = addLine(result, SEQUENCE_TARGET, record.getSequenceTarget()); - result = addLine(result, PRIMER_PRODUCT_MIN_TM, record - .getPrimerProductMinTm()); - result = addLine(result, PRIMER_PRODUCT_MAX_TM, record - .getPrimerProductMaxTm()); - result = addLine(result, PRIMER_DNA_CONC, record.getPrimerDnaConc()); - result = addLine(result, PRIMER_SALT_CONC, record.getPrimerSaltConc()); - result = addLine(result, PRIMER_MIN_TM, record.getPrimerMinTm()); - result = addLine(result, PRIMER_OPT_TM, record.getPrimerOptTm()); - result = addLine(result, PRIMER_MAX_TM, record.getPrimerMaxTm()); - result = addLine(result, PRIMER_MIN_SIZE, record.getPrimerMinSize()); - result = addLine(result, PRIMER_OPT_SIZE, record.getPrimerOptSize()); - result = addLine(result, PRIMER_MAX_SIZE, record.getPrimerMaxSize()); - result = addLine(result, PRIMER_PRODUCT_SIZE_RANGE, record - .getPrimerProductSizeRange()); - result = addLine(result, PRIMER_EXPLAIN_FLAG, record - .isPrimerExplainFlag()); - result = addLine(result, PRIMER_NUM_RETURN, record.getPrimerNumReturn()); - result = addLine(result, PRIMER_NUM_NS_ACCEPTED, record - .isPrimerNumNsAccepted()); - return result; - } - - public static void initialise(PrimerInputRecord record) { - record.setPrimerDnaConc(120); - record.setPrimerSaltConc(50); - record.setPrimerExplainFlag(false); - record.setPrimerMaxSize(25); - record.setPrimerExplainFlag(true); - record.setPrimerMaxSize(25); - record.setPrimerMaxTm(75); - record.setPrimerMinSize(18); - record.setPrimerMinTm(55); - record.setPrimerNumNsAccepted(true); - record.setPrimerNumReturn(10000); - record.setPrimerOptSize(20); - record.setPrimerOptTm(65); - record.setPrimerProductMaxTm(85); - record.setPrimerProductMinTm(65); - PrimerSizeRange range = new PrimerSizeRange(); - range.setLowerLimit(50); - range.setUpperLimit(120); - record.setPrimerProductSizeRange(range); - record.setSequenceId(""); - record.setSequenceTemplate(""); - PrimerSequenceTarget target = new 
PrimerSequenceTarget(); - target.setLeftValue(249); - target.setRightValue(3); - record.setSequenceTarget(target); - } - - private static String addLine(final String result, final String lhs, - final String rhs) { - return result + lhs + EQUALS + rhs + NEWLINE; - } - - private static String addLine(String result, final String lhs, - final double rhs) { - return result + lhs + EQUALS + Double.toString(rhs) + NEWLINE; - } - - private static String addLine(String result, final String lhs, final int rhs) { - return result + lhs + EQUALS + Integer.toString(rhs) + NEWLINE; - } - - private static String addLine(String result, String lhs, - final PrimerSequenceTarget rhs) { - return result + lhs + EQUALS + rhs.getLeftValue() + "," - + rhs.getRightValue() + NEWLINE; - } - - private static String addLine(String result, String lhs, final boolean rhs) { - if (rhs) { - return result + lhs + EQUALS + "1" + NEWLINE; - } else { - return result + lhs + EQUALS + "0" + NEWLINE; - } - } - - private static String addLine(String result, String lhs, - final PrimerSizeRange rhs) { - return result + lhs + EQUALS + rhs.getLowerLimit() + "-" - + rhs.getUpperLimit() + NEWLINE; - } - -} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java-- deleted file mode 100644 index 8cb3833fe..000000000 --- a/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java-- +++ /dev/null @@ -1,43 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.primerinput; - -public class PrimerSequenceTarget { - - protected int leftValue; - protected int rightValue; - - /** - * Gets the value of the leftValue property. - * - */ - public int getLeftValue() { - return leftValue; - } - - /** - * Sets the value of the leftValue property. - * - */ - public void setLeftValue(int value) { - this.leftValue = value; - } - - /** - * Gets the value of the rightValue property. - * - */ - public int getRightValue() { - return rightValue; - } - - /** - * Sets the value of the rightValue property. - * - */ - public void setRightValue(int value) { - this.rightValue = value; - } - -} diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java-- b/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java-- deleted file mode 100644 index bc16b6531..000000000 --- a/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java-- +++ /dev/null @@ -1,44 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.primerinput; - - -public class PrimerSizeRange { - - protected int lowerLimit; - protected int upperLimit; - - /** - * Gets the value of the lowerLimit property. - * - */ - public int getLowerLimit() { - return lowerLimit; - } - - /** - * Sets the value of the lowerLimit property. - * - */ - public void setLowerLimit(int value) { - this.lowerLimit = value; - } - - /** - * Gets the value of the upperLimit property. - * - */ - public int getUpperLimit() { - return upperLimit; - } - - /** - * Sets the value of the upperLimit property. 
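The deleted primer-input reader and writer above exchange data in primer3's Boulder-IO style: one KEY=value pair per line, with a lone '=' terminating each record (the writer emits it on close, and the reader stops when it sees it). A small sketch of writing and reading that layout follows; it uses a plain Map with a couple of illustrative keys rather than the full PrimerInputRecord.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.LinkedHashMap;
import java.util.Map;

public final class BoulderIoSketch {

    // Serialise key=value pairs, one per line, terminated by a lone '='.
    static String serialise(Map<String, String> record) {
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, String> e : record.entrySet()) {
            sb.append(e.getKey()).append('=').append(e.getValue()).append('\n');
        }
        return sb.append("=\n").toString();
    }

    // Read key=value lines until the lone '=' terminator (or end of input).
    static Map<String, String> parse(BufferedReader reader) throws IOException {
        Map<String, String> record = new LinkedHashMap<>();
        String line;
        while ((line = reader.readLine()) != null && !line.equals("=")) {
            int idx = line.indexOf('=');
            if (idx < 1) {
                throw new IOException("Bad format. Insufficient columns: '" + line + "'");
            }
            record.put(line.substring(0, idx).trim(), line.substring(idx + 1).trim());
        }
        return record;
    }

    public static void main(String[] args) throws IOException {
        Map<String, String> in = new LinkedHashMap<>();
        in.put("SEQUENCE_ID", "example");
        in.put("PRIMER_OPT_TM", "65");
        try (BufferedReader reader = new BufferedReader(new StringReader(serialise(in)))) {
            System.out.println(parse(reader)); // {SEQUENCE_ID=example, PRIMER_OPT_TM=65}
        }
    }
}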
- * - */ - public void setUpperLimit(int value) { - this.upperLimit = value; - } - -} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java-- deleted file mode 100644 index 937c5993f..000000000 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java-- +++ /dev/null @@ -1,22 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.primeroutput; - -import java.io.File; - -import org.qcmg.primeroutput.PrimerOutputHeader; -import org.qcmg.primeroutput.PrimerOutputRecord; -import org.qcmg.reader.ExtendedFileReader; -import org.qcmg.unused.reader.FileReader; - -public class PrimerOutputFileReader extends ExtendedFileReader { - private final static PrimerOutputHeaderSerializer headerSerializer = - new PrimerOutputHeaderSerializer(); - private final static PrimerOutputRecordSerializer recordSerializer = - new PrimerOutputRecordSerializer(); - - public PrimerOutputFileReader(final File file) throws Exception { - super(file, recordSerializer, headerSerializer); - } -} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java-- deleted file mode 100644 index 7afe95b1b..000000000 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java-- +++ /dev/null @@ -1,35 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.primeroutput; - -import java.io.Closeable; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -import org.qcmg.primeroutput.PrimerOutputRecord; - -public final class PrimerOutputFileWriter implements Closeable { - private static final String EQUALS = "="; - private static final PrimerOutputRecordSerializer serializer = new PrimerOutputRecordSerializer(); - private final OutputStream outputStream; - - public PrimerOutputFileWriter(final File file) throws Exception { - OutputStream stream = new FileOutputStream(file); - outputStream = stream; - } - - public void add(final PrimerOutputRecord record) throws Exception { - String encoded = serializer.serialise(record); - outputStream.write(encoded.getBytes()); - outputStream.flush(); - } - - public void close() throws IOException { - outputStream.write(EQUALS.getBytes()); - outputStream.flush(); - outputStream.close(); - } -} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java-- deleted file mode 100644 index 4763d993e..000000000 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java-- +++ /dev/null @@ -1,563 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -// -// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 -// See http://java.sun.com/xml/jaxb -// Any modifications to this file will be lost upon recompilation of the source schema. 
-// Generated on: 2013.10.25 at 10:52:22 AM EST -// - - -package org.qcmg.unused.primeroutput; - -import javax.xml.bind.annotation.XmlAccessType; -import javax.xml.bind.annotation.XmlAccessorType; -import javax.xml.bind.annotation.XmlElement; -import javax.xml.bind.annotation.XmlType; - - -/** - *

- * Java class for primerOutputHeader complex type.
- * 
- * The following schema fragment specifies the expected content contained within this class.
- * 
- * <complexType name="primerOutputHeader">
- *   <complexContent>
- *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
- *       <sequence>
- *         <element name="sequenceId" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="sequenceTemplate" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="sequenceTarget" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="productMinTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="productMaxTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="dnaConc" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="saltConc" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="minTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="optTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="maxTm" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="minSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="optSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="maxSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="productSizeRange" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="explainFlag" type="{http://www.w3.org/2001/XMLSchema}boolean"/>
- *         <element name="numReturn" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="numNsAccepted" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="leftExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="rightExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="pairExplain" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="leftNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="rightNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="internalNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="pairNumReturned" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *       </sequence>
- *     </restriction>
- *   </complexContent>
- * </complexType>
- * 
- * - * - */ -@XmlAccessorType(XmlAccessType.FIELD) -@XmlType(name = "primerOutputHeader", propOrder = { - "sequenceId", - "sequenceTemplate", - "sequenceTarget", - "productMinTm", - "productMaxTm", - "dnaConc", - "saltConc", - "minTm", - "optTm", - "maxTm", - "minSize", - "optSize", - "maxSize", - "productSizeRange", - "explainFlag", - "numReturn", - "numNsAccepted", - "leftExplain", - "rightExplain", - "pairExplain", - "leftNumReturned", - "rightNumReturned", - "internalNumReturned", - "pairNumReturned" -}) -public class PrimerOutputHeader { - - @XmlElement(required = true) - protected String sequenceId; - @XmlElement(required = true) - protected String sequenceTemplate; - @XmlElement(required = true) - protected String sequenceTarget; - protected int productMinTm; - protected int productMaxTm; - protected double dnaConc; - protected double saltConc; - protected int minTm; - protected int optTm; - protected int maxTm; - protected int minSize; - protected int optSize; - protected int maxSize; - @XmlElement(required = true) - protected String productSizeRange; - protected boolean explainFlag; - protected int numReturn; - protected int numNsAccepted; - @XmlElement(required = true) - protected String leftExplain; - @XmlElement(required = true) - protected String rightExplain; - @XmlElement(required = true) - protected String pairExplain; - protected int leftNumReturned; - protected int rightNumReturned; - protected int internalNumReturned; - protected int pairNumReturned; - - /** - * Gets the value of the sequenceId property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSequenceId() { - return sequenceId; - } - - /** - * Sets the value of the sequenceId property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSequenceId(String value) { - this.sequenceId = value; - } - - /** - * Gets the value of the sequenceTemplate property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSequenceTemplate() { - return sequenceTemplate; - } - - /** - * Sets the value of the sequenceTemplate property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSequenceTemplate(String value) { - this.sequenceTemplate = value; - } - - /** - * Gets the value of the sequenceTarget property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getSequenceTarget() { - return sequenceTarget; - } - - /** - * Sets the value of the sequenceTarget property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setSequenceTarget(String value) { - this.sequenceTarget = value; - } - - /** - * Gets the value of the productMinTm property. - * - */ - public int getProductMinTm() { - return productMinTm; - } - - /** - * Sets the value of the productMinTm property. - * - */ - public void setProductMinTm(int value) { - this.productMinTm = value; - } - - /** - * Gets the value of the productMaxTm property. - * - */ - public int getProductMaxTm() { - return productMaxTm; - } - - /** - * Sets the value of the productMaxTm property. - * - */ - public void setProductMaxTm(int value) { - this.productMaxTm = value; - } - - /** - * Gets the value of the dnaConc property. - * - */ - public double getDnaConc() { - return dnaConc; - } - - /** - * Sets the value of the dnaConc property. - * - */ - public void setDnaConc(double value) { - this.dnaConc = value; - } - - /** - * Gets the value of the saltConc property. 
- * - */ - public double getSaltConc() { - return saltConc; - } - - /** - * Sets the value of the saltConc property. - * - */ - public void setSaltConc(double value) { - this.saltConc = value; - } - - /** - * Gets the value of the minTm property. - * - */ - public int getMinTm() { - return minTm; - } - - /** - * Sets the value of the minTm property. - * - */ - public void setMinTm(int value) { - this.minTm = value; - } - - /** - * Gets the value of the optTm property. - * - */ - public int getOptTm() { - return optTm; - } - - /** - * Sets the value of the optTm property. - * - */ - public void setOptTm(int value) { - this.optTm = value; - } - - /** - * Gets the value of the maxTm property. - * - */ - public int getMaxTm() { - return maxTm; - } - - /** - * Sets the value of the maxTm property. - * - */ - public void setMaxTm(int value) { - this.maxTm = value; - } - - /** - * Gets the value of the minSize property. - * - */ - public int getMinSize() { - return minSize; - } - - /** - * Sets the value of the minSize property. - * - */ - public void setMinSize(int value) { - this.minSize = value; - } - - /** - * Gets the value of the optSize property. - * - */ - public int getOptSize() { - return optSize; - } - - /** - * Sets the value of the optSize property. - * - */ - public void setOptSize(int value) { - this.optSize = value; - } - - /** - * Gets the value of the maxSize property. - * - */ - public int getMaxSize() { - return maxSize; - } - - /** - * Sets the value of the maxSize property. - * - */ - public void setMaxSize(int value) { - this.maxSize = value; - } - - /** - * Gets the value of the productSizeRange property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getProductSizeRange() { - return productSizeRange; - } - - /** - * Sets the value of the productSizeRange property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setProductSizeRange(String value) { - this.productSizeRange = value; - } - - /** - * Gets the value of the explainFlag property. - * - */ - public boolean isExplainFlag() { - return explainFlag; - } - - /** - * Sets the value of the explainFlag property. - * - */ - public void setExplainFlag(boolean value) { - this.explainFlag = value; - } - - /** - * Gets the value of the numReturn property. - * - */ - public int getNumReturn() { - return numReturn; - } - - /** - * Sets the value of the numReturn property. - * - */ - public void setNumReturn(int value) { - this.numReturn = value; - } - - /** - * Gets the value of the numNsAccepted property. - * - */ - public int getNumNsAccepted() { - return numNsAccepted; - } - - /** - * Sets the value of the numNsAccepted property. - * - */ - public void setNumNsAccepted(int value) { - this.numNsAccepted = value; - } - - /** - * Gets the value of the leftExplain property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getLeftExplain() { - return leftExplain; - } - - /** - * Sets the value of the leftExplain property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setLeftExplain(String value) { - this.leftExplain = value; - } - - /** - * Gets the value of the rightExplain property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getRightExplain() { - return rightExplain; - } - - /** - * Sets the value of the rightExplain property. 
- * - * @param value - * allowed object is - * {@link String } - * - */ - public void setRightExplain(String value) { - this.rightExplain = value; - } - - /** - * Gets the value of the pairExplain property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getPairExplain() { - return pairExplain; - } - - /** - * Sets the value of the pairExplain property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setPairExplain(String value) { - this.pairExplain = value; - } - - /** - * Gets the value of the leftNumReturned property. - * - */ - public int getLeftNumReturned() { - return leftNumReturned; - } - - /** - * Sets the value of the leftNumReturned property. - * - */ - public void setLeftNumReturned(int value) { - this.leftNumReturned = value; - } - - /** - * Gets the value of the rightNumReturned property. - * - */ - public int getRightNumReturned() { - return rightNumReturned; - } - - /** - * Sets the value of the rightNumReturned property. - * - */ - public void setRightNumReturned(int value) { - this.rightNumReturned = value; - } - - /** - * Gets the value of the internalNumReturned property. - * - */ - public int getInternalNumReturned() { - return internalNumReturned; - } - - /** - * Sets the value of the internalNumReturned property. - * - */ - public void setInternalNumReturned(int value) { - this.internalNumReturned = value; - } - - /** - * Gets the value of the pairNumReturned property. - * - */ - public int getPairNumReturned() { - return pairNumReturned; - } - - /** - * Sets the value of the pairNumReturned property. - * - */ - public void setPairNumReturned(int value) { - this.pairNumReturned = value; - } - -} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java-- deleted file mode 100644 index fbb1e483e..000000000 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java-- +++ /dev/null @@ -1,128 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.primeroutput; - -import java.io.BufferedReader; - -import org.qcmg.unused.record.Serializer; - -public class PrimerOutputHeaderSerializer extends - Serializer { - private final static String[] FIELDS = { - "SEQUENCE_ID", - "SEQUENCE_TEMPLATE", - "SEQUENCE_TARGET", - "PRIMER_PRODUCT_MIN_TM", - "PRIMER_PRODUCT_MAX_TM", - "PRIMER_DNA_CONC", - "PRIMER_SALT_CONC", - "PRIMER_MIN_TM", - "PRIMER_OPT_TM", - "PRIMER_MAX_TM", - "PRIMER_MIN_SIZE", - "PRIMER_OPT_SIZE", - "PRIMER_MAX_SIZE", - "PRIMER_PRODUCT_SIZE_RANGE", - "PRIMER_EXPLAIN_FLAG", - "PRIMER_NUM_RETURN", - "PRIMER_NUM_NS_ACCEPTED", - "PRIMER_LEFT_EXPLAIN", - "PRIMER_RIGHT_EXPLAIN", - "PRIMER_PAIR_EXPLAIN", - "PRIMER_LEFT_NUM_RETURNED", - "PRIMER_RIGHT_NUM_RETURNED", - "PRIMER_INTERNAL_NUM_RETURNED", - "PRIMER_PAIR_NUM_RETURNED"}; - - static { - assert(24 == FIELDS.length); - } - - @Override - public PrimerOutputHeader parseRecord(final BufferedReader reader) - throws Exception { - PrimerOutputHeader result = new PrimerOutputHeader(); - result.setSequenceId(nextStringValue(reader)); - result.setSequenceTemplate(nextStringValue(reader)); - result.setSequenceTarget(nextStringValue(reader)); - result.setProductMinTm(nextIntegerValue(reader)); - result.setProductMaxTm(nextIntegerValue(reader)); - result.setDnaConc(nextDoubleValue(reader)); - result.setSaltConc(nextDoubleValue(reader)); - result.setMinTm(nextIntegerValue(reader)); - result.setOptTm(nextIntegerValue(reader)); - result.setMaxTm(nextIntegerValue(reader)); - result.setMinSize(nextIntegerValue(reader)); - result.setOptSize(nextIntegerValue(reader)); - result.setMaxSize(nextIntegerValue(reader)); - result.setProductSizeRange(nextStringValue(reader)); - result.setExplainFlag(nextBooleanValue(reader)); - result.setNumReturn(nextIntegerValue(reader)); - result.setNumNsAccepted(nextIntegerValue(reader)); - result.setLeftExplain(nextStringValue(reader)); - result.setRightExplain(nextStringValue(reader)); - result.setPairExplain(nextStringValue(reader)); - result.setLeftNumReturned(nextIntegerValue(reader)); - result.setRightNumReturned(nextIntegerValue(reader)); - result.setInternalNumReturned(nextIntegerValue(reader)); - result.setPairNumReturned(nextIntegerValue(reader)); - return result; - } - - @Override - public String serialise(PrimerOutputHeader record) throws Exception { - String result = createLine(FIELDS[0], record.getSequenceId()); - result += FIELDS[1] + EQUALS + record.getSequenceTemplate() + NEWLINE; - result += FIELDS[2] + EQUALS + record.getSequenceTarget() + NEWLINE; - result += FIELDS[3] + EQUALS + record.getProductMinTm() + NEWLINE; - result += FIELDS[4] + EQUALS + record.getProductMaxTm() + NEWLINE; - result += FIELDS[5] + EQUALS + record.getDnaConc() + NEWLINE; - result += FIELDS[6] + EQUALS + record.getSaltConc() + NEWLINE; - result += FIELDS[7] + EQUALS + record.getMinTm() + NEWLINE; - result += FIELDS[8] + EQUALS + record.getOptTm() + NEWLINE; - result += FIELDS[9] + EQUALS + record.getMaxTm() + NEWLINE; - result += FIELDS[10] + EQUALS + record.getMinSize() + NEWLINE; - result += FIELDS[11] + EQUALS + record.getOptSize() + NEWLINE; - result += FIELDS[12] + EQUALS + record.getMaxSize() + NEWLINE; - result += FIELDS[13] + EQUALS + record.getProductSizeRange() + NEWLINE; - result += FIELDS[14] + EQUALS + record.isExplainFlag() + NEWLINE; - result += FIELDS[15] + EQUALS + record.getNumReturn() + NEWLINE; - result += FIELDS[16] + EQUALS + record.getNumNsAccepted() + NEWLINE; - result += FIELDS[17] + EQUALS + record.getLeftExplain() + 
NEWLINE; - result += FIELDS[18] + EQUALS + record.getRightExplain() + NEWLINE; - result += FIELDS[19] + EQUALS + record.getPairExplain() + NEWLINE; - result += FIELDS[20] + EQUALS + record.getLeftNumReturned() + NEWLINE; - result += FIELDS[21] + EQUALS + record.getRightNumReturned() + NEWLINE; - result += FIELDS[22] + EQUALS + record.getInternalNumReturned() + NEWLINE; - result += FIELDS[23] + EQUALS + record.getPairNumReturned() + NEWLINE; - return result; - } - - private String createLine(final String fieldName, final String fieldValue) { - return fieldName + EQUALS + fieldValue + NEWLINE; - } - - private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception { - return Double.parseDouble(nextStringValue(reader)); - } - - private boolean nextBooleanValue(BufferedReader reader) throws Exception { - return Boolean.parseBoolean(nextStringValue(reader)); - } - - private String nextStringValue(final BufferedReader reader) - throws Exception { - final String line = reader.readLine(); - final String[] params = equalsPattern.split(line); - if (2 != params.length) { - throw new Exception("Bad format. Insufficient columns: '" + line - + "'"); - } - return params[1].trim(); - } - - private int nextIntegerValue(final BufferedReader reader) throws Exception { - return Integer.parseInt(nextStringValue(reader)); - } -} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java-- deleted file mode 100644 index f208e770b..000000000 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java-- +++ /dev/null @@ -1,517 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -// -// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 -// See http://java.sun.com/xml/jaxb -// Any modifications to this file will be lost upon recompilation of the source schema. -// Generated on: 2013.10.25 at 10:52:22 AM EST -// - - -package org.qcmg.unused.primeroutput; - -import javax.xml.bind.annotation.XmlAccessType; -import javax.xml.bind.annotation.XmlAccessorType; -import javax.xml.bind.annotation.XmlElement; -import javax.xml.bind.annotation.XmlType; - - -/** - *

- * Java class for primerOutputRecord complex type.
- * 
- * The following schema fragment specifies the expected content contained within this class.
- * 
- * <complexType name="primerOutputRecord">
- *   <complexContent>
- *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
- *       <sequence>
- *         <element name="pairPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightPenalty" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftSequence" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="rightSequence" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="left" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="right" type="{http://www.w3.org/2001/XMLSchema}string"/>
- *         <element name="leftTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftGcPercent" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightGcPercent" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftSelfAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightSelfAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftSelfEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightSelfEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="leftEndStability" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="rightEndStability" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="pairComplAny" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="pairComplEnd" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="pairProductSize" type="{http://www.w3.org/2001/XMLSchema}int"/>
- *         <element name="pairProductTm" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="pairProductTmOligoTmDiff" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *         <element name="pairTOptA" type="{http://www.w3.org/2001/XMLSchema}double"/>
- *       </sequence>
- *     </restriction>
- *   </complexContent>
- * </complexType>
- * 
- * - * - */ -@XmlAccessorType(XmlAccessType.FIELD) -@XmlType(name = "primerOutputRecord", propOrder = { - "pairPenalty", - "leftPenalty", - "rightPenalty", - "leftSequence", - "rightSequence", - "left", - "right", - "leftTm", - "rightTm", - "leftGcPercent", - "rightGcPercent", - "leftSelfAny", - "rightSelfAny", - "leftSelfEnd", - "rightSelfEnd", - "leftEndStability", - "rightEndStability", - "pairComplAny", - "pairComplEnd", - "pairProductSize", - "pairProductTm", - "pairProductTmOligoTmDiff", - "pairTOptA" -}) -public class PrimerOutputRecord { - - protected double pairPenalty; - protected double leftPenalty; - protected double rightPenalty; - @XmlElement(required = true) - protected String leftSequence; - @XmlElement(required = true) - protected String rightSequence; - @XmlElement(required = true) - protected String left; - @XmlElement(required = true) - protected String right; - protected double leftTm; - protected double rightTm; - protected double leftGcPercent; - protected double rightGcPercent; - protected double leftSelfAny; - protected double rightSelfAny; - protected double leftSelfEnd; - protected double rightSelfEnd; - protected double leftEndStability; - protected double rightEndStability; - protected double pairComplAny; - protected double pairComplEnd; - protected int pairProductSize; - protected double pairProductTm; - protected double pairProductTmOligoTmDiff; - protected double pairTOptA; - - /** - * Gets the value of the pairPenalty property. - * - */ - public double getPairPenalty() { - return pairPenalty; - } - - /** - * Sets the value of the pairPenalty property. - * - */ - public void setPairPenalty(double value) { - this.pairPenalty = value; - } - - /** - * Gets the value of the leftPenalty property. - * - */ - public double getLeftPenalty() { - return leftPenalty; - } - - /** - * Sets the value of the leftPenalty property. - * - */ - public void setLeftPenalty(double value) { - this.leftPenalty = value; - } - - /** - * Gets the value of the rightPenalty property. - * - */ - public double getRightPenalty() { - return rightPenalty; - } - - /** - * Sets the value of the rightPenalty property. - * - */ - public void setRightPenalty(double value) { - this.rightPenalty = value; - } - - /** - * Gets the value of the leftSequence property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getLeftSequence() { - return leftSequence; - } - - /** - * Sets the value of the leftSequence property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setLeftSequence(String value) { - this.leftSequence = value; - } - - /** - * Gets the value of the rightSequence property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getRightSequence() { - return rightSequence; - } - - /** - * Sets the value of the rightSequence property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setRightSequence(String value) { - this.rightSequence = value; - } - - /** - * Gets the value of the left property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getLeft() { - return left; - } - - /** - * Sets the value of the left property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setLeft(String value) { - this.left = value; - } - - /** - * Gets the value of the right property. 
- * - * @return - * possible object is - * {@link String } - * - */ - public String getRight() { - return right; - } - - /** - * Sets the value of the right property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setRight(String value) { - this.right = value; - } - - /** - * Gets the value of the leftTm property. - * - */ - public double getLeftTm() { - return leftTm; - } - - /** - * Sets the value of the leftTm property. - * - */ - public void setLeftTm(double value) { - this.leftTm = value; - } - - /** - * Gets the value of the rightTm property. - * - */ - public double getRightTm() { - return rightTm; - } - - /** - * Sets the value of the rightTm property. - * - */ - public void setRightTm(double value) { - this.rightTm = value; - } - - /** - * Gets the value of the leftGcPercent property. - * - */ - public double getLeftGcPercent() { - return leftGcPercent; - } - - /** - * Sets the value of the leftGcPercent property. - * - */ - public void setLeftGcPercent(double value) { - this.leftGcPercent = value; - } - - /** - * Gets the value of the rightGcPercent property. - * - */ - public double getRightGcPercent() { - return rightGcPercent; - } - - /** - * Sets the value of the rightGcPercent property. - * - */ - public void setRightGcPercent(double value) { - this.rightGcPercent = value; - } - - /** - * Gets the value of the leftSelfAny property. - * - */ - public double getLeftSelfAny() { - return leftSelfAny; - } - - /** - * Sets the value of the leftSelfAny property. - * - */ - public void setLeftSelfAny(double value) { - this.leftSelfAny = value; - } - - /** - * Gets the value of the rightSelfAny property. - * - */ - public double getRightSelfAny() { - return rightSelfAny; - } - - /** - * Sets the value of the rightSelfAny property. - * - */ - public void setRightSelfAny(double value) { - this.rightSelfAny = value; - } - - /** - * Gets the value of the leftSelfEnd property. - * - */ - public double getLeftSelfEnd() { - return leftSelfEnd; - } - - /** - * Sets the value of the leftSelfEnd property. - * - */ - public void setLeftSelfEnd(double value) { - this.leftSelfEnd = value; - } - - /** - * Gets the value of the rightSelfEnd property. - * - */ - public double getRightSelfEnd() { - return rightSelfEnd; - } - - /** - * Sets the value of the rightSelfEnd property. - * - */ - public void setRightSelfEnd(double value) { - this.rightSelfEnd = value; - } - - /** - * Gets the value of the leftEndStability property. - * - */ - public double getLeftEndStability() { - return leftEndStability; - } - - /** - * Sets the value of the leftEndStability property. - * - */ - public void setLeftEndStability(double value) { - this.leftEndStability = value; - } - - /** - * Gets the value of the rightEndStability property. - * - */ - public double getRightEndStability() { - return rightEndStability; - } - - /** - * Sets the value of the rightEndStability property. - * - */ - public void setRightEndStability(double value) { - this.rightEndStability = value; - } - - /** - * Gets the value of the pairComplAny property. - * - */ - public double getPairComplAny() { - return pairComplAny; - } - - /** - * Sets the value of the pairComplAny property. - * - */ - public void setPairComplAny(double value) { - this.pairComplAny = value; - } - - /** - * Gets the value of the pairComplEnd property. - * - */ - public double getPairComplEnd() { - return pairComplEnd; - } - - /** - * Sets the value of the pairComplEnd property. 
- * - */ - public void setPairComplEnd(double value) { - this.pairComplEnd = value; - } - - /** - * Gets the value of the pairProductSize property. - * - */ - public int getPairProductSize() { - return pairProductSize; - } - - /** - * Sets the value of the pairProductSize property. - * - */ - public void setPairProductSize(int value) { - this.pairProductSize = value; - } - - /** - * Gets the value of the pairProductTm property. - * - */ - public double getPairProductTm() { - return pairProductTm; - } - - /** - * Sets the value of the pairProductTm property. - * - */ - public void setPairProductTm(double value) { - this.pairProductTm = value; - } - - /** - * Gets the value of the pairProductTmOligoTmDiff property. - * - */ - public double getPairProductTmOligoTmDiff() { - return pairProductTmOligoTmDiff; - } - - /** - * Sets the value of the pairProductTmOligoTmDiff property. - * - */ - public void setPairProductTmOligoTmDiff(double value) { - this.pairProductTmOligoTmDiff = value; - } - - /** - * Gets the value of the pairTOptA property. - * - */ - public double getPairTOptA() { - return pairTOptA; - } - - /** - * Sets the value of the pairTOptA property. - * - */ - public void setPairTOptA(double value) { - this.pairTOptA = value; - } - -} diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java-- b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java-- deleted file mode 100644 index da0a4d385..000000000 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java-- +++ /dev/null @@ -1,136 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.primeroutput; - -import java.io.BufferedReader; -import java.io.IOException; - -import org.qcmg.unused.record.Serializer; - -public final class PrimerOutputRecordSerializer extends - Serializer { - private final static String[] FIELD_PREFIXES = { - "PRIMER_PAIR_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_LEFT_", - "PRIMER_RIGHT_", - "PRIMER_PAIR_", - "PRIMER_PAIR_", - "PRIMER_PAIR_", - "PRIMER_PAIR_", - "PRIMER_PAIR_", - "PRIMER_PAIR_"}; - - private final static String[] FIELD_SUFFIXES = { - "_PENALTY", - "_PENALTY", - "_PENALTY", - "_SEQUENCE", - "_SEQUENCE", - "", - "", - "_TM", - "_TM", - "_GC_PERCENT", - "_GC_PERCENT", - "_SELF_ANY", - "_SELF_ANY", - "_SELF_END", - "_SELF_END", - "_END_STABILITY", - "_END_STABILITY", - "_COMPL_ANY", - "_COMPL_END", - "_PRODUCT_SIZE", - "_PRODUCT_TM", - "_PRODUCT_TM_OLIGO_TM_DIFF", - "_T_OPT_A"}; - - static { - assert(FIELD_PREFIXES.length == FIELD_SUFFIXES.length); - } - - public PrimerOutputRecord parseRecord(final BufferedReader reader) - throws Exception { - String nextLine = reader.readLine(); - if (null == nextLine) { - return null; - } - PrimerOutputRecord result = new PrimerOutputRecord(); - result.setPairPenalty(doubleValue(nextLine)); - result.setLeftPenalty(nextDoubleValue(reader)); - result.setRightPenalty(nextDoubleValue(reader)); - result.setLeftSequence(nextStringValue(reader)); - result.setRightSequence(nextStringValue(reader)); - result.setLeft(nextStringValue(reader)); - result.setRight(nextStringValue(reader)); - result.setLeftTm(nextDoubleValue(reader)); - 
result.setRightTm(nextDoubleValue(reader));
-        result.setLeftGcPercent(nextDoubleValue(reader));
-        result.setRightGcPercent(nextDoubleValue(reader));
-        result.setLeftSelfAny(nextDoubleValue(reader));
-        result.setRightSelfAny(nextDoubleValue(reader));
-        result.setLeftSelfEnd(nextDoubleValue(reader));
-        result.setRightSelfEnd(nextDoubleValue(reader));
-        result.setLeftEndStability(nextDoubleValue(reader));
-        result.setRightEndStability(nextDoubleValue(reader));
-        result.setPairComplAny(nextDoubleValue(reader));
-        result.setPairComplEnd(nextDoubleValue(reader));
-        result.setPairProductSize(nextIntegerValue(reader));
-        result.setPairProductTm(nextDoubleValue(reader));
-        result.setPairProductTmOligoTmDiff(nextDoubleValue(reader));
-        result.setPairTOptA(nextDoubleValue(reader));
-        return result;
-    }
-
-    public String serialise(final PrimerOutputRecord record) throws Exception {
-        String result = null;
-        return result;
-    }
-
-    private double nextDoubleValue(BufferedReader reader) throws NumberFormatException, Exception {
-        return Double.parseDouble(nextStringValue(reader));
-    }
-
-    private double doubleValue(final String line) throws Exception {
-        final String[] params = equalsPattern.split(line);
-        if (2 != params.length) {
-            throw new Exception("Bad format. Insufficient columns: '" + line
-                    + "'");
-        }
-        return Double.parseDouble(params[1].trim());
-    }
-
-    private String nextStringValue(final BufferedReader reader)
-            throws Exception {
-        final String line = reader.readLine();
-        return stringValue(line);
-    }
-
-    private String stringValue(final String line) throws Exception {
-        final String[] params = equalsPattern.split(line);
-        if (2 != params.length) {
-            throw new Exception("Bad format. Insufficient columns: '" + line
-                    + "'");
-        }
-        return params[1].trim();
-    }
-
-    private int nextIntegerValue(final BufferedReader reader) throws Exception {
-        return Integer.parseInt(nextStringValue(reader));
-    }
-}
diff --git a/qio/src/org/qcmg/unused/reader/AbstractReader.java-- b/qio/src/org/qcmg/unused/reader/AbstractReader.java--
deleted file mode 100644
index 9824af0cd..000000000
--- a/qio/src/org/qcmg/unused/reader/AbstractReader.java--
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file.
- */ -package org.qcmg.unused.reader; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; - -import org.qcmg.exception.RecordIteratorException; -import org.qcmg.unused.record.AbstractRecordIterator; -import org.qcmg.unused.record.Record; - -public abstract class AbstractReader implements Reader, Iterable { - - protected final InputStream inputStream; - - public AbstractReader(final File file) throws IOException { - FileInputStream stream = new FileInputStream(file); - inputStream = stream; - } - - @Override - public Iterator iterator() { - try { - return getRecordIterator(); - } catch (Exception e) { - throw new RecordIteratorException(e); - } - } - - public abstract AbstractRecordIterator getRecordIterator() throws Exception; - - @Override - public void close() throws IOException { - inputStream.close(); - } -} diff --git a/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java-- b/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java-- deleted file mode 100644 index 546ba9e45..000000000 --- a/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java-- +++ /dev/null @@ -1,41 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.reader; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStreamReader; -import java.util.Iterator; - -import org.qcmg.unused.record.ExtendedRecordIterator; -import org.qcmg.unused.record.Serializer; - -public abstract class ExtendedFileReader extends - FileReader { - private final Serializer headerSerializer; - private final HeaderType header; - - public ExtendedFileReader(final File file, - final Serializer recordSerializer, - final Serializer headerSerializer) throws Exception { - super(file, recordSerializer); - FileInputStream inputStream = new FileInputStream(file); - InputStreamReader inputStreamReader = new InputStreamReader(inputStream); - BufferedReader reader = new BufferedReader(inputStreamReader); - this.headerSerializer = headerSerializer; - header = headerSerializer.parseRecord(reader); - } - - public HeaderType getHeader() { - return header; - } - - @Override - public Iterator getIterator() throws Exception { - return new ExtendedRecordIterator( - getInputStream(), getSerializer(), headerSerializer); - } - -} diff --git a/qio/src/org/qcmg/unused/reader/FileReader.java-- b/qio/src/org/qcmg/unused/reader/FileReader.java-- deleted file mode 100644 index 8651ed657..000000000 --- a/qio/src/org/qcmg/unused/reader/FileReader.java-- +++ /dev/null @@ -1,57 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.reader; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.Iterator; - -import org.qcmg.unused.record.RecordIterator; -import org.qcmg.unused.record.Serializer; - -public abstract class FileReader implements Closeable, - Iterable { - private final Serializer serializer; - private final File file; - private final FileInputStream inputStream; - - public FileReader(final File file, final Serializer serializer) - throws Exception { - this.file = file; - this.serializer = serializer; - inputStream = new FileInputStream(file); - } - - public Iterator iterator() { - try { - return getIterator(); - } catch (Exception e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - public Iterator getIterator() throws Exception { - return new RecordIterator(inputStream, serializer); - } - - public void close() throws IOException { - inputStream.close(); - } - - public File getFile() { - return file; - } - - public FileInputStream getInputStream() { - return inputStream; - } - - public Serializer getSerializer() { - return serializer; - } - -} diff --git a/qio/src/org/qcmg/unused/reader/Reader.java-- b/qio/src/org/qcmg/unused/reader/Reader.java-- deleted file mode 100644 index 5f7a95a30..000000000 --- a/qio/src/org/qcmg/unused/reader/Reader.java-- +++ /dev/null @@ -1,10 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.reader; - -import java.io.Closeable; - -public interface Reader extends Closeable { - -} diff --git a/qio/src/org/qcmg/unused/simple/SimpleFileReader.java-- b/qio/src/org/qcmg/unused/simple/SimpleFileReader.java-- deleted file mode 100644 index 2a1d6ba91..000000000 --- a/qio/src/org/qcmg/unused/simple/SimpleFileReader.java-- +++ /dev/null @@ -1,23 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.simple; - -import java.io.File; -import java.io.IOException; - -import org.qcmg.reader.AbstractReader; -import org.qcmg.unused.record.AbstractRecordIterator; - -public class SimpleFileReader extends AbstractReader { - - public SimpleFileReader(File file) throws IOException { - super(file); - } - - @Override - public AbstractRecordIterator getRecordIterator() throws Exception{ - return new SimpleRecordIterator(inputStream); - } - -} diff --git a/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java-- b/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java-- deleted file mode 100644 index 438c6695f..000000000 --- a/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java-- +++ /dev/null @@ -1,26 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.simple; - -import java.io.InputStream; - -import org.qcmg.unused.record.AbstractRecordIterator; - -public class SimpleRecordIterator extends AbstractRecordIterator { - - public SimpleRecordIterator(InputStream stream) throws Exception{ - super(stream); - } - - @Override - protected void readNext() throws Exception { -// try { - next = SimpleSerializer.nextRecord(reader); -// } catch (Exception ex) { -// next = null; -// throw ex; -// } - } - -} diff --git a/qio/src/org/qcmg/unused/simple/SimpleSerializer.java-- b/qio/src/org/qcmg/unused/simple/SimpleSerializer.java-- deleted file mode 100644 index 07c4491b2..000000000 --- a/qio/src/org/qcmg/unused/simple/SimpleSerializer.java-- +++ /dev/null @@ -1,57 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.simple; - -import java.io.BufferedReader; -import java.io.IOException; - -import org.qcmg.unused.record.SimpleRecord; - -public final class SimpleSerializer { - - private static final String DEFAULT_ID_PREFIX = ">"; - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static SimpleRecord nextRecord(final BufferedReader reader) throws Exception { - SimpleRecord result = null; - - String id = nextNonheaderLine(reader); - String sequence = reader.readLine(); -// return parseRecord(id, sequence); - if (null != id && null != sequence) { - result = parseRecord(id, sequence); - } - - return result; - } - - static String parseID(final String value) throws Exception { - if ( ! 
value.startsWith(DEFAULT_ID_PREFIX)) {
-            throw new Exception("Bad id format: " + value);
-        }
-        return value;
-    }
-
-    static String parseSequence(final String sequence) throws Exception {
-        if (sequence.startsWith(DEFAULT_ID_PREFIX)) {
-            throw new Exception("Bad sequence format: " + sequence);
-        }
-        return sequence;
-    }
-
-    static SimpleRecord parseRecord(final String id, final String sequence)
-            throws Exception {
-        return new SimpleRecord(parseID(id), parseSequence(sequence));
-    }
-
-}

From e9d5a3f4875d699c55bf980ec1a022fe5215cb1b Mon Sep 17 00:00:00 2001
From: Christina Xu
Date: Wed, 25 Nov 2020 21:47:23 +1000
Subject: [PATCH 31/73] delete tmp files

---
 .../src/org/qcmg/qmule/AlignerCompare.java-- | 272 ------
 .../qmule/AnnotateDCCWithGFFRegions.java-- | 710 ----------------
 qmule/src/org/qcmg/qmule/BAM2CS.java-- | 183 ----
 qmule/src/org/qcmg/qmule/BAMCompress.java-- | 156 ----
 .../org/qcmg/qmule/BAMHeaderChecker.java-- | 250 ------
 qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- | 124 ---
 .../org/qcmg/qmule/BamMismatchCounts.java-- | 160 ----
 .../org/qcmg/qmule/BamRecordCounter.java-- | 44 -
 .../qcmg/qmule/CompareReferenceRegions.java-- | 676 ---------------
 .../org/qcmg/qmule/DbSnpChrLiftover.java-- | 86 --
 .../org/qcmg/qmule/GermlineDBStripper.java-- | 47 -
 qmule/src/org/qcmg/qmule/GetBamRecords.java-- | 226 -----
 qmule/src/org/qcmg/qmule/GetInsetSize.java-- | 35 -
 .../src/org/qcmg/qmule/IndelDCCHeader.java-- | 395 ---------
 qmule/src/org/qcmg/qmule/MAF2DCC1.java-- | 418 ---------
 qmule/src/org/qcmg/qmule/Main.java-- | 100 ---
 qmule/src/org/qcmg/qmule/Messages.java-- | 132 ---
 qmule/src/org/qcmg/qmule/Options.java-- | 512 -----------
 qmule/src/org/qcmg/qmule/Pileup.java-- | 101 ---
 qmule/src/org/qcmg/qmule/PileupStats.java-- | 254 ------
 .../src/org/qcmg/qmule/QMuleException.java-- | 28 -
 qmule/src/org/qcmg/qmule/QueryCADDLib.java-- | 187 ----
 .../qcmg/qmule/ReAnnotateDccWithDbSNP.java-- | 280 ------
 .../src/org/qcmg/qmule/ReadPartGZFile.java-- | 152 ----
 qmule/src/org/qcmg/qmule/ReadsAppend.java-- | 95 ---
 qmule/src/org/qcmg/qmule/RunGatk.java-- | 141 ---
 .../org/qcmg/qmule/SmithWatermanGotoh.java-- | 368 --------
 .../qmule/SnpToReferenceRegionFilter.java-- | 647 --------------
 qmule/src/org/qcmg/qmule/SubSample.java-- | 165 ----
 .../src/org/qcmg/qmule/TestFileFinder.java-- | 23 -
 qmule/src/org/qcmg/qmule/TestJarUpdate.java-- | 191 -----
 qmule/src/org/qcmg/qmule/TestSort.java-- | 109 ---
 .../org/qcmg/qmule/TranscriptomeMule.java-- | 192 -----
 .../org/qcmg/qmule/WiggleFromPileup.java-- | 302 -------
 .../qcmg/qmule/WiggleFromPileupTakeTwo.java-- | 307 -------
 qmule/src/org/qcmg/qmule/XCvsZP.java-- | 117 ---
 qmule/src/org/qcmg/qmule/bam/CheckBam.java-- | 339 --------
 .../qmule/bam/GetContigsFromHeader.java-- | 127 ---
 qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- | 226 -----
 qmule/src/org/qcmg/qmule/qcnv/Main.java-- | 57 --
 qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- | 152 ----
 qmule/src/org/qcmg/qmule/qcnv/Options.java-- | 169 ----
 qmule/src/org/qcmg/qmule/queryChrMT.java-- | 68 --
 .../qcmg/qmule/snppicker/CompareSnps.java-- | 205 -----
 .../snppicker/ExamineVerifiedSnps.java-- | 237 ------
 .../qmule/snppicker/GatkUniqueSnps.java-- | 488 -----------
 .../src/org/qcmg/qmule/snppicker/Mule.java-- | 85 --
 .../org/qcmg/qmule/snppicker/SnpPicker.java-- | 802 ------------------
 .../qcmg/qmule/snppicker/UniqueQSnps.java-- | 200 -----
 .../qcmg/qmule/snppicker/UniqueSnps.java-- | 263 ------
 .../qcmg/qmule/snppicker/VariantRecord.java-- | 193 -----
 .../qmule/util/IGVBatchFileGenerator.java-- | 78 --
 .../qcmg/qmule/util/TabbedDataLoader.java-- | 61 --
 .../src/org/qcmg/qmule/vcf/CompareVCFs.java-- | 269 ------
 .../org/qcmg/qmule/vcf/ConvertVcfChr.java-- | 116 ---
 .../qcmg/qmule/vcf/RefAndMultiGenotype.java-- | 101 ---
 .../org/qcmg/qmule/AlignerCompareTest.java-- | 120 ---
 .../qmule/AnnotateDCCWithGFFRegionTest.java-- | 234 -----
 .../org/qcmg/qmule/BamCompressTest.java-- | 95 ---
 .../qcmg/qmule/BamMismatchCountsTest.java-- | 75 --
 .../qcmg/qmule/ChrPosComparatorTest.java-- | 35 -
 .../qmule/CompareReferenceRegionsTest.java-- | 162 ----
 qmule/test/org/qcmg/qmule/DccToMafTest.java-- | 167 ----
 .../org/qcmg/qmule/IndelDCCHeaderTest.java-- | 222 -----
 qmule/test/org/qcmg/qmule/MAF2DCC1Test.java-- | 315 -------
 .../test/org/qcmg/qmule/TestThreading.java-- | 55 --
 .../qmule/WiggleFromPileupTakeTwoTest.java-- | 428 ----------
 .../qcmg/qmule/WiggleFromPileupTest.java-- | 431 ----------
 .../qmule/snppicker/CompareSnpsTest.java-- | 70 --
 .../qmule/snppicker/GatkUniqueSnpsTest.java-- | 154 ----
 .../util/IGVBatchFileGeneratorTest.java-- | 73 --
 .../qmule/util/TabbedDataLoaderTest.java-- | 21 -
 72 files changed, 15078 deletions(-)
 delete mode 100644 qmule/src/org/qcmg/qmule/AlignerCompare.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/BAM2CS.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/BAMCompress.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/BAMHeaderChecker.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/BAMPileupUtil.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/BamMismatchCounts.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/BamRecordCounter.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/CompareReferenceRegions.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/GermlineDBStripper.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/GetBamRecords.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/GetInsetSize.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/IndelDCCHeader.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/MAF2DCC1.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/Main.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/Messages.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/Options.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/Pileup.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/PileupStats.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/QMuleException.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/QueryCADDLib.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/ReadPartGZFile.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/ReadsAppend.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/RunGatk.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/SnpToReferenceRegionFilter.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/SubSample.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/TestFileFinder.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/TestJarUpdate.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/TestSort.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/TranscriptomeMule.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileup.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/XCvsZP.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/bam/CheckBam.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/CNVseq.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/Main.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/qcnv/Options.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/queryChrMT.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/Mule.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java--
 delete mode 100644 qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/AlignerCompareTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/AnnotateDCCWithGFFRegionTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/BamCompressTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/BamMismatchCountsTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/ChrPosComparatorTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/CompareReferenceRegionsTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/DccToMafTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/IndelDCCHeaderTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/MAF2DCC1Test.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/TestThreading.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/WiggleFromPileupTakeTwoTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/util/IGVBatchFileGeneratorTest.java--
 delete mode 100644 qmule/test/org/qcmg/qmule/util/TabbedDataLoaderTest.java--

diff --git a/qmule/src/org/qcmg/qmule/AlignerCompare.java-- b/qmule/src/org/qcmg/qmule/AlignerCompare.java--
deleted file mode 100644
index 5c8538a93..000000000
--- a/qmule/src/org/qcmg/qmule/AlignerCompare.java--
+++ /dev/null
@@ -1,272 +0,0 @@
-/**
- * © Copyright The University of Queensland 2010-2014.
- * © Copyright QIMR Berghofer Medical Research Institute 2014-2016.
- *
- * This code is released under the terms outlined in the included LICENSE file.
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; -import java.util.Objects; - -import htsjdk.samtools.SAMFileHeader.SortOrder; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - -public class AlignerCompare { - static QLogger logger = QLoggerFactory.getLogger(AlignerCompare.class); - boolean discardNonPrimary; - SamReader firReader; - SamReader secReader; - - SAMOrBAMWriterFactory sameWriter; - SAMOrBAMWriterFactory diffWriter_first; - SAMOrBAMWriterFactory diffWriter_second; - - SAMOrBAMWriterFactory unsureWriter_first; - SAMOrBAMWriterFactory unsureWriter_second; - - - long total_bam1 = 0; - long total_bam2 = 0; - long total_same = 0; - long noDiff_bam1 = 0; - long noDiff_bam2 = 0; - long noSecondary_bam1 = 0; - long nosupplementary_bam1 = 0; - long noSecondary_bam2 = 0; - long nosupplementary_bam2 = 0; - long nounsureAlignment = 0; - - - AlignerCompare(File firBam, File secBam, String prefix, boolean flag) throws Exception{ - //check inputs: sort by query name - firReader = SAMFileReaderFactory.createSAMFileReader(firBam, ValidationStringency.SILENT); - secReader = SAMFileReaderFactory.createSAMFileReader(secBam, ValidationStringency.SILENT); - discardNonPrimary = flag; - - if(! firReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) - throw new Exception("Please sort the input BAM by queryname: " + firBam.getAbsolutePath()); - - if(! secReader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) - throw new Exception("Please sort the input BAM by queryname: " + secBam.getAbsolutePath()); - - - logger.info("input BAM1: " + firBam.getAbsolutePath()); - logger.info("input BAM2: " + secBam.getAbsolutePath()); - logger.info("discard secondary or supplementary alignments: " + String.valueOf(discardNonPrimary)); - - //create outputs - File outsame = new File(prefix + ".identical.bam" ); - File outdiff_first = new File(prefix + ".different.first.bam" ); - File outdiff_second = new File(prefix + ".different.second.bam" ); - - if(! firBam.getName().equals(secBam.getName())){ - outdiff_first = new File( prefix + ".different." + firBam.getName() ); - outdiff_second = new File( prefix + ".different." 
+ secBam.getName() ); - } - - sameWriter = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outsame); - diffWriter_first = new SAMOrBAMWriterFactory(firReader.getFileHeader(), true, outdiff_first ); - diffWriter_second = new SAMOrBAMWriterFactory(secReader.getFileHeader(), true, outdiff_second ); - - logger.info("output of identical alignments: " + outsame.getAbsolutePath()); - logger.info("output of different alignments from BAM1: " + outdiff_first.getAbsolutePath()); - logger.info("output of different alignments from BAM2: " + outdiff_second.getAbsolutePath()); - - //execute comparison - compareExecutor(); - - - //close IOs - firReader.close(); - secReader.close(); - sameWriter.closeWriter(); - diffWriter_first.closeWriter(); - diffWriter_second.closeWriter(); - - } - - void compareExecutor() throws Exception{ - ArrayList from1 = new ArrayList (); - ArrayList from2 = new ArrayList (); - SAMRecordIterator it1 = firReader.iterator(); - SAMRecordIterator it2 = secReader.iterator(); - //stats - long noRead = 0; - long noAlign1 = 1; - long noAlign2 = 1; - long noSame = 0; - - //initialize - SAMRecord record1 = it1.next(); - SAMRecord record2 = it2.next(); - String Id = record1.getReadName(); - from1.add(record1); - from2.add(record2); - - //get all aligner from same read - while( it1.hasNext() || it2.hasNext()){ - while(it1.hasNext()){ - noAlign1 ++; - record1 = it1.next() ; - if(record1.getReadName().equals(Id)){ - from1.add(record1); - }else //if not equals(Id) - break; - } //end while - - while( it2.hasNext() ){ - noAlign2 ++; - record2 = it2.next(); - if(record2.getReadName().equals(Id)){ - from2.add(record2); - }else - break; //exit while, record2 is read for next loop - } - //compare alignment in arraylist which filtered out secondary or supplenmentary alignments - noSame += classifyReads( AlignerFilter(from1, unsureWriter_first) , AlignerFilter(from2, unsureWriter_second) ); - - //clear arraylist and store current reads into arraylist for next loop - noRead ++; - from1.clear(); - from2.clear(); - from1.add(record1); - from2.add(record2); - Id = record1.getReadName(); - } - - logger.info(String.format("There are %d reads with %d alignments from BAM1", noRead, noAlign1)); - logger.info(String.format("There are %d reads with %d alignments from BAM2", noRead, noAlign2)); - logger.info(String.format("There are %d alignments are identical from both BAM", noSame)); - logger.info(String.format("Different alignments from BAM1 are %d, from BAM2 are %d", noDiff_bam1, noDiff_bam2)); - logger.info( String.format("discard %d secondary alignments and %d supplementary alignments from BAM1",noSecondary_bam1,nosupplementary_bam1)); - logger.info(String.format("discard %d secondary alignments and %d supplementary alignments from BAM2",noSecondary_bam2,nosupplementary_bam2)); - - - } - - /** - * - * @param from: an input alignments with same read id - * @return ArrayList : cleaned alignments excluding secondary and supplementary alignments - */ - ArrayList AlignerFilter(ArrayList from, SAMOrBAMWriterFactory factory) throws Exception{ - ArrayList cleaned = new ArrayList(); - - for(SAMRecord record : from) - if( discardNonPrimary && record.isSecondaryOrSupplementary()){ - if( record.getNotPrimaryAlignmentFlag()) - noSecondary_bam1 ++; - else if( record.getSupplementaryAlignmentFlag()) - nosupplementary_bam1 ++; - else - throw new Exception(record.getReadName() + " record flag error: record.isSecondaryOrSupplementary but not (secondary or supplementary) : " + record.getFlags()); - }else - 
cleaned.add(record); - -/* //record these multi alignments for further investigation - if(cleaned.size() != 2){ - for(SAMRecord record : cleaned){ - factory.getWriter().addAlignment(record); - nounsureAlignment ++; - - } - } -*/ - return cleaned; - } - - - int classifyReads(ArrayList from1, ArrayList from2) throws Exception{ - ArrayList toremove1 = new ArrayList(); - ArrayList toremove2 = new ArrayList(); - - for(SAMRecord record1 : from1){ - for(SAMRecord record2: from2){ - if(!record1.getReadName().equals(record2.getReadName())) - throw new Exception("error during process: reads with different name are store in arrayList for comparison: " - + record1.getReadName() + " != " + record2.getReadName() ) ; - if (record1.getFlags() == record2.getFlags() && - record1.getReferenceName().equals(record2.getReferenceName()) && - record1.getAlignmentStart() == record2.getAlignmentStart() && - record1.getAlignmentEnd() == record2.getAlignmentEnd() && - record1.getMappingQuality() == record2.getMappingQuality() && - record1.getCigarString().equals(record2.getCigarString()) && - Objects.equals(record1.getAttribute("MD") , record2.getAttribute("MD"))){ - sameWriter.getWriter().addAlignment(record1); - toremove1.add(record1); - toremove2.add(record2); - } - } - } - - //record the left differnt aligner - from1.removeAll(toremove1); - for(SAMRecord record1 : from1) - diffWriter_first.getWriter().addAlignment(record1); - - from2.removeAll(toremove2); - for(SAMRecord record2: from2) - diffWriter_second.getWriter().addAlignment(record2); - - //count unique alignment number - noDiff_bam1 += from1.size(); - noDiff_bam2 += from2.size(); - - return toremove1.size(); - } - - public static void main(String[] args) throws Exception{ - - Options op = new Options(AlignerCompare.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_AlignerCompare")); - op.displayHelp(); - System.exit(0); - } - - if( op.getInputFileNames().length != 2 - || op.getOutputFileNames().length != 1 ){ - System.err.println("improper parameters passed to command line, please refer to"); - System.out.println(Messages.getMessage("USAGE_AlignerCompare")); - op.displayHelp(); - System.exit(1); - } - - File f1 = new File(op.getInputFileNames()[0]); - File f2 = new File(op.getInputFileNames()[1]); - if(! f1.exists() || ! f2.exists()) - throw new Exception("input not exists: " + args[0] + " or " + args[1]); - - //assign to true if no "compareAll" option - boolean flag = ! 
op.hasCompareAllOption(); - - if(op.hasLogOption()) - logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getLogFile(), op.getLogLevel()); - else - logger = QLoggerFactory.getLogger(AlignerCompare.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - logger.logInitialExecutionStats( "qmule " + AlignerCompare.class.getName(), version,args); - - long startTime = System.currentTimeMillis(); - AlignerCompare compare = new AlignerCompare( f1, f2, op.getOutputFileNames()[0], flag ); - - logger.info( String.format("It took %d hours, %d minutes to perform the comparison", - (int) (System.currentTimeMillis() - startTime) / (1000*60*60), - (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) ); - logger.logFinalExecutionStats(0); - - } - - -} diff --git a/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- b/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- deleted file mode 100644 index ee7a1eb00..000000000 --- a/qmule/src/org/qcmg/qmule/AnnotateDCCWithGFFRegions.java-- +++ /dev/null @@ -1,710 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.TreeMap; -import java.util.Vector; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionName; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - - -public class AnnotateDCCWithGFFRegions { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private List chromosomes = new ArrayList(); - private final int exitStatus = 0; - private Map> inputRecords = new HashMap>(); - private final Map> compareRecords = new HashMap>(); - private int overlapCount = 0; - private int notOverlappingCount = 0; - private int recordCount; - private Vector inputFileHeader = new Vector(); - private String inputFileType; - private String compareFileType; - private static QLogger logger; - private static final String MAF = "maf"; - private static final String GFF3 = "gff3"; - private static final String BED = "bed"; - private static final String VCF = "vcf"; - private static final String TAB = "txt"; - private static final String DCC1 = "dcc1"; - private static final String DCCQ = "dccq"; - private BufferedWriter outputFileWriter; - private File outputFile; - private String[] features; - private boolean stranded; - private final int GFF_STRAND_INDEX = 6; - private int DCC_STRAND_INDEX = -1; - private int QCMGFLAG_COLUMN_INDEX = -1; - private int REFERENCE_ALLELE_INDEX = -1; - private int TUMOUR_ALLELE_INDEX = -1; - private String annotation; - private int MUTATION_TYPE_INDEX; - //private static final int PATIENT_MIN = 5; - - public int engage() throws Exception { - - loadGFFFile(cmdLineInputFiles[1], compareRecords); - if 
(compareRecords.isEmpty()) { - logger.info("No positions loaded from gff file"); - } - - logger.info("Starting to process DCC records."); - - outputFile = new File(cmdLineOutputFiles[0]); - - outputFileWriter = new BufferedWriter(new FileWriter(outputFile)); - - inputFileType = null; - inputFileType = getFileType(cmdLineInputFiles[0]); - recordCount = loadDCCFile(cmdLineInputFiles[0], inputFileHeader, inputFileType); - logger.info("Finished processing DCC records."); - outputFileWriter.close(); - logger.info("SUMMARY"); - logger.info("Total DCC Records: " + recordCount); - logger.info("Total Records in supplied reference regions: " + overlapCount); - logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); - return exitStatus; - } - - private String getFileType(String fileName) throws QMuleException { - int index = fileName.lastIndexOf(".") + 1; - String name = fileName.substring(index, fileName.length()); - - if (name.equals("dcc")) { - return "dcc1"; - } - - if (!name.equals(DCC1) && !name.equals(DCCQ)) { - throw new QMuleException("FILE_TYPE_ERROR"); - } - - return name; - } - - private int loadGFFFile(String file, Map> records) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(file)); - int recordCount = 0; - try { - - Iterator iterator = reader.getRecordIterator(); - - while (iterator.hasNext()) { - - TabbedRecord tab = iterator.next(); - - if (tab.getData().startsWith("#")) { - continue; - } - recordCount++; - ChrPosition chrPos = getChrPosition(GFF3, tab, Integer.toString(recordCount)); - String key = chrPos.getChromosome().replace("chr", ""); - if (records.containsKey(key)) { - records.get(key).put(chrPos, tab); - } else { - TreeMap map = new TreeMap(); - map.put(chrPos, tab); - records.put(key,map); - } - if (!chromosomes.contains(key)) { - chromosomes.add(key); - } - } - } finally { - reader.close(); - } - - logger.info("loaded gff file, total records: " + recordCount); - return recordCount; - } - - private int loadDCCFile(String file, Vector header, String fileType) throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(file)); - - int recordCount = 0; - try { - - Iterator iterator = reader.getRecordIterator(); - - if (reader.getHeader() != null) { - Iterator iter = reader.getHeader().iterator(); - while (iter.hasNext()) { - header.add(iter.next()); - } - } - while (iterator.hasNext()) { - - TabbedRecord inputRecord = iterator.next(); - if (inputRecord.getData().startsWith("#") || inputRecord.getData().startsWith("Hugo") || inputRecord.getData().startsWith("analysis") || - inputRecord.getData().startsWith("mutation")) { - header.add(inputRecord.getData()); - continue; - } - - if (header.size() > 0) { - parseDCCHeader(header, fileType); - logger.info("Column of DCC file to annotate: " + QCMGFLAG_COLUMN_INDEX); - writeHeader(fileType, header); - header.clear(); - } - - recordCount++; - ChrPosition chrPos = getChrPosition(fileType, inputRecord, null); - String key = chrPos.getChromosome().replace("chr", ""); - TreeMap compareMap = compareRecords.get(key); - boolean isOverlapping = false; - if (compareMap != null) { - //check to see if it is overlapping with the comparison reference region - for (Entry compareEntry : compareMap.entrySet()) { - ChrPosition comparePos = compareEntry.getKey(); - if (comparePos.getEndPosition() < chrPos.getStartPosition()) { - continue; - } else if (comparePos.getStartPosition() > chrPos.getEndPosition()) { - break; - } else { - String[] vals = inputRecord.getDataArray(); - 
- if (annotation != null) { - String oldInfo = vals[QCMGFLAG_COLUMN_INDEX]; - if (!oldInfo.contains("GERM") && tabbedRecordMatchesCompareRecord(chrPos, inputRecord, compareEntry)) { - if (annotation != null && !oldInfo.contains("GERM")) { - if (annotateWithGermline(vals, compareEntry.getValue().getDataArray())) { - isOverlapping = true; - if (!oldInfo.equals("") && !oldInfo.endsWith(";")) { - oldInfo += ";"; - } - oldInfo += annotation; - inputRecord = buildOutputString(inputRecord, vals, oldInfo); - } - } - } - } else { - if (tabbedRecordFallsInCompareRecord(chrPos, inputRecord, compareEntry)) { - isOverlapping = true; - String oldInfo = vals[QCMGFLAG_COLUMN_INDEX]; - //annotate with gff feature - String feature = getFeatures(compareEntry.getValue()); - if (!oldInfo.equals("") && !oldInfo.endsWith(";") && !feature.equals("")) { - oldInfo += ";"; - } - oldInfo += feature; - inputRecord = buildOutputString(inputRecord, vals, oldInfo); - } - } - - } - } - } - - if (isOverlapping) { - overlapCount++; - } else { - notOverlappingCount++; - } - - writeRecord(inputRecord); - - if (recordCount % 50000 == 0) { - logger.info("Processed records: " + recordCount); - } - } - } finally { - reader.close(); - } - return recordCount; - } - - private TabbedRecord buildOutputString(TabbedRecord inputRecord, String[] vals, - String oldInfo) { - vals[QCMGFLAG_COLUMN_INDEX] = oldInfo; - String data= ""; - for (String s: vals) { - data += s + "\t"; - } - inputRecord.setData(data); - return inputRecord; - } - - private boolean annotateWithGermline(String[] inputValues, String[] gffValues) throws QMuleException { - String[] attribs = gffValues[getFeatureIndex("attribs")].split(";"); - String gffMotif = getGFF3Motif(attribs); - //int patientCount = getPatientCount(attribs); - if (gffMotif == null) { - String position = gffValues[0] + ":" + gffValues[3] + "-" + gffValues[4]; - throw new QMuleException("NULL_GFF_MOTIF", position); - } - String dccMotif = getDCCMotif(inputValues); - if ((dccMotif == null || gffMotif.equals(dccMotif))) { - return true; - } - - return false; - } - - private int getPatientCount(String[] attribs) { - for (String s: attribs) { - if (s.startsWith("PatientCount")) { - return new Integer(s.split("=")[1]); - } - } - return 0; - } - - private String getGFF3Motif(String[] attribs) { - - String referenceAllele = null; - String tumourAllele = null; - for (String s: attribs) { - if (s.startsWith("ReferenceAllele")) { - referenceAllele = s.split("=")[1]; - } - if (s.startsWith("TumourAllele")) { - tumourAllele = s.split("=")[1]; - } - } - - if (referenceAllele.contains("-") && !tumourAllele.contains("-")) { - return tumourAllele; - } - if (!referenceAllele.contains("-") && tumourAllele.contains("-")) { - return referenceAllele; - } - return null; - } - - private String getDCCMotif(String[] inputValues) { - String mutationType = inputValues[MUTATION_TYPE_INDEX]; - String refAllele = inputValues[REFERENCE_ALLELE_INDEX]; - String tumourAllele = inputValues[TUMOUR_ALLELE_INDEX]; - - if (mutationType.equals("2")) { - return tumourAllele; - } else if (mutationType.equals("3")) { - return refAllele; - } - return null; - } - - public void parseDCCHeader(List headers, String inputFileType) throws QMuleException { - - for (String header: headers) { - String[] values = header.split("\t"); - if (values.length == 28 && inputFileType.equals(DCC1) - || values.length == 39 && inputFileType.equals(DCCQ)) { - //check dcc header - for (int i=0; i compareEntry) { - if (compareEntry != null) { - ChrPosition 
compareChrPos = compareEntry.getKey(); - if ((inputChrPos.getStartPosition() == compareChrPos.getStartPosition() - && inputChrPos.getEndPosition() == compareChrPos.getEndPosition())) { - //check strand if this option is provided - if (stranded) { - String inputStrand = inputRecord.getDataArray()[DCC_STRAND_INDEX]; - String compareStrand = compareEntry.getValue().getDataArray()[GFF_STRAND_INDEX]; - if (inputStrand.equals(compareStrand)) { - return true; - } - } else { - return true; - } - } - } - return false; - } - - private boolean tabbedRecordFallsInCompareRecord(ChrPosition inputChrPos, TabbedRecord inputRecord, Entry entry) { - if (entry != null) { - ChrPosition compareChrPos = entry.getKey(); - if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() && inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || - (inputChrPos.getEndPosition() >= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() <= compareChrPos.getEndPosition()) - || (inputChrPos.getStartPosition() <= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() >= compareChrPos.getEndPosition())) { - //check strand if this option is provided - if (stranded) { - String inputStrand = inputRecord.getDataArray()[DCC_STRAND_INDEX]; - String compareStrand = entry.getValue().getDataArray()[GFF_STRAND_INDEX]; - if (inputStrand.equals(compareStrand)) { - return true; - } - } else { - return true; - } - } - } - return false; - } - - public String[] getCmdLineInputFiles() { - return cmdLineInputFiles; - } - - public void setCmdLineInputFiles(String[] cmdLineInputFiles) { - this.cmdLineInputFiles = cmdLineInputFiles; - } - - - private void writeHeader(String file, Vector header) throws IOException { - - for (String h: header) { - outputFileWriter.write(h + "\n"); - } - } - - public List getChromosomes() { - return chromosomes; - } - - public void setChromosomes(List chromosomes) { - this.chromosomes = chromosomes; - } - - - public int getOverlapCount() { - return overlapCount; - } - - public void setOverlapCount(int overlapCount) { - this.overlapCount = overlapCount; - } - - public int getNotOverlappingCount() { - return notOverlappingCount; - } - - public void setNotOverlappingCount(int notOverlappingCount) { - this.notOverlappingCount = notOverlappingCount; - } - - public int getMafCount() { - return recordCount; - } - - public void setMafCount(int mafCount) { - this.recordCount = mafCount; - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(AnnotateDCCWithGFFRegions.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("AnnotateDCCWithGFFRegions", AnnotateDCCWithGFFRegions.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - cmdLineOutputFiles = options.getOutputFileNames(); - if ( ! FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - - for (String file : cmdLineOutputFiles) { - if (new File(file).exists() && !new File(file).isDirectory()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - features = options.getFeature(); - annotation = options.getAnnotation(); - if (features == null && annotation == null) { - logger.info("Features to annotate: " + "feature"); - } else if (features != null){ - String featureString = new String(); - for (String f : features) { - featureString += f; - } - logger.info("Features to annotate: " + featureString); - } - logger.info("Annotation is : " + annotation); - stranded = options.hasStrandedOption(); - if (options.getColumn() != null) { - this.QCMGFLAG_COLUMN_INDEX = new Integer(options.getColumn()) - 1; - } - - - - logger.info("Require matching strand: " + stranded); - logger.info("DCC file: " + cmdLineInputFiles[0]); - logger.info("GFF file: " + cmdLineInputFiles[1]); - - } - - return returnStatus; - } - - public static void main(String[] args) throws Exception { - AnnotateDCCWithGFFRegions sp = new AnnotateDCCWithGFFRegions(); - LoadReferencedClasses.loadClasses(AnnotateDCCWithGFFRegions.class); - sp.setup(args); - int exitStatus = sp.engage(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - public String[] getCmdLineOutputFiles() { - return cmdLineOutputFiles; - } - - public void setCmdLineOutputFiles(String[] cmdLineOutputFiles) { - this.cmdLineOutputFiles = cmdLineOutputFiles; - } - - public Map> getInputRecords() { - return inputRecords; - } - - public void setInputRecords( - Map> inputRecords) { - this.inputRecords = inputRecords; - } - - public Vector getInputFileHeader() { - return inputFileHeader; - } - - public void setInputFileHeader(Vector inputFileHeader) { - this.inputFileHeader = inputFileHeader; - } - - public File getOutputFile() { - return outputFile; - } - - public int getREFERENCE_ALLELE_INDEX() { - return REFERENCE_ALLELE_INDEX; - } - - public void setREFERENCE_ALLELE_INDEX(int rEFERENCE_ALLELE_INDEX) { - REFERENCE_ALLELE_INDEX = rEFERENCE_ALLELE_INDEX; - } - - public int getTUMOUR_ALLELE_INDEX() { - return TUMOUR_ALLELE_INDEX; - } - - public void setTUMOUR_ALLELE_INDEX(int tUMOUR_ALLELE_INDEX) { - TUMOUR_ALLELE_INDEX = tUMOUR_ALLELE_INDEX; - } - - public int getMUTATION_TYPE_INDEX() { - return MUTATION_TYPE_INDEX; - } - - public void setMUTATION_TYPE_INDEX(int mUTATION_TYPE_INDEX) { - MUTATION_TYPE_INDEX = mUTATION_TYPE_INDEX; - } - - public void setOutputFile(File outputFile) { - 
this.outputFile = outputFile; - } - - public String getAnnotation() { - return this.annotation; - } - -} diff --git a/qmule/src/org/qcmg/qmule/BAM2CS.java-- b/qmule/src/org/qcmg/qmule/BAM2CS.java-- deleted file mode 100644 index 13d4d21f5..000000000 --- a/qmule/src/org/qcmg/qmule/BAM2CS.java-- +++ /dev/null @@ -1,183 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; - -import java.io.*; -import java.net.InetAddress; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.HashMap; -import java.util.Iterator; - -import org.qcmg.common.string.StringUtils; - - -public class BAM2CS { - File inBAM; - File outDir; - HashMap outFast = new HashMap(); - HashMap outQual = new HashMap(); - - - BAM2CS(final String[] args) throws Exception{ - inBAM = new File(args[0]); - outDir = new File(args[1]); - printHeader(null); - } - - /** - * retrive the CS and CQ value from BAM record to output csfasta or qual file - * @throws Exception - */ - void CreateCSfile() throws Exception{ - - SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault(); - SamReader reader = samReaderFactory.open(inBAM); - int num = 0; - for (SAMRecord record : reader) { - String id = ">" + record.getReadName(); - Add2Fasta(id, record.getAttribute("CS").toString()); - add2Qual(id, record.getAttribute("CQ").toString()); - num ++; - } - - reader.close(); - closeWriters(); - - System.out.println(getTime() + " total output records " + num); - System.exit(0); - } - - /** - * Add header information to Writer. If Writer is null, print to STD - * @param Writer - * @throws Exception - */ - private void printHeader(PrintWriter Writer) throws Exception{ - if(Writer == null){ - System.out.println(getTime() + " tool name: qmule org.qcmg.qmule.BAM2CS"); - System.out.println(getTime() + " host: " + InetAddress.getLocalHost().getHostName()); - System.out.println(getTime() + " input: " + inBAM.getAbsolutePath()); - System.out.println(getTime() + " output directory: " + outDir.getAbsolutePath()); - }else{ - Writer.println("#" + getTime() + " tool name: qmule org.qcmg.qmule.BAM2CS"); - Writer.println("#" + getTime() + " host: " + InetAddress.getLocalHost().getHostName()); - Writer.println("#" + getTime() + " input: " + inBAM.getAbsolutePath()); - } - } - - private void closeWriters(){ - //close all csfasta files - Iterator itr = outFast.values().iterator(); - while(itr.hasNext()){ - PrintWriter Writer = itr.next(); - Writer.close(); - } - - //close all qual files - itr = outQual.values().iterator(); - while(itr.hasNext()){ - PrintWriter Writer = itr.next(); - Writer.close(); - } - } - - /** - * Add raw color sequence into output csfasta; If the output file isn't exist, create a new one with header lines - * @param id - * @param seq - * @throws Exception - */ - private void Add2Fasta(String id, String seq) throws Exception{ - //sequence length should -1 since it start with 'T' or 'G' - int len = seq.length() - 1; - PrintWriter Writer; - - //get writer or create an new one - if(outFast.containsKey(len)){ - Writer = outFast.get(len); - }else{ - String fname = inBAM.getName(); - int index = fname.lastIndexOf('.'); - fname = fname.substring(0,index) + "." 
+ len + ".csfasta"; - File csFile = new File(outDir, fname); - Writer = new PrintWriter(new FileWriter(csFile)); - outFast.put(len, Writer); - printHeader(Writer); - System.out.println(getTime() + " creating output: " + csFile.getAbsolutePath() ); - } - - Writer.println(id); - Writer.println(seq); - } - /** - * cover CQ value into raw qual sequence and addto output qual; - * If the output file isn't exist, create a new one with header lines. - * @param id - * @param seq - * @throws Exception - */ - void add2Qual(String id, String seq) throws Exception{ - int len = seq.length(); - PrintWriter writer; - - //get writer or create an new one - if(outQual.containsKey(len)){ - writer = outQual.get(len); - }else{ - String fname = inBAM.getName(); - int index = fname.lastIndexOf('.'); - fname = fname.substring(0,index) + "." + len + ".qual"; - File csFile = new File(outDir, fname); - writer = new PrintWriter(new FileWriter(csFile)); - outQual.put(len, writer); - printHeader(writer); - System.out.println(getTime() + " creating output: " + csFile.getAbsolutePath() ); - } - - //convert ascii to int - String qual = ""; - for(int i = 0; i < len; i ++){ - char c = seq.charAt(i); - int j = c; - - if(StringUtils.isNullOrEmpty(qual)){ - qual += j; - } else { - qual += " " + j; - } - } - - writer.println(id); - writer.println(qual); - - } - - private String getTime(){ - Calendar currentDate = Calendar.getInstance(); - SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); - return "[" + formatter.format(currentDate.getTime()) + "]"; - } - public static void main(final String[] args) throws IOException, InterruptedException { - - try{ - BAM2CS myCS = new BAM2CS(args); - myCS.CreateCSfile(); - System.exit(0); - }catch(Exception e){ - System.err.println(e.toString()); - Thread.sleep(1); - System.out.println("usage: qmule org.qcmg.qmule.BAM2CS "); - System.exit(1); - } - - } -} diff --git a/qmule/src/org/qcmg/qmule/BAMCompress.java-- b/qmule/src/org/qcmg/qmule/BAMCompress.java-- deleted file mode 100644 index 7ae4254a3..000000000 --- a/qmule/src/org/qcmg/qmule/BAMCompress.java-- +++ /dev/null @@ -1,156 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.List; - -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMFileWriterFactory; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - -public class BAMCompress { - static QLogger logger = QLoggerFactory.getLogger(BAMCompress.class); - private static File input; - private static File output; - private static int level; - - BAMCompress(File input, File output, int level) throws Exception{ - this.input = input; - this.output = output; - this.level = level; - - logger.info("input file: " + input.getAbsolutePath()); - logger.info("output file name: " + output.getAbsolutePath()); - logger.info("compress level for output BAM: " + level); - } - - public void replaceSeq() throws Exception{ - - SamReader reader = SAMFileReaderFactory.createSAMFileReader( input, ValidationStringency.SILENT); - SAMFileWriter writer = new SAMFileWriterFactory() .makeBAMWriter(reader.getFileHeader(), false, output, level); - - for( SAMRecord record : reader){ - //only replace fully mapped reads, that is no clipping, indels and pading - if( seekFullMppaed(record) && seekMismatch(record) ){ - byte[] base = record.getReadBases(); - for(int i = 0; i < base.length; i++) - base[i] = 'N'; - record.setReadBases(base); - } - - if(record.isValid() == null) // if valid - writer.addAlignment( record ); - } - - reader.close(); - writer.close(); - - logger.info( "input " + reportFileSize(input) ); - logger.info( "output " + reportFileSize(output) ); - - } - - public String reportFileSize(File f){ - - double bytes_in = f.length(); - double kilobytes = (bytes_in / 1024); - double megabytes = (kilobytes / 1024); - double gigabytes = (megabytes / 1024); - - return String.format("file size is %.2fG or %.2fK", gigabytes, kilobytes); - } - - - private boolean seekMismatch(SAMRecord r) { - String attribute = (String)r.getAttribute("MD"); - if (null != attribute) { - for (int i = 0, size = attribute.length() ; i < size ; ) { - char c = attribute.charAt(i); - if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { - return false; - } else if ( c == '^') { - //skip the insertion base - while (++i < size && Character.isLetter(attribute.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... 
- } - return true; - } - return false; - } - - private boolean seekFullMppaed(SAMRecord r){ - - if(r.getReadUnmappedFlag()) - return false; - - //reads with clips or indel, skips, pads - List ele = r.getCigar().getCigarElements(); - for (CigarElement element : r.getCigar().getCigarElements()){ - if( element.getLength() > 0){ - if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) { - return false; - }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){ - return false; - }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){ - return false; - } - } - } - - return true; - } - - - public static void main(String[] args) throws Exception{ - Options op = new Options(BAMCompress.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_BAMCompress")); - op.displayHelp(); - System.exit(0); - } - - String output = op.getOutputFileNames()[0]; - String input = op.getInputFileNames()[0]; - if(! new File(input).exists() ) - throw new Exception("input file not exists: " + args[0]); - - if(op.hasLogOption()) - logger = QLoggerFactory.getLogger(BAMCompress.class, op.getLogFile(), op.getLogLevel()); - else - logger = QLoggerFactory.getLogger(BAMCompress.class, output + ".log", op.getLogLevel()); - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - logger.logInitialExecutionStats( "qmule " + BAMCompress.class.getName(), version,args); - - int level = op.getcompressLevel(); //default compress level - - logger.logInitialExecutionStats( "qmule " + BAMCompress.class.getName(), null,args); - - long startTime = System.currentTimeMillis(); - BAMCompress compress = new BAMCompress(new File(input), new File(output) , level ); - compress.replaceSeq(); - - logger.info( String.format("It took %d hours, %d seconds to perform the compression", - (int) (System.currentTimeMillis() - startTime) / (1000*60*60), - (int) ( (System.currentTimeMillis() - startTime) / (1000*60) ) % 60) ); - logger.logFinalExecutionStats(0); - - } - - -} diff --git a/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- b/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- deleted file mode 100644 index 363f5ccbc..000000000 --- a/qmule/src/org/qcmg/qmule/BAMHeaderChecker.java-- +++ /dev/null @@ -1,250 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.sql.ResultSet; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMReadGroupRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -//import org.qcmg.db.ConnectionType; -//import org.qcmg.db.GeneusDBConnection; -import org.qcmg.picard.SAMFileReaderFactory; - -public class BAMHeaderChecker { - /* - private static final String SEPERATOR = "&"; - - private static QLogger logger; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - - private final List bamFiles = new ArrayList(); - private List bamDirectories = new ArrayList(); - - private final Map results = new HashMap(); - - private int exitStatus; - - private int engage() throws Exception { - - bamDirectories = Arrays.asList(FileUtils.findDirectories(cmdLineInputFiles[0], "seq_final", true)); - - logger.info("Will check the following directories for bam files:"); - for (File f : bamDirectories) { - logger.info(f.getAbsolutePath()); - bamFiles.addAll(Arrays.asList(FileUtils.findFilesEndingWithFilter(f.getAbsolutePath(), ".bam"))); - } - - // only operates on seq_final bams -// bamFiles = Arrays.asList(FileUtils.findFiles(cmdLineInputFiles[0], ".bam")); - - // loop through each file and get patient, experiment and input_type - String patient = null; - String experiment = null; - String input = null; - - GeneusDBConnection conn = new GeneusDBConnection(ConnectionType.QCMG_MAPSET); - - try { - for (File bamFile : bamFiles) { - String bamFileName = bamFile.getAbsolutePath(); - logger.info("examining bam file: " + bamFileName); - String bamFileSmallName = bamFileName.substring(bamFileName.lastIndexOf(System.getProperty("file.separator")) + 1 , bamFileName.indexOf(".bam")); - - patient = bamFileSmallName.substring(0, 9); //APGI_1234 - experiment = bamFileSmallName.substring(10, bamFileSmallName.lastIndexOf(".")); //APGI_1234 - input = bamFileSmallName.substring(bamFileSmallName.lastIndexOf(".") + 1); //APGI_1234 - logger.info("patient: " + patient + ", experiment: " + experiment + ", input: " + input); - - // get details from bam header - List constituentFiles = getConstituentBamFiles(bamFile); - List trackliteConstituentFiles = getTrackliteBamFiles(patient, experiment, input, conn); - - //loop through tracklite constituentFiles and check that they all have an entry in bam header ConstituentFiles - for (String trackliteBam : trackliteConstituentFiles) { - String [] params = trackliteBam.split(SEPERATOR); - - String result = "OK"; - boolean trackliteMatch = false; - - for (String headerFileBam : constituentFiles) { - if (headerFileBam.contains(params[0]) && headerFileBam.contains(params[1])) { - trackliteMatch = true; - break; - } - } - - if ( ! 
trackliteMatch) { - result = "no corresponding entry in bam file header for tracklite details: " + params[0] + ":" + params[1]; - logger.warn(result); - } - results.put(bamFileSmallName, result); - } - } - } finally { - conn.closeConnection(); - } - - logger.info(""); - logger.info(""); - logger.info("SUMMARY:"); - for (Entry resultsEntry : results.entrySet()) { - logger.info(resultsEntry.getKey() + " : " + resultsEntry.getValue()); - } - logger.info("DONE"); - - return exitStatus; - } - - private List getTrackliteBamFiles(String patient, String experiment, String input, GeneusDBConnection conn) throws Exception { - List trackliteResults = new ArrayList (); - - String sql = "SELECT patient_id, run_name, barcode FROM tracklite_run tr, tracklite_sample ts" + - " WHERE tr.sample_id = ts.processing_id" + - " AND ts.patient_id = '" + patient.replace('_', '-') + "'" + - " AND tr.experiment_type = '" + experiment + "'" + - " AND tr.input_type = '" + input + "'" + - "AND tr.run_status = 'complete'"; - - ResultSet rs = null; - try { - rs = conn.executeSelectQuery(sql); - - while (rs.next()) { - String runName = rs.getString(2); - String barCode = rs.getString(3); - logger.debug("runName: " + runName + ", barCode: " + barCode); - trackliteResults.add(runName + SEPERATOR + barCode); - } - - } finally { - try { - if (null != rs && null != rs.getStatement() ) { - rs.getStatement().close(); - } - } finally { - if (null != rs) rs.close(); - } - } - - return trackliteResults; - } - - private List getConstituentBamFiles(File bamFile) { - List results = new ArrayList(); - SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile); - try { - - SAMFileHeader header = reader.getFileHeader(); - // get the read groups - for (SAMReadGroupRecord readGroup : header.getReadGroups()) { - String constituentBamFile = readGroup.getAttribute("zc"); - if (null == constituentBamFile) - constituentBamFile = readGroup.getAttribute("ZC"); - - if (null != constituentBamFile) { - constituentBamFile = constituentBamFile.substring(2); - logger.debug("read group ZC attribute: " + constituentBamFile); - results.add(constituentBamFile); - } else { - logger.debug("null ZC attribute in file: " + bamFile.getAbsolutePath()); - } - } - - } finally { - reader.close(); - } - return results; - } - - - - - public static void main(String[] args) throws Exception { - BAMHeaderChecker sp = new BAMHeaderChecker(); - int exitStatus = 0; - try { - exitStatus = sp.setup(args); - } catch (Exception e) { - exitStatus = 1; - if (null != logger) - logger.error("Exception caught whilst running BAMHeaderChecker:", e); - else System.err.println("Exception caught whilst running BAMHeaderChecker"); - } - - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(BAMHeaderChecker.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("BAMHeaderChecker", BAMHeaderChecker.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - */ -} diff --git a/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- b/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- deleted file mode 100644 index b8646c1ee..000000000 --- a/qmule/src/org/qcmg/qmule/BAMPileupUtil.java-- +++ /dev/null @@ -1,124 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import htsjdk.samtools.Cigar; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; - -public class BAMPileupUtil { - - public static int SM_CUTOFF = 14; - public static int MD_CUTOFF = 3; - public static int CIGAR_CUTOFF = 34; - - public static int readLengthMatchCounter = 0; - public static int posiitonInDeletionCounter = 0; - - private static final QLogger logger = QLoggerFactory.getLogger(BAMPileupUtil.class); - - -// public static void examinePileup(List sams, VCFRecord record) { -//// int normalCoverage = 0; -// String pileup = ""; -// String qualities = ""; -// for (SAMRecord sam : sams ) { -// -// if ( eligibleSamRecord(sam)) { -//// ++normalCoverage; -// -// int offset = getReadPosition(sam, record.getPosition()); -// -// if (offset < 0) { -// logger.info("invalid offset position - position falls within deletion?? 
position: "+ record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", alignment end: " + sam.getAlignmentEnd() + ", read length: " + sam.getReadLength() + " cigar: "+ sam.getCigarString()); -// continue; -// } -// -// if (offset >= sam.getReadLength()) { -//// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); -// // set to last entry in sequence -//// logger.info("adjusting offset to read length -1"); -//// String read = sam.getReadString(); -//// int refPosition = sam.getReferencePositionAtReadPosition(offset); -// logger.info("offset: " + offset + ", position: " + record.getPosition() + ", alignment start: " + sam.getAlignmentStart() + ", unclipped alignment start: " + sam.getUnclippedStart() + ", alignment end: " + sam.getAlignmentEnd()); -// logger.info( sam.format()); -//// offset = sam.getReadLength() -1; -//// logger.info("char at adjusted offset: " + read.charAt(offset)); -//// logger.info("md tag: " + sam.getStringAttribute("MD")); -// continue; -// } -// -// char c = sam.getReadString().charAt(offset); -// pileup += sam.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c; -// qualities += sam.getBaseQualityString().charAt(offset); -// } -// } -// -// -// if (pileup.length() > 0) -// record.setPileup(PileupUtil.getPileupCounts(pileup, qualities)); -// -// } - - /** - * Determines whether a sam record is eligible by applying some filtering criteria. - * Currently filters on the SM tag value, some of the flags, and the Cigar string - * - *

NOTE that we should also be filtering on MD tag, but GATK removes this - * tag when it does its local realignment, so there is no need to include this check for the time being - * - * @param record SAMRecord that is being put through the filter check - * @return boolean indicating if the record has passed the filter - */ - public static boolean eligibleSamRecord(SAMRecord record) { - if (null == record) return false; - Integer sm = record.getIntegerAttribute("SM"); - return ! record.getDuplicateReadFlag() - && (null == sm ? false : sm.intValue() > SM_CUTOFF) -// && tallyMDMismatches(record.getStringAttribute("MD")) < MD_CUTOFF // - && ((record.getReadPairedFlag() && record.getSecondOfPairFlag() && record.getProperPairFlag()) - || tallyCigarMatchMismatches(record.getCigar()) > CIGAR_CUTOFF); - - } - - public static int tallyCigarMatchMismatches(Cigar cigar) { - int tally = 0; - if (null != cigar) { - for (CigarElement element : cigar.getCigarElements()) { - if (CigarOperator.M == element.getOperator()) { - tally += element.getLength(); - } - } - } - return tally; - } - - public static int tallyMDMismatches(String mdData) { - int count = 0; - if (null != mdData) { - for (int i = 0, size = mdData.length() ; i < size ; ) { - - if (isValidMismatch(mdData.charAt(i))) { - count++; - i++; - } else if ('^' == mdData.charAt(i)) { - while (++i < size && Character.isLetter(mdData.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... - } - } - return count; - } - - private static boolean isValidMismatch(char c) { - return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; - } - -} diff --git a/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- b/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- deleted file mode 100644 index 4501a5994..000000000 --- a/qmule/src/org/qcmg/qmule/BamMismatchCounts.java-- +++ /dev/null @@ -1,160 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; - -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; - -public class BamMismatchCounts { - static QLogger logger = QLoggerFactory.getLogger(BamMismatchCounts.class); - static long[] mismatch = new long[100]; - - static HashMap counts = new HashMap(); - static long total = 0; - static long unmapped = 0; - static long clipped = 0; - static long indel = 0; - static long skipPad = 0; - static long fullMapped = 0; - static long noMDreads = 0; - - /** - * count the mismatch base number based on the MD field - * @param r: samrecord - */ - private static void countMismatch(SAMRecord r) { - String attribute = (String)r.getAttribute("MD"); - if (null != attribute) { - int count = 0; - for (int i = 0, size = attribute.length() ; i < size ; ) { - char c = attribute.charAt(i); - if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { - count++; - i++; - } else if ( c == '^') { - //skip the insertion base - while (++i < size && Character.isLetter(attribute.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... - } - mismatch[count] ++; - - }else - noMDreads ++; - - - } - - /** - * - * @param r: sam record - * @return true if this read is full length mapped without any indels, skips and pads - */ - static private Boolean seekFullMapped(SAMRecord r){ - - if(r.getReadUnmappedFlag()){ - unmapped ++; - return false; - } - //reads with clips or indel, skips, pads - else{ - List ele = r.getCigar().getCigarElements(); - for (CigarElement element : r.getCigar().getCigarElements()){ - if( element.getLength() > 0){ - if(element.getOperator() == CigarOperator.H ||element.getOperator() == CigarOperator.S) { - clipped ++; - return false; - }else if (element.getOperator() == CigarOperator.I ||element.getOperator() == CigarOperator.D){ - indel ++; - return false; - }else if (element.getOperator() == CigarOperator.P ||element.getOperator() == CigarOperator.N){ - skipPad ++; - return false; - } - } - } - //count mismatch after the for loop - return true; - } - } - - /** - * survey the mismatch stats on full length mapped reads - * @param args: SAM/BAM file with full path, log file with full path - * @throws Exception - */ - public static void main(final String[] args) throws Exception { - Options op = new Options(BamMismatchCounts.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_BamMismatchCounts")); - op.displayHelp(); - System.exit(0); - } - - - if(op.hasLogOption()) - logger = QLoggerFactory.getLogger(BamMismatchCounts.class, op.getLogFile(), op.getLogLevel()); - else - logger = QLoggerFactory.getLogger(BamMismatchCounts.class,op.getOutputFileNames()[0] + ".log", op.getLogLevel()); - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - logger.logInitialExecutionStats( "qmule " + BamMismatchCounts.class.getName(), version,args); - - String output = op.getOutputFileNames()[0]; - String input = op.getInputFileNames()[0]; - SamReader reader = 
SAMFileReaderFactory.createSAMFileReader(new File(input), - ValidationStringency.SILENT); - - for(int i = 0; i < 100; i++) mismatch[i] = 0; - for (SAMRecord r : reader){ - total ++; - if(seekFullMapped( r)){ - fullMapped ++; - countMismatch(r); - } - } - reader.close(); - - //report mismatch - String S_mismatch = "mismatch matrix for fully mapped reads is below:\nmismatch\treads_number\tratio_to_(fullmapped,total)\n"; - for(int i = 0; i < 100; i++) - if(mismatch[i] > 0){ - int p1 = Math.round(mismatch[i] * 100 / fullMapped); - int p2 = Math.round(mismatch[i] * 100 / total); - S_mismatch += String.format("%d\t%d\t(%d%%,%d%%)\n", i,mismatch[i],p1, p2); - } - - Files.write(Paths.get(output), S_mismatch.getBytes() ); - - logger.info("total records in file: " + total ); - logger.info("unmapped records: " + unmapped); - logger.info("records with clipping (CIGAR S,H): " + clipped); - logger.info("records with indel (CIGAR I,D) : " + indel); - logger.info("records with skipping or padding (CIGAR N,P) : " + skipPad); - logger.info("records mapped full-length: " + fullMapped); - logger.info("records mapped full-length but missing MD field: " + noMDreads); - logger.info("the mismatch counts matrix is outputed to " + args[1]); - logger.logFinalExecutionStats(0); - - } - -} diff --git a/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- b/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- deleted file mode 100644 index d81e01a9c..000000000 --- a/qmule/src/org/qcmg/qmule/BamRecordCounter.java-- +++ /dev/null @@ -1,44 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; - -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.SAMFileReaderFactory; - -public class BamRecordCounter { - - private static final QLogger logger = QLoggerFactory.getLogger(BamRecordCounter.class); - - public static void main(String args[]) { - - if (null != args && args.length > 0) { - for (String filename : args) { - SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(filename)); - long count = 0; - long duplicates = 0; - long startTime = System.currentTimeMillis(); - for (SAMRecord r : reader) { - count++; - if (r.getDuplicateReadFlag()) - duplicates++; - } - logger.info("no of records in file [" + filename + "] is: " + count); - logger.info("no of duplicate records: " + duplicates); - logger.info("It took " + (System.currentTimeMillis() - startTime) + "ms to perform the count."); - } - } else { - logger.info("USAGE: qmule " + BamRecordCounter.class.getName() + " "); - } - } - -} diff --git a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- b/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- deleted file mode 100644 index 3b3fbc798..000000000 --- a/qmule/src/org/qcmg/qmule/CompareReferenceRegions.java-- +++ /dev/null @@ -1,676 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; -import java.util.Map.Entry; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionName; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - - -public class CompareReferenceRegions { - - private static final String MODE_ONEWAY = "oneway"; - private static final String MODE_ANNOTATE = "annotate"; - private static final String MODE_TWOWAY = "twoway"; - private static final String MODE_INTERSECT = "intersect"; - private static final String MODE_UNIQUE = "unique"; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private List chromosomes = new ArrayList(); - private int overlapCount = 0; - private int notOverlappingCount = 0; - private int recordCount; - private String mode; - private int column; - private String annotation; - private static QLogger logger; - private static final String MAF = "maf"; - private static final String GFF3 = "gff3"; - private static final String GTF = "gtf"; - private static final String BED = "bed"; - private static final String VCF = "vcf"; - private static final String TAB = "txt"; - private static final String DCC1 = "dcc1"; - - private void runOnewayComparison(File inputFile, File comparisonFile, - File outputOverlapFile, File outputNoOverlapFile) throws Exception { - - if (mode.equals(MODE_ANNOTATE)) { - logger.info("If overlapping, will annotate column: " + column+1 +" of file with the annotation " + annotation); - } - - //get a list of the chromosomes - setUp(inputFile, outputOverlapFile, outputNoOverlapFile); - - logger.info("Input file: " + inputFile.getAbsolutePath()); - logger.info("Comparison file: " + comparisonFile.getAbsolutePath()); - - logger.info("Chromosomes to analyze: " + chromosomes.size()); - - for (String c: chromosomes) { - logger.info("Getting records for chromosome: " + c); - Map inputRecords = readRecords(inputFile, c); - Map compareRecords = readRecords(comparisonFile, c); - compareRecords(inputRecords, compareRecords, outputOverlapFile, outputNoOverlapFile); - } - logSummary(); - clear(); - } - - private void logSummary() { - logger.info("SUMMARY"); - logger.info("Total Records: " + recordCount); - logger.info("Total Records in supplied reference regions: " + overlapCount); - logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); - } - - private void runAnnotateComparison(File inputFile, File comparisonFile, - File outputOverlapFile) throws Exception { - - //get a list of the chromosomes - setUp(inputFile, outputOverlapFile, null); - - logger.info("Input file: " + inputFile.getAbsolutePath()); - logger.info("Comparison file: " + comparisonFile.getAbsolutePath()); - - logger.info("Chromosomes to analyze: " + chromosomes.size()); - - for (String c: chromosomes) { - logger.info("Getting records for chromosome: " + c); - Map inputRecords = readRecords(inputFile, c); - Map compareRecords = readRecords(comparisonFile, c); - compareRecordsAndAnnotate(inputRecords, compareRecords, outputOverlapFile); - } - logSummary(); - clear(); - } - - private void 
runIntersectComparison() throws Exception { - //Set first input file as primary - File primaryInputFile = new File(cmdLineInputFiles[0]); - //Single output file - File outputFile = new File(cmdLineOutputFiles[0]); - - int[] counts = new int[cmdLineInputFiles.length]; - counts[0] = 0; - - setUp(primaryInputFile, outputFile, null); - - //logging - logger.info("Input file 1: " + primaryInputFile.getAbsolutePath()); - for (int i=1; i inputRecords = readRecords(primaryInputFile, c); - counts[0] += inputRecords.size(); - for (int i=1; i compareRecords = readRecords(compareFile, c); - counts[i] += compareRecords.size(); - compareOverlapRecords(c, inputRecords, compareRecords, getFileType(primaryInputFile)); - } - overlapCount += inputRecords.size(); - //any input records left at the end are intersecting - writeRecords(inputRecords, outputFile); - } - for (int i=0; i inputRecords = readRecords(primaryInputFile, c); - Map compareRecords = new TreeMap(); - counts[f] += inputRecords.size(); - for (int i=0; i currentRecords = readRecords(compareFile, c); - counts[i] = counts[i] + currentRecords.size(); - compareRecords.putAll(currentRecords); - } - } - compareOverlapRecords(c, inputRecords, compareRecords, getFileType(primaryInputFile)); - notOverlappingCount += inputRecords.size(); - //any input records left at the end are unique - writeRecords(inputRecords, outputFile); - logger.info(counts[f] + " total records for file " +cmdLineInputFiles[f]); - for (int i=0; i inputRecords, Map compareRecords, String inputFileType) throws Exception { - - Iterator> entries = inputRecords.entrySet().iterator(); - while (entries.hasNext()) { - Entry entry = entries.next(); - - boolean isOverlapping = compareRecord(entry, compareRecords, inputFileType); - - if (mode.equals(MODE_INTERSECT) && !isOverlapping) { - //remove input record if it isn't overlapping and won't intersect with all records - entries.remove(); - } - if (mode.equals(MODE_UNIQUE) && isOverlapping) { - entries.remove(); - } - } - } - - private void compareRecordsAndAnnotate(Map inputRecords, - Map compareRecords, - File outputOverlapFile) throws Exception { - BufferedWriter overlapWriter = new BufferedWriter(new FileWriter(outputOverlapFile, true)); - - try { - for (Entry entry : inputRecords.entrySet()) { - recordCount++; - boolean isOverlapping = compareRecord(entry, compareRecords, null); - - if (isOverlapping) { - overlapCount++; - } else { - notOverlappingCount++; - } - writeRecord(overlapWriter, entry.getValue()); - } - } finally { - overlapWriter.close(); - } - } - - private void compareRecords(Map inputRecords, - Map compareRecords, - File outputOverlapFile, File outputNoOverlapFile) throws Exception { - BufferedWriter overlapWriter = new BufferedWriter(new FileWriter(outputOverlapFile, true)); - BufferedWriter noOverlapWriter = new BufferedWriter(new FileWriter(outputNoOverlapFile, true)); - - try { - for (Entry entry : inputRecords.entrySet()) { - - recordCount++; - - boolean isOverlapping = compareRecord(entry, compareRecords, null); - - if (isOverlapping) { - overlapCount++; - writeRecord(overlapWriter, entry.getValue()); - } else { - notOverlappingCount++; - if (mode.equals(MODE_ANNOTATE)) { - - } else { - writeRecord(noOverlapWriter, entry.getValue()); - } - } - } - } finally { - overlapWriter.close(); - noOverlapWriter.close(); - } - } - - private boolean compareRecord(Entry entry, Map compareRecords, String inputFileType) throws Exception { - ChrPosition inputChrPos = entry.getKey(); - TabbedRecord inputRecord = entry.getValue(); - 
boolean isOverlapping = false; - //check to see if it is overlapping with the comparison reference region - for (Entry compareEntry : compareRecords.entrySet()) { - ChrPosition comparePos = compareEntry.getKey(); - if (comparePos.getEndPosition() < inputChrPos.getStartPosition()) { - continue; - } else if (comparePos.getStartPosition() > inputChrPos.getEndPosition()) { - break; - } else { - if (tabbedRecordFallsInCompareRecord(inputChrPos, inputRecord, compareEntry)) { - isOverlapping = true; - if (mode.equals(MODE_ANNOTATE)) { - String[] values = inputRecord.getDataArray(); - String oldVal = values[column]; - if (oldVal.equals("")) { - values[column] = annotation; - } else { - if (oldVal.endsWith(";")) { - values[column] = oldVal + annotation; - } else { - values[column] = oldVal + ";" + annotation; - } - } - String data = ""; - for (String s: values) { - data += s + "\t"; - } - inputRecord.setData(data); - } - if (mode.equals(MODE_INTERSECT)) { - //change the ends?? - int[] indexes = getChrIndex(inputFileType, entry.getValue().getData().split("\t")); - String[] array = inputRecord.getDataArray(); - - if (inputChrPos.getStartPosition() > compareEntry.getKey().getStartPosition()) { - array[indexes[1]] = Integer.toString(compareEntry.getKey().getStartPosition()); - } - if (inputChrPos.getEndPosition() < compareEntry.getKey().getEndPosition()) { - array[indexes[2]] = Integer.toString(compareEntry.getKey().getEndPosition()); - } - String data = ""; - for (String s: array) { - data += s + "\t"; - } - inputRecord.setData(data); - entry.setValue(inputRecord); - } - } - } - } - return isOverlapping; - } - - - private void writeRecords(Map records, File outputFile) throws IOException { - BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile, true)); - - for (Entry entry: records.entrySet()) { - writeRecord(writer, entry.getValue()); - } - writer.close(); - } - - private void writeRecord(BufferedWriter writer, TabbedRecord record) throws IOException { - if (!record.getData().endsWith("\n")) { - record.setData(record.getData() + "\n"); - } - writer.write(record.getData()); - } - - private TreeMap readRecords(File inputFile, String chromosome) throws Exception { - - TabbedFileReader reader = new TabbedFileReader(inputFile); - TreeMap records = new TreeMap(); - String fileType = getFileType(inputFile); - try { - - Iterator iterator = reader.getRecordIterator(); - - while (iterator.hasNext()) { - - TabbedRecord tab = iterator.next(); - if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis") || tab.getData().startsWith("Chr") ) { - continue; - } - ChrPosition chrPos = getChrPosition(fileType, tab); - if (chrPos.getChromosome().equals(chromosome)) { - records.put(chrPos, tab); - } - } - - } finally { - reader.close(); - } - - return records; - } - - private String getFileType(File inputFile) { - int index = inputFile.getName().lastIndexOf(".") + 1; - String name = inputFile.getName().substring(index, inputFile.getName().length()); - - if (name.equals("dcc")) { - return "dcc1"; - } - - return name; - } - - private void setUp(File file, File outputFileOne, File outputFileTwo) throws Exception { - TabbedFileReader reader = new TabbedFileReader(file); - Iterator iterator = reader.getRecordIterator(); - - String fileType = getFileType(file); - List header = new ArrayList(); - if (reader.getHeader() != null) { - Iterator iter = reader.getHeader().iterator(); - while (iter.hasNext()) { - header.add(iter.next()); - } - } - - while 
(iterator.hasNext()) { - - TabbedRecord tab = iterator.next(); - - if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo") || tab.getData().startsWith("analysis") || tab.getData().startsWith("Chr") ) { - header.add(tab.getData()); - continue; - } - - ChrPosition chrPos = getChrPosition(fileType, tab); - - if (!chromosomes.contains(chrPos.getChromosome())) { - chromosomes.add(chrPos.getChromosome()); - } - } - reader.close(); - - if (outputFileOne != null) { - writeHeader(header, outputFileOne); - } - if (outputFileTwo != null) { - writeHeader(header, outputFileTwo); - } - - } - - private int[] getChrIndex(String inputFileType, String[] values) throws Exception { - - int chrIndex = 0; - int startIndex = 0; - int endIndex = 0; - - if (inputFileType.equals(MAF)) { - chrIndex = 4; - startIndex = 5; - endIndex = 6; - } else if (inputFileType.equals(DCC1)) { - chrIndex = 4; - startIndex = 5; - endIndex = 6; - } else if (inputFileType.equals(BED)) { - chrIndex = 0; - startIndex = 1; - endIndex = 2; - } else if (inputFileType.equals(GFF3) || inputFileType.equals(GTF)) { - chrIndex = 0; - startIndex = 3; - endIndex = 4; - } else if (inputFileType.equals(VCF)) { - chrIndex = 0; - startIndex = 1; - endIndex = 1; - if (values.length >= 8) { - String[] infos = values[7].split("\t"); - - for (String info : infos) { - String[] params = info.split("="); - if (params.length == 2) { - if (params[0].equals("END")) { - endIndex = 2; - values[2] = params[1]; - } - } - } - } - //NEED TO CHANGE FOR INDELS - } else if (inputFileType.equals(TAB)) { - chrIndex = 0; - startIndex = 1; - endIndex = 2; - } else { - throw new Exception("Input file type is not recognized"); - } - int[] arr = {chrIndex, startIndex, endIndex}; - return arr; - } - - private ChrPosition getChrPosition(String inputFileType, TabbedRecord tab) throws Exception { - String[] values = tab.getData().split("\t"); - ChrPosition chr = null; - - int[] indexes = getChrIndex(inputFileType, values); - int chrIndex = indexes[0]; - int startIndex = indexes[1]; - int endIndex = indexes[2]; - - if (inputFileType.equals(BED)) { - chr = new ChrRangePosition(values[chrIndex], new Integer(values[startIndex])+1, new Integer(values[endIndex])+1); - } else { - String chromosome = values[chrIndex]; - if (!chromosome.contains("GL") && !chromosome.startsWith("chr")) { - chromosome = "chr" + chromosome; - } - if (chromosome.equals("chrM")) { - chromosome = "chrMT"; - } - if (inputFileType.equals(MAF)) { - chr = new ChrPositionName(chromosome, new Integer(values[startIndex]), new Integer(values[endIndex]), values[0]); - } else { - chr = new ChrRangePosition(chromosome, new Integer(values[startIndex]), new Integer(values[endIndex])); - } - } - return chr; - } - - private boolean tabbedRecordFallsInCompareRecord(ChrPosition inputChrPos, TabbedRecord inputRecord, Entry entry) { - if (entry != null) { - ChrPosition compareChrPos = entry.getKey(); - if ((inputChrPos.getStartPosition() >= compareChrPos.getStartPosition() && inputChrPos.getStartPosition() <= compareChrPos.getEndPosition()) || - (inputChrPos.getEndPosition() >= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() <= compareChrPos.getEndPosition()) - || (inputChrPos.getStartPosition() <= compareChrPos.getStartPosition() && inputChrPos.getEndPosition() >= compareChrPos.getEndPosition())) { - return true; - } - } - return false; - } - - public String[] getCmdLineInputFiles() { - return cmdLineInputFiles; - } - - public void setCmdLineInputFiles(String[] cmdLineInputFiles) { - 
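tabbedRecordFallsInCompareRecord above tests three cases (input start inside the compare region, input end inside it, or input fully containing it). For well-formed ranges where start <= end, those three cases collapse to the single standard interval-overlap comparison, sketched here with hypothetical names that are not part of the removed code:

// Sketch only; RangeOverlap and overlaps are illustrative names.
public final class RangeOverlap {

    /**
     * True if [aStart, aEnd] and [bStart, bEnd] share at least one position.
     * Equivalent to the three-way check above when both ranges have start <= end.
     */
    public static boolean overlaps(int aStart, int aEnd, int bStart, int bEnd) {
        return aStart <= bEnd && aEnd >= bStart;
    }

    public static void main(String[] args) {
        System.out.println(overlaps(100, 200, 150, 250)); // true: partial overlap
        System.out.println(overlaps(100, 200, 120, 180)); // true: second range contained
        System.out.println(overlaps(100, 200, 201, 300)); // false: adjacent but disjoint
    }
}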
this.cmdLineInputFiles = cmdLineInputFiles; - } - - - private void writeHeader(List header, File outputOverlapFile) throws IOException { - BufferedWriter writer = new BufferedWriter(new FileWriter(outputOverlapFile, true)); - - for (String h: header) { - - writer.write(h + "\n"); - } - writer.close(); - } - - public List getChromosomes() { - return chromosomes; - } - - public void setChromosomes(List chromosomes) { - this.chromosomes = chromosomes; - } - - - public int getOverlapCount() { - return overlapCount; - } - - public void setOverlapCount(int overlapCount) { - this.overlapCount = overlapCount; - } - - public int getNotOverlappingCount() { - return notOverlappingCount; - } - - public void setNotOverlappingCount(int notOverlappingCount) { - this.notOverlappingCount = notOverlappingCount; - } - - public int getMafCount() { - return recordCount; - } - - public void setMafCount(int mafCount) { - this.recordCount = mafCount; - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(CompareReferenceRegions.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareReferenceRegions", CompareReferenceRegions.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - //output files - cmdLineOutputFiles = options.getOutputFileNames(); - - if (cmdLineOutputFiles.length >= 1) { - if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - - for (String file : cmdLineOutputFiles) { - if (new File(file).exists() && !new File(file).isDirectory()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - } - mode = options.getMode(); - if (mode == null) { - mode = MODE_ONEWAY; - } - logger.info("Mode: " + mode); - - if (mode.equals(MODE_ANNOTATE)) { - //take away 1 to get index of column rather than column number - column = new Integer(options.getColumn()) -1; - annotation = options.getAnnotation(); - } - - return engage(); - } - - return returnStatus; - } - - - private int engage() throws Exception { - - if (mode.equals(MODE_ONEWAY) || mode.equals(MODE_TWOWAY)) { - runOnewayComparison(new File(cmdLineInputFiles[0]), new File(cmdLineInputFiles[1]), new File(cmdLineOutputFiles[0]), new File(cmdLineOutputFiles[1])); - if (mode.equals(MODE_TWOWAY)) { - runOnewayComparison(new File(cmdLineInputFiles[1]), new File(cmdLineInputFiles[0]), new File(cmdLineOutputFiles[2]), new File(cmdLineOutputFiles[3])); - } - } else if (mode.equals(MODE_ANNOTATE)) { - runAnnotateComparison(new File(cmdLineInputFiles[0]), new File(cmdLineInputFiles[1]), new File(cmdLineOutputFiles[0])); - } else if (mode.equals(MODE_INTERSECT)) { - runIntersectComparison(); - } else if (mode.equals(MODE_UNIQUE)) { - runUniqueComparison(); - } else { - throw new QMuleException("MODE_ERROR", mode); - } - return 0; - } - - - private void clear() { - recordCount = 0; - overlapCount = 0; - notOverlappingCount = 0; - } - - public static void main(String[] args) throws Exception { - CompareReferenceRegions sp = new CompareReferenceRegions(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - -} diff --git a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- b/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- deleted file mode 100644 index 84fc72a32..000000000 --- a/qmule/src/org/qcmg/qmule/DbSnpChrLiftover.java-- +++ /dev/null @@ -1,86 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.Set; -import java.util.TreeSet; - -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedFileWriter; -import org.qcmg.qmule.tab.TabbedHeader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class DbSnpChrLiftover { - - private static char TAB = '\t'; - - String inputVCF; - String outputVCF; - - - private final Set uniqueChrNames = new TreeSet(); - - public DbSnpChrLiftover() {} - - private void getUniqueChrNames() throws Exception { - TabbedFileReader reader = new TabbedFileReader(new File(inputVCF)); - TabbedFileWriter writer = new TabbedFileWriter(new File(outputVCF)); - try { - - TabbedHeader header = reader.getHeader(); - - // writer out header - writer.addHeader(header); - - for (TabbedRecord record : reader) { - String [] params = TabTokenizer.tokenize(record.getData()); - String chr = params[0]; - uniqueChrNames.add(chr); - - // switch the chr - params[0] = "chr" + chr; - - StringBuilder sb = new StringBuilder(); - for (int i = 0, len = params.length ; i < len ; i ++) { - sb.append(params[i]); - if (i < len-1) sb.append(TAB); - } - - record.setData(sb.toString()); - - writer.add(record); - } - - } finally { - try { - writer.close(); - } finally { - reader.close(); - } - } - - - for (String chr : uniqueChrNames) { - System.out.println("chr: " + chr); - } - } - - - public static void main(String[] args) throws Exception { - if (args.length < 2) - throw new IllegalArgumentException("USAGE: DbSnpChrLiftover "); - - DbSnpChrLiftover dcl = new DbSnpChrLiftover(); - - - dcl.inputVCF = args[0]; - dcl.outputVCF = args[1]; - - dcl.getUniqueChrNames(); - - } - -} diff --git a/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- b/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- deleted file mode 100644 index 71bd5e9cf..000000000 --- a/qmule/src/org/qcmg/qmule/GermlineDBStripper.java-- +++ /dev/null @@ -1,47 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.IOException; - -import org.qcmg.germlinedb.GermlineDBFileReader; -import org.qcmg.germlinedb.GermlineDBFileWriter; -import org.qcmg.germlinedb.GermlineDBRecord; - -public class GermlineDBStripper { - - - public static void main(String[] args) throws IOException { - - String germlineDB = args[0]; - String germlineDBClassA = args[1]; - String header = "analysis_id\tcontrol_sample_id\tvariation_id\tvariation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tflag"; - - GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDB)); - GermlineDBFileWriter writer = new GermlineDBFileWriter(new File(germlineDBClassA)); - - try { - writer.add(header+"\n"); - - // strip out all non-classA entities from Germline_DB - int totalCount = 0, classACount = 0; - for (GermlineDBRecord record : reader) { - ++totalCount; - if ("--".equals(record.getFlag())) { - ++classACount; - writer.add(record.getData() + "\n"); - } - } - System.out.println("total count: " + totalCount + ", classA count: " + classACount); - - } finally { - try { - reader.close(); - } finally { - writer.close(); - } - } - } -} diff --git a/qmule/src/org/qcmg/qmule/GetBamRecords.java-- b/qmule/src/org/qcmg/qmule/GetBamRecords.java-- deleted file mode 100644 index 114351d71..000000000 --- a/qmule/src/org/qcmg/qmule/GetBamRecords.java-- +++ /dev/null @@ -1,226 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMUtils; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.picard.QJumper; - -public class GetBamRecords { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - List records; - - private String position; - - private int exitStatus; - private static QLogger logger; - - - public int engage() throws Exception { - - logger.info("Setting up the QJumper"); - QJumper jumper = new QJumper(); - jumper.setupReader(cmdLineInputFiles[0]); - - String contig = position.substring(0, position.indexOf(":")); - int start = Integer.parseInt(position.substring(position.indexOf(":")+1)); - - logger.info("config: " + contig); - logger.info("start: " + start); - - records = jumper.getOverlappingRecordsAtPosition(contig, start, start); - - logger.info("unfiltered read count: " + records.size()+ ""); - - int filteredCount = 0, readsWithBaseAtPosition = 0, duplicateCount = 0, properlyPaired = 0,properlyPairedAll = 0, pairedAll = 0, paired = 0, notPrimaryAlignment = 0, unmapped = 0; - String qualityString = "", qualityPhredString = ""; - String baseString = ""; - int unmappedSecondaryDuplicates = 0, unmappedSecondaryDuplicatesProperly = 0; - - char[] novelStartBases = new char[1024]; // hmmmmm - Set forwardStrand = new HashSet(); - Set reverseStrand = new HashSet(); - int j = 0; - - for (SAMRecord rec : records) { - int readPosition = org.qcmg.picard.util.SAMUtils.getIndexInReadFromPosition(rec, start); - if (readPosition >= 0 && readPosition < rec.getReadLength()) { - char c = rec.getReadString().charAt(readPosition); - if (c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N') { - readsWithBaseAtPosition++; - if (rec.getDuplicateReadFlag()) { - duplicateCount++; - } else { - byte [] baseQuals = SAMUtils.fastqToPhred(rec.getBaseQualityString()); - qualityPhredString +=baseQuals[readPosition] + ","; - baseString += (rec.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : c) + ""; -// baseString += c + ","; - qualityString +=rec.getBaseQualityString().charAt(readPosition) + ""; - - if (rec.getMappingQuality() >= 10 && rec.getBaseQualities()[readPosition] >= 10) { - if (rec.getReadNegativeStrandFlag()) { - if (reverseStrand.add(rec.getAlignmentStart())) { - novelStartBases[j++] = c; - } - } else { - if (forwardStrand.add(rec.getAlignmentStart())) { - novelStartBases[j++] = c; - } - } - } - } - } - - if (rec.getReadPairedFlag()) { - paired++; - if ( rec.getProperPairFlag()) properlyPaired++; - - } - if (rec.getReadUnmappedFlag()) unmapped++; - if (rec.getReadUnmappedFlag()) unmapped++; - if (rec.getNotPrimaryAlignmentFlag()) notPrimaryAlignment++; - - - if ( ! rec.getDuplicateReadFlag() && ! rec.getNotPrimaryAlignmentFlag() && ! rec.getReadUnmappedFlag()) - unmappedSecondaryDuplicates++; - if ( ! rec.getDuplicateReadFlag() && ! rec.getNotPrimaryAlignmentFlag() && ! rec.getReadUnmappedFlag() - && (rec.getReadPairedFlag() ? 
rec.getProperPairFlag() : true)) -// && (rec.getReadPairedFlag() && rec.getProperPairFlag())) - unmappedSecondaryDuplicatesProperly++; - } - - if (rec.getReadPairedFlag()) { - pairedAll++; - if (rec.getProperPairFlag()) properlyPairedAll++; - } - - if (BAMPileupUtil.eligibleSamRecord(rec)) { - ++filteredCount; - logger.info("***" + rec.getSAMString()); - } else logger.info(rec.getSAMString()); - - - - } - - - logger.info("SUMMARY: "); - logger.info("Total no of records: " + records.size() ); - logger.info("No of records with a base at position: " + readsWithBaseAtPosition); - logger.info("No of duplicate records (that have a base at position): " + duplicateCount); - logger.info("No of unique records (that have a base at position): " + (readsWithBaseAtPosition-duplicateCount)); - logger.info("No of unique paired records (that have a base at position): " + paired); - logger.info("No of unique properly paired records (that have a base at position): " + properlyPaired); - logger.info("No of records not primary aligned (that have a base at position): " + notPrimaryAlignment); - logger.info("No of records not mapped (that have a base at position): " + unmapped); - logger.info("unmappedSecondaryDuplicates (that have a base at position): " + unmappedSecondaryDuplicates); - logger.info("unmappedSecondaryDuplicatesProperly (that have a base at position): " + unmappedSecondaryDuplicatesProperly); - logger.info("No of paired records (all): " + pairedAll); - logger.info("No of properly paired records (all): " + properlyPairedAll); - logger.info("Unique record bases: " + baseString.substring(0,baseString.length() > 0 ? baseString.length() : 0)); - logger.info("Unique record base qualities: " + qualityString.substring(0,qualityString.length() > 0 ? qualityString.length() : 0)); - logger.info("Unique record base qualities (phred): " + qualityPhredString.substring(0,qualityPhredString.length() > 0 ? qualityPhredString.length() : 0)); - logger.info("filtered read count: " + filteredCount + " out of " + records.size() ); - logger.info("Novel start bases: " + new String(novelStartBases)); - - jumper.closeReader(); - - writeToFile(); - - return exitStatus; - } - - private void writeToFile() { - if (null != cmdLineOutputFiles && cmdLineOutputFiles.length == 1) { - try (FileWriter writer = new FileWriter(cmdLineOutputFiles[0]);){ - for (SAMRecord rec : records) { - writer.write(rec.getSAMString()); - } - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - } - } - - - - public static void main(String[] args) throws Exception { - GetBamRecords sp = new GetBamRecords(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("GetBamRecords", GetBamRecords.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - position = options.getPosition(); - position = options.getPosition(); - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/GetInsetSize.java-- b/qmule/src/org/qcmg/qmule/GetInsetSize.java-- deleted file mode 100644 index 44d5cc8c6..000000000 --- a/qmule/src/org/qcmg/qmule/GetInsetSize.java-- +++ /dev/null @@ -1,35 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; - -import org.qcmg.picard.SAMFileReaderFactory; - -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -public class GetInsetSize { - public static void main(String[] args) throws Exception{ - - File input = new File(args[0]); - SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); - int min =3000; - int max = 0; - String aaa = "AAA"; - for( SAMRecord record : reader){ - - if(record.getAttribute("XC").equals(aaa)){ - int size = Math.abs( record.getInferredInsertSize()); - if(size > max) max = size; - if(size < min) min = size; - } - } - reader.close(); - System.out.println(String.format("Insert range %d-%d\n", min, max)); - } - -} diff --git a/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- b/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- deleted file mode 100644 index 408ef9027..000000000 --- a/qmule/src/org/qcmg/qmule/IndelDCCHeader.java-- +++ /dev/null @@ -1,395 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; - -import htsjdk.samtools.SAMFileHeader; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.meta.QDccMeta; -import org.qcmg.common.meta.QLimsMeta; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.util.QDccMetaFactory; -import org.qcmg.picard.util.QLimsMetaFactory; - -public class IndelDCCHeader { - - private String logFile; - private File somaticOutputFile; - private File germlineOutputFile; - private String mode; - private File normalBam; - private File tumourBam; - private String uuid; - private boolean qexecPresent = false; - private ArrayList qexec = new ArrayList(); - private boolean completeHeaderPresent = false; - private File somaticFile; - private File germlineFile; - private String tumourSampleId; - private String normalSampleId; - private static QLogger logger; - - public void setup(String args[]) throws Exception{ - - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(IndelDCCHeader.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("IndelDCCHeader", IndelDCCHeader.class.getPackage().getImplementationVersion(), args); - - // get list of file names - String[] cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_INPUT_FILES"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - somaticFile = new File(cmdLineInputFiles[0]); - germlineFile = new File(cmdLineInputFiles[1]); - tumourBam = new File(options.getTumour()); - normalBam = new File(options.getNormal()); - - if ( ! FileUtils.canFileBeRead(tumourBam)) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); - } - if ( ! FileUtils.canFileBeRead(normalBam)) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , tumourBam.getAbsolutePath()); - } - - String[] cmdLineOutputFiles = options.getOutputFileNames(); - - somaticOutputFile = new File(cmdLineOutputFiles[0]); - germlineOutputFile = new File(cmdLineOutputFiles[1]); - - if (cmdLineOutputFiles.length != 2) { - throw new QMuleException("TOO_MANY_OUTPUTFILE"); - } - if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - for (String file : cmdLineOutputFiles) { - if (new File(file).exists()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - - mode = options.getMode(); - - if (mode == null || (!mode.equals("pindel") && !mode.equals("gatk"))) { - throw new QMuleException("MODE_ERROR", mode); - } - - logger.info("Somatic input DCC: " + somaticFile.getAbsolutePath()); - logger.info("Germline input DCC: " + germlineFile.getAbsolutePath()); - logger.info("Output DCC: " + somaticOutputFile.getAbsolutePath()); - logger.info("Output DCC: " + germlineOutputFile.getAbsolutePath()); - logger.info("Tumour bam: " + tumourBam.getAbsolutePath()); - logger.info("Normal bam: " + normalBam.getAbsolutePath()); - logger.info("Mode: " + mode); - - } - } - - public int annotate() throws Exception { - //double check to make sure that uuid isn't already present - checkForUUid(); - - StringBuilder header = new StringBuilder(); - if (completeHeaderPresent) { - logger.info("UUid already present in header. No annotation is taking place"); - } else if (qexecPresent){ - StringBuilder sb = new StringBuilder(); - for (String s: qexec) { - sb.append(s + "\n"); - } - header.append(sb.toString()); - header.append(getDCCMeta()); - QLimsMeta tumour = QLimsMetaFactory.getLimsMeta("TEST", tumourBam.getAbsolutePath()); - tumourSampleId = tumour.getSample(); - header.append(tumour.getLimsMetaDataToString()); - QLimsMeta normal = QLimsMetaFactory.getLimsMeta("CONTROL", normalBam.getAbsolutePath()); - normalSampleId = normal.getSample(); - header.append(normal.getLimsMetaDataToString()); - //write somatic - writeOutputFile(header.toString(), somaticFile, somaticOutputFile, false); - //write germline - writeOutputFile(header.toString(), germlineFile, germlineOutputFile, true); - } - - return 0; - } - - public File getSomaticOutputFile() { - return somaticOutputFile; - } - - public void setSomaticOutputFile(File somaticOutputFile) { - this.somaticOutputFile = somaticOutputFile; - } - - public File getGermlineOutputFile() { - return germlineOutputFile; - } - - public void setGermlineOutputFile(File germlineOutputFile) { - this.germlineOutputFile = germlineOutputFile; - } - - public File getSomaticFile() { - return somaticFile; - } - - public void setSomaticFile(File somaticFile) { - this.somaticFile = somaticFile; - } - - public File getGermlineFile() { - return germlineFile; - } - - public void setGermlineFile(File germlineFile) { - this.germlineFile = germlineFile; - } - - public boolean isQexecPresent() { - return qexecPresent; - } - - public void setQexecPresent(boolean qexecPresent) { - this.qexecPresent = qexecPresent; - } - - public ArrayList getQexec() { - return qexec; - } - - public void setQexec(ArrayList qexec) { - this.qexec = qexec; - } - - public boolean isCompleteHeaderPresent() { - return completeHeaderPresent; - } - - public void setCompleteHeaderPresent(boolean completeHeaderPresent) { - this.completeHeaderPresent = completeHeaderPresent; - } - - public void checkForUUid() throws IOException, QMuleException { - BufferedReader reader = new BufferedReader(new FileReader(somaticFile)); - - String line; - boolean ddcMeta = false; - boolean uuidHere = false; - boolean uuidInResults = false; - qexec = new ArrayList(); - while((line = reader.readLine()) != null) { - if (line.startsWith("#") || line.startsWith("analysis")) { - if (line.contains("Uuid") || line.contains("uuid")) { - 
uuidHere = true; - } - if (line.startsWith("#Q_EXEC")) { - qexec.add(line); - } - if (line.startsWith("#Q_DCCMETA")) { - ddcMeta = true; - } - } else { - String[] values = line.split("\t"); - if (isCorrectUuidFormat(values[0])) { - uuidInResults = true; - } - } - } - reader.close(); - if (ddcMeta && uuidHere && uuidInResults) { - logger.info("Complete header already present."); - completeHeaderPresent = true; - } else if (uuidHere && qexec.size() == 14) { - qexecPresent = true; - logger.info("QExec header and uuid present."); - String q = ""; - for (String s: qexec) { - if (s.contains("Uuid")) { - q = s.replace("-", "_"); - String potentialUuid = s.split("\t")[2].replace("-", "_"); - if (isCorrectUuidFormat(potentialUuid)) { - uuid = potentialUuid; - } else { - logger.info("UUid was not correct format: " + potentialUuid); - throw new QMuleException("UUID_ERROR"); - } - } - } - qexec.remove(0); - qexec.add(0, q); - } else { - logger.info("Could not determine if UUid and DCC header is present"); - throw new QMuleException("UUID_ERROR"); - } - } - - public boolean isCorrectUuidFormat(String potentialUuid) { - if (potentialUuid.length() == 36 && potentialUuid.split("_").length == 5) { - return true; - } - return false; - } - - public String getDCCMeta() throws Exception { - SAMFileHeader tHeader = SAMFileReaderFactory.createSAMFileReader(tumourBam).getFileHeader(); - SAMFileHeader nHeader = SAMFileReaderFactory.createSAMFileReader(normalBam).getFileHeader(); - QDccMeta meta; - - meta = QDccMetaFactory.getDccMeta(uuid, nHeader, tHeader, mode); - return meta.getDCCMetaDataToString(); - } - - public void writeOutputFile(String header, File inputFile, File outputFile, boolean isGermline) throws IOException { - BufferedReader reader = new BufferedReader(new FileReader(inputFile)); - BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); - - if (!completeHeaderPresent) { - writer.write(header); - } - - String line; - while((line = reader.readLine()) != null) { - if (!line.startsWith("#") && !line.startsWith("analysis") && !completeHeaderPresent) { - writer.write(replaceIdsInLine(line, isGermline) + "\n"); - } else { - if (qexecPresent && !line.startsWith("#Q_EXEC")) { - writer.write(line + "\n"); - } - } - } - reader.close(); - writer.close(); - } - - public String getTumourSampleId() { - return tumourSampleId; - } - - public void setTumourSampleId(String tumourSampleId) { - this.tumourSampleId = tumourSampleId; - } - - public String getNormalSampleId() { - return normalSampleId; - } - - public void setNormalSampleId(String normalSampleId) { - this.normalSampleId = normalSampleId; - } - - public String replaceIdsInLine(String line, boolean isGermline) { - String[] values = line.split("\t"); - - StringBuilder sb = new StringBuilder(); - for (int i=0; i< values.length; i++) { - if (i==0 && !completeHeaderPresent) { - sb.append(uuid + "\t"); - } else if (i==1 && !completeHeaderPresent){ - if (isGermline) { - sb.append(normalSampleId + "\t"); - } else { - sb.append(tumourSampleId + "\t"); - } - } else if (i==2 && !completeHeaderPresent) { - String[] mutationStrs = values[i].split("_"); - String count = "_" + mutationStrs[mutationStrs.length-1]; - if (isGermline) { - sb.append(uuid + "_" + normalSampleId + count + "\t"); - } else { - sb.append(uuid + "_"+ tumourSampleId + count + "\t"); - } - } else { - sb.append(values[i] + "\t"); - } - } - return sb.toString(); - } - - public String getUuid() { - return uuid; - } - - public void setUuid(String uuid) { - this.uuid = uuid; - } - - public 
String getMode() { - return mode; - } - - public void setMode(String mode) { - this.mode = mode; - } - - public File getNormalBam() { - return normalBam; - } - - public void setNormalBam(File normalBam) { - this.normalBam = normalBam; - } - - public File getTumourBam() { - return tumourBam; - } - - public void setTumourBam(File tumourBam) { - this.tumourBam = tumourBam; - } - - public static void main(String[] args) throws Exception { - IndelDCCHeader sp = new IndelDCCHeader(); - LoadReferencedClasses.loadClasses(IndelDCCHeader.class); - sp.setup(args); - int exitStatus = sp.annotate(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - -} diff --git a/qmule/src/org/qcmg/qmule/MAF2DCC1.java-- b/qmule/src/org/qcmg/qmule/MAF2DCC1.java-- deleted file mode 100644 index 998a34a10..000000000 --- a/qmule/src/org/qcmg/qmule/MAF2DCC1.java-- +++ /dev/null @@ -1,418 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedFileWriter; -import org.qcmg.qmule.tab.TabbedHeader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class MAF2DCC1 { - - private String logFile; - private File mafFile; - private final List dccFiles = new ArrayList(); - private File outputDccFile; - private static QLogger logger; - private Map> mafRecords = new HashMap<>(); - private int inputMafRecordCount; - private int[] mafColumnIndexes; - private int[] dccColumnIndexes; - private String mode; - - - public String getLogFile() { - return logFile; - } - - public File getMafFile() { - return mafFile; - } - - public File getOutputDccFile() { - return outputDccFile; - } - - public Map> getMafRecords() { - return mafRecords; - } - - public void setMafRecords(Map> mafRecords) { - this.mafRecords = mafRecords; - } - - public int[] getMafColumnIndexes() { - return mafColumnIndexes; - } - - public void setMafColumnIndexes(int[] mafColumnIndexes) { - this.mafColumnIndexes = mafColumnIndexes; - } - - public int[] getDccColumnIndexes() { - return dccColumnIndexes; - } - - public void setDccColumnIndexes(int[] dccColumnIndexes) { - this.dccColumnIndexes = dccColumnIndexes; - } - - public String getMode() { - return mode; - } - - public void setMode(String mode) { - this.mode = mode; - } - - public int getInputMafRecordCount() { - return inputMafRecordCount; - } - - public List getDccFiles() { - return dccFiles; - } - - public void setup(String args[]) throws Exception{ - - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(MAF2DCC1.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("MAF2DCC1", MAF2DCC1.class.getPackage().getImplementationVersion(), args); - - // get list of file names - String[] cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_INPUT_FILES"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - mafFile = new File(cmdLineInputFiles[0]); - - for (int i=1; i 0) { - logger.warn("Could not find matches for the following records: "); - for (ChrPosition key : mafRecords.keySet()) { - logger.info("Missing at positions: " + key.toString()); - } - throw new QMuleException("MISSING_DCC_RECORDS", Integer.toString(mafRecords.size())); - } - - if (countInMaf != inputMafRecordCount || mafRecords.size() > 0) { - throw new QMuleException("COUNT_ERROR", Integer.toString(countInMaf), Integer.toString(inputMafRecordCount)); - } - - logger.info("Added " + countInMaf + " records to the dcc1 output file"); - - return 0; - } - - private void readMafFile() throws Exception { - TabbedFileReader reader = new TabbedFileReader(mafFile); - try { - int count = 0; - boolean checkForMissingColumnIndex = true; - for (TabbedRecord rec : reader) { - count++; - //header - if (rec.getData().startsWith("Hugo")) { - mafColumnIndexes = findColumnIndexesFromHeader(rec); - } else { - // only need to do this once - if (checkForMissingColumnIndex) { - if (missingColumnIndex(mafColumnIndexes)) { - throw new QMuleException("NO_COLUMN_INDEX", mafFile.getAbsolutePath()); - } - checkForMissingColumnIndex = false; - } - addToMafRecordMap(rec, count); - inputMafRecordCount++; - } - } - - logger.info("Number of input maf records: " + inputMafRecordCount); - - } finally { - reader.close(); - } - } - - private int compare(File dccFile, int count, TabbedFileWriter writer) throws Exception { - logger.info("Looking in dcc file: " + dccFile.getAbsolutePath()); - int countInMaf = 0; - int total = 0; - boolean checkForMissingColumnIndex = true; - - try (TabbedFileReader reader = new TabbedFileReader(dccFile);) { - if (count == 1) { - TabbedHeader header = reader.getHeader(); - writer.addHeader(header); - } - for (TabbedRecord rec : reader) { - //header - - if (rec.getData().startsWith("analysis_id")) { - //mutation id column - dccColumnIndexes = findColumnIndexesFromHeader(rec); - if (count == 1) { - writer.add(rec); - } - } else { - total++; - if (total % 10000 == 0) { - logger.info("Processed: " + total + " dcc records" ); - } - if (checkForMissingColumnIndex) { - if (missingColumnIndex(mafColumnIndexes)) { - throw new QMuleException("NO_MUTATION_ID", dccFile.getAbsolutePath()); - } - checkForMissingColumnIndex = false; - } - String[] strArray = rec.getDataArray(); - String chr = strArray[dccColumnIndexes[0]].replace("chr", ""); - if (chr.equals("M")) { - chr += "T"; - } - ChrPosition chrPos = new ChrRangePosition(chr, Integer.valueOf(strArray[dccColumnIndexes[1]]), Integer.valueOf(strArray[dccColumnIndexes[2]])); - if (recordInMaf(chrPos, rec)) { - writer.add(rec); - countInMaf++; - } - } - } - } - logger.info("Finished looking in dcc file: " + 
dccFile.getAbsolutePath() + " found " + countInMaf + " maf record/s." ); - return countInMaf; - } - - public void addToMafRecordMap(TabbedRecord rec, int count) throws QMuleException { - String[] strArray = rec.getDataArray(); - - //need to screw around with chr1 vs 1 vs chrMT vs chrM - String chr = strArray[mafColumnIndexes[0]].replace("chr", ""); - - if (chr.equals("M")) { - chr += "T"; - } - ChrPosition chrPos = new ChrRangePosition(chr, Integer.valueOf(strArray[mafColumnIndexes[1]]), Integer.valueOf(strArray[mafColumnIndexes[2]])); - - List recordsAtThisPosition = mafRecords.get(chrPos); - if (null == recordsAtThisPosition) { - recordsAtThisPosition = new ArrayList(2); - mafRecords.put(chrPos, recordsAtThisPosition); - } - recordsAtThisPosition.add(rec); - - } - - public boolean missingColumnIndex(int[] columnIndexes) throws QMuleException { - for (int i =0; i< columnIndexes.length; i++) { - if (columnIndexes[i] == -1) { - throw new QMuleException("NO_COLUMN_INDEX"); - } - } - return false; - } - - public int[] findColumnIndexesFromHeader(TabbedRecord rec) { - int[] mutationColumns = {-1, -1, -1, -1, -1, -1}; - String[] strArray = rec.getDataArray(); - for (int i=0; i recordsAtThisPosition = mafRecords.get(dccChrPos); - if (null != recordsAtThisPosition && ! recordsAtThisPosition.isEmpty()) { - - if (recordsAtThisPosition.size() > 1) { - logger.info("more than 1 record for position: " + dccChrPos); - } - - // check to see if any of the records match our dccRec - List recordsToRemove = new ArrayList<>(2); - - for (TabbedRecord tr : recordsAtThisPosition) { - if (matchOtherColumns(tr, dccRec)) { - matches++; - if (matches > 1) { - throw new QMuleException("T0O_MANY_MATCHES", dccChrPos.toString()); - } - - // remove record from array - recordsToRemove.add(tr); - matchFound = true; - } - } - - // remove records that have been matched - recordsAtThisPosition.removeAll(recordsToRemove); - - // check to see if there are any records left, if not, remove entry from map - if (recordsAtThisPosition.isEmpty()) { - mafRecords.remove(dccChrPos); - } - } - - return matchFound; - } - - public boolean matchOtherColumns(TabbedRecord mafRec, TabbedRecord dccRec) { - String[] mafValues = mafRec.getDataArray(); - String[] dccValues = dccRec.getDataArray(); - - if (mode.equals("snp")) { - if (matchingMutation(mafValues[mafColumnIndexes[3]], dccValues[dccColumnIndexes[3]])) { - return true; - } - } - if (mode.equals("indel")) { - if (matchingMutation(mafValues[mafColumnIndexes[3]], dccValues[dccColumnIndexes[3]]) && - mafValues[mafColumnIndexes[4]].equals(dccValues[dccColumnIndexes[4]]) && - mafValues[mafColumnIndexes[5]].equals(dccValues[dccColumnIndexes[5]])) { - return true; - } - } - - - return false; - } - - public boolean matchingMutation(String mafMutation, String dccMutation) { - if ((mafMutation.equals("SNP") && dccMutation.equals("1")) || - (mafMutation.equals("INS") && dccMutation.equals("2")) || - (mafMutation.equals("DEL") && dccMutation.equals("3"))) { - return true; - } - return false; - } - - public boolean match(ChrPosition mafChrPos, ChrPosition dccChrPos) { - if (mafChrPos.getChromosome().equals(dccChrPos.getChromosome()) - && mafChrPos.getStartPosition() == dccChrPos.getStartPosition() - && mafChrPos.getEndPosition() == dccChrPos.getEndPosition()) { - return true; - } - return false; - } - - - public static void main(String[] args) throws Exception { - MAF2DCC1 sp = new MAF2DCC1(); - LoadReferencedClasses.loadClasses(MAF2DCC1.class); - sp.setup(args); - - int exitStatus = sp.annotate(); - 
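The MAF2DCC1 matching above keys both the MAF and DCC records by chromosome and position, stripping any leading "chr" and promoting mitochondrial "M" to "MT" before building the key. A minimal standalone sketch of that normalisation (the class and method names here are illustrative, not part of the deleted file):

    // Sketch of the chromosome normalisation in MAF2DCC1.addToMafRecordMap() and
    // compare(): drop a leading "chr" and promote mitochondrial "M" to "MT" before
    // the value is used in a position-keyed map. Class name is illustrative.
    public final class ChrKeySketch {
        static String normalise(String chr) {
            String c = chr.startsWith("chr") ? chr.substring(3) : chr;
            return "M".equals(c) ? "MT" : c;
        }
        public static void main(String[] args) {
            System.out.println(normalise("chrM"));  // MT
            System.out.println(normalise("chr12")); // 12
            System.out.println(normalise("X"));     // X
        }
    }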
if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - -} diff --git a/qmule/src/org/qcmg/qmule/Main.java-- b/qmule/src/org/qcmg/qmule/Main.java-- deleted file mode 100644 index fc7560b17..000000000 --- a/qmule/src/org/qcmg/qmule/Main.java-- +++ /dev/null @@ -1,100 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - - - -/** - * The entry point for the command-line SAM/BAM merging tool. - */ -public final class Main { - -// enum Tool { -// GetBamRecords("org.qcmg.qmule.GetBamRecords"); -//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"), -//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"), -//// GetBamRecords("GetBamRecords", "org.qcmg.qmule.GetBamRecords"); -// -//// private final String name; -// private final String fullyQualifiedName; -// -// private Tool(String fullyQualifiedName) { -//// this.name = name; -// this.fullyQualifiedName = fullyQualifiedName; -// } -// -// public String getFullyQualifiedName() { -// return fullyQualifiedName; -// } -// public static Tool getTool(String name) { -// for (Tool t : Tool.values()) { -// if (name.equals(t.name())) return t; -// } -// throw new IllegalArgumentException("Tool not found: " + name); -// } -// } - - /** - * Performs a single merge based on the supplied arguments. Errors will - * terminate the merge and display error and usage messages. - * - * @param args - * the command-line arguments. - * @throws ClassNotFoundException - */ - public static void main(final String[] args) throws ClassNotFoundException { - Options options = null; - try { - options = new Options(args); - } catch (Exception e) { - e.printStackTrace(); - } - System.out.println(Messages.USAGE); - try { - options.displayHelp(); - } catch (Exception e) { - e.printStackTrace(); - } - -// String toolName = options.getToolName(); -// Tool t = Tool.getTool(toolName); -// Class tool = Class.forName(t.getFullyQualifiedName()); -// System.out.println("Class: " + tool.getCanonicalName()); -// // Create the array of Argument Types -// Class[] argTypes = { args.getClass()}; // array is Object! -// // Now find the method -// Method m = null; -// try { -// m = tool.getMethod("main", argTypes); -// } catch (SecurityException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } catch (NoSuchMethodException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } -// System.out.println(m); -// -// // Create the actual argument array -// Object passedArgv[] = { args }; -// -// // Now invoke the method. -// try { -// m.invoke(null, passedArgv); -// } catch (IllegalArgumentException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } catch (IllegalAccessException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } catch (InvocationTargetException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } - -//) Method m = tool.getMethod("main", Object.class); -// m.iinvoke(args); - System.exit(0); - } -} diff --git a/qmule/src/org/qcmg/qmule/Messages.java-- b/qmule/src/org/qcmg/qmule/Messages.java-- deleted file mode 100644 index 302f166f1..000000000 --- a/qmule/src/org/qcmg/qmule/Messages.java-- +++ /dev/null @@ -1,132 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.text.MessageFormat; -import java.util.ResourceBundle; - -/** - * Class used to lookup messages from this package's message bundles. - */ -public final class Messages { - - /** The Constant messages. */ - static final ResourceBundle messages = ResourceBundle - .getBundle("org.qcmg.qmule.messages"); - - /** The Constant ERROR_PREFIX. */ - static final String ERROR_PREFIX = getProgramName() + ": "; - - /** The Constant USAGE. */ - public static final String USAGE = getMessage("USAGE"); - - /** - * Gets the message. - * - * @param identifier the identifier - * @return the message - */ - public static String getMessage(final String identifier) { - return messages.getString(identifier); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param argument the argument - * @return the message - */ - public static String getMessage(final String identifier, final String argument) { - final String message = Messages.getMessage(identifier); - Object[] arguments = { argument }; - return MessageFormat.format(message, arguments); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param arg1 the arg1 - * @param arg2 the arg2 - * @return the message - */ - public static String getMessage(final String identifier, final String arg1, - final String arg2) { - final String message = Messages.getMessage(identifier); - Object[] arguments = { arg1, arg2 }; - return MessageFormat.format(message, arguments); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param arg1 the arg1 - * @param arg2 the arg2 - * @param arg3 the arg3 - * @return the message - */ - public static String getMessage(final String identifier, final String arg1, - final String arg2, final String arg3) { - final String message = Messages.getMessage(identifier); - Object[] arguments = { arg1, arg2, arg3 }; - return MessageFormat.format(message, arguments); - } - - /** - * Gets the message. - * - * @param identifier the identifier - * @param arguments the arguments - * @return the message - */ - public static String getMessage(final String identifier, final Object[] arguments) { - final String message = Messages.getMessage(identifier); - return MessageFormat.format(message, arguments); - } - - /** - * Gets the program name. - * - * @return the program name - */ - static String getProgramName() { - return Messages.class.getPackage().getImplementationTitle(); - } - - /** - * Gets the program version. - * - * @return the program version - */ - static String getProgramVersion() { - return Messages.class.getPackage().getImplementationVersion(); - } - - /** - * Gets the version message. - * - * @return the version message - * @throws Exception the exception - */ - public static String getVersionMessage() throws Exception { - return getProgramName() + ", version " + getProgramVersion(); - } - - /** - * Reconstruct command line. - * - * @param args the args - * @return the string - */ - public static String reconstructCommandLine(final String[] args) { - String result = getProgramName() + " "; - for (final String arg : args) { - result += arg + " "; - } - return result; - } - -} diff --git a/qmule/src/org/qcmg/qmule/Options.java-- b/qmule/src/org/qcmg/qmule/Options.java-- deleted file mode 100644 index c83f4812d..000000000 --- a/qmule/src/org/qcmg/qmule/Options.java-- +++ /dev/null @@ -1,512 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. 
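The Messages class removed above is a thin wrapper around ResourceBundle and MessageFormat. A minimal sketch of that lookup pattern follows; it assumes the org.qcmg.qmule.messages properties bundle is on the classpath, and the message key and argument are shown purely for illustration:

    import java.text.MessageFormat;
    import java.util.ResourceBundle;

    // Sketch of the bundle-backed lookup in the deleted Messages class. The bundle
    // name matches the deleted code; the key and argument are assumed, for illustration only.
    public final class MessagesSketch {
        private static final ResourceBundle MESSAGES =
                ResourceBundle.getBundle("org.qcmg.qmule.messages");

        static String getMessage(String identifier, Object... arguments) {
            return MessageFormat.format(MESSAGES.getString(identifier), arguments);
        }

        public static void main(String[] args) {
            System.out.println(getMessage("INPUT_FILE_READ_ERROR", "input.maf"));
        }
    }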
- * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import static java.util.Arrays.asList; - -import java.io.IOException; -import java.util.List; -import java.util.Properties; - -import joptsimple.OptionParser; -import joptsimple.OptionSet; - -/** - * The Class Options. - */ -public final class Options { - - public enum Ids{ - PATIENT, - SOMATIC_ANALYSIS, - GEMLINE_ANALYSIS, - TUMOUR_SAMPLE, - NORMAL_SAMPLE; - } - - /** The Constant HELP_DESCRIPTION. */ - private static final String HELP_DESCRIPTION = Messages - .getMessage("HELP_OPTION_DESCRIPTION"); - - /** The Constant VERSION_DESCRIPTION. */ - private static final String VERSION_DESCRIPTION = Messages - .getMessage("VERSION_OPTION_DESCRIPTION"); - - /** The Constant INPUT_DESCRIPTION. */ - private static final String INPUT_DESCRIPTION = Messages - .getMessage("INPUT_OPTION_DESCRIPTION"); - - /** The Constant OUTPUT_DESCRIPTION. */ - private static final String OUTPUT_DESCRIPTION = Messages - .getMessage("OUTPUT_OPTION_DESCRIPTION"); - - /** The parser. */ - private final OptionParser parser = new OptionParser(); - - /** The options. */ - private final OptionSet options; - - /** The command line. */ - private final String commandLine; - - /** The input file names. */ - private final String[] inputFileNames; - - /** The output file names. */ - private final String[] outputFileNames; - - /** The log file */ - private String logFile; - - /** The log level */ - private String logLevel; - - private String patientId; - private String somaticAnalysisId; - private String germlineAnalysisId; - private String normalSampleId; - private String tumourSampleId; - private String position; - private String pileupFormat; - private int normalCoverage; - private int numberOfThreads; - private int tumourCoverage; - private int minCoverage; - private String mafMode; - private String gff; - private String fasta; - private String[] gffRegions; - private int noOfBases; - private String mode; - - - private String column; - - private String annotation; - - private String features; - - private String tumour; - - private String normal; - - private String analysis; - - /** - * Instantiates a new options. 
- * - * @param args the args - * @throws Exception the exception - */ - @SuppressWarnings("unchecked") - public Options(final String[] args) throws Exception { - commandLine = Messages.reconstructCommandLine(args); - -// parser.accepts("qmule", "Tool").withRequiredArg().ofType(String.class).describedAs("tool name"); - parser.accepts("output", OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); - parser.accepts("input", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("inputfile"); - parser.accepts("log", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("logfile"); - parser.accepts("loglevel", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("loglevel"); - parser.accepts("help", HELP_DESCRIPTION); - parser.accepts("version", VERSION_DESCRIPTION); - parser.accepts("patientId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("patientId"); - parser.accepts("somaticAnalysisId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("somaticAnalysisId"); - parser.accepts("germlineAnalysisId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("germlineAnalysisId"); - parser.accepts("normalSampleId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("normalSampleId"); - parser.accepts("tumourSampleId", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("tumourSampleId"); - parser.accepts("position", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("position"); - parser.accepts("pileupFormat", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("pileupFormat"); - parser.accepts("normalCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("normalCoverage"); - parser.accepts("numberOfThreads", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("numberOfThreads"); - parser.accepts("tumourCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("tumourCoverage"); - parser.accepts("minCoverage", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class) - .describedAs("minCoverage"); - parser.accepts("mafMode", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("mafMode"); - parser.accepts("mode", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("mode"); - parser.accepts("column", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("column"); - parser.accepts("annotation", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("annotation"); - parser.accepts("gffFile", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("gffFile"); - parser.accepts("fasta", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("fasta"); - parser.accepts("feature", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("feature"); - parser.accepts("tumour", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("tumour"); - parser.accepts("normal", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("normal"); - parser.accepts("analysis", INPUT_DESCRIPTION).withRequiredArg().ofType(String.class) - .describedAs("analysis"); - parser.accepts("verifiedInvalid", INPUT_DESCRIPTION); - parser.accepts("gffRegions", 
INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).withValuesSeparatedBy(',').describedAs("gffRegions"); - parser.accepts("noOfBases", INPUT_DESCRIPTION).withRequiredArg().ofType(Integer.class).describedAs("noOfBases"); - parser.accepts("proportion", Messages - .getMessage("PROPORTION_OPTION_DESCRIPTION")).withRequiredArg().ofType(Double.class); - parser.accepts("stranded", Messages - .getMessage("STRANDED_OPTION_DESCRIPTION")); - parser.accepts("compareAll",Messages.getMessage("COMPAREALL_OPTION")); - - parser.posixlyCorrect(true); - options = parser.parse(args); - - List inputList = options.valuesOf("input"); - inputFileNames = new String[inputList.size()]; - inputList.toArray(inputFileNames); - - List outputList = options.valuesOf("output"); - outputFileNames = new String[outputList.size()]; - outputList.toArray(outputFileNames); - - logFile = (String) options.valueOf("log"); - logLevel = (String) options.valueOf("loglevel"); - - patientId = (String) options.valueOf("patientId"); - somaticAnalysisId = (String) options.valueOf("somaticAnalysisId"); - germlineAnalysisId = (String) options.valueOf("germlineAnalysisId"); - normalSampleId = (String) options.valueOf("normalSampleId"); - tumourSampleId = (String) options.valueOf("tumourSampleId"); - - // WiggleFromPileup specific options - pileupFormat = (String) options.valueOf("pileupFormat"); - if (null != options.valueOf("normalCoverage")) - normalCoverage = (Integer) options.valueOf("normalCoverage"); - if (null != options.valueOf("tumourCoverage")) - tumourCoverage = (Integer) options.valueOf("tumourCoverage"); - // end of WiggleFromPileup specific options - - //compareReferenceRegions - mode = (String) options.valueOf("mode"); - column = (String) options.valueOf("column"); - annotation = (String) options.valueOf("annotation"); - features = (String) options.valueOf("feature"); - position = (String) options.valueOf("position"); - mafMode = (String) options.valueOf("mafMode"); - - gff = (String) options.valueOf("gffFile"); - fasta = (String) options.valueOf("fasta"); - - tumour = (String) options.valueOf("tumour"); - normal = (String) options.valueOf("normal"); - analysis = (String) options.valueOf("analysis"); - - // gffRegions - List gffRegionsArgs = (List) options.valuesOf("gffRegions"); - gffRegions = new String[gffRegionsArgs.size()]; - gffRegionsArgs.toArray(gffRegions); - - // MafAddCPG specific - if (null != options.valueOf("noOfBases")) - noOfBases = (Integer) options.valueOf("noOfBases"); - - // qsignature - if (null != options.valueOf("minCoverage")) - minCoverage = (Integer) options.valueOf("minCoverage"); - - if (null != options.valueOf("numberOfThreads")) - numberOfThreads = (Integer) options.valueOf("numberOfThreads"); - - } - - /** - * - * @param className - * @param args - * @throws Exception - */ - public Options( final Class myclass, final String[] args) throws Exception { - commandLine = Messages.reconstructCommandLine(args); - - parser.acceptsAll( asList("h", "help"), HELP_DESCRIPTION ); -// parser.acceptsAll( asList("v", "version"), VERSION_DESCRIPTION); - parser.acceptsAll( asList("i", "input"), INPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("input"); - parser.acceptsAll(asList("o", "output"), OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); - parser.accepts("log", Messages.getMessage("LOG_OPTION_DESCRIPTION")).withRequiredArg().ofType(String.class).describedAs("logfile"); - parser.accepts("loglevel", 
Messages.getMessage("LOGLEVEL_OPTION_DESCRIPTION")).withRequiredArg().ofType(String.class).describedAs("loglevel"); - - if( myclass.equals(AlignerCompare.class) ){ - parser.accepts("compareAll",Messages.getMessage("COMPAREALL_OPTION")); - parser.acceptsAll( asList("o", "output"), Messages.getMessage("OUTPUT_AlignerCompare")).withRequiredArg().ofType(String.class).describedAs("output"); - }else if(myclass.equals(SubSample.class)) { - parser.accepts("proportion",Messages.getMessage("PROPORTION_OPTION_DESCRIPTION")).withRequiredArg().ofType(Double.class).describedAs("[0,1]"); - }else if(myclass.equals(BAMCompress.class)){ - parser.accepts("compressLevel",Messages.getMessage("COMPRESS_LEVEL_DESCRIPTION") ).withRequiredArg().ofType(Integer.class).describedAs("[0,9]"); - } - - - //else if( myclass.equals(BamMismatchCounts.class)){} - - options = parser.parse(args); - - List inputList = options.valuesOf("input"); - inputFileNames = new String[inputList.size()]; - inputList.toArray(inputFileNames); - - List outputList = options.valuesOf("output"); - outputFileNames = new String[outputList.size()]; - outputList.toArray(outputFileNames); - - } - - public String getTumour() { - return tumour; - } - - public void setTumour(String tumour) { - this.tumour = tumour; - } - - public String getNormal() { - return normal; - } - - public void setNormal(String normal) { - this.normal = normal; - } - - public String getAnalysis() { - return analysis; - } - - public void setAnalysis(String analysis) { - this.analysis = analysis; - } - - /** - * Checks for input option. - * - * @return true, if successful - */ - public boolean hasInputOption() { - return options.has("input"); - } - - /** - * Checks for output option. - * - * @return true, if successful - */ - public boolean hasOutputOption() { - return options.has("o") || options.has("output"); - } - - /** - * Checks for version option. - * - * @return true, if successful - */ - public boolean hasVersionOption() { - return options.has("version"); - } - - public boolean getIncludeInvalid() { - return options.has("verifiedInvalid"); - } - - /** - * Checks for help option. - * - * @return true, if successful - */ - public boolean hasHelpOption() { - return options.has("help"); - } - - public boolean hasCompareAllOption() { - return options.has("compareAll"); - } - - /** - * Checks for log option. - * - * @return true, if successful - */ - public boolean hasLogOption() { - return options.has("log"); - } - - /** - * Checks for non options. - * - * @return true, if successful - */ - public boolean hasNonOptions() { - return 0 != options.nonOptionArguments().size(); - } - - /** - * Gets the input file names. - * - * @return the input file names - */ - public String[] getInputFileNames() { - return inputFileNames; - } - - /** - * Gets the output file names. - * - * @return the output file names - */ - public String[] getOutputFileNames() { - return outputFileNames; - } - - /** - * Gets the command line. 
- * - * @return the command line - */ - public String getCommandLine() { - return commandLine; - } - - public boolean hasStrandedOption() { - return options.has("stranded"); - } - - public String getPosition() { - return position; - } - public String getPileupFormat() { - return pileupFormat; - } - public int getNormalCoverage() { - return normalCoverage; - } - public int getTumourCoverage() { - return tumourCoverage; - } - public int getMinCoverage() { - return minCoverage; - } - public int getNumberOfThreads() { - return numberOfThreads; - } - public String getMafMode() { - return mafMode; - } - public String getGffFile() { - return gff; - } - public String getFastaFile() { - return fasta; - } - - public String getMode() { - return mode; - } - - public int getcompressLevel() throws Exception{ - if(options.has("compressLevel")){ - int l = (int) options.valueOf("compressLevel"); - if(l >= 0 && l <= 9) - return l; - else - throw new Exception("compressLevel must between [0,9]"); - } - - return 5; - } - //subSample - public double getPROPORTION() throws Exception{ - if(options.has("proportion")){ - - double prop = (double) options.valueOf("proportion"); -// double prop = Double.parseDouble( (String) options.valueOf("proportion") ); - if(prop > 0 && prop <= 1){ - return prop; - - } - } - throw new Exception("no proportion are specified"); - } - - - /** - * Display help. - * - * @throws Exception the exception - */ - public void displayHelp() throws IOException { - parser.printHelpOn(System.out); - } - - /** - * Detect bad options. - * - * @throws Exception the exception - */ - public void detectBadOptions() throws Exception { - if (hasNonOptions()) { - throw new Exception("ALL_ARGUMENTS_MUST_BE_OPTIONS"); - } - if (hasOutputOption() && 1 != getOutputFileNames().length) { - throw new Exception("MULTIPLE_OUTPUT_FILES_SPECIFIED"); - } - if (!hasInputOption()) { - throw new Exception("MISSING_INPUT_OPTIONS"); - } - } - - public String getLogFile(){ - return logFile; - } - - public String getLogLevel(){ - return logLevel; - } - - public Properties getIds() { - Properties props = new Properties(); - props.put(Ids.PATIENT, patientId); - props.put(Ids.SOMATIC_ANALYSIS, somaticAnalysisId); - props.put(Ids.GEMLINE_ANALYSIS, germlineAnalysisId); - props.put(Ids.NORMAL_SAMPLE, normalSampleId); - props.put(Ids.TUMOUR_SAMPLE, tumourSampleId); - return props; - } - - public String[] getGffRegions() { - - return gffRegions; - } - - public int getNoOfBases() { - - return noOfBases; - } - - public String getColumn() { - return column; - } - - public String getAnnotation() { - return annotation; - } - - public String[] getFeature() { - if (features != null) { - return features.split(","); - } - return null; - } - -} diff --git a/qmule/src/org/qcmg/qmule/Pileup.java-- b/qmule/src/org/qcmg/qmule/Pileup.java-- deleted file mode 100644 index c1503ab6a..000000000 --- a/qmule/src/org/qcmg/qmule/Pileup.java-- +++ /dev/null @@ -1,101 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
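The Options class above declares its arguments through joptsimple and range-checks numeric values such as compressLevel in [0,9] with a default of 5. A small sketch of that pattern (the standalone class name is illustrative; the option name and default come from the deleted code):

    import joptsimple.OptionParser;
    import joptsimple.OptionSet;

    // Sketch of the joptsimple usage in the deleted Options class: declare a typed
    // option, parse the command line, then range-check the value before using it.
    public final class CompressLevelSketch {
        public static void main(String[] args) throws Exception {
            OptionParser parser = new OptionParser();
            parser.accepts("compressLevel").withRequiredArg().ofType(Integer.class)
                  .describedAs("[0,9]");
            OptionSet options = parser.parse(args);

            int level = 5; // default used by Options.getcompressLevel()
            if (options.has("compressLevel")) {
                int l = (Integer) options.valueOf("compressLevel");
                if (l < 0 || l > 9) {
                    throw new Exception("compressLevel must be between [0,9]");
                }
                level = l;
            }
            System.out.println("compress level: " + level);
        }
    }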
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Comparator; -import java.util.Iterator; -import java.util.Map; -import java.util.TreeMap; -import java.util.Map.Entry; - -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionComparator; -import org.qcmg.common.model.QPileupSimpleRecord; -import org.qcmg.picard.SAMFileReaderFactory; - - -public class Pileup { - private static final Comparator COMPARATOR = new ChrPositionComparator(); - private static QLogger logger = QLoggerFactory.getLogger(Pileup.class); - - Map pileup = new TreeMap(); -// Map pileup = new HashMap(10000000, 0.99f); - - private void engage(String args[]) throws IOException { - - SamReader reader = SAMFileReaderFactory.createSAMFileReader(new File(args[0])); - FileWriter writer = new FileWriter(new File(args[1])); - - int counter = 0; - for (SAMRecord sr : reader) { - parseRecord(sr); - if (++counter % 100000 == 0) { - logger.info("hit " + counter + " reads in bam file, size of pileup map is: " + pileup.size()); - - // output contents of pileup to file to clear memory - // get current chromosome and position an write out - //all records a couple of hundred bases prior to that position - writePileup(writer, sr.getReferenceName(), sr.getAlignmentStart() - 500); - } - } - logger.info("Done!! No of reads in file: " + counter + ", size of pileup map is: " + pileup.size() ); - } - - private void writePileup(FileWriter writer, String chromosome, int position) throws IOException { - ChrPosition chrPos = ChrPointPosition.valueOf(chromosome, position); - - Iterator> iter = pileup.entrySet().iterator(); - - while (iter.hasNext()) { - Map.Entry entry = iter.next(); - if (0 < COMPARATOR.compare(chrPos, entry.getKey())) { - - writer.write(entry.getKey().getChromosome() + "\t" + - entry.getKey().getStartPosition() + "\t" + - entry.getValue().getFormattedString()); - - iter.remove(); - } - } - - } - - private void parseRecord(SAMRecord sr) { - - ChrPosition chrPos; - QPileupSimpleRecord pileupRec; - int position = 0; - - for (byte b : sr.getReadBases()) { - chrPos = ChrPointPosition.valueOf(sr.getReferenceName(), sr.getAlignmentStart() + position++); - pileupRec = pileup.get(chrPos); - if (null == pileupRec) { - pileupRec = new QPileupSimpleRecord(); - pileup.put(chrPos, pileupRec); - } - pileupRec.incrementBase(b); - } - - - } - - - - public static void main(String[] args) throws IOException { - Pileup p = new Pileup(); - p.engage(args); - } -} diff --git a/qmule/src/org/qcmg/qmule/PileupStats.java-- b/qmule/src/org/qcmg/qmule/PileupStats.java-- deleted file mode 100644 index e2ea6d844..000000000 --- a/qmule/src/org/qcmg/qmule/PileupStats.java-- +++ /dev/null @@ -1,254 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
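Pileup above accumulates per-position counts in a sorted map and, every 100000 reads, writes out and discards everything more than 500 bases behind the current read so the map never grows unbounded. A simplified sketch of that rolling flush, using plain long positions in place of ChrPosition (illustrative only):

    import java.util.Iterator;
    import java.util.Map;
    import java.util.TreeMap;

    // Simplified sketch of the rolling flush in Pileup.writePileup(): accumulate
    // per-position counts in a sorted map, then periodically write out and drop
    // every entry that lies well behind the position currently being read.
    public final class RollingFlushSketch {
        private final TreeMap<Long, Integer> counts = new TreeMap<>();

        void add(long position) {
            counts.merge(position, 1, Integer::sum);
        }

        // Flush entries more than `window` bases behind `currentPosition`.
        void flushBefore(long currentPosition, long window, StringBuilder out) {
            Iterator<Map.Entry<Long, Integer>> it =
                    counts.headMap(currentPosition - window).entrySet().iterator();
            while (it.hasNext()) {
                Map.Entry<Long, Integer> e = it.next();
                out.append(e.getKey()).append('\t').append(e.getValue()).append('\n');
                it.remove();
            }
        }

        public static void main(String[] args) {
            RollingFlushSketch p = new RollingFlushSketch();
            p.add(100); p.add(100); p.add(900);
            StringBuilder sb = new StringBuilder();
            p.flushBefore(1_000, 500, sb);   // writes and removes position 100 only
            System.out.print(sb);
        }
    }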
- */ -package org.qcmg.qmule; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Map.Entry; -import java.util.TreeMap; - -import htsjdk.samtools.Cigar; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.picard.SAMFileReaderFactory; - -public class PileupStats { - - private String logFile; - private File inputFile; - private File outputFile; - private File bamFile; - private static QLogger logger; - - public int engage() throws Exception { - - BufferedReader reader = new BufferedReader(new FileReader(inputFile)); - BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); - - writer.write(getHeader()); - String line; - int count = 0; - while ((line = reader.readLine()) != null) { - String[] values = line.split("\t"); - - String result = pileup(values[0], new Integer(values[1]), new Integer(values[2])); - - writer.write(line + "\t" + result + "\n"); - //System.out.println(line + "\t " + result); - if (count++ % 1000 == 0) { - logger.info("Number processed: " + count); - } - } - logger.info("Total processed: " + count); - reader.close(); - writer.close(); - - return 0; - } - - private String getHeader() { - return "chr\tposition\tposition\tbed\tbed\tbed\ttotal reads\ttotal unmapped" + - "\ttotal mates unmapped\ttotal indels\ttotal mismatch reads\ttotal soft clips" + - "\ttotal hard clips\ttotal spliced reads\ttotal duplicates\tmismatch counts\tsplice lengths\n"; - } - - private String pileup(String chromosome, int start, int end) throws IOException { - SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile, "silent"); - - SAMRecordIterator iterator = reader.queryOverlapping(chromosome, start, end); - - int totalReads = 0; - int totalMatesUnmapped = 0; - int totalUnmapped = 0; - int totalDuplicates = 0; - int totalMismatches = 0; - int totalSpliced = 0; - int totalSoftClips = 0; - int totalHardClips = 0; - int totalIndels = 0; - TreeMap spliceMap = new TreeMap(); - TreeMap mismatchMap = new TreeMap(); - - while (iterator.hasNext()) { - SAMRecord record = iterator.next(); - if (record.getReadUnmappedFlag()) { - totalUnmapped++; - } else { - totalReads++; - if (record.getDuplicateReadFlag()) { - totalDuplicates++; - } else { - - if (record.getMateUnmappedFlag()) { - totalMatesUnmapped++; - } - - //cigars - Cigar cigar = record.getCigar(); - - for (CigarElement ce : cigar.getCigarElements()) { - if (ce.getOperator().equals(CigarOperator.DELETION) || ce.getOperator().equals(CigarOperator.INSERTION)) { - totalIndels++; - } - - if (ce.getOperator().equals(CigarOperator.SOFT_CLIP)) { - totalSoftClips++; - } - if (ce.getOperator().equals(CigarOperator.HARD_CLIP)) { - totalHardClips++; - } - if (ce.getOperator().equals(CigarOperator.N)) { - totalSpliced++; - Integer length = new Integer(ce.getLength()); - int count = 1; - if (spliceMap.containsKey(length)) { - count += spliceMap.get(length); - } - spliceMap.put(length, count); - } - } - - //MD tag - String mdData = (String) record.getAttribute("MD"); - int matches = tallyMDMismatches(mdData); - if (matches > 0) { - totalMismatches++; - } - int count = 1; - if (mismatchMap.containsKey(matches)) { - count += 
mismatchMap.get(matches); - } - mismatchMap.put(matches, count); - - } - } - - } - - iterator.close(); - reader.close(); - - String spliceCounts = getMapString(spliceMap); - String mismatchCounts = getMapString(mismatchMap); - - String result = totalReads + "\t" + totalUnmapped + "\t" + totalMatesUnmapped + "\t" + totalIndels + "\t" - + totalMismatches + "\t" + totalSoftClips + "\t" + totalHardClips + "\t" + totalSpliced + "\t" + totalDuplicates - + "\t" + mismatchCounts + "\t" + spliceCounts; - return result; - } - - private String getMapString(TreeMap map) { - StringBuilder sb = new StringBuilder(); - - for (Entry entry: map.entrySet()) { - sb.append(entry.getKey() + ":" + entry.getValue() + ";"); - } - - return sb.toString(); - } - - public int tallyMDMismatches(String mdData) { - int count = 0; - if (null != mdData) { - for (int i = 0, size = mdData.length() ; i < size ; ) { - char c = mdData.charAt(i); - if (isValidMismatch(c)) { - count++; - i++; - } else if ('^' == c) { - while (++i < size && Character.isLetter(mdData.charAt(i))) {} - } else i++; // need to increment this or could end up with infinite loop... - } - } - return count; - } - - private boolean isValidMismatch(char c) { - return c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N'; - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(PileupStats.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("PileupStats", PileupStats.class.getPackage().getImplementationVersion(), args); - - // get list of file names - String[] cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 2) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - String[] cmdLineOutputFiles = options.getOutputFileNames(); - if ( ! 
FileUtils.canFileBeWrittenTo(cmdLineOutputFiles[0])) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", cmdLineOutputFiles[0]); - } - - for (String file : cmdLineOutputFiles) { - if (new File(file).exists() && !new File(file).isDirectory()) { - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", file); - } - } - - bamFile = new File(cmdLineInputFiles[0]); - inputFile = new File(cmdLineInputFiles[1]); - outputFile = new File(cmdLineOutputFiles[0]); - logger.info("Bam file: " + bamFile); - logger.info("Input file: " + inputFile); - logger.info("Output file: " + outputFile); - - } - - return returnStatus; - } - - public static void main(String[] args) throws Exception { - PileupStats sp = new PileupStats(); - sp.setup(args); - int exitStatus = sp.engage(); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } -} diff --git a/qmule/src/org/qcmg/qmule/QMuleException.java-- b/qmule/src/org/qcmg/qmule/QMuleException.java-- deleted file mode 100644 index 2e85e03f0..000000000 --- a/qmule/src/org/qcmg/qmule/QMuleException.java-- +++ /dev/null @@ -1,28 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -public final class QMuleException extends Exception { - private static final long serialVersionUID = -4575755996356751582L; - - public QMuleException(final String identifier) { - super(Messages.getMessage(identifier)); - } - - public QMuleException(final String identifier, final String argument) { - super(Messages.getMessage(identifier, argument)); - } - - public QMuleException(final String identifier, final String arg1, final String arg2) { - super(Messages.getMessage(identifier, arg1, arg2)); - } - - public QMuleException(final String identifier, final String arg1, final String arg2, final String arg3) { - super(Messages.getMessage(identifier, arg1, arg2, arg3)); - } - - public QMuleException(final String identifier, final Object[] arguments) { - super(Messages.getMessage(identifier, arguments)); - } -} diff --git a/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- b/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- deleted file mode 100644 index eece05fe3..000000000 --- a/qmule/src/org/qcmg/qmule/QueryCADDLib.java-- +++ /dev/null @@ -1,187 +0,0 @@ -/** - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
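PileupStats.tallyMDMismatches above walks a SAM MD tag, counting substitution bases and skipping the reference run that follows a '^' because it describes a deletion rather than mismatches. A standalone sketch of that walk; the example MD value is hypothetical:

    // Sketch of the MD-tag walk in PileupStats.tallyMDMismatches(): count mismatch
    // bases (A/C/G/T/N) and skip the letters that follow '^', which spell out a
    // deleted reference segment rather than substitutions.
    public final class MdTagSketch {
        static int tallyMismatches(String md) {
            int count = 0;
            if (md == null) return 0;
            for (int i = 0, size = md.length(); i < size; ) {
                char c = md.charAt(i);
                if ("ACGTN".indexOf(c) >= 0) {
                    count++;
                    i++;
                } else if (c == '^') {
                    while (++i < size && Character.isLetter(md.charAt(i))) { }
                } else {
                    i++; // digits encoding runs of matching bases
                }
            }
            return count;
        }

        public static void main(String[] args) {
            // 10 matches, mismatch A, 5 matches, deletion of AC, 6 matches, mismatch T, 2 matches
            System.out.println(tallyMismatches("10A5^AC6T2")); // 2
        }
    }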
-*/ -package org.qcmg.qmule; - - -import htsjdk.tribble.readers.TabixReader; - -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.header.VcfHeader; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.vcf.VCFFileReader; - - -public class QueryCADDLib { -// protected final static ArrayList libBlocks = new ArrayList<>(); -// protected final static ArrayList inputBlocks = new ArrayList<>(); -// protected final static ArrayList outputBlocks = new ArrayList<>(); - - protected final static Map positionRecordMap = new HashMap(); - protected static long outputNo = 0; - protected static long blockNo = 0; - protected static long inputNo = 0; - final String CADD = "CADD"; - - public QueryCADDLib(final String input_gzip_file, final String vcf, final String output, final int gap) throws IOException{ - - TabixReader tabix = new TabixReader( input_gzip_file); - String chr = null; - int pos = 0; - int start = -1; - - System.out.println("Below is the stats for each queried block, follow the format \norder: query(ref,start,end) [CADDLibBlockSize, inputVariantNo, outputVariantNo, runtime]"); - - try (VCFFileReader reader = new VCFFileReader(vcf); - FileWriter writer = new FileWriter(new File(output))) { - for (final VcfRecord re : reader){ - if(re.getChromosome().equals(chr) && - (re.getPosition() - pos) < gap ){ - pos = re.getPosition(); - add2Map(re); - }else{ - //s1: query(chr:start:pos), and output - if(chr != null){ - if(chr.startsWith("chr")) chr = chr.substring(3); - TabixReader.Iterator it = tabix.query(chr, start-1, pos); - //debug - System.out.print(String.format("%8d: query(%s, %8d, %8d) ", blockNo++, chr, start, pos)); - query( it, writer ); - - } - //s2: reset -// //debug bf clear -// for( Entry entry: positionRecordMap.entrySet()){ -// if(entry.getValue().getFilter() == null) -// System.out.println(entry.getValue().toString()); -// } - - positionRecordMap.clear(); - chr = re.getChromosome(); - start = re.getPosition(); - pos = re.getPosition(); - add2Map(re); - } - } - //last block - if(chr != null){ - if(chr.startsWith("chr")) chr = chr.substring(3); - TabixReader.Iterator it = tabix.query(chr, start, pos); - query( it, writer ); - } - - }//end try - - System.out.println("total input variants is " + inputNo); - System.out.println("total outputed and annotated variants is " + outputNo); - System.out.println("total query CADD library time is " + blockNo); - - } - - /** - * it remove "chr" string from reference name if exists - * @param re input vcf record - */ - private void add2Map(VcfRecord re){ - ChrPosition chr = re.getChrPosition(); - if(chr.getChromosome().startsWith("chr")) - chr = new ChrRangePosition(re.getChromosome().substring(3), re.getChrPosition().getStartPosition(), re.getChrPosition().getEndPosition()); // orig.getChromosome().substring(3); - - - re.setFilter(null); //for debug - positionRecordMap.put(chr, re); - } - - - private void query(TabixReader.Iterator it,FileWriter writer ) throws IOException{ - long startTime = System.currentTimeMillis(); - - String line; - String[] eles; - String last = null; - - int blockSize = 0; - int outputSize = 0; - - while(( line = it.next())!= null){ - blockSize ++; - eles = TabTokenizer.tokenize(line, '\t'); 
- int s = Integer.parseInt(eles[1]); //start position = second column - int e = s + eles[2].length() - 1; //start position + length -1 - - //only retrive the first annotation entry from CADD library - String entry = eles[0] + ":" + eles[1] + ":" +eles[2]+ ":" + eles[4]; - if(entry.equals(last)) continue; - else last = entry; - - VcfRecord inputVcf = positionRecordMap.get(new ChrRangePosition(eles[0], s, e )); - - if ( (null == inputVcf) || !inputVcf.getRef().equalsIgnoreCase(eles[2])) continue; - - String[] allels = {inputVcf.getAlt()}; - if(inputVcf.getAlt().contains(",")) - allels = TabTokenizer.tokenize(inputVcf.getAlt(), ','); - - String cadd = ""; - - //it will exit loop once find the matched allele - for(String al : allels) - if(al.equalsIgnoreCase(eles[4])){ - cadd = String.format("(%s=>%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s)", eles[2],eles[4],eles[8],eles[10],eles[11],eles[12],eles[17], - eles[21],eles[26],eles[35],eles[39],eles[72],eles[82],eles[83],eles[86],eles[92],eles[92],eles[93],eles[96]); - String info = inputVcf.getInfoRecord().getField(CADD); - info = (info == null)? CADD + "=" + cadd : CADD + "=" + info + "," + cadd; - inputVcf.appendInfo( info); - - writer.append(inputVcf.toString() + "\n"); - outputSize ++; - } - } - - //get stats - long endTime = System.currentTimeMillis(); - String time = QLogger.getRunTime(startTime, endTime); - System.out.println(String.format("[ %8d,%8d,%8d, %s ] ", blockSize, positionRecordMap.size(), outputSize, time)); - inputNo += positionRecordMap.size(); - outputNo += outputSize; - } - - - public static void main(String[] args) { - - long startTime = System.currentTimeMillis(); - try{ - String gzlib = args[0]; - String input = args[1]; - String output = args[2]; - int gap = 1000; - if(args.length > 3) - gap = Integer.parseInt(args[3]); - - new QueryCADDLib(gzlib, input, output, gap); - - }catch(Exception e){ - e.printStackTrace(); - System.err.println("Usage: java -cp qmule-0.1pre.jar QueryCADDLib "); - } - - long endTime = System.currentTimeMillis(); - String time = QLogger.getRunTime(startTime, endTime); - System.out.println("run Time is " + time); - } -} - diff --git a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- b/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- deleted file mode 100644 index 86499809c..000000000 --- a/qmule/src/org/qcmg/qmule/ReAnnotateDccWithDbSNP.java-- +++ /dev/null @@ -1,280 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
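QueryCADDLib above groups nearby input variants into blocks and issues a single tabix query per block rather than one per variant. A minimal sketch of the htsjdk TabixReader iteration it relies on; the file path and region below are placeholders:

    import htsjdk.tribble.readers.TabixReader;

    // Sketch of the block query pattern in QueryCADDLib: open a bgzipped,
    // tabix-indexed file once, then request one region at a time and stream its lines.
    public final class TabixQuerySketch {
        public static void main(String[] args) throws Exception {
            // Placeholder path: any bgzip-compressed, tabix-indexed, tab-delimited file.
            TabixReader tabix = new TabixReader("cadd.tsv.gz");
            TabixReader.Iterator it = tabix.query("1", 10_000, 10_500); // chr, start, end
            String line;
            while ((line = it.next()) != null) {
                System.out.println(line);
            }
            tabix.close();
        }
    }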
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionComparator; -import org.qcmg.common.string.StringUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; -import org.qcmg.vcf.VCFFileReader; - -public class ReAnnotateDccWithDbSNP { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - private String header; - - private static QLogger logger; - - private Map dccs = new HashMap(); - - - public int engage() throws Exception { - - loadDccFile(); - - updateDBSnpData(); - - writeDCCOutput(); - - - return exitStatus; - } - - private void writeDCCOutput() throws Exception { - if ( ! StringUtils.isNullOrEmpty(cmdLineOutputFiles[0])) { - FileWriter writer = new FileWriter(new File(cmdLineOutputFiles[0])); - try { - //sort - List data = new ArrayList(dccs.keySet()); - Collections.sort(data, new ChrPositionComparator()); - - - writer.write(header + "\tdbSnpVer\n"); - - for (ChrPosition cp : data) { - String[] dcc = dccs.get(cp); - StringBuilder sb = new StringBuilder(); - for (String s : dcc) { - if (sb.length() > 0) sb.append('\t'); - sb.append(s); - } - writer.write(sb.toString() + '\n'); - } - - } finally { - writer.close(); - } - } - } - - - private void loadDccFile() throws Exception { - logger.info("Attempting to load dcc data"); - TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); - int count = 0; - try { - for (TabbedRecord rec : reader) { - if (++count == 1) { // header line - header = rec.getData(); - continue; - } - String[] params = TabTokenizer.tokenize(rec.getData()); - ChrPosition cp = ChrPointPosition.valueOf(params[4], Integer.parseInt(params[5])); - - // reset dbsnpid - params[20] = null; -// StringBuilder sb = new StringBuilder(); -// for (String s : params) { -// if (sb.length() > 0) sb.append('\t'); -// sb.append(s); -// } -// rec.setData(sb.toString()); - dccs.put(cp, params); - } - } finally { - reader.close(); - } - logger.info("Attempting to load dcc data - DONE with " + dccs.size() + " entries"); - } - - private void updateDBSnpData() throws Exception { - - VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[1])); - - int count = 0, multipleVersions = 0; - int pre30 = 0, thirty = 0, thirtyOne = 0, thirtyTwo = 0, thirtyThree = 0, thirtyFour = 0, thirtyFive = 0; - try { - for (VcfRecord dbSNPVcf : reader) { - if (++count % 1000000 == 0) - logger.info("hit " + count + " dbsnp records"); - - if ( ! StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "VC=SNV", false)) continue; - // vcf dbSNP record chromosome does not contain "chr", whereas the positionRecordMap does - add - String[] params = dccs.get(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition())); - if (null == params) continue; - - // if no dbsnp data - continue - String previousDBSnpValue = params[20]; - if ( ! 
StringUtils.isNullOrEmpty(previousDBSnpValue)) { - multipleVersions++; - continue; - } - -// logger.info("Resetting previousDBSnpValue of: " + previousDBSnpValue + " to " + dbSNPVcf.getId()); - - // only proceed if we have a SNP variant record - int startIndex = dbSNPVcf.getInfo().indexOf("dbSNPBuildID=") + 13; - int endIndex = dbSNPVcf.getInfo().indexOf(";" , startIndex); - String dbSnpVersion = dbSNPVcf.getInfo().substring(startIndex, endIndex); -// logger.info("dbsnp version = " + dbSnpVersion); - - int dbSnpVersionInt = Integer.parseInt(dbSnpVersion); - if (dbSnpVersionInt < 130) pre30++; - else if (dbSnpVersionInt == 130) thirty++; - else if (dbSnpVersionInt == 131) thirtyOne++; - else if (dbSnpVersionInt == 132) thirtyTwo++; - else if (dbSnpVersionInt == 133) thirtyThree++; - else if (dbSnpVersionInt == 134) thirtyFour++; - else if (dbSnpVersionInt == 135) thirtyFive++; - else if (dbSnpVersionInt > 135) logger.info("hmmm: " + dbSnpVersionInt); - - params[20] = dbSNPVcf.getId(); - params = Arrays.copyOf(params, params.length + 1); - params[params.length -1] = dbSnpVersion; - dccs.put(ChrPointPosition.valueOf(dbSNPVcf.getChromosome(), dbSNPVcf.getPosition()), params); - - -// GenotypeEnum tumour = snpRecord.getTumourGenotype(); -// //TODO should we continue if the tumour Genotype is null?? -// if (null == tumour) continue; -// -// // multiple dbSNP entries can exist for a position. -// // if we already have dbSNP info for this snp, check to see if the dbSNP alt is shorter than the existing dbSNP record -// // if so, proceed, and re-write dbSNP details (if applicable). -// int dbSNPAltLengh = dbSNPVcf.getAlt().length(); -// if (snpRecord.getDbSnpAltLength() > 0 && dbSNPAltLengh > snpRecord.getDbSnpAltLength()) { -// continue; -// } -// -// // deal with multiple alt bases -// String [] alts = null; -// if (dbSNPAltLengh == 1) { -// alts = new String[] {dbSNPVcf.getAlt()}; -// } else if (dbSNPAltLengh > 1){ -// alts = TabTokenizer.tokenize(dbSNPVcf.getAlt(), ','); -// } -// -// if (null != alts) { -// for (String alt : alts) { -// -// GenotypeEnum dbSnpGenotype = BaseUtils.getGenotypeEnum(dbSNPVcf.getRef() + alt); -// if (null == dbSnpGenotype) { -// logger.warn("Couldn't get Genotype from dbSNP position with variant: " + alt); -// continue; -// } -//// // no longer flip the genotype as dbSNP is reporting on the +ve strand -////// if (reverseStrand) { -////// dbSnpGenotype = dbSnpGenotype.getComplement(); -////// } -// if (tumour == dbSnpGenotype || (tumour.isHomozygous() && dbSnpGenotype.containsAllele(tumour.getFirstAllele()))) { -// boolean reverseStrand = StringUtils.doesStringContainSubString(dbSNPVcf.getInfo(), "RV", false); -//// boolean reverseStrand = VcfUtils.isDbSNPVcfRecordOnReverseStrand(dbSNPVcf.getInfo()); -// snpRecord.setDbSnpStrand(reverseStrand ? 
'-' : '+'); -// snpRecord.setDbSnpId(dbSNPVcf.getId()); -// snpRecord.setDbSnpGenotype(dbSnpGenotype); -// snpRecord.setDbSnpAltLength(dbSNPAltLengh); -// break; -// } -// } -// } - } - } finally { - reader.close(); - } - logger.info("STATS:"); - logger.info("No of dcc records with dbSNP version of pre 130: " + pre30); - logger.info("No of dcc records with dbSNP version of 130: " + thirty); - logger.info("No of dcc records with dbSNP version of 131: " + thirtyOne); - logger.info("No of dcc records with dbSNP version of 132: " + thirtyTwo); - logger.info("No of dcc records with dbSNP version of 133: " + thirtyThree); - logger.info("No of dcc records with dbSNP version of 134: " + thirtyFour); - logger.info("No of dcc records with dbSNP version of 135: " + thirtyFive); - logger.info("No of dcc records with duplicate dbSNP versions : " + multipleVersions); - logger.info("Total no of dcc records with dbSNP data : " + (pre30 + thirty + thirtyOne + thirtyTwo + thirtyThree + thirtyFour + thirtyFive)); - } - - public static void main(String[] args) throws Exception { - ReAnnotateDccWithDbSNP sp = new ReAnnotateDccWithDbSNP(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(ReAnnotateDccWithDbSNP.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("ReAnnotateDccWithDbSNP", ReAnnotateDccWithDbSNP.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- b/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- deleted file mode 100644 index ee8018ccc..000000000 --- a/qmule/src/org/qcmg/qmule/ReadPartGZFile.java-- +++ /dev/null @@ -1,152 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
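
updateDBSnpData() in ReAnnotateDccWithDbSNP pulls the dbSNP build number out of the VCF INFO column with indexOf/substring and then tallies records per build. The following is a small self-contained sketch of just that parse-and-tally step; the INFO strings are made up for illustration, not taken from a real dbSNP file.

import java.util.Map;
import java.util.TreeMap;

public class DbSnpBuildTally {

    // Extract the dbSNPBuildID value from a semicolon-delimited VCF INFO string,
    // mirroring the indexOf/substring parsing above. Returns -1 if the field is absent.
    static int buildId(String info) {
        int start = info.indexOf("dbSNPBuildID=");
        if (start < 0) return -1;
        start += "dbSNPBuildID=".length();
        int end = info.indexOf(';', start);
        return Integer.parseInt(end < 0 ? info.substring(start) : info.substring(start, end));
    }

    public static void main(String[] args) {
        String[] infoFields = {
            "RSPOS=10177;dbSNPBuildID=131;VC=SNV",
            "RSPOS=10352;dbSNPBuildID=135;VC=SNV",
            "RSPOS=10616;dbSNPBuildID=132;VC=SNV",
        };
        Map<Integer, Integer> tally = new TreeMap<>();
        for (String info : infoFields) {
            if (!info.contains("VC=SNV")) continue;      // only SNV records, as above
            tally.merge(buildId(info), 1, Integer::sum);
        }
        tally.forEach((build, count) ->
            System.out.println("dbSNP build " + build + ": " + count + " record(s)"));
    }
}
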
- */ -package org.qcmg.qmule; - -import htsjdk.tribble.readers.TabixReader; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.HashSet; -import java.util.Set; -import java.util.zip.GZIPInputStream; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.util.FileUtils; -import org.qcmg.vcf.VCFSerializer; - - -public class ReadPartGZFile { - -// static InputStream getInputStream(File input_gzip_file) throws FileNotFoundException, IOException{ -// InputStream inputStream; -// // if (FileUtils.isFileGZip(input_gzip_file)) { -// if (FileUtils.isInputGZip(input_gzip_file)) { -// GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(input_gzip_file)); -// try(InputStreamReader streamReader = new InputStreamReader(gzis)){ -// inputStream = new GZIPInputStream(new FileInputStream(input_gzip_file)); -// } -// } else { -// FileInputStream stream = new FileInputStream(input_gzip_file); -// try(InputStreamReader streamReader = new InputStreamReader(stream)){ -// BufferedReader in = new BufferedReader(streamReader); -// inputStream = new FileInputStream(input_gzip_file); -// } -// } -// return inputStream; -// } - - - - - ReadPartGZFile(File input_gzip_file, int no) throws Exception{ - - //get a new stream rather than a closed one - InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? - new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); - - try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ - int num = 0; - String line; - while( (line = reader.readLine() ) != null){ - if( ++num > no) break; - System.out.println(line); - } - } - - - } - static void countLines(File input_gzip_file) throws FileNotFoundException, IOException, InterruptedException{ - HashSet uniqRef = new HashSet(); - - long startTime = System.currentTimeMillis(); - long num = 0; -// InputStream inputStream = getInputStream(input_gzip_file); - InputStream inputStream = FileUtils.isInputGZip( input_gzip_file) ? 
- new GZIPInputStream(new FileInputStream(input_gzip_file), 65536) : new FileInputStream(input_gzip_file); - - try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream) )){ - String line; - while( (line = reader.readLine() ) != null){ - uniqRef.add(line.split("\\t")[0]); - num ++; - } - } - - System.out.println(String.format("Read file: %s\nLine number: %d", input_gzip_file.getAbsoluteFile(), num)); - System.out.println("Uniq reference name are " + uniqRef ); - - - } - - static void countUniqPosition(String input_gzip_file, String indexFile) throws IOException{ - // TabixReader tabix = new TabixReader( input_gzip_file, indexFile); - TabixReader tabix = new TabixReader( input_gzip_file); - Set chrs = tabix.getChromosomes(); - HashSet uniqPos = new HashSet(); - long total_uniq = 0; - long num = 0; - System.out.println("total reference number is " + chrs.size() + " from " + input_gzip_file); - for(String str : chrs){ - - uniqPos.clear(); - TabixReader.Iterator it = tabix.query(str); - - - String line; - while(( line = it.next())!= null){ - // String[] eles = TabTokenizer.tokenize(line, '\t'); - // uniqPos.add(eles[1]); - // uniqPos.add(line.split("\\t")[1]); - num ++; - } - - //debug - System.out.println("There are " + num+ " position recorded in reference " + str); - num ++; - - - // total_uniq += uniqPos.size(); - // System.out.println("There are " + uniqPos.size() + " uniq position recorded in reference " + str); - - } - -// System.out.println("Total uniq position recorded in all reference is " + total_uniq); -// System.out.println("Total records in whole file is " + num); - - } - - public static void main(String[] args) { - try{ - long startTime = System.currentTimeMillis(); - File input = new File(args[0]); - int no = Integer.parseInt(args[1]); - - if(no > 0) - new ReadPartGZFile(input, no ); - else if (no == 0) - countUniqPosition(args[0], null); - else - countLines(input); - - long endTime = System.currentTimeMillis(); - String time = QLogger.getRunTime(startTime, endTime); - System.out.println("run Time is " + time); - - }catch(Exception e){ - e.printStackTrace(); - //System.out.println(e.printStackTrace();); - System.err.println("Usage: java -cp qmule-0.1pre.jar ReadPartGZFile "); - - } - - } -} diff --git a/qmule/src/org/qcmg/qmule/ReadsAppend.java-- b/qmule/src/org/qcmg/qmule/ReadsAppend.java-- deleted file mode 100644 index 4c2ce5fab..000000000 --- a/qmule/src/org/qcmg/qmule/ReadsAppend.java-- +++ /dev/null @@ -1,95 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
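
ReadPartGZFile chooses between a GZIPInputStream and a plain FileInputStream via the qcmg FileUtils.isInputGZip() helper. Below is a standalone sketch (an assumption-laden equivalent, not the qcmg implementation) that sniffs the two gzip magic bytes itself and reproduces the line and unique-first-column counting of countLines().

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.util.HashSet;
import java.util.Set;
import java.util.zip.GZIPInputStream;

public class CountGzLines {

    // Open plain or gzip-compressed text transparently by peeking at the two gzip
    // magic bytes (0x1f 0x8b), standing in for FileUtils.isInputGZip() used above.
    static BufferedReader open(File f) throws IOException {
        PushbackInputStream in = new PushbackInputStream(new FileInputStream(f), 2);
        byte[] magic = new byte[2];
        int n = in.read(magic);
        if (n > 0) in.unread(magic, 0, n);
        InputStream stream = (n == 2 && (magic[0] & 0xff) == 0x1f && (magic[1] & 0xff) == 0x8b)
                ? new GZIPInputStream(in, 65536) : in;
        return new BufferedReader(new InputStreamReader(stream));
    }

    public static void main(String[] args) throws IOException {
        File input = new File(args[0]);
        long lines = 0;
        Set<String> uniqueRefs = new HashSet<>();
        try (BufferedReader reader = open(input)) {
            String line;
            while ((line = reader.readLine()) != null) {
                uniqueRefs.add(line.split("\t")[0]);     // first column = reference/contig name
                lines++;
            }
        }
        System.out.println("Lines: " + lines + ", unique first-column values: " + uniqueRefs);
    }
}
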
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.IOException; - -import htsjdk.samtools.*; - -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.List; - -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - - -public class ReadsAppend { - File[] inputs; - File output; - - - ReadsAppend(File output, File[] inputs ) throws Exception{ - this.output = output; - this.inputs = inputs; - merging(); - } - - /** - * retrive the CS and CQ value from BAM record to output csfasta or qual file - * @throws Exception - */ - void merging() throws Exception{ - System.out.println("start time : " + getTime()); - - List readers = new ArrayList<>(); - for (File f: inputs) { - readers.add( SAMFileReaderFactory.createSAMFileReader(f)); - } - - SAMFileHeader header = readers.get(0).getFileHeader().clone(); - - SAMOrBAMWriterFactory factory = new SAMOrBAMWriterFactory(header, true, output,2000000 ); - SAMFileWriter writer = factory.getWriter(); - - for( SamReader reader : readers){ - for( SAMRecord record : reader) { - writer.addAlignment(record); - } - reader.close(); - } - - factory.closeWriter(); - System.out.println("end time : " + getTime()); - System.exit(0); - } - - - private String getTime(){ - Calendar currentDate = Calendar.getInstance(); - SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); - return "[" + formatter.format(currentDate.getTime()) + "]"; - } - public static void main(final String[] args) throws IOException, InterruptedException { - - try{ - if(args.length < 2) - throw new Exception("missing inputs or outputs name"); - - File output = new File(args[0]); - File[] inputs = new File[args.length-1]; - for (int i = 1; i < args.length; i++) { - inputs[i-1] = new File(args[i]) ; - - System.out.println(inputs[i-1].toString()); - } - - - new ReadsAppend(output, inputs ); - - System.exit(0); - }catch(Exception e){ - System.err.println(e.toString()); - Thread.sleep(1); - System.out.println("usage: qmule org.qcmg.qmule.ReadsAppend "); - System.exit(1); - } - - } -} diff --git a/qmule/src/org/qcmg/qmule/RunGatk.java-- b/qmule/src/org/qcmg/qmule/RunGatk.java-- deleted file mode 100644 index b2e13458d..000000000 --- a/qmule/src/org/qcmg/qmule/RunGatk.java-- +++ /dev/null @@ -1,141 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
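
ReadsAppend.merging() above is a straight copy-through merge: take the header from the first BAM, then append every record from every input to one writer. A minimal htsjdk-only sketch of the same idea is shown below. Like the original it passes presorted=true and assumes the inputs share compatible headers and can simply be concatenated in order; it is not a coordinate-aware merge, and it bypasses the qcmg SAMFileReaderFactory/SAMOrBAMWriterFactory wrappers used above.

import java.io.File;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;

public class BamAppendSketch {
    public static void main(String[] args) throws Exception {
        File output = new File(args[0]);

        // Take the header from the first input, as merging() does above.
        SAMFileHeader header;
        try (SamReader first = SamReaderFactory.makeDefault().open(new File(args[1]))) {
            header = first.getFileHeader().clone();
        }

        try (SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, output)) {
            for (int i = 1; i < args.length; i++) {
                try (SamReader reader = SamReaderFactory.makeDefault().open(new File(args[i]))) {
                    for (SAMRecord record : reader) {
                        writer.addAlignment(record);     // straight copy-through, no re-sorting
                    }
                }
            }
        }
    }
}
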
- */ -package org.qcmg.qmule; - - -public class RunGatk { - -// public static String PATH="/panfs/home/oholmes/devel/QCMGScripts/o.holmes/gatk/pbs4java/"; -// public static final String PARAMS=" -l walltime=124:00:00 -v patient="; -// public static int jobCounter = 1; -// -// // inputs -// public static String patientId; -// public static String mixture; -// public static String normalBamFile; -// public static String tumourBamFile; -// public static String outputDir; -// -// public static String patientParams; -// public static String nodeName; -// public static String startPoint; -// -// public static void main(String[] args) throws IOException, InterruptedException, Exception { -// -// if (args.length < 5) throw new IllegalArgumentException("USAGE: RunGatk []"); -// -// patientId = args[0]; -// mixture = args[1]; -// normalBamFile = args[2]; -// tumourBamFile = args[3]; -// outputDir = args[4]; -// if (args.length == 6) { -// PATH = args[5]; -// } -// if (args.length == 7) { -// PATH = args[6]; -// } -// -// patientParams = PARAMS + patientId + ",mixture=" + mixture; -// -// String mergeParams = patientParams + ",normalBam=" + normalBamFile + ",tumourBam=" + tumourBamFile; -// -// -// String jobName = jobCounter++ + "RG_" + mixture; -// System.out.println("About to submit merge job"); -// -// Job merge = new Job(jobName, PATH + "run_gatk_merge_1.sh" + mergeParams); -//// merge.setQueue(queue); -// merge.queue(); -// String status = merge.getStatus(); -// System.out.println("1st job status: " + status); -// while ("N/A".equals(status)) { -// Thread.sleep(1500); -// String [] jobs = Job.SearchJobsByName(jobName, true); -// System.out.println("Sleeping till job status changes..." + status + ", id: " + merge.getId() + " no of jobs: " + jobs.length); -// -// for (int i = 0 ; i < jobs.length ; i++) { -// System.out.println("jobs[" + i + "] : " + jobs[i]); -// merge = Job.getJobById(jobs[i]); -// status = merge.getStatus(); -// System.out.println("job.getJobStatus: " + Job.getJobStatus(jobs[i])); -// -// } -// } -// nodeName = merge.getExecuteNode().substring(0, merge.getExecuteNode().indexOf('/')); -// -// -// -// System.out.println("About to submit clean 1 job"); -// // clean 1 -// String script = PATH + "run_gatk_clean_1.sh" + patientParams; -// Job clean1 = submitDependantJob(merge, "1", script, true); -// -// -// System.out.println("About to submit clean 2 job"); -// // clean 2 -// script = PATH + "run_gatk_clean_2.sh" + patientParams; -// Job clean2 = submitDependantJob(clean1, "1", script, true); -// -// // clean 3 -// script = PATH + "run_gatk_clean_3.sh" + patientParams; -// Job clean3 = submitDependantJob(clean2, "6", script, true); -// -//// String scriptToRun = PATH + "run_gatk_clean_4.sh" + patientParams; -// -// System.out.println("About to submit clean 4 job"); -// script = PATH + "run_gatk_clean_4.sh" + patientParams; -// Job clean4 = submitDependantJob(clean3, "1", script, true); -// -// // split -// System.out.println("About to submit split job"); -// script = PATH + "run_gatk_split.sh" + patientParams; -// Job split = submitDependantJob(clean4, "1", script, true); -// -// runMergeDelUG(split, "ND"); -// runMergeDelUG(split, "TD"); -// } -// -// private static void runMergeDelUG(Job splitJob, String type) throws IOException, InterruptedException, Exception { -// String script = PATH + "run_gatk_merge_2.sh" + patientParams + ",type=" + type; -// Job mergeJob = submitDependantJob(splitJob, "1", script, true); -// -// // delete -// script = PATH + "run_gatk_del_split_files.sh" + 
patientParams + ",type=" + type; -// Job deleteJob = submitDependantJob(mergeJob, "1", script, true); -// -// -// // UG -// script = PATH + "run_gatk_UG.sh" + patientParams + ",type=" + type; -// Job unifiedGenotyperJob = submitDependantJob(mergeJob, "4", script, false); -// -// } -// -// private static Job submitDependantJob(Job depJob, String ppn, String script, boolean onNode) throws IOException, InterruptedException, Exception { -// -// String jobName; -// ArrayList dependantJobs; -// String[] jobs; -// jobName = jobCounter++ + "RG_" + mixture; -// Job newJob = new Job(jobName, script); -//// Job newJob = new Job(jobName, PATH + script + patientParams + ",type=" + type); -//// newJob.setQueue(queue); -// if (onNode) { -// newJob.setExecuteNode(nodeName); -// newJob.setNodes(nodeName); -// } -// newJob.setPpn(ppn); -// dependantJobs = new ArrayList(); -// dependantJobs.add(depJob.getId() + " "); -// newJob.setAfterOK(dependantJobs); -// newJob.queue(); -// // sleep to allow job to make it to the queue -// Thread.sleep(1000); -// -// jobs = Job.SearchJobsByName(jobName, true); -// newJob = Job.getJobById(jobs[0]); -// return newJob; -// } - -} diff --git a/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java-- b/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java-- deleted file mode 100644 index 6730aa5ff..000000000 --- a/qmule/src/org/qcmg/qmule/SmithWatermanGotoh.java-- +++ /dev/null @@ -1,368 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; - -public class SmithWatermanGotoh { - - private final float gapOpen; - private final float gapExtend; - private final int matchScore; - private final int mismatchScore; - private final String sequenceA; - private final String sequenceB; - private final int rows; - private final int columns; - private int[][] pointerMatrix; - private short[][] verticalGaps; - private short[][] horizontalGaps; - private int bestRow; - private int bestColumn; - private float bestScore; - private static final int STOP = 0; - private static final int LEFT = 1; - private static final int DIAGONAL = 2; - private static final int UP = 3; - private static final String GAP = "-"; - private static final String EMPTY = " "; - private static final String MISMATCH = "."; - private static final String MATCH = "|"; - private static final String TAB = ""; - - public SmithWatermanGotoh(File fileA, File fileB, int matchScore, int mismatchScore, float gapOpen, float gapExtend) throws IOException { - - this.sequenceA = readFastaFile(fileA); - this.sequenceB = readFastaFile(fileB); - this.gapOpen = gapOpen; - this.gapExtend = gapExtend; - this.matchScore = matchScore; - this.mismatchScore = mismatchScore; - this.rows = sequenceA.length() + 1;//i - this.columns = sequenceB.length() + 1;//j - align(); - } - - public SmithWatermanGotoh(String a, String b, int matchScore, int mismatchScore, float gapOpen, float gapExtend) { - - this.sequenceA = a; - this.sequenceB = b; - this.gapOpen = gapOpen; - this.gapExtend = gapExtend; - this.matchScore = matchScore; - this.mismatchScore = mismatchScore; - this.rows = sequenceA.length() + 1;//i - this.columns = sequenceB.length() + 1;//j - align(); - } - - private String readFastaFile(File file) throws IOException { - - try (FileInputStream inputStream = new 
FileInputStream(file)) { - StringBuilder buffer = new StringBuilder(); - int ch; - while ((ch = inputStream.read()) != -1) { - buffer.append((char)ch); - } - inputStream.close(); - - String seq = buffer.toString(); - - if (seq.startsWith(">")) { - int index = seq.indexOf("\n"); - return seq.substring(index, seq.length()).replace("\n", "").toUpperCase(); - } else { - return seq.replace("\n", "").toUpperCase(); - } - } - } - - private void align() { - fillMatrix(); - traceback(); -// System.out.println(getDiffs()); - } - - private void fillMatrix() { - //etutorials.org/Misc/blast/Part+II+Theory/Chapter+3.+Sequence+Alignment/3.2+Local+Alignment+Smith-Waterman/ - //Gotoh: http://www.cse.msu.edu/~cse891/Sect001/notes_alignment.pdf - //https://github.com/ekg/smithwaterman/blob/master/SmithWatermanGotoh.cpp - //http://cci.lbl.gov/cctbx_sources/mmtbx/alignment.py - // - //The dynamic programming algorithm was improved in performance by Gotoh (1982) by using the linear -// relationship for a gap weight wx = g + rx, where the weight for a gap of length x is the sum of a gap -// opening penalty (g) and a gap extension penalty (r) times the gap length (x), and by simplifying -// the dynamic programming algorithm. He reasoned that two of the terms that are maximized in the -// dynamic programming algorithm and designated here Pij and Qij depend only on the values in the -// current and previous row and column, as indicated below. - - initialize(); - - //storage for current calculations - float[] bestScores = new float[columns];//score if xi aligns to gap after yi - float[] queryGapScores = new float[columns];//best score of alignment x1..xi to y1..yi - - for (int i=0; i queryGapOpenScore) { - //add extend score - queryGapScores[column] = queryGapExtendScore; - //increase size of gap - int gapLength = verticalGaps[row-1][column] + 1; - verticalGaps[row][column] = (short) gapLength; - } else { - //add open score - queryGapScores[column] = queryGapOpenScore; - } - - //calculate horizontal gaps - referenceGapExtendScore = currentAnchorGapScore - gapExtend; - referenceGapOpenScore = bestScores[column-1] - gapOpen; - - if (referenceGapExtendScore > referenceGapOpenScore) { - //add extend score - currentAnchorGapScore = referenceGapExtendScore; - //increase size of gap - short gapLength = (short) (horizontalGaps[row][column-1] + 1); - horizontalGaps[row][column] = gapLength; - } else { - //add open score - currentAnchorGapScore = referenceGapOpenScore; - } - - //test scores - bestScoreDiagonal = bestScores[column]; - bestScores[column] = findMaximum(totalSimilarityScore, queryGapScores[column], currentAnchorGapScore); - - //determine trackback direction - float score = bestScores[column]; - if (score == 0) { - pointerMatrix[row][column] = STOP; - } else if (score == totalSimilarityScore) { - pointerMatrix[row][column] = DIAGONAL; - } else if (score == queryGapScores[column]) { - pointerMatrix[row][column] = UP; - } else { - pointerMatrix[row][column] = LEFT; - } - - //set current cell if this is the best score - if (score > bestScore) { - bestRow = row; - bestColumn = column; - bestScore = score; - } - } - } - } - - - private void initialize() { - pointerMatrix = new int[rows][columns]; - verticalGaps = new short[rows][columns]; - horizontalGaps = new short[rows][columns]; - for (int i=0; i chromosomes = new ArrayList(); -// private int exitStatus; -// private Map> vcfRecords = new HashMap>(); -// private Map> mafRecords = new HashMap>(); -// private Map> gffRecords = new HashMap>(); -// private Map> 
bedRecords = new HashMap>(); -// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); -// private List overlappingMafRecords = new ArrayList(); -// private List notOverlappingMafRecords = new ArrayList(); -// private List overlappingVcfRecords = new ArrayList(); -// private List notOverlappingVcfRecords = new ArrayList(); -// private int overlapCount = 0; -// private int notOverlappingCount = 0; -// private int snpCount; -// private Vector header = new Vector(); -// private String inputSnpType; -// -// private static QLogger logger; -// -// public int engage() throws Exception { -// inputSnpType = null; -// if (cmdLineInputFiles[0].endsWith("maf")) { -// logger.info("MAF File: " + cmdLineInputFiles[0]); -// loadMafFile(); -// inputSnpType = "MAF"; -// if (mafRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from maf file"); -// } -// if (cmdLineInputFiles[0].endsWith("vcf")) { -// logger.info("VCF File: " + cmdLineInputFiles[0]); -// loadVCFFile(); -// inputSnpType = "VCF"; -// if (vcfRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from vcf file"); -// } -// if (cmdLineInputFiles[1].endsWith("bed")) { -// logger.info("BED File: " + cmdLineInputFiles[1]); -// } else if (cmdLineInputFiles[1].endsWith("gff3")) { -// logger.info("GFF3 File: " + cmdLineInputFiles[1]); -// } -// logger.info("Output file: " + cmdLineOutputFiles[0]); -// logger.info("Output file: " + cmdLineOutputFiles[1]); -// -// String fileType = null; -// if (cmdLineInputFiles[1].endsWith("bed")) { -// loadBedFile(); -// fileType = "bed"; -// } else if (cmdLineInputFiles[1].endsWith("gff3")) { -// fileType = "gff3"; -// loadGffFile(); -// } else { -// throw new IllegalArgumentException("File type for reference regions is not recognized. 
Must be bed or gff3"); -// } -// -// Collections.sort(chromosomes,COMPARATOR); -// -// writeHeader(); -// -// if (fileType.equals("bed")) { -// if (bedRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from bed file"); -// for (String c: chromosomes) { -// if (inputSnpType.equals("MAF")) { -// -// compareBedRecordsToMaf(c, bedRecords.get(c)); -// } -// if (inputSnpType.equals("VCF")) { -// compareBedRecordsToVcf(c, bedRecords.get(c)); -// } -// } -// } -// -// if (fileType.equals("gff3")) { -// if (gffRecords.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); -// for (String c: chromosomes) { -// logger.info("Chromosome: " + c); -// if (inputSnpType.equals("MAF")) { -// compareGFFRecordsToMaf(c, gffRecords.get(c)); -// } -// if (inputSnpType.equals("VCF")) { -// compareGFFRecordsToVcf(c, gffRecords.get(c)); -// } -// } -// } -// -// logger.info("SUMMARY"); -// logger.info("Total Records in " +inputSnpType+ ": " + snpCount); -// logger.info("Total Records in supplied reference regions: " + overlapCount); -// logger.info("Total Records not in supplied reference regions: " + notOverlappingCount); -// return exitStatus; -// } -// -// private void loadVCFFile() throws Exception { -// VCFFileReader reader = new VCFFileReader(new File(cmdLineInputFiles[0])); -// try { -// header = new Vector(); -// Iterator iterator = reader.getRecordIterator(); -// snpCount = 0; -// if (reader.getHeader() != null) { -// Iterator iter = reader.getHeader().iterator(); -// while (iter.hasNext()) { -// header.add(iter.next()); -// } -// } -// while (iterator.hasNext()) { -// -// VCFRecord vcfRec = iterator.next(); -// -// snpCount++; -// if (vcfRecords.containsKey(vcfRec.getChromosome())) { -// vcfRecords.get(vcfRec.getChromosome()).add(vcfRec); -// } else { -// List list = new ArrayList(); -// list.add(vcfRec); -// vcfRecords.put(vcfRec.getChromosome(),list); -// } -// if (!chromosomes.contains(vcfRec.getChromosome())) { -// chromosomes.add(vcfRec.getChromosome()); -// } -// } -// logger.info("loaded maf file, total records: " + snpCount); -// } finally { -// reader.close(); -// } -// } -// -// private void loadMafFile() throws Exception { -// TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); -// try { -// header = new Vector(); -// Iterator iterator = reader.getRecordIterator(); -// snpCount = 0; -// if (reader.getHeader() != null) { -// Iterator iter = reader.getHeader().iterator(); -// while (iter.hasNext()) { -// header.add(iter.next()); -// } -// } -// while (iterator.hasNext()) { -// -// TabbedRecord tab = iterator.next(); -// -// if (tab.getData().startsWith("#") || tab.getData().startsWith("Hugo")) { -// header.add(tab.getData()); -// continue; -// } -// snpCount++; -// MAFRecord mafRec = convertToMafRecord(tab.getData().split("\t")); -// mafRec.setData(tab.getData()); -// if (mafRecords.containsKey(mafRec.getChromosome())) { -// mafRecords.get(mafRec.getChromosome()).add(mafRec); -// } else { -// List list = new ArrayList(); -// list.add(mafRec); -// mafRecords.put(mafRec.getChromosome(),list); -// } -// if (!chromosomes.contains(mafRec.getChromosome())) { -// chromosomes.add(mafRec.getChromosome()); -// } -// } -// logger.info("loaded maf file, total records: " + snpCount); -// } finally { -// reader.close(); -// } -// } -// -// private void loadBedFile() throws IOException { -// BEDFileReader reader = new BEDFileReader(new File(cmdLineInputFiles[1])); -// try { -// int count = 0; -// for (BEDRecord record : reader) { 
-// count++; -// String chr = record.getChrom(); -// if (inputSnpType.equals("MAF")) { -// chr = record.getChrom().replace("chr", ""); -// } -// if (bedRecords.containsKey(chr)) { -// bedRecords.get(chr).put(record.getChromStart(), record); -// } else { -// TreeMap map = new TreeMap(); -// map.put(record.getChromStart(), record); -// bedRecords.put(chr,map); -// } -// } -// logger.info("loaded bed file, total record: " + count); -// } finally { -// reader.close(); -// } -// -// } -// -// private void loadGffFile() throws Exception { -// GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); -// try { -// int count = 0; -// for (GFF3Record record : reader) { -// count++; -// String chr = record.getSeqId(); -// if (inputSnpType.equals("MAF")) { -// chr = record.getSeqId().replace("chr", ""); -// } -// if (gffRecords.containsKey(chr)) { -// gffRecords.get(chr).put(record.getStart(), record); -// } else { -// TreeMap map = new TreeMap(); -// map.put(record.getStart(), record); -// gffRecords.put(chr,map); -// } -// } -// -// logger.info("loaded gff3 file, total record: " + count); -// } finally { -// reader.close(); -// } -// } -// -// public void compareBedRecordsToVcf(String chromosome, TreeMap map) throws IOException { -// List vcfList = vcfRecords.get(chromosome); -// -// //bed positions are zero based -// if (map != null) { -// -// for (VCFRecord snp : vcfList) { -// Entry floor = map.floorEntry(new Integer(snp.getPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getPosition())); -// -// if (vcfRecordFallsInBEDRecord(snp, floor) || vcfRecordFallsInBEDRecord(snp, ceiling)) { -// overlapCount++; -// overlappingVcfRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingVcfRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// } -// } else { -// notOverlappingVcfRecords.addAll(vcfList); -// notOverlappingCount += vcfList.size(); -// } -// writeParsedVcfRecords(); -// } -// -// public void compareBedRecordsToMaf(String chromosome, TreeMap map) throws IOException { -// List mafList = mafRecords.get(chromosome); -// -// //bed positions are zero based -// if (map != null) { -// -// for (MAFRecord snp : mafList) { -// -// Entry floor = map.floorEntry(new Integer(snp.getStartPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getStartPosition())); -// -// if (mafRecordFallsInBEDRecord(snp, floor) || mafRecordFallsInBEDRecord(snp, ceiling)) { -// overlapCount++; -// overlappingMafRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingMafRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// -// } -// } else { -// notOverlappingMafRecords.addAll(mafList); -// notOverlappingCount += mafList.size(); -// } -// writeParsedMafRecords(); -// } -// -// public void compareGFFRecordsToVcf(String chromosome, TreeMap map) throws IOException { -// List vcfList = vcfRecords.get(chromosome); -// -// if (map != null) { -// -// logger.info("List size: " + vcfList.size()); -// for (VCFRecord snp : vcfList) { -// Entry floor = map.floorEntry(new Integer(snp.getPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getPosition())); -// -// if (vcfRecordFallsInGFF3Record(snp, floor) || vcfRecordFallsInGFF3Record(snp, ceiling)) { -// overlapCount++; -// overlappingVcfRecords.add(snp); -// } else { -// notOverlappingCount++; -// 
notOverlappingVcfRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// } -// } else { -// notOverlappingVcfRecords.addAll(vcfList); -// notOverlappingCount += vcfList.size(); -// } -// writeParsedVcfRecords(); -// } -// -// public void compareGFFRecordsToMaf(String chromosome, TreeMap map) throws IOException { -// List mafList = mafRecords.get(chromosome); -// -// if (map != null) { -// -// for (MAFRecord snp : mafList) { -// -// Entry floor = map.floorEntry(new Integer(snp.getStartPosition())); -// Entry ceiling = map.ceilingEntry(new Integer(snp.getStartPosition())); -// -// if (mafRecordFallsInGFF3Record(snp, floor) || mafRecordFallsInGFF3Record(snp, ceiling)) { -// overlapCount++; -// overlappingMafRecords.add(snp); -// } else { -// notOverlappingCount++; -// notOverlappingMafRecords.add(snp); -// if (notOverlappingCount % 10000 == 0) { -// logger.info("Processed records: " + notOverlappingCount); -// } -// } -// } -// } else { -// notOverlappingMafRecords.addAll(mafList); -// notOverlappingCount += mafList.size(); -// } -// writeParsedMafRecords(); -// } -// -// -// private boolean mafRecordFallsInGFF3Record(MAFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getStartPosition() >= entry.getValue().getStart() && snp.getStartPosition() <= entry.getValue().getEnd() || -// snp.getEndPosition() >= entry.getValue().getStart() && snp.getEndPosition() <= entry.getValue().getEnd()) { -// return true; -// } -// } -// return false; -// } -// -// private boolean mafRecordFallsInBEDRecord(MAFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getStartPosition() >= entry.getValue().getChromStart()+1 && snp.getStartPosition() <= entry.getValue().getChromEnd() || -// snp.getEndPosition() >= entry.getValue().getChromStart()+1 && snp.getEndPosition() <= entry.getValue().getChromEnd()) { -// return true; -// } -// } -// return false; -// } -// -// private boolean vcfRecordFallsInGFF3Record(VCFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getPosition() >= entry.getValue().getStart() && snp.getPosition() <= entry.getValue().getEnd()) { -// return true; -// } -// } -// return false; -// } -// -// private boolean vcfRecordFallsInBEDRecord(VCFRecord snp, Entry entry) { -// if (entry != null) { -// if (snp.getPosition() >= entry.getValue().getChromStart()+1 && snp.getPosition() <= entry.getValue().getChromEnd()) { -// return true; -// } -// } -// return false; -// } -// -// public String[] getCmdLineInputFiles() { -// return cmdLineInputFiles; -// } -// -// public void setCmdLineInputFiles(String[] cmdLineInputFiles) { -// this.cmdLineInputFiles = cmdLineInputFiles; -// } -// -// public String[] getCmdLineOutputFiles() { -// return cmdLineOutputFiles; -// } -// -// public void setCmdLineOutputFiles(String[] cmdLineOutputFiles) { -// this.cmdLineOutputFiles = cmdLineOutputFiles; -// } -// -// private void writeParsedMafRecords() throws IOException { -// writeMafRecordsToFile(cmdLineOutputFiles[0], overlappingMafRecords); -// writeMafRecordsToFile(cmdLineOutputFiles[1], notOverlappingMafRecords); -// } -// -// private void writeParsedVcfRecords() throws IOException { -// writeVcfRecordsToFile(cmdLineOutputFiles[0], overlappingVcfRecords); -// writeVcfRecordsToFile(cmdLineOutputFiles[1], notOverlappingVcfRecords); -// } -// -// private void writeHeader() throws IOException { -// writeHeader(cmdLineOutputFiles[0]); -// writeHeader(cmdLineOutputFiles[1]); -// } -// -// private void 
writeHeader(String fileName) throws IOException { -// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); -// -// for (String h: header) { -// writer.write(h + "\n"); -// } -// writer.close(); -// } -// -// private void writeMafRecordsToFile(String fileName, -// List outputRecords) throws IOException { -// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); -// -// for (MAFRecord r: outputRecords) { -// writer.write(r.getData() + "\n"); -// } -// -// writer.close(); -// outputRecords.clear(); -// } -// -// private void writeVcfRecordsToFile(String fileName, -// List outputRecords) throws IOException { -// BufferedWriter writer = new BufferedWriter(new FileWriter(new File(fileName), true)); -// -// for (VCFRecord r: outputRecords) { -// writer.write(r.toString() + "\n"); -// } -// -// writer.close(); -// outputRecords.clear(); -// } -// -// -// protected int setup(String args[]) throws Exception{ -// int returnStatus = 1; -// if (null == args || args.length == 0) { -// System.err.println(Messages.USAGE); -// System.exit(1); -// } -// Options options = new Options(args); -// -// if (options.hasHelpOption()) { -// System.err.println(Messages.USAGE); -// options.displayHelp(); -// returnStatus = 0; -// } else if (options.hasVersionOption()) { -// System.err.println(Messages.getVersionMessage()); -// returnStatus = 0; -// } else if (options.getInputFileNames().length < 1) { -// System.err.println(Messages.USAGE); -// } else if ( ! options.hasLogOption()) { -// System.err.println(Messages.USAGE); -// } else { -// // configure logging -// logFile = options.getLogFile(); -// logger = QLoggerFactory.getLogger(SnpToReferenceRegionFilter.class, logFile, options.getLogLevel()); -// logger.logInitialExecutionStats("SnpMafBedFileComparison", SnpToReferenceRegionFilter.class.getPackage().getImplementationVersion(), args); -// -// // get list of file names -// cmdLineInputFiles = options.getInputFileNames(); -// if (cmdLineInputFiles.length < 1) { -// throw new QMuleException("INSUFFICIENT_ARGUMENTS"); -// } else { -// // loop through supplied files - check they can be read -// for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { -// if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { -// throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); -// } -// } -// } -// -// // check supplied output files can be written to -// if (null != options.getOutputFileNames()) { -// cmdLineOutputFiles = options.getOutputFileNames(); -// for (String outputFile : cmdLineOutputFiles) { -// if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) -// throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); -// } -// } -// return engage(); -// } -// return returnStatus; -// } -// -// -// public static void main(String[] args) throws Exception { -// SnpToReferenceRegionFilter sp = new SnpToReferenceRegionFilter(); -// int exitStatus = sp.setup(args); -// if (null != logger) -// logger.logFinalExecutionStats(exitStatus); -// -// System.exit(exitStatus); -// } -// -// public static MAFRecord convertToMafRecord(String[] params) { -// MAFRecord maf = new MAFRecord(); -//// System.out.println(params[0]); -//// maf.setHugoSymbol(params[0]); -//// maf.setEntrezGeneId(params[1]); -//// maf.setCenter(params[2]); -//// maf.setNcbiBuild(Integer.parseInt(params[3])); -// maf.setChromosome(params[0]); -// maf.setStartPosition(Integer.parseInt(params[1])); -// maf.setEndPosition(Integer.parseInt(params[1])); -//// maf.setStrand(params[7].charAt(0)); -//// maf.setVariantClassification(params[8]); -//// maf.setVariantType(params[9]); -//// maf.setRef(params[10]); -//// maf.setTumourAllele1(params[11]); -//// maf.setTumourAllele2(params[12]); -//// maf.setDbSnpId(params[13]); -//// maf.setDbSnpValStatus(params[14]); -//// maf.setTumourSampleBarcode(params[15]); -//// maf.setNormalSampleBarcode(params[16]); -//// maf.setNormalAllele1(params[17]); -//// maf.setNormalAllele2(params[18]); -//// maf.setTumourValidationAllele1(params[19]); -//// maf.setTumourValidationAllele2(params[20]); -//// maf.setNormalValidationAllele1(params[21]); -//// maf.setNormalValidationAllele2(params[22]); -//// maf.setVerificationStatus(params[23]); -//// maf.setValidationStatus(params[24]); -//// maf.setMutationStatus(params[25]); -//// maf.setSequencingPhase(params[26]); -//// maf.setSequencingSource(params[27]); -//// maf.setValidationMethod(params[28]); -//// maf.setScore(params[29]); -//// maf.setBamFile(params[30]); -//// maf.setSequencer(params[31]); -//// // QCMG -//// if (params.length > 32) -//// maf.setFlag(params[32]); -//// if (params.length > 33) -//// maf.setNd(params[33]); -//// if (params.length > 34) -//// maf.setTd(params[34]); -//// if (params.length > 35) -//// maf.setCanonicalTranscriptId(params[35]); -//// if (params.length > 36) -//// maf.setCanonicalAAChange(params[36]); -//// if (params.length > 37) -//// maf.setCanonicalBaseChange(params[37]); -//// if (params.length > 38) -//// maf.setAlternateTranscriptId(params[38]); -//// if (params.length > 39) -//// maf.setAlternateAAChange(params[39]); -//// if (params.length > 40) -//// maf.setAlternateBaseChange(params[40]); -// -// return maf; -// } -// -// public List getChromosomes() { -// return chromosomes; -// } -// -// public void setChromosomes(List chromosomes) { -// this.chromosomes = chromosomes; -// } -// -// public Map> getMafRecords() { -// return mafRecords; -// } -// -// public void setMafRecords(Map> mafRecords) { -// this.mafRecords = mafRecords; -// } -// -// public List getOverlappingRecords() { -// return overlappingMafRecords; -// } -// -// public void setOverlappingRecords(List overlappingRecords) { -// this.overlappingMafRecords = overlappingRecords; -// } -// -// public List getNotOverlappingRecords() { -// return notOverlappingMafRecords; -// } -// -// public void setNotOverlappingRecords(List notOverlappingRecords) { -// this.notOverlappingMafRecords = notOverlappingRecords; -// } -// -// public int getOverlapCount() { -// return overlapCount; -// } -// -// public void setOverlapCount(int overlapCount) { -// this.overlapCount = 
overlapCount; -// } -// -// public int getNotOverlappingCount() { -// return notOverlappingCount; -// } -// -// public void setNotOverlappingCount(int notOverlappingCount) { -// this.notOverlappingCount = notOverlappingCount; -// } -// -// public int getMafCount() { -// return snpCount; -// } -// -// public void setMafCount(int mafCount) { -// this.snpCount = mafCount; -// } -// -// -// -//} diff --git a/qmule/src/org/qcmg/qmule/SubSample.java-- b/qmule/src/org/qcmg/qmule/SubSample.java-- deleted file mode 100644 index 63f71a737..000000000 --- a/qmule/src/org/qcmg/qmule/SubSample.java-- +++ /dev/null @@ -1,165 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.picard.HeaderUtils; -import org.qcmg.picard.SAMFileReaderFactory; - -import htsjdk.samtools.*; - -public class SubSample { - SamReader reader; - SAMFileWriter writer; - double proportion; - QLogger logger; - - SubSample(Options op, QLogger log) throws Exception{ - - proportion = op.getPROPORTION(); - logger = log; - - String[] inputs =op.getInputFileNames(); - String[] outputs =op.getOutputFileNames(); - if(inputs.length == 0 || outputs.length == 0) - throw new Exception("please specify input/output"); - - //get initialized logger - File input = new File(inputs[0]); - File output = new File(outputs[0]); - if(!input.canRead()) - throw new Exception("unreadable input: " + input.getAbsolutePath()); - - reader = SAMFileReaderFactory.createSAMFileReader(input,ValidationStringency.LENIENT); - SAMFileHeader header = reader.getFileHeader(); - if(header.getSortOrder() != SAMFileHeader.SortOrder.queryname){ - throw new Exception("the input BAM is not sorted by queryname"); - } - SAMFileWriterFactory writeFactory = new SAMFileWriterFactory(); - HeaderUtils.addProgramRecord(header, op.getCommandLine(), null ); - - writer = writeFactory.makeSAMOrBAMWriter(header, false, output ); - - - } - - void run() throws Exception{ - int numPair = 0; - int numSingle = 0; - int numtotal = 0; - SAMRecordIterator ie = reader.iterator(); - ArrayList adjacents = new ArrayList(); - adjacents.add(ie.next()); - - while(ie.hasNext()){ - numtotal ++; - SAMRecord record = ie.next(); - - //select reads - if(! record.getReadName().equals(adjacents.get(0).getReadName())){ - //select pairs - if(adjacents.size() > 1) - numPair += selectPair( adjacents); - //select single - else if(Math.random() < proportion ){ - writer.addAlignment(adjacents.get(0)); - numSingle ++; - } - //after reporting clear the arraylist - adjacents.clear(); - } - adjacents.add(record); - - } - - //select last records - if(adjacents.size() > 1) - selectPair( adjacents); - else if(Math.random() < proportion ) - writer.addAlignment(adjacents.get(0)); - - reader.close(); - writer.close(); - - logger.info("total reads in input is " + numtotal); - logger.info("select paired reads is " + numPair); - logger.info("select single reads is " + numSingle); - logger.info("the rate of selected reads is "+ ((double)(numPair + numSingle)) / numtotal); - - } - - private int selectPair(ArrayList pairs) { - - if(pairs.size() == 0 ){ - logger.error("Program Error: select reads from empty arraylist! 
"); - return 0; - } - if(pairs.size() == 1 ){ - logger.error("program Error: single read in paired arraylist -- " + pairs.get(0).getReadName()); - return 0; - } - - int num = 0; - while(pairs.size() >= 2){ - //seek pair one by one - SAMRecord first = pairs.get(0); - SAMRecord mate = null; - pairs.remove(first); - - for(int i = 0; i < pairs.size(); i ++){ - if(first.getReadGroup().getId().equals(pairs.get(i).getReadGroup().getId())){ - mate = pairs.get(i); - pairs.remove(mate); - break; - } - } - - - if(Math.random() < proportion ){ - num ++; //number of selected paired reads - writer.addAlignment(first); - if(mate != null){ - num ++; - writer.addAlignment(mate); - }else{ - logger.error("paired reads missing mate -- " + pairs.get(0).getReadName()); - } - } - } - - return num; - } - - public static void main(String[] args) throws Exception{ - Options op = new Options(SubSample.class, args); - if(op.hasHelpOption()){ - System.out.println(Messages.getMessage("USAGE_SUBSAMPLE")); - op.displayHelp(); - System.exit(0); - } - - String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - QLogger logger = QLoggerFactory.getLogger(SubSample.class, op.getLogFile(), op.getLogLevel()); - try{ - logger.logInitialExecutionStats(SubSample.class.toString(), version, args); - logger.exec("Porportion " + op.getPROPORTION()); - SubSample mySample = new SubSample(op, logger); - mySample.run(); - logger.logFinalExecutionStats(0); - System.exit(0); - }catch(Exception e){ - System.err.println( e.getMessage() + e.toString()); - logger.logFinalExecutionStats(-1); - System.exit(1); - } - } - -} diff --git a/qmule/src/org/qcmg/qmule/TestFileFinder.java-- b/qmule/src/org/qcmg/qmule/TestFileFinder.java-- deleted file mode 100644 index 28da0aa08..000000000 --- a/qmule/src/org/qcmg/qmule/TestFileFinder.java-- +++ /dev/null @@ -1,23 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; - -import org.qcmg.common.util.FileUtils; - -public class TestFileFinder { - public static void main(String[] args) { - File [] files = FileUtils.findDirectories(args[0], "seq_final", true); - System.out.println("no of files: " + files.length); - for (File f : files) { - System.out.println("file found: " + f.getAbsolutePath()); - } -// File [] files = FileUtils.findFiles(args[0], "java", true); -// System.out.println("no of files: " + files.length); -// for (File f : files) { -// System.out.println("file found: " + f.getAbsolutePath()); -// } - } -} diff --git a/qmule/src/org/qcmg/qmule/TestJarUpdate.java-- b/qmule/src/org/qcmg/qmule/TestJarUpdate.java-- deleted file mode 100644 index c1937f55a..000000000 --- a/qmule/src/org/qcmg/qmule/TestJarUpdate.java-- +++ /dev/null @@ -1,191 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.OutputStream; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.List; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.SAMFileWriterFactory; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.picard.SAMFileReaderFactory; - -public class TestJarUpdate { - - private SAMFileWriter writer; - private SamReader reader; - - private void doWork() throws Exception{ - try { - - LoadReferencedClasses.loadClasses(getClass()); - -// URL className = getClass().getResource(TestJarUpdate.class.getName()); -// if (null != className) -// System.out.println("url: " + className.getFile()); -// else -// System.out.println("url: " + null); -// -// File jarFile = new File(TestJarUpdate.class.getProtectionDomain().getCodeSource().getLocation().toURI()); -// if (null != jarFile) -// System.out.println("jarFile: " + jarFile.getName()); -// else -// System.out.println("jarFile: " + null); -// -// System.out.println("is file type valid jar: " + FileUtils.isFileTypeValid(jarFile, "jar")); -// -// System.out.println("BEFORE: no of loaded packages: " + Package.getPackages().length); -// -// if (FileUtils.isFileTypeValid(jarFile, "jar")) { -// -// // got jar file - load and -// JarFile jf = new JarFile(jarFile); -// Attributes att = jf.getManifest().getMainAttributes(); -// System.out.println("att.size" + att.size()); -// String classpath = att.getValue("Class-Path"); -// System.out.println("classpath: " + classpath); -// -// String [] jars = classpath.split(" "); -// for (String jar : jars) { -// JarFile internalJarFile = new JarFile(jar); -// Enumeration enums = internalJarFile.entries(); -// while (enums.hasMoreElements()) { -// JarEntry je = enums.nextElement(); -// if (FileUtils.isFileTypeValid(je.getName(), "class")) { -// String blah = je.getName().replace(".class", ""); -// blah = blah.replaceAll(System.getProperty("file.separator"), "."); -// System.out.println("about to load class: " + blah); -// this.getClass().getClassLoader().loadClass(blah); -// } -// } -// } -// -// } -// -// System.out.println("AFTER: no of loaded packages: " + Package.getPackages().length); - - - // write to bam file - // sleep for a few mins to allow the sam jar file to be removed/replaced - // close bam file - // tinker with class loader - File inputFile = File.createTempFile("testJarUpdateInput", ".sam"); - inputFile.deleteOnExit(); - File outputFile = File.createTempFile("testJarUpdateOutput", ".bam"); -// outputFile.deleteOnExit(); - - createCoverageSam(inputFile); - - reader = SAMFileReaderFactory.createSAMFileReader(inputFile); - - SAMFileHeader header = reader.getFileHeader(); - List recs = new ArrayList(); - - for( SAMRecord rec : reader) { - recs.add(rec); - } - - - SAMFileWriterFactory factory = new SAMFileWriterFactory(); - - writer = factory.makeSAMOrBAMWriter(header, true, outputFile); - -// for (int i = 0 ; i < 100 ; i++) - for( SAMRecord rec : recs) { - for (int i = 0 ; i < 100 ; i++) - writer.addAlignment(rec); - } - - System.out.println("About to sleep!"); - System.gc(); - Thread.sleep(60000); - System.out.println("Am awake now"); - - close(); - System.out.println("DONE!!!"); - } finally { - System.out.println("about to run close quietly"); - closeQuietly(); - System.out.println("DONE!!! 
again"); - } - } - - - public static void main(String[] args) throws Exception { - TestJarUpdate tju = new TestJarUpdate(); - tju.doWork(); - } - - - private void close() throws Exception { - try { - writer.close(); - reader.close(); - } catch (Exception e) { - System.out.println("Exception caught in close(): "); -// e.printStackTrace(); - throw new Exception("CANNOT_CLOSE_FILES"); - } - } - - private void closeQuietly() { - try { - close(); - } catch (Exception e) { -// e.printStackTrace(); - } - } - - public static final void createCoverageSam(final File fileName) throws Exception { - - OutputStream os = new FileOutputStream(fileName); - PrintStream ps = new PrintStream(os); - - ps.println("@HD VN:1.0 SO:coordinate"); - ps.println("@RG ID:ZZ SM:ES DS:rl=50 "); - ps.println("@RG ID:ZZZ SM:ES DS:rl=50 "); - ps.println("@PG ID:SOLID-GffToSam VN:1.4.3"); - ps.println("@SQ SN:chr1 LN:249250621"); - ps.println("@SQ SN:chr2 LN:243199373"); - ps.println("@SQ SN:chr3 LN:198022430"); - ps.println("@SQ SN:chr4 LN:191154276"); - ps.println("@SQ SN:chr5 LN:180915260"); - ps.println("@SQ SN:chr6 LN:171115067"); - ps.println("@SQ SN:chr7 LN:159138663"); - ps.println("@SQ SN:chr8 LN:146364022"); - ps.println("@SQ SN:chr9 LN:141213431"); - ps.println("@SQ SN:chr10 LN:135534747"); - ps.println("@SQ SN:chr11 LN:135006516"); - ps.println("@SQ SN:chr12 LN:133851895"); - ps.println("@SQ SN:chr13 LN:115169878"); - ps.println("@SQ SN:chr14 LN:107349540"); - ps.println("@SQ SN:chr15 LN:102531392"); - ps.println("@SQ SN:chr16 LN:90354753"); - ps.println("@SQ SN:chr17 LN:81195210"); - ps.println("@SQ SN:chr18 LN:78077248"); - ps.println("@SQ SN:chr19 LN:59128983"); - ps.println("@SQ SN:chr20 LN:63025520"); - ps.println("@SQ SN:chr21 LN:48129895"); - ps.println("@SQ SN:chr22 LN:51304566"); - ps.println("@SQ SN:chrX LN:155270560"); - ps.println("@SQ SN:chrY LN:59373566"); - ps.println("@SQ SN:chrM LN:16571"); - ps.println("1290_738_1025 0 chr1 54026 255 45M5H * 0 0 AACATTCCAAAAGTCAACCATCCAAGTTTATTCTAAATAGATGTG !DDDDDDDDDDDDDDDD''DDDDDD9DDDDDDDDD:<3B''DDD! RG:Z:ZZ CS:Z:T301130201000212101113201021003302230033233111 CQ:Z:BBB=B:@5?>B9A5?>B?'A49<475%@;6<+;9@'4)+8'1?:>"); - ps.println("2333_755_492 16 chr2 10103 255 10H40M * 0 0 CACACCACACCCACACACCACACACCACACCCACACCCAC !=DD?%+DD<)=DDD<@9)9C:DA.:DD>%%,?+%;<-1"); - ps.println("1879_282_595 0 chr3 60775 255 40M10H * 0 0 TCTAAATTTGTTTGATCACATACTCCTTTTCTGGCTAACA !DD,*@DDD''DD>5:DD>;DDDD=CDD8%%DA9-DDC0! RG:Z:ZZ CS:Z:T0223303001200123211133122020003210323011 CQ:Z:=><=,*7685'970/'437(4<:54*:84%%;/3''?;)("); - ps.close(); - os.close(); - } -} diff --git a/qmule/src/org/qcmg/qmule/TestSort.java-- b/qmule/src/org/qcmg/qmule/TestSort.java-- deleted file mode 100644 index cf9faddb6..000000000 --- a/qmule/src/org/qcmg/qmule/TestSort.java-- +++ /dev/null @@ -1,109 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.IOException; -import java.net.InetAddress; -import java.text.SimpleDateFormat; -import java.util.Calendar; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.SAMFileWriterFactory; -import htsjdk.samtools.SAMRecord; - -import org.qcmg.picard.SAMFileReaderFactory; - -public class TestSort { - private final File input; - private final File output; - private final int maxRecordsInRam; - private SAMFileHeader.SortOrder sort = SAMFileHeader.SortOrder.unsorted; - - TestSort(final String[] args) throws Exception{ - input = new File(args[0]); - output = new File(args[1]); - maxRecordsInRam = Integer.parseInt(args[2]); - - String sortOrder = args[3]; - if(sortOrder.equalsIgnoreCase("coordinate")) - sort = SAMFileHeader.SortOrder.coordinate; - else if(sortOrder.equalsIgnoreCase("queryname")) - sort = SAMFileHeader.SortOrder.queryname; - else if(! sortOrder.equalsIgnoreCase("unsorted")) - throw new Exception( sortOrder + " isn't valid SAMFileHeader sort order!"); - - System.out.println(getTime() + " host: " + InetAddress.getLocalHost().getHostName()); - System.out.println(getTime() + " input: " + input.getAbsolutePath()); - System.out.println(getTime() + " output: " + output.getAbsolutePath()); - System.out.println(getTime() + " sort order: " + sortOrder); - System.out.println(getTime() + " max Records In RAM: " + maxRecordsInRam); - } - - public void Sorting() throws Exception{ - SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); - SAMFileHeader header = reader.getFileHeader(); - - SAMFileWriterFactory writeFactory = new SAMFileWriterFactory(); - htsjdk.samtools.SAMFileWriterImpl.setDefaultMaxRecordsInRam(maxRecordsInRam ); - header.setSortOrder(sort); - if(sort.equals(SAMFileHeader.SortOrder.coordinate)) - writeFactory.setCreateIndex(true); - final SAMFileWriter writer = writeFactory.makeSAMOrBAMWriter(header, false, output); - - int num = 0; - for (SAMRecord record : reader) { - if(num % maxRecordsInRam == 0) - printRunInfo(num); - - writer.addAlignment(record); - num ++; - } - -// System.out.println(getTime() + " Merging tmp into output BAM, tmp location are " + htsjdk.samtools.util.IOUtil.getDefaultTmpDir()); - reader.close(); - writer.close(); - - System.out.println(getTime() + " created output: " + output.getAbsolutePath()); - } - - private void printRunInfo(int number) throws IOException{ - Runtime runtime = Runtime.getRuntime(); - int mb = 1024 * 1024; - long totalRAM = runtime.totalMemory() / mb; - long usedRAM = (runtime.totalMemory() - runtime.freeMemory()) / mb; - - String dateNow = getTime(); - - String info = String.format("%s read %d record. 
Total memeory: %dM, used memory: %dM", - dateNow, number, totalRAM, usedRAM); - - System.out.println(info); - } - - private String getTime(){ - Calendar currentDate = Calendar.getInstance(); - SimpleDateFormat formatter= new SimpleDateFormat("yyyy/MMM/dd HH:mm:ss"); - return "[" + formatter.format(currentDate.getTime()) + "]"; - } - - public static void main(final String[] args) { - try{ - TestSort mysort = new TestSort(args); - mysort.Sorting(); - System.exit(0); - }catch(Exception e){ - System.err.println("usage:qmule.TestSort [queryname/coordinate/unsorted]"); - System.err.println(e.toString()); - System.exit(1); - } - - - } -} diff --git a/qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- b/qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- deleted file mode 100644 index c9b4f95f5..000000000 --- a/qmule/src/org/qcmg/qmule/TranscriptomeMule.java-- +++ /dev/null @@ -1,192 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.PileupElement; -import org.qcmg.common.string.StringUtils; -import org.qcmg.common.util.Constants; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.PileupUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.picard.util.PileupElementUtil; -import org.qcmg.pileup.PileupFileReader; - -public class TranscriptomeMule { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private final static int MIN_COVERAGE = 3; - // assuming all the tumours have been merged together, and we only have a single entry -// private static int[] tumourStartPositions = null; - private int[] tumourStartPositions = null; - - private final List positions = new ArrayList<>(100000); - - private static QLogger logger; - - public int engage() throws Exception { - logger.info("loading samtools mpileup data"); - walkPileup(cmdLineInputFiles[0]); - logger.info("loading samtools mpileup data - DONE [" + positions.size() + "]"); - - logger.info("outputting data"); - writeOutput(cmdLineOutputFiles[0]); - logger.info("outputting data - DONE"); - - return exitStatus; - } - - private void writeOutput(String outputFile) throws IOException { - FileWriter writer = new FileWriter(outputFile); - String header = "chr\tposition\tref\tpileup"; - - try { - writer.write(header + "\n"); - for (StringBuilder sb : positions) { - writer.write(sb.toString() + Constants.NEW_LINE); - } -// for (QSnpRecord record : positions) -// writer.write(record.getChromosome() + "\t" -// + record.getPosition() + "\t" -// + record.getRef() + "\t" -// + record.getTumourNucleotides() + "\n"); - } finally { - writer.close(); - } - } - - private void parsePileup(String record) { -// private void parsePileup(PileupRecord record) { - String[] params = TabTokenizer.tokenize(record); -// String[] params = tabbedPattern.split(record.getPileup(), -1); - if (null == tumourStartPositions) { - // set up the number of tumour start positions - // dependent on the number of columns in the input - // HUGE assumption that the mpileup data only contains tumour data here... - - //TODO is this right? 
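- // samtools mpileup adds a (coverage, read bases, base qualities) triple per sample,
- // so the sample count is (columns - 3) / 3 and sample i's columns begin at (i + 1) * 3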
- // first 3 columns are chr pos ref - int noOfSamples = (params.length -3) /3; - tumourStartPositions = new int[noOfSamples]; - for (int i = 0 ; i < noOfSamples ; i++) { - tumourStartPositions[i] = (i+1) * 3; - } - } - - // get coverage for both normal and tumour - int tumourCoverage = PileupUtils.getCoverageCount(params, tumourStartPositions); - if (tumourCoverage < MIN_COVERAGE) return; - - String tumourBases = PileupUtils.getBases(params, tumourStartPositions); - - // means there is an indel at this position - ignore - if (tumourBases.contains("+") || tumourBases.contains("-")) return; - String tumourBaseQualities = PileupUtils.getQualities(params, tumourStartPositions); - - // get bases as PileupElement collections - List tumourBaseCounts = PileupElementUtil.getPileupCounts(tumourBases, tumourBaseQualities); - - // get variant count for both - int tumourVariantCount = PileupElementUtil.getLargestVariantCount(tumourBaseCounts); - - if (tumourVariantCount >= 3) { - // keeper - StringBuilder sb = new StringBuilder(params[0]); - StringUtils.updateStringBuilder(sb, params[1], Constants.TAB); - StringUtils.updateStringBuilder(sb, params[2], Constants.TAB); - StringUtils.updateStringBuilder(sb, PileupElementUtil.getOABS(tumourBaseCounts, params[2].charAt(0)), Constants.TAB); - -// QSnpRecord rec = new QSnpRecord(params[0], Integer.parseInt(params[1]), params[2]); -// rec.setTumourOABS(PileupElementUtil.getOABS(tumourBaseCounts, rec.getRef().charAt(0))); - positions.add(sb); - } - - } - - private void walkPileup(String pileupFileName) throws Exception { - PileupFileReader reader = new PileupFileReader(new File(pileupFileName)); - int count = 0; - try { - for (String record : reader) { -// for (PileupRecord record : reader) { - parsePileup(record); - if (++count % 1000000 == 0) - logger.info("hit " + count + " pileup records, with " + positions.size() + " keepers."); - } - } finally { - reader.close(); - } - } - - public static void main(String[] args) throws Exception { - TranscriptomeMule sp = new TranscriptomeMule(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(TranscriptomeMule.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("Example", TranscriptomeMule.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- deleted file mode 100644 index 222727290..000000000 --- a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java-- +++ /dev/null @@ -1,302 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.regex.Pattern; -import java.util.zip.GZIPOutputStream; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ReferenceNameComparator; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.PileupUtils; -import org.qcmg.qmule.gff3.GFF3FileReader; -import org.qcmg.qmule.gff3.GFF3Record; -import org.qcmg.qmule.gff3.GFF3RecordChromosomeAndPositionComparator; -import org.qcmg.pileup.PileupFileReader; - -public class WiggleFromPileup { - - private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); - private boolean compressOutput; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - private String pileupFormat; - private int normalCoverage, tumourCoverage; - private int noOfNormalFiles, noOfTumourFiles; - private long covGood, covBad, totalCov; - private int[] normalStartPositions, tumourStartPositions; - private String currentChromosome = "chr1"; - - private int lastPosition; - - private final List gffs = new ArrayList(); - - private static GFF3Record gffRecord; - private static Iterator iter; - - private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); - private final static GFF3RecordChromosomeAndPositionComparator CHR_POS_COMP = new GFF3RecordChromosomeAndPositionComparator(); - - - private static QLogger logger; - - public int engage() throws Exception { - - // setup - initialise(); - - loadGffFile(); - - Collections.sort(gffs, CHR_POS_COMP); - - if (gffs.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); - - // parse pileup file - parsePileup(); - - logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); - - return exitStatus; - } - - private void loadGffFile() throws Exception { - GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); - try { - int totalNoOfbaits = 0, ignoredBaits = 0; - for (GFF3Record record : reader) { - totalNoOfbaits++; - if (isGff3RecordBait(record.getType())) { - gffs.add(record); - } else ignoredBaits++; - } - - logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + gffs.size() + ", 
entries that didn't make it: " + ignoredBaits); - } finally { - reader.close(); - } - } - - protected static boolean isGff3RecordBait(String type) { - return "exon".equals(type); - } -// protected static boolean isGff3RecordBait(String type) { -// return "bait_1_100".equals(type) -// || "bait".equals(type) -// || "highbait".equals(type) -// || "lowbait".equals(type); -// } - - private void initialise() { - noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); - noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); - normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); - tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); - -// logger.info("start positions: " + Arrays.deepToString(normalStartPositions) + ", " + Arrays.deepToString(tumourStartPositions)); - } - - private void parsePileup() throws Exception { - Writer writer = getWriter(cmdLineOutputFiles[0]); - - iter = gffs.iterator(); - if (iter.hasNext()) { - setGffRecord(iter.next()); - } else { - throw new RuntimeException("Unable to set next Gff record"); - } - - PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); - StringBuilder sb = new StringBuilder(); - try { - for (String pr : reader) { -// for (PileupRecord pr : reader) { - addWiggleData(pr, sb); -// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); - if (++totalCov % 100000 == 0 && sb.length() > 0) { - writer.write(sb.toString()); - sb = new StringBuilder(); - - if (totalCov % 10000000 == 0) - logger.info("hit " + totalCov + " pileup records"); - } - } - - // empty contents of StringBuilder to writer - if (sb.length() > 0) writer.write(sb.toString()); - - } finally { - writer.close(); - reader.close(); - } - } - - private Writer getWriter(String fileName) throws IOException { - Writer writer = null; - if (compressOutput) { - writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); - } else { - writer = new FileWriter(new File(fileName)); - } - return writer; - } - - protected static boolean isPositionInBait(String chromosome, int position, Iterator iter, GFF3Record currentRecord) { - - if (chromosome.equals(currentRecord.getSeqId())) { - - if (position < currentRecord.getStart()) { - return false; - } else if (position <= currentRecord.getEnd()) { - return true; - } else { - return advanceGff3Record(chromosome, position, iter); - } - } else if (COMPARATOR.compare(chromosome, currentRecord.getSeqId()) < 0) { - // pileup position is in lower chromosome than gffRecord - return false; - } else { - // pileup position is in higher chromosome than gffRecord - // advance iterator - return advanceGff3Record(chromosome, position, iter); - } - } - - private static boolean advanceGff3Record(String chromosome, int position, - Iterator iter) { - if ( ! iter.hasNext()) { - // no more entries in gffs - return false; - } else { - setGffRecord(iter.next()); - return isPositionInBait(chromosome, position, iter, getGffRecord()); - } - } - - private void addWiggleData(String paramString, StringBuilder sb) { - int firstTabIndex = paramString.indexOf('\t'); - String chromosome = paramString.substring(0, firstTabIndex); - int position = Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); - - if ( ! isPositionInBait(chromosome, position, iter, getGffRecord())) return; -// if ( ! 
isPositionInBait(chromosome, position)) return; - - if (position != lastPosition +1 || ! currentChromosome.equalsIgnoreCase(chromosome)) { - // add new header to the StringBuilder - String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; - sb.append(wiggleHeader); - - // update last position and current chromosome - currentChromosome = chromosome; - } - lastPosition = position; - String [] params = tabbedPattern.split(paramString, -1); - - if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { - sb.append("0\n"); - ++covBad; - } else { - if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { - sb.append("1\n"); - ++covGood; - } else { - sb.append("0\n"); - ++covBad; - } - } - } - - public static void main(String[] args) throws Exception { - WiggleFromPileup sp = new WiggleFromPileup(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(WiggleFromPileup.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileup.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - // get app specific options - pileupFormat = options.getPileupFormat(); - normalCoverage = options.getNormalCoverage(); - tumourCoverage = options.getTumourCoverage(); - compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); - - if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. NNTT)"); - if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); - if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 
20)"); - - logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput); - - return engage(); - } - return returnStatus; - } - - protected static void setGffRecord(GFF3Record gffRecord) { - WiggleFromPileup.gffRecord = gffRecord; - } - - protected static GFF3Record getGffRecord() { - return gffRecord; - } -} diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- deleted file mode 100644 index 36c6a7a8f..000000000 --- a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java-- +++ /dev/null @@ -1,307 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.PriorityQueue; -import java.util.zip.GZIPOutputStream; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.PositionRange; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; -import org.qcmg.common.util.PileupUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.qmule.gff3.GFF3FileReader; -import org.qcmg.qmule.gff3.GFF3Record; -import org.qcmg.pileup.PileupFileReader; - -public class WiggleFromPileupTakeTwo { - - private static QLogger logger; -// private final static Pattern tabbedPattern = Pattern.compile("[\\t]"); -// private final static ReferenceNameComparator COMPARATOR = new ReferenceNameComparator(); - - private boolean compressOutput; - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - private String pileupFormat; - private int normalCoverage, tumourCoverage; - private int noOfNormalFiles, noOfTumourFiles; - private long covGood, covBad, totalCov; - private int[] normalStartPositions, tumourStartPositions; - private String currentChromosome; - private String[] gffRegions; - private PriorityQueue currentQueue; - private PositionRange currentRange; - private int lastPosition; - private final Map> regionsOfInterest = new HashMap>(); - - - public int engage() throws Exception { - // setup - initialise(); - - loadGffFile(); - - logger.info("no of entries in regionsOfInterest: " + regionsOfInterest.size()); - - long baseCount = 0; - for (PriorityQueue ranges : regionsOfInterest.values()) { - for (PositionRange pr : ranges) { - baseCount += (pr.getEnd() - pr.getStart()); - } - } - logger.info("total no of bases covered by gff regions of interest: " + baseCount); - - - if (regionsOfInterest.isEmpty()) throw new IllegalArgumentException("No positions loaded from gff3 file"); - - // parse pileup file - parsePileup(); - - logger.info("bases with enough coverage: " + covGood + ", those with not enough coverage: " + covBad + ", total: " + totalCov); - - return exitStatus; - } - - private void loadGffFile() throws Exception { - GFF3FileReader reader = new GFF3FileReader(new File(cmdLineInputFiles[1])); - try { - int totalNoOfbaits = 0, ignoredBaits = 0; - for (GFF3Record record : reader) { - totalNoOfbaits++; - if (isGff3RecordCorrectType(record.getType())) { - 
populateRegionsOfInterest(record); - } else ignoredBaits++; - } - - logger.info("loaded gff3 file, total no of baits: " + totalNoOfbaits + ", entries in collection: " + (totalNoOfbaits - ignoredBaits) + ", entries that didn't make it: " + ignoredBaits); - } finally { - reader.close(); - } - } - - private void populateRegionsOfInterest(GFF3Record record) { - // get collection corresponding to chromosome - PriorityQueue ranges = regionsOfInterest.get(record.getSeqId()); - if (null == ranges) { - ranges = new PriorityQueue(); - ranges.add(new PositionRange(record.getStart(), record.getEnd())); - regionsOfInterest.put(record.getSeqId(), ranges); - } else { - // loop through PositionRanges and see if any are adjacent - // not very efficient, but will do for now - boolean rangeExtended = false; - for (PositionRange pr : ranges) { - if (pr.isAdjacentToEnd(record.getStart())) { - pr.extendRange(record.getEnd()); - rangeExtended = true; - break; - } - } - if ( ! rangeExtended) { - // add new PositionRange - ranges.add(new PositionRange(record.getStart(), record.getEnd())); - } - } - } - - protected boolean isGff3RecordCorrectType(String type) { - for (String regionName : gffRegions) { - if (type.equals(regionName)) return true; - } - return false; - } - - private void initialise() { - noOfNormalFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'N'); - noOfTumourFiles = PileupUtils.getNoOfFilesFromPileupFormat(pileupFormat, 'T'); - normalStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, true); - tumourStartPositions = PileupUtils.getStartPositions(noOfNormalFiles, noOfTumourFiles, false); - } - - private void parsePileup() throws Exception { - Writer writer = getWriter(cmdLineOutputFiles[0]); - - PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); - StringBuilder sb = new StringBuilder(); - try { - for (String pr : reader) { -// for (PileupRecord pr : reader) { - addWiggleData(pr, sb); -// addWiggleData(tabbedPattern.split(pr.getPileup(), -1), sb); - if (++totalCov % 100000 == 0 && sb.length() > 0) { - writer.write(sb.toString()); - sb = new StringBuilder(); - - if (totalCov % 10000000 == 0) - logger.info("hit " + totalCov + " pileup records"); - } - } - - // empty contents of StringBuilder to writer - if (sb.length() > 0) writer.write(sb.toString()); - - } finally { - writer.close(); - reader.close(); - } - } - - private Writer getWriter(String fileName) throws IOException { - Writer writer = null; - if (compressOutput) { - writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(fileName))); - } else { - writer = new FileWriter(new File(fileName)); - } - return writer; - } - - protected boolean isPositionInRegionOfInterest(int position, PriorityQueue ranges) { - if (null == currentRange) return false; - - if (position < currentRange.getStart()) { - return false; - } else if (position <= currentRange.getEnd()) { - return true; - } else { - // advance queue - currentRange = ranges.poll(); - return isPositionInRegionOfInterest(position, ranges); - } - } - - private void addWiggleData(String paramString, StringBuilder sb) { - int firstTabIndex = paramString.indexOf('\t'); - String chromosome = paramString.substring(0, firstTabIndex); - int position = Integer.parseInt(paramString.substring(firstTabIndex+1, paramString.indexOf('\t', firstTabIndex+1))); - boolean chromosomeUpdated = false; - if ( ! 
chromosome.equalsIgnoreCase(currentChromosome)) { - // update last position and current chromosome - currentChromosome = chromosome; - chromosomeUpdated = true; - currentQueue = regionsOfInterest.get(chromosome); - if (null == currentQueue) { - logger.warn("no ranges found for chr: " + chromosome); - currentRange = null; - } else { - currentRange = currentQueue.poll(); - } - } - - if ( ! isPositionInRegionOfInterest(position, currentQueue)) return; - - if (position != lastPosition +1 || chromosomeUpdated) { - String wiggleHeader = "fixedStep chrom=" + chromosome + " start=" + position + " step=1\n"; - sb.append(wiggleHeader); - } - lastPosition = position; - String [] params = TabTokenizer.tokenize(paramString); -// String [] params = tabbedPattern.split(paramString, -1); - - if (PileupUtils.getCoverageCount(params, normalStartPositions) < normalCoverage) { - sb.append("0\n"); - ++covBad; - } else { - if (PileupUtils.getCoverageCount(params, tumourStartPositions) >= tumourCoverage) { - sb.append("1\n"); - ++covGood; - } else { - sb.append("0\n"); - ++covBad; - } - } - } - - public static void main(String[] args) throws Exception { - LoadReferencedClasses.loadClasses(WiggleFromPileupTakeTwo.class); - WiggleFromPileupTakeTwo sp = new WiggleFromPileupTakeTwo(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = 1; - if (null == args || args.length == 0) { - System.err.println(Messages.USAGE); - System.exit(1); - } - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(WiggleFromPileupTakeTwo.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("WiggleFromPileup", WiggleFromPileupTakeTwo.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - // get app specific options - pileupFormat = options.getPileupFormat(); - normalCoverage = options.getNormalCoverage(); - tumourCoverage = options.getTumourCoverage(); - compressOutput = FileUtils.isFileNameGZip(new File(cmdLineOutputFiles[0])); - gffRegions = options.getGffRegions(); - - - if (null == pileupFormat) throw new IllegalArgumentException("Please specify a pileupFormat relating to the pileup file (eg. 
NNTT)"); - if (normalCoverage == 0) throw new IllegalArgumentException("Please specify a normal coverage value (eg. 20)"); - if (tumourCoverage == 0) throw new IllegalArgumentException("Please specify a tumour coverage value (eg. 20)"); - if (gffRegions.length == 0) throw new IllegalArgumentException("Please specify the region names within the gff3 file you are interested in"); - - logger.tool("about to run with pileupFormat: " + pileupFormat + ", normal cov: " + normalCoverage + ", tumour cov: " + tumourCoverage + ", compressOutput: " + compressOutput + ", gff regions: " + Arrays.deepToString(gffRegions)); - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/XCvsZP.java-- b/qmule/src/org/qcmg/qmule/XCvsZP.java-- deleted file mode 100644 index e7973ade9..000000000 --- a/qmule/src/org/qcmg/qmule/XCvsZP.java-- +++ /dev/null @@ -1,117 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule; - -import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import java.lang.Math; - -import org.qcmg.picard.SAMFileReaderFactory; - -import htsjdk.samtools.BAMIndex; -import htsjdk.samtools.BAMIndexMetaData; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecord; - -public class XCvsZP { - - - XCvsZP(File input) throws Exception{ - SamReader reader = SAMFileReaderFactory.createSAMFileReader(input); //new SAMFileReader(input); - - HashMap matric = countToMatric( reader ); - - ArrayList keys = getKeys(matric ); - printMatric(matric, keys); - - reader.close(); - - } - - ArrayList getKeys( HashMap matric ){ - Set myset = new HashSet(); - - Iterator itr = matric.keySet().iterator(); - while( itr.hasNext()){ - String key = itr.next().toString(); - String[] zpxc = key.split("_"); - myset.add(zpxc[0]); - myset.add(zpxc[1]); - } - ArrayList mylist = new ArrayList(myset); - Collections.sort(mylist); - - - return mylist; - } - - - void printMatric( HashMap matric, ArrayList keys ){ - System.out.print("\t\tZP \t(reads_Number/total_number)\n"); - System.out.print("-------------------------------------------------------------------------------------------------------------------------------------------------\n XC\t|" ); - for(int i = 0; i < keys.size(); i ++) - System.out.print( "\t " + keys.get(i) + " "); - - for(int i = 0; i < keys.size(); i ++){ - System.out.print( "\n\t|" + keys.get(i) + "|\t"); - for(int j = 0; j < keys.size(); j ++){ - String xc_zp = keys.get(i) + "_" + keys.get(j); - if(matric.containsKey(xc_zp)) - System.out.print(String.format("%.4f\t", matric.get(xc_zp)) ); - else - System.out.print("-----\t"); - } - } - } - - - HashMap countToMatric( SamReader reader) throws Exception{ - - HashMap matric = new HashMap(); - HashMap rateMatric = new HashMap(); - - long numRead = 0; - for( SAMRecord record : reader){ - String xc = record.getAttribute("XC").toString(); - String zp = record.getAttribute("ZP").toString(); - String key = xc + "_" + zp; - - long value = 1; - if( matric.containsKey(key)) - value = matric.get(key) + 1; - - matric.put(key, value); - numRead ++; - } - - System.out.println("Total number of reads is " + numRead + "\n"); - - //convert to float with %.4f formart - 
for(Map.Entry set: matric.entrySet()){ - String key = set.getKey(); - int value = Math.round((set.getValue() * 10000 )/ numRead ); - rateMatric.put(key, ((float) value/10000 )); - } - - return rateMatric; - } - - - - public static void main(String[] args) throws Exception{ - - XCvsZP vs = new XCvsZP(new File(args[0]) ); - - } -} diff --git a/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- b/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- deleted file mode 100644 index 3154595c1..000000000 --- a/qmule/src/org/qcmg/qmule/bam/CheckBam.java-- +++ /dev/null @@ -1,339 +0,0 @@ -/** - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. -*/ -package org.qcmg.qmule.bam; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SamReader; - -import java.io.File; -import java.util.AbstractQueue; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicLongArray; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.qmule.GetBamRecords; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; - - -public class CheckBam { - - private final static String UNMAPPED_READS = "Unmapped"; - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private File bamFIle; - private int numberOfThreads = 1; - - - private static final int READ_PAIRED_FLAG = 0x1; - private static final int PROPER_PAIR_FLAG = 0x2; - private static final int READ_UNMAPPED_FLAG = 0x4; - private static final int MATE_UNMAPPED_FLAG = 0x8; - private static final int READ_STRAND_FLAG = 0x10; - private static final int MATE_STRAND_FLAG = 0x20; - private static final int FIRST_OF_PAIR_FLAG = 0x40; - private static final int SECOND_OF_PAIR_FLAG = 0x80; - private static final int NOT_PRIMARY_ALIGNMENT_FLAG = 0x100; - private static final int READ_FAILS_VENDOR_QUALITY_CHECK_FLAG = 0x200; - private static final int DUPLICATE_READ_FLAG = 0x400; - private static final int SUPPLEMENTARY_ALIGNMENT_FLAG = 0x800; - - - private int exitStatus; - private static QLogger logger; - - private final AtomicLong counter = new AtomicLong(); - -// long [] flagCounter = new long[5000]; - AtomicLongArray flags = new AtomicLongArray(5000); - - - public int engage() throws Exception { - - logger.info("Get reference contigs from bam header"); - bamFIle = new File(cmdLineInputFiles[0]); - - final AbstractQueue sequences = new ConcurrentLinkedQueue(); - - try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { - if ( ! 
reader.hasIndex() && numberOfThreads > 1) { - logger.warn("Using 1 producer thread - no index found for bam file: " + bamFIle.getAbsolutePath()); - numberOfThreads = 1; - } - - SAMFileHeader header = reader.getFileHeader(); - List samSequences = header.getSequenceDictionary().getSequences(); - List orderedSamSequences = new ArrayList(); - orderedSamSequences.addAll(samSequences); - Collections.sort(orderedSamSequences, new Comparator(){ - @Override - public int compare(SAMSequenceRecord o1, SAMSequenceRecord o2) { - return o2.getSequenceLength() - o1.getSequenceLength(); - } - }); - // add the unmapped reads marker - sequences.add(UNMAPPED_READS); - - for (SAMSequenceRecord rec : orderedSamSequences) { - sequences.add(rec.getSequenceName()); - } - } - - - logger.info("will create " + numberOfThreads + " threads"); - - final CountDownLatch pLatch = new CountDownLatch(numberOfThreads); -// setpup and kick-off single Producer thread - ExecutorService producerThreads = Executors.newFixedThreadPool(numberOfThreads); - if (1 == numberOfThreads) { - producerThreads.execute(new SingleProducer(Thread.currentThread(), pLatch)); - } else { - for (int i = 0 ; i < numberOfThreads ; i++) { - producerThreads.execute(new Producer(Thread.currentThread(), pLatch, sequences)); - } - } - - // don't allow any new threads to start - producerThreads.shutdown(); - - logger.info("waiting for Producer thread to finish"); - pLatch.await(); - logger.info("Producer thread finished, counter size: " + counter.longValue()); - // output flag stats too - long dups = 0; - long sups = 0; - long mapped = 0; - long paired = 0; - long properPair = 0; - long r1 = 0; - long r2 = 0; - for (int i = 0 ; i < flags.length() ; i++) { - long l = flags.get(i); - if (l > 0) { - - if ((i & READ_PAIRED_FLAG) != 0) { - paired += l; - } - if ((i & PROPER_PAIR_FLAG) != 0) { - properPair += l; - } - if ((i & READ_UNMAPPED_FLAG) == 0) { - mapped += l; - } - if ((i & FIRST_OF_PAIR_FLAG) != 0) { - r1 += l; - } - if ((i & SECOND_OF_PAIR_FLAG) != 0) { - r2 += l; - } - if ((i & DUPLICATE_READ_FLAG) != 0) { - dups += l; - } - if ((i & SUPPLEMENTARY_ALIGNMENT_FLAG) != 0) { - sups += l; - } - logger.info("flag: " + i + " : " + l + " hits"); - } - } - logger.info("total read count: " + counter.longValue()); - logger.info("dups: " + dups + " (" + (((double) dups / counter.longValue()) * 100) + "%)"); - logger.info("sups: " + sups + " (" + (((double) sups / counter.longValue()) * 100) + "%)"); - logger.info("mapped: " + mapped + " (" + (((double) mapped / counter.longValue()) * 100) + "%)"); - logger.info("paired: " + paired + " (" + (((double) paired / counter.longValue()) * 100) + "%)"); - logger.info("properPair: " + properPair + " (" + (((double)properPair / counter.longValue()) * 100) + "%)"); - logger.info("r1: " + r1 + " (" + (((double) r1 / counter.longValue()) * 100) + "%)"); - logger.info("r2: " + r2 + " (" + (((double) r2 / counter.longValue()) * 100) + "%)"); - - return exitStatus; - } - - - - public class Producer implements Runnable { - private final Thread mainThread; - private final CountDownLatch pLatch; - private final AbstractQueue sequences; - private final QLogger log = QLoggerFactory.getLogger(Producer.class); - - private final long [] flagCounter = new long[5000]; - - Producer(Thread mainThread, CountDownLatch pLatch, AbstractQueue sequences) { - this.mainThread = mainThread; - this.pLatch = pLatch; - this.sequences = sequences; - } - - @Override - public void run() { - log.debug("Start Producer "); - - long count = 0; - - try 
(SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { - - while (true) { - String sequence = sequences.poll(); - if (null == sequence) break; - SAMRecordIterator iter = UNMAPPED_READS.equals(sequence) ? reader.queryUnmapped() : reader.query(sequence, 0, 0, false) ; - log.info("retrieving records for sequence: " + sequence); - while (iter.hasNext()) { - int flag = iter.next().getFlags(); - flagCounter[flag] ++ ; - // update count for this flag - if (++count % 2000000 == 0) { - log.info("added " + count/1000000 + "M"); - } - } - iter.close(); - } - - } catch (Exception e) { - log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); - mainThread.interrupt(); - } finally { - pLatch.countDown(); - } - // update the shared counter - counter.addAndGet(count); - //update the flag Counter - int i = 0 ; - for (long l : flagCounter) { - if (l > 0) { - flags.addAndGet(i, l); - } - i++; - } - } - } - - public class SingleProducer implements Runnable { - private final Thread mainThread; - private final QLogger log = QLoggerFactory.getLogger(SingleProducer.class); - private final CountDownLatch pLatch; - private final long [] flagCounter = new long[5000]; - - SingleProducer(Thread mainThread, CountDownLatch pLatch) { - this.mainThread = mainThread; - this.pLatch = pLatch; - } - - @Override - public void run() { - log.debug("Start SingleProducer "); - - long count = 0; - - try (SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFIle);) { - - for (SAMRecord r : reader) { - int flag = r.getFlags(); - flagCounter[flag] ++ ; - if (++count % 2000000 == 0) { - log.info("added " + count/1000000 + "M"); - } - } - - } catch (Exception e) { - log.error(Thread.currentThread().getName() + " " + e.getMessage(), e); - mainThread.interrupt(); - } finally { - pLatch.countDown(); - } - // update the shared counter - counter.addAndGet(count); - //update the flag Counter - int i = 0 ; - for (long l : flagCounter) { - if (l > 0) { - flags.addAndGet(i, l); - } - i++; - } - } - } - - public static void main(String[] args) throws Exception { - CheckBam sp = new CheckBam(); - int exitStatus = sp.setup(args); - if (null != logger) { - logger.logFinalExecutionStats(exitStatus); - } - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(GetBamRecords.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CheckBam", CheckBam.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - int nt = options.getNumberOfThreads(); - if (nt > 0) { - numberOfThreads = nt; - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- b/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- deleted file mode 100644 index b480f21f4..000000000 --- a/qmule/src/org/qcmg/qmule/bam/GetContigsFromHeader.java-- +++ /dev/null @@ -1,127 +0,0 @@ -package org.qcmg.qmule.bam; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.Writer; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; - -import org.qcmg.common.log.QLogger; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; - -public class GetContigsFromHeader { - - private static QLogger logger; - - private int setup(String [] args) throws IOException { - /* - * first arg should be the header, - * second arg (if present) should be how many times the genome should be diviied up - */ - - SamReaderFactory factory = SamReaderFactory.make(); - SamReader reader = factory.open(new File(args[0])); - SAMFileHeader header = reader.getFileHeader(); - - SAMSequenceDictionary dict = header.getSequenceDictionary(); - Map map = dict.getSequences().stream().collect(Collectors.groupingBy(SAMSequenceRecord::getSequenceName, Collectors.summingInt(SAMSequenceRecord::getSequenceLength))); - - - - if (args.length > 1 && null != args[1]) { - int numberOfContigs = map.keySet().size(); - long length = map.values().stream().mapToLong(Integer::longValue).sum(); - int numberOfEntries = Integer.parseInt(args[1]) - 1; - - long noOFBasesPerEntry = length / numberOfEntries; - - System.out.println("genome length: " + length + ", numberOfEntries: " + numberOfEntries + ", noOFBasesPerEntry: " + noOFBasesPerEntry + ", numberOfContigs: " + numberOfContigs); - - - Map results = new HashMap<>(); - Set contigs = new HashSet<>(); - - List sortedContigs = map.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).map(e -> e.getKey()).collect(Collectors.toList()); - - - for (String contig : sortedContigs) { - System.out.println("looking at contig: " + contig); - Integer contigLength = map.get(contig); - if ( ! contigs.contains(contig)) { - if (contigLength >= noOFBasesPerEntry) { - results.put(contig, contigLength); - contigs.add(contig); - } else { - AtomicLong basesToMakeUp = new AtomicLong(noOFBasesPerEntry - contigLength); -// long basesToMakeUp = noOFBasesPerEntry - e.getValue(); - StringBuilder key = new StringBuilder(); - key.append(contig); - contigs.add(contig); - while (basesToMakeUp.longValue() > 1000000) { - Optional> e1 = map.entrySet().stream().filter(en -> ! 
contigs.contains(en.getKey())).filter(en -> en.getValue() < basesToMakeUp.longValue()).max((en1, en2) -> en2.getValue() - en1.getValue()); - if (e1.isPresent()) { - key.append(" -L "); - key.append(e1.get().getKey()); - basesToMakeUp.addAndGet( - e1.get().getValue()); - contigs.add(e1.get().getKey()); - } else { - break; - } - } - results.put(key.toString(), (int)noOFBasesPerEntry - basesToMakeUp.intValue()); - } - } - } - - results.forEach((k,v) -> System.out.println("contigs: " + k + ", size: " + v)); - System.out.println("contigs.size(): " + contigs.size()); - - /* - * write file - */ - if (args.length > 2 && null != args[2]) { - try (Writer writer = new FileWriter(args[2]);) { - - /* - * sort according to number of bases - */ - results.entrySet().stream().sorted((e1, e2) -> e2.getValue() - e1.getValue()).forEach(e -> { - try { - writer.write(e.getKey() + "\n"); - } catch (IOException e3) { - // TODO Auto-generated catch block - e3.printStackTrace(); - } - }); - } - } - } - - return 0; - } - - public static void main(String[] args) throws Exception { - GetContigsFromHeader sp = new GetContigsFromHeader(); - int exitStatus = sp.setup(args); - if (null != logger) { - logger.logFinalExecutionStats(exitStatus); - } - - System.exit(exitStatus); - } - -} - diff --git a/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- b/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- deleted file mode 100644 index 707c4726d..000000000 --- a/qmule/src/org/qcmg/qmule/qcnv/CNVseq.java-- +++ /dev/null @@ -1,226 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.qcnv; - -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import htsjdk.samtools.*; - -import java.lang.Math; -import java.io.*; - -import org.qcmg.picard.SAMFileReaderFactory; - - -public class CNVseq { - - private static final boolean String = false; - //in cnv-seq.pl it call below R to get value - //echo 'options(digits=16);qnorm(1-0.5*0.001)' | R --vanilla --slave (result: 3.290526731491926) - public static double bt = 3.290526731491926; - //$echo 'options(digits=16);qnorm(0.5*0.001)' | R --vanilla --slave (result: -3.290526731491894) - public static double st = -3.290526731491894; - - public static double pvalue = 0.001; - public static int min_windoe = 4; - public static double log2 = 0.6; - public static double bigger = 1.5; - //public static int debug = 0; - //public static String Rexe = "R"; - - private final Map refSeq; - - private final long genomeSize ; - private final long numTest; - private final long numRef; - - private final double biglog2_window; - private final double smalog2_window; - private final int windowSize; - - private final File ftest; - private final File fref; - - - /** - * it caculate the window size based on genome size, TEST and REF BAM records number - * @param test: File of TEST BAM - * @param ref: File of reference BAM - * @throws Exception - */ - CNVseq(File test, File ref, int window ) throws Exception { - //open file - SamReader rtest = SAMFileReaderFactory.createSAMFileReader(test );//new SAMFileReader(test); - SamReader rref = SAMFileReaderFactory.createSAMFileReader(ref );//new SAMFileReader(ref); - - //check whether index file exist or not - if(!rtest.hasIndex()){ - throw new Exception("can't find index for: " + test.getName()); - } - 
if(!rref.hasIndex()){ - throw new Exception("can't find index for: " + ref.getName()); - } - ftest = test; - fref = ref; - - //check SAM header - SAMFileHeader htest = rtest.getFileHeader(); - SAMFileHeader href = rref.getFileHeader(); - - //get sequence information from both inputs - Map seqTest = new HashMap (); - Map seqRef = new HashMap (); - - List genome = htest.getSequenceDictionary().getSequences(); - for(SAMSequenceRecord re : genome){ - seqTest.put(re.getSequenceName(),re.getSequenceLength()); - } - - genome = href.getSequenceDictionary().getSequences(); - for(SAMSequenceRecord re : genome){ - seqRef.put(re.getSequenceName(),re.getSequenceLength()); - } - - // check both @SQ line are same or not - if(seqRef.size() != seqTest.size()){ - throw new Exception("the sequence size are different between two inputs: \n" + ftest.getName() + "\n" + fref.getName() ); - } - - for (String key : seqTest.keySet()){ - //first convert Integer to int - int l1 = seqTest.get(key); - int l2 = seqRef.get(key); - if(l1 != l2){ - throw new Exception("the sequence size of " + key + " are different between two inputs : \n" + ftest.getName() + "\n" + fref.getName() ); - } - } - - // assign one of the identical reference info into the hash map - refSeq = seqTest; - - //caculate the genome size based on the identail reference - long size = 0; - for(String key : refSeq.keySet()){ size += refSeq.get(key); } - genomeSize = size; -//-debug -//genomeSize = 3253037807L; - - //count mapped record number based on index file - BAMIndex tIndex = rtest.indexing().getIndex(); - BAMIndex rIndex = rref.indexing().getIndex(); - BAMIndexMetaData meta; - int tMapped = 0; - int rMapped = 0; - for(int i = 0; i < seqRef.size(); i ++ ){ - meta = tIndex.getMetaData(i); - tMapped += meta.getAlignedRecordCount(); - meta = rIndex.getMetaData(i); - rMapped += meta.getAlignedRecordCount(); - } - numTest = tMapped; - numRef = rMapped; - - //close files - rtest.close(); - rref.close(); - - //caculate window size - double brp = Math.pow(2, log2); - double srp = 1.0 / brp; - - - biglog2_window = (numTest * Math.pow(brp, 2) + numRef) * genomeSize * Math.pow(bt, 2) / ( Math.pow((1- brp),2 ) * numTest * numRef); - smalog2_window = (numTest * Math.pow(srp, 2) + numRef) * genomeSize * Math.pow(st, 2) / ( Math.pow((1- srp),2 ) * numTest * numRef); - if(window == 0 ){ - windowSize = (int) (Math.max(biglog2_window, smalog2_window) * bigger) ; - }else{ - windowSize = window; - } - - } - - /** - * it create an Iterator and query on each window; finally it close the iterator - * @param f: SAMFileReader - * @param chr: genoeme name - * @param start: window start postion - * @param end: window end position - * @return the totoal number of records mapped overlapped on this window region - */ - int exeQuery (SamReader reader, String chr, int start, int end){ - - SAMRecordIterator block_ite = reader.queryOverlapping(chr, start, end); - int num = 0; - while(block_ite.hasNext()){ - num ++; - block_ite.next(); - } - - block_ite.close(); - - return num; - } - - /** - * - * @return total SAM records number in Test input file - */ - long getTestReadsNumber(){return numTest;} - - /** - * - * @return total SAM records number in Ref input file - */ - long getRefReadsNumber(){return numRef;} - - /** - * - * @return a hash table list each sequence reference name and length - */ - Map getrefseq(){return refSeq;} - - /** - * - * @return return the minimum window size for detecting log2>=0.6 - */ - double getpositivelog2window(){ return biglog2_window;} - - /** - * - * 
@return The minimum window size for detecting log2<=-0.6 - */ - double getnegativelog2window(){return smalog2_window;} - - /** - * - * @return The window size to use is max(100138.993801, 66550.928197) * 1.500000 - */ - int getWindowSize(){ return windowSize; } - - /** - * - * @return the total length of reference sequence listed on BAM @SQ lines - */ - long getGenomeSize( ){ return genomeSize;} - - /** - * - * @return the Test File with File type - */ - File getTestFile(){return ftest;} - - /** - * - * @return the Ref File with File type - */ - File getRefFile(){return fref;} - -} diff --git a/qmule/src/org/qcmg/qmule/qcnv/Main.java-- b/qmule/src/org/qcmg/qmule/qcnv/Main.java-- deleted file mode 100644 index 41b681505..000000000 --- a/qmule/src/org/qcmg/qmule/qcnv/Main.java-- +++ /dev/null @@ -1,57 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.qcnv; - -import org.qcmg.common.log.*; -import htsjdk.samtools.*; -import java.util.*; -import java.util.Map.Entry; -import java.io.*; - -public class Main { - - public static void main(String[] args) throws Exception{ - //check arguments - Options options = new Options( args); - if(! options.commandCheck()){ System.exit(1); } - - QLogger logger = options.getLogger(args); - try{ - File ftest = new File(options.getIO("test")); - File fref = new File(options.getIO("ref")); - CNVseq cnvseq = new CNVseq(ftest, fref, options.getWindowSize()); - - logger.info("genome size used for calculation is " + cnvseq.getGenomeSize()); - logger.info(ftest.getName() + "contains records number: " + cnvseq.getTestReadsNumber()); - logger.info(fref.getName() + "contains records number: " + cnvseq.getRefReadsNumber()); - if(options.getWindowSize() == 0){ - logger.info("The minimum window size for detecting log2>=" + CNVseq.log2 +" should be " + cnvseq.getpositivelog2window()); - logger.info("The minimum window size for detecting log2<=-" + CNVseq.log2 +" should be " + cnvseq.getnegativelog2window()); - logger.info(String.format("The window size to use is max(%f, %f) * %f = %d", - cnvseq.getpositivelog2window(),cnvseq.getnegativelog2window(), CNVseq.bigger, cnvseq.getWindowSize())); - }else{ - logger.info("The window size used in this run is " + options.getWindowSize()); - } - - //count reads number in each window and output - MtCNVSeq cnvThread = new MtCNVSeq(cnvseq, new File(options.getIO("output")), options.getThreadNumber(), options.getTmpDir()); - cnvThread.cnvCount(logger); - - logger.logFinalExecutionStats(0); - System.exit(0); - }catch(Exception e){ - logger.error(e.toString()); - logger.logFinalExecutionStats(1); - System.err.println(e.toString()); - System.exit(1); - } - } - - - - -} diff --git a/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- b/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- deleted file mode 100644 index b8bdbfcf5..000000000 --- a/qmule/src/org/qcmg/qmule/qcnv/MtCNVSeq.java-- +++ /dev/null @@ -1,152 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
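- *
- * MtCNVSeq: runs the per-window read counting of CNVseq in parallel, one task per
- * reference sequence, writing each contig's counts to a temporary file and then
- * merging them into the final qcnv output (reference, start, end, test, ref).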
- */ -package org.qcmg.qmule.qcnv; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.File; -import java.io.IOException; - -import java.util.HashMap; -import java.util.Map; - -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.ValidationStringency; - -import org.qcmg.common.log.*; -import org.qcmg.common.util.Constants; -import org.qcmg.picard.SAMFileReaderFactory; - - -public class MtCNVSeq { - - final CNVseq cnvseq; - final File Output; - final int noOfThreads; - final File tmpPath; - - MtCNVSeq(CNVseq cnvseq, File output, int noOfThreads, File tmpdir) throws IOException{ - this.cnvseq = cnvseq; - this.Output = output; - this.noOfThreads = noOfThreads; - if(tmpdir == null) - tmpPath = File.createTempFile( "qcnv", "", Output.getParentFile()); - else - tmpPath = File.createTempFile( "qcnv", "",tmpdir); - } - /** - * it call threads, parallel the BAMFileReader.query for single genome - * @param logger: an instance of QLogger - * @throws IOException - * @throws InterruptedException - */ - void cnvCount(QLogger logger) throws IOException, InterruptedException{ - - Map refseq = cnvseq.getrefseq(); - Map tmpoutput = new HashMap(); - ExecutorService queryThreads = Executors.newFixedThreadPool(noOfThreads); - - logger.debug("start parallel query based on genome file name"); - - - if(!(tmpPath.delete())) - throw new IOException("Could not delete tmp file: " + tmpPath.getAbsolutePath()); - if(! tmpPath.mkdirs()) - throw new IOException("Could not create tmp directory: " + tmpPath.getAbsolutePath()); - - //parallel query by genomes and output to tmp files - for ( Map.Entry chr : refseq.entrySet()){ - File tmp = File.createTempFile(chr.getKey(), ".count", tmpPath); - tmpoutput.put(chr.getKey(), tmp); - queryThreads.execute(new ExeQuery(cnvseq,chr, tmp)); - } - //wait threads finish - queryThreads.shutdown(); - queryThreads.awaitTermination(Constants.EXECUTOR_SERVICE_AWAIT_TERMINATION, TimeUnit.HOURS); - queryThreads.shutdownNow(); - logger.debug("completed parallel query based on genome file name"); - - - //collect outputs from tmp files into - logger.debug("starting collect each genome counts into final output"); - FileWriter writer = new FileWriter(Output); - writer.write("reference\tstart\tend\ttest\tref\n"); - for( Map.Entry tmp : tmpoutput.entrySet()){ - BufferedReader input = new BufferedReader(new FileReader(tmp.getValue())); - String line = null; - while((line = input.readLine()) != null){ - writer.write(line + "\n"); - } - input.close(); - tmp.getValue().deleteOnExit(); - } - tmpPath.delete(); - writer.close(); - logger.debug("created final output"); - } - - /** - * query on Test BAM and Ref BAM records which mapped to specified gemoem - * @author q.xu - * - */ - public static class ExeQuery implements Runnable { - CNVseq cnvseq; - File Output; - File Test; - File Ref; - QLogger logger; - int chrSize; - int winSize; - String chrName; - - ExeQuery(CNVseq cnvseq, Map.Entry chr,File tmp) { - Output = tmp; - Test = cnvseq.getTestFile(); - Ref = cnvseq.getRefFile(); - chrSize = chr.getValue(); - chrName = chr.getKey(); - winSize = cnvseq.getWindowSize(); - this.cnvseq = cnvseq; - } - - public void run() { - try { - FileWriter writer = new FileWriter(Output); - SamReader rTest = SAMFileReaderFactory.createSAMFileReader(Test,ValidationStringency.SILENT); - SamReader rRef 
= SAMFileReaderFactory.createSAMFileReader(Ref,ValidationStringency.SILENT); - - int win_num = chrSize / winSize + 1; - - for (int i = 0; i < win_num; i++){ - int start = i * winSize + 1; - int end = (i + 1 ) * winSize; - int num_test = cnvseq.exeQuery(rTest, chrName, start, end); - int num_ref = cnvseq.exeQuery(rRef, chrName, start, end); - writer.write(String.format("%s\t%d\t%d\t%d\t%d\n", chrName, start, end, num_test, num_ref )); - } - - rRef.close(); - writer.close(); - rTest.close(); - - } catch (Exception e) { - System.out.println(Thread.currentThread().getName() + " " - + e.getMessage()); - Thread.currentThread().interrupt(); - } - - } - } - -} diff --git a/qmule/src/org/qcmg/qmule/qcnv/Options.java-- b/qmule/src/org/qcmg/qmule/qcnv/Options.java-- deleted file mode 100644 index 3f4dc850b..000000000 --- a/qmule/src/org/qcmg/qmule/qcnv/Options.java-- +++ /dev/null @@ -1,169 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.qcnv; - - -import java.io.File; -import java.util.List; - -import joptsimple.OptionParser; -import joptsimple.OptionSet; - -import org.qcmg.qmule.Messages; -import org.qcmg.common.log.*; - -public class Options { - private static final String HELP_DESCRIPTION = Messages.getMessage("HELP_OPTION_DESCRIPTION"); - private static final String VERSION_DESCRIPTION = Messages.getMessage("VERSION_OPTION_DESCRIPTION"); - private static final String LOG_DESCRIPTION = Messages.getMessage("LOG_OPTION_DESCRIPTION"); - private static final String LOGLEVEL_DESCRIPTION = Messages.getMessage("LOGLEVEL_OPTION_DESCRIPTION"); - - private static final String OUTPUT_DESCRIPTION = Messages.getMessage("OUTPUT_OPTION_DESCRIPTION"); - private static final String TEST_DESCRIPTION = Messages.getMessage("TEST_OPTION_DESCRIPTION"); - private static final String REF_DESCRIPTION = Messages.getMessage("REF_OPTION_DESCRIPTION"); - private static final String THREAD_DESCRIPTION = Messages.getMessage("THREAD_OPTION_DESCRIPTION"); - private static final String WINDOW_DESCRIPTION = Messages.getMessage("WINDOW_SIZE_DESCRIPTION"); - private static final String TMPDIR_DESCRIPTION = Messages.getMessage("TMPDIR_DESCRIPTION"); - private final OptionParser parser = new OptionParser(); - private final OptionSet options; - - final static int DEFAULT_THREAD = 2; - final String commandLine; - final String USAGE = Messages.getMessage("USAGE_QCNV"); - final String version = org.qcmg.qmule.Main.class.getPackage().getImplementationVersion(); - - public Options( final String[] args) throws Exception { - parser.accepts("output", OUTPUT_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("outputfile"); - parser.accepts("ref", REF_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("Normal BAM"); - parser.accepts("test", TEST_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("Tumor BAM"); - parser.accepts("thread", THREAD_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("thread number"); - parser.accepts("window", WINDOW_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("window size"); - parser.accepts("tmpdir", TMPDIR_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("window size"); - - - parser.accepts("log", LOG_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("logfile"); - parser.accepts("loglevel", 
LOGLEVEL_DESCRIPTION).withRequiredArg().ofType(String.class).describedAs("loglevel"); - parser.accepts("version", VERSION_DESCRIPTION); - parser.accepts("help", HELP_DESCRIPTION); - - options = parser.parse(args); - commandLine = Messages.reconstructCommandLine(args); - } - - //IO parameters - String getIO(String io) throws Exception{ - - int size = options.valuesOf(io).size(); - if( size > 1){ - throw new Exception("multiple "+ io + " files specified" ); - } - else if( size < 1 ){ - throw new Exception(" missing or invalid IO option specified: " + io ); - } - - return options.valueOf(io).toString(); - } - - File getTmpDir() throws Exception{ - if(options.has("tmpdir")) - return new File (options.valueOf("tmpdir").toString()); - - - - return null; - - } - - int getThreadNumber(){ - - if(options.has("thread")){ - return Integer.parseInt((String) options.valueOf("thread")); - } - - return DEFAULT_THREAD; - } - - int getWindowSize(){ - - if(options.has("window")){ - return Integer.parseInt((String) options.valueOf("window")); - } - - return 0; - } - - QLogger getLogger(String[] args) throws Exception{ - - // configure logging - QLogger logger; - String logLevel = (String) options.valueOf("loglevel"); - String logFile; - if(options.has("log")){ - logFile = options.valueOf("log").toString(); - } - else{ - logFile = options.valueOf("output") + ".log"; - } - - logger = QLoggerFactory.getLogger( Main.class, logFile,logLevel); - logger.logInitialExecutionStats(Main.class.toString(), version, args); - return logger; - } - - boolean hasHelp() throws Exception{ - if(options.has("h") || options.has("help")){ - System.out.println(USAGE); - System.out.println(HELP_DESCRIPTION); - parser.printHelpOn(System.err); - return true; - } - return false; - } - - boolean hasVersion()throws Exception{ - if(options.has("v") || options.has("version")){ - System.out.println(VERSION_DESCRIPTION); - System.err.println(version); - return true; - } - return false; - } - - boolean commandCheck() throws Exception{ - //quit system after provide help or version info - if( hasHelp() || hasVersion() ){ - System.exit(0); - } - - - if (options.nonOptionArguments().size() > 0) { - List nonoptions = (List) options.nonOptionArguments(); - - for(String str : nonoptions){ - System.err.println("INVALID OPTION: " + str); - } - return false; - } - - if(getIO("ref") == null || getIO("test") == null){ - System.err.println("Missing ref or test option"); - return false; - } - if( getIO("ref").equals(getIO("output"))){ - System.err.println(Messages.getMessage("SAME_FILES", "ref", "output")); - return false; - } - if(options.has("thread")){ - int thread = Integer.parseInt((String) options.valueOf("thread")); - if(thread < 1){ - System.err.println("THREAD NUMBER MUST GREATER THAN ONE: " + options.valueOf("thread") ); - } - } - - return true; - } -} diff --git a/qmule/src/org/qcmg/qmule/queryChrMT.java-- b/qmule/src/org/qcmg/qmule/queryChrMT.java-- deleted file mode 100644 index d9dcad3ff..000000000 --- a/qmule/src/org/qcmg/qmule/queryChrMT.java-- +++ /dev/null @@ -1,68 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
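 * Standalone utility: writes the primary alignments on chrMT from the input BAM (args[0])
 * to a new BAM named <input>.chrMT.primary.bam in the output directory (args[1]).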
- */ -package org.qcmg.qmule; - -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; - -import htsjdk.samtools.*; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.ValidationStringency; -import htsjdk.samtools.SAMRecordIterator; - -import java.io.*; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; - -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - - -public class queryChrMT { - - public static void main(final String[] args) throws IOException, InterruptedException { - - try{ - - File inBAM = new File(args[0]); - String outputName = inBAM.getName().replace(".bam", ".chrMT.primary.bam"); - File output = new File(args[1], outputName); - - SamReader reader = SAMFileReaderFactory.createSAMFileReader(inBAM,ValidationStringency.SILENT); - SAMFileHeader he = reader.getFileHeader().clone(); - SAMOrBAMWriterFactory writeFactory = new SAMOrBAMWriterFactory(he , true, output); - SAMRecordIterator ite = reader.query("chrMT",0, 16569, false); - - SAMRecord record; - while(ite.hasNext()){ - record = ite.next(); - if(!record.getNotPrimaryAlignmentFlag()) - writeFactory.getWriter().addAlignment(record ); - - } - writeFactory.closeWriter(); - reader.close(); - - System.exit(0); - }catch(Exception e){ - System.err.println(e.toString()); - Thread.sleep(1); - System.out.println("usage: qmule org.qcmg.qmule.queryChrMT "); - System.exit(1); - } - - } - - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- deleted file mode 100644 index e405206bd..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/CompareSnps.java-- +++ /dev/null @@ -1,205 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
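 * Compares two tab-delimited snp call files: positions that are class A/B and
 * STOP/NON_SYNONYMOUS in one input but not the other are logged and written out
 * as IGV batch files, one per input.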
- */ -package org.qcmg.qmule.snppicker; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.SnpUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.qmule.util.IGVBatchFileGenerator; -import org.qcmg.qmule.util.TabbedDataLoader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class CompareSnps { - - private final ConcurrentMap firstSnpMap = new ConcurrentHashMap(30000); //not expecting more than 100000 - private final ConcurrentMap secondSnpMap = new ConcurrentHashMap(30000); - private final List firstList = new ArrayList(); - private final List secondList = new ArrayList(); -// private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); - - private static QLogger logger; - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - public int engage() throws Exception { - - logger.info("loading snp data from file: " + cmdLineInputFiles[0]); - TabbedDataLoader.loadTabbedData(cmdLineInputFiles[0], -2, firstSnpMap); - logger.info("loading snp data - DONE [" + firstSnpMap.size() + "]"); - logger.info("loading snp data from file: " + cmdLineInputFiles[1]); - TabbedDataLoader.loadTabbedData(cmdLineInputFiles[1], -2, secondSnpMap); - logger.info("loading snp data - DONE [" + secondSnpMap.size() + "]"); - - compare(); - - outputIGVBatchFiles(); - -// addPileupFromNormalBam(); - - return exitStatus; - } - - private void outputIGVBatchFiles() throws IOException { - IGVBatchFileGenerator.generate(firstList, cmdLineOutputFiles[0]); - IGVBatchFileGenerator.generate(secondList, cmdLineOutputFiles[1]); - } - - protected void compare() { - - // total counts - int firstMapCount = 0, secondMapCount = 0; - // count of snps unique to each input - int uniqueToFirstMap = 0, uniqueToSecondMap = 0; - int commonSnps = 0, commonAndAlsoClassABStopNonSynon = 0; - - // loop through first set - for (Entry entry : firstSnpMap.entrySet()) { - - TabbedRecord firstRecord = entry.getValue(); - - if (isClassAB(firstRecord, -1) && isStopNonSynonymous(firstRecord, 22)) { - firstMapCount++; - - TabbedRecord secondRecord = secondSnpMap.get(entry.getKey()); - if (null == secondRecord || ! (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22))) { - uniqueToFirstMap++; - firstList.add(entry.getKey()); - logger.info("Unique to first: " + entry.getKey().getChromosome() + ":" + entry.getKey().getStartPosition()); - } else { - commonSnps++; -// if (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22)) { -// commonAndAlsoClassABStopNonSynon++; -// } - } - } - - } - - // loop through second set - for (Entry entry : secondSnpMap.entrySet()) { - - TabbedRecord secondRecord = entry.getValue(); - - if (isClassAB(secondRecord, -1) && isStopNonSynonymous(secondRecord, 22)) { - secondMapCount++; - - TabbedRecord firstRecord = firstSnpMap.get(entry.getKey()); - if (null == firstRecord || ! 
(isClassAB(firstRecord, -1) && isStopNonSynonymous(firstRecord, 22))) { - uniqueToSecondMap++; - secondList.add(entry.getKey()); - logger.info("Unique to second: " + entry.getKey().getChromosome() + ":" + entry.getKey().getStartPosition()); -// logger.info("IGV: " + entry.getValue().getData()); - } - } - } - - logger.info("SUMMARY:"); - logger.info("firstMapCount: " + firstMapCount); - logger.info("secondMapCount: " + secondMapCount); - logger.info("uniqueToFirstMap: " + uniqueToFirstMap); - logger.info("uniqueToSecondMap: " + uniqueToSecondMap); - logger.info("commonSnps: " + commonSnps); -// logger.info("commonAndAlsoClassABStopNonSynon: " + commonAndAlsoClassABStopNonSynon); - - } - - - - protected static boolean isClassAB(TabbedRecord record, int index) { - if (null == record || null == record.getData()) throw new IllegalArgumentException("null or empty Tabbed record"); - String [] params = TabbedDataLoader.tabbedPattern.split(record.getData()); - String qcmgFlag = TabbedDataLoader.getStringFromArray(params, index); - - return SnpUtils.isClassAorB(qcmgFlag); -// return "--".equals(qcmgFlag) || "less than 12 reads coverage in normal".equals(qcmgFlag) -// || "less than 3 reads coverage in normal".equals(qcmgFlag); - - } - - protected static boolean isStopNonSynonymous(TabbedRecord record, int index) { - if (null == record || null == record.getData()) throw new IllegalArgumentException("null or empty Tabbed record"); - String [] params = TabbedDataLoader.tabbedPattern.split(record.getData()); -// String consequenceType = params[index]; - String consequenceType = TabbedDataLoader.getStringFromArray(params, index); - - return consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS"); - } - - - - public static void main(String[] args) throws Exception { - CompareSnps sp = new CompareSnps(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(CompareSnps.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareSnps", CompareSnps.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! 
FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- deleted file mode 100644 index 322cbd5d1..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java-- +++ /dev/null @@ -1,237 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.pileup.QPileupFileReader; -import org.qcmg.pileup.QSnpRecord; -import org.qcmg.pileup.VerifiedSnpFileReader; -import org.qcmg.pileup.VerifiedSnpRecord; -import org.qcmg.vcf.VCFFileReader; - -public class ExamineVerifiedSnps { - - private static final QLogger logger = QLoggerFactory.getLogger(ExamineVerifiedSnps.class); - - private static Map pileup = new HashMap<>(80000); - private static Map vcfRecords = new HashMap<>(80000); - private static Map verifiedSNPs = new HashMap<>(250); - - public static void main(String[] args) throws Exception { - logger.info("hello..."); - - String filename = args[0]; - boolean runQPileup = true; - // filename type depends on whether to load qpileup or vcf - if (FileUtils.isFileTypeValid(filename, "vcf")) { - runQPileup = false; - } - - loadVerifiedSnps(args[1]); - logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); - - if (runQPileup) { - // load the existing pileup into memory - logger.info("running in pileup mode"); - loadQPileup(args[0]); - logger.info("loaded " + pileup.size() + " entries into the pileup map"); - examine(args[2]); - } else { - logger.info("running in vcf mode"); - loadGATKData(args[0]); - logger.info("loaded " + vcfRecords.size() + " entries into the vcf map"); - examineVCF(args[2]); - } - logger.info("goodbye..."); - } - - private static void examine(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - - int verifiedYes = 0, qsnpVerifiedYes = 0; - int verifiedNo = 0, qsnpVerifiedNo = 0; - int verifiedNoGL = 0, qsnpVerifiedNoGL = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : verifiedSNPs.entrySet()) { - - QSnpRecord qpr = pileup.get(entry.getKey()); - VerifiedSnpRecord vsr = entry.getValue(); - - // only interested in exome data - if ( ! 
"exome".equals(vsr.getAnalysis())) continue; - - - if ("no".equals(vsr.getStatus())) { - verifiedNo++; - // if we don't have a matching qpr - good, otherwise, print details - if (null == qpr) { - qsnpVerifiedNo++; - writer.write(vsr.getFormattedString() + "\tOK - no entry in qsnp\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getClassification() + "\t" - + getAnnotationAndNote(qpr) + "\n"); -// + qpr.getMutation() + getAnnotationAndNote(qpr) + "\n"); - } - - } else if ("yes".equals(vsr.getStatus())) { - verifiedYes++; - if (null != qpr) { - qsnpVerifiedYes++; - writer.write(vsr.getFormattedString() + "\tOK - entry in qsnp\t" + qpr.getClassification() + "\t" - + getAnnotationAndNote(qpr) +"\n"); -// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\n"); - } - } else if ("no -GL".equals(vsr.getStatus())) { - verifiedNoGL++; - if (null != qpr) { - qsnpVerifiedNoGL++; - - writer.write(vsr.getFormattedString() + "\tentry in qsnp\t" + qpr.getClassification() + "\t" - + getAnnotationAndNote(qpr) +"\n"); -// + qpr.getMutation() + getAnnotationAndNote(qpr) +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\tNo entry in qsnp\n"); - } - } - } - - writer.close(); - logger.info("verified yes: " + verifiedYes + ", in qsnp: " + qsnpVerifiedYes); - logger.info("verified no: " + verifiedNo + ", in qsnp: " + (verifiedNo-qsnpVerifiedNo)); - logger.info("verified no -GL: " + verifiedNoGL + ", in qsnp: " + qsnpVerifiedNoGL); - } - } - - private static void examineVCF(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - - int verifiedYes = 0, gatkVerifiedYes = 0; - int verifiedNo = 0, gatkVerifiedNo = 0; - int verifiedNoGL = 0, gatkVerifiedNoGL = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : verifiedSNPs.entrySet()) { - - VcfRecord qpr = vcfRecords.get(entry.getKey()); - VerifiedSnpRecord vsr = entry.getValue(); - - // only interested in exome data - if ( ! 
"exome".equals(vsr.getAnalysis())) continue; - - if ("no".equals(vsr.getStatus())) { - verifiedNo++; - // if we don't have a matching qpr - good, otherwise, print details - if (null == qpr) { - gatkVerifiedNo++; - writer.write(vsr.getFormattedString() + "\tOK - no entry in GATK\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\t" + - VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() + "\n"); -// writer.write(vsr.getFormattedString() + "\t???\t" + qpr.getGenotype() + "\t" + qpr.getAlt() + "\n"); - } - - } else if ("yes".equals(vsr.getStatus())) { - verifiedYes++; - if (null != qpr) { - gatkVerifiedYes++; - writer.write(vsr.getFormattedString() + "\tOK - entry in GATK\t" + - VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\t???\n"); - } - } else if ("no -GL".equals(vsr.getStatus())) { - verifiedNoGL++; - if (null != qpr) { - gatkVerifiedNoGL++; - - writer.write(vsr.getFormattedString() + "\tentry in GATK\t" + - VcfUtils.getGenotypeFromGATKVCFRecord(qpr) + "\t" + qpr.getAlt() +"\n"); - } else { - writer.write(vsr.getFormattedString() + "\tNo entry in GATK\n"); - } - } - } - - writer.close(); - logger.info("verified yes: " + verifiedYes + ", in GATK: " + gatkVerifiedYes); - logger.info("verified no: " + verifiedNo + ", in GATK: " + (verifiedNo-gatkVerifiedNo)); - logger.info("verified no -GL: " + verifiedNoGL + ", in GATK: " + gatkVerifiedNoGL); - } - } - - private static String getAnnotationAndNote(QSnpRecord record) { - if ( isNull(record.getAnnotation())) return "\tClassA"; - else if (isNull(record.getAnnotation())) return "\tClassB"; - else return "\tClassB\t" + record.getAnnotation(); - } - - private static boolean isNull(String string) { - return null == string || "null".equals(string) || 0 == string.length(); - } - - private static void loadQPileup(String pileupFile) throws IOException { - if (FileUtils.canFileBeRead(pileupFile)) { - QPileupFileReader reader = new QPileupFileReader(new File(pileupFile)); - try { - for (QSnpRecord qpr : reader) { - pileup.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); - } - } finally { - reader.close(); - } - } - } - - private static void loadGATKData(String pileupFile) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - - VCFFileReader reader = new VCFFileReader(new File(pileupFile)); - try { - for (VcfRecord qpr : reader) { - vcfRecords.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); - } - } finally { - reader.close(); - } - } - } - private static void loadVerifiedSnps(String verifiedSnpFile) throws IOException { - if (FileUtils.canFileBeRead(verifiedSnpFile)) { - VerifiedSnpFileReader reader = new VerifiedSnpFileReader(new File(verifiedSnpFile)); - try { - for (VerifiedSnpRecord vsr : reader) { - verifiedSNPs.put(ChrPointPosition.valueOf(vsr.getChromosome(), vsr.getPosition()),vsr); - } - } finally { - reader.close(); - } - } - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- deleted file mode 100644 index 6758eb70e..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java-- +++ /dev/null @@ -1,488 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; - -import htsjdk.samtools.SAMRecord; - -import org.qcmg.chrconv.ChrConvFileReader; -import org.qcmg.chrconv.ChromosomeConversionRecord; -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.Classification; -import org.qcmg.common.model.GenotypeEnum; -import org.qcmg.common.model.QSnpGATKRecord; -import org.qcmg.common.util.BaseUtils; -import org.qcmg.common.util.Constants; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.common.vcf.header.VcfHeaderUtils; -import org.qcmg.germlinedb.GermlineDBFileReader; -import org.qcmg.germlinedb.GermlineDBRecord; -import org.qcmg.picard.QJumper; -import org.qcmg.pileup.QSnpRecord; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.Options.Ids; -import org.qcmg.qmule.QMuleException; -import org.qcmg.vcf.VCFFileReader; - -public class GatkUniqueSnps { - -// private static final QLogger logger = QLoggerFactory.getLogger(GatkUniqueSnps.class); - private static QLogger logger; - - private static Map tumourRecords = new HashMap(100000); - private static Map normalRecords = new HashMap(100000); - -// private static Map classABRecords = new HashMap(100000); - private static List qPileupRecords = new ArrayList(15000); - - // map to hold chromosome conversion data - private static final Map ensembleToQCMG = new HashMap(110); - - - // constants - private String mutationIdPrefix; - private String tumourSampleId; - private String normalSampleId; - private String patientId; - private String somaticAnalysisId; - private String germlineAnalysisId; -// private String analysisId; -// private static final String mutationIdPrefix = "APGI_1992_"; -// private static final String analysisId = "qcmg_ssm_20110524_1"; -// private static final String tumourSampleId = "ICGC-ABMP-20091203-06-TD"; - - - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private Properties ids; - - private int exitStatus; - - - private static String bamFile1; - private static String bamFile1Index; -// private static String bamFile2; -// private static String bamFile2Index; - - private static QJumper jumper1; -// private static QJumper jumper2; - - public int engage() throws Exception { - - setupIds(); - - logger.info("loading normal vcf file"); - loadGATKData(cmdLineInputFiles[0], normalRecords); - logger.info("loaded " + normalRecords.size() + " normal vcf's"); - - logger.info("loading tumour vcf file"); - loadGATKData(cmdLineInputFiles[1], tumourRecords); - logger.info("loaded " + tumourRecords.size() + " tumour vcf's"); - - bamFile1 = cmdLineInputFiles[2]; - bamFile1Index = cmdLineInputFiles[3]; -// bamFile2 = args[4]; -// bamFile2Index = args[5]; - - - jumper1 = new QJumper(); - jumper1.setupReader(bamFile1, bamFile1Index); -// jumper2 = new QJumper(); -// jumper2.setupReader(bamFile2, bamFile2Index); - - - logger.info("about to call examine"); - examine(); - logger.info("about to call examine - DONE"); - - // close the qjumper - jumper1.closeReader(); - - logger.info("about to load chromosome conversion data"); - 
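// ensembleToQCMG (populated below from the chromosome conversion file, cmdLineInputFiles[4])
// maps Ensembl v55 contig names to QCMG chromosome names; it is used when annotating
// against GermlineDB and when writing the DCC output.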
loadChromosomeConversionData(cmdLineInputFiles[4]); - logger.info("about to load chromosome conversion data - DONE"); - - logger.info("about to add germlineDB info"); - addGermlineDBData(cmdLineInputFiles[5]); - - int noAnnotation = 0; - for (final QSnpRecord qpr : qPileupRecords) if (null == qpr.getAnnotation()) noAnnotation++; - logger.info("class A after addition of germlinedb data: " + noAnnotation ); - - - logger.info("writing output"); - writeOutputForDCC(cmdLineOutputFiles[0]); - logger.info("DONE"); - - return exitStatus; - } - - private void setupIds() throws Exception { - if (null != ids) { - - somaticAnalysisId = (String) ids.get(Ids.SOMATIC_ANALYSIS); - germlineAnalysisId = (String) ids.get(Ids.GEMLINE_ANALYSIS); - tumourSampleId = (String) ids.get(Ids.TUMOUR_SAMPLE); - normalSampleId = (String) ids.get(Ids.NORMAL_SAMPLE); - patientId = (String) ids.get(Ids.PATIENT); - mutationIdPrefix = patientId + "_SNP_"; - - logger.tool("somaticAnalysisId: " + somaticAnalysisId); - logger.tool("germlineAnalysisId: " + germlineAnalysisId); - logger.tool("normalSampleId: " + normalSampleId); - logger.tool("tumourSampleId: " + tumourSampleId); - logger.tool("patientId: " + patientId); - logger.tool("mutationIdPrefix: " + mutationIdPrefix); - - } else { - logger.error("No ids were passed into the program"); - throw new Exception("Invalid arguments to GatkUniqueSnps"); - } - } - - private static void examine() throws Exception { - - int existsInNormalAndTumour = 0, sameGenotype = 0; - // loop through the tumour map - - for (final Entry tumourEntry : tumourRecords.entrySet()) { - - // see if a position exists in the normal map - final QSnpGATKRecord normalRecord = normalRecords.get(tumourEntry.getKey()); - if (null != normalRecord) { - existsInNormalAndTumour++; - - final GenotypeEnum normalGenotype = normalRecord.getGenotypeEnum(); - final GenotypeEnum tumourGenotype = tumourEntry.getValue().getGenotypeEnum(); - - if (normalGenotype == tumourGenotype) { - sameGenotype++; - } else { - if (tumourGenotype.containsAllele(normalRecord.getAlt().charAt(0))) { - //tumourEntry.getValue().getVCFRecord().addInfo("MIN"); - tumourEntry.getValue().getVCFRecord().appendInfo("MIN");; - } - if ( tumourGenotype.isHeterozygous() && ! 
tumourGenotype.containsAllele(tumourEntry.getValue().getRef().charAt(0))) - //tumourEntry.getValue().getVCFRecord().addInfo("tumour heterozygous for two non-reference alleles"); - tumourEntry.getValue().getVCFRecord().appendInfo("tumour heterozygous for two non-reference alleles"); -// if (null == tumourEntry.getValue().getAnnotation()) { - qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); -// } - } - } else { - // interested primarily in these fellas - qPileupRecords.add(getQPileupRecord(tumourEntry.getValue())); - } - } - - logger.info("exists in both normal and tumour: " + existsInNormalAndTumour + ", same Genotype: " + sameGenotype); - - logger.info("potential number of class A&B's before pileup: " + qPileupRecords.size() ); - - int noAnnotation = 0, count = 0; - for (final QSnpRecord qpr : qPileupRecords) { - getPileup(jumper1, qpr); - - if (++count % 100 == 0) - logger.info("hit " + count + " vcf records, " + qpr.toString()); - - if (qpr.getAnnotation() == null) - noAnnotation++; - } - - logger.info("class A after pileup: " + noAnnotation ); - - } - - private static void loadChromosomeConversionData(String chrConvFile) throws IOException { - final ChrConvFileReader reader = new ChrConvFileReader(new File(chrConvFile)); - try { - for (final ChromosomeConversionRecord record : reader) { - // add extra map inserts here as required - ensembleToQCMG.put(record.getEnsembleV55(), record.getQcmg()); - } - } finally { - reader.close(); - } - } - - private void writeOutputForDCC(String dccSomaticFile) throws IOException { - if (dccSomaticFile.contains("Germline_DB.txt")) throw new IOException("Wrong output file!!!"); - - final FileWriter somaticWriter = new FileWriter(new File(dccSomaticFile)); - - final String somaticHeader = "analysis_id\ttumour_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\tquality_score\tprobability\tread_count\tis_annotated\tvalidation_status\tvalidation_platform\txref_ensembl_var_id\tnote\tQCMGflag\n"; - final int counter = 1; - try { - - somaticWriter.write(somaticHeader); - for (final QSnpRecord record : qPileupRecords) { - - String ensemblChr = null; - // get ensembl chromosome - for (final Map.Entry entry : ensembleToQCMG.entrySet()) { - if (record.getChromosome().equals(entry.getValue())) { - ensemblChr = entry.getKey(); - break; - } - } - somaticWriter.write(somaticAnalysisId + "\t" + tumourSampleId + "\t" - + "\n"); -// + record.getDCCData(mutationIdPrefix, ensemblChr) + "\n"); - } - } finally { - somaticWriter.close(); - } - } - - private static QSnpRecord getQPileupRecord(QSnpGATKRecord vcfRec) { - final QSnpRecord qpr = new QSnpRecord(vcfRec.getChromosome(), vcfRec.getPosition(), vcfRec.getRef()); - qpr.setTumourGenotype(vcfRec.getGenotypeEnum()); -// qpr.setMutation(vcfRec.getRef() + Constants.MUT_DELIM + vcfRec.getAlt()); -// qpr.getVcfRecord().setFilter(vcfRec.getAnnotation()); - qpr.setClassification(Classification.SOMATIC); - return qpr; - } - - - public static void getPileup(QJumper jumper, QSnpRecord record) throws Exception { - - final List firstSet = jumper.getRecordsAtPosition(record.getChromosome(), record.getPosition()); -// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); - - - examinePileup(firstSet, record); - - -// char mutation = record.getMutation().charAt(record.getMutation().length() -1); -// boolean mutationFoundInNormal = false; 
-// int normalCoverage = 0; -// for (SAMRecord sam : firstSet ) { -// if ( ! sam.getDuplicateReadFlag()) { -// ++normalCoverage; -// -// // need to get the base at the position -// int offset = record.getPosition() - sam.getAlignmentStart(); -// if (offset < 0) throw new Exception("invalid start position!!!"); -// -// if (sam.getReadBases()[offset] == mutation) { -// mutationFoundInNormal = true; -// break; -// } -// } -// } -// -// if (mutationFoundInNormal) { -// record.addAnnotation("mutation also found in pileup of normal"); -// } -// -// record.setNormalCount(normalCoverage); -// -// if (normalCoverage < 12) -// record.addAnnotation("less than 12 reads coverage in normal"); - - } - - - public static void examinePileup(List sams, QSnpRecord record) throws Exception { - - final char mutation = record.getAlt().charAt(0); -// final char mutation = record.getMutation().charAt(record.getMutation().length() -1); - boolean mutationFoundInNormal = false; - int normalCoverage = 0; - for (final SAMRecord sam : sams ) { - if ( ! sam.getDuplicateReadFlag()) { - ++normalCoverage; - - // need to get the base at the position -// int offset = record.getPosition() - sam.getUnclippedStart(); - int offset = record.getPosition() - sam.getAlignmentStart(); - if (offset < 0) throw new Exception("invalid start position!!!: "+ sam.format()); - - if (offset >= sam.getReadLength()) { -// throw new Exception("offset [position: " + record.getPosition() + ", read start pos(unclipped): " + sam.getUnclippedStart() + ", read end pos(unclipped): " + sam.getUnclippedEnd()+ "] is larger than read length!!!: " + sam.format()); - // set to last entry in sequence - offset = sam.getReadLength() -1; - } - - if (sam.getReadBases()[offset] == mutation) { - mutationFoundInNormal = true; -// break; - } - } - } - - if (mutationFoundInNormal) { - VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_MUTATION_IN_NORMAL); - } - -// record.setNormalCount(normalCoverage); - - if (normalCoverage < 12) { - VcfUtils.updateFilter(record.getVcfRecord(), VcfHeaderUtils.FILTER_COVERAGE); - } - - - } - - -// private static void getPileup(VCFRecord record) { -// -// List firstSet = jumper1.getRecordsAtPosition(record.getChromosome(), record.getPosition()); -//// List secondSet = jumper2.getRecordsAtPosition(record.getChromosome(), record.getPosition()); -// -// int normalCoverage = 0; -// for (SAMRecord sam : firstSet ) { -// if ( ! 
sam.getDuplicateReadFlag()) -// ++normalCoverage; -// } -// -// -//// int normalCoverage = firstSet.size(); -//// int normalCoverage = firstSet.size() + secondSet.size(); -// record.setNormalCoverage(normalCoverage); -// -// if (normalCoverage < 12) -// record.addAnnotation("less than 12 reads coverage in normal"); -// -// } - - - private static void addGermlineDBData(String germlineDBFile) throws IOException { - - final GermlineDBFileReader reader = new GermlineDBFileReader(new File(germlineDBFile)); - // create map of SOMATIC classified SNPs - final Map somaticPileupMap = new HashMap(qPileupRecords.size(), 1); - for (final QSnpRecord pileupRecord : qPileupRecords) { - somaticPileupMap.put(ChrPointPosition.valueOf(pileupRecord.getChromosome(), pileupRecord.getPosition()), pileupRecord); - } - - int updateCount = 0, count = 0; - try { - for (final GermlineDBRecord rec : reader) { - - // get QCMG chromosome from map - final String chr = ensembleToQCMG.get(rec.getChromosome()); - final ChrPosition id = ChrPointPosition.valueOf(chr, rec.getPosition()); - - final QSnpRecord qpr = somaticPileupMap.get(id); - if (null != qpr && null != qpr.getAlt() && (null == qpr.getAnnotation() || ! qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_GERMLINE))) { - final String mutation = qpr.getAlt(); - if (mutation.length() == 3) { - final char c = mutation.charAt(2); - - final GenotypeEnum germlineDBGenotype = BaseUtils.getGenotypeEnum(rec.getNormalGenotype()); - if (germlineDBGenotype.containsAllele(c)) { - updateCount++; - - VcfUtils.updateFilter(qpr.getVcfRecord(), VcfHeaderUtils.FILTER_GERMLINE); - } - - - } else { - logger.info("mutation string length: " + mutation.length()); - } - } - - if (++count % 1000000 == 0) - logger.info("hit " + count + " germline reords"); - - } - } finally { - reader.close(); - } - logger.info("updated: " + updateCount + " somatic positions with germlineDB info"); - } - - private static void loadGATKData(String pileupFile, Map map) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - - final VCFFileReader reader = new VCFFileReader(new File(pileupFile)); - try { - for (final VcfRecord qpr : reader) { - map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()), new QSnpGATKRecord(qpr)); - } - } finally { - reader.close(); - } - } - } - - public static void main(String[] args) throws Exception { - final GatkUniqueSnps gus = new GatkUniqueSnps(); - final int exitStatus = gus.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - final Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! 
options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logger = QLoggerFactory.getLogger(GatkUniqueSnps.class, options.getLogFile(), options.getLogLevel()); - logger.logInitialExecutionStats("GatkUniqueSnps", GatkUniqueSnps.class.getPackage().getImplementationVersion()); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (final String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - ids = options.getIds(); - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- b/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- deleted file mode 100644 index 6b3b7f4a7..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/Mule.java-- +++ /dev/null @@ -1,85 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; - -public class Mule { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - - private static QLogger logger; - - public int engage() { - return 1; - } - - - - public static void main(String[] args) throws Exception { - Mule sp = new Mule(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(Mule.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("Example", Mule.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! 
FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- deleted file mode 100644 index 63193c01a..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java-- +++ /dev/null @@ -1,802 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import htsjdk.samtools.SAMRecord; - -import org.qcmg.chrconv.ChrConvFileReader; -import org.qcmg.chrconv.ChromosomeConversionRecord; -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.Genotype; -import org.qcmg.common.util.BaseUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.dbsnp.Dbsnp130Record; -import org.qcmg.dbsnp.DbsnpFileReader; -import org.qcmg.qmule.gff3.GFF3FileReader; -import org.qcmg.qmule.gff3.GFF3Record; -import org.qcmg.picard.QJumper; -import org.qcmg.pileup.PileupFileReader; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.record.Record; -import org.qcmg.unused.illumina.IlluminaFileReader; -import org.qcmg.unused.illumina.IlluminaRecord; -import org.qcmg.vcf.VCFFileReader; - -public class SnpPicker { - - private static final char DEFAULT_CHAR = '\u0000'; - private static QLogger logger; -// private static DecimalFormat df = new DecimalFormat("0.0000"); - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private static boolean isNormal; - -// private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - - Map illuminaMap = new HashMap(1000000,0.99f); // not expecting more than 1000000 - - Map variantMap = new HashMap(2000000); - - // map to hold chromosome conversion data - Map gffToQCMG = new HashMap(100, 0.99f); - -// List illuminaRecords = new ArrayList(); -// List dbSNPRecords = new ArrayList(13000000); - - private int engage() throws Exception { - - // populate the chromosome conversion map - logger.info("about to load chromosome conversion data"); - loadChromosomeConversionData(); - logger.info("about to load chromosome conversion data - DONE"); - - // we are working off the raw illumina data here - first convert it into filtered format, and use that as the input - - logger.info("About to load raw illumina data"); - loadRawIlluminaData(); -// logger.info("No of variant records: " + variantMap.size() + " in file: " + 
cmdLineInputFiles[0]); - - logger.info("About to load gff3 data"); - loadGff3Data(); - logger.info("No of variant records: " + variantMap.size()); - -// logger.info("About to load vcf data"); -// loadVCFData(); -// logger.info("No of variant records: " + variantMap.size()); - - logger.info("About to load qsnp data"); - loadQSnpData(); - logger.info("No of variant records: " + variantMap.size()); - - - - - logger.info("About to load dbSNP data"); - loadDbSnpData(); -// logger.info("No of variant records: " + variantMap.size()); - - // update variantMap with details from illuminaMap - logger.info("About to load filtered illumina data into variant map"); - convertIlluminaToVariant(); - logger.info("About to load filtered illumina data into variant map - DONE"); - - // get some stats - displayStats(); - - // pileup - logger.info("time for pileup..."); - getPileup(); - logger.info("time for pileup - DONE"); - - // more stats - displayStats2(); - - logger.info("Will now attempt to write out variant data" ); - outputVariantData(); - logger.info("Will now attempt to write out variant data - DONE"); - - return exitStatus; - } - - private void getPileup() throws Exception { - QJumper qj = new QJumper(); - qj.setupReader(cmdLineInputFiles[5], cmdLineInputFiles[6]); - - VariantRecord rec; - StringBuilder pileup = new StringBuilder(); - List reads; -// String chr; - int position; - int offset; - - int pileupCount = 0; - for (Map.Entry entry : variantMap.entrySet()) { - // only want pileup if we have gff or vcf data - rec = entry.getValue(); - if (DEFAULT_CHAR != rec.getGffRef() || null != rec.getVcfGenotype()) { -// chr = ( ! entry.getKey().getChromosome().startsWith("GL") ? "chr" : "") + entry.getKey().getChromosome(); - - reads = qj.getRecordsAtPosition(entry.getKey().getChromosome(), entry.getKey().getStartPosition()); - // do something with the reads - position = entry.getKey().getStartPosition(); - for (SAMRecord sr : reads) { - offset = position - sr.getAlignmentStart(); - pileup.append((char)sr.getReadBases()[offset]); - } - rec.setPileup(pileup.toString()); - - // reset the StringBuilder - pileup.setLength(0); - - if (++pileupCount % 1000 == 0) - logger.info("Run " + pileupCount + " pileups so far, " + reads.size() + " sam records returned from picard"); - } - } - } - - private void loadChromosomeConversionData() { - String chrConvFile = cmdLineInputFiles[4]; - ChrConvFileReader reader = null; - try { - reader = new ChrConvFileReader(new File(chrConvFile)); - } catch (Exception e) { - logger.error("Exception caught whilst trying to instantiate ChrConvFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - for (ChromosomeConversionRecord record : reader) { - // add extra map inserts here as required - // diBayes field is no longer present in chr conv file -// gffToQCMG.put(record.getDiBayes(), record.getQcmg()); - // guessing we want ensemble in here as the key - gffToQCMG.put(record.getEnsembleV55(), record.getQcmg()); - } - - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close ChrConvFileReader", e); - exitStatus = -1; - } - } - } - - private void displayStats() { - int illuminaOnly = 0; - int gff3Only = 0; - int vcfOnly = 0; - int vcfANDgff = 0; - int vcfANDillumina = 0; - int gffANDillumina = 0; - int allThree = 0; - for (VariantRecord record : variantMap.values()) { - - boolean illuminaDataPresent = null != record.getIlluminaRef(); - boolean gffDataPresent = DEFAULT_CHAR != record.getGffRef(); - boolean vcfDataPresent = 
DEFAULT_CHAR != record.getVcfRef(); - - if (illuminaDataPresent && gffDataPresent && vcfDataPresent) { - allThree++; - record.setPositionMatch("IGV"); - } else if (gffDataPresent && vcfDataPresent) { - vcfANDgff++; - record.setPositionMatch("GV"); - } else if (illuminaDataPresent && vcfDataPresent) { - vcfANDillumina++; - record.setPositionMatch("IV"); - } else if (illuminaDataPresent && gffDataPresent) { - gffANDillumina++; - record.setPositionMatch("IG"); - } else if ( gffDataPresent) { - gff3Only++; - record.setPositionMatch("G"); - }else if ( vcfDataPresent) { - vcfOnly++; - record.setPositionMatch("V"); - }else if ( illuminaDataPresent) { - illuminaOnly++; - record.setPositionMatch("I"); - } - - record.setGenotypeMatch(getGenotypeMatchInfo(record)); - } - - logger.info("allThree: " + allThree); - logger.info("illuminaOnly: " + illuminaOnly); - logger.info("gff3Only: " + gff3Only); - logger.info("vcfANDgff: " + vcfANDgff); - logger.info("vcfANDillumina: " + vcfANDillumina); - logger.info("gffANDillumina: " + gffANDillumina); - logger.info("vcfOnly: " + vcfOnly); - - int total = allThree + illuminaOnly + gff3Only + vcfANDgff + vcfANDillumina + gffANDillumina + vcfOnly; - logger.info("Sum of above numbers: " + total); - logger.info("No of records in map: " + variantMap.size()); - - } - - private void displayStats2() { - final String IGV = "IGV"; - final String IG = "IG"; - final String IV = "IV"; - final String GV = "GV"; - final String I = "I"; - final String G = "G"; - final String V = "V"; - - int positionIGV=0, positionIG=0, positionIV=0, positionGV=0, positionI=0, positionG=0, positionV = 0; - int pIGVgIGV=0, pIGVgIG=0, pIGVgIV=0, pIGVgGV=0; - int pIGgIG=0; - int pIVgIV=0; - int pGVgGV=0; - - - for (VariantRecord record : variantMap.values()) { - - String positionMatch = record.getPositionMatch(); - String genotypeMatch = record.getGenotypeMatch(); - - if (IGV.equals(positionMatch)) { - positionIGV++; - if (IGV.equals(genotypeMatch)) pIGVgIGV++; - else if (IG.equals(genotypeMatch)) pIGVgIG++; - else if (IV.equals(genotypeMatch)) pIGVgIV++; - else if (GV.equals(genotypeMatch)) pIGVgGV++; - - } else if (IG.equals(positionMatch)) { - positionIG++; - if (IG.equals(genotypeMatch)) pIGgIG++; - - } else if (IV.equals(positionMatch)) { - positionIV++; - if (IV.equals(genotypeMatch)) pIVgIV++; - - } else if (GV.equals(positionMatch)) { - positionGV++; - if (GV.equals(genotypeMatch)) pGVgGV++; - - } else if (I.equals(positionMatch)) positionI++; - else if ( G.equals(positionMatch)) positionG++; - else if ( V.equals(positionMatch)) positionV++; - } - - logger.info("position IGV: " + positionIGV + ", genotype IGV: " + pIGVgIGV + ", genotype IG: " + pIGVgIG + ", genotype IV: " + pIGVgIV + ", genotype GV: " + pIGVgGV); - logger.info("position IG: " + positionIG + ", genotype IG: " + pIGgIG); - logger.info("position IV: " + positionIV + ", genotype IV: " + pIVgIV); - logger.info("position GV: " + positionGV + ", genotype GV: " + pGVgGV); - - logger.info("position I: " + positionI); - logger.info("position G: " + positionG); - logger.info("position V: " + positionV); - - int total = positionIGV + positionIG + positionIV + positionGV + positionI + positionG + positionV; - logger.info("Sum of above numbers: " + total); - logger.info("No of records in map: " + variantMap.size()); - - } - - private String getGenotypeMatchInfo(VariantRecord record) { - Genotype illuminaGen = BaseUtils.getGenotype(record.getIllAllele1() , record.getIllAllele2()); -// String illuminaGen = record.getIlluminaRef(); - 
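// the returned code records which genotype calls agree: IGV = Illumina, gff3 and vcf/qsnp
// all match, IG / IV / GV = only that pair matches, null = no agreement.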
Genotype gffGen = BaseUtils.getGenotypeFromIUPACCode(record.getGffGenotype()); - Genotype vcfGen = null; - if (DEFAULT_CHAR != record.getVcfAlt()) - vcfGen = BaseUtils.getGenotypeFromVcf(record.getVcfGenotype(), record.getVcfRef(), record.getVcfAlt()); - else - vcfGen = BaseUtils.getGenotype(record.getVcfGenotype()); - - String result = null; - - if (illuminaGen.equals( gffGen) && illuminaGen.equals(vcfGen)) result = "IGV"; - else if (illuminaGen.equals(gffGen)) result = "IG"; - else if (illuminaGen.equals(vcfGen)) result = "IV"; - else if (null != gffGen && gffGen.equals(vcfGen)) result = "GV"; -// if (doStringsMatch(illuminaGen, gffGen) && doStringsMatch(illuminaGen, vcfGen)) result = "IGV"; -// else if (doStringsMatch(illuminaGen, gffGen)) result = "IG"; -// else if (doStringsMatch(illuminaGen, vcfGen)) result = "IV"; -// else if (doStringsMatch(gffGen, vcfGen)) result = "GV"; - - return result; - } - - private boolean doStringsMatch(String a, String b) { - return null == a ? false : a.equals(b); - } - - private void loadDbSnpData() { - // update records with dbsnp info - // should be second of the input files - String dbSNPFile = cmdLineInputFiles[3]; - DbsnpFileReader dbSNPReader = null; - try { - dbSNPReader = new DbsnpFileReader(new File(dbSNPFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate DbsnpFileReader", e); - exitStatus = -1; - } - - int updateCount = 0; - int noOfDbSnps = 0; - if (null != dbSNPReader) { - - ChrPosition varId; - VariantRecord varRec; - IlluminaRecord illRec; - int illuminaDbSnpCount = 0; - - for (Dbsnp130Record rec : dbSNPReader) { - // update illumina array with dbSNP details - illRec = illuminaMap.get(rec.getRefSnp()); - if (null != illRec) { - if (null != illRec.getChr()) { - logger.info("illumina rec: " + illRec.getChr() + ":" + illRec.getStart() + ":" + illRec.getSnpId() +" has already been updated - dbSNP: " + rec.getChromosome() + ":" + rec.getChromosomePosition() + ":" + rec.getRefSnp()); - // dbSNP id has more than 1 chr and position - create another IlluminaRecord in the variantMap - //TODO deal with multiple dbSnps for same id here!!! 
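// otherwise the else branch below copies the dbSNP chromosome and position onto the
// Illumina record and works out the strand by comparing the Illumina and dbSNP alleles
// (see updateIlluminaRecord, below).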
- } else { - updateIlluminaRecord(illRec, rec); - } - illuminaDbSnpCount++; - } - - varId = ChrPointPosition.valueOf(rec.getChromosome(), rec.getChromosomePosition()); - // lookup variant map to see if we have a matching record - varRec = variantMap.get(varId); - if (null == varRec && null != illRec && illRec.isSnp()) { - // don't have an existing record at this position, but we want to put illumina data in here if its a snp - varRec = new VariantRecord(); - variantMap.put(varId, varRec); - } - - if (null != varRec) { - // update required fields - varRec.setDbSnpID(rec.getRefSnp()); - varRec.setDbSnpStrand(rec.getStrand().charAt(0)); - varRec.setDbSnpRef_Alt(rec.getRefGenome() + "__" + rec.getVariant()); - - if (++updateCount % 100000 == 0) - logger.info("updated " + updateCount + " variant records with dbSNP ids"); - } - -// dbSNPRecords.add(rec); - if (++noOfDbSnps % 1000000 == 0) - logger.info("hit " + noOfDbSnps + " dbSnp records"); - } - - logger.info("match count for dbSnp and Illumina: " + illuminaDbSnpCount); - - try { - dbSNPReader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close DbsnpFileReader", e); - exitStatus = -1; - } - } - - logger.info("No of dbSnp records: " + noOfDbSnps + " in file: " + dbSNPFile); - logger.info("No of updated variant records: " + updateCount); - } - - private void loadVCFData() { - String vcfFile = cmdLineInputFiles[2]; - VCFFileReader reader = null; - try { - reader = new VCFFileReader(new File(vcfFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate VCFFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - int vcfCount = 0; - ChrPosition id; - VariantRecord value; - - for (VcfRecord rec : reader) { - - id = ChrPointPosition.valueOf(rec.getChromosome(), rec.getPosition()); - - value = variantMap.get(id); - if (null == value) { - value = new VariantRecord(); - variantMap.put(id, value); - } - value.setVcfRef(rec.getRefChar()); - value.setVcfAlt(rec.getAlt().charAt(0)); - value.setVcfGenotype(VcfUtils.getGenotypeFromGATKVCFRecord(rec)); - vcfCount++; - } - logger.info("there were " + vcfCount + " records in the vcf file"); - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close VCFFileReader", e); - exitStatus = -1; - } - } - } - - private void loadQSnpData() { - String qSnpFile = cmdLineInputFiles[2]; - PileupFileReader reader = null; - try { - reader = new PileupFileReader(new File(qSnpFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate PileupFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - int vcfCount = 0; - ChrPosition id; - VariantRecord value; - - for (String rec : reader) { -// for (PileupRecord rec : reader) { - // got some work to do here - need to split the pileup attribute to construct the object - String [] params = TabTokenizer.tokenize(rec); -// String [] params = tabbedPattern.split(rec.getPileup(), -1); - - // skip if the tumour genotype is null - String genotype = params[params.length-(isNormal ? 2 : 1)]; - if (null != genotype && ! 
"null".equals(genotype)) { - - id = ChrPointPosition.valueOf(params[0], Integer.parseInt(params[1])); - - value = variantMap.get(id); - if (null == value) { - value = new VariantRecord(); - variantMap.put(id, value); - } - value.setVcfRef(params[2].charAt(0)); - // value.setVcfAlt(rec.getAlt()); - value.setVcfGenotype(genotype); - vcfCount++; - } - } - logger.info("there were " + vcfCount + " records in the qsnp file"); - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close PileupFileReader", e); - exitStatus = -1; - } - } - } - - private void loadGff3Data() { - String gff3File = cmdLineInputFiles[1]; - GFF3FileReader reader = null; - try { - reader = new GFF3FileReader(new File(gff3File)); - } catch (Exception e) { - logger.error("Exception caught whilst trying to instantiate GFF3FileReader", e); - exitStatus = -1; - } - - if (null != reader) { - int gff3Count = 0; - ChrPosition id; - VariantRecord value; - String chr; - - for (GFF3Record rec : reader) { - // get QCMG chromosome from map - chr = gffToQCMG.get(rec.getSeqId()); - - id = ChrPointPosition.valueOf(chr, rec.getStart()); - - value = variantMap.get(id); - if (null == value) { - value = new VariantRecord(); - variantMap.put(id, value); - } - String attributes = rec.getAttributes(); - char genotype = attributes.charAt(attributes.indexOf("genotype=")+9); - char reference = attributes.charAt(attributes.indexOf("reference=")+10); -// value.setGffAlt(genotype+""); - value.setGffGenotype(genotype); - value.setGffRef(reference); - gff3Count++; - } - logger.info("there were " + gff3Count + " records in the gff3 file"); - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close GFF3FileReader", e); - exitStatus = -1; - } - } - } - - private void loadRawIlluminaData() { - String illuminaFile = cmdLineInputFiles[0]; - - isNormal = illuminaFile.contains("ND_"); - - IlluminaFileReader reader = null; - try { - reader = new IlluminaFileReader(new File(illuminaFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); - exitStatus = -1; - } - - if (null != reader) { - IlluminaRecord tempRec; - for (Record rec : reader) { - tempRec = (IlluminaRecord) rec; - illuminaMap.put(tempRec.getSnpId(), tempRec); - } - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close IlluminaFileReader", e); - exitStatus = -1; - } - } - logger.info("Loaded " + illuminaMap.size() + " entries into the illumina map"); - } - -// private void loadIlluminaData() { -// String illuminaFile = cmdLineInputFiles[0]; -// IlluminaFileReader reader = null; -// try { -// reader = new IlluminaFileReader(new File(illuminaFile)); -// } catch (Exception e) { -// logger.error("Error caught whilst trying to instantiate IlluminaFileReader", e); -// exitStatus = -1; -// } -// -// if (null != reader) { -// VariantID id; -// IlluminaRecord tempRec; -// -// for (Record rec : reader) { -// tempRec = (IlluminaRecord) rec; -// -// id = new VariantID(tempRec.getChr(), tempRec.getStart()); -// -// VariantRecord value = variantMap.get(id); -// if (null == value) { -// value = new VariantRecord(); -// variantMap.put(id, value); -// } -// value.setIlluminaSNP(tempRec.getSnp()); -// } -// try { -// reader.close(); -// } catch (IOException e) { -// logger.error("IOException caught whilst trying to close IlluminaFileReader", e); -// exitStatus = -1; -// } -// } -// } - - private void 
convertIlluminaToVariant() { - ChrPosition id; - VariantRecord value; - - // loop through the illumina map converting all entries into the variantMap - for (IlluminaRecord illuminaRec : illuminaMap.values()) { - - // TODO check this !!! - // ignore records that did not have a dbSNP - if (null != illuminaRec.getChr()) { - - id = ChrPointPosition.valueOf(illuminaRec.getChr(), illuminaRec.getStart()); - - value = variantMap.get(id); - if (null == value && illuminaRec.isSnp()) { - // only want to populate our map with illumina data that does not have a corresponding gff or vcf record - // if it contains a snp - value = new VariantRecord(); - variantMap.put(id, value); - } - - if (null != value) { - value.setDbSnpID(illuminaRec.getSnpId()); -// value.setIlluminaAlt(illuminaRec.getRefGenomeRefSNPAllele()); - value.setIlluminaRef(illuminaRec.getSnp()); - value.setIllAllele1(illuminaRec.getFirstAllele()); - value.setIllAllele2(illuminaRec.getSecondAllele()); - value.setIllGCScore(illuminaRec.getGCScore()); - value.setIllTypeHom(illuminaRec.isHom()); - } - } - } - - // clear illuminaMap - no longer required - illuminaMap.clear(); - } - - - private void updateIlluminaRecord(IlluminaRecord illuminaRec, Dbsnp130Record dbSnpRec) { - // standard value setting here... - char dbSnpStrand = dbSnpRec.getStrand().charAt(0); - illuminaRec.setChr(dbSnpRec.getChromosome()); - illuminaRec.setStart(dbSnpRec.getChromosomePosition()); -// illuminaRec.setRefGenomeRefSNPAllele(dbSnpRec.getRefGenome() + "__" + dbSnpRec.getVariant()); - - // now gets a bit more interesting - char strand; - // if illumina alleles are equal to dbsnp alleles - if (BaseUtils.areGenotypesEqual(dbSnpRec.getVariant(), illuminaRec.getSnp())) { - strand = dbSnpStrand; - } else strand = '+' == dbSnpStrand ? '-' : '+'; -// if (illuminaRec.getReference().charAt(1) == dbAlleles.charAt(0) && -// illuminaRec.getReference().charAt(3) == dbAlleles.charAt(2)) { -// strand = dbSnpStrand; -// } else strand = '+' == dbSnpStrand ? 
'-' : '+'; - - // no longer switch the illumina snp call, but the actual allele data -// if ('-' == strand) -// illuminaRec.setReference(BaseUtils.getComplementFromString(illuminaRec.getReference())); -// else -// illuminaRec.setReference(illuminaRec.getReference().substring(1, illuminaRec.getReference().length()-1)); - if ('-' == strand) { - illuminaRec.setFirstAllele(BaseUtils.getComplement(illuminaRec.getFirstAllele())); - illuminaRec.setSecondAllele(BaseUtils.getComplement(illuminaRec.getSecondAllele())); - } - // trim illumina snp - illuminaRec.setSnp(illuminaRec.getSnp().substring(1, illuminaRec.getSnp().length()-1)); - - // set snp - illuminaRec.setSnp(isSnp(dbSnpRec.getRefGenome(), illuminaRec.getFirstAllele(), illuminaRec.getSecondAllele())); - } - - private boolean isSnp(String ref, char alleleOne, char alleleTwo) { - if (null == ref || DEFAULT_CHAR == alleleOne || DEFAULT_CHAR == alleleTwo) - return false; - return ref.charAt(0) != alleleOne || ref.charAt(0) != alleleTwo; - } -// private boolean isSnp(String ref, String genotype) { -// if (null == ref || null == genotype) -// return false; -// // assume ref is of type A -// // assume genotype is of the form A/G -// return ref.charAt(0) != genotype.charAt(0) || ref.charAt(0) != genotype.charAt(2); -// } - - - private void outputVariantData() { - FileWriter allRecordsWriter = null; - FileWriter nonDbSnpwriter = null; - try { - allRecordsWriter = new FileWriter(new File(cmdLineOutputFiles[0])); // should be the first output file supplied - nonDbSnpwriter = new FileWriter(new File(cmdLineOutputFiles[1])); // should be the second output file supplied - allRecordsWriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP - "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina - "gff3_ref\talt\tgen" + //gff - "\tvfc_ref\talt\tgen\t" + //vcf - "pileup\t" + //pileup - "posMatch\tgenMatch\n"); //matching - - nonDbSnpwriter.write("#chr\tstart\tdbSNP_id\tstrand\trg_rsa\t" + //dbSNP - "Ill_gc\ta1\ta2\ttype\tref\t" + //illumina - "gff3_ref\talt\tgen" + //gff - "\tvfc_ref\talt\tgen\t" + //vcf - "pileup\n" + //pileup - "posMatch\tgenMatch\n"); //matching - } catch (IOException ioe) { - logger.error("IOException caught whilst outputting data", ioe); - } - - //plonk the data into a TreeMap to bring some order to the proceedings.. - TreeMap sortedVariantMap = new TreeMap(variantMap); - - ChrPosition id; - VariantRecord value; -// String chr; - - for (Map.Entry entry : sortedVariantMap.entrySet()) { - id = entry.getKey(); - value = entry.getValue(); -// chr = ( ! id.getChromosome().startsWith("GL") ? 
"chr" : "") + id.getChromosome(); - - try { - allRecordsWriter.write(id.getChromosome() + "\t" + - id.getStartPosition() + "\t" + - value.formattedRecord() ); - // only want non dbSNP records - if (null == value.getDbSnpID()) { - nonDbSnpwriter.write(id.getChromosome() + "\t" + - id.getStartPosition() + "\t" + - value.formattedRecord() ); - } - } catch (IOException e) { - logger.error("IOException caught whilst outputting data", e); - } - } - - // close up - try { - allRecordsWriter.close(); - nonDbSnpwriter.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close output files", e); - } - } - - - public static void main(String[] args) throws Exception { - SnpPicker sp = new SnpPicker(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(SnpPicker.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("SnpPicker", SnpPicker.class.getPackage().getImplementationVersion()); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- deleted file mode 100644 index 7e6275fe1..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java-- +++ /dev/null @@ -1,200 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Pattern; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.TabTokenizer; -import org.qcmg.pileup.PileupFileReader; - -public class UniqueQSnps { - - private static final QLogger logger = QLoggerFactory.getLogger(UniqueQSnps.class); - - private static Map qSnpPileup = new HashMap(10000); -// private static Map qSnpPileup = new HashMap(10000); - private static Map gatkVcfs = new HashMap(10000); -// private static Map gatkVcfs = new HashMap(10000); - private static Map verifiedSNPs = new HashMap(500); -// private static Map verifiedSNPs = new HashMap(500); - - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - - - public static void main(String[] args) throws Exception { - logger.info("hello..."); - - String filename = args[0]; - boolean runQPileup = true; - // filename type depends on whether to load qpileup or vcf - if (FileUtils.isFileTypeValid(filename, "vcf")) { - runQPileup = false; - } - loadVerifiedSnps(args[1]); - logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); - - - if (runQPileup) { - // load the existing pileup into memory - logger.info("running in pileup mode"); - loadQPileup(args[0]); - logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); - examine(args[2]); - } else { - logger.info("running in vcf mode"); - loadGatkData(args[0]); - logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); - examineVCFs(args[2]); - } - - - // load the existing pileup into memory - - examine(args[2]); - logger.info("goodbye..."); - } - - - private static void examine(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : qSnpPileup.entrySet()) { - ++totalCount; - String verifiedRecord = verifiedSNPs.get(entry.getKey()); -// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); - String qSnpRecord = entry.getValue(); - - if (null == verifiedRecord) { - String [] params = TabTokenizer.tokenize(qSnpRecord); -// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); - String annotation = params[params.length-1]; - if ("--".equals(annotation)) { - ++uniqueQSnpClassACount; - writer.write(qSnpRecord + "\n"); - } else if ("less than 12 reads coverage in normal".equals(annotation)) { - ++uniqueQSnpClassBCount; - writer.write(qSnpRecord + "\n"); - } - } - } - - writer.close(); - logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); - } - } - - private static void examineVCFs(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - for (final Map.Entry entry : qSnpPileup.entrySet()) { - ++totalCount; - String verifiedRecord = 
verifiedSNPs.get(entry.getKey()); -// PileupRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); - String qSnpRecord = entry.getValue(); -// PileupRecord qSnpRecord = entry.getValue(); - - if (null == verifiedRecord) { - String [] params = TabTokenizer.tokenize(qSnpRecord); -// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); - String annotation = params[params.length-1]; - if ("--".equals(annotation)) { - ++uniqueQSnpClassACount; - writer.write(qSnpRecord + "\n"); - } else if ("less than 12 reads coverage in normal".equals(annotation)) { - ++uniqueQSnpClassBCount; - writer.write(qSnpRecord + "\n"); - } - } - } - - writer.close(); - logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); - } - } - - - private static void loadQPileup(String pileupFile) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - PileupFileReader reader = new PileupFileReader(new File(pileupFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[params.length-2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - qSnpPileup.put(chrPos,pr); - } - reader.close(); - } - } - - private static void loadGatkData(String pileupFile) throws Exception { - if (FileUtils.canFileBeRead(pileupFile)) { - PileupFileReader reader = new PileupFileReader(new File(pileupFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[params.length-2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - gatkVcfs.put(chrPos,pr); - } - reader.close(); - } - } - - private static void loadVerifiedSnps(String verifiedSnpFile) throws Exception { - if (FileUtils.canFileBeRead(verifiedSnpFile)) { - - PileupFileReader reader = new PileupFileReader(new File(verifiedSnpFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - verifiedSNPs.put(chrPos,pr); - } - reader.close(); - } - } - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- b/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- deleted file mode 100644 index 4ac4d5586..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/UniqueSnps.java-- +++ /dev/null @@ -1,263 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. 
- * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.snppicker; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Pattern; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class UniqueSnps { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - - private static QLogger logger; - -// private static Map qSnpPileup = new HashMap(10000); -// private static Map gatkVcfs = new HashMap(10000); - private static Map verifiedSNPs = new HashMap(500); - private static Map unVerifiedSNPs = new HashMap(10000); - - private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - - - public int engage() throws Exception { - logger.info("hello..."); - - loadVerifiedSnps(cmdLineInputFiles[1]); - logger.info("loaded " + verifiedSNPs.size() + " entries into the verifiedSNPs map"); - if (verifiedSNPs.isEmpty()) exitStatus = 1; - - loadUnverifiedSnps(cmdLineInputFiles[0]); - logger.info("loaded " + unVerifiedSNPs.size() + " entries into the un-verifiedSNPs map"); - if (unVerifiedSNPs.isEmpty()) exitStatus = 1; - - -// examine(args[2]); -// if (runQPileup) { -// // load the existing pileup into memory -// logger.info("running in pileup mode"); -// loadUnverifiedSnps(args[0]); -// logger.info("loaded " + qSnpPileup.size() + " entries into the pileup map"); -// } else { -// logger.info("running in vcf mode"); -// loadGatkData(args[0]); -// logger.info("loaded " + gatkVcfs.size() + " entries into the vcf map"); -// examineVCFs(args[2]); -// } - - - // load the existing pileup into memory - - examine(cmdLineOutputFiles[0]); - logger.info("goodbye..."); - - return exitStatus; - } - - - private static void examine(String outputFile) throws IOException { - if (FileUtils.canFileBeWrittenTo(outputFile)) { - - int totalCount = 0, uniqueClassA = 0, uniqueClassB = 0, uniqueClassC = 0; - - FileWriter writer = new FileWriter(new File(outputFile)); - - // loop through the verified snps - - try { - for (final Map.Entry unVerifiedEntry : unVerifiedSNPs.entrySet()) { - TabbedRecord unVerifiedRecord = unVerifiedEntry.getValue(); - String [] params = tabbedPattern.split(unVerifiedRecord.getData()); - String consequenceType = params[22]; - if (consequenceType.contains("STOP") || consequenceType.contains("NON_SYNONYMOUS")) { - - ++totalCount; - - TabbedRecord verifiedRecord = verifiedSNPs.get(unVerifiedEntry.getKey()); - - if (null == verifiedRecord) { - String annotation = params[params.length-1]; - if ("--".equals(annotation)) { - ++uniqueClassA; - writer.write(unVerifiedRecord.getData() + "\n"); - } else if ("less than 12 reads coverage in normal".equals(annotation) - || "less than 3 reads coverage in normal".equals(annotation)) { - ++uniqueClassB; - writer.write(unVerifiedRecord.getData() + "\n"); - } - } - } - } - } finally { - writer.close(); - } - logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueClassA + ", 
uniqueQSnpCount (class B): " + uniqueClassB ); - } - } - -// private static void examineVCFs(String outputFile) throws IOException { -// if (FileUtils.canFileBeWrittenTo(outputFile)) { -// -// int totalCount = 0, uniqueQSnpClassACount = 0, uniqueQSnpClassBCount = 0; -// -// FileWriter writer = new FileWriter(new File(outputFile)); -// -// // loop through the verified snps -// -// for (final Map.Entry entry : qSnpPileup.entrySet()) { -// ++totalCount; -// TabbedRecord verifiedRecord = verifiedSNPs.get(entry.getKey()); -// TabbedRecord qSnpRecord = entry.getValue(); -// -// if (null == verifiedRecord) { -// String [] params = tabbedPattern.split(qSnpRecord.getPileup()); -// String annotation = params[params.length-1]; -// if ("--".equals(annotation)) { -// ++uniqueQSnpClassACount; -// writer.write(qSnpRecord.getPileup() + "\n"); -// } else if ("less than 12 reads coverage in normal".equals(annotation)) { -// ++uniqueQSnpClassBCount; -// writer.write(qSnpRecord.getPileup() + "\n"); -// } -// } -// } -// -// writer.close(); -// logger.info("totalCount: " + totalCount + ", uniqueQSnpCount (class A): " + uniqueQSnpClassACount + ", uniqueQSnpCount (class B): " + uniqueQSnpClassBCount ); -// } -// } - - - private static void loadUnverifiedSnps(String file) throws Exception { - if (FileUtils.canFileBeRead(file)) { - TabbedFileReader reader = new TabbedFileReader(new File(file)); - try { - for (TabbedRecord tr : reader) { - String [] params = tabbedPattern.split(tr.getData()); - String chrPosition = params[params.length-2]; -// logger.info("chrPosition: " + chrPosition); - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - unVerifiedSNPs.put(chrPos,tr); - } - } finally { - reader.close(); - } - } - } - -// private static void loadGatkData(String pileupFile) throws IOException { -// if (FileUtils.canFileBeRead(pileupFile)) { -// TabbedFileReader reader = new TabbedFileReader(new File(pileupFile)); -// for (TabbedRecord pr : reader) { -// String [] params = tabbedPattern.split(pr.getPileup()); -// String chrPosition = params[params.length-2]; -//// logger.info("chrPosition: " + chrPosition); -// ChrPosition chrPos = new ChrPosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-")))); -// -// gatkVcfs.put(chrPos,pr); -// } -// reader.close(); -// } -// } - - private void loadVerifiedSnps(String verifiedSnpFile) throws Exception { - if (FileUtils.canFileBeRead(verifiedSnpFile)) { - - TabbedFileReader reader = new TabbedFileReader(new File(verifiedSnpFile)); - try { - for (TabbedRecord tr : reader) { - String [] params = tabbedPattern.split(tr.getData()); - String chrPosition = params[2]; - // logger.info("chrPosition: " + chrPosition); - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1),start, start); - - verifiedSNPs.put(chrPos,tr); - } - } finally { - reader.close(); - } - } - } - - public static void main(String[] args) throws Exception { - UniqueSnps sp = new UniqueSnps(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - 
System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(UniqueSnps.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("UniqueSnps", UniqueSnps.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- b/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- deleted file mode 100644 index eefbdd9ed..000000000 --- a/qmule/src/org/qcmg/qmule/snppicker/VariantRecord.java-- +++ /dev/null @@ -1,193 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.snppicker; - -import java.text.DecimalFormat; - -public class VariantRecord { - - private final static char DEFAULT_CHAR = '\u0000'; - private final static DecimalFormat df = new DecimalFormat("0.0000"); - - private String dbSnpID; - private char dbSnpStrand; - private String dbSnpRef_Alt; - private float illGCScore; - private char illAllele1; - private char illAllele2; - private boolean illTypeHom; - private String illuminaRef; -// private String illuminaAlt; - private String illuminaSNP; - private char gffRef; - private char gffGenotype; - private String gffAlt; - private char vcfRef; - private char vcfAlt; - private String vcfGenotype; - private String pileup; - private String positionMatch; - private String genotypeMatch; - - public String getDbSnpID() { - return dbSnpID; - } - public void setDbSnpID(String dbSnpID) { - this.dbSnpID = dbSnpID; - } - public String getIlluminaRef() { - return illuminaRef; - } - public void setIlluminaRef(String illuminaRef) { - this.illuminaRef = illuminaRef; - } -// public String getIlluminaAlt() { -// return illuminaAlt; -// } -// public void setIlluminaAlt(String illuminaAlt) { -// this.illuminaAlt = illuminaAlt; -// } - public char getGffRef() { - return gffRef; - } - public void setGffRef(char gffRef) { - this.gffRef = gffRef; - } - public char getGffGenotype() { - return gffGenotype; - } - public void setGffGenotype(char gffGenotype) { - this.gffGenotype = gffGenotype; - } - public String getGffAlt() { - return gffAlt; - } - public void setGffAlt(String gffAlt) { - this.gffAlt = gffAlt; - } - public char getVcfRef() { - return vcfRef; - } - public void setVcfRef(char vcfRef) { - this.vcfRef = vcfRef; - } - public char getVcfAlt() { - return vcfAlt; - } - public void setVcfAlt(char vcfAlt) { - this.vcfAlt = vcfAlt; - } - public String getVcfGenotype() { - return vcfGenotype; - } - public void setVcfGenotype(String vcfGenotype) { - this.vcfGenotype = vcfGenotype; - } - public void setIlluminaSNP(String illuminaSNP) { - this.illuminaSNP = illuminaSNP; - } - public String getIlluminaSNP() { - return illuminaSNP; - } - - public String formattedRecord() { - StringBuilder sb = new StringBuilder(); - - sb.append(null != dbSnpID ? dbSnpID : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != dbSnpStrand ? dbSnpStrand : ""); - sb.append("\t"); - sb.append(null != dbSnpRef_Alt ? dbSnpRef_Alt : ""); - sb.append("\t"); - sb.append(illGCScore != 0.0f ? df.format(illGCScore) : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != illAllele1 ? illAllele1 : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != illAllele2 ? illAllele2 : ""); - sb.append("\t"); - sb.append(null != illuminaRef ? (illTypeHom ? "hom" : "het") : ""); - sb.append("\t"); - sb.append(null != illuminaRef ? illuminaRef : ""); - sb.append("\t"); -// sb.append(null != illuminaAlt ? illuminaAlt : ""); -// sb.append("\t"); -// sb.append(null != illuminaSNP ? illuminaSNP : ""); -// sb.append("\t"); - sb.append(DEFAULT_CHAR != gffRef ? gffRef : ""); - sb.append("\t"); - sb.append(null != gffAlt ? gffAlt : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != gffGenotype ? gffGenotype : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != vcfRef ? vcfRef : ""); - sb.append("\t"); - sb.append(DEFAULT_CHAR != vcfAlt ? vcfAlt: ""); - sb.append("\t"); - sb.append(null != vcfGenotype ? vcfGenotype: ""); - sb.append("\t"); - sb.append(null != pileup ? pileup: ""); - sb.append("\t"); - sb.append(null != positionMatch ? 
positionMatch: ""); - sb.append("\t"); - sb.append(null != genotypeMatch ? genotypeMatch: ""); - sb.append("\n"); - - return sb.toString(); - } - public float getIllGCScore() { - return illGCScore; - } - public void setIllGCScore(float illGCScore) { - this.illGCScore = illGCScore; - } - public char getIllAllele1() { - return illAllele1; - } - public void setIllAllele1(char illAllele1) { - this.illAllele1 = illAllele1; - } - public char getIllAllele2() { - return illAllele2; - } - public void setIllAllele2(char illAllele2) { - this.illAllele2 = illAllele2; - } - public boolean isIllTypeHom() { - return illTypeHom; - } - public void setIllTypeHom(boolean illTypeHom) { - this.illTypeHom = illTypeHom; - } - public char getDbSnpStrand() { - return dbSnpStrand; - } - public void setDbSnpStrand(char dbSnpStrand) { - this.dbSnpStrand = dbSnpStrand; - } - public String getDbSnpRef_Alt() { - return dbSnpRef_Alt; - } - public void setDbSnpRef_Alt(String dbSnpRefAlt) { - dbSnpRef_Alt = dbSnpRefAlt; - } - public void setPileup(String pileup) { - this.pileup = pileup; - } - public String getPileup(String pileup) { - return pileup; - } - public String getPositionMatch() { - return positionMatch; - } - public void setPositionMatch(String positionMatch) { - this.positionMatch = positionMatch; - } - public String getGenotypeMatch() { - return genotypeMatch; - } - public void setGenotypeMatch(String genotypeMatch) { - this.genotypeMatch = genotypeMatch; - } - -} diff --git a/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- b/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- deleted file mode 100644 index 3a1e039aa..000000000 --- a/qmule/src/org/qcmg/qmule/util/IGVBatchFileGenerator.java-- +++ /dev/null @@ -1,78 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.util; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.List; - -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.util.FileUtils; - -public class IGVBatchFileGenerator { - - public static final String GENOME = "GRCh37_ICGC_standard_v2"; - - - public static void generate(final List positions, final String outputFile) throws IOException { - // check that list is not empty - if (positions == null || positions.isEmpty()) - throw new IllegalArgumentException("Null or empty list passed to IGVBatchFileGenerator"); - - // can we write to the outputFile? - File output = new File(outputFile); - if( ! 
FileUtils.canFileBeWrittenTo(output)) - throw new IllegalArgumentException("Can't write to output file: " + outputFile); - - FileWriter writer = new FileWriter(output); - - try { - writer.write(getHeaderInfo(output)); - - for (ChrPosition position : positions) { - writer.write(getLocationString(position)); - } - - } finally { - writer.close(); - } - - } - - private static String getHeaderInfo(File output) { - String path = output.getParent(); - return "snapshotDirectory " + path + "\n" - + "genome " + GENOME + "\n"; - } - - private static String getLocationString(ChrPosition chrPos) { - return "goto " + chrPos.toIGVString() - + "\nsort base\n" + - "collapse\n" + - "snapshot " + chrPos.getChromosome() + ":" + chrPos.getStartPosition() + ".png\n"; - } - - - -// snapshotDirectory C:/IGV_sessions/exonorama/APGI_1992 -// genome GRCh37_ICGC_standard_v2 -// goto chr8:93156526-93156566 -// sort base -// collapse -// snapshot APGI_1992_SNP_35325-chr8-93156546-var-CtoT-WITHIN_NON_CODING_GENE-ENSG00000233778.png -// goto chr12:114377865-114377905 -// sort base -// collapse -// snapshot APGI_1992_SNP_50905-chr12-114377885-var-GtoC-SYNONYMOUS_CODING-RBM19.png -// goto chr1:228481880-228481920 -// sort base -// collapse -// snapshot APGI_1992_SNP_6964-chr1-228481900-var-GtoA-NON_SYNONYMOUS_CODING-OBSCN.png - - -} diff --git a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- b/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- deleted file mode 100644 index 60389d85b..000000000 --- a/qmule/src/org/qcmg/qmule/util/TabbedDataLoader.java-- +++ /dev/null @@ -1,61 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.util; - -import java.io.File; -import java.util.Map; -import java.util.regex.Pattern; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.string.StringUtils; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class TabbedDataLoader { - - public static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - private static final QLogger logger = QLoggerFactory.getLogger(TabbedDataLoader.class); - - - public static void loadTabbedData(String tabbedDataFile, int position, Map collection) throws Exception { - if (FileUtils.canFileBeRead(tabbedDataFile)) { - - TabbedFileReader reader = new TabbedFileReader(new File(tabbedDataFile)); - try { - for (TabbedRecord tr : reader) { - String [] params = tabbedPattern.split(tr.getData()); - String chrPosition = getStringFromArray(params, position); - - if (null != chrPosition) { - ChrPosition chrPos = StringUtils.getChrPositionFromString(chrPosition); - if (null != chrPos) collection.put(chrPos,tr); - } - } - - logger.info("Added " + collection.size() + " entries to the tabbed data collection"); - - } finally { - reader.close(); - } - } else { - throw new IllegalArgumentException("data file: " + tabbedDataFile + " could not be read"); - } - } - - public static String getStringFromArray(String[] params, int index) { - String result = null; - if (null != params && params.length > 0) { - if (index >= 0) { - result = params[(index > params.length ? params.length : index)]; - } else if (params.length + index >= 0 & params.length + index < params.length){ - result = params[params.length + index]; // adding a negative number! 
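				// Worked example (illustration only; not part of the original source):
				// for params = {"chr1", "100", "A", "C"}, an index of 1 returns "100",
				// while an index of -1 resolves to params[4 - 1] and returns "C", and
				// -2 returns "A" -- negative indices count back from the end of the array.
				// Note that the positive branch above clamps to params.length, which is one
				// past the last valid element, so an oversized positive index still throws
				// ArrayIndexOutOfBoundsException rather than being clamped to a safe value.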
- } - } - return result; - } - -} diff --git a/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- b/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- deleted file mode 100644 index 03a4e2f03..000000000 --- a/qmule/src/org/qcmg/qmule/vcf/CompareVCFs.java-- +++ /dev/null @@ -1,269 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. - * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. - * - * This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.vcf; - -import java.io.File; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.atomic.AtomicLong; - -import htsjdk.samtools.SAMRecord; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.GenotypeEnum; -import org.qcmg.common.util.FileUtils; -import org.qcmg.common.vcf.VcfRecord; -import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.picard.QJumper; -import org.qcmg.picard.util.SAMUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.vcf.VCFFileReader; - -public class CompareVCFs { - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private static QLogger logger; - - private final ConcurrentMap normalVCFMap = new ConcurrentHashMap(12500); //not expecting more than 100000 - private final ConcurrentMap tumourVCFMap = new ConcurrentHashMap(12500); - private final ConcurrentMap uniqueTumourVCFMap = new ConcurrentHashMap(40000); - - public int engage() throws Exception { - - logger.info("loading normal vcf data"); - loadVCFData(cmdLineInputFiles[0], normalVCFMap); - logger.info("loading normal vcf data - DONE [" + normalVCFMap.size() + "]"); - - logger.info("loading tumour vcf data"); - loadVCFData(cmdLineInputFiles[1], tumourVCFMap); - logger.info("loading tumour vcf data - DONE [" + tumourVCFMap.size() + "]"); - - examine(); - - addPileupFromNormalBam(); - - return exitStatus; - } - - private void addPileupFromNormalBam() throws Exception { - // loop through each position in the unique map and get the entries in the normal GATK cleaned BAM file. - int notEnoughCoverage = 0, mutationFoundInNormal = 0; - StringBuilder sb = new StringBuilder(); - QJumper qj = new QJumper(); - qj.setupReader(cmdLineInputFiles[2]); - - for (Entry entry : uniqueTumourVCFMap.entrySet()) { - int position = entry.getKey().getStartPosition(); - boolean foundInNormal = false; - List sams = qj.getOverlappingRecordsAtPosition(entry.getKey().getChromosome(), position, position); - - for (SAMRecord sam : sams) { - int offset = SAMUtils.getIndexInReadFromPosition(sam, position); - if (offset > -1 && offset < sam.getReadLength()) { - char c = sam.getReadString().charAt(offset); - if (c == entry.getValue().getAlt().charAt(0)) { - foundInNormal = true; - mutationFoundInNormal++; - break; - } - } - } - - if ( ! foundInNormal && sams.size() < 8) - notEnoughCoverage++; - else if ( ! 
foundInNormal) - sb.append(entry.getKey().getChromosome() + ":" + position + "\n"); - } - - logger.info("total positions examined: " + uniqueTumourVCFMap.size()); - logger.info("positions where mutation was also found in normal (class C): " + mutationFoundInNormal); - logger.info("positions where coverage in normal was less than 8 (class B): " + notEnoughCoverage); - logger.info("Potential class A positions: "); - logger.info(sb.toString()); - } - - private void examine() { - - final Map diffGenotypes = new HashMap(); - - // we want to know the following... - // number unique to normal - // number unique to tumour - // no of common positions - int normalUnique = 0, tumourUnique = 0, normalAndTumour = 0; - - // for the common positions... - // no that have the same mutation - // no that have a different mutation - // no of those that have the same genotype - - int sameMutation = 0, sameMutationSameGenotype = 0; - int diffMutation = 0, diffMutationSameGenotype = 0; - - // here we go - - for (Entry entry : normalVCFMap.entrySet()) { - - VcfRecord normalVCF = entry.getValue(); - VcfRecord tumourVCF = tumourVCFMap.get(entry.getKey()); - - if (null == tumourVCF) { - normalUnique++; - } else { - ++normalAndTumour; - - // sanity check - compare ref - if not the same - oh dear... - assert normalVCF.getRef().equals(tumourVCF.getRef()); - - // compare mutations - char normalMut = normalVCF.getAlt().charAt(0); - char tumourMut = tumourVCF.getAlt().charAt(0); - - // need to get the genotype from the VCFRecord - - GenotypeEnum normalGenotype = VcfUtils.calculateGenotypeEnum( - normalVCF.getInfo().substring(0, 3), normalVCF.getRefChar(), normalVCF.getAlt().charAt(0)); - GenotypeEnum tumourGenotype = VcfUtils.calculateGenotypeEnum( - tumourVCF.getInfo().substring(0, 3), tumourVCF.getRefChar(), tumourVCF.getAlt().charAt(0)); - - if (normalMut == tumourMut) { - sameMutation++; - if (normalGenotype == tumourGenotype) - ++sameMutationSameGenotype; - else { - RefAndMultiGenotype ramg = new RefAndMultiGenotype(normalVCF.getRefChar(), normalGenotype, tumourGenotype); - AtomicLong al = diffGenotypes.get(ramg); - if (null == al) { - al = new AtomicLong(); - diffGenotypes.put(ramg, al); - } - al.incrementAndGet(); - } - } else { - diffMutation++; - if (normalGenotype == tumourGenotype) - ++diffMutationSameGenotype; - } - } - } - - for (ChrPosition position : tumourVCFMap.keySet()) { - if (null == normalVCFMap.get(position)) { - tumourUnique++; - uniqueTumourVCFMap.put(position, tumourVCFMap.get(position)); - } - } - - // now print out some stats - StringBuilder sb = new StringBuilder("\nSTATS\n"); - sb.append("No of positions in normal map: " + normalVCFMap.size()); - sb.append("\nNo of unique positions in normal map: " + normalUnique); - sb.append("\nNo of positions in tumour map: " + tumourVCFMap.size()); - sb.append("\nNo of unique positions in tumour map: " + tumourUnique); - sb.append("\nNo of shared positions: " + normalAndTumour); - sb.append("\n"); - sb.append("\nNo of positions with same mutation: " + sameMutation); - sb.append("\nNo of positions with same mutation and same genotype: " + sameMutationSameGenotype); - - sb.append("\npositions with same mutation and diff genotype: "); - - for (Entry entry : diffGenotypes.entrySet()) { - sb.append("\n" + entry.getKey().toString() + " count: " + entry.getValue().get()); - } - sb.append("\nNo of positions with diff mutation: " + diffMutation); - sb.append("\nNo of positions with diff mutation and same genotype: " + diffMutationSameGenotype); - - 
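		// Sanity check on the counters above (illustration only; not part of the original source):
		// each shared position increments exactly one of sameMutation or diffMutation, so
		// sameMutation + diffMutation should always equal normalAndTumour, and
		// normalUnique + normalAndTumour should equal normalVCFMap.size().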
logger.info(sb.toString()); - - - } - - private void loadVCFData(String vcfFile, Map map) throws Exception { - if (FileUtils.canFileBeRead(vcfFile)) { - - VCFFileReader reader = new VCFFileReader(new File(vcfFile)); - try { - for (VcfRecord qpr : reader) { - map.put(ChrPointPosition.valueOf(qpr.getChromosome(), qpr.getPosition()),qpr); - } - } finally { - reader.close(); - } - } - } - - - public static void main(String[] args) throws Exception { - CompareVCFs sp = new CompareVCFs(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(CompareVCFs.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareVCFs", CompareVCFs.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } -} diff --git a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- b/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- deleted file mode 100644 index 29bb7c4c1..000000000 --- a/qmule/src/org/qcmg/qmule/vcf/ConvertVcfChr.java-- +++ /dev/null @@ -1,116 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.qmule.vcf; - -import java.io.File; - -import org.qcmg.common.log.QLogger; -import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.common.util.FileUtils; -import org.qcmg.qmule.Messages; -import org.qcmg.qmule.Options; -import org.qcmg.qmule.QMuleException; -import org.qcmg.qmule.tab.TabbedFileReader; -import org.qcmg.qmule.tab.TabbedFileWriter; -import org.qcmg.qmule.tab.TabbedHeader; -import org.qcmg.qmule.tab.TabbedRecord; - -public class ConvertVcfChr { - - private static final String CHR = "chr"; - - private String logFile; - private String[] cmdLineInputFiles; - private String[] cmdLineOutputFiles; - private int exitStatus; - - private static QLogger logger; - - - private int engage() throws Exception { - - // load - if (FileUtils.canFileBeRead(cmdLineInputFiles[0])) { - TabbedFileReader reader = new TabbedFileReader(new File(cmdLineInputFiles[0])); - TabbedHeader header = reader.getHeader(); - - TabbedFileWriter writer = new TabbedFileWriter(new File(cmdLineOutputFiles[0])); - writer.addHeader(header); - - try { - for (TabbedRecord tabRec : reader) { - if ( ! tabRec.getData().startsWith(CHR)) { - tabRec.setData(CHR + tabRec.getData()); - } - writer.add(tabRec); - } - } finally { - try { - writer.close(); - } finally { - reader.close(); - } - } - } - return exitStatus; - } - - public static void main(String[] args) throws Exception { - ConvertVcfChr sp = new ConvertVcfChr(); - int exitStatus = sp.setup(args); - if (null != logger) - logger.logFinalExecutionStats(exitStatus); - - System.exit(exitStatus); - } - - protected int setup(String args[]) throws Exception{ - int returnStatus = -1; - Options options = new Options(args); - - if (options.hasHelpOption()) { - System.err.println(Messages.USAGE); - options.displayHelp(); - returnStatus = 0; - } else if (options.hasVersionOption()) { - System.err.println(Messages.getVersionMessage()); - returnStatus = 0; - } else if (options.getInputFileNames().length < 1) { - System.err.println(Messages.USAGE); - } else if ( ! options.hasLogOption()) { - System.err.println(Messages.USAGE); - } else { - // configure logging - logFile = options.getLogFile(); - logger = QLoggerFactory.getLogger(ConvertVcfChr.class, logFile, options.getLogLevel()); - logger.logInitialExecutionStats("CompareVCFs", ConvertVcfChr.class.getPackage().getImplementationVersion(), args); - - // get list of file names - cmdLineInputFiles = options.getInputFileNames(); - if (cmdLineInputFiles.length < 1) { - throw new QMuleException("INSUFFICIENT_ARGUMENTS"); - } else { - // loop through supplied files - check they can be read - for (int i = 0 ; i < cmdLineInputFiles.length ; i++ ) { - if ( ! FileUtils.canFileBeRead(cmdLineInputFiles[i])) { - throw new QMuleException("INPUT_FILE_READ_ERROR" , cmdLineInputFiles[i]); - } - } - } - - // check supplied output files can be written to - if (null != options.getOutputFileNames()) { - cmdLineOutputFiles = options.getOutputFileNames(); - for (String outputFile : cmdLineOutputFiles) { - if ( ! FileUtils.canFileBeWrittenTo(outputFile)) - throw new QMuleException("OUTPUT_FILE_WRITE_ERROR", outputFile); - } - } - - return engage(); - } - return returnStatus; - } - -} diff --git a/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- b/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- deleted file mode 100644 index b0aad1b7f..000000000 --- a/qmule/src/org/qcmg/qmule/vcf/RefAndMultiGenotype.java-- +++ /dev/null @@ -1,101 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. 
This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.qmule.vcf; - -import org.qcmg.common.model.GenotypeEnum; -import org.qcmg.common.model.Classification; - -public class RefAndMultiGenotype { - - private final char ref; - private final GenotypeEnum normal; - private final GenotypeEnum tumour; - - public RefAndMultiGenotype(char ref, GenotypeEnum normal, GenotypeEnum tumour) { - this.ref = ref; - this.normal = normal; - this.tumour = tumour; - } - - @Override - public String toString() { - return ref + " : " + normal.getDisplayString() + " : " + tumour.getDisplayString() + " : " + getClassification(); - } - - public String getClassification() { - if (normal == tumour) { - return Classification.GERMLINE.name(); - - } else if (normal.isHomozygous() && tumour.isHomozygous()) { - // not equal but both are homozygous - return Classification.SOMATIC.name(); - } else if (normal.isHeterozygous() && tumour.isHeterozygous()) { - // not equal but both are heterozygous - return Classification.SOMATIC.name(); - } - - /////////////////////////////////////////////////////// - // normal is HOM and tumour is HET - /////////////////////////////////////////////////////// - if (normal.isHomozygous() && tumour.isHeterozygous()) { - - GenotypeEnum refAndNormalGenotype = GenotypeEnum.getGenotypeEnum(ref, normal.getFirstAllele()); - - if (tumour == refAndNormalGenotype) { - return Classification.GERMLINE.name(); -// mutation = normal.getFirstAllele() + MUT_DELIM + record.getRef(); - } else { - return Classification.SOMATIC.name(); - } - } - - /////////////////////////////////////////////////////// - // normal is HET and tumour is HOM - ////////////////////////////////////////////////////// - else if (normal.isHeterozygous() && tumour.isHomozygous()){ - - if (normal.containsAllele(tumour.getFirstAllele())) { - return Classification.GERMLINE.name(); - } else { - return Classification.SOMATIC.name(); - } - } - return null; - } - - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((normal == null) ? 0 : normal.hashCode()); - result = prime * result + ref; - result = prime * result + ((tumour == null) ? 
0 : tumour.hashCode()); - return result; - } - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - RefAndMultiGenotype other = (RefAndMultiGenotype) obj; - if (normal == null) { - if (other.normal != null) - return false; - } else if (!normal.equals(other.normal)) - return false; - if (ref != other.ref) - return false; - if (tumour == null) { - if (other.tumour != null) - return false; - } else if (!tumour.equals(other.tumour)) - return false; - return true; - } - -} diff --git a/qmule/test/org/qcmg/qmule/AlignerCompareTest.java-- b/qmule/test/org/qcmg/qmule/AlignerCompareTest.java-- deleted file mode 100644 index 7f02a58bb..000000000 --- a/qmule/test/org/qcmg/qmule/AlignerCompareTest.java-- +++ /dev/null @@ -1,120 +0,0 @@ -package org.qcmg.qmule; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; - - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.SAMRecord; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - - -public class AlignerCompareTest { - public static final String INPUT_SAM1 = "./input1.sam"; - public static final String INPUT_SAM2 = "./input2.sam"; - public static final String OUTPUT_BAM = "./output.bam"; - - @After - public void deleteFiles(){ - //delete inputs - File in1 = new File(INPUT_SAM1); - File in2 = new File(INPUT_SAM2); - in1.delete(); - in2.delete(); - - //delete output - File[] files = new File("./").listFiles(); - for(File f : files) - if(f.toString().startsWith(OUTPUT_BAM)) - f.delete(); - - } - - @Before - public void before(){ - CreateSAMs( ); - } - - - - @Test - public void mainTest() throws Exception{ - - final String[] args1 = { "-i", INPUT_SAM1, "-i", INPUT_SAM2, "-o", OUTPUT_BAM }; - AlignerCompare.main(args1); - - } - - - public static void CreateSAMs(){ - List mydata = new ArrayList(); - - //common - mydata.add("@HD VN:1.4 SO:queryname"); - mydata.add("@SQ SN:GL000196.1 LN:38914"); - - mydata.add("@RG ID:2010072264129530 LB:Library_20100413_C DS:RUNTYPE{50F} SM:S0414_20100607_2_FragBC_bcSample1_F3_bcA10_05"); - mydata.add("@PG ID:2010072264129500 PN:MANUAL"); - mydata.add("603_1107_1232 0 GL000196.1 480 1 25M25H * 0 0 AATCACTTGAACCCAGGAGGCGGAG IIIIIIIIIIIIIIIIIIIIIIII: RG:Z:2010072264129530 CS:Z:T30321120120100120220330223100133302310303131133123 AS:i:24 CQ:Z:BBBB@AAA>><>B@;9AA<:BB=@>:AB<<=@9@7'9<22>?921<:/'1 XN:i:24 NH:i:10 IH:i:2 HI:i:1 CC:Z:GL000247.1 CP:i:35405 MD:Z:25"); - mydata.add("603_1107_1233 163 GL000196.1 36008 29 75M = 36083 142 GGATCTAGAATGCTGAAGGATCTAGTGTGTTGAGGGATCTAGCATGCTGAAGGATCTAGCATGTTAAGGGATCTA BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFI X0:i:1 X1:i:0 ZC:i:5 MD:Z:8G66 PG:Z:MarkDuplicates RG:Z:2010072264129530 XG:i:0 AM:i:29 NM:i:1 SM:i:29 XM:i:1 XO:i:0 XT:A:U"); - mydata.add("603_1107_1233 83 GL000196.1 36083 29 4S67M4S = 36008 -142 TCTAGCATGTCGAGAGATCTAGCATGCTGAAGGATCTAGCATGCTGAAGGATCTAGCATGTTGAGGGTTCTAGTG FFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFBBB ZC:i:5 MD:Z:63A3 PG:Z:MarkDuplicates RG:Z:2010072264129530 XG:i:0 AM:i:29 NM:i:1 SM:i:29 XM:i:1 XO:i:0 XT:A:M"); -//?? 
mydata.add("603_1107_1233 87 GL000196.1 36083 29 4S67M4S = 36008 -142 TCTAGCATGTCGAGAGATCTAGCATGCTGAAGGATCTAGCATGCTGAAGGATCTAGCATGTTGAGGGTTCTAGTG FFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFBBB ZC:i:5 MD:Z:63A3 PG:Z:MarkDuplicates RG:Z:2010072264129530 XG:i:0 AM:i:29 NM:i:1 SM:i:29 XM:i:1 XO:i:0 XT:A:M"); - - mydata.add("603_1108_0001 0 GL000196.1 38525 3 37M5D13H * 0 0 AGGCTGAGGTGGGCGGATCACTTGAGGTCCAGAGTTC IIIIIIIIIIIIIIIII;?IIIB@IIIBAIIIIIIII RG:Z:2010072264129530 CS:Z:T32032122011003302321120122012012221023222003301200 AS:i:30 CQ:Z: mydata1 = new ArrayList(); - mydata1.add("603_1108_0002 73 GL000196.1 319 3 50M = 319 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA" + " X0:i:1 X1:i:0 MD:Z:100 PG:Z:MarkDuplicates RG:Z:2010072264129530 XG:i:0 AM:i:0 NM:i:0 SM:i:37 XM:i:0 XO:i:0 XT:A:U"); - mydata1.add("603_1108_0002 133 GL000196.1 319 0 * = 319 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA RG:Z:2010072264129530 CS:Z:T22012220200333113323133321010013112111011113333112 AS:i:34 CQ:Z:/3:8@62B-*46?-A+B;'A'<9+-/@@6.'@B4,/;@2=+@B)>/?B@A XN:i:34 HI:i:2"); - - List mydata2 = new ArrayList(); - mydata2.add("603_1108_0002 73 GL000196.1 319 3 50M = 319 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA RG:Z:2010072264129530 CS:Z:T22012220200333113323133321010013112111011113333112 AS:i:34 CQ:Z:/3:8@62B-*46?-A+B;'A'<9+-/@@6.'@B4,/;@2=+@B)>/?B@A XN:i:34 NH:i:2 IH:i:2 HI:i:2 MD:Z:26T3CG18"); - mydata2.add("603_1108_0002 133 GL000196.1 319 0 * = 319 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA RG:Z:2010072264129530"); - - //add invalide record since mapq is not zero for unmapped reads - - try { - - BufferedWriter writer1 = new BufferedWriter(new FileWriter(INPUT_SAM1)); - BufferedWriter writer2 = new BufferedWriter(new FileWriter(INPUT_SAM2)); - - //create SAM - for (String line : mydata){ - writer1.write(line + "\n"); - writer2.write(line + "\n"); - } - - for (String line : mydata1) - writer1.write(line + "\n"); - - for (String line : mydata2) - writer2.write(line + "\n"); - - - writer1.close(); - writer2.close(); -//debug -// System.out.println(new File(INPUT_SAM).getAbsolutePath() ); - - } catch (IOException e) { - System.err.println(e.toString() + "\n\t can't write to : " + INPUT_SAM1 + " or " + INPUT_SAM2 ); - } - - } - - -} diff --git a/qmule/test/org/qcmg/qmule/AnnotateDCCWithGFFRegionTest.java-- b/qmule/test/org/qcmg/qmule/AnnotateDCCWithGFFRegionTest.java-- deleted file mode 100644 index 49ffbb19d..000000000 --- a/qmule/test/org/qcmg/qmule/AnnotateDCCWithGFFRegionTest.java-- +++ /dev/null @@ -1,234 +0,0 @@ -package org.qcmg.qmule; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Vector; - -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class AnnotateDCCWithGFFRegionTest { - - private File inputDCC1File; - private File inputRepeatGFF; - private File inputGermGFF; - private File outputFile; - private AnnotateDCCWithGFFRegions ann; - private File inputDCCQFile; - private static 
String FILE_SEPARATOR = System.getProperty("file.separator"); - - @Rule - public TemporaryFolder testFolder = new TemporaryFolder(); - - @Before - public void setUp() throws IOException { - inputDCC1File = createDCCFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "input.dcc1"); - inputDCCQFile = createDCCQFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "input.dccq"); - inputRepeatGFF = createRepeatGFFFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "input.gff3"); - inputGermGFF = createGermGFFFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "germ.gff3"); - outputFile = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "output.dccq"); - ann = new AnnotateDCCWithGFFRegions(); - } - - @After - public void tearDown() { - inputDCC1File.delete(); - outputFile.delete(); - inputDCCQFile.delete(); - outputFile.delete(); - inputRepeatGFF.delete(); - inputGermGFF.delete(); - inputDCC1File = null; - outputFile = null; - ann = null; - } - - @Test - public void testGoodOptions() throws Exception { - String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCC1File.getAbsolutePath(), "-i", inputRepeatGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath()}; - ann.setup(args); - assertEquals(inputDCC1File.getAbsolutePath(), ann.getCmdLineInputFiles()[0]); - assertEquals(inputRepeatGFF.getAbsolutePath(), ann.getCmdLineInputFiles()[1]); - assertEquals(outputFile.getAbsolutePath(), ann.getCmdLineOutputFiles()[0]); - - //with annotation - String[] args2 = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCC1File.getAbsolutePath(), "-i", inputRepeatGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath(), "--annotation", "GERM"}; - ann.setup(args2); - assertEquals("GERM", ann.getAnnotation()); - } - - @Test(expected=QMuleException.class) - public void testBadOptions() throws Exception { - outputFile = testFolder.newFile(); - String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCC1File.getAbsolutePath(), "-i", inputRepeatGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath()}; - ann.setup(args); - } - - @Test - public void testEngageWithDCC1Repeat() throws Exception { - String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCC1File.getAbsolutePath(), "-i", inputRepeatGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath()}; - ann.setup(args); - int exit = ann.engage(); - assertEquals(0, exit); - assertTrue(outputFile.exists()); - - BufferedReader reader = new BufferedReader(new FileReader(outputFile)); - - String line; - int count = 0; - while ((line = reader.readLine()) != null) { - count++; - String[] values = line.split("\t"); - if (count == 2) { - assertEquals("PASS", values[23]); - } - if (count == 3) { - assertEquals("PASS;Simple_repeat::(CCCTAA)n", values[23]); - } - } - reader.close(); - } - - @Test - public void testEngageWithDCC1GERM() throws Exception { - String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCC1File.getAbsolutePath(), - "-i", inputGermGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath(), "--annotation", "GERM"}; - ann.setup(args); - int exit = ann.engage(); - assertEquals(0, exit); - assertTrue(outputFile.exists()); - assertEquals(1, ann.getOverlapCount()); - assertEquals(1, ann.getNotOverlappingCount()); - BufferedReader reader = new BufferedReader(new FileReader(outputFile)); - - String line; - int count = 0; - while ((line = reader.readLine()) != null) { - 
count++; - String[] values = line.split("\t"); - if (count == 2) { - assertEquals("PASS", values[23]); - } - if (count == 3) { - assertEquals("PASS;GERM", values[23]); - } - } - reader.close(); - } - - @Test - public void testEngageWithDCCQGERM() throws Exception { - String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", inputDCCQFile.getAbsolutePath(), - "-i", inputGermGFF.getAbsolutePath(), "--output", outputFile.getAbsolutePath(), "--annotation", "GERM"}; - ann.setup(args); - int exit = ann.engage(); - assertEquals(0, exit); - assertTrue(outputFile.exists()); - assertEquals(3, ann.getOverlapCount()); - assertEquals(3, ann.getNotOverlappingCount()); - BufferedReader reader = new BufferedReader(new FileReader(outputFile)); - - String line; - int count = 0; - while ((line = reader.readLine()) != null) { - count++; - String[] values = line.split("\t"); - if (count == 2 || count == 3) { - assertEquals("4", values[1]); - } - if (count == 4 || count == 5) { - assertEquals("2", values[1]); - } - if (count == 6 || count == 7) { - assertEquals("3", values[1]); - } - if (count == 2 || count == 4 || count == 6) { - assertTrue(values[37].contains("GERM")); - } - if (count == 3 || count == 5 || count == 7) { - assertFalse(values[37].contains("GERM")); - } - } - reader.close(); - } - - @Test - public void testParseDCCColumnsWithDCCQ() throws QMuleException { - Vector headers = new Vector(); - headers.add("mutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation" + - "\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tND\tTD\tNNS\tconsequence_type\taa_mutation\tcds_mutation" + - "\tprotein_domain_affected\tgene_affected\ttranscript_affected\tgene_build_version\tnote_s\tgene_symbol\tAll_domains\tAll_domains_type\tAll_domains_description\tChrPosition\tQCMGflag\tFlankSeq"); - ann.parseDCCHeader(headers, "dccq"); - assertEquals(5, ann.getDCC_STRAND_INDEX()); - assertEquals(37, ann.getQCMGFLAG_COLUMN_INDEX()); - assertEquals(8, ann.getREFERENCE_ALLELE_INDEX()); - assertEquals(10, ann.getTUMOUR_ALLELE_INDEX()); - assertEquals(1, ann.getMUTATION_TYPE_INDEX()); - } - - @Test - public void testParseDCCColumnsWithDCC1() throws QMuleException { - Vector headers = new Vector(); - headers.add("analysis_id\tanalyzed_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele" + - "\tcontrol_genotype\ttumour_genotype\tmutation\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tQCMGflag\tND\tTD\tNNS\tFlankSeq"); - ann.parseDCCHeader(headers, "dcc1"); - assertEquals(7, ann.getDCC_STRAND_INDEX()); - assertEquals(23, ann.getQCMGFLAG_COLUMN_INDEX()); - } - - private File createDCCFile(String fileName) throws IOException { - BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); - 
w.write("analysis_id\tanalyzed_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tQCMGflag\tND\tTD\tNNS\tFlankSeq\n"); - w.write("id\ttest\ttest_ind1\t2\tchr1\t85\t86\t1\t-999\t-999\t-\t-999\tT\t->T\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\tPASS\t--\t--\t--\t--\n"); - w.write("id\ttest\ttest_ind1\t2\tchr1\t10001\t10002\t1\t-999\t-999\t-\t-999\tT\t->T\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\tPASS\t--\t--\t--\t--\n"); - - w.close(); - return new File(fileName); - } - - private File createRepeatGFFFile(String fileName) throws IOException { - BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); - w.write("chr1\thg19.fa.out\tSimple_repeat::(CCCTAA)n\t10001\t10468\t1504\t+\t.\tID=1;Note=(CCCTAA)n;SR_length=6;\n"); - w.close(); - return new File(fileName); - } - - private File createGermGFFFile(String fileName) throws IOException { - BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); - w.write("chr1\thg19.fa.out\t.\t10024\t10024\t1504\t+\t.\tReferenceAllele=C;TumourAllele=-;PatientCount=10\n"); - w.write("chr1\thg19.fa.out\t.\t10021\t10022\t1504\t+\t.\tReferenceAllele=-;TumourAllele=T;PatientCount=10\n"); - w.write("chr1\thg19.fa.out\t.\t10001\t10011\t1504\t+\t.\tReferenceAllele=CTAAGTCACC;TumourAllele=-;PatientCount=10\n"); - w.write("chr1\thg19.fa.out\t.\t10001\t10002\t1504\t+\t.\tReferenceAllele=-;TumourAllele=T;PatientCount=10\n"); - w.close(); - return new File(fileName); - } - - private File createDCCQFile(String fileName) throws IOException { - BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); - w.write("mutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation" + - "\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tND\tTD\tNNS\tconsequence_type\taa_mutation\tcds_mutation" + - "\tprotein_domain_affected\tgene_affected\ttranscript_affected\tgene_build_version\tnote_s\tgene_symbol\tAll_domains\tAll_domains_type\tAll_domains_description\tChrPosition\tQCMGflag\tFlankSeq\n"); - w.write("test_ind1\t4\tchr1\t10001\t10011\t1\t-999\t-999\tCTAAGTCACC\t-999\tCCTTCAAGATTCAACCTGAATAAATCGCT\tCTAAGTCACC>CCTTCAAGATTCAACCTGAATAAATCGCT\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); - w.write("test_ind1\t4\tchr1\t10001\t10010\t1\t-999\t-999\tCTAAGTCACC\t-999\tCCTTCAAGATTCAACCTGAATAAATCGCT\tCTAAGTCACC>CCTTCAAGATTCAACCTGAATAAATCGCT\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); - w.write("test_ind1\t2\tchr1\t10021\t10022\t1\t-999\t-999\t-\t-999\tT\t->T\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); - 
w.write("test_ind1\t2\tchr1\t10021\t10022\t1\t-999\t-999\t-\t-999\tC\t->C\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); - w.write("test_ind1\t3\tchr1\t10024\t10024\t1\t-999\t-999\tC\t-999\t-\tC>-\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); - w.write("test_ind1\t3\tchr1\t10024\t10024\t1\t-999\t-999\tG\t-999\t-\tG>-\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t0;44;26;0;4;1;9\t0;113;76;0;12;0;21;\t--\t-888\t-888\t-888\t-888\t-888\t-888\t70\t-999\t--\t--\t--\t--\tchr1:817120-817130\tPASS;NNS\t--\n"); - - w.close(); - return new File(fileName); - } - - -} diff --git a/qmule/test/org/qcmg/qmule/BamCompressTest.java-- b/qmule/test/org/qcmg/qmule/BamCompressTest.java-- deleted file mode 100644 index 2011ee61d..000000000 --- a/qmule/test/org/qcmg/qmule/BamCompressTest.java-- +++ /dev/null @@ -1,95 +0,0 @@ -package org.qcmg.qmule; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; - - -import java.util.ArrayList; -import java.util.List; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.SAMRecord; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import org.qcmg.picard.SAMFileReaderFactory; -import org.qcmg.picard.SAMOrBAMWriterFactory; - - -public class BamCompressTest { - public static final String INPUT_SAM = "./input.sam"; - public static final String OUTPUT_BAM = "./output.bam"; - - @After - public void deleteFiles(){ - File in = new File(INPUT_SAM); - File out = new File(OUTPUT_BAM); - - in.delete(); - out.delete(); - - - } - - @Before - public void before(){ - CreateBAM(INPUT_SAM); - - } - - @Test - public void mainTest() throws Exception{ - final String[] args1 = { "-i", INPUT_SAM, "-o", OUTPUT_BAM, "--compressLevel", "1" }; - final String[] args2 = { "-i", INPUT_SAM, "-o", OUTPUT_BAM, "--compressLevel", "9" }; - - - BAMCompress.main(args1); - BAMCompress.main(args2); - - } - - - public static void CreateBAM(String INPUT_SAM ){ - List mydata = new ArrayList(); - - //common - mydata.add("@HD VN:1.0"); - mydata.add("@SQ SN:GL000196.1 LN:38914"); - - mydata.add("@RG ID:2010072264129530 LB:Library_20100413_C DS:RUNTYPE{50F} SM:S0414_20100607_2_FragBC_bcSample1_F3_bcA10_05"); - mydata.add("@PG ID:2010072264129500 PN:MANUAL"); - mydata.add("1035_217_1202 272 GL000196.1 319 3 50M * 0 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA RG:Z:2010072264129530 CS:Z:T22012220200333113323133321010013112111011113333112 AS:i:34 CQ:Z:/3:8@62B-*46?-A+B;'A'<9+-/@@6.'@B4,/;@2=+@B)>/?B@A XN:i:34 NH:i:2 IH:i:2 HI:i:2 MD:Z:26T3CG18"); - mydata.add("603_1107_1232 0 GL000196.1 480 1 25M25H * 0 0 AATCACTTGAACCCAGGAGGCGGAG IIIIIIIIIIIIIIIIIIIIIIII: RG:Z:2010072264129530 CS:Z:T30321120120100120220330223100133302310303131133123 AS:i:24 CQ:Z:BBBB@AAA>><>B@;9AA<:BB=@>:AB<<=@9@7'9<22>?921<:/'1 XN:i:24 NH:i:10 IH:i:2 HI:i:1 CC:Z:GL000247.1 CP:i:35405 MD:Z:25"); - mydata.add("828_1019_1921 0 GL000196.1 38525 3 37M5D13H * 0 0 AGGCTGAGGTGGGCGGATCACTTGAGGTCCAGAGTTC IIIIIIIIIIIIIIIII;?IIIB@IIIBAIIIIIIII RG:Z:2010072264129530 
CS:Z:T32032122011003302321120122012012221023222003301200 AS:i:30 CQ:Z: normaldata = new ArrayList(); - - //common - normaldata.add("@HD VN:1.0"); - normaldata.add("@SQ SN:GL000196.1 LN:38914"); - normaldata.add("@RG ID:2010072264129530 LB:Library_20100413_C DS:RUNTYPE{50F} SM:S0414_20100607_2_FragBC_bcSample1_F3_bcA10_05"); - normaldata.add("@PG ID:2010072264129500 PN:MANUAL"); - normaldata.add("1035_217_1202 272 GL000196.1 319 3 50M * 0 0 GACATATACACAACACTGTACCCAACTATACGATACATATTCTTCTCAAG !IIIIIFIIIGIIII:?IIF4CIII;7CI''''IIIIIII**IIGIIIIA RG:Z:2010072264129530 CS:Z:T22012220200333113323133321010013112111011113333112 AS:i:34 CQ:Z:/3:8@62B-*46?-A+B;'A'<9+-/@@6.'@B4,/;@2=+@B)>/?B@A XN:i:34 NH:i:2 IH:i:2 HI:i:2 MD:Z:26T3CG18"); - normaldata.add("603_1107_1232 0 GL000196.1 480 1 25M25H * 0 0 AATCACTTGAACCCAGGAGGCGGAG IIIIIIIIIIIIIIIIIIIIIIII: RG:Z:2010072264129530 CS:Z:T30321120120100120220330223100133302310303131133123 AS:i:24 CQ:Z:BBBB@AAA>><>B@;9AA<:BB=@>:AB<<=@9@7'9<22>?921<:/'1 XN:i:24 NH:i:10 IH:i:2 HI:i:1 CC:Z:GL000247.1 CP:i:35405 MD:Z:25"); - normaldata.add("828_1019_1921 0 GL000196.1 38525 3 37M5D13H * 0 0 AGGCTGAGGTGGGCGGATCACTTGAGGTCCAGAGTTC IIIIIIIIIIIIIIIII;?IIIB@IIIBAIIIIIIII RG:Z:2010072264129530 CS:Z:T32032122011003302321120122012012221023222003301200 AS:i:30 CQ:Z: list = new ArrayList(); - list.add(r1); - list.add(r2); - - Collections.sort(list, new GFF3RecordChromosomeAndPositionComparator()); - - Assert.assertEquals(r2, list.get(0)); - } - -} diff --git a/qmule/test/org/qcmg/qmule/CompareReferenceRegionsTest.java-- b/qmule/test/org/qcmg/qmule/CompareReferenceRegionsTest.java-- deleted file mode 100644 index 2b80dae4c..000000000 --- a/qmule/test/org/qcmg/qmule/CompareReferenceRegionsTest.java-- +++ /dev/null @@ -1,162 +0,0 @@ -package org.qcmg.qmule; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; - -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class CompareReferenceRegionsTest { - - File fileA; - File fileB; - File fileC; - String output; - private static String FILE_SEPARATOR = System.getProperty("file.separator"); - - @Rule - public TemporaryFolder testFolder = new TemporaryFolder(); - - @Before - public void setUp() throws IOException { - fileA = createFileA(); - fileB = createFileB(); - fileC = createFileC(); - } - - @After - public void tearDown() { - fileA.delete(); - fileB.delete(); - fileC.delete(); - } - - @Test - public void testOneway() throws Exception { - CompareReferenceRegions c = new CompareReferenceRegions(); - output = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "output.gff3"; - String outputB = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "outputB.gff3"; - String[] args = {"--log", testFolder.newFile("test.log").getAbsolutePath(), "--mode", "oneway", "--input", fileA.getAbsolutePath(), "--input", fileB.getAbsolutePath(), "--output", output, "--output", outputB,}; - c.setup(args); - - assertOutputFile(output, 100, 110, 2); - assertOutputFile(outputB, 90, 90, 1); - } - - @Test - public void testAnnotate() throws Exception { - CompareReferenceRegions c = new CompareReferenceRegions(); - output = 
testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "output.gff3"; - String outputB = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "outputB.gff3"; - String[] args = {"--log", testFolder.newFile("test.log").getAbsolutePath(), "--mode", "annotate", "--input", fileA.getAbsolutePath(), "--input", fileB.getAbsolutePath(), "--output", output, "--output", outputB, "--column", "9", "--annotation", "ANNOTATION"}; - c.setup(args); - BufferedReader reader = new BufferedReader(new FileReader(new File(output))); - String line; - int count = 0; - while ((line = reader.readLine()) != null) { - count++; - String[] values = line.split("\t"); - if (count == 1) { - assertEquals("90", values[3]); - assertEquals("90", values[4]); - assertFalse(values[8].contains("ANNOTATION")); - } - if (count == 2) { - assertEquals("100", values[3]); - assertEquals("110", values[4]); - assertTrue(values[8].contains("ANNOTATION")); - } - if (count == 3) { - assertEquals("200", values[3]); - assertEquals("210", values[4]); - assertTrue(values[8].contains("ANNOTATION")); - } - } - reader.close(); - } - - @Test - public void testIntersect() throws Exception { - CompareReferenceRegions c = new CompareReferenceRegions(); - output = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "output.gff3"; - String[] args = {"--log", testFolder.newFile("test.log").getAbsolutePath(), "--mode", "intersect", "--input", fileA.getAbsolutePath(), "--input", fileB.getAbsolutePath(), "--input", fileC.getAbsolutePath(), "--output", output}; - c.setup(args); - - assertOutputFile(output, 190, 220, 1); - } - - private void assertOutputFile(String file, int start, int end, int count) throws IOException { - assertTrue(new File(file).exists()); - - BufferedReader reader = new BufferedReader(new FileReader(new File(file))); - String line = reader.readLine(); - assertNotNull(line); - String[] values = line.split("\t"); - assertEquals(Integer.toString(start), values[3]); - assertEquals(Integer.toString(end), values[4]); - reader.close(); - } - - @Test - public void testUnique() throws Exception { - CompareReferenceRegions c = new CompareReferenceRegions(); - output = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "outputA.gff3"; - String outputB = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "outputB.gff3"; - String outputC = testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "outputC.gff3"; - String[] args = {"--log", testFolder.newFile("test.log").getAbsolutePath(), "--mode", "unique", "--input", fileA.getAbsolutePath(), "--input", fileB.getAbsolutePath(), "--input", fileC.getAbsolutePath(), "--output", output, - "--output", outputB, "--output", outputC - }; - c.setup(args); - assertOutputFile(output, 90, 90, 1); - assertOutputFile(outputB, 80, 80, 1); - assertOutputFile(outputC, 50, 55, 1); - } - - - private File createFileA() throws IOException { - File f = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "fileA.gff3"); - BufferedWriter writer = new BufferedWriter(new FileWriter(f)); - - writer.write("chr1\ttest\t0\t100\t110\t1.92\t0\t0\tName=Test\n");//overlap with 2 - writer.write("chr1\ttest\t0\t90\t90\t1.92\t0\t0\tName=Test\n");//unique - writer.write("chr1\ttest\t0\t200\t210\t1.92\t0\t0\tName=Test\n");//overlap with 2 and 3 - writer.close(); - return f; - } - - private File createFileB() throws IOException { - File f = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "fileB.gff3"); - BufferedWriter writer = new BufferedWriter(new FileWriter(f)); - - 
writer.write("chr1\ttest\t0\t100\t105\t1.92\t0\t0\tName=Test\n");//overlap with 1 - writer.write("chr1\ttest\t0\t80\t80\t1.92\t0\t0\tName=Test\n");//unique - writer.write("chr1\ttest\t0\t190\t210\t1.92\t0\t0\tName=Test\n");//overlap with 2 and 3 - writer.close(); - return f; - } - - private File createFileC() throws IOException { - File f = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "fileC.gff3"); - BufferedWriter writer = new BufferedWriter(new FileWriter(f)); - - writer.write("chr1\ttest\t0\t50\t55\t1.92\t0\t0\tName=Test\n");//unique - writer.write("chr1\ttest\t0\t70\t70\t1.92\t0\t0\tName=Test\n");//unique - writer.write("chr1\ttest\t0\t200\t220\t1.92\t0\t0\tName=Test\n");//overlap with 2 and 3 - writer.close(); - return f; - } - - -} diff --git a/qmule/test/org/qcmg/qmule/DccToMafTest.java-- b/qmule/test/org/qcmg/qmule/DccToMafTest.java-- deleted file mode 100644 index 516a67a6e..000000000 --- a/qmule/test/org/qcmg/qmule/DccToMafTest.java-- +++ /dev/null @@ -1,167 +0,0 @@ -package org.qcmg.qmule; - -import java.util.HashMap; -import java.util.Map; - -import junit.framework.Assert; - -import org.junit.Ignore; -import org.junit.Test; -import org.qcmg.common.dcc.DccConsequence; -import org.qcmg.common.string.StringUtils; - -public class DccToMafTest { - - - @Ignore - public void testRealLifeExample1() { - // want to test the following dcc record - /** - * APGI_2193_SNP_42944 1 9 21815432 21815432 1 -888 -888 A A/A A/G A>G - * -999 -999 30 2 2 -888 -999 -999 A:43[39.7],1[40] - * A:25[39.15],0[0],G:5[40],0[0] - * NON_SYNONYMOUS_CODING--SPLICE_SITE,NON_SYNONYMOUS_CODING--SPLICE_SITE,5PRIME_UTR--SPLICE_SITE, - * NON_SYNONYMOUS_CODING--SPLICE_SITE,NON_SYNONYMOUS_CODING--SPLICE_SITE,DOWNSTREAM,UPSTREAM - * I12V,I12V,-888,I12V,I29V,-888,-888 147A>G,147A>G,128A>G,126A>G,85A>G,-888,-888 -888,-888,-888,-888,-888,-888,-888 - * ENSG00000099810|ENSG00000233326|ENSG00000229298 - * ENST00000404796,ENST00000380172,ENST00000355696,ENST00000419385,ENST00000443256|ENST00000427788|ENST00000447235 - * 55 -999 CDKN2BAS|-888|-888 -888,TIGR01694,-888,-888,TIGR01694|-888|-888 - * -888,Tigrfam,-888,-888,Tigrfam|-888|-888 - * -888,MeThioAdo_phosphorylase,-888,-888,MeThioAdo_phosphorylase|-888|-888 A/G chr9:21815432-21815432 -- - */ - Map canonicalMap = new HashMap(); - canonicalMap.put("ENSG00000099810", "ENST00000404796"); - canonicalMap.put("ENSG00000233326", "ENST00000427788"); - canonicalMap.put("ENSG00000229298", "ENST00000447235"); - - String geneString = "ENSG00000099810|ENSG00000233326|ENSG00000229298"; - String [] genes = geneString.split("\\|"); - Assert.assertEquals(3, genes.length); - - String transcriptsString = "ENST00000404796,ENST00000380172,ENST00000355696,ENST00000419385,ENST00000443256|ENST00000427788|ENST00000447235"; - String [] transcriptIds = transcriptsString.split("\\|"); - Assert.assertEquals(3, transcriptIds.length); - - String[] consequenceResults = new String[] {"Splice_Site" , "3'Flank", "5'Flank"}; - - String consequencesString = "NON_SYNONYMOUS_CODING--SPLICE_SITE,NON_SYNONYMOUS_CODING--SPLICE_SITE,5PRIME_UTR--SPLICE_SITE,NON_SYNONYMOUS_CODING--SPLICE_SITE,NON_SYNONYMOUS_CODING--SPLICE_SITE,DOWNSTREAM,UPSTREAM"; - - testInputs(canonicalMap, genes, transcriptIds, consequenceResults, consequencesString); - - } - - @Ignore - public void testRealLifeExample2() { - // want to test the following dcc record - /** - * APGI_2158_SNP_61733 1 13 25068851 25068851 1 -888 -888 G G/G A/G G>A - * -999 -999 76 2 2 -888 -999 -999 G:7[40],53[38.71] 
G:10[39.9],58[37.33],A:8[40],0[0] - * WITHIN_NON_CODING_GENE,STOP_GAINED -888,Q201* -888,707G>A -888,-888 - * ENSG00000205822|ENSG00000102699 ENST00000445572|ENST00000381989 55 - * -999 -888|PARP4 -888|-888 -888|-888 -888|-888 G/A chr13:25068851-25068851 ��� - */ - Map canonicalMap = new HashMap(); - canonicalMap.put("ENSG00000205822", "noMatch"); - canonicalMap.put("ENSG00000102699", "ENST00000381989"); - - String geneString = "ENSG00000205822|ENSG00000102699"; - String [] genes = geneString.split("\\|"); - Assert.assertEquals(2, genes.length); - - String transcriptsString = "ENST00000445572|ENST00000381989"; - String [] transcriptIds = transcriptsString.split("\\|"); - Assert.assertEquals(2, transcriptIds.length); - - String[] consequenceResults = new String[] {null, "Nonsense_Mutation"}; - - String consequencesString = "WITHIN_NON_CODING_GENE,STOP_GAINED"; - - testInputs(canonicalMap, genes, transcriptIds, consequenceResults, consequencesString); - - } - - @Test - public void testRealLifeExample3() { - // v70 Ensembl - // AOCS exome solid data - // want to test the following dccq record - /** - * AOCS_066_SNP_3124 1 1 115256530 115256530 1 G/T -1 G - * G/G G/T G>T -999 -999 1.2420510993064712E-22 110 1 2 -888 - * rs121913254 -999 G:25[34.12],67[36.06] G:10[33.2],31[33.35],T:16[39.62],53[38.58] 44 - * missense_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant - * Q61K;Q61K;Q61K;Q61K;Q61K;Q61K;Q61K;Q61K;Q61K,-888,-888,-888,-888,-888,-888,-888,-888 - * 435G>T;435G>T;435G>T;435G>T;435G>T;435G>T;435G>T;435G>T;435G>T,-888,-888,-888,-888,-888,-888,-888,-888 - * PF00071;PF08477;PF00025;PF00009;TIGR00231;PR00449;SM00173;SM00175;SM00174 - * ENSG00000213281,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307 - * ENST00000369535,ENST00000339438,ENST00000438362,ENST00000358528,ENST00000261443,ENST00000530886,ENST00000369530,ENST00000483407,ENST00000534699 - * 70 -999 NRAS,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1,CSDE1 PF00071;PF08477;PF00025;PF00009;TIGR00231;PR00449;SM00173;SM00175;SM00174 - * pfam;pfam;pfam;pfam;tigrfam;prints;smart;smart;smart Small_GTPase;MIRO-like;Small_GTPase_ARF/SAR;EF_GTP-bd_dom;Small_GTP-bd_dom;Small_GTPase;Small_GTPase_Ras;Small_GTPase_Rab_type;Small_GTPase_Rho - * chr1:115256530-115256530 PASS TTCTTTTCCAG - */ - Map canonicalMap = new HashMap(); - canonicalMap.put("ENSG00000205822", "noMatch"); - canonicalMap.put("ENSG00000102699", "ENST00000381989"); - - String geneString = "ENSG00000213281,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307,ENSG00000009307"; - String [] genes = geneString.split(","); - Assert.assertEquals(9, genes.length); - - String transcriptsString = "ENST00000369535,ENST00000339438,ENST00000438362,ENST00000358528,ENST00000261443,ENST00000530886,ENST00000369530,ENST00000483407,ENST00000534699"; - String [] transcriptIds = transcriptsString.split(","); - Assert.assertEquals(9, transcriptIds.length); - - String[] consequenceResults = new String[] {"Nonsense_Mutation", "3'Flank"}; - - String consequencesString = "missense_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant"; - - testInputs(canonicalMap, genes, transcriptIds, 
consequenceResults, consequencesString); - - } - - private void testInputs(Map canonicalMap, String[] genes, - String[] transcriptIds, String[] consequenceResults, - String consequencesString) { - int i = 0, allTranscriptIdCount = 0; - for (String gene : genes) { - String[] geneSpecificTranscriptIds = transcriptIds[i].split(","); - - String canonicalTranscripId = canonicalMap.get(gene); - - if (null != canonicalTranscripId) { - int positionInTranscripts = StringUtils.getPositionOfStringInArray(geneSpecificTranscriptIds, canonicalTranscripId, true); - String [] consequences = consequencesString.split(","); - if (positionInTranscripts > -1) { - // we have a matching canonical transcript - positionInTranscripts += allTranscriptIdCount; - - if (consequences.length > positionInTranscripts) { - Assert.assertEquals(consequenceResults[i], DccConsequence.getMafName(consequences[positionInTranscripts], org.qcmg.common.dcc.MutationType.SNP, -1)); -// maf.setVariantClassification(DccConsequence.getMafName(params[22], type, Integer.parseInt(params[1]))); - } else { - Assert.fail("consequences.length is <= positionInTranscripts"); - } - } - // update transcript count - allTranscriptIdCount += geneSpecificTranscriptIds.length; - - } else { - // still want to keep the transcript count up to date - allTranscriptIdCount += geneSpecificTranscriptIds.length; -// maf.setVariantClassification(DccConsequence.getMafName(params[22], type, Integer.parseInt(params[1]))); - } - - i++; - } - } - - - @Test - public void testMultipleDelimiters() { - String inputString = "ENST00000438000,ENST00000428930,ENST00000447407,ENST00000419503|ENST00000439302,ENST00000437865,ENST00000422716,ENST00000435585,ENST00000456937|ENST00000416712,ENST00000429121,ENST00000427309"; - - String [] params = inputString.split("[,|]"); - Assert.assertEquals(12, params.length); - Assert.assertEquals("ENST00000427309", params[11]); - } -} diff --git a/qmule/test/org/qcmg/qmule/IndelDCCHeaderTest.java-- b/qmule/test/org/qcmg/qmule/IndelDCCHeaderTest.java-- deleted file mode 100644 index 4ba0cf6ad..000000000 --- a/qmule/test/org/qcmg/qmule/IndelDCCHeaderTest.java-- +++ /dev/null @@ -1,222 +0,0 @@ -package org.qcmg.qmule; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class IndelDCCHeaderTest { - - File tumourBam; - File normalBam; - File somaticFile; - File somaticOutputFile; - File germlineFile; - File germlineOutputFile; - IndelDCCHeader id; - private static String FILE_SEPARATOR = System.getProperty("file.separator"); - - @Rule - public TemporaryFolder testFolder = new TemporaryFolder(); - - @Before - public void setUp() throws IOException { - tumourBam = createBamFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "tumor.bam", "tumourId"); - normalBam = createBamFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "normal.bam", "normalId"); - somaticFile = createDCCFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "input.dcc1", 13, "3d9d495c-94f7-46a4-9301-7dcbad7285d1"); - somaticOutputFile = new File(testFolder.getRoot().getAbsolutePath() + 
FILE_SEPARATOR + "output.dcc1"); - germlineFile = createDCCFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "germ.input.dcc1", 13, "2d9d495c-94f7-46a4-9301-7dcbad7285d1"); - germlineOutputFile = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "germ.output.dcc1"); - id = new IndelDCCHeader(); - } - - - @After - public void tearDown() { - tumourBam.delete(); - normalBam.delete(); - germlineFile.delete(); - germlineOutputFile.delete(); - somaticOutputFile.delete(); - somaticFile.delete(); - tumourBam = null; - normalBam = null; - germlineFile = null; - germlineOutputFile = null; - somaticFile = null; - somaticOutputFile = null; - id = null; - } - - @Test - public void testGoodOptions() throws Exception { - IndelDCCHeader id = new IndelDCCHeader(); - String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", somaticFile.getAbsolutePath(), "-i", germlineFile.getAbsolutePath(), "--tumour", tumourBam.getAbsolutePath(), "--normal", normalBam.getAbsolutePath(), "--output", somaticOutputFile.getAbsolutePath(), "--output", germlineOutputFile.getAbsolutePath(), "--mode", "gatk"}; - id.setup(args); - assertEquals(tumourBam.getAbsolutePath(), id.getTumourBam().getAbsolutePath()); - assertEquals(normalBam.getAbsolutePath(), id.getNormalBam().getAbsolutePath()); - assertEquals(somaticFile.getAbsolutePath(), id.getSomaticFile().getAbsolutePath()); - assertEquals(germlineFile.getAbsolutePath(), id.getGermlineFile().getAbsolutePath()); - assertEquals(somaticOutputFile.getAbsolutePath(), id.getSomaticOutputFile().getAbsolutePath()); - assertEquals(germlineOutputFile.getAbsolutePath(), id.getGermlineOutputFile().getAbsolutePath()); - assertEquals("gatk", id.getMode()); - } - - @Test - public void testAnnotate() throws Exception { - - String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", somaticFile.getAbsolutePath(), "-i", germlineFile.getAbsolutePath(), "--tumour", tumourBam.getAbsolutePath(), "--normal", normalBam.getAbsolutePath(), "--output", somaticOutputFile.getAbsolutePath(), "--output", germlineOutputFile.getAbsolutePath(), "--mode", "gatk"}; - - id.setup(args); - assertFalse(somaticOutputFile.exists()); - assertFalse(germlineOutputFile.exists()); - id.annotate(); - assertTrue(somaticOutputFile.exists()); - assertTrue(germlineOutputFile.exists()); - - assertAnnotationCorrect(somaticOutputFile, "tumourId"); - assertAnnotationCorrect(germlineOutputFile, "normalId"); - - } - - private void assertAnnotationCorrect(File outputFile, String sampleId) throws IOException { - BufferedReader reader = new BufferedReader(new FileReader(outputFile)); - - String line; - int count = 0; - while ((line = reader.readLine()) != null) { - count++; - if (count == 1 || count == 15) { - assertTrue(line.contains("3d9d495c_94f7_46a4_9301_7dcbad7285d1")); - } - if (count == 52) { - String[] results = line.split("\t"); - assertEquals("3d9d495c_94f7_46a4_9301_7dcbad7285d1", results[0]); - assertEquals(sampleId, results[1]); - assertEquals("3d9d495c_94f7_46a4_9301_7dcbad7285d1_" + sampleId + "_ind1", results[2]); - } - if (count == 15) { - assertTrue(line.startsWith("#Q_DCCMETA")); - } - if (count == 25) { - assertTrue(line.startsWith("#Q_LIMSMETA_TEST")); - } - if (count == 38) { - assertTrue(line.startsWith("#Q_LIMSMETA_CONTROL")); - } - - } - reader.close(); - - } - - - @Test - public void testIsCorrectUuidFormat() { - assertTrue(id.isCorrectUuidFormat("3d9d495c_94f7_46a4_9301_7dcbad7285d1")); - 
assertFalse(id.isCorrectUuidFormat("3d9d495c-94f7_46a4_9301_7dcbad7285d1")); - assertFalse(id.isCorrectUuidFormat("3d9d495c_94f7_46a4_9301_7dcbad7285d")); - } - - @Test - public void testReplaceAnalysisIdInLine() { - String uuid = "3d9d495c_94f7_46a4_9301_7dcbad7285d1"; - String tumour = "tumourId_added"; - String normal = "normalId_added"; - id.setUuid(uuid); - id.setTumourSampleId(tumour); - id.setNormalSampleId(normal); - String line = "id\tsecond\tthird_ind1"; - String[] results = id.replaceIdsInLine(line, false).split("\t"); - assertEquals(uuid, results[0]); - assertEquals(tumour, results[1]); - assertEquals(uuid + "_" + tumour + "_ind1" , results[2]); - - results = id.replaceIdsInLine(line, true).split("\t"); - assertEquals(uuid, results[0]); - assertEquals(normal, results[1]); - assertEquals(uuid + "_" + normal + "_ind1" , results[2]); - } - - @Test - public void testCheckForUUid() throws Exception { - String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", somaticFile.getAbsolutePath(), "-i", germlineFile.getAbsolutePath(), "--tumour", tumourBam.getAbsolutePath(), "--normal", normalBam.getAbsolutePath(), "--output", somaticOutputFile.getAbsolutePath(), "--output", germlineOutputFile.getAbsolutePath(), "--mode", "gatk"}; - id.setup(args); - assertFalse(id.isCompleteHeaderPresent()); - assertFalse(id.isQexecPresent()); - assertEquals(0, id.getQexec().size()); - id.checkForUUid(); - assertFalse(id.isCompleteHeaderPresent()); - assertEquals(14, id.getQexec().size()); - assertTrue(id.isQexecPresent()); - } - - @Test(expected=QMuleException.class) - public void testCheckForUUidThrowsException() throws Exception { - somaticFile = createDCCFile(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "input.dcc1", 12, "3d9d495c-94f7-46a4-9301-7dcbad7285d1"); - String[] args = {"--log", testFolder.newFile().getAbsolutePath(), "-i", somaticFile.getAbsolutePath(), "-i", germlineFile.getAbsolutePath(), "--tumour", tumourBam.getAbsolutePath(), "--normal", normalBam.getAbsolutePath(), "--output", somaticOutputFile.getAbsolutePath(), "--output", germlineOutputFile.getAbsolutePath(), "--mode", "gatk"}; - - id.setup(args); - assertFalse(id.isCompleteHeaderPresent()); - assertFalse(id.isQexecPresent()); - assertEquals(0, id.getQexec().size()); - id.checkForUUid(); - assertFalse(id.isCompleteHeaderPresent()); - assertEquals(14, id.getQexec().size()); - assertTrue(id.isQexecPresent()); - } - - private File createDCCFile(String fileName, int qexecLength, String uuid) throws IOException { - BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); - w.write("#Q_EXEC Uuid "+uuid +"\n"); - for (int i=1; i<=qexecLength; i++) { - w.write("#Q_EXEC\n"); - } - w.write("analysis_id\tanalyzed_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tQCMGflag\tND\tTD\tNNS\tFlankSeq\n"); - w.write("id\ttest\ttest_ind1\t2\tchr1\t85\t86\t1\t-999\t-999\t-\t-999\tT\t->T\t\t-999\t-999\t-999\t-999\t-999\t-999\t-999\t-999\tPASS\t--\t--\t--\t--\n"); - w.close(); - return new File(fileName); - } - - private File createBamFile(String fileName, String sampleID) throws IOException { - final List data = new ArrayList(); - data.add("@HD VN:1.0 GO:none SO:coordinate"); - data.add("@SQ SN:chr1 LN:249250621 "); - 
data.add("@SQ SN:chr4 LN:191154276 "); - data.add("@SQ SN:chr7 LN:159138663 "); - data.add("@SQ SN:chrX LN:155270560 "); - data.add("@SQ SN:chrY LN:59373566 "); - data.add("@SQ SN:chr19 LN:59128983 "); - data.add("@SQ SN:GL000191.1 LN:106433 "); - data.add("@SQ SN:GL000211.1 LN:166566 "); - data.add("@SQ SN:chrMT LN:16569 "); - data.add("@RG ID:20120817075934728 PL:ILLUMINA PU:lane_7 LB:Library_20120726_B zc:6:/mnt/seq_results/icgc_pancreatic/APGI_1992/seq_mapped/120804_SN7001240_0063_AC0VM1ACXX.lane_7.nobc.bam SM:Colo-829"); - data.add("@CO CN:QCMG QN:qlimsmeta Aligner=bwa Capture Kit=NoCapture Donor=test Failed QC=0 Library Protocol=Illumina TruSEQ Multiplexed Manual Material=1:DNA Project=test_project Reference Genome File=/panfs/share/genomes/GRCh37_ICGC_standard_v2/GRCh37_ICGC_standard_v2.fa Sample="+sampleID+" Sample Code=4:Normal control (other site) Sequencing Platform=HiSeq Species Reference Genome=Homo sapiens (GRCh37_ICGC_standard_v2)"); - - BufferedWriter out; - out = new BufferedWriter(new FileWriter(fileName)); - for (final String line : data) { - out.write(line + "\n"); - } - out.close(); - return new File(fileName); - } - - - - -} diff --git a/qmule/test/org/qcmg/qmule/MAF2DCC1Test.java-- b/qmule/test/org/qcmg/qmule/MAF2DCC1Test.java-- deleted file mode 100644 index db9d7d1ef..000000000 --- a/qmule/test/org/qcmg/qmule/MAF2DCC1Test.java-- +++ /dev/null @@ -1,315 +0,0 @@ -package org.qcmg.qmule; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.ChrPositionName; -import org.qcmg.common.model.ChrRangePosition; -import org.qcmg.qmule.tab.TabbedRecord; - -public class MAF2DCC1Test { - - private File snpMafFile; - private File snpDccFile; - private File indelMafFile; - private File indelDccFile; - private File outputFile; - private MAF2DCC1 test; - private final String DCCHEADER = "analysis_id analyzed_sample_id mutation_id mutation_type chromosome chromosome_start chromosome_end chromosome_strand refsnp_allele refsnp_strand reference_genome_allele control_genotype tumour_genotype mutation expressed_allele quality_score probability read_count is_annotated validation_status validation_platform xref_ensembl_var_id note QCMGflag ND TD NNS FlankSeq"; - private final String MAFHEADER = "Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer QCMG_Flag ND TD Canonical_Transcript_Id Canonical_AA_Change Canonical_Base_Change Alternate_Transcript_Id Alternate_AA_Change Alternate_Base_Change Confidence CPG Gff3_Bait Novel_Starts"; - - private static String FILE_SEPARATOR = 
System.getProperty("file.separator"); - - @Rule - public TemporaryFolder testFolder = new TemporaryFolder(); - - @Before - public void setUp() throws Exception { - snpMafFile = createMafFile("snp", testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "snp.maf"); - indelMafFile = createMafFile("indel", testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "indel.maf"); - snpDccFile = createDccFile("snp", testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "snp.dcc1"); - indelDccFile = createDccFile("indel", testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "indel.dcc1"); - outputFile = new File(testFolder.getRoot().getAbsolutePath() + FILE_SEPARATOR + "output.dcc1"); - String[] args = {"-i", indelMafFile.getAbsolutePath(), "-i", indelDccFile.getAbsolutePath(), "-o", outputFile.getAbsolutePath(), "-mode", "indel", "-log", testFolder.newFile().getAbsolutePath()}; - - test = new MAF2DCC1(); - test.setup(args); - } - - @After - public void tearDown() { - snpDccFile.delete(); - snpMafFile.delete(); - indelMafFile.delete(); - indelDccFile.delete(); - outputFile.delete(); - snpDccFile = null; - snpMafFile = null; - indelDccFile = null; - indelMafFile = null; - } - - @Test - public void testRunSnpMode() throws Exception { - String[] args = {"-i", snpMafFile.getAbsolutePath(), "-i", snpDccFile.getAbsolutePath(), "-o", outputFile.getAbsolutePath(), "-mode", "snp", "-log", testFolder.newFile().getAbsolutePath()}; - - MAF2DCC1 test = new MAF2DCC1(); - test.setup(args); - assertEquals(0, test.annotate()); - assertTrue(outputFile.exists()); - assertTrue(outputFile.length() > 0); - assertEquals(1, test.getInputMafRecordCount()); - } - - @Test - public void testRunIndelMode() throws Exception { - - assertEquals(0, test.annotate()); - assertTrue(outputFile.exists()); - assertTrue(outputFile.length() > 0); - assertEquals(2, test.getInputMafRecordCount()); - } - - @Test - public void testGoodOptions() throws Exception { - String log = testFolder.newFile().getAbsolutePath(); - String[] args = {"-i", indelMafFile.getAbsolutePath(), "-i", indelDccFile.getAbsolutePath(), "-o", outputFile.getAbsolutePath(), "-mode", "indel", "-log", log}; - MAF2DCC1 test = new MAF2DCC1(); - test.setup(args); - assertEquals(indelMafFile, test.getMafFile()); - assertEquals(1, test.getDccFiles().size()); - assertEquals(indelDccFile, test.getDccFiles().get(0)); - assertEquals(outputFile, test.getOutputDccFile()); - assertEquals("indel", test.getMode()); - assertEquals(log, test.getLogFile()); - } - - @Test(expected=QMuleException.class) - public void testOutputFileExistsThrowsException() throws Exception { - String log = testFolder.newFile().getAbsolutePath(); - outputFile = testFolder.newFile("test"); - String[] args = {"-i", indelMafFile.getAbsolutePath(), "-i", indelDccFile.getAbsolutePath(), "-o", outputFile.getAbsolutePath(), "-mode", "indel", "-log", log}; - assertTrue(outputFile.exists()); - MAF2DCC1 test = new MAF2DCC1(); - test.setup(args); - } - - @Test(expected=QMuleException.class) - public void testUnknownModeThrowsException() throws Exception { - String[] args = {"-i", indelMafFile.getAbsolutePath(), "-i", indelDccFile.getAbsolutePath(), "-o", outputFile.getAbsolutePath(), "-mode", "idel", "-log", testFolder.newFile().getAbsolutePath()}; - MAF2DCC1 test = new MAF2DCC1(); - test.setup(args); - } - - @Test - public void testMatchChrPos() { - ChrPosition maf = new ChrRangePosition("chr1", 1, 2); - ChrPosition dcc = new ChrRangePosition("chr1", 1, 2); - - assertTrue(test.match(maf, dcc)); - dcc = new 
ChrRangePosition("chr1", 1, 3); - assertFalse(test.match(maf, dcc)); - dcc = new ChrRangePosition("chr1", 2, 2); - assertFalse(test.match(maf, dcc)); - dcc = new ChrRangePosition("chr1", 1, 1); - assertFalse(test.match(maf, dcc)); - } - - @Test - public void testMatchingMutation() { - assertTrue(test.matchingMutation("SNP", "1")); - assertTrue(test.matchingMutation("INS", "2")); - assertTrue(test.matchingMutation("DEL", "3")); - assertFalse(test.matchingMutation("SNP", "3")); - assertFalse(test.matchingMutation("INS", "1")); - assertFalse(test.matchingMutation("DEL", "2")); - } - - @Test - public void testMatchRecordsSnpMode() { - int[] indexes = {0, 1, 2, 3, 4, 5}; - test.setMafColumnIndexes(indexes); - test.setDccColumnIndexes(indexes); - test.setMode("snp"); - TabbedRecord maf = new TabbedRecord(); - TabbedRecord dcc = new TabbedRecord(); - maf.setData("chr1\t1\t2\tSNP"); - dcc.setData("chr1\t1\t2\t1"); - assertTrue(test.matchOtherColumns(maf, dcc)); - dcc.setData("chr1\t1\t2\t4"); - assertFalse(test.matchOtherColumns(maf, dcc)); - } - - @Test - public void testMatchRecordsIndelMode() { - int[] indexes = {0, 1, 2, 3, 4, 5}; - test.setMafColumnIndexes(indexes); - test.setDccColumnIndexes(indexes); - test.setMode("indel"); - TabbedRecord maf = new TabbedRecord(); - TabbedRecord dcc = new TabbedRecord(); - maf.setData("chr1\t1\t2\tINS\t-\tA"); - dcc.setData("chr1\t1\t2\t2\t-\tA"); - assertTrue(test.matchOtherColumns(maf, dcc)); - dcc.setData("chr1\t1\t2\t1\t-\tA"); - assertFalse(test.matchOtherColumns(maf, dcc)); - } - - @Test - public void testRecordInMaf() throws QMuleException { - int[] indexes = {0, 1, 2, 3, 4, 5}; - test.setMafColumnIndexes(indexes); - test.setDccColumnIndexes(indexes); - test.setMode("indel"); - TabbedRecord maf = new TabbedRecord(); - TabbedRecord dcc = new TabbedRecord(); - maf.setData("chr1\t1\t2\tINS\t-\tA"); - dcc.setData("chr1\t1\t2\t2\t-\tA"); - List listOfRecords = new ArrayList<>(); - listOfRecords.add(maf); - Map> mafs = new HashMap<>(); - ChrPosition c = new ChrPositionName("chr1", 1, 2, "a"); - mafs.put(c, listOfRecords); - test.setMafRecords(mafs); - assertTrue(test.recordInMaf(c, dcc)); - } - - @Test(expected=QMuleException.class) - public void testRecordInMafThrowsException() throws QMuleException { - int[] indexes = {0, 1, 2, 3, 4, 5}; - test.setMafColumnIndexes(indexes); - test.setDccColumnIndexes(indexes); - test.setMode("indel"); - TabbedRecord maf = new TabbedRecord(); - TabbedRecord dcc = new TabbedRecord(); - maf.setData("chr1\t1\t2\tINS\t-\tA"); - dcc.setData("chr1\t1\t2\t2\t-\tA"); - List listOfRecords = new ArrayList<>(); - listOfRecords.add(maf); - listOfRecords.add(maf); -// List listOfRecords2 = new ArrayList<>(); -// listOfRecords2.add(maf); - Map> mafs = new HashMap<>(); - ChrPosition c = new ChrRangePosition("chr1", 1, 2); -// ChrPosition c2 = new ChrPosition("chr1", 1, 2); -// ChrPosition c = new ChrPosition("chr1", 1, 2, "a"); -// ChrPosition c2 = new ChrPosition("chr1", 1, 2, "b"); - mafs.put(c, listOfRecords); -// mafs.put(c2, listOfRecords2); - assertEquals(1, mafs.size()); - assertEquals(2, mafs.get( new ChrRangePosition("chr1", 1, 2)).size()); - test.setMafRecords(mafs); - test.recordInMaf(c, dcc); - } - - @Test - public void testfindColumnIndexesFromHeaderWithMaf() { - TabbedRecord rec = new TabbedRecord(); - rec.setData(MAFHEADER); - int[] cols = test.findColumnIndexesFromHeader(rec); - assertEquals(4, cols[0]); - assertEquals(5, cols[1]); - assertEquals(6, cols[2]); - assertEquals(9, cols[3]); - assertEquals(10, cols[4]); - 
assertEquals(11, cols[5]); - } - - @Test - public void testfindColumnIndexesFromHeaderWithDcc() { - TabbedRecord rec = new TabbedRecord(); - rec.setData(DCCHEADER); - int[] cols = test.findColumnIndexesFromHeader(rec); - assertEquals(4, cols[0]); - assertEquals(5, cols[1]); - assertEquals(6, cols[2]); - assertEquals(3, cols[3]); - assertEquals(10, cols[4]); - assertEquals(12, cols[5]); - } - - @Test(expected=QMuleException.class) - public void testMissingColumnIndexThrowsException() throws QMuleException { - int[] i = {-1}; - test.missingColumnIndex(i); - } - - @Test - public void testMissingColumnIndex() throws QMuleException { - int[] i = {1}; - assertFalse(test.missingColumnIndex(i)); - } - - @Test - public void testAddRecordToMap() throws QMuleException { - int[] indexes = {0, 1, 2, 3, 4, 5}; - test.setMafColumnIndexes(indexes); - test.setDccColumnIndexes(indexes); - test.setMode("indel"); - TabbedRecord maf = new TabbedRecord(); - maf.setData("chr1\t1\t2\tINS\t-\tA"); - test.addToMafRecordMap(maf, 1); - assertEquals(1, test.getMafRecords().size()); - assertTrue(test.getMafRecords().containsKey(new ChrRangePosition("1", 1, 2))); -// assertTrue(test.getMafRecords().containsKey(new ChrPosition("1", 1, 2, "" + 1))); - maf = new TabbedRecord(); - maf.setData("chr1\t1\t2\tINS\t-\tA"); - test.addToMafRecordMap(maf, 2); - assertEquals(1, test.getMafRecords().size()); - assertEquals(2, test.getMafRecords().get(new ChrRangePosition("1", 1, 2)).size()); -// assertEquals(2, test.getMafRecords().size()); - assertTrue(test.getMafRecords().containsKey(new ChrRangePosition("1", 1, 2))); -// assertTrue(test.getMafRecords().containsKey(new ChrPosition("1", 1, 2, "" +2))); - } - - - private File createDccFile(String type, String fileName) throws IOException { - BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); - //w.write("analysis_id\tanalyzed_sample_id\tmutation_id\tmutation_type\tchromosome\tchromosome_start\tchromosome_end\tchromosome_strand\trefsnp_allele\trefsnp_strand\treference_genome_allele\tcontrol_genotype\ttumour_genotype\tmutation\texpressed_allele\tquality_score\tprobability\tread_count\tis_annotated\tverification_status\tverification_platform\txref_ensembl_var_id\tnote\tQCMGflag\tND\tTD\tNNS\tFlankSeq\n"); - w.write(DCCHEADER + "\n"); - if (type.equals("indel")) { - w.write("aba9fc0c_7f03_417f_b087_2e8ab1a45e42 test test_ind716 2 chr1 4412134 4412135 1 -999 -999 -0 -999 A -/A -999 -999 -999 -999 -999 -999 -999 -999 PASS -- -- -- --\n"); - w.write("aba9fc0c_7f03_417f_b087_2e8ab1a45e42 test test_ind2740 3 chr1 12126362 12126362 1 -999 -999 T -999 -0 T/- -999 -999 -999 -999 -999 -999 -999 -999 PASS -- -- -- --\n"); - } - - if (type.equals("snp")) { - w.write("02ebc0c3_3102_4bf0_9c5b_eabcab65414d ICGC-ABMJ-20120706-01 APGI_1992_SNP_248 1 1 569492 569492 1 C/T 1 T T/T C/T T>C -999 -999 0.0119695263 106 1 2 -888 rs147253560 -999 MIN A:1[35],0[0],C:0[0],1[37],T:42[36.71],47[35.89] C:9[31.33],1[36],T:49[36.61],46[33.41] 5 ATCCCCATACT\n"); - w.write("02ebc0c3_3102_4bf0_9c5b_eabcab65414d ICGC-ABMJ-20120706-01 APGI_1992_SNP_260 1 1 604271 604271 1 -888 -888 G G/G A/G G>A -999 -999 0.3973437368 56 2 2 -888 -999 -999 MIN;MR;GERM A:0[0],1[29],G:20[34.1],19[35.79] A:2[37.5],2[32],G:30[36.43],22[38] 4 TGGAGAGGAAC"); - } - - w.close(); - return new File(fileName); - } - - private File createMafFile(String type, String fileName) throws IOException { - BufferedWriter w = new BufferedWriter(new FileWriter(new File(fileName))); - w.write(MAFHEADER + "\n"); - if (type.equals("indel")) 
{ - w.write("Unknown null qcmg.uq.edu.au 37 1 4412134 4412135 0 null INS -0 A A novel null QCMG-66-APGI_1992-ICGC-ABMJ-20120706-01 QCMG-66-APGI_1992-ICGC-ABMP-20091203-10-ND -0 -0 null null null null null Unknown Somatic null Unknown null null null Unknown PASS;HOMCON_4 0;28;28;0;0;0;0 9;52;52;9;0;0;0;\"4 contiguous CTAAAAACACaAAAATTAGCT\" null null null null null null HIGH -- null 0\n"); - w.write("TNFRSF8 0 qcmg.uq.edu.au 37 1 12126362 12126362 0 Intron DEL T -0 -0 novel null QCMG-66-APGI_1992-ICGC-ABMJ-20120706-01 QCMG-66-APGI_1992-ICGC-ABMP-20091203-10-ND -0 -0 null null null null null Unknown Somatic null Unknown null null null Unknown PASS;HOMCON_3 0;67;66;0;0;2;0 15;52;49;16;0;1;0;\"3 contiguous AAGCTCGTTA_TTTAAAAAAA\" ENST00000263932 -888 -888 null null null HIGH -- fill\n"); - } - - if (type.equals("snp")) { - w.write("Unknown 0 qcmg.uq.edu.au 37 1 569492 569492 0 RNA SNP T C T rs147253560 null QCMG-66-APGI_1992-ICGC-ABMJ-20120706-01 QCMG-66-APGI_1992-ICGC-ABMP-20091203-10-ND T T null null null null null Unknown Somatic null Unknown null null null Unknown PASS A:1[35],0[0],C:0[0],1[37],T:42[36.71],47[35.89] C:9[31.33],1[36],T:49[36.61],46[33.41] ENST00000440200 -888 -888 null null null HIGH ATCCCCATACT fill\n"); - } - - w.close(); - return new File(fileName); - } -} diff --git a/qmule/test/org/qcmg/qmule/TestThreading.java-- b/qmule/test/org/qcmg/qmule/TestThreading.java-- deleted file mode 100644 index a50c9d2e0..000000000 --- a/qmule/test/org/qcmg/qmule/TestThreading.java-- +++ /dev/null @@ -1,55 +0,0 @@ -package org.qcmg.qmule; - -import java.util.concurrent.atomic.AtomicLong; - -import org.junit.Ignore; - -public class TestThreading { - - private static final int testRuns = 50000000; - - @Ignore - public void testLongUpdate() { - - long counter = 0L; - - long start = System.currentTimeMillis(); - - for (int i = 0 ; i < testRuns ; i++) counter++; - - long end = System.currentTimeMillis(); - System.out.println("counter: " + counter); - System.out.println("Time taken: " + (end - start) + "ms"); - - } - - @Ignore - public void testLongUpdateSynchronised() { - - long counter = 0L; - - long start = System.currentTimeMillis(); - - for (int i = 0 ; i < testRuns ; i++) synchronized(this){counter++;} - - long end = System.currentTimeMillis(); - System.out.println("counter: " + counter); - System.out.println("Time taken (synchronised): " + (end - start) + "ms"); - - } - - @Ignore - public void testAtomicLongUpdate() { - - AtomicLong counter = new AtomicLong(); - - long start = System.currentTimeMillis(); - - for (int i = 0 ; i < testRuns ; i++) counter.getAndIncrement(); - - long end = System.currentTimeMillis(); - System.out.println("counter: " + counter.longValue()); - System.out.println("Time taken (Atomic): " + (end - start) + "ms"); - - } -} diff --git a/qmule/test/org/qcmg/qmule/WiggleFromPileupTakeTwoTest.java-- b/qmule/test/org/qcmg/qmule/WiggleFromPileupTakeTwoTest.java-- deleted file mode 100644 index d645bb287..000000000 --- a/qmule/test/org/qcmg/qmule/WiggleFromPileupTakeTwoTest.java-- +++ /dev/null @@ -1,428 +0,0 @@ -package org.qcmg.qmule; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.PrintStream; -import java.util.zip.GZIPInputStream; -import 
java.util.zip.GZIPOutputStream; - -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.rules.TemporaryFolder; -import org.qcmg.common.commandline.Executor; -import org.qcmg.common.util.FileUtils; - -public class WiggleFromPileupTakeTwoTest { - @Rule - public TemporaryFolder tempFolder = new TemporaryFolder(); - @Rule - public ExpectedException thrown = ExpectedException.none(); - - private File pileupFile; - private File gff3File; - private File wiggleFile; - private File pileupFileGZIP; - private File wiggleFileGZIP; - - @Before - public final void before() { - try { - pileupFile = tempFolder.newFile("wigglePileupTest.pileup"); - wiggleFile = tempFolder.newFile("wigglePileupTest.wiggle"); - gff3File = tempFolder.newFile("wigglePileupTest.gff3"); - pileupFileGZIP = tempFolder.newFile("wigglePileupTest.pileup.gz"); - wiggleFileGZIP = tempFolder.newFile("wigglePileupTest.wiggle.gz"); - createPileupFile(pileupFile); - createPileupFile(pileupFileGZIP); - createGFF3File(gff3File); - assertTrue(pileupFile.exists()); - assertTrue(gff3File.exists()); - assertTrue(pileupFileGZIP.exists()); - } catch (Exception e) { - System.err.println("File creation error in test harness: " + e.getMessage()); - } - } - - -// @Test -// public void testIsPositionInBaitSingleGff() { -// GFF3Record gff = new GFF3Record(); -// gff.setSeqId("chr1"); -// gff.setStart(1); -// gff.setEnd(10); -// -// List gffs = new ArrayList(); -// gffs.add(gff); -// Iterator iter = gffs.iterator(); -//// WiggleFromPileup.setGffRecord(gff); -// -// Assert.assertEquals(false, WiggleFromPileupTakeTwo.isPositionInBait("chr0", 0, iter, iter.next())); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, gff)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff)); -// -// gff.setSeqId("chrX"); -// gff.setStart(1000123); -// gff.setEnd(1000223); -// -//// WiggleFromPileup.setGffRecord(gff); -// -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 0, iter, gff)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 1, iter, gff)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrx", 1000124, iter, gff)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrx", 11, iter, gff)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 11, iter, gff)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000123, iter, gff)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000124, iter, gff)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000223, iter, gff)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 1000224, iter, gff)); -// } - -// @Test -// public void testIsPositionInBaitMultipleGff() { -// GFF3Record gff1 = new GFF3Record(); -// gff1.setSeqId("chr1"); -// gff1.setStart(1); -// gff1.setEnd(10); -// GFF3Record gff2 = new GFF3Record(); -// gff2.setSeqId("chr1"); -// gff2.setStart(11); -// gff2.setEnd(20); -// GFF3Record gff3 = new GFF3Record(); -// gff3.setSeqId("chr1"); -// gff3.setStart(31); -// gff3.setEnd(40); -// -// List gffs = new ArrayList(); -// gffs.add(gff1); -// gffs.add(gff2); -// gffs.add(gff3); -// Iterator iter = 
gffs.iterator(); -// -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, iter.next())); -// -//// Assert.assertEquals(gff1, WiggleFromPileup.getGffRecord()); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff1)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff1)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff1)); -// // iterator should have been advanced -// Assert.assertEquals(gff2, WiggleFromPileup.getGffRecord()); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 20, iter, gff2)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 21, iter, gff2)); -// // iterator should have been advanced -// Assert.assertEquals(gff3, WiggleFromPileup.getGffRecord()); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 29, iter, gff3)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 31, iter, gff3)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 40, iter, gff3)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 41, iter, gff3)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 141, iter, gff3)); -// } - -// @Test -// public void testIsPositionInBaitMultipleGffMultipleChromosomes() { -// GFF3Record gff1 = new GFF3Record(); -// gff1.setSeqId("chr1"); -// gff1.setStart(1); -// gff1.setEnd(10); -// GFF3Record gff2 = new GFF3Record(); -// gff2.setSeqId("chr1"); -// gff2.setStart(11); -// gff2.setEnd(20); -// GFF3Record gff3 = new GFF3Record(); -// gff3.setSeqId("chr1"); -// gff3.setStart(31); -// gff3.setEnd(40); -// GFF3Record gff4 = new GFF3Record(); -// gff4.setSeqId("chr2"); -// gff4.setStart(15); -// gff4.setEnd(25); -// GFF3Record gff5 = new GFF3Record(); -// gff5.setSeqId("chr2"); -// gff5.setStart(26); -// gff5.setEnd(40); -// GFF3Record gff6 = new GFF3Record(); -// gff6.setSeqId("chrX"); -// gff6.setStart(100026); -// gff6.setEnd(100040); -// -// List gffs = new ArrayList(); -// gffs.add(gff1); -// gffs.add(gff2); -// gffs.add(gff3); -// gffs.add(gff4); -// gffs.add(gff5); -// gffs.add(gff6); -// Iterator iter = gffs.iterator(); -// -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr0", 0, iter, iter.next())); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, gff1)); -// -//// Assert.assertEquals(gff1, WiggleFromPileup.getGffRecord()); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff1)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff1)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff1)); -// // iterator should have been advanced -// Assert.assertEquals(gff2, WiggleFromPileup.getGffRecord()); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 20, iter, gff2)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 21, iter, gff2)); -// // iterator should have been advanced -// Assert.assertEquals(gff3, WiggleFromPileup.getGffRecord()); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 29, iter, gff3)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 31, iter, gff3)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 40, iter, gff3)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 41, iter, gff3)); -// // 
iterator should have been advanced -// Assert.assertEquals(gff4, WiggleFromPileup.getGffRecord()); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 141, iter, gff4)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 142, iter, gff4)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 1000142, iter, gff4)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 1, iter, gff4)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 2, iter, gff4)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 15, iter, gff4)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 25, iter, gff4)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 26, iter, gff4)); -// // iterator should have been advanced -// Assert.assertEquals(gff5, WiggleFromPileup.getGffRecord()); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 40, iter, gff5)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 41, iter, gff5)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr3", 15, iter, gff5)); -// // iterator should have been advanced -// Assert.assertEquals(gff6, WiggleFromPileup.getGffRecord()); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr3", 10015, iter, gff6)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr4", 10015, iter, gff6)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr5", 10015, iter, gff6)); -// Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr15", 10015, iter, gff6)); -// Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 100026, iter, gff6)); -// -// } - - @Test - public final void callWithNoArgs() throws Exception { - String command = ""; - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); - assertTrue(1 == exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); - } - - @Test - public final void callWithNoInputFile() throws Exception { - String command = "-log ./logfile -o " + tempFolder.getRoot().getAbsolutePath(); - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); - assertTrue(1 == exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); - } - - @Test - public final void callWithMissingArgs() throws Exception { - String command = "-log ./logfile -o blah.wiggle -i " + pileupFile.getAbsolutePath(); - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); - assertTrue(1 == exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); - } - - @Test - public final void callWithValidArguments() throws Exception { - ExpectedException.none(); - String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 1 -tumourCoverage 1 -i " + pileupFile.getAbsolutePath() - + " -i " + gff3File.getAbsolutePath() - + " -gffRegions exon" - + " -o " + wiggleFile.getAbsolutePath(); - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); - assertEquals(0, exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - - // check the wiggle file - InputStream reader = 
new FileInputStream(wiggleFile); - assertEquals(29, examineWiggle(reader)); - } - - @Test - public final void callWithValidArgumentsLargeCoverage() throws Exception { - ExpectedException.none(); - String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 50 -tumourCoverage 50 -i " + pileupFile.getAbsolutePath() - + " -i " + gff3File.getAbsolutePath() - + " -gffRegions exon" - + " -o " + wiggleFile.getAbsolutePath(); - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); - assertEquals(0, exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - - // check the wiggle file - InputStream reader = new FileInputStream(wiggleFile); - assertEquals(0, examineWiggle(reader)); - } - - @Test - public final void callWithZippedFiles() throws Exception { - ExpectedException.none(); - String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 20 -tumourCoverage 20 -i " + pileupFileGZIP.getAbsolutePath() - + " -i " + gff3File.getAbsolutePath() - + " -gffRegions exon" - + " -o " + wiggleFileGZIP.getAbsolutePath(); - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileupTakeTwo"); - assertEquals(0, exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - - // check the wiggle file - InputStream reader = new GZIPInputStream(new FileInputStream(wiggleFileGZIP)); - assertEquals(14, examineWiggle(reader)); - } - - private int examineWiggle(InputStream reader) throws IOException { - int count = 0; - BufferedReader fr = new BufferedReader(new InputStreamReader(reader)); - String line = fr.readLine(); // first line has the header - while ((line = fr.readLine()) != null) { - if (line.startsWith("fixedStep")) continue; - count += Integer.parseInt(line); - } - return count; - } - - private void createPileupFile(File pileupFile) throws IOException { - - OutputStream os = FileUtils.isFileNameGZip(pileupFile) ? 
new GZIPOutputStream( new FileOutputStream(pileupFile)) - : new FileOutputStream(pileupFile); - -// OutputStream os = new FileOutputStream(pileupFile); - PrintStream ps = new PrintStream(os); - - ps.println("chr1\t14923\tG\t8\t.......^!.\tIIIIIIIE\t7\t,.....^!.\t5IIIIIE\t10\t.........^T.\t0IIIIIIIIE\t7\t...,...\tIIIIIII"); - ps.println("chr1\t14924\tA\t9\t........^!.\tEI@III?IB\t7\t,......\t@IIIIII\t10\t..........\t-IIIIIIIII\t8\t...,...^!.\tIIII/IIB"); - ps.println("chr1\t14925\tA\t11\t.........^!.^P.\tIIDIIIHIEEE\t8\t,......^N.\tBIIIIIIE\t10\t..........\t)IIIIIIIII\t8\t...,....\tIII:4IIE"); - ps.println("chr1\t14926\tT\t11\t...........\tDIIIIIIIIII\t8\t,.......\t9IIIIIII\t10\t..........\t-IIIIIIIII\t8\t...,....\tIIH;DIII"); - ps.println("chr1\t14927\tT\t11\t...........\tDIIIIIIIIII\t8\t,.......\t8IIIIIII\t11\t..........^O.\t&FIIIIIIIIE\t8\t...,....\tII:>IIII"); - ps.println("chr1\t14928\tA\t11\t...........\tIIIIIIIIIII\t9\t,.......^(.\tGAIIIIIIE\t12\t...........^G.\t&CIBIIII9IIE\t8\t...,....\tII;0DIII"); - ps.println("chr1\t14929\tC\t11\t...........\tIIII\t9\t,........\tB37%I7III\t12\t............\t9FI77IIIIIII\t8\t...,....\t?I;>4I7I"); - ps.println("chr1\t14932\tG\t11\t...........\tI=IIIIIIIII\t9\t,........\t?@IIIIIII\t12\t............\t>IIIIIIIIIII\t8\t...,....\t?ICI@III"); - ps.println("chr1\t14933\tG\t11\t...........\tEAIIIIDIIII\t9\t,........\tD8III?III\t12\t............\t3EIIIIIIIIII\t9\t...,....^L.\t8I9HIIIIE"); - ps.println("chr1\t14934\tT\t11\t...........\t9I>IIIIIIFIIIIE\t9\t,........\tHCIIIIIII\t12\t............\t*IIIIIIIIIII\t9\t...,.....\tIII7IIIII"); - ps.println("chr1\t14936\tC\t12\t............\tI@IIIIIIIIII\t9\t,........\tBIIDIIIII\t12\t............\t8GIIIIIIIIII\t9\t...,.....\tIII,BIIII"); - ps.println("chr1\t14937\tT\t12\t............\tIIIIIIIIIIII\t9\t,........\t8IIIIFIII\t12\t............\t:IIIIIIIIIII\t9\t...,.....\tBII?)IIII"); - ps.println("chr1\t14938\tG\t12\t....$........\t%=I1II6IFIII\t9\t,........\tD%IIB/IHI\t12\t............\t3II>IIIIIIHI\t9\t...,.....\t0IAI/I?II"); - ps.println("chr1\t14939\tG\t11\t...........\t%@IHI:IIIHI\t9\t,........\tI%II@CIDI\t12\t............\t7IICIIIIII9A\t9\t...,.....\t1IAI;I9II"); - ps.println("chr1\t14940\tC\t11\t...........\t:IF?I-IIIII\t9\t,........\tF+II+IIII\t12\t......$......\t2%I%A>I>IIIA\t9\t...,.....\t3?)G:III"); - ps.println("chr1\t14944\tG\t11\t.....C.....\t(//AI%IIIFI\t9\t,$........\tI=II%ICIII\t8\t.$.......\t2II@6IBI\t9\t.........\t?:16IIB=,\t8\t..,.....\t9/%&>CI0"); - ps.println("chr1\t14946\tG\t11\t...........\t3I>II%I@I(I\t7\t.......\tIICIIII\t9\t.........\t4ID?II@GD\t8\t..,.....\tI@%;HIII"); - ps.println("chr1\t14947\tC\t11\t...$........\tDI?IIAIDI(I\t7\t.......\tIIIIIII\t9\t.$.....N$..\tEI58II!(B\t8\t..,.....\tI@C?IIII"); - ps.println("chr1\t14948\tG\t10\t.$.$........\t=;-%3I6I"); - ps.println("chr1\t14949\tG\t8\t.......$.\t5%6I>I%D\t7\t.......\tBI:%I;B\t6\t......\t*1,:0%\t7\t.$.,....\t'1I59;'"); - ps.println("chr1\t14950\tG\t7\t.$......\t?H3B+B7\t7\t.$......\t:+%%D7@\t6\t......\t%-%50%\t6\t.,....\t-I3'C'"); - ps.println("chr1\t14951\tC\t6\t......\tG2=+95\t6\t......\t)%%A6C\t6\t......\t%9%C89\t6\t.,....\t8H6(=%"); - - ps.close(); - os.close(); - } - - private void createGFF3File(File pileupFile) throws IOException { - - OutputStream os = FileUtils.isFileNameGZip( pileupFile) ? 
new GZIPOutputStream( new FileOutputStream(pileupFile)) - : new FileOutputStream(pileupFile); - -// OutputStream os = new FileOutputStream(pileupFile); - PrintStream ps = new PrintStream(os); - - - ps.println("##gff-version 3"); - ps.println("# Created by: simple_segmenter.pl[v2940]"); - ps.println("# Created on: Tue May 24 01:48:54 2011"); - ps.println("# Commandline: -v -g -l -i SureSelect_All_Exon_50mb_filtered_exons_1-200_20110524.gff3 -o SureSelect_All_Exon_50mb_filtered_exons_1-200_20110524_shoulders.gff3 -f exon,100,100,100 -f highexon,300 -f lowexon"); - ps.println("chr1 simple_segmenter.pl[v2940] fill 1 14166 . . . ID=gnl|fill"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 14167 14266 . + . ID=gnl|exon_3_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 14267 14366 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 14367 14466 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14467 14587 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 14588 14638 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14639 14883 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon 14884 14942 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14943 15064 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15065 15164 . + . ID=gnl|exon_1_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 15165 15264 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 15265 15364 . + . ID=gnl|exon_3_100"); - ps.println("chr1 simple_segmenter.pl[v2940] fill 15365 15370 . . . ID=gnl|fill"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 15371 15470 . + . ID=gnl|exon_3_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 15471 15570 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15571 15670 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 15671 15990 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15991 16090 . + . ID=gnl|exon_1_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 16091 16190 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 16191 16390 . + . ID=gnl|exon_3_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 16391 16490 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 16491 16590 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 16591 16719 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 16720 16749 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 16750 17074 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 17075 17177 . + . 
ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 17178 17420 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 17421 17442 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 17443 18108 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 18109 18202 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 18203 18448 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 18449 18548 . + . ID=gnl|exon_1_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 18549 18648 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 18649 18848 . + . ID=gnl|exon_3_100"); -// ps.println("##gff-version 3"); -// ps.println("# Created by: simple_segmenter.pl[v2940]"); -// ps.println("# Created on: Tue May 24 01:48:54 2011"); -// ps.println("# Commandline: -v -g -l -i SureSelect_All_Exon_50mb_filtered_baits_1-200_20110524.gff3 -o SureSelect_All_Exon_50mb_filtered_baits_1-200_20110524_shoulders.gff3 -f bait,100,100,100 -f highbait,300 -f lowbait"); -// ps.println("chr1 simple_segmenter.pl[v2940] fill 1 14166 . . . ID=gnl|fill"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 14167 14266 . + . ID=gnl|bait_3_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 14267 14366 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14367 14466 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14467 14587 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14588 14638 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14639 14883 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14884 14942 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14943 15064 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15065 15164 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 15165 15264 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 15265 15364 . + . ID=gnl|bait_3_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] fill 15365 15370 . . . ID=gnl|fill"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 15371 15470 . + . ID=gnl|bait_3_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 15471 15570 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15571 15670 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 15671 15990 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15991 16090 . + . 
ID=gnl|bait_1_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 16091 16190 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 16191 16390 . + . ID=gnl|bait_3_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 16391 16490 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 16491 16590 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 16591 16719 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 16720 16749 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 16750 17074 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 17075 17177 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 17178 17420 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 17421 17442 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 17443 18108 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 18109 18202 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 18203 18448 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 18449 18548 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 18549 18648 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 18649 18848 . + . 
ID=gnl|bait_3_100"); - - ps.close(); - os.close(); - } -} diff --git a/qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java-- b/qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java-- deleted file mode 100644 index 4cd748f37..000000000 --- a/qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java-- +++ /dev/null @@ -1,431 +0,0 @@ -package org.qcmg.qmule; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; - -import junit.framework.Assert; - -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.rules.TemporaryFolder; -import org.qcmg.common.commandline.Executor; -import org.qcmg.common.util.FileUtils; -import org.qcmg.gff3.GFF3Record; - -public class WiggleFromPileupTest { - @Rule - public TemporaryFolder tempFolder = new TemporaryFolder(); - @Rule - public ExpectedException thrown = ExpectedException.none(); - - private File pileupFile; - private File gff3File; - private File wiggleFile; - private File pileupFileGZIP; - private File wiggleFileGZIP; - - @Before - public final void before() { - try { - pileupFile = tempFolder.newFile("wigglePileupTest.pileup"); - wiggleFile = tempFolder.newFile("wigglePileupTest.wiggle"); - gff3File = tempFolder.newFile("wigglePileupTest.gff3"); - pileupFileGZIP = tempFolder.newFile("wigglePileupTest.pileup.gz"); - wiggleFileGZIP = tempFolder.newFile("wigglePileupTest.wiggle.gz"); - createPileupFile(pileupFile); - createPileupFile(pileupFileGZIP); - createGFF3File(gff3File); - assertTrue(pileupFile.exists()); - assertTrue(gff3File.exists()); - assertTrue(pileupFileGZIP.exists()); - } catch (Exception e) { - System.err.println("File creation error in test harness: " + e.getMessage()); - } - } - - - @Test - public void testIsPositionInBaitSingleGff() { - GFF3Record gff = new GFF3Record(); - gff.setSeqId("chr1"); - gff.setStart(1); - gff.setEnd(10); - - List gffs = new ArrayList(); - gffs.add(gff); - Iterator iter = gffs.iterator(); -// WiggleFromPileup.setGffRecord(gff); - - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr0", 0, iter, iter.next())); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, gff)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff)); - - gff.setSeqId("chrX"); - gff.setStart(1000123); - gff.setEnd(1000223); - -// WiggleFromPileup.setGffRecord(gff); - - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 0, iter, gff)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 1, iter, gff)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrx", 1000124, iter, gff)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrx", 11, iter, gff)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 11, iter, gff)); - Assert.assertEquals(true, 
WiggleFromPileup.isPositionInBait("chrX", 1000123, iter, gff)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000124, iter, gff)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 1000223, iter, gff)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chrX", 1000224, iter, gff)); - } - - @Test - public void testIsPositionInBaitMultipleGff() { - GFF3Record gff1 = new GFF3Record(); - gff1.setSeqId("chr1"); - gff1.setStart(1); - gff1.setEnd(10); - GFF3Record gff2 = new GFF3Record(); - gff2.setSeqId("chr1"); - gff2.setStart(11); - gff2.setEnd(20); - GFF3Record gff3 = new GFF3Record(); - gff3.setSeqId("chr1"); - gff3.setStart(31); - gff3.setEnd(40); - - List gffs = new ArrayList(); - gffs.add(gff1); - gffs.add(gff2); - gffs.add(gff3); - Iterator iter = gffs.iterator(); - - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, iter.next())); - -// Assert.assertEquals(gff1, WiggleFromPileup.getGffRecord()); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff1)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff1)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff1)); - // iterator should have been advanced - Assert.assertEquals(gff2, WiggleFromPileup.getGffRecord()); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 20, iter, gff2)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 21, iter, gff2)); - // iterator should have been advanced - Assert.assertEquals(gff3, WiggleFromPileup.getGffRecord()); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 29, iter, gff3)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 31, iter, gff3)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 40, iter, gff3)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 41, iter, gff3)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 141, iter, gff3)); - } - - @Test - public void testIsPositionInBaitMultipleGffMultipleChromosomes() { - GFF3Record gff1 = new GFF3Record(); - gff1.setSeqId("chr1"); - gff1.setStart(1); - gff1.setEnd(10); - GFF3Record gff2 = new GFF3Record(); - gff2.setSeqId("chr1"); - gff2.setStart(11); - gff2.setEnd(20); - GFF3Record gff3 = new GFF3Record(); - gff3.setSeqId("chr1"); - gff3.setStart(31); - gff3.setEnd(40); - GFF3Record gff4 = new GFF3Record(); - gff4.setSeqId("chr2"); - gff4.setStart(15); - gff4.setEnd(25); - GFF3Record gff5 = new GFF3Record(); - gff5.setSeqId("chr2"); - gff5.setStart(26); - gff5.setEnd(40); - GFF3Record gff6 = new GFF3Record(); - gff6.setSeqId("chrX"); - gff6.setStart(100026); - gff6.setEnd(100040); - - List gffs = new ArrayList(); - gffs.add(gff1); - gffs.add(gff2); - gffs.add(gff3); - gffs.add(gff4); - gffs.add(gff5); - gffs.add(gff6); - Iterator iter = gffs.iterator(); - - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr0", 0, iter, iter.next())); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 0, iter, gff1)); - -// Assert.assertEquals(gff1, WiggleFromPileup.getGffRecord()); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff1)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff1)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 11, iter, gff1)); - // iterator should have been advanced - Assert.assertEquals(gff2, 
WiggleFromPileup.getGffRecord()); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 20, iter, gff2)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 21, iter, gff2)); - // iterator should have been advanced - Assert.assertEquals(gff3, WiggleFromPileup.getGffRecord()); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 29, iter, gff3)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 31, iter, gff3)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr1", 40, iter, gff3)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 41, iter, gff3)); - // iterator should have been advanced - Assert.assertEquals(gff4, WiggleFromPileup.getGffRecord()); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 141, iter, gff4)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 142, iter, gff4)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr1", 1000142, iter, gff4)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 1, iter, gff4)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 2, iter, gff4)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 15, iter, gff4)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 25, iter, gff4)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 26, iter, gff4)); - // iterator should have been advanced - Assert.assertEquals(gff5, WiggleFromPileup.getGffRecord()); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chr2", 40, iter, gff5)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr2", 41, iter, gff5)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr3", 15, iter, gff5)); - // iterator should have been advanced - Assert.assertEquals(gff6, WiggleFromPileup.getGffRecord()); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr3", 10015, iter, gff6)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr4", 10015, iter, gff6)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr5", 10015, iter, gff6)); - Assert.assertEquals(false, WiggleFromPileup.isPositionInBait("chr15", 10015, iter, gff6)); - Assert.assertEquals(true, WiggleFromPileup.isPositionInBait("chrX", 100026, iter, gff6)); - - } - - @Test - public final void callWithNoArgs() throws Exception { - String command = ""; - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); - assertTrue(1 == exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); - } - - @Test - public final void callWithNoInputFile() throws Exception { - String command = "-log ./logfile -o " + tempFolder.getRoot().getAbsolutePath(); - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); - assertTrue(1 == exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - assertTrue(0 < exec.getErrorStreamConsumer().getLines().length); - } - - @Test - public final void callWithMissingArgs() throws Exception { - String command = "-log ./logfile -o blah.wiggle -i " + pileupFile.getAbsolutePath(); - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); - assertTrue(1 == exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - assertTrue(0 < 
exec.getErrorStreamConsumer().getLines().length); - } - - @Test - public final void callWithValidArguments() throws Exception { - ExpectedException.none(); - String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 1 -tumourCoverage 1 -i " + pileupFile.getAbsolutePath() - + " -i " + gff3File.getAbsolutePath() - + " -o " + wiggleFile.getAbsolutePath(); - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); - assertEquals(0, exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - - // check the wiggle file - InputStream reader = new FileInputStream(wiggleFile); - assertEquals(29, examineWiggle(reader)); - } - - @Test - public final void callWithValidArgumentsLargeCoverage() throws Exception { - ExpectedException.none(); - String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 50 -tumourCoverage 50 -i " + pileupFile.getAbsolutePath() - + " -i " + gff3File.getAbsolutePath() - + " -o " + wiggleFile.getAbsolutePath(); - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); - assertEquals(0, exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - - // check the wiggle file - InputStream reader = new FileInputStream(wiggleFile); - assertEquals(0, examineWiggle(reader)); - } - - @Test - public final void callWithZippedFiles() throws Exception { - ExpectedException.none(); - String command = "-log ./logfile -pileupFormat NNTT -normalCoverage 20 -tumourCoverage 20 -i " + pileupFileGZIP.getAbsolutePath() - + " -i " + gff3File.getAbsolutePath() - + " -o " + wiggleFileGZIP.getAbsolutePath(); - Executor exec = new Executor(command, "org.qcmg.qmule.WiggleFromPileup"); - assertEquals(0, exec.getErrCode()); - assertTrue(0 == exec.getOutputStreamConsumer().getLines().length); - - // check the wiggle file - InputStream reader = new GZIPInputStream(new FileInputStream(wiggleFileGZIP)); - assertEquals(14, examineWiggle(reader)); - } - - private int examineWiggle(InputStream reader) throws IOException { - int count = 0; - BufferedReader fr = new BufferedReader(new InputStreamReader(reader)); - String line = fr.readLine(); // first line has the header - while ((line = fr.readLine()) != null) { - if (line.startsWith("fixedStep")) continue; - count += Integer.parseInt(line); - } - return count; - } - - private void createPileupFile(File pileupFile) throws IOException { - - OutputStream os = FileUtils.isFileNameGZip(pileupFile) ? 
new GZIPOutputStream( new FileOutputStream(pileupFile)) - : new FileOutputStream(pileupFile); - -// OutputStream os = new FileOutputStream(pileupFile); - PrintStream ps = new PrintStream(os); - - ps.println("chr1\t14923\tG\t8\t.......^!.\tIIIIIIIE\t7\t,.....^!.\t5IIIIIE\t10\t.........^T.\t0IIIIIIIIE\t7\t...,...\tIIIIIII"); - ps.println("chr1\t14924\tA\t9\t........^!.\tEI@III?IB\t7\t,......\t@IIIIII\t10\t..........\t-IIIIIIIII\t8\t...,...^!.\tIIII/IIB"); - ps.println("chr1\t14925\tA\t11\t.........^!.^P.\tIIDIIIHIEEE\t8\t,......^N.\tBIIIIIIE\t10\t..........\t)IIIIIIIII\t8\t...,....\tIII:4IIE"); - ps.println("chr1\t14926\tT\t11\t...........\tDIIIIIIIIII\t8\t,.......\t9IIIIIII\t10\t..........\t-IIIIIIIII\t8\t...,....\tIIH;DIII"); - ps.println("chr1\t14927\tT\t11\t...........\tDIIIIIIIIII\t8\t,.......\t8IIIIIII\t11\t..........^O.\t&FIIIIIIIIE\t8\t...,....\tII:>IIII"); - ps.println("chr1\t14928\tA\t11\t...........\tIIIIIIIIIII\t9\t,.......^(.\tGAIIIIIIE\t12\t...........^G.\t&CIBIIII9IIE\t8\t...,....\tII;0DIII"); - ps.println("chr1\t14929\tC\t11\t...........\tIIII\t9\t,........\tB37%I7III\t12\t............\t9FI77IIIIIII\t8\t...,....\t?I;>4I7I"); - ps.println("chr1\t14932\tG\t11\t...........\tI=IIIIIIIII\t9\t,........\t?@IIIIIII\t12\t............\t>IIIIIIIIIII\t8\t...,....\t?ICI@III"); - ps.println("chr1\t14933\tG\t11\t...........\tEAIIIIDIIII\t9\t,........\tD8III?III\t12\t............\t3EIIIIIIIIII\t9\t...,....^L.\t8I9HIIIIE"); - ps.println("chr1\t14934\tT\t11\t...........\t9I>IIIIIIFIIIIE\t9\t,........\tHCIIIIIII\t12\t............\t*IIIIIIIIIII\t9\t...,.....\tIII7IIIII"); - ps.println("chr1\t14936\tC\t12\t............\tI@IIIIIIIIII\t9\t,........\tBIIDIIIII\t12\t............\t8GIIIIIIIIII\t9\t...,.....\tIII,BIIII"); - ps.println("chr1\t14937\tT\t12\t............\tIIIIIIIIIIII\t9\t,........\t8IIIIFIII\t12\t............\t:IIIIIIIIIII\t9\t...,.....\tBII?)IIII"); - ps.println("chr1\t14938\tG\t12\t....$........\t%=I1II6IFIII\t9\t,........\tD%IIB/IHI\t12\t............\t3II>IIIIIIHI\t9\t...,.....\t0IAI/I?II"); - ps.println("chr1\t14939\tG\t11\t...........\t%@IHI:IIIHI\t9\t,........\tI%II@CIDI\t12\t............\t7IICIIIIII9A\t9\t...,.....\t1IAI;I9II"); - ps.println("chr1\t14940\tC\t11\t...........\t:IF?I-IIIII\t9\t,........\tF+II+IIII\t12\t......$......\t2%I%A>I>IIIA\t9\t...,.....\t3?)G:III"); - ps.println("chr1\t14944\tG\t11\t.....C.....\t(//AI%IIIFI\t9\t,$........\tI=II%ICIII\t8\t.$.......\t2II@6IBI\t9\t.........\t?:16IIB=,\t8\t..,.....\t9/%&>CI0"); - ps.println("chr1\t14946\tG\t11\t...........\t3I>II%I@I(I\t7\t.......\tIICIIII\t9\t.........\t4ID?II@GD\t8\t..,.....\tI@%;HIII"); - ps.println("chr1\t14947\tC\t11\t...$........\tDI?IIAIDI(I\t7\t.......\tIIIIIII\t9\t.$.....N$..\tEI58II!(B\t8\t..,.....\tI@C?IIII"); - ps.println("chr1\t14948\tG\t10\t.$.$........\t=;-%3I6I"); - ps.println("chr1\t14949\tG\t8\t.......$.\t5%6I>I%D\t7\t.......\tBI:%I;B\t6\t......\t*1,:0%\t7\t.$.,....\t'1I59;'"); - ps.println("chr1\t14950\tG\t7\t.$......\t?H3B+B7\t7\t.$......\t:+%%D7@\t6\t......\t%-%50%\t6\t.,....\t-I3'C'"); - ps.println("chr1\t14951\tC\t6\t......\tG2=+95\t6\t......\t)%%A6C\t6\t......\t%9%C89\t6\t.,....\t8H6(=%"); - - ps.close(); - os.close(); - } - - private void createGFF3File(File pileupFile) throws IOException { - - OutputStream os = FileUtils.isFileNameGZip(pileupFile) ? 
new GZIPOutputStream( new FileOutputStream(pileupFile)) - : new FileOutputStream(pileupFile); - -// OutputStream os = new FileOutputStream(pileupFile); - PrintStream ps = new PrintStream(os); - - - ps.println("##gff-version 3"); - ps.println("# Created by: simple_segmenter.pl[v2940]"); - ps.println("# Created on: Tue May 24 01:48:54 2011"); - ps.println("# Commandline: -v -g -l -i SureSelect_All_Exon_50mb_filtered_exons_1-200_20110524.gff3 -o SureSelect_All_Exon_50mb_filtered_exons_1-200_20110524_shoulders.gff3 -f exon,100,100,100 -f highexon,300 -f lowexon"); - ps.println("chr1 simple_segmenter.pl[v2940] fill 1 14166 . . . ID=gnl|fill"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 14167 14266 . + . ID=gnl|exon_3_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 14267 14366 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 14367 14466 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14467 14587 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 14588 14638 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14639 14883 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon 14884 14942 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 14943 15064 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15065 15164 . + . ID=gnl|exon_1_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 15165 15264 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 15265 15364 . + . ID=gnl|exon_3_100"); - ps.println("chr1 simple_segmenter.pl[v2940] fill 15365 15370 . . . ID=gnl|fill"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 15371 15470 . + . ID=gnl|exon_3_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 15471 15570 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15571 15670 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 15671 15990 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 15991 16090 . + . ID=gnl|exon_1_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 16091 16190 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 16191 16390 . + . ID=gnl|exon_3_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 16391 16490 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 16491 16590 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 16591 16719 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 16720 16749 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 16750 17074 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 17075 17177 . + . 
ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 17178 17420 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 17421 17442 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 17443 18108 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 18109 18202 . + . ID=gnl|exon_1_100"); - ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed exon 18203 18448 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_1_100 18449 18548 . + . ID=gnl|exon_1_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_2_100 18549 18648 . + . ID=gnl|exon_2_100"); - ps.println("chr1 simple_segmenter.pl[v2940] exon_3_100 18649 18848 . + . ID=gnl|exon_3_100"); -// ps.println("##gff-version 3"); -// ps.println("# Created by: simple_segmenter.pl[v2940]"); -// ps.println("# Created on: Tue May 24 01:48:54 2011"); -// ps.println("# Commandline: -v -g -l -i SureSelect_All_Exon_50mb_filtered_baits_1-200_20110524.gff3 -o SureSelect_All_Exon_50mb_filtered_baits_1-200_20110524_shoulders.gff3 -f bait,100,100,100 -f highbait,300 -f lowbait"); -// ps.println("chr1 simple_segmenter.pl[v2940] fill 1 14166 . . . ID=gnl|fill"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 14167 14266 . + . ID=gnl|bait_3_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 14267 14366 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14367 14466 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14467 14587 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14588 14638 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14639 14883 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 14884 14942 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 14943 15064 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15065 15164 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 15165 15264 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 15265 15364 . + . ID=gnl|bait_3_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] fill 15365 15370 . . . ID=gnl|fill"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 15371 15470 . + . ID=gnl|bait_3_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 15471 15570 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15571 15670 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 15671 15990 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 15991 16090 . + . 
ID=gnl|bait_1_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 16091 16190 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 16191 16390 . + . ID=gnl|bait_3_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 16391 16490 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 16491 16590 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 16591 16719 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 16720 16749 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 16750 17074 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 17075 17177 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 17178 17420 . + . ID=ens|ENST00000423562,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 17421 17442 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 17443 18108 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 18109 18202 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 SureSelect_All_Exon_50mb_with_annotation.hg19.bed bait 18203 18448 . + . ID=ens|ENST00000423562,ens|ENST00000430492,ens|ENST00000438504,ens|ENST00000488147,ref|NR_024540,ref|WASH7P"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_1_100 18449 18548 . + . ID=gnl|bait_1_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_2_100 18549 18648 . + . ID=gnl|bait_2_100"); -// ps.println("chr1 simple_segmenter.pl[v2940] bait_3_100 18649 18848 . + . 
ID=gnl|bait_3_100"); - - ps.close(); - os.close(); - } -} diff --git a/qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java-- b/qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java-- deleted file mode 100644 index c8a332287..000000000 --- a/qmule/test/org/qcmg/qmule/snppicker/CompareSnpsTest.java-- +++ /dev/null @@ -1,70 +0,0 @@ -package org.qcmg.qmule.snppicker; - - -import org.junit.Assert; -import org.junit.Ignore; -import org.junit.Test; -import org.qcmg.common.util.SnpUtils; -import org.qcmg.qmule.tab.TabbedRecord; - -public class CompareSnpsTest { - - @Test - public void testIsStopNonSynonymous() { - try { - CompareSnps.isStopNonSynonymous(null, -1); - Assert.fail("should have thrown a wobbly"); - } catch (IllegalArgumentException e) {} - - TabbedRecord tr = new TabbedRecord(); - try { - CompareSnps.isStopNonSynonymous(tr, -1); - Assert.fail("should have thrown a wobbly"); - } catch (IllegalArgumentException e) {} - - tr.setData(""); - Assert.assertFalse(CompareSnps.isStopNonSynonymous(tr, -1)); - tr.setData("1\t2\t3\t4\t5"); - Assert.assertFalse(CompareSnps.isStopNonSynonymous(tr, -1)); - tr.setData("1\t2\t3\t4\t5\tSTOP\t7\t8"); - Assert.assertFalse(CompareSnps.isStopNonSynonymous(tr, -1)); - Assert.assertTrue(CompareSnps.isStopNonSynonymous(tr, 5)); - tr.setData("1\t2\t3\t4\t5\t6\t7\t8\tNON_SYNONYMOUS"); - Assert.assertTrue(CompareSnps.isStopNonSynonymous(tr, -1)); - Assert.assertFalse(CompareSnps.isStopNonSynonymous(tr, 5)); - - } - - @Ignore - public void testIsClassAB() { - try { - CompareSnps.isClassAB(null, -1); - Assert.fail("should have thrown a wobbly"); - } catch (IllegalArgumentException e) {} - - TabbedRecord tr = new TabbedRecord(); - try { - CompareSnps.isClassAB(tr, -1); - Assert.fail("should have thrown a wobbly"); - } catch (IllegalArgumentException e) {} - - tr.setData(""); - Assert.assertFalse(CompareSnps.isClassAB(tr, -1)); - tr.setData("1\t2\t3\t4\t5"); - Assert.assertFalse(CompareSnps.isClassAB(tr, -1)); - tr.setData("1\t2\t3\t4\t5\tSTOP\t7\t8"); - Assert.assertFalse(CompareSnps.isClassAB(tr, -1)); - Assert.assertFalse(CompareSnps.isClassAB(tr, 5)); - tr.setData("1\t2\t3\t4\t5\t6\t7\t8\tNON_SYNONYMOUS"); - Assert.assertFalse(CompareSnps.isClassAB(tr, -1)); - Assert.assertFalse(CompareSnps.isClassAB(tr, 5)); - - tr.setData("1\t2\t3\t4\t5\t6\t7\t8\t--"); - Assert.assertTrue(CompareSnps.isClassAB(tr, -1)); - tr.setData("1\t2\t3\t" + SnpUtils.LESS_THAN_3_READS_NORMAL + "\t5\t6\t7\t8\t--"); - Assert.assertTrue(CompareSnps.isClassAB(tr, 3)); - Assert.assertFalse(CompareSnps.isClassAB(tr, 4)); - - } - -} diff --git a/qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java-- b/qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java-- deleted file mode 100644 index 7e4b342e7..000000000 --- a/qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java-- +++ /dev/null @@ -1,154 +0,0 @@ -package org.qcmg.qmule.snppicker; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import htsjdk.samtools.SAMRecord; - -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.qcmg.common.util.SnpUtils; -import org.qcmg.common.vcf.header.VcfHeaderUtils; -import org.qcmg.pileup.QSnpRecord; - -public class GatkUniqueSnpsTest { - - - private static List samRecords = new ArrayList(); - - @Before - public void setup() throws IOException { - SAMRecord record = new SAMRecord(null); - record.setAlignmentStart(100); - record.setReferenceName("chr1"); - record.setReadBases(new byte[] {'A', 'C', 'G', 'T', 
'A','A','A','A','A','A','A','A','A'}); - samRecords.add(record); - - for (int i = 1 ; i < 12 ; i++) { - record = new SAMRecord(null); - record.setAlignmentStart(100+i); - record.setReferenceName("chr1"); - record.setReadBases(new byte[] {'A', 'A', 'A', 'A', 'A','A','A','A','A','A','A','A','A'}); - samRecords.add(record); - } - } - - @Test - public void testFailingRead() throws Exception { - SAMRecord record = new SAMRecord(null); - record.setReferenceName("chr1"); - record.setAlignmentStart(168512433); -// record.setAlignmentEnd(168512486); - record.setCigarString("7M4D43M"); - record.setReadString("AGCTGGTATTGCACATGGTGTGGACCCCATCAAGCTGGTTAACTTTCTGN"); - List records = new ArrayList(); - records.add(record); - - QSnpRecord qpr = new QSnpRecord("chr1", 168512486, "G"); - qpr.setAlt("C"); - - GatkUniqueSnps.examinePileup(records, qpr); - - Assert.assertNotNull(qpr.getAnnotation()); - Assert.assertFalse(qpr.getAnnotation().contains("mutation also found in pileup of normal")); - Assert.assertTrue(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); - } - - @Test - public void testFailingRead2() throws Exception{ - SAMRecord record = new SAMRecord(null); - record.setReferenceName("chr1"); - record.setAlignmentStart(55524198); - record.setCigarString("1H49M"); - record.setReadString("TGGTCAGCACACTGGGGGCCTACACGGATGGCCACAGCCATCGCCCGCT"); - List records = new ArrayList(); - records.add(record); - - record = new SAMRecord(null); - record.setReferenceName("chr1"); - record.setAlignmentStart(55524210); - record.setCigarString("13H37M"); - record.setReadString("TCGGGGCCTACACGGATGGCCACAGCCATCGCCCGCT"); - records.add(record); - - record = new SAMRecord(null); - record.setReferenceName("chr1"); - record.setAlignmentStart(55524212); - record.setCigarString("10H40M"); - record.setReadString("GGGGCCTACACGGATGGCCACAGCCATCGCCCGCTGCGCC"); - records.add(record); - - record = new SAMRecord(null); - record.setReferenceName("chr1"); - record.setAlignmentStart(55524218); - record.setCigarString("2H48M"); - record.setReadString("TACACGGATGGCCACAGCCGTCGCCCGCTGCGCCCCAGATGAGGAGCT"); - records.add(record); - - record = new SAMRecord(null); - record.setReferenceName("chr1"); - record.setAlignmentStart(55524228); - record.setCigarString("4M6D21M"); - record.setReadString("GCCATCGCCCGCTGCGCCCCAGATG"); - records.add(record); - - QSnpRecord qpr = new QSnpRecord("chr1", 55524237, "G"); - qpr.setAlt("A"); - - GatkUniqueSnps.examinePileup(records, qpr); - - Assert.assertNotNull(qpr.getAnnotation()); - Assert.assertTrue(qpr.getAnnotation().contains(SnpUtils.MUTATION_IN_NORMAL)); - Assert.assertTrue(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); - } - - - @Test - public void testExaminePileup() throws Exception { - QSnpRecord qpr = new QSnpRecord("chr1", 101, "G"); - qpr.setAlt("C"); - - GatkUniqueSnps.examinePileup(samRecords.subList(0,1), qpr); - - Assert.assertNotNull(qpr.getAnnotation()); - Assert.assertTrue(qpr.getAnnotation().contains(SnpUtils.MUTATION_IN_NORMAL)); - Assert.assertTrue(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); - - qpr = new QSnpRecord("chr1", 102, "G"); - qpr.setAlt("C"); - - GatkUniqueSnps.examinePileup(samRecords.subList(0, 1), qpr); - - Assert.assertNotNull(qpr.getAnnotation()); - Assert.assertFalse(qpr.getAnnotation().contains(SnpUtils.MUTATION_IN_NORMAL)); - Assert.assertTrue(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); - - qpr = new QSnpRecord("chr1", 110, "A"); - qpr.setAlt("G"); - - GatkUniqueSnps.examinePileup(samRecords.subList(0, 10), 
qpr); - - Assert.assertNotNull(qpr.getAnnotation()); - Assert.assertFalse(qpr.getAnnotation().contains(SnpUtils.MUTATION_IN_NORMAL)); - Assert.assertTrue(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); - - qpr = new QSnpRecord("chr1", 112, "A"); - qpr.setAlt("G"); - - GatkUniqueSnps.examinePileup(samRecords, qpr); - Assert.assertNull(qpr.getAnnotation()); - - qpr = new QSnpRecord("chr1", 112, "G"); - qpr.setAlt("A"); - - GatkUniqueSnps.examinePileup(samRecords, qpr); - Assert.assertNotNull(qpr.getAnnotation()); - Assert.assertTrue(qpr.getAnnotation().contains(SnpUtils.MUTATION_IN_NORMAL)); - Assert.assertFalse(qpr.getAnnotation().contains(VcfHeaderUtils.FILTER_COVERAGE)); - } - - - -} diff --git a/qmule/test/org/qcmg/qmule/util/IGVBatchFileGeneratorTest.java-- b/qmule/test/org/qcmg/qmule/util/IGVBatchFileGeneratorTest.java-- deleted file mode 100644 index 6ea4f2382..000000000 --- a/qmule/test/org/qcmg/qmule/util/IGVBatchFileGeneratorTest.java-- +++ /dev/null @@ -1,73 +0,0 @@ -package org.qcmg.qmule.util; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -import junit.framework.Assert; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; - -public class IGVBatchFileGeneratorTest { - - @Rule - public TemporaryFolder tempFolder = new TemporaryFolder(); - - @Test - public void testGenerate() throws IOException { - try { - IGVBatchFileGenerator.generate(null, null); - Assert.fail("Should not have reached here"); - } catch (IllegalArgumentException iae) {} - try { - IGVBatchFileGenerator.generate(null, ""); - Assert.fail("Should not have reached here"); - } catch (IllegalArgumentException iae) {} - try { - IGVBatchFileGenerator.generate(Collections.EMPTY_LIST, ""); - Assert.fail("Should not have reached here"); - } catch (IllegalArgumentException iae) {} - - // create a temp File - File tmpOutput = tempFolder.newFile("testGenerate.igv.batch"); - try { - IGVBatchFileGenerator.generate(Collections.EMPTY_LIST, tmpOutput.getAbsolutePath()); - Assert.fail("Should not have reached here"); - } catch (IllegalArgumentException iae) {} - - List positions = new ArrayList(); - positions.add(ChrPointPosition.valueOf("chr1", 1)); - positions.add(ChrPointPosition.valueOf("chr2", 1234567890)); - - IGVBatchFileGenerator.generate(positions, tmpOutput.getAbsolutePath()); - - //read in contents of file - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(tmpOutput))); - List fileContents = new ArrayList(); - String line = null; - while ((line = reader.readLine()) != null) { - fileContents.add(line); - } - reader.close(); - - Assert.assertEquals("snapshotDirectory " + tmpOutput.getParent() , fileContents.get(0)); - Assert.assertEquals("genome " + IGVBatchFileGenerator.GENOME, fileContents.get(1)); - Assert.assertEquals("goto chr1:1-1", fileContents.get(2)); - Assert.assertEquals("sort base", fileContents.get(3)); - Assert.assertEquals("collapse", fileContents.get(4)); - Assert.assertEquals("snapshot chr1:1.png", fileContents.get(5)); - Assert.assertEquals("goto chr2:1234567890-1234567890", fileContents.get(6)); - Assert.assertEquals("snapshot chr2:1234567890.png", fileContents.get(9)); - - } - -} diff --git 
a/qmule/test/org/qcmg/qmule/util/TabbedDataLoaderTest.java-- b/qmule/test/org/qcmg/qmule/util/TabbedDataLoaderTest.java-- deleted file mode 100644 index 213d0f15c..000000000 --- a/qmule/test/org/qcmg/qmule/util/TabbedDataLoaderTest.java-- +++ /dev/null @@ -1,21 +0,0 @@ -package org.qcmg.qmule.util; - -import junit.framework.Assert; - -import org.junit.Test; - -public class TabbedDataLoaderTest { - - @Test - public void testGetStringFromArray() { - Assert.assertNull(TabbedDataLoader.getStringFromArray(null, -1)); - Assert.assertNull(TabbedDataLoader.getStringFromArray(new String[] {}, -1)); - Assert.assertNull(TabbedDataLoader.getStringFromArray(new String[] {}, 0)); - Assert.assertEquals("Hello", TabbedDataLoader.getStringFromArray(new String[] {"Hello"}, 0)); - Assert.assertEquals("Hello", TabbedDataLoader.getStringFromArray(new String[] {"Hello"}, -1)); - Assert.assertNull(TabbedDataLoader.getStringFromArray(new String[] {"Hello"}, -10)); - Assert.assertEquals("there", TabbedDataLoader.getStringFromArray(new String[] {"Hello", "there"}, -1)); - Assert.assertEquals("there", TabbedDataLoader.getStringFromArray(new String[] {"Hello", "1", "2", "3", "there"}, -1)); - Assert.assertEquals("1", TabbedDataLoader.getStringFromArray(new String[] {"Hello", "1", "2", "3", "there"}, 1)); - } -} From 7e63e83bb87d27a97456563632e5164f6d9f5230 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 25 Nov 2020 21:50:33 +1000 Subject: [PATCH 32/73] create unit test for new illumuina --- .../org/qcmg/illumina/IlluminaReaderTest.java | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 qio/test/org/qcmg/illumina/IlluminaReaderTest.java diff --git a/qio/test/org/qcmg/illumina/IlluminaReaderTest.java b/qio/test/org/qcmg/illumina/IlluminaReaderTest.java new file mode 100644 index 000000000..e4e09207a --- /dev/null +++ b/qio/test/org/qcmg/illumina/IlluminaReaderTest.java @@ -0,0 +1,119 @@ +package org.qcmg.qio.illumina; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + + +public class IlluminaSerializerTest { + private static String invalidInputString = "chr12 126890980 - rs1000000 0.8379 C C hom [T/C]"; + + private static final String rawInputString = "rs10002311 4802094023_R02C01 A A " + + "0.7992 14 93259 0 A A T T A A 4 77661528 " + + "0.7895 0.5494 [T/G] BOT TOP 0.109 0.409 0.349 0.060 4887 1511 0.0290 0.1235"; + + private static final String rawInputString2 = "rs6680706 5760640025_R02C01 A G " + + "0.7956 70 900225 0 T C T C A B 1 4231843 " + + "0.7872 1.0000 [T/C] BOT BOT 0.515 1.911 0.933 0.977 12110 13151 0.4745 0.0471"; + + + + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Test + public void testParseIDInvalid() throws Exception { + + File illuminaFile = tmpFolder.newFile("illumina"); + IlluminaFileReader reader = new IlluminaFileReader(illuminaFile); + + // test empty string + try { + reader.getRecord(""); + Assert.fail("Should have thrown an Exception"); + } catch (Exception e) {} + + // string that does not start with 'chr' + try { + reader.getRecord("testing testing 123"); + Assert.fail("Should have thrown an Exception"); + } catch (Exception e) {} + + // string containing 'chr' but not at the start.. 
+ try { + reader.getRecord("this is a chr1 test"); + Assert.fail("Should have thrown an Exception"); + } catch (Exception e) {} + + // string that is not the right length + try { + reader.getRecord(invalidInputString); + Assert.fail("Should have thrown an Exception"); + } catch (Exception e) { + Assert.assertEquals(true, e.getMessage().startsWith("Bad Illumina data format")); + } + } + + + @Test + public void testParseRecords() throws Exception { + + File illuminaFile = tmpFolder.newFile("illumina"); + IlluminaFileReader reader = new IlluminaFileReader(illuminaFile); + + // real record + // inputString = "chr12 126890980 - rs1000000 0.8379 C C hom [T/C] G__C/T"; + IlluminaRecord record = reader.getRecord(rawInputString); + Assert.assertNotNull(record); + Assert.assertEquals("4", record.getChr()); // we now parse chr and position for Illumina records + Assert.assertEquals(77661528, record.getStart()); + Assert.assertEquals("rs10002311", record.getSnpId()); + Assert.assertEquals(0.7992f, record.getGCScore(), 0.00000); + Assert.assertEquals('T', record.getFirstAllele()); + Assert.assertEquals('T', record.getSecondAllele()); + Assert.assertTrue(record.isHom()); + Assert.assertEquals("[T/G]", record.getSnp()); + } + + @Test + public void testNextRecord() throws Exception { + // create tmp illumina file + File illuminaFile = tmpFolder.newFile("illumina"); + generateIllumiaFile(illuminaFile); + IlluminaFileReader reader = new IlluminaFileReader(illuminaFile); + + //only one record + for(IlluminaRecord rec: reader) { + Assert.assertNotNull(rec); + Assert.assertEquals("cnvi0000657", rec.getSnpId()); + } + + + } + + private void generateIllumiaFile(File file) throws IOException { + FileWriter writer = new FileWriter(file); + writer.write("[Header]\n"); + writer.write("GSGT Version 1.8.4\n"); + writer.write("Processing Date 8/12/2011 8:41 PM\n"); + writer.write("Content HumanOmni1-Quad_v1-0_H.bpm\n"); + writer.write("Num SNPs 1134514\n"); + writer.write("Total SNPs 1134514\n"); + writer.write("Num Samples 259\n"); + writer.write("Total Samples 260\n"); + writer.write("File 77 of 259\n"); + writer.write("[Data]\n"); + writer.write("SNP Name Sample ID Allele1 - Top Allele2 - Top GC Score Sample Name Sample Group Sample Index SNP Index SNP Aux Allele1 - Forward Allele2 - Forward Allele1 - Design Allele2 - Design Allele1 - AB Allele2 - AB Chr Position GT Score Cluster Sep SNP ILMN Strand Customer Strand Top Genomic Sequence Theta R X Y" ++ "X Raw Y Raw B Allele Freq Log R Ratio\n"); + writer.write("cnvi0000657 5636391030_R02C01 - - 0.0000 78 127 0 - - - - - - 6 160513181 0.0000 0.0000 [A/G] TOP TOP\t\t" + + "0.021 1.675 1.621 0.054 17348 1140 0.0100 -0.1054\n"); + writer.flush(); + } +} From 2f015ecfea9696388299de4a116d7a6ac7798a50 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 25 Nov 2020 21:54:30 +1000 Subject: [PATCH 33/73] fix bug in illumina --- qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java b/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java index 549527ea9..3b29c5684 100644 --- a/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java +++ b/qio/src/org/qcmg/qio/illumina/IlluminaFileReader.java @@ -18,7 +18,8 @@ public IlluminaFileReader(File file) throws IOException { super(file, DEFAULT_BUFFER_SIZE, HEADER_LINE, DEFAULT_CHARSET); } - public String readHeader(CharSequence headerPrefix ) throws IOException { + @Override + public String 
readHeaderAndReturnFirstNonHeaderLine(CharSequence headerPrefix ) throws IOException { String nextLine = bin.readLine(); //empty file @@ -49,7 +50,7 @@ public IlluminaRecord getRecord(String line) { // raw Illumina data has 32 fields... and the first one is an integer if (dataArray.length != 32) { - throw new IllegalArgumentException("Bad Illumina data format - expecting 32 fields but saw " + dataArray.length); + throw new IllegalArgumentException("Bad Illumina data format - expecting 32 fields but saw " + dataArray.length + ":\n " + line); } return new IlluminaRecord( dataArray ); From be0389ef5592faaeb98ef3d91aad75297dc5434d Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 25 Nov 2020 22:23:50 +1000 Subject: [PATCH 34/73] update package name for illumina test --- qio/test/org/qcmg/{ => qio}/illumina/IlluminaReaderTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) rename qio/test/org/qcmg/{ => qio}/illumina/IlluminaReaderTest.java (97%) diff --git a/qio/test/org/qcmg/illumina/IlluminaReaderTest.java b/qio/test/org/qcmg/qio/illumina/IlluminaReaderTest.java similarity index 97% rename from qio/test/org/qcmg/illumina/IlluminaReaderTest.java rename to qio/test/org/qcmg/qio/illumina/IlluminaReaderTest.java index e4e09207a..baa7e8bbb 100644 --- a/qio/test/org/qcmg/illumina/IlluminaReaderTest.java +++ b/qio/test/org/qcmg/qio/illumina/IlluminaReaderTest.java @@ -1,8 +1,6 @@ package org.qcmg.qio.illumina; -import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; @@ -12,7 +10,7 @@ import org.junit.rules.TemporaryFolder; -public class IlluminaSerializerTest { +public class IlluminaReaderTest { private static String invalidInputString = "chr12 126890980 - rs1000000 0.8379 C C hom [T/C]"; private static final String rawInputString = "rs10002311 4802094023_R02C01 A A " + From 6973ac2992e68c788b07ad373d4c762e57569ffa Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 25 Nov 2020 22:27:46 +1000 Subject: [PATCH 35/73] remove unused imports --- .../src/au/edu/qimr/tiledaligner/util/TiledAlignerUtil.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/q3tiledaligner/src/au/edu/qimr/tiledaligner/util/TiledAlignerUtil.java b/q3tiledaligner/src/au/edu/qimr/tiledaligner/util/TiledAlignerUtil.java index 71b5ff9f8..2198bf5bc 100644 --- a/q3tiledaligner/src/au/edu/qimr/tiledaligner/util/TiledAlignerUtil.java +++ b/q3tiledaligner/src/au/edu/qimr/tiledaligner/util/TiledAlignerUtil.java @@ -33,8 +33,6 @@ import org.qcmg.common.util.NumberUtils; import org.qcmg.qio.record.StringFileReader; -import org.qcmg.qmule.SmithWatermanGotoh; - import gnu.trove.list.TLongList; import gnu.trove.list.array.TLongArrayList; import gnu.trove.map.TIntObjectMap; From 2b5ea6c4da1886a06d2d30c11badd9edaf37122b Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 25 Nov 2020 22:27:59 +1000 Subject: [PATCH 36/73] remove unused imports --- q3panel/src/au/edu/qimr/panel/Q3Panel.java | 1 - 1 file changed, 1 deletion(-) diff --git a/q3panel/src/au/edu/qimr/panel/Q3Panel.java b/q3panel/src/au/edu/qimr/panel/Q3Panel.java index f6271c2e8..3ff36a1a3 100644 --- a/q3panel/src/au/edu/qimr/panel/Q3Panel.java +++ b/q3panel/src/au/edu/qimr/panel/Q3Panel.java @@ -92,7 +92,6 @@ import org.qcmg.qio.record.StringFileReader; import org.qcmg.vcf.VCFFileReader; import org.qcmg.qio.record.RecordWriter; -import org.qcmg.qmule.SmithWatermanGotoh; public class Q3Panel { From 4e45a5e280b35bf80623acb074277bbd6ab2c3f1 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 
25 Nov 2020 22:29:00 +1000 Subject: [PATCH 37/73] mv old illumina test --- .../illumina/IlluminaSerializerTest.java | 131 ------------------ 1 file changed, 131 deletions(-) delete mode 100644 qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java diff --git a/qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java b/qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java deleted file mode 100644 index 852f816ca..000000000 --- a/qio/test/org/qcmg/unused/illumina/IlluminaSerializerTest.java +++ /dev/null @@ -1,131 +0,0 @@ -package org.qcmg.unused.illumina; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; - -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - - -public class IlluminaSerializerTest { -// private static String inputString = "chr12 126890980 - rs1000000 0.8379 C C hom [T/C] G__C/T"; - private static String invalidInputString = "chr12 126890980 - rs1000000 0.8379 C C hom [T/C]"; - - private static final String rawInputString = "rs10002311 4802094023_R02C01 A A " + - "0.7992 14 93259 0 A A T T A A 4 77661528 " + - "0.7895 0.5494 [T/G] BOT TOP 0.109 0.409 0.349 0.060 4887 1511 0.0290 0.1235"; -// private static final String rawInputString2 = "rs10013427 4802094023_R02C01 G G " + -// "0.9080 14 94874 0 G G G G B B 4 169381756 " + -// "0.8710 1.0000 [A/G] TOP TOP 0.940 0.548 0.047 0.501 1145 11978 0.9675 -0.5171"; - - private static final String rawInputString2 = "rs6680706 5760640025_R02C01 A G " + - "0.7956 70 900225 0 T C T C A B 1 4231843 " + - "0.7872 1.0000 [T/C] BOT BOT 0.515 1.911 0.933 0.977 12110 13151 0.4745 0.0471"; - - - - @Rule - public TemporaryFolder tmpFolder = new TemporaryFolder(); - - @Test - public void testParseIDInvalid() throws Exception { - // test empty string - try { - IlluminaSerializer.parseData(""); - Assert.fail("Should have thrown an Exception"); - } catch (Exception e) {} - - // string that does not start with 'chr' - try { - IlluminaSerializer.parseData("testing testing 123"); - Assert.fail("Should have thrown an Exception"); - } catch (Exception e) {} - - // string containing 'chr' but not at the start.. 
- try { - IlluminaSerializer.parseData("this is a chr1 test"); - Assert.fail("Should have thrown an Exception"); - } catch (Exception e) {} - - // string that is not the right length - try { - IlluminaSerializer.parseData(invalidInputString); - Assert.fail("Should have thrown an Exception"); - } catch (Exception e) { - Assert.assertEquals(true, e.getMessage().startsWith("Bad Illumina data format")); - } - } - - @Test - public void testParseData() throws Exception { - String[] outputStringArray = null; - String outputString = ""; - - outputStringArray = IlluminaSerializer.parseData(rawInputString); - for (String s : outputStringArray) { - outputString += s + "\t"; - } - - // remove final tab - outputString = outputString.substring(0, outputString.length()-1); - - Assert.assertNotNull(outputStringArray); - Assert.assertEquals(rawInputString, outputString); - } - - - - @Test - public void testParseRecords() throws Exception { - // real record - // inputString = "chr12 126890980 - rs1000000 0.8379 C C hom [T/C] G__C/T"; - IlluminaRecord record = IlluminaSerializer.parseRecord(rawInputString); - Assert.assertNotNull(record); - Assert.assertEquals("4", record.getChr()); // we now parse chr and position for Illumina records - Assert.assertEquals(77661528, record.getStart()); - -// Assert.assertEquals('-', record.getStrand()); - Assert.assertEquals("rs10002311", record.getSnpId()); - Assert.assertEquals(0.7992f, record.getGCScore(), 0.00000); - Assert.assertEquals('T', record.getFirstAllele()); - Assert.assertEquals('T', record.getSecondAllele()); - Assert.assertTrue(record.isHom()); - Assert.assertEquals("[T/G]", record.getSnp()); -// Assert.assertEquals("G__C/T", record.getRefGenomeRefSNPAllele()); - } - - @Test - public void testNextRecord() throws Exception { - // create tmp illumina file - File illuminaFile = tmpFolder.newFile("illumina"); - generateIllumiaFile(illuminaFile); - BufferedReader reader = new BufferedReader(new FileReader(illuminaFile)); - IlluminaRecord rec = IlluminaSerializer.nextRecord(reader); - Assert.assertNotNull(rec); - Assert.assertEquals("cnvi0000657", rec.getSnpId()); - } - - private void generateIllumiaFile(File file) throws IOException { - FileWriter writer = new FileWriter(file); - writer.write("[Header]\n"); - writer.write("GSGT Version 1.8.4\n"); - writer.write("Processing Date 8/12/2011 8:41 PM\n"); - writer.write("Content HumanOmni1-Quad_v1-0_H.bpm\n"); - writer.write("Num SNPs 1134514\n"); - writer.write("Total SNPs 1134514\n"); - writer.write("Num Samples 259\n"); - writer.write("Total Samples 260\n"); - writer.write("File 77 of 259\n"); - writer.write("[Data]\n"); - writer.write("SNP Name Sample ID Allele1 - Top Allele2 - Top GC Score Sample Name Sample Group Sample Index SNP Index SNP Aux Allele1 - Forward Allele2 - Forward Allele1 - Design Allele2 - Design Allele1 - AB Allele2 - AB Chr Position GT Score Cluster Sep SNP ILMN Strand Customer Strand Top Genomic Sequence Theta R X Y" -+ "X Raw Y Raw B Allele Freq Log R Ratio\n"); - writer.write("cnvi0000657 5636391030_R02C01 - - 0.0000 78 127 0 - - - - - - 6 160513181 0.0000 0.0000 [A/G] TOP TOP\t\t" - + "0.021 1.675 1.621 0.054 17348 1140 0.0100 -0.1054\n"); - writer.flush(); - } -} From cd40a0ca679375edff5d59ef90489a3454af3b6a Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 25 Nov 2020 22:30:19 +1000 Subject: [PATCH 38/73] delete old illumina since it replace by a new one --- .../unused/illumina/IlluminaFileReader.java | 21 -- .../qcmg/unused/illumina/IlluminaRecord.java | 262 ------------------ 
.../illumina/IlluminaRecordIterator.java | 21 -- .../unused/illumina/IlluminaSerializer.java | 73 ----- 4 files changed, 377 deletions(-) delete mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java delete mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaRecord.java delete mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java delete mode 100644 qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java b/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java deleted file mode 100644 index 5ef126ee3..000000000 --- a/qio/src/org/qcmg/unused/illumina/IlluminaFileReader.java +++ /dev/null @@ -1,21 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.illumina; - -import java.io.File; -import java.io.IOException; - -import org.qcmg.unused.reader.AbstractReader; - -public final class IlluminaFileReader extends AbstractReader { - - public IlluminaFileReader(final File file) throws IOException { - super(file); - } - - public IlluminaRecordIterator getRecordIterator() throws Exception { - return new IlluminaRecordIterator(inputStream); - } - -} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java b/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java deleted file mode 100644 index eca4e9837..000000000 --- a/qio/src/org/qcmg/unused/illumina/IlluminaRecord.java +++ /dev/null @@ -1,262 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.illumina; - -import org.qcmg.common.string.StringUtils; -import org.qcmg.qmule.record.Record; - -public class IlluminaRecord implements Record { - - private String chr; - private int start; - private final String strand; - private String snpId; - - //TODO do we need this field? - private float GCScore; - - private char firstAllele; - private char secondAllele; - - private final char firstAlleleForward; - private final char secondAlleleForward; - - private final char firstAlleleCall; - private final char secondAlleleCall; - - //TODO do we need this field? 
- private boolean hom; - private boolean isSnp; - private String snp; - - private final float logRRatio; - private final float bAlleleFreq; - - private final int rawX; - private final int rawY; - - /** - * Constructor that takes in a String array, retrieving pertinent fields from the array to populate the record - * - * @param rawIlluminaData String[] representing a line in the raw Illumina data file - */ - public IlluminaRecord(String [] rawIlluminaData) { - // chromosome and position defined in the raw Illumina data file relate to an old version - // of the genome (hg18), so instead, we use the dbSNP id to get the more recent - //(hg19) chromosome and position details from the dbSNP file at a later date - int length = rawIlluminaData.length; - snpId = rawIlluminaData[0]; - GCScore = Float.parseFloat(rawIlluminaData[4]); - firstAlleleForward = rawIlluminaData[10].charAt(0); - secondAlleleForward = rawIlluminaData[11].charAt(0); - firstAllele = rawIlluminaData[12].charAt(0); - secondAllele = rawIlluminaData[13].charAt(0); - setHom(rawIlluminaData[14].equals(rawIlluminaData[15])); - chr = rawIlluminaData[16]; - start = Integer.parseInt(rawIlluminaData[17]); - snp = rawIlluminaData[20]; - rawX = Integer.parseInt(rawIlluminaData[length - 4]); - rawY = Integer.parseInt(rawIlluminaData[length - 3]); - bAlleleFreq = Float.parseFloat(rawIlluminaData[length - 2]); - String logRRatioString = rawIlluminaData[length - 1]; - if (StringUtils.isNullOrEmpty(logRRatioString)) - logRRatioString = "NaN"; - logRRatio = Float.parseFloat(logRRatioString); - firstAlleleCall = rawIlluminaData[14].charAt(0); - secondAlleleCall = rawIlluminaData[15].charAt(0); - strand = rawIlluminaData[22]; // use customer strand rather than illumina strand -// strand = rawIlluminaData[21]; - } - - - public String getChr() { - return chr; - } - public void setChr(String chr) { - this.chr = chr; - } - public int getStart() { - return start; - } - public void setStart(int start) { - this.start = start; - } - public String getSnpId() { - return snpId; - } - public void setSnpId(String snpId) { - this.snpId = snpId; - } - public float getGCScore() { - return GCScore; - } - public void setGCScore(float GCScore) { - this.GCScore = GCScore; - } - public char getFirstAllele() { - return firstAllele; - } - public void setFirstAllele(char firstAllele) { - this.firstAllele = firstAllele; - } - public char getSecondAllele() { - return secondAllele; - } - public void setSecondAllele(char secondAllele) { - this.secondAllele = secondAllele; - } - public String getSnp() { - return snp; - } - public void setSnp(String snp) { - this.snp = snp; - } - public void setHom(boolean hom) { - this.hom = hom; - } - public boolean isHom() { - return hom; - } - - public void setSnp(boolean isSnp) { - this.isSnp = isSnp; - } - - public boolean isSnp() { - return isSnp; - } - - public float getLogRRatio() { - return logRRatio; - } - - - public float getbAlleleFreq() { - return bAlleleFreq; - } - - - public char getFirstAlleleCall() { - return firstAlleleCall; - } - - - public char getSecondAlleleCall() { - return secondAlleleCall; - } - - public int getRawX() { - return rawX; - } - - public int getRawY() { - return rawY; - } - - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + Float.floatToIntBits(GCScore); - result = prime * result + Float.floatToIntBits(bAlleleFreq); - result = prime * result + ((chr == null) ? 
0 : chr.hashCode()); - result = prime * result + firstAllele; - result = prime * result + firstAlleleCall; - result = prime * result + (hom ? 1231 : 1237); - result = prime * result + (isSnp ? 1231 : 1237); - result = prime * result + Float.floatToIntBits(logRRatio); - result = prime * result + rawX; - result = prime * result + rawY; - result = prime * result + secondAllele; - result = prime * result + secondAlleleCall; - result = prime * result + ((snp == null) ? 0 : snp.hashCode()); - result = prime * result + ((snpId == null) ? 0 : snpId.hashCode()); - result = prime * result + start; - return result; - } - - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - IlluminaRecord other = (IlluminaRecord) obj; - if (Float.floatToIntBits(GCScore) != Float - .floatToIntBits(other.GCScore)) - return false; - if (Float.floatToIntBits(bAlleleFreq) != Float - .floatToIntBits(other.bAlleleFreq)) - return false; - if (chr == null) { - if (other.chr != null) - return false; - } else if (!chr.equals(other.chr)) - return false; - if (firstAllele != other.firstAllele) - return false; - if (firstAlleleCall != other.firstAlleleCall) - return false; - if (hom != other.hom) - return false; - if (isSnp != other.isSnp) - return false; - if (Float.floatToIntBits(logRRatio) != Float - .floatToIntBits(other.logRRatio)) - return false; - if (rawX != other.rawX) - return false; - if (rawY != other.rawY) - return false; - if (secondAllele != other.secondAllele) - return false; - if (secondAlleleCall != other.secondAlleleCall) - return false; - if (snp == null) { - if (other.snp != null) - return false; - } else if (!snp.equals(other.snp)) - return false; - if (snpId == null) { - if (other.snpId != null) - return false; - } else if (!snpId.equals(other.snpId)) - return false; - if (start != other.start) - return false; - return true; - } - - - @Override - public String toString() { - return "IlluminaRecord [GCScore=" + GCScore + ", bAlleleFreq=" - + bAlleleFreq + ", chr=" + chr + ", firstAllele=" + firstAllele - + ", firstAlleleCall=" + firstAlleleCall + ", hom=" + hom - + ", isSnp=" + isSnp + ", logRRatio=" + logRRatio + ", rawX=" - + rawX + ", rawY=" + rawY + ", secondAllele=" + secondAllele - + ", secondAlleleCall=" + secondAlleleCall + ", snp=" + snp - + ", snpId=" + snpId + ", start=" + start + "]"; - } - - - public String getStrand() { - return strand; - } - - - public char getFirstAlleleForward() { - return firstAlleleForward; - } - - public char getSecondAlleleForward() { - return secondAlleleForward; - } - - -} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java b/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java deleted file mode 100644 index 872b77b99..000000000 --- a/qio/src/org/qcmg/unused/illumina/IlluminaRecordIterator.java +++ /dev/null @@ -1,21 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.unused.illumina; - -import java.io.InputStream; - -import org.qcmg.qmule.record.AbstractRecordIterator; - -public class IlluminaRecordIterator extends AbstractRecordIterator { - - public IlluminaRecordIterator(InputStream stream) throws Exception { - super(stream); - } - - @Override - protected void readNext() throws Exception { - next = IlluminaSerializer.nextRecord(reader); - } - -} diff --git a/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java b/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java deleted file mode 100644 index 2bef61139..000000000 --- a/qio/src/org/qcmg/unused/illumina/IlluminaSerializer.java +++ /dev/null @@ -1,73 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.unused.illumina; - -import java.io.BufferedReader; -import java.io.IOException; - -import org.qcmg.common.util.TabTokenizer; - -public final class IlluminaSerializer { - private static final String HEADER_LINE = "[Header]"; - private static final String DATA_LINE = "[Data]"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - // header lines are as follows: - /* -[Header] -GSGT Version 1.8.4 -Processing Date 8/12/2011 8:41 PM -Content HumanOmni1-Quad_v1-0_H.bpm -Num SNPs 1134514 -Total SNPs 1134514 -Num Samples 259 -Total Samples 260 -File 77 of 259 -[Data] -SNP Name Sample ID Allele1 - Top Allele2 - Top GC Score Sample Name Sample Group Sample Index SNP Index SNP Aux Allele1 - Forward Allele2 - Forward Allele1 - Design Allele2 - Design Allele1 - AB Allele2 - AB Chr Position GT Score Cluster Sep SNP ILMN Strand Customer Strand Top Genomic Sequence Theta R X Y -X Raw Y Raw B Allele Freq Log R Ratio - */ - - String line = reader.readLine(); - if (null != line && line.startsWith(HEADER_LINE)) { - - // ignore header lines until we hit [DATA] - line = reader.readLine(); - while (null != line && ! line.startsWith(DATA_LINE)) { - line = reader.readLine(); - } - // next line is still header.... - line = reader.readLine(); - line = reader.readLine(); - } - return line; - } - - public static IlluminaRecord nextRecord(final BufferedReader reader) throws Exception { - IlluminaRecord result = null; - - String data = nextNonheaderLine(reader); - if (null != data ) { - result = parseRecord(data); - } - - return result; - } - - static String[] parseData(final String value) throws Exception { - String[] dataArray = TabTokenizer.tokenize(value); - - // raw Illumina data has 32 fields... 
and the first one is an integer - if (dataArray.length != 32) throw new Exception("Bad Illumina data format - expecting 32 fields but saw " + dataArray.length); - - return dataArray; - } - - static IlluminaRecord parseRecord(final String record) - throws Exception { - return new IlluminaRecord(parseData(record)); - } - -} From d7e96c447e9f88ef6199bb877c95aa703c5d3ebd Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 25 Nov 2020 22:32:44 +1000 Subject: [PATCH 39/73] use new illumina in qmule::SnpPicker --- qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java index c4a76aeaa..4596407f3 100644 --- a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java +++ b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java @@ -38,8 +38,8 @@ import org.qcmg.qmule.Options; import org.qcmg.qmule.QMuleException; import org.qcmg.qmule.record.Record; -import org.qcmg.unused.illumina.IlluminaFileReader; -import org.qcmg.unused.illumina.IlluminaRecord; +import org.qcmg.qio.illumina.IlluminaFileReader; +import org.qcmg.qio.illumina.IlluminaRecord; import org.qcmg.vcf.VCFFileReader; public class SnpPicker { @@ -547,10 +547,10 @@ private void loadRawIlluminaData() { } if (null != reader) { - IlluminaRecord tempRec; - for (Record rec : reader) { - tempRec = (IlluminaRecord) rec; - illuminaMap.put(tempRec.getSnpId(), tempRec); + + for (IlluminaRecord rec : reader) { + + illuminaMap.put(rec.getSnpId(), rec); } try { reader.close(); From c18809d473d0387f71f86b7ed2554577545f5afe Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 25 Nov 2020 22:58:46 +1000 Subject: [PATCH 40/73] mark deprecated on unused classes --- qio/src/org/qcmg/unused/bed/BEDFileReader.java | 3 +++ qio/src/org/qcmg/unused/bed/BEDRecord.java | 5 +++-- qio/src/org/qcmg/unused/bed/BEDRecordIterator.java | 3 +++ qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java | 3 +++ qio/src/org/qcmg/unused/bed/BEDSerializer.java | 2 ++ .../org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java | 2 +- .../org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java | 3 +++ .../qcmg/unused/consensuscalls/ConsensusCallsSerializer.java | 3 +++ qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java | 3 +++ qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java | 4 +++- .../org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java | 3 +++ qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java | 3 +++ qio/src/org/qcmg/unused/maf/MAFFileReader.java | 4 ++++ qio/src/org/qcmg/unused/maf/MAFRecordIterator.java | 3 +++ qio/src/org/qcmg/unused/maf/MAFSerializer.java | 3 +++ .../unused/primerdesignsummary/PrimerDesignFileReader.java | 3 +++ .../qcmg/unused/primerdesignsummary/PrimerDesignRecord.java | 2 ++ .../primerdesignsummary/PrimerDesignRecordSerializer.java | 3 +++ .../org/qcmg/unused/primerdesignsummary/PrimerPosition.java | 3 +++ .../org/qcmg/unused/primerinput/PrimerInputFileReader.java | 3 +++ .../org/qcmg/unused/primerinput/PrimerInputFileWriter.java | 3 +++ qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java | 4 +++- .../qcmg/unused/primerinput/PrimerInputRecordSerializer.java | 3 +++ .../org/qcmg/unused/primerinput/PrimerSequenceTarget.java | 3 +++ qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java | 4 +++- .../org/qcmg/unused/primeroutput/PrimerOutputFileReader.java | 3 +++ .../org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java | 3 +++ 
qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java | 4 ++++ .../unused/primeroutput/PrimerOutputHeaderSerializer.java | 3 +++ qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java | 4 ++++ .../unused/primeroutput/PrimerOutputRecordSerializer.java | 3 +++ qio/src/org/qcmg/unused/simple/SimpleFileReader.java | 3 +++ qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java | 3 +++ qio/src/org/qcmg/unused/simple/SimpleSerializer.java | 3 +++ 34 files changed, 101 insertions(+), 6 deletions(-) diff --git a/qio/src/org/qcmg/unused/bed/BEDFileReader.java b/qio/src/org/qcmg/unused/bed/BEDFileReader.java index 39090fc6d..66621f763 100644 --- a/qio/src/org/qcmg/unused/bed/BEDFileReader.java +++ b/qio/src/org/qcmg/unused/bed/BEDFileReader.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.bed; import java.io.Closeable; @@ -10,6 +12,7 @@ import java.io.InputStream; import java.util.Iterator; +@Deprecated public final class BEDFileReader implements Closeable, Iterable { private final File file; private final InputStream inputStream; diff --git a/qio/src/org/qcmg/unused/bed/BEDRecord.java b/qio/src/org/qcmg/unused/bed/BEDRecord.java index 297b66cb2..da51e4a36 100644 --- a/qio/src/org/qcmg/unused/bed/BEDRecord.java +++ b/qio/src/org/qcmg/unused/bed/BEDRecord.java @@ -1,10 +1,11 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ -package org.qcmg.unused.bed; - +package org.qcmg.unused.bed; +@Deprecated public class BEDRecord { private final static char T = '\t'; diff --git a/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java b/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java index 6d8726a5d..42d0c4861 100644 --- a/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java +++ b/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.bed; import java.io.BufferedReader; @@ -9,6 +11,7 @@ import java.util.Iterator; import java.util.NoSuchElementException; +@Deprecated public final class BEDRecordIterator implements Iterator { private final BufferedReader reader; private BEDRecord next; diff --git a/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java b/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java index a813a9afc..c90192367 100644 --- a/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java +++ b/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java @@ -1,10 +1,13 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. 
*/ + package org.qcmg.unused.bed; import java.util.Comparator; +@Deprecated public class BEDRecordPositionComparator implements Comparator { public int compare(BEDRecord recordA, BEDRecord recordB) { diff --git a/qio/src/org/qcmg/unused/bed/BEDSerializer.java b/qio/src/org/qcmg/unused/bed/BEDSerializer.java index 164cf8189..7c6f21c6e 100644 --- a/qio/src/org/qcmg/unused/bed/BEDSerializer.java +++ b/qio/src/org/qcmg/unused/bed/BEDSerializer.java @@ -1,5 +1,6 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ package org.qcmg.unused.bed; @@ -7,6 +8,7 @@ import java.io.IOException; import java.util.regex.Pattern; +@Deprecated public final class BEDSerializer { private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); private static final String DEFAULT_HEADER_PREFIX = "#"; diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java index 6cb38fa52..fcc06b366 100644 --- a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java +++ b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java @@ -3,7 +3,7 @@ */ package org.qcmg.unused.consensuscalls; - +@Deprecated public enum ConsensusCallsFlag { H_1("h1"), diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java index c9321b571..01f434159 100644 --- a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java +++ b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java @@ -1,5 +1,6 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ // // This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 @@ -13,6 +14,8 @@ import java.util.ArrayList; import java.util.List; + +@Deprecated public class ConsensusCallsRecord { protected String chr; diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java index 7695b6486..4e0f76d9e 100644 --- a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java +++ b/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.consensuscalls; import java.io.BufferedReader; @@ -10,6 +12,7 @@ import java.util.Map; import java.util.regex.Pattern; +@Deprecated public final class ConsensusCallsSerializer { private static final Pattern tabbedPattern = Pattern.compile("[\\t]+"); private static final Pattern commaPattern = Pattern.compile("[,]+"); diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java index 3ec52a69c..1ff908a05 100644 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. 
*/ + package org.qcmg.unused.genesymbol; import java.io.Closeable; @@ -10,6 +12,7 @@ import java.io.InputStream; import java.util.Iterator; +@Deprecated public final class GeneSymbolFileReader implements Closeable, Iterable { private final File file; private final InputStream inputStream; diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java index efc505155..50dd25a46 100644 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java @@ -1,9 +1,11 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ -package org.qcmg.unused.genesymbol; +package org.qcmg.unused.genesymbol; +@Deprecated public class GeneSymbolRecord { private String geneId; diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java index 1ad4c2505..f9ec9f160 100644 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.genesymbol; import java.io.BufferedReader; @@ -9,6 +11,7 @@ import java.util.Iterator; import java.util.NoSuchElementException; +@Deprecated public final class GeneSymbolRecordIterator implements Iterator { private final BufferedReader reader; private GeneSymbolRecord next; diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java index d117b5abe..53fd0c646 100644 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java +++ b/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java @@ -1,12 +1,15 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.genesymbol; import java.io.BufferedReader; import java.io.IOException; import java.util.regex.Pattern; +@Deprecated public final class GeneSymbolSerializer { private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); private static final String DEFAULT_HEADER_PREFIX = "#"; diff --git a/qio/src/org/qcmg/unused/maf/MAFFileReader.java b/qio/src/org/qcmg/unused/maf/MAFFileReader.java index 9d98df780..04aa0de51 100644 --- a/qio/src/org/qcmg/unused/maf/MAFFileReader.java +++ b/qio/src/org/qcmg/unused/maf/MAFFileReader.java @@ -3,7 +3,10 @@ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. * * This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. 
*/ + + package org.qcmg.unused.maf; import java.io.Closeable; @@ -15,6 +18,7 @@ import org.qcmg.common.maf.MAFRecord; +@Deprecated public final class MAFFileReader implements Closeable, Iterable { private final File file; private final InputStream inputStream; diff --git a/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java b/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java index 760780922..b3d899382 100644 --- a/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java +++ b/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java @@ -3,7 +3,9 @@ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. * * This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.maf; import java.io.BufferedReader; @@ -14,6 +16,7 @@ import org.qcmg.common.maf.MAFRecord; +@Deprecated public final class MAFRecordIterator implements Iterator { private final BufferedReader reader; private MAFRecord next; diff --git a/qio/src/org/qcmg/unused/maf/MAFSerializer.java b/qio/src/org/qcmg/unused/maf/MAFSerializer.java index bfd4b7d0a..581cb2975 100644 --- a/qio/src/org/qcmg/unused/maf/MAFSerializer.java +++ b/qio/src/org/qcmg/unused/maf/MAFSerializer.java @@ -3,7 +3,9 @@ * © Copyright QIMR Berghofer Medical Research Institute 2014-2016. * * This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.maf; import java.io.BufferedReader; @@ -12,6 +14,7 @@ import org.qcmg.common.maf.MAFRecord; +@Deprecated public final class MAFSerializer { private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); private static final String DEFAULT_HEADER_PREFIX = "#"; diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java index 9ed51e589..247335f12 100644 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java @@ -1,12 +1,15 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.primerdesignsummary; import java.io.File; import org.qcmg.unused.reader.FileReader; +@Deprecated public class PrimerDesignFileReader extends FileReader { private final static PrimerDesignRecordSerializer serializer = new PrimerDesignRecordSerializer(); diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java index 289892f7e..9bd8fd02f 100644 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java @@ -1,9 +1,11 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. 
*/ package org.qcmg.unused.primerdesignsummary; +@Deprecated public class PrimerDesignRecord { protected String snpId; diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java index 414939f09..166347fd2 100644 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.primerdesignsummary; import java.io.BufferedReader; @@ -9,6 +11,7 @@ import org.qcmg.qmule.record.Serializer; +@Deprecated public final class PrimerDesignRecordSerializer extends Serializer { public PrimerDesignRecord parseRecord(final String line) throws Exception { String[] params = tabbedPattern.split(line); diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java index 1154b4fb4..6d7b20e4e 100644 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java +++ b/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java @@ -1,8 +1,11 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.primerdesignsummary; +@Deprecated public class PrimerPosition { protected String chromosome; diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java index c2a7262a4..00a1f469f 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java @@ -1,12 +1,15 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.primerinput; import java.io.File; import org.qcmg.unused.reader.FileReader; +@Deprecated public class PrimerInputFileReader extends FileReader { private final static PrimerInputRecordSerializer serializer = new PrimerInputRecordSerializer(); diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java index b09623176..25510052a 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. 
*/ + package org.qcmg.unused.primerinput; import java.io.Closeable; @@ -9,6 +11,7 @@ import java.io.IOException; import java.io.OutputStream; +@Deprecated public final class PrimerInputFileWriter implements Closeable { private static final String EQUALS = "="; private static final PrimerInputRecordSerializer serializer = new PrimerInputRecordSerializer(); diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java index 7de08071a..dbf722818 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java @@ -1,9 +1,11 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ -package org.qcmg.unused.primerinput; +package org.qcmg.unused.primerinput; +@Deprecated public class PrimerInputRecord { protected String sequenceId; diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java index 9a24a1a4a..89c34459b 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java +++ b/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.primerinput; import java.io.BufferedReader; @@ -10,6 +12,7 @@ import org.qcmg.qmule.gff3.GFF3Record; import org.qcmg.qmule.record.Serializer; +@Deprecated public final class PrimerInputRecordSerializer extends Serializer { private final static String SEQUENCE_ID = "SEQUENCE_ID"; diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java b/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java index 8cb3833fe..0e8a40f94 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java +++ b/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java @@ -1,8 +1,11 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.primerinput; +@Deprecated public class PrimerSequenceTarget { protected int leftValue; diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java b/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java index bc16b6531..7125e8e72 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java +++ b/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java @@ -1,9 +1,11 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ -package org.qcmg.unused.primerinput; +package org.qcmg.unused.primerinput; +@Deprecated public class PrimerSizeRange { protected int lowerLimit; diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java index d77d6b5ad..baa5c5d57 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. 
This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.primeroutput; import java.io.File; @@ -10,6 +12,7 @@ import org.qcmg.unused.reader.ExtendedFileReader; import org.qcmg.unused.reader.FileReader; +@Deprecated public class PrimerOutputFileReader extends ExtendedFileReader { private final static PrimerOutputHeaderSerializer headerSerializer = new PrimerOutputHeaderSerializer(); diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java index 5a0eb1317..9ae418062 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.primeroutput; import java.io.Closeable; @@ -11,6 +13,7 @@ import org.qcmg.unused.primeroutput.PrimerOutputRecord; +@Deprecated public final class PrimerOutputFileWriter implements Closeable { private static final String EQUALS = "="; private static final PrimerOutputRecordSerializer serializer = new PrimerOutputRecordSerializer(); diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java index 4763d993e..b58fef717 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + // // This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 // See http://java.sun.com/xml/jaxb @@ -86,6 +88,8 @@ "internalNumReturned", "pairNumReturned" }) + +@Deprecated public class PrimerOutputHeader { @XmlElement(required = true) diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java index f0accc892..0b748d40c 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java @@ -1,12 +1,15 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.primeroutput; import java.io.BufferedReader; import org.qcmg.qmule.record.Serializer; +@Deprecated public class PrimerOutputHeaderSerializer extends Serializer { private final static String[] FIELDS = { diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java index f208e770b..d174be517 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. 
*/ + // // This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, vhudson-jaxb-ri-2.2-147 // See http://java.sun.com/xml/jaxb @@ -84,6 +86,8 @@ "pairProductTmOligoTmDiff", "pairTOptA" }) + +@Deprecated public class PrimerOutputRecord { protected double pairPenalty; diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java index 8d469a54b..e8038701e 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java +++ b/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.primeroutput; import java.io.BufferedReader; @@ -8,6 +10,7 @@ import org.qcmg.qmule.record.Serializer; +@Deprecated public final class PrimerOutputRecordSerializer extends Serializer { private final static String[] FIELD_PREFIXES = { diff --git a/qio/src/org/qcmg/unused/simple/SimpleFileReader.java b/qio/src/org/qcmg/unused/simple/SimpleFileReader.java index e6839b3b5..f7bb2981c 100644 --- a/qio/src/org/qcmg/unused/simple/SimpleFileReader.java +++ b/qio/src/org/qcmg/unused/simple/SimpleFileReader.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.simple; import java.io.File; @@ -9,6 +11,7 @@ import org.qcmg.qmule.record.AbstractRecordIterator; import org.qcmg.unused.reader.AbstractReader; +@Deprecated public class SimpleFileReader extends AbstractReader { public SimpleFileReader(File file) throws IOException { diff --git a/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java b/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java index e384ce5e9..941bfcf4a 100644 --- a/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java +++ b/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java @@ -1,12 +1,15 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. */ + package org.qcmg.unused.simple; import java.io.InputStream; import org.qcmg.qmule.record.AbstractRecordIterator; +@Deprecated public class SimpleRecordIterator extends AbstractRecordIterator { public SimpleRecordIterator(InputStream stream) throws Exception{ diff --git a/qio/src/org/qcmg/unused/simple/SimpleSerializer.java b/qio/src/org/qcmg/unused/simple/SimpleSerializer.java index 09f1f99a7..3b49a5131 100644 --- a/qio/src/org/qcmg/unused/simple/SimpleSerializer.java +++ b/qio/src/org/qcmg/unused/simple/SimpleSerializer.java @@ -1,6 +1,8 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. + * @deprecated since it is no longer used. 
*/ + package org.qcmg.unused.simple; import java.io.BufferedReader; @@ -8,6 +10,7 @@ import org.qcmg.qmule.record.SimpleRecord; +@Deprecated public final class SimpleSerializer { private static final String DEFAULT_ID_PREFIX = ">"; From e60fe57d277e2bb70de822a654989689a4b435fa Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 25 Nov 2020 23:17:36 +1000 Subject: [PATCH 41/73] rename unused package name back to original --- qio/src/org/qcmg/{unused => }/bed/BEDFileReader.java | 2 +- qio/src/org/qcmg/{unused => }/bed/BEDRecord.java | 2 +- .../org/qcmg/{unused => }/bed/BEDRecordIterator.java | 2 +- .../{unused => }/bed/BEDRecordPositionComparator.java | 2 +- qio/src/org/qcmg/{unused => }/bed/BEDSerializer.java | 2 +- .../consensuscalls/ConsensusCallsFlag.java | 2 +- .../consensuscalls/ConsensusCallsRecord.java | 2 +- .../consensuscalls/ConsensusCallsSerializer.java | 2 +- .../{unused => }/genesymbol/GeneSymbolFileReader.java | 2 +- .../qcmg/{unused => }/genesymbol/GeneSymbolRecord.java | 2 +- .../genesymbol/GeneSymbolRecordIterator.java | 2 +- .../{unused => }/genesymbol/GeneSymbolSerializer.java | 2 +- qio/src/org/qcmg/{unused => }/maf/MAFFileReader.java | 2 +- .../org/qcmg/{unused => }/maf/MAFRecordIterator.java | 2 +- qio/src/org/qcmg/{unused => }/maf/MAFSerializer.java | 2 +- .../primerdesignsummary/PrimerDesignFileReader.java | 4 ++-- .../primerdesignsummary/PrimerDesignRecord.java | 2 +- .../PrimerDesignRecordSerializer.java | 2 +- .../primerdesignsummary/PrimerPosition.java | 2 +- .../primerinput/PrimerInputFileReader.java | 4 ++-- .../primerinput/PrimerInputFileWriter.java | 2 +- .../{unused => }/primerinput/PrimerInputRecord.java | 2 +- .../primerinput/PrimerInputRecordSerializer.java | 2 +- .../{unused => }/primerinput/PrimerSequenceTarget.java | 2 +- .../qcmg/{unused => }/primerinput/PrimerSizeRange.java | 2 +- .../primeroutput/PrimerOutputFileReader.java | 10 +++++----- .../primeroutput/PrimerOutputFileWriter.java | 4 ++-- .../{unused => }/primeroutput/PrimerOutputHeader.java | 2 +- .../primeroutput/PrimerOutputHeaderSerializer.java | 2 +- .../{unused => }/primeroutput/PrimerOutputRecord.java | 2 +- .../primeroutput/PrimerOutputRecordSerializer.java | 2 +- .../org/qcmg/{unused => }/reader/AbstractReader.java | 2 +- .../qcmg/{unused => }/reader/ExtendedFileReader.java | 2 +- qio/src/org/qcmg/{unused => }/reader/FileReader.java | 2 +- qio/src/org/qcmg/{unused => }/reader/Reader.java | 2 +- .../org/qcmg/{unused => }/simple/SimpleFileReader.java | 4 ++-- .../qcmg/{unused => }/simple/SimpleRecordIterator.java | 2 +- .../org/qcmg/{unused => }/simple/SimpleSerializer.java | 2 +- .../qcmg/{unused => }/simple/SimpleSerializerTest.java | 3 ++- 39 files changed, 48 insertions(+), 47 deletions(-) rename qio/src/org/qcmg/{unused => }/bed/BEDFileReader.java (97%) rename qio/src/org/qcmg/{unused => }/bed/BEDRecord.java (98%) rename qio/src/org/qcmg/{unused => }/bed/BEDRecordIterator.java (97%) rename qio/src/org/qcmg/{unused => }/bed/BEDRecordPositionComparator.java (96%) rename qio/src/org/qcmg/{unused => }/bed/BEDSerializer.java (98%) rename qio/src/org/qcmg/{unused => }/consensuscalls/ConsensusCallsFlag.java (96%) rename qio/src/org/qcmg/{unused => }/consensuscalls/ConsensusCallsRecord.java (99%) rename qio/src/org/qcmg/{unused => }/consensuscalls/ConsensusCallsSerializer.java (98%) rename qio/src/org/qcmg/{unused => }/genesymbol/GeneSymbolFileReader.java (96%) rename qio/src/org/qcmg/{unused => }/genesymbol/GeneSymbolRecord.java (95%) rename qio/src/org/qcmg/{unused => 
}/genesymbol/GeneSymbolRecordIterator.java (97%) rename qio/src/org/qcmg/{unused => }/genesymbol/GeneSymbolSerializer.java (97%) rename qio/src/org/qcmg/{unused => }/maf/MAFFileReader.java (97%) rename qio/src/org/qcmg/{unused => }/maf/MAFRecordIterator.java (98%) rename qio/src/org/qcmg/{unused => }/maf/MAFSerializer.java (98%) rename qio/src/org/qcmg/{unused => }/primerdesignsummary/PrimerDesignFileReader.java (85%) rename qio/src/org/qcmg/{unused => }/primerdesignsummary/PrimerDesignRecord.java (98%) rename qio/src/org/qcmg/{unused => }/primerdesignsummary/PrimerDesignRecordSerializer.java (98%) rename qio/src/org/qcmg/{unused => }/primerdesignsummary/PrimerPosition.java (96%) rename qio/src/org/qcmg/{unused => }/primerinput/PrimerInputFileReader.java (86%) rename qio/src/org/qcmg/{unused => }/primerinput/PrimerInputFileWriter.java (96%) rename qio/src/org/qcmg/{unused => }/primerinput/PrimerInputRecord.java (99%) rename qio/src/org/qcmg/{unused => }/primerinput/PrimerInputRecordSerializer.java (99%) rename qio/src/org/qcmg/{unused => }/primerinput/PrimerSequenceTarget.java (96%) rename qio/src/org/qcmg/{unused => }/primerinput/PrimerSizeRange.java (96%) rename qio/src/org/qcmg/{unused => }/primeroutput/PrimerOutputFileReader.java (73%) rename qio/src/org/qcmg/{unused => }/primeroutput/PrimerOutputFileWriter.java (91%) rename qio/src/org/qcmg/{unused => }/primeroutput/PrimerOutputHeader.java (99%) rename qio/src/org/qcmg/{unused => }/primeroutput/PrimerOutputHeaderSerializer.java (99%) rename qio/src/org/qcmg/{unused => }/primeroutput/PrimerOutputRecord.java (99%) rename qio/src/org/qcmg/{unused => }/primeroutput/PrimerOutputRecordSerializer.java (99%) rename qio/src/org/qcmg/{unused => }/reader/AbstractReader.java (96%) rename qio/src/org/qcmg/{unused => }/reader/ExtendedFileReader.java (97%) rename qio/src/org/qcmg/{unused => }/reader/FileReader.java (97%) rename qio/src/org/qcmg/{unused => }/reader/Reader.java (87%) rename qio/src/org/qcmg/{unused => }/simple/SimpleFileReader.java (87%) rename qio/src/org/qcmg/{unused => }/simple/SimpleRecordIterator.java (95%) rename qio/src/org/qcmg/{unused => }/simple/SimpleSerializer.java (98%) rename qio/test/org/qcmg/{unused => }/simple/SimpleSerializerTest.java (98%) diff --git a/qio/src/org/qcmg/unused/bed/BEDFileReader.java b/qio/src/org/qcmg/bed/BEDFileReader.java similarity index 97% rename from qio/src/org/qcmg/unused/bed/BEDFileReader.java rename to qio/src/org/qcmg/bed/BEDFileReader.java index 66621f763..0fb86e7ea 100644 --- a/qio/src/org/qcmg/unused/bed/BEDFileReader.java +++ b/qio/src/org/qcmg/bed/BEDFileReader.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.bed; +package org.qcmg.bed; import java.io.Closeable; import java.io.File; diff --git a/qio/src/org/qcmg/unused/bed/BEDRecord.java b/qio/src/org/qcmg/bed/BEDRecord.java similarity index 98% rename from qio/src/org/qcmg/unused/bed/BEDRecord.java rename to qio/src/org/qcmg/bed/BEDRecord.java index da51e4a36..2dbcb77a2 100644 --- a/qio/src/org/qcmg/unused/bed/BEDRecord.java +++ b/qio/src/org/qcmg/bed/BEDRecord.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. 
*/ -package org.qcmg.unused.bed; +package org.qcmg.bed; @Deprecated public class BEDRecord { diff --git a/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java b/qio/src/org/qcmg/bed/BEDRecordIterator.java similarity index 97% rename from qio/src/org/qcmg/unused/bed/BEDRecordIterator.java rename to qio/src/org/qcmg/bed/BEDRecordIterator.java index 42d0c4861..84e908b4b 100644 --- a/qio/src/org/qcmg/unused/bed/BEDRecordIterator.java +++ b/qio/src/org/qcmg/bed/BEDRecordIterator.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.bed; +package org.qcmg.bed; import java.io.BufferedReader; import java.io.InputStream; diff --git a/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java b/qio/src/org/qcmg/bed/BEDRecordPositionComparator.java similarity index 96% rename from qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java rename to qio/src/org/qcmg/bed/BEDRecordPositionComparator.java index c90192367..53eedf558 100644 --- a/qio/src/org/qcmg/unused/bed/BEDRecordPositionComparator.java +++ b/qio/src/org/qcmg/bed/BEDRecordPositionComparator.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.bed; +package org.qcmg.bed; import java.util.Comparator; diff --git a/qio/src/org/qcmg/unused/bed/BEDSerializer.java b/qio/src/org/qcmg/bed/BEDSerializer.java similarity index 98% rename from qio/src/org/qcmg/unused/bed/BEDSerializer.java rename to qio/src/org/qcmg/bed/BEDSerializer.java index 7c6f21c6e..574199d9e 100644 --- a/qio/src/org/qcmg/unused/bed/BEDSerializer.java +++ b/qio/src/org/qcmg/bed/BEDSerializer.java @@ -2,7 +2,7 @@ * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. * @deprecated since it is no longer used. */ -package org.qcmg.unused.bed; +package org.qcmg.bed; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java b/qio/src/org/qcmg/consensuscalls/ConsensusCallsFlag.java similarity index 96% rename from qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java rename to qio/src/org/qcmg/consensuscalls/ConsensusCallsFlag.java index fcc06b366..772b71ee4 100644 --- a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsFlag.java +++ b/qio/src/org/qcmg/consensuscalls/ConsensusCallsFlag.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
*/ -package org.qcmg.unused.consensuscalls; +package org.qcmg.consensuscalls; @Deprecated public enum ConsensusCallsFlag { diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java b/qio/src/org/qcmg/consensuscalls/ConsensusCallsRecord.java similarity index 99% rename from qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java rename to qio/src/org/qcmg/consensuscalls/ConsensusCallsRecord.java index 01f434159..6a15b545e 100644 --- a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsRecord.java +++ b/qio/src/org/qcmg/consensuscalls/ConsensusCallsRecord.java @@ -10,7 +10,7 @@ // -package org.qcmg.unused.consensuscalls; +package org.qcmg.consensuscalls; import java.util.ArrayList; import java.util.List; diff --git a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java b/qio/src/org/qcmg/consensuscalls/ConsensusCallsSerializer.java similarity index 98% rename from qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java rename to qio/src/org/qcmg/consensuscalls/ConsensusCallsSerializer.java index 4e0f76d9e..3b9ae2604 100644 --- a/qio/src/org/qcmg/unused/consensuscalls/ConsensusCallsSerializer.java +++ b/qio/src/org/qcmg/consensuscalls/ConsensusCallsSerializer.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.consensuscalls; +package org.qcmg.consensuscalls; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java b/qio/src/org/qcmg/genesymbol/GeneSymbolFileReader.java similarity index 96% rename from qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java rename to qio/src/org/qcmg/genesymbol/GeneSymbolFileReader.java index 1ff908a05..4aff68ef3 100644 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolFileReader.java +++ b/qio/src/org/qcmg/genesymbol/GeneSymbolFileReader.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.genesymbol; +package org.qcmg.genesymbol; import java.io.Closeable; import java.io.File; diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java b/qio/src/org/qcmg/genesymbol/GeneSymbolRecord.java similarity index 95% rename from qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java rename to qio/src/org/qcmg/genesymbol/GeneSymbolRecord.java index 50dd25a46..be1c95378 100644 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecord.java +++ b/qio/src/org/qcmg/genesymbol/GeneSymbolRecord.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.genesymbol; +package org.qcmg.genesymbol; @Deprecated public class GeneSymbolRecord { diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java b/qio/src/org/qcmg/genesymbol/GeneSymbolRecordIterator.java similarity index 97% rename from qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java rename to qio/src/org/qcmg/genesymbol/GeneSymbolRecordIterator.java index f9ec9f160..d3eb9109c 100644 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolRecordIterator.java +++ b/qio/src/org/qcmg/genesymbol/GeneSymbolRecordIterator.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. 
*/ -package org.qcmg.unused.genesymbol; +package org.qcmg.genesymbol; import java.io.BufferedReader; import java.io.InputStream; diff --git a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java b/qio/src/org/qcmg/genesymbol/GeneSymbolSerializer.java similarity index 97% rename from qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java rename to qio/src/org/qcmg/genesymbol/GeneSymbolSerializer.java index 53fd0c646..819654673 100644 --- a/qio/src/org/qcmg/unused/genesymbol/GeneSymbolSerializer.java +++ b/qio/src/org/qcmg/genesymbol/GeneSymbolSerializer.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.genesymbol; +package org.qcmg.genesymbol; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/src/org/qcmg/unused/maf/MAFFileReader.java b/qio/src/org/qcmg/maf/MAFFileReader.java similarity index 97% rename from qio/src/org/qcmg/unused/maf/MAFFileReader.java rename to qio/src/org/qcmg/maf/MAFFileReader.java index 04aa0de51..c3bf6af5c 100644 --- a/qio/src/org/qcmg/unused/maf/MAFFileReader.java +++ b/qio/src/org/qcmg/maf/MAFFileReader.java @@ -7,7 +7,7 @@ */ -package org.qcmg.unused.maf; +package org.qcmg.maf; import java.io.Closeable; import java.io.File; diff --git a/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java b/qio/src/org/qcmg/maf/MAFRecordIterator.java similarity index 98% rename from qio/src/org/qcmg/unused/maf/MAFRecordIterator.java rename to qio/src/org/qcmg/maf/MAFRecordIterator.java index b3d899382..f7a716ca1 100644 --- a/qio/src/org/qcmg/unused/maf/MAFRecordIterator.java +++ b/qio/src/org/qcmg/maf/MAFRecordIterator.java @@ -6,7 +6,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.maf; +package org.qcmg.maf; import java.io.BufferedReader; import java.io.InputStream; diff --git a/qio/src/org/qcmg/unused/maf/MAFSerializer.java b/qio/src/org/qcmg/maf/MAFSerializer.java similarity index 98% rename from qio/src/org/qcmg/unused/maf/MAFSerializer.java rename to qio/src/org/qcmg/maf/MAFSerializer.java index 581cb2975..b01bf5cc2 100644 --- a/qio/src/org/qcmg/unused/maf/MAFSerializer.java +++ b/qio/src/org/qcmg/maf/MAFSerializer.java @@ -6,7 +6,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.maf; +package org.qcmg.maf; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java b/qio/src/org/qcmg/primerdesignsummary/PrimerDesignFileReader.java similarity index 85% rename from qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java rename to qio/src/org/qcmg/primerdesignsummary/PrimerDesignFileReader.java index 247335f12..fe4acf772 100644 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignFileReader.java +++ b/qio/src/org/qcmg/primerdesignsummary/PrimerDesignFileReader.java @@ -3,11 +3,11 @@ * @deprecated since it is no longer used. 
*/ -package org.qcmg.unused.primerdesignsummary; +package org.qcmg.primerdesignsummary; import java.io.File; -import org.qcmg.unused.reader.FileReader; +import org.qcmg.reader.FileReader; @Deprecated public class PrimerDesignFileReader extends FileReader { diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java b/qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecord.java similarity index 98% rename from qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java rename to qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecord.java index 9bd8fd02f..28b3ab529 100644 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecord.java +++ b/qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecord.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.primerdesignsummary; +package org.qcmg.primerdesignsummary; @Deprecated public class PrimerDesignRecord { diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java b/qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecordSerializer.java similarity index 98% rename from qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java rename to qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecordSerializer.java index 166347fd2..81e374610 100644 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerDesignRecordSerializer.java +++ b/qio/src/org/qcmg/primerdesignsummary/PrimerDesignRecordSerializer.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.primerdesignsummary; +package org.qcmg.primerdesignsummary; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java b/qio/src/org/qcmg/primerdesignsummary/PrimerPosition.java similarity index 96% rename from qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java rename to qio/src/org/qcmg/primerdesignsummary/PrimerPosition.java index 6d7b20e4e..fa598724b 100644 --- a/qio/src/org/qcmg/unused/primerdesignsummary/PrimerPosition.java +++ b/qio/src/org/qcmg/primerdesignsummary/PrimerPosition.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.primerdesignsummary; +package org.qcmg.primerdesignsummary; @Deprecated public class PrimerPosition { diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java b/qio/src/org/qcmg/primerinput/PrimerInputFileReader.java similarity index 86% rename from qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java rename to qio/src/org/qcmg/primerinput/PrimerInputFileReader.java index 00a1f469f..4c2eb1ace 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileReader.java +++ b/qio/src/org/qcmg/primerinput/PrimerInputFileReader.java @@ -3,11 +3,11 @@ * @deprecated since it is no longer used. 
*/ -package org.qcmg.unused.primerinput; +package org.qcmg.primerinput; import java.io.File; -import org.qcmg.unused.reader.FileReader; +import org.qcmg.reader.FileReader; @Deprecated public class PrimerInputFileReader extends FileReader { diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java b/qio/src/org/qcmg/primerinput/PrimerInputFileWriter.java similarity index 96% rename from qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java rename to qio/src/org/qcmg/primerinput/PrimerInputFileWriter.java index 25510052a..540aacbd7 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputFileWriter.java +++ b/qio/src/org/qcmg/primerinput/PrimerInputFileWriter.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.primerinput; +package org.qcmg.primerinput; import java.io.Closeable; import java.io.File; diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java b/qio/src/org/qcmg/primerinput/PrimerInputRecord.java similarity index 99% rename from qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java rename to qio/src/org/qcmg/primerinput/PrimerInputRecord.java index dbf722818..27b7c6c49 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecord.java +++ b/qio/src/org/qcmg/primerinput/PrimerInputRecord.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.primerinput; +package org.qcmg.primerinput; @Deprecated public class PrimerInputRecord { diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java b/qio/src/org/qcmg/primerinput/PrimerInputRecordSerializer.java similarity index 99% rename from qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java rename to qio/src/org/qcmg/primerinput/PrimerInputRecordSerializer.java index 89c34459b..5c687a2ec 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerInputRecordSerializer.java +++ b/qio/src/org/qcmg/primerinput/PrimerInputRecordSerializer.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.primerinput; +package org.qcmg.primerinput; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java b/qio/src/org/qcmg/primerinput/PrimerSequenceTarget.java similarity index 96% rename from qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java rename to qio/src/org/qcmg/primerinput/PrimerSequenceTarget.java index 0e8a40f94..5933cb870 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerSequenceTarget.java +++ b/qio/src/org/qcmg/primerinput/PrimerSequenceTarget.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.primerinput; +package org.qcmg.primerinput; @Deprecated public class PrimerSequenceTarget { diff --git a/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java b/qio/src/org/qcmg/primerinput/PrimerSizeRange.java similarity index 96% rename from qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java rename to qio/src/org/qcmg/primerinput/PrimerSizeRange.java index 7125e8e72..57e125d35 100644 --- a/qio/src/org/qcmg/unused/primerinput/PrimerSizeRange.java +++ b/qio/src/org/qcmg/primerinput/PrimerSizeRange.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. 
*/ -package org.qcmg.unused.primerinput; +package org.qcmg.primerinput; @Deprecated public class PrimerSizeRange { diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java b/qio/src/org/qcmg/primeroutput/PrimerOutputFileReader.java similarity index 73% rename from qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java rename to qio/src/org/qcmg/primeroutput/PrimerOutputFileReader.java index baa5c5d57..e104d2214 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileReader.java +++ b/qio/src/org/qcmg/primeroutput/PrimerOutputFileReader.java @@ -3,14 +3,14 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.primeroutput; +package org.qcmg.primeroutput; import java.io.File; -import org.qcmg.unused.primeroutput.PrimerOutputHeader; -import org.qcmg.unused.primeroutput.PrimerOutputRecord; -import org.qcmg.unused.reader.ExtendedFileReader; -import org.qcmg.unused.reader.FileReader; +import org.qcmg.primeroutput.PrimerOutputHeader; +import org.qcmg.primeroutput.PrimerOutputRecord; +import org.qcmg.reader.ExtendedFileReader; +import org.qcmg.reader.FileReader; @Deprecated public class PrimerOutputFileReader extends ExtendedFileReader { diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java b/qio/src/org/qcmg/primeroutput/PrimerOutputFileWriter.java similarity index 91% rename from qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java rename to qio/src/org/qcmg/primeroutput/PrimerOutputFileWriter.java index 9ae418062..746222be3 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputFileWriter.java +++ b/qio/src/org/qcmg/primeroutput/PrimerOutputFileWriter.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.primeroutput; +package org.qcmg.primeroutput; import java.io.Closeable; import java.io.File; @@ -11,7 +11,7 @@ import java.io.IOException; import java.io.OutputStream; -import org.qcmg.unused.primeroutput.PrimerOutputRecord; +import org.qcmg.primeroutput.PrimerOutputRecord; @Deprecated public final class PrimerOutputFileWriter implements Closeable { diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java b/qio/src/org/qcmg/primeroutput/PrimerOutputHeader.java similarity index 99% rename from qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java rename to qio/src/org/qcmg/primeroutput/PrimerOutputHeader.java index b58fef717..8b22fdee6 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeader.java +++ b/qio/src/org/qcmg/primeroutput/PrimerOutputHeader.java @@ -11,7 +11,7 @@ // -package org.qcmg.unused.primeroutput; +package org.qcmg.primeroutput; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java b/qio/src/org/qcmg/primeroutput/PrimerOutputHeaderSerializer.java similarity index 99% rename from qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java rename to qio/src/org/qcmg/primeroutput/PrimerOutputHeaderSerializer.java index 0b748d40c..ff5d37b29 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputHeaderSerializer.java +++ b/qio/src/org/qcmg/primeroutput/PrimerOutputHeaderSerializer.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. 
*/ -package org.qcmg.unused.primeroutput; +package org.qcmg.primeroutput; import java.io.BufferedReader; diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java b/qio/src/org/qcmg/primeroutput/PrimerOutputRecord.java similarity index 99% rename from qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java rename to qio/src/org/qcmg/primeroutput/PrimerOutputRecord.java index d174be517..fdd40ba7e 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecord.java +++ b/qio/src/org/qcmg/primeroutput/PrimerOutputRecord.java @@ -11,7 +11,7 @@ // -package org.qcmg.unused.primeroutput; +package org.qcmg.primeroutput; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; diff --git a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java b/qio/src/org/qcmg/primeroutput/PrimerOutputRecordSerializer.java similarity index 99% rename from qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java rename to qio/src/org/qcmg/primeroutput/PrimerOutputRecordSerializer.java index e8038701e..cb8325444 100644 --- a/qio/src/org/qcmg/unused/primeroutput/PrimerOutputRecordSerializer.java +++ b/qio/src/org/qcmg/primeroutput/PrimerOutputRecordSerializer.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.primeroutput; +package org.qcmg.primeroutput; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/src/org/qcmg/unused/reader/AbstractReader.java b/qio/src/org/qcmg/reader/AbstractReader.java similarity index 96% rename from qio/src/org/qcmg/unused/reader/AbstractReader.java rename to qio/src/org/qcmg/reader/AbstractReader.java index 68e6e731c..22a7953f0 100644 --- a/qio/src/org/qcmg/unused/reader/AbstractReader.java +++ b/qio/src/org/qcmg/reader/AbstractReader.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.unused.reader; +package org.qcmg.reader; import java.io.File; import java.io.FileInputStream; diff --git a/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java b/qio/src/org/qcmg/reader/ExtendedFileReader.java similarity index 97% rename from qio/src/org/qcmg/unused/reader/ExtendedFileReader.java rename to qio/src/org/qcmg/reader/ExtendedFileReader.java index 3c96319ed..dda3c1739 100644 --- a/qio/src/org/qcmg/unused/reader/ExtendedFileReader.java +++ b/qio/src/org/qcmg/reader/ExtendedFileReader.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.unused.reader; +package org.qcmg.reader; import java.io.BufferedReader; import java.io.File; diff --git a/qio/src/org/qcmg/unused/reader/FileReader.java b/qio/src/org/qcmg/reader/FileReader.java similarity index 97% rename from qio/src/org/qcmg/unused/reader/FileReader.java rename to qio/src/org/qcmg/reader/FileReader.java index 0cc4ae6ce..c472ff003 100644 --- a/qio/src/org/qcmg/unused/reader/FileReader.java +++ b/qio/src/org/qcmg/reader/FileReader.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
*/ -package org.qcmg.unused.reader; +package org.qcmg.reader; import java.io.Closeable; import java.io.File; diff --git a/qio/src/org/qcmg/unused/reader/Reader.java b/qio/src/org/qcmg/reader/Reader.java similarity index 87% rename from qio/src/org/qcmg/unused/reader/Reader.java rename to qio/src/org/qcmg/reader/Reader.java index 5f7a95a30..97849294e 100644 --- a/qio/src/org/qcmg/unused/reader/Reader.java +++ b/qio/src/org/qcmg/reader/Reader.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.unused.reader; +package org.qcmg.reader; import java.io.Closeable; diff --git a/qio/src/org/qcmg/unused/simple/SimpleFileReader.java b/qio/src/org/qcmg/simple/SimpleFileReader.java similarity index 87% rename from qio/src/org/qcmg/unused/simple/SimpleFileReader.java rename to qio/src/org/qcmg/simple/SimpleFileReader.java index f7bb2981c..0cc6e889e 100644 --- a/qio/src/org/qcmg/unused/simple/SimpleFileReader.java +++ b/qio/src/org/qcmg/simple/SimpleFileReader.java @@ -3,13 +3,13 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.simple; +package org.qcmg.simple; import java.io.File; import java.io.IOException; import org.qcmg.qmule.record.AbstractRecordIterator; -import org.qcmg.unused.reader.AbstractReader; +import org.qcmg.reader.AbstractReader; @Deprecated public class SimpleFileReader extends AbstractReader { diff --git a/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java b/qio/src/org/qcmg/simple/SimpleRecordIterator.java similarity index 95% rename from qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java rename to qio/src/org/qcmg/simple/SimpleRecordIterator.java index 941bfcf4a..4a74fd476 100644 --- a/qio/src/org/qcmg/unused/simple/SimpleRecordIterator.java +++ b/qio/src/org/qcmg/simple/SimpleRecordIterator.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. */ -package org.qcmg.unused.simple; +package org.qcmg.simple; import java.io.InputStream; diff --git a/qio/src/org/qcmg/unused/simple/SimpleSerializer.java b/qio/src/org/qcmg/simple/SimpleSerializer.java similarity index 98% rename from qio/src/org/qcmg/unused/simple/SimpleSerializer.java rename to qio/src/org/qcmg/simple/SimpleSerializer.java index 3b49a5131..e3d2596eb 100644 --- a/qio/src/org/qcmg/unused/simple/SimpleSerializer.java +++ b/qio/src/org/qcmg/simple/SimpleSerializer.java @@ -3,7 +3,7 @@ * @deprecated since it is no longer used. 
*/ -package org.qcmg.unused.simple; +package org.qcmg.simple; import java.io.BufferedReader; import java.io.IOException; diff --git a/qio/test/org/qcmg/unused/simple/SimpleSerializerTest.java b/qio/test/org/qcmg/simple/SimpleSerializerTest.java similarity index 98% rename from qio/test/org/qcmg/unused/simple/SimpleSerializerTest.java rename to qio/test/org/qcmg/simple/SimpleSerializerTest.java index a697b0da3..151f39514 100644 --- a/qio/test/org/qcmg/unused/simple/SimpleSerializerTest.java +++ b/qio/test/org/qcmg/simple/SimpleSerializerTest.java @@ -1,8 +1,9 @@ -package org.qcmg.unused.simple; +package org.qcmg.simple; import org.junit.Assert; import org.junit.Test; import org.qcmg.qmule.record.SimpleRecord; +import org.qcmg.simple.SimpleSerializer; public class SimpleSerializerTest { From 95df4f7cff74afca3e243b37c2c66a6b2f6a6f6f Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Mon, 30 Nov 2020 12:39:21 +1000 Subject: [PATCH 42/73] mv SimpleSerializerTest to qio/FastaRecordTest --- .../SimpleSerializerTest.java => qio/fasta/FastaRecordTest.java} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename qio/test/org/qcmg/{simple/SimpleSerializerTest.java => qio/fasta/FastaRecordTest.java} (100%) diff --git a/qio/test/org/qcmg/simple/SimpleSerializerTest.java b/qio/test/org/qcmg/qio/fasta/FastaRecordTest.java similarity index 100% rename from qio/test/org/qcmg/simple/SimpleSerializerTest.java rename to qio/test/org/qcmg/qio/fasta/FastaRecordTest.java From 74f691493caec247293bd6c1f52c2f58838bf087 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Mon, 30 Nov 2020 12:39:37 +1000 Subject: [PATCH 43/73] mv SimpleSerializerTest to qio/FastaRecordTest --- .../org/qcmg/qio/fasta/FastaRecordTest.java | 74 +++++++------------ 1 file changed, 27 insertions(+), 47 deletions(-) diff --git a/qio/test/org/qcmg/qio/fasta/FastaRecordTest.java b/qio/test/org/qcmg/qio/fasta/FastaRecordTest.java index 151f39514..714259b57 100644 --- a/qio/test/org/qcmg/qio/fasta/FastaRecordTest.java +++ b/qio/test/org/qcmg/qio/fasta/FastaRecordTest.java @@ -1,35 +1,25 @@ -package org.qcmg.simple; +package org.qcmg.qio.fasta; import org.junit.Assert; import org.junit.Test; -import org.qcmg.qmule.record.SimpleRecord; -import org.qcmg.simple.SimpleSerializer; +import org.qcmg.qio.fasta.FastaRecord; - -public class SimpleSerializerTest { +public class FastaRecordTest { @Test public void testParseIDInvalid() throws Exception { // test empty string try { - SimpleSerializer.parseID(""); + (new FastaRecord()).setId(""); Assert.fail("Should have thrown an Exception"); } catch (Exception e) { Assert.assertEquals("Bad id format: ", e.getMessage()); } - - // test null -// try { -// SimpleSerializer.parseID(null); -// Assert.fail("Should have thrown an Exception"); -// } catch (Exception e) { -// Assert.assertEquals("Bad id format", e.getMessage()); -// } - + // string containing anything other than '>' String testString = "this is an id $%^&*()"; try { - SimpleSerializer.parseID(testString); + (new FastaRecord()).setId(testString); Assert.fail("Should have thrown an Exception"); } catch (Exception e) { Assert.assertEquals("Bad id format: " + testString, e.getMessage()); @@ -38,7 +28,8 @@ public void testParseIDInvalid() throws Exception { // string containing '>' but not at the start.. 
testString = "this is an > id"; try { - SimpleSerializer.parseID(testString); + + (new FastaRecord()).setId(testString); Assert.fail("Should have thrown an Exception"); } catch (Exception e) { Assert.assertEquals("Bad id format: " + testString, e.getMessage()); @@ -47,21 +38,27 @@ public void testParseIDInvalid() throws Exception { @Test public void testParseID() throws Exception { + FastaRecord frecord = new FastaRecord(); String returnedID; - - returnedID = SimpleSerializer.parseID(">123"); + + frecord.setId(">123"); + returnedID = frecord.getId(); Assert.assertNotNull(returnedID); Assert.assertEquals(">123", returnedID); - returnedID = SimpleSerializer.parseID(">"); + + frecord.setId(">"); + returnedID = frecord.getId(); Assert.assertNotNull(returnedID); Assert.assertEquals(">", returnedID); - returnedID = SimpleSerializer.parseID(">123_456_789"); + frecord.setId(">123_456_789"); + returnedID = frecord.getId(); Assert.assertNotNull(returnedID); Assert.assertEquals(">123_456_789", returnedID); - returnedID = SimpleSerializer.parseID(">>>"); + frecord.setId(">>>"); + returnedID = frecord.getId(); Assert.assertNotNull(returnedID); Assert.assertEquals(">>>", returnedID); @@ -70,29 +67,10 @@ public void testParseID() throws Exception { @Test public void testParseSequenceInvalid() throws Exception { // test null -// try { -// SimpleSerializer.parseSequence(null); -// Assert.fail("Should have thrown an exception"); -// } catch (Exception e) { -// Assert.assertEquals("Bad sequence format", e.getMessage()); -// } -// // test empty string -// try { -// SimpleSerializer.parseSequence(""); -// Assert.fail("Should have thrown an Exception"); -// } catch (Exception e) { -// Assert.assertEquals("Bad sequence format", e.getMessage()); -// } -// try { -// SimpleSerializer.parseSequence(" "); -// Assert.fail("Should have thrown an Exception"); -// } catch (Exception e) { -// Assert.assertEquals("Bad sequence format", e.getMessage()); -// } // test id string String testString = ">123"; try { - SimpleSerializer.parseSequence(testString); + (new FastaRecord()).setData(testString); Assert.fail("Should have thrown an Exception"); } catch (Exception e) { Assert.assertEquals("Bad sequence format: " + testString, e.getMessage()); @@ -101,22 +79,24 @@ public void testParseSequenceInvalid() throws Exception { @Test public void testParseSequence() throws Exception { + FastaRecord frecord = new FastaRecord(); - String returnedSequence = SimpleSerializer.parseSequence("G0103200103201032001033001032001032001032001032001"); + frecord.setData("G0103200103201032001033001032001032001032001032001"); + String returnedSequence = frecord.getData(); Assert.assertNotNull(returnedSequence); - Assert.assertEquals("G0103200103201032001033001032001032001032001032001", returnedSequence); + Assert.assertEquals("G0103200103201032001033001032001032001032001032001", returnedSequence); - returnedSequence = SimpleSerializer.parseSequence("31,30,32,26,26,26,23,24,29,31,31,23,25,18,14,20,18,11,27,22,18,23,2,18,29,20,25,11,19,18"); + frecord.setData("31,30,32,26,26,26,23,24,29,31,31,23,25,18,14,20,18,11,27,22,18,23,2,18,29,20,25,11,19,18"); + returnedSequence = frecord.getData(); Assert.assertNotNull(returnedSequence); Assert.assertEquals("31,30,32,26,26,26,23,24,29,31,31,23,25,18,14,20,18,11,27,22,18,23,2,18,29,20,25,11,19,18", returnedSequence); } @Test public void testParseRecords() throws Exception { - SimpleRecord record; + FastaRecord record = new FastaRecord(">123", "G0103200103201032001033001032001032001032001032001"); // real record 
- record = SimpleSerializer.parseRecord(">123", "G0103200103201032001033001032001032001032001032001"); Assert.assertNotNull(record); Assert.assertEquals(">123", record.getId()); Assert.assertEquals("G0103200103201032001033001032001032001032001032001", record.getData()); From 7b9b7060249ed5b0c303053a8523e9acf2357068 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Mon, 30 Nov 2020 22:03:13 +1000 Subject: [PATCH 44/73] replace qio::pileup::PileupRecord and PileupFileReader with new qio::StringFileReader --- qio/src/org/qcmg/pileup/PileupFileReader.java | 47 ---------------- qio/src/org/qcmg/pileup/PileupRecord.java | 16 ------ .../org/qcmg/pileup/PileupRecordIterator.java | 56 ------------------- qio/src/org/qcmg/pileup/PileupSerializer.java | 53 ------------------ .../{ => qmule}/pileup/QPileupFileReader.java | 0 .../pileup/QPileupRecordIterator.java | 0 .../{ => qmule}/pileup/QPileupSerializer.java | 0 7 files changed, 172 deletions(-) delete mode 100644 qio/src/org/qcmg/pileup/PileupFileReader.java delete mode 100644 qio/src/org/qcmg/pileup/PileupRecord.java delete mode 100644 qio/src/org/qcmg/pileup/PileupRecordIterator.java delete mode 100644 qio/src/org/qcmg/pileup/PileupSerializer.java rename qio/src/org/qcmg/{ => qmule}/pileup/QPileupFileReader.java (100%) rename qio/src/org/qcmg/{ => qmule}/pileup/QPileupRecordIterator.java (100%) rename qio/src/org/qcmg/{ => qmule}/pileup/QPileupSerializer.java (100%) diff --git a/qio/src/org/qcmg/pileup/PileupFileReader.java b/qio/src/org/qcmg/pileup/PileupFileReader.java deleted file mode 100644 index c3e31e5cb..000000000 --- a/qio/src/org/qcmg/pileup/PileupFileReader.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.pileup; - - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; -import java.util.zip.GZIPInputStream; -import org.qcmg.common.util.FileUtils; - - -public final class PileupFileReader implements Closeable, Iterable { -// public final class PileupFileReader implements Closeable, Iterable { - private final File file; - private final InputStream inputStream; - - public PileupFileReader(final File file) throws Exception { - this.file = file; - boolean isGzip = FileUtils.isInputGZip( file); - inputStream = (isGzip) ? new GZIPInputStream(new FileInputStream(file)) : new FileInputStream(file); - - } - - @Override - public Iterator iterator() { - return getRecordIterator(); - } - - - public PileupRecordIterator getRecordIterator() { - return new PileupRecordIterator(inputStream); - } - - @Override - public void close() throws IOException { - inputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/pileup/PileupRecord.java b/qio/src/org/qcmg/pileup/PileupRecord.java deleted file mode 100644 index b81c06795..000000000 --- a/qio/src/org/qcmg/pileup/PileupRecord.java +++ /dev/null @@ -1,16 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
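The rewritten test in PATCH 43/73 above exercises the new FastaRecord type directly instead of the old static SimpleSerializer.parseID/parseSequence helpers. The sketch below is a minimal illustration of that usage, assuming only what the diff itself shows: the org.qcmg.qio.fasta.FastaRecord two-argument constructor, setId/getId and setData/getData, and validation that rejects an id without a leading '>' (or data that starts with one) with a message beginning "Bad id format: " or "Bad sequence format: ". The wrapper class and main method are hypothetical scaffolding for the example only.

    import org.qcmg.qio.fasta.FastaRecord;

    public class FastaRecordSketch {
        public static void main(String[] args) {
            // A well-formed record: the id carries the '>' prefix, the data does not.
            FastaRecord rec = new FastaRecord(">123", "G0103200103201032001033001032001032001032001032001");
            System.out.println(rec.getId() + " -> " + rec.getData());

            // The setters re-validate their input, as the failure cases in the test expect.
            try {
                rec.setId("missing prefix");
            } catch (Exception e) {
                System.out.println(e.getMessage()); // expected to start with "Bad id format: "
            }
        }
    }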
- */ -package org.qcmg.pileup; - - -public class PileupRecord { - private String pileup; - - public String getPileup() { - return pileup; - } - public void setPileup(String pileup) { - this.pileup = pileup; - } -} diff --git a/qio/src/org/qcmg/pileup/PileupRecordIterator.java b/qio/src/org/qcmg/pileup/PileupRecordIterator.java deleted file mode 100644 index 95ad2b155..000000000 --- a/qio/src/org/qcmg/pileup/PileupRecordIterator.java +++ /dev/null @@ -1,56 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.pileup; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class PileupRecordIterator implements Iterator { -// public final class PileupRecordIterator implements Iterator { - private final BufferedReader reader; - private String next; -// private PileupRecord next; - - public PileupRecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - public boolean hasNext() { - return null != next; - } - - public String next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - String result = next; - readNext(); - return result; - } -// public PileupRecord next() { -// if (!hasNext()) { -// throw new NoSuchElementException(); -// } -// PileupRecord result = next; -// readNext(); -// return result; -// } - - private void readNext() { - try { - next = PileupSerializer.nextRecord(reader); - } catch (Exception ex) { - throw new RuntimeException(ex.getMessage(), ex); - } - } - - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/pileup/PileupSerializer.java b/qio/src/org/qcmg/pileup/PileupSerializer.java deleted file mode 100644 index 56fdbcf0b..000000000 --- a/qio/src/org/qcmg/pileup/PileupSerializer.java +++ /dev/null @@ -1,53 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.pileup; - -import java.io.BufferedReader; -import java.io.IOException; - -public final class PileupSerializer { -// private static final Pattern tabbedPattern = Pattern.compile("[\\t]"); - private static final String DEFAULT_HEADER_PREFIX = "#"; - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith(DEFAULT_HEADER_PREFIX)) { - line = reader.readLine(); - } - return line; - } - - public static String nextRecord(final BufferedReader reader) - throws Exception { -// PileupRecord result = null; - String line = nextNonheaderLine(reader); -// if (null != line) { -// result = parseRecord(line); -// } - return line; - } -// public static PileupRecord nextRecord(final BufferedReader reader) -// throws IOException , Exception { -// PileupRecord result = null; -// String line = nextNonheaderLine(reader); -// if (null != line) { -// result = parseRecord(line); -// } -// return result; -// } - - static PileupRecord parseRecord(final String line) throws Exception { -// String[] params = tabbedPattern.split(line, -1); -// if (4 > params.length) { -// throw new Exception("Bad Pileup format. 
Insufficient columns: '" + line + "'"); -// } - PileupRecord result = new PileupRecord(); -// result.setChromosome(params[0]); -// result.setPosition(Integer.parseInt(params[1])); -// result.setRef(params[2].charAt(0)); - result.setPileup(line); - return result; - } -} diff --git a/qio/src/org/qcmg/pileup/QPileupFileReader.java b/qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java similarity index 100% rename from qio/src/org/qcmg/pileup/QPileupFileReader.java rename to qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java diff --git a/qio/src/org/qcmg/pileup/QPileupRecordIterator.java b/qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java similarity index 100% rename from qio/src/org/qcmg/pileup/QPileupRecordIterator.java rename to qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java diff --git a/qio/src/org/qcmg/pileup/QPileupSerializer.java b/qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java similarity index 100% rename from qio/src/org/qcmg/pileup/QPileupSerializer.java rename to qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java From 4e8c56ff4e10a357f260ec492c34f63f7076a730 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Mon, 30 Nov 2020 22:04:24 +1000 Subject: [PATCH 45/73] mv QPileupFileReader to qio::qmule, it is only used by qmule --- qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java | 4 +++- qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java | 4 +++- qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java b/qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java index db9f40d7b..1ded6549c 100644 --- a/qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java +++ b/qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.pileup; +package org.qcmg.qmule.pileup; import java.io.Closeable; import java.io.File; @@ -10,6 +10,8 @@ import java.io.InputStream; import java.util.Iterator; +import org.qcmg.pileup.QSnpRecord; + public final class QPileupFileReader implements Closeable, Iterable { private final File file; private final InputStream inputStream; diff --git a/qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java b/qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java index e930b2f4f..ecb75ed02 100644 --- a/qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java +++ b/qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java @@ -1,7 +1,7 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.pileup; +package org.qcmg.qmule.pileup; import java.io.BufferedReader; import java.io.InputStream; @@ -9,6 +9,8 @@ import java.util.Iterator; import java.util.NoSuchElementException; +import org.qcmg.pileup.QSnpRecord; + public final class QPileupRecordIterator implements Iterator { private final BufferedReader reader; private QSnpRecord next; diff --git a/qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java b/qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java index 337c1ee94..d8419a61a 100644 --- a/qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java +++ b/qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java @@ -4,7 +4,7 @@ * * This code is released under the terms outlined in the included LICENSE file.
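PATCH 44/73 above removes the PileupRecord/PileupFileReader pair in favour of the generic line-based StringFileReader, so callers now iterate raw String lines rather than unwrapping a record object. Below is a minimal sketch of the new read loop, assuming only the StringFileReader(File) constructor and per-line iteration that the qmule diffs later in this series rely on, plus the assumption that the reader can be closed via try-with-resources; the surrounding class is illustrative scaffolding.

    import java.io.File;
    import org.qcmg.qio.record.StringFileReader;

    public class PileupLineCount {
        public static void main(String[] args) throws Exception {
            // Each record is now the raw tab-delimited pileup line; there is no PileupRecord wrapper.
            try (StringFileReader reader = new StringFileReader(new File(args[0]))) {
                int count = 0;
                for (String rec : reader) {
                    count++;
                }
                System.out.println("pileup lines: " + count);
            }
        }
    }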
*/ -package org.qcmg.pileup; +package org.qcmg.qmule.pileup; import java.io.BufferedReader; import java.io.IOException; @@ -12,6 +12,7 @@ import org.qcmg.common.model.Classification; import org.qcmg.common.util.BaseUtils; +import org.qcmg.pileup.QSnpRecord; import org.qcmg.common.model.Classification; public final class QPileupSerializer { From 79f328e5849cd4d23beb4d3e4d3937f30fb23714 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Mon, 30 Nov 2020 22:05:11 +1000 Subject: [PATCH 46/73] tidy up QSnpRecord --- qio/src/org/qcmg/pileup/QSnpRecord.java | 153 +----------------------- 1 file changed, 2 insertions(+), 151 deletions(-) diff --git a/qio/src/org/qcmg/pileup/QSnpRecord.java b/qio/src/org/qcmg/pileup/QSnpRecord.java index 12c81aaa7..a63b83e26 100644 --- a/qio/src/org/qcmg/pileup/QSnpRecord.java +++ b/qio/src/org/qcmg/pileup/QSnpRecord.java @@ -15,27 +15,16 @@ @Deprecated public class QSnpRecord { - - + private final VcfRecord vcf; private String alt; -// private String pileup; private GenotypeEnum controlGenotype; private GenotypeEnum testGenotype; private Accumulator control; private Accumulator test; -// private int normalCount; -// private int tumourCount; private int normalNovelStartCount; private int tumourNovelStartCount; private Classification classification; -// private String mutation; -// private String unfilteredNormalPileup; -// private Double probablility; -// private String normalNucleotides; -// private String tumourNucleotides; -// private String normalOABS; -// private String tumourOABS; private String flankingSequence; public QSnpRecord(VcfRecord vcf) { @@ -44,12 +33,8 @@ public QSnpRecord(VcfRecord vcf) { public QSnpRecord(String chr, int position, String ref) { this(chr, position, ref, null); -// this.control = control; -// this.test = test; } public QSnpRecord(String chr, int position, String ref, String alt) { -// int length = StringUtils.isNullOrEmpty(ref) ? 1 : ref.length(); - //vcf = VcfUtils.createVcfRecord(new ChrPosition(chr, position, (position + length) -1), null, ref, alt); vcf = new VcfRecord.Builder(chr, position, ref).allele(alt).build(); } @@ -73,12 +58,6 @@ public String getChromosome() { public int getPosition() { return vcf.getPosition(); } -// public String getPileup() { -// return pileup; -// } -// public void setPileup(String pileup) { -// this.pileup = pileup; -// } public GenotypeEnum getNormalGenotype() { return controlGenotype; } @@ -91,139 +70,17 @@ public GenotypeEnum getTumourGenotype() { public void setTumourGenotype(GenotypeEnum tumourGenotype) { this.testGenotype = tumourGenotype; } -// public void setNormalCount(int normalCount) { -// this.normalCount = normalCount; -// } -// public int getNormalCount() { -// return normalCount; -// } public void setClassification(Classification classification) { this.classification = classification; } public Classification getClassification() { return classification; } -// public void setTumourCount(int tumourCount) { -// this.tumourCount = tumourCount; -// } -// public int getTumourCount() { -// return tumourCount; -// } public String getAnnotation() { return vcf.getFilter(); } -// public void setMutation(String mutation) { -// this.mutation = mutation; -// } -// public String getMutation() { -// return mutation; -// } - -// public void setNormalPileup(String normalPileup) { -// this.normalPileup = normalPileup; -// } -// public String getNormalPileup() { -// return normalPileup; -// } - -// public String getFormattedString() { -// return pileup + TAB -// + (null != controlGenotype ? 
controlGenotype.getDisplayString() : "") + TAB -// + (null != testGenotype ? testGenotype.getDisplayString() : "") + TAB -// + classification + TAB -// + (null != mutation ? mutation : "") + TAB -// + (StringUtils.isNullOrEmpty(vcf.getFilter()) ? "" : vcf.getFilter()) + TAB; -// } - -// public String getGATKFormattedString() { -// return vcf.getChromosome() + TAB -// + vcf.getPosition() + TAB -// + vcf.getRef() + TAB -// + (null != controlGenotype ? controlGenotype.getDisplayString() : "") + TAB -// + (null != testGenotype ? testGenotype.getDisplayString() : "") + TAB -// + classification + TAB -// + (null != mutation ? mutation : "") + TAB -// + (StringUtils.isNullOrEmpty(vcf.getFilter()) ? "" : vcf.getFilter()) + TAB -// + (StringUtils.isNullOrEmpty(normalNucleotides) ? "--" : normalNucleotides) + TAB -// + (StringUtils.isNullOrEmpty(tumourNucleotides) ? "--" : tumourNucleotides); -// } - -// public String getDCCData(final String mutationId, final String chr) { -// StringBuilder sb = new StringBuilder(); -// sb.append(mutationId).append(TAB); -// sb.append("1").append(TAB); -// sb.append(chr).append(TAB); -// sb.append( vcf.getPosition()).append(TAB); -// sb.append( vcf.getPosition()).append(TAB); -// sb.append(1).append(TAB); -// sb.append(vcf.getRef()).append(TAB); -// sb.append(null != controlGenotype ? controlGenotype.getDisplayString() : "--").append(TAB); -// sb.append(null != testGenotype ? testGenotype.getDisplayString() : "--").append(TAB); -// if (Classification.GERMLINE != classification) { -// sb.append(null != mutation ? mutation : "").append(TAB); -// } -// sb.append("-999").append(TAB); // expressed_allele -// sb.append("-999").append(TAB); // quality_score -// sb.append(null != probablility ? probablility.toString() : "-999").append(TAB); // probability -// sb.append(Classification.GERMLINE != classification ? tumourCount : normalCount).append(TAB); -// sb.append(StringUtils.isNullOrEmpty(vcf.getFilter()) ? "--" : vcf.getFilter()).append(TAB); -// sb.append(StringUtils.isNullOrEmpty(normalNucleotides) ? "--" : normalNucleotides).append(TAB); -// sb.append(StringUtils.isNullOrEmpty(tumourNucleotides) ? "--" : tumourNucleotides); -// -// return sb.toString(); -// } - -// public String getDCCDataNS(final String mutationId, final String chr) { -// StringBuilder sb = new StringBuilder(getDCCData(mutationId, chr)); -// sb.append(TAB).append(getNovelStartCount()); -// return sb.toString(); -// } -// public String getDCCDataNSFlankingSeq(final String mutationId, final String chr) { -// StringBuilder sb = new StringBuilder(getDCCDataNS(mutationId, chr)); -// sb.append(TAB).append(null != getFlankingSequence() ? getFlankingSequence() :"--"); -// return sb.toString(); -// } - -// public String getGermlineDBData() { -// return vcf.getPosition() + TAB -// + vcf.getPosition() + TAB -// + 1 + TAB // strand - always set to 1 ??? -// + vcf.getRef() + TAB -// + (null != controlGenotype ? controlGenotype.getDisplayString() : "") + TAB -// + (null != testGenotype ? 
testGenotype.getDisplayString() : "") + TAB -// + "-999\t" // quality_score -// + "-999\t" // probability -// + (normalCount + tumourCount) + TAB; // read count -// } - -// public String getNormalNucleotides() { -// return normalNucleotides; -// } -// public void setNormalNucleotides(String normalNucleotides) { -// this.normalNucleotides = normalNucleotides; -// } -// public String getTumourNucleotides() { -// return tumourNucleotides; -// } -// public void setTumourNucleotides(String tumourNucleotides) { -// this.tumourNucleotides = tumourNucleotides; -// } -// public void setTumourOABS(String tumourOABS) { -// this.tumourOABS = tumourOABS; -// } -// public void setNormalOABS(String normalOABS) { -// this.normalOABS = normalOABS; -// } -// public String getNormalOABS() { -// return control != null ? control.getObservedAllelesByStrand() : null; -// } -// public String getTumourOABS() { -// return test != null ? test.getObservedAllelesByStrand() : null; -// } -// public void setUnfilteredNormalPileup(String unfilteredNormalPileup) { -// this.unfilteredNormalPileup = unfilteredNormalPileup; -// } + public String getUnfilteredNormalPileup() { return null != control ? control.getFailedFilterPileup() : null; } @@ -248,12 +105,6 @@ public void setFlankingSequence(String flankingSequence) { public String getFlankingSequence() { return flankingSequence; } -// public double getProbability() { -// return probablility; -// } -// public void setProbability(double probability) { -// this.probablility = Double.valueOf(probability); -// } public VcfRecord getVcfRecord() { return vcf; From 2c78c8c933ab117e7dd4ebf168e27e832594e089 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Mon, 30 Nov 2020 22:17:34 +1000 Subject: [PATCH 47/73] move QSnpRecord from qio:pileup to qcommon::model --- .../src/org/qcmg/common/model}/QSnpRecord.java | 7 +------ .../test/org/qcmg/common/model}/QSnpRecordTest.java | 0 2 files changed, 1 insertion(+), 6 deletions(-) rename {qio/src/org/qcmg/pileup => qcommon/src/org/qcmg/common/model}/QSnpRecord.java (92%) rename {qio/test/org/qcmg/pileup => qcommon/test/org/qcmg/common/model}/QSnpRecordTest.java (100%) diff --git a/qio/src/org/qcmg/pileup/QSnpRecord.java b/qcommon/src/org/qcmg/common/model/QSnpRecord.java similarity index 92% rename from qio/src/org/qcmg/pileup/QSnpRecord.java rename to qcommon/src/org/qcmg/common/model/QSnpRecord.java index a63b83e26..637c82192 100644 --- a/qio/src/org/qcmg/pileup/QSnpRecord.java +++ b/qcommon/src/org/qcmg/common/model/QSnpRecord.java @@ -4,13 +4,9 @@ * * This code is released under the terms outlined in the included LICENSE file. 
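PATCH 46/73 and 47/73 above strip QSnpRecord down to the members that are still used and relocate it from qio's org.qcmg.pileup package into qcommon's org.qcmg.common.model, so the qmule callers updated in the following patch mostly need only swap an import. Below is a minimal sketch of a caller after the move, assuming only the members visible in the diffs (the four-argument constructor, getChromosome and getPosition) and noting that the class itself remains @Deprecated; the wrapper class is illustrative only.

    // Before PATCH 47/73 this import was: import org.qcmg.pileup.QSnpRecord;
    import org.qcmg.common.model.QSnpRecord;

    public class QSnpRecordCaller {
        public static void main(String[] args) {
            // Same constructor and accessors as before the move; only the package name changed.
            QSnpRecord snp = new QSnpRecord("chr1", 12345, "A", "C");
            System.out.println(snp.getChromosome() + ":" + snp.getPosition());
        }
    }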
*/ -package org.qcmg.pileup; +package org.qcmg.common.model; -import org.qcmg.common.model.Accumulator; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.Classification; -import org.qcmg.common.model.GenotypeEnum; import org.qcmg.common.vcf.VcfRecord; @Deprecated @@ -21,7 +17,6 @@ public class QSnpRecord { private GenotypeEnum controlGenotype; private GenotypeEnum testGenotype; private Accumulator control; - private Accumulator test; private int normalNovelStartCount; private int tumourNovelStartCount; private Classification classification; diff --git a/qio/test/org/qcmg/pileup/QSnpRecordTest.java b/qcommon/test/org/qcmg/common/model/QSnpRecordTest.java similarity index 100% rename from qio/test/org/qcmg/pileup/QSnpRecordTest.java rename to qcommon/test/org/qcmg/common/model/QSnpRecordTest.java From 6ee343ad9a5857bc8a91edefed6a3644a072ea45 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Mon, 30 Nov 2020 22:18:57 +1000 Subject: [PATCH 48/73] update qmule due to relocation of qio::QSnpRecord --- .../src/org/qcmg/qmule/TranscriptomeMule.java | 5 +- .../src/org/qcmg/qmule/WiggleFromPileup.java | 5 +- .../qcmg/qmule/WiggleFromPileupTakeTwo.java | 5 +- .../qmule/snppicker/ExamineVerifiedSnps.java | 4 +- .../qcmg/qmule/snppicker/GatkUniqueSnps.java | 2 +- .../org/qcmg/qmule/snppicker/SnpPicker.java | 29 +++---- .../org/qcmg/qmule/snppicker/UniqueQSnps.java | 77 +++++++++---------- .../qmule/snppicker/GatkUniqueSnpsTest.java | 2 +- 8 files changed, 60 insertions(+), 69 deletions(-) diff --git a/qmule/src/org/qcmg/qmule/TranscriptomeMule.java b/qmule/src/org/qcmg/qmule/TranscriptomeMule.java index c9b4f95f5..39007e0bf 100644 --- a/qmule/src/org/qcmg/qmule/TranscriptomeMule.java +++ b/qmule/src/org/qcmg/qmule/TranscriptomeMule.java @@ -21,7 +21,8 @@ import org.qcmg.common.util.PileupUtils; import org.qcmg.common.util.TabTokenizer; import org.qcmg.picard.util.PileupElementUtil; -import org.qcmg.pileup.PileupFileReader; +//import org.qcmg.pileup.PileupFileReader; +import org.qcmg.qio.record.StringFileReader; public class TranscriptomeMule { @@ -119,7 +120,7 @@ private void parsePileup(String record) { } private void walkPileup(String pileupFileName) throws Exception { - PileupFileReader reader = new PileupFileReader(new File(pileupFileName)); + StringFileReader reader = new StringFileReader(new File(pileupFileName)); int count = 0; try { for (String record : reader) { diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java index 222727290..cd30ca363 100644 --- a/qmule/src/org/qcmg/qmule/WiggleFromPileup.java +++ b/qmule/src/org/qcmg/qmule/WiggleFromPileup.java @@ -24,7 +24,8 @@ import org.qcmg.qmule.gff3.GFF3FileReader; import org.qcmg.qmule.gff3.GFF3Record; import org.qcmg.qmule.gff3.GFF3RecordChromosomeAndPositionComparator; -import org.qcmg.pileup.PileupFileReader; +//import org.qcmg.pileup.PileupFileReader; +import org.qcmg.qio.record.StringFileReader; public class WiggleFromPileup { @@ -119,7 +120,7 @@ private void parsePileup() throws Exception { throw new RuntimeException("Unable to set next Gff record"); } - PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); + StringFileReader reader = new StringFileReader(new File(cmdLineInputFiles[0])); StringBuilder sb = new StringBuilder(); try { for (String pr : reader) { diff --git a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java index 36c6a7a8f..39d0fa560 100644 ---
a/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java +++ b/qmule/src/org/qcmg/qmule/WiggleFromPileupTakeTwo.java @@ -24,7 +24,8 @@ import org.qcmg.common.util.TabTokenizer; import org.qcmg.qmule.gff3.GFF3FileReader; import org.qcmg.qmule.gff3.GFF3Record; -import org.qcmg.pileup.PileupFileReader; +//import org.qcmg.pileup.PileupFileReader; +import org.qcmg.qio.record.StringFileReader; public class WiggleFromPileupTakeTwo { @@ -136,7 +137,7 @@ private void initialise() { private void parsePileup() throws Exception { Writer writer = getWriter(cmdLineOutputFiles[0]); - PileupFileReader reader = new PileupFileReader(new File(cmdLineInputFiles[0])); + StringFileReader reader = new StringFileReader(new File(cmdLineInputFiles[0])); StringBuilder sb = new StringBuilder(); try { for (String pr : reader) { diff --git a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java index 1f914b876..7c79935da 100644 --- a/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java +++ b/qmule/src/org/qcmg/qmule/snppicker/ExamineVerifiedSnps.java @@ -16,11 +16,11 @@ import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.model.ChrPointPosition; import org.qcmg.common.model.ChrPosition; +import org.qcmg.common.model.QSnpRecord; import org.qcmg.common.util.FileUtils; import org.qcmg.common.vcf.VcfRecord; import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.pileup.QPileupFileReader; -import org.qcmg.pileup.QSnpRecord; +import org.qcmg.qmule.pileup.QPileupFileReader; import org.qcmg.qmule.pileup.VerifiedSnpFileReader; import org.qcmg.qmule.pileup.VerifiedSnpRecord; import org.qcmg.vcf.VCFFileReader; diff --git a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java index 88023e383..0f98356b1 100644 --- a/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java +++ b/qmule/src/org/qcmg/qmule/snppicker/GatkUniqueSnps.java @@ -27,6 +27,7 @@ import org.qcmg.common.model.Classification; import org.qcmg.common.model.GenotypeEnum; import org.qcmg.common.model.QSnpGATKRecord; +import org.qcmg.common.model.QSnpRecord; import org.qcmg.common.util.BaseUtils; import org.qcmg.common.util.Constants; import org.qcmg.common.util.FileUtils; @@ -36,7 +37,6 @@ import org.qcmg.qmule.germlinedb.GermlineDBFileReader; import org.qcmg.qmule.germlinedb.GermlineDBRecord; import org.qcmg.picard.QJumper; -import org.qcmg.pileup.QSnpRecord; import org.qcmg.qmule.Messages; import org.qcmg.qmule.Options; import org.qcmg.qmule.Options.Ids; diff --git a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java index 4596407f3..258155470 100644 --- a/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java +++ b/qmule/src/org/qcmg/qmule/snppicker/SnpPicker.java @@ -33,13 +33,14 @@ import org.qcmg.qmule.gff3.GFF3FileReader; import org.qcmg.qmule.gff3.GFF3Record; import org.qcmg.picard.QJumper; -import org.qcmg.pileup.PileupFileReader; +//import org.qcmg.pileup.PileupFileReader; import org.qcmg.qmule.Messages; import org.qcmg.qmule.Options; import org.qcmg.qmule.QMuleException; import org.qcmg.qmule.record.Record; import org.qcmg.qio.illumina.IlluminaFileReader; import org.qcmg.qio.illumina.IlluminaRecord; +import org.qcmg.qio.record.StringFileReader; import org.qcmg.vcf.VCFFileReader; public class SnpPicker { @@ -442,24 +443,15 @@ private void loadVCFData() { private void loadQSnpData() { String qSnpFile = cmdLineInputFiles[2]; - PileupFileReader reader = null; - try { - reader 
= new PileupFileReader(new File(qSnpFile)); - } catch (Exception e) { - logger.error("Error caught whilst trying to instantiate PileupFileReader", e); - exitStatus = -1; - } - - if (null != reader) { + + try(StringFileReader reader = new StringFileReader(new File(qSnpFile))) { int vcfCount = 0; ChrPosition id; VariantRecord value; for (String rec : reader) { -// for (PileupRecord rec : reader) { // got some work to do here - need to split the pileup attribute to construct the object String [] params = TabTokenizer.tokenize(rec); -// String [] params = tabbedPattern.split(rec.getPileup(), -1); // skip if the tumour genotype is null String genotype = params[params.length-(isNormal ? 2 : 1)]; @@ -473,19 +465,18 @@ private void loadQSnpData() { variantMap.put(id, value); } value.setVcfRef(params[2].charAt(0)); - // value.setVcfAlt(rec.getAlt()); value.setVcfGenotype(genotype); vcfCount++; } } logger.info("there were " + vcfCount + " records in the qsnp file"); - try { - reader.close(); - } catch (IOException e) { - logger.error("IOException caught whilst trying to close PileupFileReader", e); - exitStatus = -1; - } + + + } catch (IOException e) { + logger.error("Error caught whilst trying to instantiate PileupFileReader", e); + exitStatus = -1; } + } private void loadGff3Data() { diff --git a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java index 7e6275fe1..3f092e9e4 100644 --- a/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java +++ b/qmule/src/org/qcmg/qmule/snppicker/UniqueQSnps.java @@ -19,7 +19,7 @@ import org.qcmg.common.model.ChrRangePosition; import org.qcmg.common.util.FileUtils; import org.qcmg.common.util.TabTokenizer; -import org.qcmg.pileup.PileupFileReader; +import org.qcmg.qio.record.StringFileReader; public class UniqueQSnps { @@ -141,59 +141,56 @@ private static void examineVCFs(String outputFile) throws IOException { private static void loadQPileup(String pileupFile) throws Exception { if (FileUtils.canFileBeRead(pileupFile)) { - PileupFileReader reader = new PileupFileReader(new File(pileupFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[params.length-2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - qSnpPileup.put(chrPos,pr); + try( StringFileReader reader = new StringFileReader(new File(pileupFile));){ + for (String pr : reader) { + String [] params = TabTokenizer.tokenize(pr); + String chrPosition = params[params.length-2]; + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + qSnpPileup.put(chrPos,pr); + } + reader.close(); } - reader.close(); } } private static void loadGatkData(String pileupFile) throws Exception { if (FileUtils.canFileBeRead(pileupFile)) { - PileupFileReader reader = new PileupFileReader(new File(pileupFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[params.length-2]; -// 
logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - gatkVcfs.put(chrPos,pr); + try( StringFileReader reader = new StringFileReader(new File(pileupFile));){ + for (String pr : reader) { + // for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); + // String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[params.length-2]; + // logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + gatkVcfs.put(chrPos,pr); + } } - reader.close(); } } private static void loadVerifiedSnps(String verifiedSnpFile) throws Exception { if (FileUtils.canFileBeRead(verifiedSnpFile)) { - PileupFileReader reader = new PileupFileReader(new File(verifiedSnpFile)); - for (String pr : reader) { -// for (PileupRecord pr : reader) { - String [] params = TabTokenizer.tokenize(pr); -// String [] params = tabbedPattern.split(pr.getPileup()); - String chrPosition = params[2]; -// logger.info("chrPosition: " + chrPosition); - //TODO refactor to use StringUtils.getChrPositionFromString() - int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); - ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); - - verifiedSNPs.put(chrPos,pr); + try( StringFileReader reader = new StringFileReader(new File(verifiedSnpFile));){ + for (String pr : reader) { + // for (PileupRecord pr : reader) { + String [] params = TabTokenizer.tokenize(pr); + // String [] params = tabbedPattern.split(pr.getPileup()); + String chrPosition = params[2]; + // logger.info("chrPosition: " + chrPosition); + //TODO refactor to use StringUtils.getChrPositionFromString() + int start = Integer.parseInt(chrPosition.substring(chrPosition.indexOf("-"))); + ChrPosition chrPos = new ChrRangePosition(chrPosition.substring(0, chrPosition.indexOf(":")-1), start, start); + + verifiedSNPs.put(chrPos,pr); + } } - reader.close(); } } diff --git a/qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java b/qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java index 7e4b342e7..145b7cb7c 100644 --- a/qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java +++ b/qmule/test/org/qcmg/qmule/snppicker/GatkUniqueSnpsTest.java @@ -9,9 +9,9 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.qcmg.common.model.QSnpRecord; import org.qcmg.common.util.SnpUtils; import org.qcmg.common.vcf.header.VcfHeaderUtils; -import org.qcmg.pileup.QSnpRecord; public class GatkUniqueSnpsTest { From 7b6cddd3c3ed9704ad4b742a5d13f69505559802 Mon Sep 17 00:00:00 2001 From: christix Date: Mon, 30 Nov 2020 22:26:35 +1000 Subject: [PATCH 49/73] update qio::qmule.QPileup* due to QSnpRecord relocation --- qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java | 2 +- qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java | 2 +- qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java 
b/qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java index 1ded6549c..080a5c342 100644 --- a/qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java +++ b/qio/src/org/qcmg/qmule/pileup/QPileupFileReader.java @@ -10,7 +10,7 @@ import java.io.InputStream; import java.util.Iterator; -import org.qcmg.pileup.QSnpRecord; +import org.qcmg.common.model.QSnpRecord; public final class QPileupFileReader implements Closeable, Iterable { private final File file; diff --git a/qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java b/qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java index ecb75ed02..4085ec5ab 100644 --- a/qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java +++ b/qio/src/org/qcmg/qmule/pileup/QPileupRecordIterator.java @@ -9,7 +9,7 @@ import java.util.Iterator; import java.util.NoSuchElementException; -import org.qcmg.pileup.QSnpRecord; +import org.qcmg.common.model.QSnpRecord; public final class QPileupRecordIterator implements Iterator { private final BufferedReader reader; diff --git a/qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java b/qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java index d8419a61a..81835526d 100644 --- a/qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java +++ b/qio/src/org/qcmg/qmule/pileup/QPileupSerializer.java @@ -11,8 +11,8 @@ import java.util.regex.Pattern; import org.qcmg.common.model.Classification; +import org.qcmg.common.model.QSnpRecord; import org.qcmg.common.util.BaseUtils; -import org.qcmg.pileup.QSnpRecord; import org.qcmg.common.model.Classification; public final class QPileupSerializer { From 8db04bf13a47ebeb82b6ba7cfe20c49467dd36bf Mon Sep 17 00:00:00 2001 From: christix Date: Mon, 30 Nov 2020 22:33:58 +1000 Subject: [PATCH 50/73] just tidy up qsnp --- qsnp/src/org/qcmg/snp/PileupPipeline.java | 15 +- qsnp/src/org/qcmg/snp/TorrentPipeline.java | 288 +----------------- .../org/qcmg/snp/TorrentPipelineTest.java | 11 +- 3 files changed, 10 insertions(+), 304 deletions(-) diff --git a/qsnp/src/org/qcmg/snp/PileupPipeline.java b/qsnp/src/org/qcmg/snp/PileupPipeline.java index cda920d34..39681f55b 100644 --- a/qsnp/src/org/qcmg/snp/PileupPipeline.java +++ b/qsnp/src/org/qcmg/snp/PileupPipeline.java @@ -20,9 +20,8 @@ import org.qcmg.common.meta.QExec; import org.qcmg.common.model.ChrPosition; import org.qcmg.common.string.StringUtils; -import org.qcmg.pileup.PileupFileReader; -import org.qcmg.pileup.QSnpRecord; -import org.qcmg.common.model.Classification; +import org.qcmg.common.model.QSnpRecord; +import org.qcmg.qio.record.StringFileReader; import org.qcmg.snp.util.IniFileUtil; import org.qcmg.snp.util.QJumperWorker; import org.qcmg.snp.util.QJumperWorker.Mode; @@ -166,17 +165,13 @@ void ingestIni(Ini ini) throws SnpException { // } private void walkPileup(String pileupFileName) throws Exception { - PileupFileReader reader = new PileupFileReader(new File(pileupFileName)); - long count = 0; - try { + try( StringFileReader reader = new StringFileReader(new File(pileupFileName));){ + long count = 0; for (String record : reader) { -// parsePileup(record); if (++count % 1000000 == 0) logger.info("hit " + count + " pileup records, with " + mutationId + " keepers."); } - } finally { - reader.close(); - } + } } } diff --git a/qsnp/src/org/qcmg/snp/TorrentPipeline.java b/qsnp/src/org/qcmg/snp/TorrentPipeline.java index 0451c903f..a2a77f339 100644 --- a/qsnp/src/org/qcmg/snp/TorrentPipeline.java +++ b/qsnp/src/org/qcmg/snp/TorrentPipeline.java @@ -6,51 +6,18 @@ */ package org.qcmg.snp; -import htsjdk.samtools.SAMRecord; - import 
java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; import org.ini4j.Ini; import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.meta.QExec; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.PileupElement; -import org.qcmg.common.model.Rule; -import org.qcmg.common.util.Pair; -import org.qcmg.picard.QJumper; -import org.qcmg.picard.util.PileupElementUtil; -import org.qcmg.picard.util.SAMUtils; -import org.qcmg.pileup.QSnpRecord; -import org.qcmg.snp.filters.AdjacentIndelFilter; -import org.qcmg.snp.filters.EndOfReadFilter; -import org.qcmg.snp.filters.MultipleAdjacentSnpsFilter; -import org.qcmg.snp.util.HeaderUtil; import org.qcmg.snp.util.IniFileUtil; -import org.qcmg.snp.util.RulesUtil; /** */ @Deprecated -public final class TorrentPipeline extends Pipeline { - - //input Files -// private String pileupFile; -// private String unfilteredNormalBamFile; - - - private final Map, List>> filteredPEs = - new HashMap<>(); - - private final Map filteredInfo = new HashMap(); - +public final class TorrentPipeline extends Pipeline { final static QLogger logger = QLoggerFactory.getLogger(TorrentPipeline.class); /** @@ -70,237 +37,14 @@ public TorrentPipeline(final Ini iniFile, QExec qexec, boolean singleSample) thr walkBams(); -// if (positionRecordMap.isEmpty()) throw new SnpException("EMPTY_PILEUP_FILE"); logger.info("Finished walking bams"); -// incorporateUnfilteredNormal(); -// -// strandBiasCorrection(); -// compoundSnps(); - - - // run torrent filter against suspected snp positions -// runTorrentFilters(normalBams, true); -// runTorrentFilters(tumourBams, false); - - // re-run initial rules -// reValidateSnpPositions(); - - // time for post-processing - // remove any previous annotations as they may no longer be valid -// for (QSnpRecord record : positionRecordMap.values()) { -// record.setAnnotation(null); -// } -// classifyPileup(); -// -// checkForMutationInNormal(); - + // write output writeVCF(vcfFile); } - private void reValidateSnpPositions() { - -// int removedCount = 0, totalCount = 0; -// -// for (Entry, List>> entry : filteredPEs.entrySet()) { -// boolean remove = true; -// totalCount++; -// -// List normalPileupElements = entry.getValue().getLeft(); -// List tumourPileupElements = entry.getValue().getRight(); -// ChrPosition cp = entry.getKey(); -// -// // get reference -// QSnpRecord qsr = positionRecordMap.get(cp); -// String refString = qsr.getRef(); -// if (refString.length() > 1) { -// logger.warn("refString: " + refString + " at TorrentPipeline.reValidateSnpPositions"); -// } -// char ref = refString.charAt(0); -// -// // get variant count for both -// int normalVariantCount = PileupElementUtil.getLargestVariantCount(normalPileupElements, ref); -// int tumourVariantCount = PileupElementUtil.getLargestVariantCount(tumourPileupElements, ref); -// -// int normalBaseCounts = PileupElementUtil.getCoverageFromPileupList(normalPileupElements); -// int tumourlBaseCounts = PileupElementUtil.getCoverageFromPileupList(tumourPileupElements); -// // get rule for normal and tumour -// Rule normalRule = RulesUtil.getRule(controlRules, normalBaseCounts); -// Rule tumourRule = RulesUtil.getRule(testRules, tumourlBaseCounts); -// if (cp.getChromosome().equals("chr21") && cp.getStartPosition() == 46334140) { -// logger.info("normalVariantCount : " 
+ normalVariantCount); -// logger.info("tumourVariantCount : " + tumourVariantCount); -// logger.info("normalBaseCounts : " + normalBaseCounts); -// logger.info("tumourlBaseCounts : " + tumourlBaseCounts); -// logger.info("normalRule : " + normalRule); -// logger.info("tumourRule : " + tumourRule); -// logger.info("normalPileupElements : " + PileupElementUtil.getPileupFromPileupList(normalPileupElements)); -// logger.info("tumourPileupElements : " + PileupElementUtil.getPileupFromPileupList(tumourPileupElements)); -// } -// -// // only keep record if it has enough variants -// if (isPileupRecordAKeeper(normalVariantCount, normalBaseCounts, normalRule, normalPileupElements, baseQualityPercentage) -// || isPileupRecordAKeeper(tumourVariantCount, tumourlBaseCounts, tumourRule, tumourPileupElements, baseQualityPercentage)) { -// remove = false; -// -// // if we have an entry in the filteringINfo map for this position, we have modified values for either ND or TD (or both) -// // update qsr record -// String info = filteredInfo.get(cp); -// if (null != info) { -// // do we need to update ND -// if (info.contains("ND")) { -//// qsr.setNormalNucleotides(PileupElementUtil.getPileupElementString(normalPileupElements, ref)); -// } -// if (info.contains("TD")) { -//// qsr.setTumourNucleotides(PileupElementUtil.getPileupElementString(tumourPileupElements, ref)); -// } -// } -// } -// -// if (remove) { -// // remove item from positions map -// positionRecordMap.remove(entry.getKey()); -// removedCount++; -// } -// } -// logger.info("removed: " + removedCount + ", from " + totalCount); - } - - private void runTorrentFilters(String [] bamFiles, boolean normal) throws Exception { - - QJumper qj = new QJumper(); - qj.setupReader(bamFiles); - - -// for (QSnpRecord rec : positionRecordMap.values()) { -// -// char alt = '\u0000'; -// String refString = rec.getRef(); -// if (refString.length() > 1) { -// logger.warn("refString: " + refString + " at TorrentPipeline.runTorrentFilters"); -// } -// char ref = refString.charAt(0); -// List sams = qj.getOverlappingRecordsAtPosition(rec.getChromosome(), rec.getPosition()); -// -// // get list of pileupelements from ND/TD fields -//// String pileupsString = normal ? rec.getNormalNucleotides() : rec.getTumourNucleotides(); -//// List pileups = PileupElementUtil.createPileupElementsFromString(pileupsString); -// -// -//// if (null == pileups || pileups.size() == 0) { -//// logger.info("no (empty??)" + (normal ? 
"ND" : "TD") + " field found for rec: " + rec.toString()); -//// continue; -//// } else if (pileups.size() == 1) { -//// char base = pileups.get(0).getBase(); -//// if (ref == base) { -//// continue; -//// } else { -//// alt = base; -//// } -//// } else { -//// // sort collection so that largest count is first -//// Collections.sort(pileups); -//// // get largest variant - set alt to this value -//// alt = PileupElementUtil.getLargestVariant(pileups, ref).getBase(); -//// -//// } -// -// MultipleAdjacentSnpsFilter multipleSnpsFilter = new MultipleAdjacentSnpsFilter(rec.getPosition()); -// AdjacentIndelFilter adjIndelFilter = new AdjacentIndelFilter(rec.getPosition()); -// EndOfReadFilter endOfReadFilter = new EndOfReadFilter(5, rec.getPosition()); -// -// List passingRecords = new ArrayList(); -// -// StringBuilder bases = new StringBuilder(); -// StringBuilder qualities = new StringBuilder(); -// int passedFilterCount = 0; -// int variantCount = 0; -// int deletionCount = 0; -// int i = 0; -// int tripleSnpCount = 0, adjIndelCount = 0, endOfReadCount = 0; -// -// for (SAMRecord sam : sams) { -// -// -// //ONLY FILTER READS THAT CARRY THE ALT ALLELE -// int readPosition = SAMUtils.getIndexInReadFromPosition(sam, rec.getPosition()); -// if (readPosition > -1) { -// byte base = sam.getReadBases()[readPosition]; -// -// // run through filters if we have the alt -// if (base == alt) { -// // go through each of the filters in turn -// if (adjIndelFilter.filterOut(sam)) { -// adjIndelCount++; -// if (rec.getChromosome().equals("chr1") && rec.getPosition() == 27101507) -// logger.info("adj indel filter fail : " + sam.getSAMString()); -// continue; -// } -// if (multipleSnpsFilter.filterOut(sam)) { -// tripleSnpCount++; -// if (rec.getChromosome().equals("chr1") && rec.getPosition() == 27101507) -// logger.info("triple snp filter fail : " + sam.getSAMString()); -// continue; -// } -// if (endOfReadFilter.filterOut(sam)) { -// endOfReadCount++; -// if (rec.getChromosome().equals("chr1") && rec.getPosition() == 27101507) -// logger.info("eor filter fail : " + sam.getSAMString()); -// continue; -// } -// } -// -// passedFilterCount++; -// passingRecords.add(sam); -// -// if (base == alt) { -// variantCount++; -// } -// // set base - use strand info to set case forward strand is upper case, reverse is lower -// bases.append(sam.getReadNegativeStrandFlag() ? Character.toLowerCase((char)base) : (char)base); -// qualities.append(htsjdk.samtools.SAMUtils.phredToFastq(sam.getBaseQualities()[readPosition])); -// } else { -// deletionCount++; -// } -// } -// ChrPosition cp = ChrPointPosition.valueOf(rec.getChromosome(), rec.getPosition()); -// -// if (adjIndelCount + tripleSnpCount + endOfReadCount > 0) { -// // add entry to filteredINfo map -// String info = filteredInfo.get(cp); -// if (null == info) { -// info = (normal ? "ND" : "TD") + (adjIndelCount > 0 ? ":" + adjIndelCount + "AI" : "") -// + (tripleSnpCount > 0 ? ":" + tripleSnpCount + "TS" : "") -// + (endOfReadCount > 0 ? ":" + endOfReadCount + "ER" : ""); -// } else { -// info += (normal ? ";ND" : ";TD") + (adjIndelCount > 0 ? ":" + adjIndelCount + "AI" : "") -// + (tripleSnpCount > 0 ? ":" + tripleSnpCount + "TS" : "") -// + (endOfReadCount > 0 ? 
":" + endOfReadCount + "ER" : ""); -// } -// filteredInfo.put(cp, info); -// } -// -// if (bases.length() != qualities.length()) { -// logger.info("bases : " + bases.toString() + ", qual: " + qualities.toString()); -// } -// if (rec.getChromosome().equals("chr21") && rec.getPosition() == 46334140) { -// logger.info("bases : " + bases.toString() + ", qual: " + qualities.toString() + ", ref: " + ref + ", alt: " + alt); -// } -// -// List baseCounts = PileupElementUtil.getPileupCounts(bases.toString(), qualities.toString()); -// -// Pair, List> p = filteredPEs.get(cp); -// if (null == p) { -// p = normal ? new Pair(baseCounts, null) : new Pair(null, baseCounts); -// filteredPEs.put(cp, p); -// } else { -// p = normal ? new Pair(baseCounts, p.getRight()) : new Pair(p.getLeft(), baseCounts); -// filteredPEs.put(cp, p); -// } -// } - } @Override void ingestIni(Ini ini) throws SnpException { @@ -311,23 +55,10 @@ void ingestIni(Ini ini) throws SnpException { controlRules = IniFileUtil.getRules(ini, "control"); testRules = IniFileUtil.getRules(ini, "test"); initialTestSumOfCountsLimit = IniFileUtil.getLowestRuleValue(ini); - - - // ADDITIONAL INPUT FILES -// pileupFile = IniFileUtil.getInputFile(ini, "pileup"); - - // ADDITIONAL SETUP -// noOfControlFiles = IniFileUtil.getNumberOfFiles(ini, 'N'); -// noOfTestFiles = IniFileUtil.getNumberOfFiles(ini, 'T'); - + // INCLUDE INDELS includeIndels = true; - - // log values retrieved from ini file -// logger.tool("**** ADDITIONAL INPUT FILES ****"); -// logger.tool("pileupFile: " + pileupFile); -// logger.tool("unfilteredNormalBamFile: " + unfilteredNormalBamFile); - + logger.tool("**** OTHER CONFIG ****"); logger.tool("No of control rules: " + controlRules.size()); logger.tool("No of test rules: " + testRules.size()); @@ -337,15 +68,4 @@ void ingestIni(Ini ini) throws SnpException { logger.tool("mutationIdPrefix: " + mutationIdPrefix); } -// @Override -// public String getOutputHeader(boolean isSomatic) { -// if (isSomatic) return HeaderUtil.DCC_SOMATIC_HEADER; -// else return HeaderUtil.DCC_GERMLINE_HEADER; -// } -// -// @Override -// public String getFormattedRecord(QSnpRecord record, final String ensemblChr) { -// return record.getDCCDataNSFlankingSeq(mutationIdPrefix, ensemblChr); -// } - } diff --git a/qsnp/test/org/qcmg/snp/TorrentPipelineTest.java b/qsnp/test/org/qcmg/snp/TorrentPipelineTest.java index 2d92cbdd3..7547e9bcd 100644 --- a/qsnp/test/org/qcmg/snp/TorrentPipelineTest.java +++ b/qsnp/test/org/qcmg/snp/TorrentPipelineTest.java @@ -1,16 +1,7 @@ package org.qcmg.snp; -import java.io.IOException; - -import junit.framework.Assert; - -import org.junit.Test; -import org.qcmg.common.model.ChrPointPosition; -import org.qcmg.common.model.ChrPosition; -import org.qcmg.common.model.GenotypeEnum; -import org.qcmg.common.util.SnpUtils; -import org.qcmg.pileup.QSnpRecord; +@Deprecated public class TorrentPipelineTest { From 8e5b6a1f62d4a55b2d4959493fbd342bd866e6ca Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Mon, 30 Nov 2020 22:35:57 +1000 Subject: [PATCH 51/73] mv unit test qio::QSnpRecord to qcommon::model --- qcommon/test/org/qcmg/common/model/QSnpRecordTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qcommon/test/org/qcmg/common/model/QSnpRecordTest.java b/qcommon/test/org/qcmg/common/model/QSnpRecordTest.java index b91100edd..92a5e6032 100644 --- a/qcommon/test/org/qcmg/common/model/QSnpRecordTest.java +++ b/qcommon/test/org/qcmg/common/model/QSnpRecordTest.java @@ -1,8 +1,9 @@ -package org.qcmg.pileup; +package 
org.qcmg.common.model; import static org.junit.Assert.assertEquals; import org.junit.Test; +import org.qcmg.common.model.QSnpRecord; import org.qcmg.common.util.SnpUtils; public class QSnpRecordTest { From 96dae065e75e9b62bf9e99e8e316a49b15f9a701 Mon Sep 17 00:00:00 2001 From: christix Date: Mon, 30 Nov 2020 23:01:36 +1000 Subject: [PATCH 52/73] update qsnp due to QSnpRecord relocation --- qsnp/src/org/qcmg/snp/MuTectPipeline.java | 2 +- qsnp/src/org/qcmg/snp/util/BAMPileupUtil.java | 3 ++- qsnp/src/org/qcmg/snp/util/QJumperWorker.java | 1 - qsnp/test/org/qcmg/snp/util/BAMPileupUtilTest.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/qsnp/src/org/qcmg/snp/MuTectPipeline.java b/qsnp/src/org/qcmg/snp/MuTectPipeline.java index c40aaa18f..caa1e7b00 100644 --- a/qsnp/src/org/qcmg/snp/MuTectPipeline.java +++ b/qsnp/src/org/qcmg/snp/MuTectPipeline.java @@ -31,12 +31,12 @@ import org.qcmg.common.model.ChrPointPosition; import org.qcmg.common.model.ChrPosition; import org.qcmg.common.model.GenotypeEnum; +import org.qcmg.common.model.QSnpRecord; import org.qcmg.common.model.ReferenceNameComparator; import org.qcmg.common.util.TabTokenizer; import org.qcmg.common.vcf.VcfRecord; import org.qcmg.picard.SAMFileReaderFactory; import org.qcmg.picard.util.SAMUtils; -import org.qcmg.pileup.QSnpRecord; import org.qcmg.common.model.Classification; import org.qcmg.snp.util.IniFileUtil; import org.qcmg.qio.record.StringFileReader; diff --git a/qsnp/src/org/qcmg/snp/util/BAMPileupUtil.java b/qsnp/src/org/qcmg/snp/util/BAMPileupUtil.java index 813a5cb88..4c26da7e2 100644 --- a/qsnp/src/org/qcmg/snp/util/BAMPileupUtil.java +++ b/qsnp/src/org/qcmg/snp/util/BAMPileupUtil.java @@ -16,9 +16,10 @@ import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.model.QSnpGATKRecord; +import org.qcmg.common.model.QSnpRecord; import org.qcmg.picard.util.PileupElementUtil; import org.qcmg.picard.util.SAMUtils; -import org.qcmg.pileup.QSnpRecord; + import org.qcmg.snp.util.QJumperWorker.Mode; public class BAMPileupUtil { diff --git a/qsnp/src/org/qcmg/snp/util/QJumperWorker.java b/qsnp/src/org/qcmg/snp/util/QJumperWorker.java index 6af2298c0..1353636cc 100644 --- a/qsnp/src/org/qcmg/snp/util/QJumperWorker.java +++ b/qsnp/src/org/qcmg/snp/util/QJumperWorker.java @@ -14,7 +14,6 @@ import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.model.QSnpGATKRecord; import org.qcmg.picard.QJumper; -import org.qcmg.pileup.QSnpRecord; public class QJumperWorker implements Runnable { diff --git a/qsnp/test/org/qcmg/snp/util/BAMPileupUtilTest.java b/qsnp/test/org/qcmg/snp/util/BAMPileupUtilTest.java index e524b9500..208efd516 100644 --- a/qsnp/test/org/qcmg/snp/util/BAMPileupUtilTest.java +++ b/qsnp/test/org/qcmg/snp/util/BAMPileupUtilTest.java @@ -12,7 +12,7 @@ import org.qcmg.common.util.SnpUtils; import org.qcmg.common.vcf.VcfRecord; import org.qcmg.common.vcf.VcfUtils; -import org.qcmg.pileup.QSnpRecord; +import org.qcmg.common.model.QSnpRecord; import org.qcmg.snp.util.QJumperWorker.Mode; public class BAMPileupUtilTest { From dc63f6f51b377c9ad4a3684c1153be23522ed48e Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 2 Dec 2020 12:56:40 +1000 Subject: [PATCH 53/73] update qio::ma package by using extends qio::Record::RecordReader --- .../src/org/qcmg/qbamannotate/Annotator.java | 12 +- .../org/qcmg/qbamannotate/AnnotatorType.java | 6 +- .../src/org/qcmg/qbamannotate/Frag.java | 6 +- .../org/qcmg/qbamannotate/LongMatePair.java | 15 +- 
.../src/org/qcmg/qbamannotate/PairedEnd.java | 14 +- qio/src/org/qcmg/ma/MAFileReader.java | 55 ------- qio/src/org/qcmg/ma/MAMapping.java | 44 ----- qio/src/org/qcmg/{ => qio}/ma/MAHeader.java | 0 .../{ => qio}/ma/MAMappingParameters.java | 0 .../qcmg/{ => qio}/ma/MARecordIterator.java | 0 .../org/qcmg/{ => qio}/ma/MASerializer.java | 0 .../MADefLine.java => qio/ma/MaDefLine.java} | 22 ++- .../ma/MaDirection.java} | 11 +- qio/src/org/qcmg/qio/ma/MaFileReader.java | 98 +++++++++++ qio/src/org/qcmg/qio/ma/MaMapping.java | 73 +++++++++ .../MARecord.java => qio/ma/MaRecord.java} | 10 +- qio/src/org/qcmg/qio/record/RecordReader.java | 1 - qio/test/org/qcmg/ma/MAMappingTest.java | 35 ---- qio/test/org/qcmg/ma/MASerializerTest.java | 113 ------------- qio/test/org/qcmg/qio/gff/GffReaderTest.java | 2 +- .../org/qcmg/{ => qio}/ma/MADefLineTest.java | 21 +-- .../qcmg/{ => qio}/ma/MADirectionTest.java | 29 ++-- .../org/qcmg/{ => qio}/ma/MAHeaderTest.java | 0 .../{ => qio}/ma/MAMappingParametersTest.java | 0 qio/test/org/qcmg/qio/ma/MAMappingTest.java | 24 +++ .../org/qcmg/{ => qio}/ma/MARecordTest.java | 22 ++- .../org/qcmg/qio/ma/MaFileReaderTest.java | 153 ++++++++++++++++++ 27 files changed, 421 insertions(+), 345 deletions(-) delete mode 100644 qio/src/org/qcmg/ma/MAFileReader.java delete mode 100644 qio/src/org/qcmg/ma/MAMapping.java rename qio/src/org/qcmg/{ => qio}/ma/MAHeader.java (100%) rename qio/src/org/qcmg/{ => qio}/ma/MAMappingParameters.java (100%) rename qio/src/org/qcmg/{ => qio}/ma/MARecordIterator.java (100%) rename qio/src/org/qcmg/{ => qio}/ma/MASerializer.java (100%) rename qio/src/org/qcmg/{ma/MADefLine.java => qio/ma/MaDefLine.java} (56%) rename qio/src/org/qcmg/{ma/MADirection.java => qio/ma/MaDirection.java} (68%) create mode 100644 qio/src/org/qcmg/qio/ma/MaFileReader.java create mode 100644 qio/src/org/qcmg/qio/ma/MaMapping.java rename qio/src/org/qcmg/{ma/MARecord.java => qio/ma/MaRecord.java} (70%) delete mode 100644 qio/test/org/qcmg/ma/MAMappingTest.java delete mode 100644 qio/test/org/qcmg/ma/MASerializerTest.java rename qio/test/org/qcmg/{ => qio}/ma/MADefLineTest.java (61%) rename qio/test/org/qcmg/{ => qio}/ma/MADirectionTest.java (62%) rename qio/test/org/qcmg/{ => qio}/ma/MAHeaderTest.java (100%) rename qio/test/org/qcmg/{ => qio}/ma/MAMappingParametersTest.java (100%) create mode 100644 qio/test/org/qcmg/qio/ma/MAMappingTest.java rename qio/test/org/qcmg/{ => qio}/ma/MARecordTest.java (52%) create mode 100644 qio/test/org/qcmg/qio/ma/MaFileReaderTest.java diff --git a/qbamannotate/src/org/qcmg/qbamannotate/Annotator.java b/qbamannotate/src/org/qcmg/qbamannotate/Annotator.java index 2c0ee7219..d793107f0 100644 --- a/qbamannotate/src/org/qcmg/qbamannotate/Annotator.java +++ b/qbamannotate/src/org/qcmg/qbamannotate/Annotator.java @@ -15,8 +15,8 @@ import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; -import org.qcmg.ma.MAFileReader; -import org.qcmg.ma.MARecord; +import org.qcmg.ma.MaFileReader; +import org.qcmg.ma.MaRecord; import org.qcmg.picard.HeaderUtils; import org.qcmg.picard.SAMFileReaderFactory; @@ -31,10 +31,10 @@ public final class Annotator { private SAMFileWriter fileWriter; private SAMFileHeader header; private SamReader samFileReader; - private MAFileReader maFileReader; + private MaFileReader maFileReader; private Iterator samIterator; - private Iterator maIterator; - private MARecord nextMARecord; + private Iterator maIterator; + private MaRecord nextMARecord; private SAMRecord nextSAMRecord; private AnnotatorType type = 
new Frag(); @@ -140,7 +140,7 @@ private void nextMARecord() { private void marchAndAnnotate() throws Exception { try { samFileReader = SAMFileReaderFactory.createSAMFileReader( inputBAMFile) ; //new SAMFileReader(inputBAMFile); - maFileReader = new MAFileReader(inputMAFile); + maFileReader = new MaFileReader(inputMAFile); header = samFileReader.getFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.unsorted); if (modifyProgramLine) { diff --git a/qbamannotate/src/org/qcmg/qbamannotate/AnnotatorType.java b/qbamannotate/src/org/qcmg/qbamannotate/AnnotatorType.java index 42ed39dee..b78c3bb08 100644 --- a/qbamannotate/src/org/qcmg/qbamannotate/AnnotatorType.java +++ b/qbamannotate/src/org/qcmg/qbamannotate/AnnotatorType.java @@ -8,7 +8,7 @@ import java.util.regex.Pattern; -import org.qcmg.ma.MARecord; +import org.qcmg.ma.MaRecord; import htsjdk.samtools.SAMRecord; @@ -19,7 +19,7 @@ public abstract class AnnotatorType { public abstract boolean annotate(final SAMRecord record) throws Exception; public abstract boolean annotate(final SAMRecord record, - final MARecord maRecord) throws Exception; + final MaRecord maRecord) throws Exception; public void markRecordUnmatched(final SAMRecord record) { Object zmObj = record.getAttribute("ZM"); @@ -47,7 +47,7 @@ static void setZMAttribute(final SAMRecord record, int n) { record.setAttribute("ZM", Integer.toString(n)); } - static int compareTriplet(final SAMRecord record, final MARecord maRecord) { + static int compareTriplet(final SAMRecord record, final MaRecord maRecord) { return compareTriplet(record.getReadName(), maRecord.getDefLine() .getReadName()); } diff --git a/qbamannotate/src/org/qcmg/qbamannotate/Frag.java b/qbamannotate/src/org/qcmg/qbamannotate/Frag.java index b3f86c392..acc22b39e 100644 --- a/qbamannotate/src/org/qcmg/qbamannotate/Frag.java +++ b/qbamannotate/src/org/qcmg/qbamannotate/Frag.java @@ -6,8 +6,8 @@ */ package org.qcmg.qbamannotate; -import org.qcmg.ma.MADirection; -import org.qcmg.ma.MARecord; +import org.qcmg.ma.MaDirection; +import org.qcmg.ma.MaRecord; import htsjdk.samtools.SAMRecord; @@ -19,7 +19,7 @@ public boolean annotate(final SAMRecord record) throws Exception { } @Override - public boolean annotate(SAMRecord record, MARecord maRecord) throws Exception { + public boolean annotate(SAMRecord record, MaRecord maRecord) throws Exception { int n = maRecord.getDefLine().getNumberMappings(); setZMAttribute(record, n); return annotate(record); diff --git a/qbamannotate/src/org/qcmg/qbamannotate/LongMatePair.java b/qbamannotate/src/org/qcmg/qbamannotate/LongMatePair.java index 431af5964..4255625fb 100644 --- a/qbamannotate/src/org/qcmg/qbamannotate/LongMatePair.java +++ b/qbamannotate/src/org/qcmg/qbamannotate/LongMatePair.java @@ -18,20 +18,21 @@ import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.stream.StreamSource; + +import org.qcmg.ma.MaDirection; +import org.qcmg.ma.MaRecord; + import javax.xml.transform.stream.StreamResult; import java.io.*; import htsjdk.samtools.SAMRecord; -import org.qcmg.ma.MADirection; -import org.qcmg.ma.MARecord; - public class LongMatePair extends AnnotatorType { final int isizeLowerLimit; final int isizeUpperLimit; private String zpAnnotation; private SAMRecord record; - private MARecord maRecord; + private MaRecord maRecord; private int annotatedCount = 0; private final Map zpToCount = new HashMap(); private String xmlReport; @@ -65,7 +66,7 @@ public boolean annotate(final SAMRecord record) throws Exception { } @Override - 
public boolean annotate(SAMRecord record, MARecord maRecord) throws Exception { + public boolean annotate(SAMRecord record, MaRecord maRecord) throws Exception { this.record = record; this.maRecord = maRecord; performZMAnnotation(); @@ -96,10 +97,10 @@ public String generateReport() throws Exception { private void performZMAnnotation() { assert 0 == compareTriplet(record, maRecord); - if (maRecord.getDefLine().getDirection() == MADirection.F3 + if (maRecord.getDefLine().getDirection() == MaDirection.F3 && record.getReadPairedFlag() && record.getFirstOfPairFlag()) { setZMAttribute(); - } else if (maRecord.getDefLine().getDirection() == MADirection.R3 + } else if (maRecord.getDefLine().getDirection() == MaDirection.R3 && record.getReadPairedFlag() && record.getSecondOfPairFlag()) { setZMAttribute(); } else { diff --git a/qbamannotate/src/org/qcmg/qbamannotate/PairedEnd.java b/qbamannotate/src/org/qcmg/qbamannotate/PairedEnd.java index 18bc66bd8..eb2fb9d05 100644 --- a/qbamannotate/src/org/qcmg/qbamannotate/PairedEnd.java +++ b/qbamannotate/src/org/qcmg/qbamannotate/PairedEnd.java @@ -14,17 +14,17 @@ import javax.xml.bind.JAXBContext; import javax.xml.bind.Marshaller; -import htsjdk.samtools.SAMRecord; +import org.qcmg.ma.MaDirection; +import org.qcmg.ma.MaRecord; -import org.qcmg.ma.MADirection; -import org.qcmg.ma.MARecord; +import htsjdk.samtools.SAMRecord; public class PairedEnd extends AnnotatorType { final int isizeLowerLimit; final int isizeUpperLimit; private String zpAnnotation; private SAMRecord record; - private MARecord maRecord; + private MaRecord maRecord; private int annotatedCount = 0; private final Map zpToCount = new HashMap(); private String xmlReport; @@ -57,7 +57,7 @@ public boolean annotate(final SAMRecord record) throws Exception { } @Override - public boolean annotate(SAMRecord record, MARecord maRecord) + public boolean annotate(SAMRecord record, MaRecord maRecord) throws Exception { this.record = record; this.maRecord = maRecord; @@ -90,10 +90,10 @@ public String generateReport() throws Exception { private void performZMAnnotation() { assert 0 == compareTriplet(record, maRecord); - if (maRecord.getDefLine().getDirection() == MADirection.F3 + if (maRecord.getDefLine().getDirection() == MaDirection.F3 && record.getReadPairedFlag() && record.getFirstOfPairFlag()) { setZMAttribute(); - } else if (maRecord.getDefLine().getDirection() == MADirection.F5 + } else if (maRecord.getDefLine().getDirection() == MaDirection.F5 && record.getReadPairedFlag() && record.getSecondOfPairFlag()) { setZMAttribute(); } else { diff --git a/qio/src/org/qcmg/ma/MAFileReader.java b/qio/src/org/qcmg/ma/MAFileReader.java deleted file mode 100644 index 36d165beb..000000000 --- a/qio/src/org/qcmg/ma/MAFileReader.java +++ /dev/null @@ -1,55 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.ma; - -import java.io.BufferedReader; -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; - -public final class MAFileReader implements Closeable, Iterable { - private final File file; - private final MAHeader header; - private final InputStream inputStream; - - public MAFileReader(final File file) throws Exception { - this.file = file; - - FileInputStream stream = new FileInputStream(file); - InputStreamReader streamReader = new InputStreamReader(stream); - BufferedReader in = new BufferedReader(streamReader); - - header = MASerializer.readHeader(in); - stream.close(); - - FileInputStream fileStream = new FileInputStream(file); - inputStream = fileStream; - } - - public MAHeader getHeader() { - return header; - } - - @Override - public Iterator iterator() { - return getRecordIterator(); - } - - public MARecordIterator getRecordIterator() { - return new MARecordIterator(inputStream); - } - - @Override - public void close() throws IOException { - inputStream.close(); - } - - public File getFile() { - return file; - } -} diff --git a/qio/src/org/qcmg/ma/MAMapping.java b/qio/src/org/qcmg/ma/MAMapping.java deleted file mode 100644 index d549ccd77..000000000 --- a/qio/src/org/qcmg/ma/MAMapping.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.ma; - -public final class MAMapping { - private final String chromosome; - private final String location; - private final int mismatchCount; - private final MAMappingParameters parameters; - private final String quality; - - MAMapping(final String mappingChromosome, final String mappingLocation, - final int mappingMismatchCount, - final MAMappingParameters mappingParameters, - final String mappingQuality) { - chromosome = mappingChromosome; - location = mappingLocation; - mismatchCount = mappingMismatchCount; - parameters = mappingParameters; - quality = mappingQuality; - } - - public String getChromosome() { - return chromosome; - } - - public String getLocation() { - return location; - } - - public int getMismatchCount() { - return mismatchCount; - } - - public MAMappingParameters getParameters() { - return parameters; - } - - public String getQuality() { - return quality; - } - -} diff --git a/qio/src/org/qcmg/ma/MAHeader.java b/qio/src/org/qcmg/qio/ma/MAHeader.java similarity index 100% rename from qio/src/org/qcmg/ma/MAHeader.java rename to qio/src/org/qcmg/qio/ma/MAHeader.java diff --git a/qio/src/org/qcmg/ma/MAMappingParameters.java b/qio/src/org/qcmg/qio/ma/MAMappingParameters.java similarity index 100% rename from qio/src/org/qcmg/ma/MAMappingParameters.java rename to qio/src/org/qcmg/qio/ma/MAMappingParameters.java diff --git a/qio/src/org/qcmg/ma/MARecordIterator.java b/qio/src/org/qcmg/qio/ma/MARecordIterator.java similarity index 100% rename from qio/src/org/qcmg/ma/MARecordIterator.java rename to qio/src/org/qcmg/qio/ma/MARecordIterator.java diff --git a/qio/src/org/qcmg/ma/MASerializer.java b/qio/src/org/qcmg/qio/ma/MASerializer.java similarity index 100% rename from qio/src/org/qcmg/ma/MASerializer.java rename to qio/src/org/qcmg/qio/ma/MASerializer.java diff --git a/qio/src/org/qcmg/ma/MADefLine.java b/qio/src/org/qcmg/qio/ma/MaDefLine.java similarity index 56% rename from qio/src/org/qcmg/ma/MADefLine.java rename to 
qio/src/org/qcmg/qio/ma/MaDefLine.java index 642d25e06..2bb112a61 100644 --- a/qio/src/org/qcmg/ma/MADefLine.java +++ b/qio/src/org/qcmg/qio/ma/MaDefLine.java @@ -1,26 +1,24 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.ma; +package org.qcmg.qio.ma; import java.util.Iterator; import java.util.Vector; -public final class MADefLine { +public final class MaDefLine { private final String readName; - private final MADirection direction; - private final Vector mappings = new Vector(); + private final MaDirection direction; + private final Vector mappings = new Vector(); - public MADefLine(final String readName, final MADirection direction) - throws Exception { + public MaDefLine(final String readName, final MaDirection direction) { this.readName = readName; this.direction = direction; } - public MADefLine(final String readName, final MADirection direction, - final Vector mappings) throws Exception { + public MaDefLine(final String readName, final MaDirection direction, final Vector mappings) { this(readName, direction); - for (final MAMapping mapping : mappings) { + for (final MaMapping mapping : mappings) { this.mappings.add(mapping); } } @@ -29,7 +27,7 @@ public String getReadName() { return readName; } - public MADirection getDirection() { + public MaDirection getDirection() { return direction; } @@ -41,11 +39,11 @@ public int getNumberMappings() { return mappings.size(); } - public Iterator iterator() { + public Iterator iterator() { return getMappingIterator(); } - public Iterator getMappingIterator() { + public Iterator getMappingIterator() { return mappings.iterator(); } } diff --git a/qio/src/org/qcmg/ma/MADirection.java b/qio/src/org/qcmg/qio/ma/MaDirection.java similarity index 68% rename from qio/src/org/qcmg/ma/MADirection.java rename to qio/src/org/qcmg/qio/ma/MaDirection.java index ef15ebcf9..41d3dbb57 100644 --- a/qio/src/org/qcmg/ma/MADirection.java +++ b/qio/src/org/qcmg/qio/ma/MaDirection.java @@ -1,14 +1,13 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. */ -package org.qcmg.ma; +package org.qcmg.qio.ma; -public enum MADirection { +public enum MaDirection { F3, R3, F5,F5_BC; - public static MADirection getDirection(String directionStr) - throws Exception { - MADirection result = null; + public static MaDirection getDirection(String directionStr) { + MaDirection result = null; if (0 == directionStr.compareTo("F3")) { result = F3; @@ -19,7 +18,7 @@ public static MADirection getDirection(String directionStr) } else if (0 == directionStr.compareTo("F5-BC")) { result = F5_BC; } else { - throw new Exception("Unknown direction type: " + directionStr); + throw new IllegalArgumentException("Unknown direction type: " + directionStr); } return result; } diff --git a/qio/src/org/qcmg/qio/ma/MaFileReader.java b/qio/src/org/qcmg/qio/ma/MaFileReader.java new file mode 100644 index 000000000..fca30a6e1 --- /dev/null +++ b/qio/src/org/qcmg/qio/ma/MaFileReader.java @@ -0,0 +1,98 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ + +package org.qcmg.qio.ma; + +import java.io.File; +import java.io.IOException; +import java.util.Vector; +import java.util.regex.Pattern; + +import org.qcmg.common.util.Constants; +import org.qcmg.qio.record.RecordReader; + +public final class MaFileReader extends RecordReader { + private static final String HEADER_PREFIX = Constants.HASH_STRING; + private static final Pattern mappingPattern = Pattern.compile("[_.:()]+"); + + public MaFileReader(File file) throws IOException { + super(file, HEADER_PREFIX); + } + + @Override + /** + * it has to read two line to construct one record + */ + public MaRecord getRecord(String line) { + + String defLine = line; + + //read sequence + try { + String sequence = bin.readLine(); + return new MaRecord(parseDefLine(defLine), sequence); + } catch (IOException e) { + e.printStackTrace(); + return null; + } + } + + MaDefLine parseDefLine(final String value) { + if (!value.startsWith(">")) { + throw new IllegalArgumentException("Missing \">\" prefix for defLine: " + value); + } + + String rawValue = value.substring(1); + + String[] params = rawValue.split(Constants.COMMA_STRING); + //commaDelimitedPattern.split(rawValue); + if (1 > params.length) { + throw new IllegalArgumentException("Bad defLine format: " + rawValue); + } + + String key = params[0]; + String[] indices = key.split("_"); + if (4 != indices.length) { + throw new IllegalArgumentException("Bad defLine ID: " + key); + } + + String panel = indices[0]; + String x = indices[1]; + String y = indices[2]; + String type = indices[3]; + + String readName = panel + "_" + x + "_" + y; + MaDirection direction = MaDirection.getDirection(type); + + Vector mappings = new Vector(); + for (int i = 1; i < params.length; i++) { + mappings.add(parseMapping(params[i])); + } + + return new MaDefLine(readName, direction, mappings); + } + + private MaMapping parseMapping(final String value) { + String[] params = mappingPattern.split( value ); //value.split("_.:()"); + + if (7 != params.length) { + throw new IllegalArgumentException("Bad mapping format: " + value); + } + + int length = Integer.parseInt(params[3].trim()); + int possibleMismatches = Integer.parseInt(params[4].trim()); + int seedStart = Integer.parseInt(params[5].trim()); + + String chromosome = params[0].trim(); + String location = params[1].trim(); + int mismatchCount = Integer.parseInt(params[2].trim()); + String quality = params[6].trim(); + + return new MaMapping(chromosome, location, mismatchCount, length, possibleMismatches, seedStart, quality); + } +} + + + + diff --git a/qio/src/org/qcmg/qio/ma/MaMapping.java b/qio/src/org/qcmg/qio/ma/MaMapping.java new file mode 100644 index 000000000..564799270 --- /dev/null +++ b/qio/src/org/qcmg/qio/ma/MaMapping.java @@ -0,0 +1,73 @@ +/** + * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
+ */ +package org.qcmg.qio.ma; + +public final class MaMapping { + private final String chromosome; + private final String location; + private final int mismatchCount; +// private final MAMappingParameters parameters; + private final String quality; + + private final int length; + private final int possibleMismatches; + private final int seedStart; + + + + MaMapping(final String mappingChromosome, final String mappingLocation, + final int mappingMismatchCount, + // final MAMappingParameters mappingParameters, + int mappingLength, + int mappingPossibleMismatches, int mappingSeedStart, + + + final String mappingQuality) { + chromosome = mappingChromosome; + location = mappingLocation; + mismatchCount = mappingMismatchCount; + // parameters = mappingParameters; + + length = mappingLength; + possibleMismatches = mappingPossibleMismatches; + seedStart = mappingSeedStart; + + + quality = mappingQuality; + } + + public String getChromosome() { + return chromosome; + } + + public String getLocation() { + return location; + } + + public int getMismatchCount() { + return mismatchCount; + } + +// public MAMappingParameters getParameters() { +// return parameters; +// } + public int getLength() { + return length; + } + + public int getPossibleMismatches() { + return possibleMismatches; + } + + public int getSeedStart() { + return seedStart; + } + + + + public String getQuality() { + return quality; + } + +} diff --git a/qio/src/org/qcmg/ma/MARecord.java b/qio/src/org/qcmg/qio/ma/MaRecord.java similarity index 70% rename from qio/src/org/qcmg/ma/MARecord.java rename to qio/src/org/qcmg/qio/ma/MaRecord.java index 0581193f9..421c4b215 100644 --- a/qio/src/org/qcmg/ma/MARecord.java +++ b/qio/src/org/qcmg/qio/ma/MaRecord.java @@ -1,19 +1,19 @@ /** * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
*/ -package org.qcmg.ma; +package org.qcmg.qio.ma; -public final class MARecord { - private final MADefLine defLine; +public final class MaRecord { + private final MaDefLine defLine; private final String readSequence; - public MARecord(final MADefLine recordDefLine, + public MaRecord(final MaDefLine recordDefLine, final String recordReadSequence) { defLine = recordDefLine; readSequence = recordReadSequence; } - public MADefLine getDefLine() { + public MaDefLine getDefLine() { return defLine; } diff --git a/qio/src/org/qcmg/qio/record/RecordReader.java b/qio/src/org/qcmg/qio/record/RecordReader.java index 8c169d279..b4c3be656 100644 --- a/qio/src/org/qcmg/qio/record/RecordReader.java +++ b/qio/src/org/qcmg/qio/record/RecordReader.java @@ -71,7 +71,6 @@ public RecordReader(final File file, int bufferSize, CharSequence headerPrefix, */ public String readHeaderAndReturnFirstNonHeaderLine(CharSequence headerPrefix ) throws IOException { - String nextLine = bin.readLine(); //keep empty header and return first nonHeaderline diff --git a/qio/test/org/qcmg/ma/MAMappingTest.java b/qio/test/org/qcmg/ma/MAMappingTest.java deleted file mode 100644 index 9f7732205..000000000 --- a/qio/test/org/qcmg/ma/MAMappingTest.java +++ /dev/null @@ -1,35 +0,0 @@ -package org.qcmg.ma; - -import static org.junit.Assert.assertTrue; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class MAMappingTest -{ - @Before - public final void before() - { - } - - @After - public final void after() - { - } - - @Test - public final void createMAMapping() - throws Exception - { - MAMappingParameters mp = new MAMappingParameters(3,2,4); - MAMapping mm = new MAMapping("chromo1", "location1", 3, mp, "qv"); - - assertTrue(mm.getChromosome().equals("chromo1")); - assertTrue(mm.getLocation().equals("location1")); - assertTrue(3 == mm.getMismatchCount()); - assertTrue(mp == mm.getParameters()); - assertTrue(mm.getQuality().equals("qv")); - } - -} diff --git a/qio/test/org/qcmg/ma/MASerializerTest.java b/qio/test/org/qcmg/ma/MASerializerTest.java deleted file mode 100644 index 01467dfc5..000000000 --- a/qio/test/org/qcmg/ma/MASerializerTest.java +++ /dev/null @@ -1,113 +0,0 @@ -package org.qcmg.ma; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.util.Iterator; - -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -public class MASerializerTest -{ - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Before - public final void before() - { - } - - @After - public final void after() - { - } - - @Test - public final void decodeDefLineNoMappings() - throws Exception - { - String defLine = ">1_8_184_F3"; - MADefLine d = MASerializer.parseDefLine(defLine); - - assertTrue(d.getReadName().equals("1_8_184")); - assertFalse(d.hasMappings()); - } - - @Test - public final void decodeDefLineWithMappings() - throws Exception - { - ExpectedException.none(); - - String defLine = ">1_8_184_F3,8_-30078837.2:(31.3.0):q4,10_-9547536.2:(27.2.0):q1,18_-46572772.2:(26.2.0):q1,23_16023538.2:(24.2.0):q0"; - MADefLine d = MASerializer.parseDefLine(defLine); - - assertTrue(d.getReadName().equals("1_8_184")); - assertTrue(d.hasMappings()); - - Iterator iter = d.iterator(); - - assertTrue(4 == d.getNumberMappings()); - - assertTrue(iter.hasNext()); - if (iter.hasNext()) { - MAMapping mapping = iter.next(); - assertTrue(mapping.getChromosome().equals("8")); - 
assertTrue(mapping.getLocation(), mapping.getLocation().equals("-30078837")); - assertTrue(31 == mapping.getParameters().getLength()); - assertTrue(3 == mapping.getParameters().getPossibleMismatches()); - assertTrue(0 == mapping.getParameters().getSeedStart()); - assertTrue(mapping.getQuality(), mapping.getQuality().equals("q4")); - } - - assertTrue(iter.hasNext()); - if (iter.hasNext()) { - MAMapping mapping = iter.next(); - assertTrue(mapping.getChromosome().equals("10")); - assertTrue(mapping.getLocation(), mapping.getLocation().equals("-9547536")); - assertTrue(27 == mapping.getParameters().getLength()); - assertTrue(2 == mapping.getParameters().getPossibleMismatches()); - assertTrue(0 == mapping.getParameters().getSeedStart()); - assertTrue(mapping.getQuality(), mapping.getQuality().equals("q1")); - } - - assertTrue(iter.hasNext()); - if (iter.hasNext()) { - MAMapping mapping = iter.next(); - assertTrue(mapping.getChromosome().equals("18")); - assertTrue(mapping.getLocation(), mapping.getLocation().equals("-46572772")); - assertTrue(26 == mapping.getParameters().getLength()); - assertTrue(2 == mapping.getParameters().getPossibleMismatches()); - assertTrue(0 == mapping.getParameters().getSeedStart()); - assertTrue(mapping.getQuality(), mapping.getQuality().equals("q1")); - } - - assertTrue(iter.hasNext()); - if (iter.hasNext()) { - MAMapping mapping = iter.next(); - assertTrue(mapping.getChromosome().equals("23")); - assertTrue(mapping.getLocation(), mapping.getLocation().equals("16023538")); - assertTrue(24 == mapping.getParameters().getLength()); - assertTrue(2 == mapping.getParameters().getPossibleMismatches()); - assertTrue(0 == mapping.getParameters().getSeedStart()); - assertTrue(mapping.getQuality(), mapping.getQuality().equals("q0")); - } - - assertFalse(iter.hasNext()); - } - - @Test - public final void decodeRecord() - throws Exception - { - ExpectedException.none(); - - String defLine = ">1_8_184_F3,8_-30078837.2:(31.3.0):q4,10_-9547536.2:(27.2.0):q1,18_-46572772.2:(26.2.0):q1,23_16023538.2:(24.2.0):q0"; - String sequence = "T1100011201110111121111111111211121.112211122111221"; - MASerializer.parseRecord(defLine, sequence); - } -} diff --git a/qio/test/org/qcmg/qio/gff/GffReaderTest.java b/qio/test/org/qcmg/qio/gff/GffReaderTest.java index 4cab49d75..bcc1a6688 100644 --- a/qio/test/org/qcmg/qio/gff/GffReaderTest.java +++ b/qio/test/org/qcmg/qio/gff/GffReaderTest.java @@ -137,7 +137,7 @@ public void testParseRecordWithInvalidAttributes() throws Exception { } } - private static void createTestFile(String fileName, List data) { + public static void createTestFile(String fileName, List data) { PrintWriter out; try { diff --git a/qio/test/org/qcmg/ma/MADefLineTest.java b/qio/test/org/qcmg/qio/ma/MADefLineTest.java similarity index 61% rename from qio/test/org/qcmg/ma/MADefLineTest.java rename to qio/test/org/qcmg/qio/ma/MADefLineTest.java index 3d305e9ea..eece8ef08 100644 --- a/qio/test/org/qcmg/ma/MADefLineTest.java +++ b/qio/test/org/qcmg/qio/ma/MADefLineTest.java @@ -1,4 +1,4 @@ -package org.qcmg.ma; +package org.qcmg.qio.ma; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -6,29 +6,20 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.qcmg.qio.ma.MaDefLine; +import org.qcmg.qio.ma.MaDirection; public class MADefLineTest { - @Before - public final void before() - { - } - - @After - public final void after() - { - } - @Test - public final void create() - throws Exception + public final void create() 
throws Exception { - MADefLine defLine = new MADefLine("12_444_3", MADirection.F3); + MaDefLine defLine = new MaDefLine("12_444_3", MaDirection.F3); assertFalse(defLine.getMappingIterator().hasNext()); assertFalse(defLine.iterator().hasNext()); assertTrue(defLine.getReadName().equals("12_444_3")); - assertTrue(MADirection.F3 == defLine.getDirection()); + assertTrue(MaDirection.F3 == defLine.getDirection()); assertFalse(defLine.hasMappings()); assertTrue(0 == defLine.getNumberMappings()); } diff --git a/qio/test/org/qcmg/ma/MADirectionTest.java b/qio/test/org/qcmg/qio/ma/MADirectionTest.java similarity index 62% rename from qio/test/org/qcmg/ma/MADirectionTest.java rename to qio/test/org/qcmg/qio/ma/MADirectionTest.java index f8034bae5..28fb07da0 100644 --- a/qio/test/org/qcmg/ma/MADirectionTest.java +++ b/qio/test/org/qcmg/qio/ma/MADirectionTest.java @@ -1,4 +1,4 @@ -package org.qcmg.ma; +package org.qcmg.qio.ma; import static org.junit.Assert.assertTrue; @@ -7,36 +7,27 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.qcmg.qio.ma.MaDirection; public final class MADirectionTest { @Rule public ExpectedException thrown = ExpectedException.none(); - @Before - public void before() - { - } - - @After - public void after() - { - } - @Test public void createDirection() throws Exception { ExpectedException.none(); - MADirection f3 = MADirection.getDirection("F3"); - assertTrue(MADirection.F3 == f3); + MaDirection f3 = MaDirection.getDirection("F3"); + assertTrue(MaDirection.F3 == f3); - MADirection r3 = MADirection.getDirection("R3"); - assertTrue(MADirection.R3 == r3); + MaDirection r3 = MaDirection.getDirection("R3"); + assertTrue(MaDirection.R3 == r3); - MADirection f5 = MADirection.getDirection("F5"); - assertTrue(MADirection.F5 == f5); + MaDirection f5 = MaDirection.getDirection("F5"); + assertTrue(MaDirection.F5 == f5); } @Test @@ -47,7 +38,7 @@ public void excludeR5Direction() thrown.expect(Exception.class); thrown.expectMessage("Unknown direction type: R5"); - MADirection.getDirection("R5"); + MaDirection.getDirection("R5"); } @Test @@ -56,6 +47,6 @@ public void invalidDirection() { thrown.expect(Exception.class); thrown.expectMessage("Unknown direction type: X6"); - MADirection.getDirection("X6"); + MaDirection.getDirection("X6"); } } diff --git a/qio/test/org/qcmg/ma/MAHeaderTest.java b/qio/test/org/qcmg/qio/ma/MAHeaderTest.java similarity index 100% rename from qio/test/org/qcmg/ma/MAHeaderTest.java rename to qio/test/org/qcmg/qio/ma/MAHeaderTest.java diff --git a/qio/test/org/qcmg/ma/MAMappingParametersTest.java b/qio/test/org/qcmg/qio/ma/MAMappingParametersTest.java similarity index 100% rename from qio/test/org/qcmg/ma/MAMappingParametersTest.java rename to qio/test/org/qcmg/qio/ma/MAMappingParametersTest.java diff --git a/qio/test/org/qcmg/qio/ma/MAMappingTest.java b/qio/test/org/qcmg/qio/ma/MAMappingTest.java new file mode 100644 index 000000000..76282876a --- /dev/null +++ b/qio/test/org/qcmg/qio/ma/MAMappingTest.java @@ -0,0 +1,24 @@ +package org.qcmg.qio.ma; + +import static org.junit.Assert.assertTrue; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.qcmg.qio.ma.MaMapping; + +public class MAMappingTest +{ + @Test + public final void createMAMapping() { + // MaMappingParameters mp = new MAMappingParameters(); + MaMapping mm = new MaMapping("chromo1", "location1", 3, 3,2,4, "qv"); + + assertTrue(mm.getChromosome().equals("chromo1")); + assertTrue(mm.getLocation().equals("location1")); + 
assertTrue(3 == mm.getMismatchCount()); + // assertTrue(mp == mm.getParameters()); + assertTrue(mm.getQuality().equals("qv")); + } + +} diff --git a/qio/test/org/qcmg/ma/MARecordTest.java b/qio/test/org/qcmg/qio/ma/MARecordTest.java similarity index 52% rename from qio/test/org/qcmg/ma/MARecordTest.java rename to qio/test/org/qcmg/qio/ma/MARecordTest.java index 394f5fb64..0969e534f 100644 --- a/qio/test/org/qcmg/ma/MARecordTest.java +++ b/qio/test/org/qcmg/qio/ma/MARecordTest.java @@ -1,32 +1,28 @@ -package org.qcmg.ma; +package org.qcmg.qio.ma; import static org.junit.Assert.assertTrue; +import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; + import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.qcmg.qio.ma.MaDefLine; +import org.qcmg.qio.ma.MaDirection; +import org.qcmg.qio.ma.MaRecord; public class MARecordTest { - @Before - public final void before() - { - } - - @After - public final void after() - { - } @Test public final void createMARecord() throws Exception { - MADefLine defLine = new MADefLine("14_443_3", MADirection.F3); - MARecord record = new MARecord(defLine, "value"); + MaDefLine defLine = new MaDefLine("14_443_3", MaDirection.F3); + MaRecord record = new MaRecord(defLine, "value"); assertTrue(defLine == record.getDefLine()); assertTrue(record.getReadSequence().equals("value")); } - } diff --git a/qio/test/org/qcmg/qio/ma/MaFileReaderTest.java b/qio/test/org/qcmg/qio/ma/MaFileReaderTest.java new file mode 100644 index 000000000..c9da9b7dd --- /dev/null +++ b/qio/test/org/qcmg/qio/ma/MaFileReaderTest.java @@ -0,0 +1,153 @@ +package org.qcmg.qio.ma; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Vector; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; +import org.qcmg.qio.gff.GffReader; +import org.qcmg.qio.gff.GffReaderTest; +import org.qcmg.qio.ma.MaDefLine; +import org.qcmg.qio.ma.MaFileReader; +import org.qcmg.qio.ma.MaMapping; +import org.qcmg.qio.ma.MaRecord; + +public class MaFileReaderTest { + private static File EMPTY_FILE ; + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @ClassRule + public static TemporaryFolder testFolder = new TemporaryFolder(); + + @BeforeClass + public static void setup() throws IOException { + EMPTY_FILE = testFolder.newFile("empty.gff"); + GffReaderTest.createTestFile(EMPTY_FILE.getAbsolutePath(), new ArrayList()); + } + + @Test + public final void decodeDefLineNoMappings() { + try(MaFileReader reader = new MaFileReader(EMPTY_FILE);) { + String defLine = ">1_8_184_F3"; + MaDefLine d = reader.parseDefLine(defLine); + + assertTrue(d.getReadName().equals("1_8_184")); + assertFalse(d.hasMappings()); + } catch (IOException e) { + Assert.fail("IOException during unit test"); + } + } + + @Test + public final void decodeDefLineWithMappings() throws IOException { + ExpectedException.none(); + + MaFileReader reader = new MaFileReader(EMPTY_FILE); + String defLine = ">1_8_184_F3,8_-30078837.2:(31.3.0):q4,10_-9547536.2:(27.2.0):q1,18_-46572772.2:(26.2.0):q1,23_16023538.2:(24.2.0):q0"; + MaDefLine d = reader.parseDefLine(defLine); + + 
assertTrue(d.getReadName().equals("1_8_184")); + assertTrue(d.hasMappings()); + + Iterator iter = d.iterator(); + + assertTrue(4 == d.getNumberMappings()); + + assertTrue(iter.hasNext()); + if (iter.hasNext()) { + MaMapping mapping = iter.next(); + assertTrue(mapping.getChromosome().equals("8")); + assertTrue(mapping.getLocation(), mapping.getLocation().equals("-30078837")); + assertTrue(31 == mapping.getLength()); + assertTrue(3 == mapping.getPossibleMismatches()); + assertTrue(0 == mapping.getSeedStart()); + assertTrue(mapping.getQuality(), mapping.getQuality().equals("q4")); + } + + assertTrue(iter.hasNext()); + if (iter.hasNext()) { + MaMapping mapping = iter.next(); + assertTrue(mapping.getChromosome().equals("10")); + assertTrue(mapping.getLocation(), mapping.getLocation().equals("-9547536")); + assertTrue(27 == mapping.getLength()); + assertTrue(2 == mapping.getPossibleMismatches()); + assertTrue(0 == mapping.getSeedStart()); + assertTrue(mapping.getQuality(), mapping.getQuality().equals("q1")); + } + + assertTrue(iter.hasNext()); + if (iter.hasNext()) { + MaMapping mapping = iter.next(); + assertTrue(mapping.getChromosome().equals("18")); + assertTrue(mapping.getLocation(), mapping.getLocation().equals("-46572772")); + assertTrue(26 == mapping.getLength()); + assertTrue(2 == mapping.getPossibleMismatches()); + assertTrue(0 == mapping.getSeedStart()); + assertTrue(mapping.getQuality(), mapping.getQuality().equals("q1")); + } + + assertTrue(iter.hasNext()); + if (iter.hasNext()) { + MaMapping mapping = iter.next(); + assertTrue(mapping.getChromosome().equals("23")); + assertTrue(mapping.getLocation(), mapping.getLocation().equals("16023538")); + assertTrue(24 == mapping.getLength()); + assertTrue(2 == mapping.getPossibleMismatches()); + assertTrue(0 == mapping.getSeedStart()); + assertTrue(mapping.getQuality(), mapping.getQuality().equals("q0")); + } + + assertFalse(iter.hasNext()); + } + + @Test + public final void decodeRecord() throws IOException { + ExpectedException.none(); + MaFileReader reader = new MaFileReader(EMPTY_FILE); + String defLine = ">1_8_184_F3,8_-30078837.2:(31.3.0):q4,10_-9547536.2:(27.2.0):q1,18_-46572772.2:(26.2.0):q1,23_16023538.2:(24.2.0):q0"; + String sequence = "T1100011201110111121111111111211121.112211122111221"; + new MaRecord(reader.parseDefLine(defLine), sequence); + } + + @Test + public final void headerTest() throws IOException { + + //create file + List headerRecords = new ArrayList<>(); + headerRecords.add("#firstline"); + headerRecords.add("#secondline"); + headerRecords.add("#thirdline"); + File f = testFolder.newFile(); + GffReaderTest.createTestFile(f.getAbsolutePath(), headerRecords); + MaFileReader reader = new MaFileReader(f); + + // MAHeader header = new MAHeader(headerRecords); + + Iterator iter = reader.getHeader().iterator(); + + assertTrue(iter.hasNext()); + assertTrue(iter.next().equals("#firstline")); + assertTrue(iter.hasNext()); + assertTrue(iter.next().equals("#secondline")); + assertTrue(iter.hasNext()); + assertTrue(iter.next().equals("#thirdline")); + assertFalse(iter.hasNext()); + } + +} From ce808f21e0bfbc509d783c86f21e0f0531b2599c Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 2 Dec 2020 12:57:41 +1000 Subject: [PATCH 54/73] update qprofiler with new qio::ma package --- .../org/qcmg/qprofiler/ma/MaSummarizer.java | 21 ++--- .../qcmg/qprofiler/ma/MaSummaryReport.java | 14 +-- .../qcmg/qprofiler/ma/MaSummarizerTest.java | 90 ++++++++++--------- 3 files changed, 60 insertions(+), 65 deletions(-) diff --git 
a/qprofiler/src/org/qcmg/qprofiler/ma/MaSummarizer.java b/qprofiler/src/org/qcmg/qprofiler/ma/MaSummarizer.java index 65929b2b5..84061dfc0 100644 --- a/qprofiler/src/org/qcmg/qprofiler/ma/MaSummarizer.java +++ b/qprofiler/src/org/qcmg/qprofiler/ma/MaSummarizer.java @@ -14,8 +14,8 @@ import org.qcmg.common.log.QLevel; import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; -import org.qcmg.ma.MAFileReader; -import org.qcmg.ma.MARecord; +import org.qcmg.qio.ma.MaFileReader; +import org.qcmg.qio.ma.MaRecord; import org.qcmg.qprofiler.report.SummaryReport; import org.qcmg.qprofiler.summarise.Summarizer; @@ -25,14 +25,7 @@ public class MaSummarizer implements Summarizer { @Override public SummaryReport summarize(String input, String index, String[] regions) throws Exception { - - MAFileReader reader = null; - try { - reader = new MAFileReader(new File(input)); - } catch (Exception e) { - logger.error("Exception caught whilst trying to instantiate MAFileReader with file: " + input, e); - } - + // create the SummaryReport MaSummaryReport maSummaryReport = new MaSummaryReport(); maSummaryReport.setFileName(input); @@ -40,17 +33,15 @@ public SummaryReport summarize(String input, String index, String[] regions) thr // set logging level for printing of no of records parsed final boolean isLevelEnabled = logger.isLevelEnabled(QLevel.DEBUG); - try { - for (MARecord maRecord : reader) { + try (MaFileReader reader = new MaFileReader(new File(input));) { + for (MaRecord maRecord : reader) { maSummaryReport.parseRecord(maRecord); if (isLevelEnabled && maSummaryReport.getRecordsParsed() % FEEDBACK_LINES_COUNT == 0) { logger.debug("Records parsed in MaSummarizer: " + maSummaryReport.getRecordsParsed()); } } - } finally { - reader.close(); - } + } logger.info("records parsed: " + maSummaryReport.getRecordsParsed()); diff --git a/qprofiler/src/org/qcmg/qprofiler/ma/MaSummaryReport.java b/qprofiler/src/org/qcmg/qprofiler/ma/MaSummaryReport.java index 88596d698..3ae92efa3 100644 --- a/qprofiler/src/org/qcmg/qprofiler/ma/MaSummaryReport.java +++ b/qprofiler/src/org/qcmg/qprofiler/ma/MaSummaryReport.java @@ -16,9 +16,9 @@ import java.util.concurrent.atomic.AtomicLong; import org.qcmg.common.model.ProfileType; -import org.qcmg.ma.MADefLine; -import org.qcmg.ma.MAMapping; -import org.qcmg.ma.MARecord; +import org.qcmg.qio.ma.MaDefLine; +import org.qcmg.qio.ma.MaMapping; +import org.qcmg.qio.ma.MaRecord; import org.qcmg.qprofiler.report.SummaryReport; import org.qcmg.qprofiler.util.SummaryReportUtils; import org.qcmg.qvisualise.util.SummaryByCycle; @@ -65,7 +65,7 @@ public void toXml(Element parent) { * @param record * MARecord next row in file */ - protected void parseRecord(MARecord record) { + protected void parseRecord(MaRecord record) { if (null != record) { updateRecordsParsed(); @@ -76,12 +76,12 @@ protected void parseRecord(MARecord record) { // SummaryReportUtils.tallyBadReads(record.getReadSequence(), seqBadReadLineLengths); // tally up some other details from the MaDefine - MADefLine defLine = record.getDefLine(); + MaDefLine defLine = record.getDefLine(); SummaryByCycleUtils.incrementCount(defCountLineLengths, Integer.valueOf(defLine.getNumberMappings())); - for (Iterator i = defLine.iterator() ; i.hasNext() ; ) { - MAMapping map = i.next(); + for (Iterator i = defLine.iterator() ; i.hasNext() ; ) { + MaMapping map = i.next(); SummaryByCycleUtils.incrementCount(defChromosomeLineLengths, map.getChromosome()); SummaryByCycleUtils.incrementCount(defQualityLineLengths, 
map.getQuality()); } diff --git a/qprofiler/test/org/qcmg/qprofiler/ma/MaSummarizerTest.java b/qprofiler/test/org/qcmg/qprofiler/ma/MaSummarizerTest.java index ad15be865..8c15a427c 100644 --- a/qprofiler/test/org/qcmg/qprofiler/ma/MaSummarizerTest.java +++ b/qprofiler/test/org/qcmg/qprofiler/ma/MaSummarizerTest.java @@ -11,63 +11,64 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.logging.Level; import java.util.logging.Logger; - -import junit.framework.Assert; - import org.junit.After; import org.junit.Before; +import org.junit.ClassRule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import static org.junit.Assert.*; public class MaSummarizerTest { - private static final String MA_INPUT_FILE = "testInputFile.ma"; - private static final String MA_DODGY_INPUT_FILE = "testInputFileDodgy.ma"; + @ClassRule + public static TemporaryFolder testFolder = new TemporaryFolder(); + + private static String MA_INPUT_FILE; + private static String MA_DODGY_INPUT_FILE; // = "testInputFileDodgy.ma"; @Before - public void setup() { + public void setup() throws IOException { + + File f = testFolder.newFile("testInputFile.ma"); + MA_INPUT_FILE = f.getAbsolutePath(); createTestMaFile(MA_INPUT_FILE, createValidMaData()); + + } @After public void tearDown() { File outputFile = new File(MA_INPUT_FILE); - Assert.assertTrue(outputFile.delete()); + assertTrue(outputFile.delete()); } @Test public void testSummarize() throws Exception { MaSummarizer ms = new MaSummarizer(); - MaSummaryReport sr = (MaSummaryReport) ms.summarize(new File( - MA_INPUT_FILE)); + MaSummaryReport sr = (MaSummaryReport) ms.summarize(new File(MA_INPUT_FILE)); - Assert.assertNotNull(sr); - Assert.assertEquals(6, sr.getColorByCycle().count(1, '0').get()); - Assert.assertEquals(2, sr.getColorByCycle().count(2, '2').get()); + assertNotNull(sr); + assertEquals(6, sr.getColorByCycle().count(1, '0').get()); + assertEquals(2, sr.getColorByCycle().count(2, '2').get()); - Assert.assertEquals(4, sr.getBadReadsCount().size()); + assertEquals(4, sr.getBadReadsCount().size()); // lets take a look at the MAMapings - Assert.assertEquals(24, sr.getChromosomeCount().size()); + assertEquals(24, sr.getChromosomeCount().size()); int chromoCount = 0; for (Entry mapEntry : sr.getChromosomeCount().entrySet()) { chromoCount += mapEntry.getValue().get(); } - Assert.assertTrue(chromoCount == 216); - - Assert.assertTrue(sr.getLocationCount().isEmpty()); - -// Assert.assertEquals(3, sr.getMismatchCount().size()); -// int mismatchCount = 0; -// for (Entry mapEntry : sr.getMismatchCount().entrySet()) { -// mismatchCount += mapEntry.getValue(); -// } -// Assert.assertTrue(mismatchCount == 216); + assertTrue(chromoCount == 216); - Assert.assertEquals(4, sr.getQualityCount().size()); + assertTrue(sr.getLocationCount().isEmpty()); + + assertEquals(4, sr.getQualityCount().size()); int qualityCount = 0; for (Entry mapEntry : sr.getQualityCount().entrySet()) { qualityCount += mapEntry.getValue().get(); } - Assert.assertTrue(qualityCount == 216); + assertTrue(qualityCount == 216); } @Test @@ -75,10 +76,14 @@ public void testSummarizeMissingData() throws Exception { createDodgyDataFile(createMaDataMissingData()); MaSummarizer qs = new MaSummarizer(); - MaSummaryReport sr = (MaSummaryReport) qs.summarize(new File(MA_DODGY_INPUT_FILE)); - Assert.assertEquals(0, sr.getRecordsParsed()); + try { + MaSummaryReport sr = (MaSummaryReport) qs.summarize(new File(MA_DODGY_INPUT_FILE)); + fail("dodgy ma file should cause exception but not!"); + 
}catch(IllegalArgumentException e) { + //exception expected + } + - deleteDodgyDataFile(); } @Test @@ -87,9 +92,8 @@ public void testSummarizeEmptyFile() throws Exception { MaSummarizer qs = new MaSummarizer(); MaSummaryReport sr = (MaSummaryReport) qs.summarize(new File(MA_DODGY_INPUT_FILE)); - Assert.assertEquals(0, sr.getRecordsParsed()); + assertEquals(0, sr.getRecordsParsed()); - deleteDodgyDataFile(); } @Test @@ -97,30 +101,30 @@ public void testSummarizeExtraData() throws Exception { createDodgyDataFile(createMaDataExtraData()); MaSummarizer qs = new MaSummarizer(); - MaSummaryReport sr = (MaSummaryReport) qs.summarize(new File(MA_DODGY_INPUT_FILE)); - Assert.assertEquals(0, sr.getRecordsParsed()); + try { + MaSummaryReport sr = (MaSummaryReport) qs.summarize(new File(MA_DODGY_INPUT_FILE)); + fail("dodgy ma file should cause exception but not!"); + }catch(IllegalArgumentException e) { + //exception expected + } - deleteDodgyDataFile(); } @Test public void testSummarizeNoHeader() throws Exception { + createDodgyDataFile(createMaDataBody()); MaSummarizer qs = new MaSummarizer(); - MaSummaryReport sr = (MaSummaryReport) qs.summarize(new File( - MA_DODGY_INPUT_FILE)); - Assert.assertEquals(6, sr.getRecordsParsed()); + MaSummaryReport sr = (MaSummaryReport) qs.summarize(new File(MA_DODGY_INPUT_FILE)); + assertEquals(6, sr.getRecordsParsed()); - deleteDodgyDataFile(); } - private void deleteDodgyDataFile() { - File outputFile = new File(MA_DODGY_INPUT_FILE); - Assert.assertTrue(outputFile.delete()); - } + private void createDodgyDataFile(List dodgyData) throws IOException { + File f = testFolder.newFile(); + MA_DODGY_INPUT_FILE = f.getAbsolutePath(); - private void createDodgyDataFile(List dodgyData) { createTestMaFile(MA_DODGY_INPUT_FILE, dodgyData); } From b27f2877e099a09131eb7d41dc45639b3e1965b9 Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 2 Dec 2020 12:58:07 +1000 Subject: [PATCH 55/73] update qbamannotate with new qio::ma --- qbamannotate/src/org/qcmg/qbamannotate/Annotator.java | 4 ++-- qbamannotate/src/org/qcmg/qbamannotate/AnnotatorType.java | 2 +- qbamannotate/src/org/qcmg/qbamannotate/Frag.java | 4 ++-- qbamannotate/src/org/qcmg/qbamannotate/LongMatePair.java | 4 ++-- qbamannotate/src/org/qcmg/qbamannotate/PairedEnd.java | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/qbamannotate/src/org/qcmg/qbamannotate/Annotator.java b/qbamannotate/src/org/qcmg/qbamannotate/Annotator.java index d793107f0..0599f6a72 100644 --- a/qbamannotate/src/org/qcmg/qbamannotate/Annotator.java +++ b/qbamannotate/src/org/qcmg/qbamannotate/Annotator.java @@ -15,10 +15,10 @@ import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; -import org.qcmg.ma.MaFileReader; -import org.qcmg.ma.MaRecord; import org.qcmg.picard.HeaderUtils; import org.qcmg.picard.SAMFileReaderFactory; +import org.qcmg.qio.ma.MaFileReader; +import org.qcmg.qio.ma.MaRecord; public final class Annotator { private final boolean modifyProgramLine; diff --git a/qbamannotate/src/org/qcmg/qbamannotate/AnnotatorType.java b/qbamannotate/src/org/qcmg/qbamannotate/AnnotatorType.java index b78c3bb08..b80a89c08 100644 --- a/qbamannotate/src/org/qcmg/qbamannotate/AnnotatorType.java +++ b/qbamannotate/src/org/qcmg/qbamannotate/AnnotatorType.java @@ -8,7 +8,7 @@ import java.util.regex.Pattern; -import org.qcmg.ma.MaRecord; +import org.qcmg.qio.ma.MaRecord; import htsjdk.samtools.SAMRecord; diff --git a/qbamannotate/src/org/qcmg/qbamannotate/Frag.java b/qbamannotate/src/org/qcmg/qbamannotate/Frag.java 
index acc22b39e..f89c90934 100644 --- a/qbamannotate/src/org/qcmg/qbamannotate/Frag.java +++ b/qbamannotate/src/org/qcmg/qbamannotate/Frag.java @@ -6,8 +6,8 @@ */ package org.qcmg.qbamannotate; -import org.qcmg.ma.MaDirection; -import org.qcmg.ma.MaRecord; +import org.qcmg.qio.ma.MaDirection; +import org.qcmg.qio.ma.MaRecord; import htsjdk.samtools.SAMRecord; diff --git a/qbamannotate/src/org/qcmg/qbamannotate/LongMatePair.java b/qbamannotate/src/org/qcmg/qbamannotate/LongMatePair.java index 4255625fb..198344956 100644 --- a/qbamannotate/src/org/qcmg/qbamannotate/LongMatePair.java +++ b/qbamannotate/src/org/qcmg/qbamannotate/LongMatePair.java @@ -19,8 +19,8 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.stream.StreamSource; -import org.qcmg.ma.MaDirection; -import org.qcmg.ma.MaRecord; +import org.qcmg.qio.ma.MaDirection; +import org.qcmg.qio.ma.MaRecord; import javax.xml.transform.stream.StreamResult; import java.io.*; diff --git a/qbamannotate/src/org/qcmg/qbamannotate/PairedEnd.java b/qbamannotate/src/org/qcmg/qbamannotate/PairedEnd.java index eb2fb9d05..8fda1c4c5 100644 --- a/qbamannotate/src/org/qcmg/qbamannotate/PairedEnd.java +++ b/qbamannotate/src/org/qcmg/qbamannotate/PairedEnd.java @@ -14,8 +14,8 @@ import javax.xml.bind.JAXBContext; import javax.xml.bind.Marshaller; -import org.qcmg.ma.MaDirection; -import org.qcmg.ma.MaRecord; +import org.qcmg.qio.ma.MaDirection; +import org.qcmg.qio.ma.MaRecord; import htsjdk.samtools.SAMRecord; From c2084409b0bbbedf1a26b5cb50e7a8138e594068 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 2 Dec 2020 13:11:45 +1000 Subject: [PATCH 56/73] delete old classes replace by new MaFileReader --- qio/src/org/qcmg/qio/ma/MAHeader.java | 23 ---- .../org/qcmg/qio/ma/MAMappingParameters.java | 30 ----- qio/src/org/qcmg/qio/ma/MARecordIterator.java | 52 -------- qio/src/org/qcmg/qio/ma/MASerializer.java | 116 ------------------ 4 files changed, 221 deletions(-) delete mode 100644 qio/src/org/qcmg/qio/ma/MAHeader.java delete mode 100644 qio/src/org/qcmg/qio/ma/MAMappingParameters.java delete mode 100644 qio/src/org/qcmg/qio/ma/MARecordIterator.java delete mode 100644 qio/src/org/qcmg/qio/ma/MASerializer.java diff --git a/qio/src/org/qcmg/qio/ma/MAHeader.java b/qio/src/org/qcmg/qio/ma/MAHeader.java deleted file mode 100644 index 738d3eee3..000000000 --- a/qio/src/org/qcmg/qio/ma/MAHeader.java +++ /dev/null @@ -1,23 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.ma; - -import java.util.Iterator; -import java.util.Vector; - -public final class MAHeader implements Iterable { - private final Vector records = new Vector(); - - public MAHeader(final Vector headerRecords) { - for (final String record : headerRecords) { - records.add(record); - } - } - - @Override - public Iterator iterator() { - return records.iterator(); - } - -} diff --git a/qio/src/org/qcmg/qio/ma/MAMappingParameters.java b/qio/src/org/qcmg/qio/ma/MAMappingParameters.java deleted file mode 100644 index 209adeafe..000000000 --- a/qio/src/org/qcmg/qio/ma/MAMappingParameters.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.ma; - -public final class MAMappingParameters { - private final int length; - private final int possibleMismatches; - private final int seedStart; - - public MAMappingParameters(int mappingLength, - int mappingPossibleMismatches, int mappingSeedStart) { - length = mappingLength; - possibleMismatches = mappingPossibleMismatches; - seedStart = mappingSeedStart; - } - - public int getLength() { - return length; - } - - public int getPossibleMismatches() { - return possibleMismatches; - } - - public int getSeedStart() { - return seedStart; - } - -} diff --git a/qio/src/org/qcmg/qio/ma/MARecordIterator.java b/qio/src/org/qcmg/qio/ma/MARecordIterator.java deleted file mode 100644 index 3beaa5b0e..000000000 --- a/qio/src/org/qcmg/qio/ma/MARecordIterator.java +++ /dev/null @@ -1,52 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. - */ -package org.qcmg.ma; - -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Iterator; -import java.util.NoSuchElementException; - -public final class MARecordIterator implements Iterator { - private final BufferedReader reader; - private MARecord next; - - public MARecordIterator(final InputStream stream) { - InputStreamReader streamReader = new InputStreamReader(stream); - reader = new BufferedReader(streamReader); - readNext(); - } - - @Override - public boolean hasNext() { - return null != next; - } - - @Override - public MARecord next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - MARecord result = next; - readNext(); - return result; - } - - private void readNext() { - try { - next = MASerializer.nextRecord(reader); - } catch (NoSuchElementException e) { - throw e; - } catch (Exception ex) { - next = null; - } - } - - @Override - @SuppressWarnings("unchecked") - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/qio/src/org/qcmg/qio/ma/MASerializer.java b/qio/src/org/qcmg/qio/ma/MASerializer.java deleted file mode 100644 index db3e7a6ac..000000000 --- a/qio/src/org/qcmg/qio/ma/MASerializer.java +++ /dev/null @@ -1,116 +0,0 @@ -/** - * © Copyright The University of Queensland 2010-2014. This code is released under the terms outlined in the included LICENSE file. 
- */ -package org.qcmg.ma; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.regex.Pattern; -import java.util.Vector; - -public final class MASerializer { - private static final Pattern commaDelimitedPattern = Pattern - .compile("[,]+"); - private static final Pattern underscoreDelimitedPattern = Pattern - .compile("[_]+"); - private static final Pattern mappingPattern = Pattern.compile("[_.:()]+"); - - public static MAHeader readHeader(final BufferedReader reader) - throws Exception { - Vector headerLines = new Vector(); - String line = reader.readLine(); - while (null != line && line.startsWith("#")) { - headerLines.add(line); - line = reader.readLine(); - } - return new MAHeader(headerLines); - } - - private static String nextNonheaderLine(final BufferedReader reader) - throws IOException { - String line = reader.readLine(); - while (null != line && line.startsWith("#")) { - line = reader.readLine(); - } - return line; - } - - public static MARecord nextRecord(final BufferedReader reader) - throws Exception, IOException { - MARecord result = null; - try { - String defLine = nextNonheaderLine(reader); - String sequence = reader.readLine(); - if (null != defLine && null != sequence) { - result = parseRecord(defLine, sequence); - } - } catch (IOException e) { - throw e; - } catch (Exception e) { - throw e; - } - return result; - } - - static MARecord parseRecord(final String defLine, final String sequence) - throws Exception { - return new MARecord(parseDefLine(defLine), sequence); - } - - static MADefLine parseDefLine(final String value) throws Exception { - if (!value.startsWith(">")) { - throw new Exception("Missing \">\" prefix for defLine: " + value); - } - - String rawValue = value.substring(1); - - String[] params = commaDelimitedPattern.split(rawValue); - if (1 > params.length) { - throw new Exception("Bad defLine format: " + rawValue); - } - - String key = params[0]; - String[] indices = underscoreDelimitedPattern.split(key); - if (4 != indices.length) { - throw new Exception("Bad defLine ID: " + key); - } - - String panel = indices[0]; - String x = indices[1]; - String y = indices[2]; - String type = indices[3]; - - String readName = panel + "_" + x + "_" + y; - MADirection direction = MADirection.getDirection(type); - - Vector mappings = new Vector(); - for (int i = 1; i < params.length; i++) { - mappings.add(parseMapping(params[i])); - } - - return new MADefLine(readName, direction, mappings); - } - - static MAMapping parseMapping(final String value) throws Exception { - String[] params = mappingPattern.split(value); - - if (7 != params.length) { - throw new Exception("Bad mapping format"); - } - - int length = Integer.parseInt(params[3].trim()); - int possibleMismatches = Integer.parseInt(params[4].trim()); - int seedStart = Integer.parseInt(params[5].trim()); - - String chromosome = params[0].trim(); - String location = params[1].trim(); - int mismatchCount = Integer.parseInt(params[2].trim()); - MAMappingParameters parameters = new MAMappingParameters(length, - possibleMismatches, seedStart); - String quality = params[6].trim(); - - return new MAMapping(chromosome, location, mismatchCount, parameters, - quality); - } - -} From 6ba9882c7b5c4b54bd4e4ef3f96d3ee86436558a Mon Sep 17 00:00:00 2001 From: christix Date: Wed, 2 Dec 2020 13:14:51 +1000 Subject: [PATCH 57/73] change unit test classes name; make it consistence with qio::ma --- .../org/qcmg/qio/ma/{MADefLineTest.java => MaDefLineTest.java} | 2 +- .../qcmg/qio/ma/{MADirectionTest.java => 
MaDirectionTest.java} | 2 +- .../org/qcmg/qio/ma/{MAMappingTest.java => MaMappingTest.java} | 2 +- .../org/qcmg/qio/ma/{MARecordTest.java => MaRecordTest.java} | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename qio/test/org/qcmg/qio/ma/{MADefLineTest.java => MaDefLineTest.java} (96%) rename qio/test/org/qcmg/qio/ma/{MADirectionTest.java => MaDirectionTest.java} (97%) rename qio/test/org/qcmg/qio/ma/{MAMappingTest.java => MaMappingTest.java} (95%) rename qio/test/org/qcmg/qio/ma/{MARecordTest.java => MaRecordTest.java} (96%) diff --git a/qio/test/org/qcmg/qio/ma/MADefLineTest.java b/qio/test/org/qcmg/qio/ma/MaDefLineTest.java similarity index 96% rename from qio/test/org/qcmg/qio/ma/MADefLineTest.java rename to qio/test/org/qcmg/qio/ma/MaDefLineTest.java index eece8ef08..878e1d2c2 100644 --- a/qio/test/org/qcmg/qio/ma/MADefLineTest.java +++ b/qio/test/org/qcmg/qio/ma/MaDefLineTest.java @@ -9,7 +9,7 @@ import org.qcmg.qio.ma.MaDefLine; import org.qcmg.qio.ma.MaDirection; -public class MADefLineTest +public class MaDefLineTest { @Test public final void create() throws Exception diff --git a/qio/test/org/qcmg/qio/ma/MADirectionTest.java b/qio/test/org/qcmg/qio/ma/MaDirectionTest.java similarity index 97% rename from qio/test/org/qcmg/qio/ma/MADirectionTest.java rename to qio/test/org/qcmg/qio/ma/MaDirectionTest.java index 28fb07da0..cfe62cf0e 100644 --- a/qio/test/org/qcmg/qio/ma/MADirectionTest.java +++ b/qio/test/org/qcmg/qio/ma/MaDirectionTest.java @@ -9,7 +9,7 @@ import org.junit.rules.ExpectedException; import org.qcmg.qio.ma.MaDirection; -public final class MADirectionTest +public final class MaDirectionTest { @Rule public ExpectedException thrown = ExpectedException.none(); diff --git a/qio/test/org/qcmg/qio/ma/MAMappingTest.java b/qio/test/org/qcmg/qio/ma/MaMappingTest.java similarity index 95% rename from qio/test/org/qcmg/qio/ma/MAMappingTest.java rename to qio/test/org/qcmg/qio/ma/MaMappingTest.java index 76282876a..16d3c5b52 100644 --- a/qio/test/org/qcmg/qio/ma/MAMappingTest.java +++ b/qio/test/org/qcmg/qio/ma/MaMappingTest.java @@ -7,7 +7,7 @@ import org.junit.Test; import org.qcmg.qio.ma.MaMapping; -public class MAMappingTest +public class MaMappingTest { @Test public final void createMAMapping() { diff --git a/qio/test/org/qcmg/qio/ma/MARecordTest.java b/qio/test/org/qcmg/qio/ma/MaRecordTest.java similarity index 96% rename from qio/test/org/qcmg/qio/ma/MARecordTest.java rename to qio/test/org/qcmg/qio/ma/MaRecordTest.java index 0969e534f..994dd10a4 100644 --- a/qio/test/org/qcmg/qio/ma/MARecordTest.java +++ b/qio/test/org/qcmg/qio/ma/MaRecordTest.java @@ -12,7 +12,7 @@ import org.qcmg.qio.ma.MaDirection; import org.qcmg.qio.ma.MaRecord; -public class MARecordTest +public class MaRecordTest { @Test From 4dcc73db6b13ec0801523dbed13b6aa11a610854 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Wed, 2 Dec 2020 13:21:09 +1000 Subject: [PATCH 58/73] delete unit test due to the related classes is already deleted --- qio/test/org/qcmg/qio/ma/MAHeaderTest.java | 47 ------------------- .../qcmg/qio/ma/MAMappingParametersTest.java | 32 ------------- 2 files changed, 79 deletions(-) delete mode 100644 qio/test/org/qcmg/qio/ma/MAHeaderTest.java delete mode 100644 qio/test/org/qcmg/qio/ma/MAMappingParametersTest.java diff --git a/qio/test/org/qcmg/qio/ma/MAHeaderTest.java b/qio/test/org/qcmg/qio/ma/MAHeaderTest.java deleted file mode 100644 index 48113fee7..000000000 --- a/qio/test/org/qcmg/qio/ma/MAHeaderTest.java +++ /dev/null @@ -1,47 +0,0 @@ -package org.qcmg.ma; - -import 
static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.util.Iterator; -import java.util.Vector; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public final class MAHeaderTest -{ - @Before - public void before() - { - } - - @After - public void after() - { - } - - @Test - public final void create() - throws Exception - { - Vector headerRecords = new Vector(); - headerRecords.add("#firstline"); - headerRecords.add("#secondline"); - headerRecords.add("#thirdline"); - - MAHeader header = new MAHeader(headerRecords); - - Iterator iter = header.iterator(); - - assertTrue(iter.hasNext()); - assertTrue(iter.next().equals("#firstline")); - assertTrue(iter.hasNext()); - assertTrue(iter.next().equals("#secondline")); - assertTrue(iter.hasNext()); - assertTrue(iter.next().equals("#thirdline")); - assertFalse(iter.hasNext()); - } - -} diff --git a/qio/test/org/qcmg/qio/ma/MAMappingParametersTest.java b/qio/test/org/qcmg/qio/ma/MAMappingParametersTest.java deleted file mode 100644 index 0dbfaa5da..000000000 --- a/qio/test/org/qcmg/qio/ma/MAMappingParametersTest.java +++ /dev/null @@ -1,32 +0,0 @@ -package org.qcmg.ma; - -import static org.junit.Assert.assertTrue; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class MAMappingParametersTest -{ - @Before - public final void before() - { - } - - @After - public final void after() - { - } - - @Test - public final void createMAMappingParameters() - throws Exception - { - MAMappingParameters p = new MAMappingParameters(3,2,4); - - assertTrue(3 == p.getLength()); - assertTrue(2 == p.getPossibleMismatches()); - assertTrue(4 == p.getSeedStart()); - } - -} From ea246a296448443046c7a2e8ba25ebeaa47cbc3d Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Thu, 3 Dec 2020 11:30:09 +1000 Subject: [PATCH 59/73] remove api qio from build files(qmotif, qpileup and qtesting) --- qmotif/build.gradle | 1 - qpileup/build.gradle | 1 - qtesting/build.gradle | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/qmotif/build.gradle b/qmotif/build.gradle index e0d1a0763..2abddaded 100644 --- a/qmotif/build.gradle +++ b/qmotif/build.gradle @@ -10,7 +10,6 @@ repositories { mavenCentral() } dependencies { configurations.compile.transitive = true api project(':qcommon') - api project(':qio') api project(':qbamfilter') api project(':qpicard') diff --git a/qpileup/build.gradle b/qpileup/build.gradle index d47230ace..b8cff1779 100644 --- a/qpileup/build.gradle +++ b/qpileup/build.gradle @@ -18,7 +18,6 @@ dependencies { api project(':qcommon') api project(':qpicard') - api project(':qio') api project(':qbamfilter') ant { untar(src: "../lib/hdf-java-2.8-bin.tar", dest: "build/deps") } diff --git a/qtesting/build.gradle b/qtesting/build.gradle index 33258a786..7fb701f8e 100644 --- a/qtesting/build.gradle +++ b/qtesting/build.gradle @@ -2,7 +2,7 @@ def isExecutable = false dependencies { configurations.compile.transitive = true - api project(':qio') +// api project(':qio') api project(':qcommon') } From 956d0269f0cfc415a204b5773226b61fa69260d6 Mon Sep 17 00:00:00 2001 From: Christina Xu Date: Thu, 3 Dec 2020 16:45:51 +1000 Subject: [PATCH 60/73] remove qio from qannotate --- qannotate/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qannotate/build.gradle b/qannotate/build.gradle index 73155379c..0198ae209 100644 --- a/qannotate/build.gradle +++ b/qannotate/build.gradle @@ -28,7 +28,7 @@ dependencies { configurations.compile.transitive = 
true api project(':qcommon') - api project(':qio') +// api project(':qio') api project(':qbamfilter') api 'com.github.samtools:htsjdk:2.14.1' From 92f2340e847905f1f5b3f171940c9aa6e68a05c6 Mon Sep 17 00:00:00 2001 From: christix Date: Fri, 4 Dec 2020 10:53:04 +1000 Subject: [PATCH 61/73] mv s3URL connection related classes from qio to qpicard. --- qpicard/src/org/qcmg/picard/SAMFileReaderFactory.java | 2 +- .../src/org/qcmg/picard}/s3/S3AwareURLStreamHandlerFactory.java | 2 +- .../src/org/qcmg/picard}/s3/S3URLConnection.java | 2 +- .../src/org/qcmg/picard}/s3/S3URLStreamHandler.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename {qio/src/org/qcmg/protocol => qpicard/src/org/qcmg/picard}/s3/S3AwareURLStreamHandlerFactory.java (97%) rename {qio/src/org/qcmg/protocol => qpicard/src/org/qcmg/picard}/s3/S3URLConnection.java (99%) rename {qio/src/org/qcmg/protocol => qpicard/src/org/qcmg/picard}/s3/S3URLStreamHandler.java (93%) diff --git a/qpicard/src/org/qcmg/picard/SAMFileReaderFactory.java b/qpicard/src/org/qcmg/picard/SAMFileReaderFactory.java index a54ce7d63..6497797f7 100644 --- a/qpicard/src/org/qcmg/picard/SAMFileReaderFactory.java +++ b/qpicard/src/org/qcmg/picard/SAMFileReaderFactory.java @@ -27,7 +27,7 @@ import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.string.StringUtils; -import org.qcmg.protocol.s3.S3AwareURLStreamHandlerFactory; +import org.qcmg.picard.s3.S3AwareURLStreamHandlerFactory; public class SAMFileReaderFactory { // EnumSet