-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'dev' of https://github.com/NYU-Molecular-Pathology/NGS5…
…80-nf into dev
- Loading branch information
Showing
18 changed files
with
12,064 additions
and
630 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,3 +72,4 @@ nextflow.html.* | |
*.html.* | ||
trace.txt.* | ||
data/* | ||
samples.cnv.tsv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Filters the ANNOVAR annotation .tsv table for usage with IGV Snapshots | ||
INPUT: ANNOVAR annotations merged with original .vcf .tsv table | ||
OUTPUT: Filtered annotations .tsv table | ||
USAGE: igv-variant-filter.py -c LoFreqSomatic -i "annotations.tsv" -o "sampleID.filtered.tsv" | ||
Criteria: | ||
For both matched and unmatched we will apply the following criteria: | ||
1- VAF >5% tumor | ||
2- VAF <2% normal | ||
""" | ||
import csv | ||
import sys | ||
import argparse | ||
|
||
# Put SIGPIPE back on its default handler so that a closed downstream pipe
# does not surface as a broken-pipe IOError; see
# https://stackoverflow.com/questions/14207708/ioerror-errno-32-broken-pipe-python
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE, SIG_DFL)
|
||
frequency_min_tumor = 0.05 # 5% | ||
frequency_min_normal = 0.02 # 2% | ||
|
||
|
||
def unpaired_filter(row):
    """
    Tell whether a row passes every unpaired filter criterion

    The row's 'AF' field is converted to a float and has to be strictly
    greater than the module-level `frequency_min_tumor` threshold.

    Returns True when the row passes, False otherwise.
    """
    return float(row['AF']) > frequency_min_tumor
|
||
def LoFreqSomatic(fin, fout):
    """
    Filter the table for the "LoFreqSomatic" caller

    Reads a tab-delimited table with a header line from the open handle
    `fin`, then writes the same header followed by every row accepted by
    `unpaired_filter` to the open handle `fout`, also tab-delimited.
    """
    table = csv.DictReader(fin, delimiter='\t')
    out = csv.DictWriter(fout, delimiter='\t', fieldnames=table.fieldnames)
    out.writeheader()
    out.writerows(entry for entry in table if unpaired_filter(entry))
|
||
def MuTect2(fin, fout):
    """
    Filter the table for the "MuTect2" caller

    Copies the header of the tab-delimited table on `fin` to `fout`, then
    copies each row for which `unpaired_filter` returns True. Rows that fail
    the filter are dropped.
    """
    source = csv.DictReader(fin, delimiter='\t')
    columns = source.fieldnames
    sink = csv.DictWriter(fout, delimiter='\t', fieldnames=columns)
    sink.writeheader()
    for record in source:
        if not unpaired_filter(record):
            continue
        sink.writerow(record)
|
||
|
||
|
||
def main(**kwargs):
    """
    Main control function for the script

    Keyword arguments:
    input_file  -- path of the table to read; stdin is used when missing or empty
    output_file -- path of the table to write; stdout is used when missing or empty
    caller      -- variant caller name, "LoFreqSomatic" or "MuTect2" (required,
                   a missing key raises KeyError)

    Exits with status 1 when the caller is not recognized. Both handles are
    closed before returning, including stdin/stdout when those are used.
    """
    input_file = kwargs.pop('input_file', None)
    output_file = kwargs.pop('output_file', None)
    caller = kwargs.pop('caller')

    # Validate the caller before opening anything: opening the output file
    # first would truncate an existing file even though nothing gets written.
    if caller not in ("LoFreqSomatic", "MuTect2"):
        # stdout may be the data stream, so the error goes to stderr
        sys.stderr.write("ERROR: caller not recognized: {0}\n".format(caller))
        sys.exit(1)

    fin = open(input_file) if input_file else sys.stdin
    try:
        fout = open(output_file, "w") if output_file else sys.stdout
        try:
            if caller == "LoFreqSomatic":
                LoFreqSomatic(fin, fout)
            else:
                # TODO: add filter methods for paired calling; MuTect2
                # currently applies the same unpaired filter
                MuTect2(fin, fout)
        finally:
            fout.close()
    finally:
        fin.close()
|
||
def parse():
    """
    Read the command line options and run `main` with them

    Options:
    -i            input file (default: None)
    -o            output file (default: None)
    -c, --caller  variant caller used (required)
    """
    cli = argparse.ArgumentParser(
        description='Filters the ANNOVAR annotation .tsv table for usage with IGV snapshots')
    cli.add_argument("-i", dest='input_file', default=None, help="Input file")
    cli.add_argument("-o", dest='output_file', default=None, help="Output file")
    cli.add_argument("-c", "--caller", dest='caller', required=True,
                     help="Variant caller used")
    options = vars(cli.parse_args())
    main(**options)
|
||
if __name__ == '__main__': | ||
parse() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Script to create an IGV batchscript | ||
https://software.broadinstitute.org/software/igv/PortCommands | ||
http://software.broadinstitute.org/software/igv/automation | ||
https://software.broadinstitute.org/software/igv/batch | ||
example IGV batch script: | ||
new | ||
snapshotDirectory IGV_Snapshots | ||
load test_alignments.bam | ||
genome hg19 | ||
maxPanelHeight 500 | ||
goto chr1:713167-714758 | ||
snapshot chr1_713167_714758_h500.png | ||
goto chr1:713500-714900 | ||
snapshot chr1_713500_714900_h500.png | ||
exit | ||
Usage: | ||
./make-batchscript.py foo.bam bar.bam | ||
""" | ||
import os | ||
import argparse | ||
|
||
def append_string(string, output_file):
    """
    Add one line of text to the end of a file

    `output_file` is opened in append mode, so it is created when it does not
    exist and existing content is kept. `string` is written followed by a
    newline character.
    """
    with open(output_file, "a") as handle:
        handle.write("".join((string, '\n')))
|
||
def make_regions(regions_file):
    """
    Parse the .bed format regions file to generate the IGV location and output filenames

    Each usable line is split on whitespace. With three or more fields the
    first three are taken as chrom, start and stop; with exactly two fields
    the stop is set equal to the start. Blank lines, lines with a single
    field, and BED header lines ('#' comments, 'track', 'browser') are
    skipped.

    Returns a list of dicts, one per region, with the keys 'chrom', 'start',
    'stop' (strings as read from the file), 'loc' ("chrom:start-stop") and
    'filename' ("chrom_start_stop.png").
    """
    regions = []
    with open(regions_file) as f:
        for line in f:
            fields = line.split()
            # Without this guard a short line either raised NameError (first
            # line) or silently repeated the previous region.
            if len(fields) < 2:
                continue
            # BED header lines are not regions
            if fields[0].startswith('#') or fields[0] in ('track', 'browser'):
                continue
            if len(fields) >= 3:
                chrom, start, stop = fields[0:3]
            else:
                chrom, start = fields
                stop = start
            # make IGV format location
            loc = '{0}:{1}-{2}'.format(chrom, start, stop)
            filename = '{0}_{1}_{2}.png'.format(chrom, start, stop)
            regions.append({'chrom': chrom, 'start': start, 'stop': stop,
                            'loc': loc, 'filename': filename})
    return regions
|
||
def main(**kwargs):
    """
    Write the IGV batchscript described by the keyword arguments

    Keyword arguments:
    input_files       -- paths of the files to load (required)
    regions_file      -- regions file to read (default "regions.bed")
    snapshotDirectory -- directory named in the script (default "IGV_snapshots")
    batchscript_file  -- script file to append to (default "IGV_snapshots.bat")
    image_height      -- value for maxPanelHeight, converted with int() (default 500)
    genome            -- genome name (default "hg19")

    Commands are appended to the batchscript one line at a time in this
    order: new, snapshotDirectory, genome, one load per input file,
    maxPanelHeight, a goto/snapshot pair per region, exit.
    """
    input_files = kwargs.pop('input_files')
    regions_file = kwargs.pop('regions_file', "regions.bed")
    snapshotDirectory = kwargs.pop('snapshotDirectory', "IGV_snapshots")
    batchscript_file = kwargs.pop('batchscript_file', "IGV_snapshots.bat")
    image_height = int(kwargs.pop('image_height', 500))
    genome = kwargs.pop('genome', "hg19")

    # the regions file is read in full before anything is written
    regions = make_regions(regions_file)

    def emit(command):
        # every command goes to the same batchscript file
        append_string(command, batchscript_file)

    emit("new")
    emit("snapshotDirectory " + snapshotDirectory)
    emit("genome " + genome)
    for path in input_files:
        emit("load " + path)
    emit("maxPanelHeight " + str(image_height))
    for entry in regions:
        emit("goto " + entry['loc'])
        emit("snapshot " + entry['filename'])
    emit("exit")
|
||
def parse():
    """
    Parses script args and passes them to `main`

    Positional:
    input_files   -- one or more files to create snapshots from

    Options:
    -r, --regions -- .bed formatted regions file (default "regions.bed")
    -b            -- batchscript file to create (default "IGV_snapshots.bat")
    -d            -- snapshot directory (default "IGV_snapshots")
    --height      -- image height in pixels, an integer (default 500)
    --genome      -- genome name for IGV (default "hg19")
    """
    parser = argparse.ArgumentParser(description='IGV batchscript creator')
    parser.add_argument("input_files",
                        nargs='+',
                        help="pathes to the files to create snapshots from e.g. .bam, .bigwig, etc.")
    parser.add_argument("-r", "--regions",
                        default="regions.bed",
                        dest='regions_file',
                        metavar='regions_file',
                        help="Path to .bed formatted regions file")
    parser.add_argument("-b",
                        default="IGV_snapshots.bat",
                        dest='batchscript_file',
                        metavar='batchscript_file',
                        help="Name of the IGV batchscript file to create")
    # The default used to be "IGV_snapshots.bat", copied from -b, which made
    # the snapshot directory share the batchscript's file name. It now
    # matches the fallback that main() uses.
    parser.add_argument("-d",
                        default="IGV_snapshots",
                        dest='snapshotDirectory',
                        metavar='snapshotDirectory',
                        help="Name of the IGV snapshot directory to save images to")
    # type=int makes argparse reject a non-numeric height with a usage error
    parser.add_argument("--height",
                        default=500,
                        type=int,
                        dest='image_height',
                        metavar='image_height',
                        help="Height in pixels of the images to create")
    parser.add_argument("--genome",
                        default="hg19",
                        dest='genome',
                        metavar='genome',
                        help="Name of genome to use in IGV")

    args = parser.parse_args()
    main(**vars(args))
|
||
if __name__ == '__main__': | ||
parse() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Convert the .vcf TSV file to a bed file | ||
""" | ||
import csv | ||
import sys | ||
import argparse | ||
|
||
# Put SIGPIPE back on its default handler so that a closed downstream pipe
# does not surface as a broken-pipe IOError; see
# https://stackoverflow.com/questions/14207708/ioerror-errno-32-broken-pipe-python
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE, SIG_DFL)
|
||
def main(**kwargs):
    """
    Main control function for the script

    Keyword arguments:
    input_file  -- path of the tab-delimited table to read; stdin is used when
                   missing or empty
    output_file -- path of the file to write; stdout is used when missing or empty

    The input needs a header line with at least the columns CHROM, POS and
    ALT. For every row one tab-delimited line "chrom, start, end" is written,
    where start is POS and end is POS plus the length of the ALT string.

    Raises KeyError when a required column is missing and ValueError when POS
    is not an integer. Both handles are closed in every case, including
    stdin/stdout when those are used.

    NOTE(review): start is the VCF POS written unchanged, while BED proper is
    0-based; confirm that downstream consumers expect this.
    """
    input_file = kwargs.pop('input_file', None)
    output_file = kwargs.pop('output_file', None)

    # load input/output file handles; try/finally guarantees they are closed
    # even when a malformed row raises part-way through
    fin = open(input_file) if input_file else sys.stdin
    try:
        fout = open(output_file, "w") if output_file else sys.stdout
        try:
            # start processing input
            reader = csv.DictReader(fin, delimiter='\t')
            writer = csv.writer(fout, delimiter='\t')
            for row in reader:
                chrom = row['CHROM']
                start = int(row['POS'])
                end = start + len(row['ALT'])
                writer.writerow([chrom, start, end])
        finally:
            fout.close()
    finally:
        fin.close()
|
||
def parse():
    """
    Parses script args and passes them to `main`

    Options:
    -i -- input file (default: None)
    -o -- output file (default: None)
    """
    # The description used to be copied from the annotation filter script and
    # did not describe this tool; it now matches the module docstring.
    parser = argparse.ArgumentParser(description='Convert the .vcf TSV file to a bed file')
    parser.add_argument("-i", default=None, dest='input_file', help="Input file")
    parser.add_argument("-o", default=None, dest='output_file', help="Output file")
    args = parser.parse_args()

    main(**vars(args))
|
||
if __name__ == '__main__': | ||
parse() |
Oops, something went wrong.