Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] WES workflow #242

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,25 @@ def is_paired_end(sampletable, sample):
pass
return False

def is_tumor_only(sampletable):
    """
    Report whether the sampletable describes tumor-only samples.

    For the somatic WES pipeline. The sampletable is considered paired
    tumor/normal when it has a column named 'normal_filename' holding at
    least one non-missing value. It is considered tumor-only when that column
    is absent, or when it is present but every entry is missing (NA/NaN/None).

    Mixing tumor/normal and tumor-only samples in the same sampletable is not
    supported: a table where only some rows have a normal filename is still
    reported as paired.

    Parameters
    ----------
    sampletable : pandas.DataFrame
        Only contains columns called 'normal_filename' and (if paired end)
        'normal_R2_filename' if there are paired normal samples for the tumor
        samples.

    Returns
    -------
    bool
        True if no normal sample filenames are present, False otherwise.
    """
    if 'normal_filename' not in sampletable.columns:
        return True

    # A header with nothing underneath it (e.g. a column left blank in the
    # table) carries no normal samples, so it must not flip the workflow into
    # paired mode. `.values.any()` reduces to a single flag.
    has_normal = sampletable['normal_filename'].notna().values.any()
    return not bool(has_normal)


def fill_r1_r2(sampletable, pattern, r1_only=False):
"""
Expand Down
29 changes: 29 additions & 0 deletions lib/patterns_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,35 @@ def __init__(self, config, patterns, workdir=None):
self.n = [1]


class WESConfig(SeqConfig):
    """Config object for WES workflows, built on top of SeqConfig."""

    def __init__(self, config, patterns, workdir=None):
        """
        Set up WES-specific fill values and expand patterns into targets.

        After the base SeqConfig initialization, records whether the
        sampletable is tumor-only, derives the list of genotypes from that,
        and fills in the patterns to create targets.

        Parameters
        ----------

        config : dict
            Passed through to SeqConfig.__init__.

        patterns : str
            Path to patterns YAML file

        workdir : str
            Config, patterns, and all paths in `config` should be interpreted
            as relative to `workdir`
        """
        SeqConfig.__init__(self, config, patterns, workdir)

        # Tumor-only runs have a single genotype; paired runs have both.
        self.tumoronly = common.is_tumor_only(self.sampletable)
        self.genotype = ['tumor'] if self.tumoronly else ['tumor', 'normal']

        # Wildcard values used to expand the patterns. `samples`, `n` and
        # `patterns` are presumably set by SeqConfig.__init__ -- TODO confirm.
        self.fill = {
            'sample': self.samples,
            'genotype': self.genotype,
            'n': self.n,
        }

        # NOTE(review): `zip` is handed to fill_patterns as the combination
        # function; if it pairs the fill lists element-wise, lists of unequal
        # length (samples vs. genotype) would be truncated -- confirm against
        # helpers.fill_patterns.
        self.targets = helpers.fill_patterns(self.patterns, self.fill, zip)


class RNASeqConfig(SeqConfig):
def __init__(self, config, patterns, workdir=None):
"""
Expand Down
7 changes: 7 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
atropos
bcftools
bedtools
bioconductor-annotationhub
bioconductor-biocparallel
Expand All @@ -14,6 +15,7 @@ bioconductor-sva
bioconductor-tximport
biopython >=1.68
bowtie2
bwa
cutadapt
deeptools >=3.0.1
fastqc
Expand All @@ -23,6 +25,7 @@ fastq-screen
font-ttf-dejavu-sans-mono

gat
gatk4
gffutils >=0.8.7.1
ghostscript
git
Expand Down Expand Up @@ -69,6 +72,8 @@ samtools >=1.4.1
scipy >=0.18.1
seaborn >=0.7.1
snakemake==5.5.4
snpeff
somatic-sniper
sra-tools
star
subread
Expand All @@ -84,3 +89,5 @@ ucsc-liftover
ucsc-oligomatch
ucsc-twobittofa
ucsc-wigtobigwig
varscan
vcftools
Loading