Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] include diffbind in new downstream chipseq #315

Draft
wants to merge 5 commits into
base: v1.6rc
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ workflows/rnaseq/downstream/final_clusters
workflows/rnaseq/downstream/rnaseq.log
workflows/rnaseq/downstream/*tsv
workflows/chipseq/data
workflows/chipseq/*.log
workflows/chipseq/downstream/*.tsv
workflows/chipseq/downstream/*.html
workflows/chipseq/downstream/*.bed
workflows/chipseq/downstream/*_cache
workflows/chipseq/downstream/*_files
workflows/rnaseq/data
workflows/colocalization/results
work
Expand Down
5 changes: 4 additions & 1 deletion lib/lcdbwf/R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,10 @@ make.dds <- function(design_data, salmon.files=NULL, combine.by=NULL,

if (remove.version){
rownames(dds) <- sapply(strsplit(rownames(dds), '.', fixed=TRUE),
function (x) x[1])
function (x) {ifelse(grepl('_', x[2]),
paste(x[1], x[2], sep='.'),
x[1])}
)
}

if(!is.null(combine.by)){
Expand Down
4 changes: 4 additions & 0 deletions requirements-r.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
bioconductor-annotationhub
bioconductor-apeglm
bioconductor-biocparallel
bioconductor-chipseeker
bioconductor-clusterprofiler
bioconductor-degreport
bioconductor-deseq2
bioconductor-diffbind
bioconductor-dupradar
bioconductor-genomeinfodbdata
bioconductor-genomicfeatures
Expand All @@ -14,7 +16,9 @@ bioconductor-tximport
r-base >3.5
r-devtools
r-dt
r-future
r-ggrepel
r-ggupset
r-heatmaply
r-knitr
r-plotly
Expand Down
137 changes: 71 additions & 66 deletions workflows/chipseq/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
sampletable: 'config/sampletable.tsv'

# Which key in the `references` dict below to use
organism: 'dmel'
organism: 'human'

# If not specified here, use the environment variable REFERENCES_DIR.
references_dir: 'references_data'
Expand Down Expand Up @@ -37,103 +37,108 @@ chipseq:
# at least a BED file of peaks.
#
peak_calling:
- label: gaf-embryo-sicer
algorithm: sicer
- label: BRD4_dBET6_1
algorithm: macs2
ip:
- gaf-embryo-1
- BRD4_dBET6_1
control:
- input-embryo-1
redundancy_threshold: 1
window_size: 200
fragment_size: 150
- mockIgG_dBET6_1
# Optional user-specified override of the mappable genome proportion.
# If specified here, SICER will use this value instead of the value
# specific to the genome build. If NOT specified here, SICER will use the
# mappability value for your genome build.
effective_genome_fraction: 0.75
genome_build: dm6
gap_size: 600
fdr: 0.01
#effective_genome_count: 7e7
extra: '--nomodel -p 0.001 --cutoff-analysis' # --broad for histones, paper says in ‘histone’ mode for MTHFD1, but got very small number of peaks with --broad

- label: BRD4_dBET6_2
algorithm: macs2
ip:
- BRD4_dBET6_2
control:
- mockIgG_dBET6_2
extra: '--nomodel -p 0.001 --cutoff-analysis'

- label: gaf-embryo-1
- label: BRD4_DMSO_1
algorithm: macs2
ip:
- gaf-embryo-1
- BRD4_DMSO_1
control:
- input-embryo-1
# Optional user-specified override of the mappable genome size. If
# specified here, MACS will use this value instead of the value specific
# to the genome build. If NOT specified here, MACS will use the
# mappability value for your genome build.
effective_genome_count: 7e7
extra: '--nomodel --extsize 147'

- label: gaf-embryo-1
algorithm: spp
- mockIgG_DMSO_1
extra: '--nomodel -p 0.001 --cutoff-analysis'

- label: BRD4_DMSO_2
algorithm: macs2
ip:
- gaf-embryo-1
- BRD4_DMSO_2
control:
- input-embryo-1
extra:
fdr: 0.3
zthr: 4
- mockIgG_DMSO_2
extra: '--nomodel -p 0.001 --cutoff-analysis'

- label: gaf-embryo-1-defaults
algorithm: spp
- label: MTHFD1_dBET6_1
algorithm: macs2
ip:
- gaf-embryo-1
- MTHFD1_dBET6_1
control:
- input-embryo-1
- mockIgG_dBET6_1
extra: '--nomodel -p 0.001 --cutoff-analysis' # --broad for histones, paper says in ‘histone’ mode for MTHFD1

- label: gaf-wingdisc-pooled
- label: MTHFD1_dBET6_1_inputCTRL
algorithm: macs2
ip:
- gaf-wingdisc-1
- gaf-wingdisc-2
- MTHFD1_dBET6_1
control:
- input-wingdisc-1
- input-wingdisc-2
extra: '--nomodel --extsize 147'
- input_dBET6_1
extra: '--nomodel -p 0.001 --cutoff-analysis'

- label: gaf-wingdisc-pooled
algorithm: spp
- label: MTHFD1_dBET6_2
algorithm: macs2
ip:
- gaf-wingdisc-1
- gaf-wingdisc-2
- MTHFD1_dBET6_2
control:
- input-wingdisc-1
# - input-wingdisc-2
extra:
fdr: 0.5
zthr: 4
- mockIgG_dBET6_2
extra: '--nomodel -p 0.001 --cutoff-analysis'

- label: MTHFD1_DMSO_1
algorithm: macs2
ip:
- MTHFD1_DMSO_1
control:
- mockIgG_DMSO_1
extra: '--nomodel -p 0.001 --cutoff-analysis'

- label: MTHFD1_DMSO_2
algorithm: macs2
ip:
- MTHFD1_DMSO_2
control:
- mockIgG_DMSO_2
extra: '--nomodel -p 0.001 --cutoff-analysis'

fastq_screen:
- label: rRNA
organism: dmel
tag: test
organism: human
tag: gencode-v28
- label: PhiX
organism: phix
tag: default
- label: Fly
organism: dmel
tag: test

merged_bigwigs:
input-wingdisc:
- input-wingdisc-1
- input-wingdisc-2
gaf-wingdisc:
- gaf-wingdisc-1
- gaf-wingdisc-2
gaf-embryo:
- gaf-embryo-1
- label: Human
organism: human
tag: gencode-v28

#merged_bigwigs:
# input-wingdisc:
# - input-wingdisc-1
# - input-wingdisc-2
# gaf-wingdisc:
# - gaf-wingdisc-1
# - gaf-wingdisc-2
# gaf-embryo:
# - gaf-embryo-1

aligner:
index: 'bowtie2'
tag: 'test'
tag: 'gencode-v28'

include_references:
- '../../include/reference_configs/PhiX.yaml'
- '../../include/reference_configs/Drosophila_melanogaster.yaml'
- '../../include/reference_configs/test.yaml'
- '../../include/reference_configs/Homo_sapiens.yaml'
28 changes: 17 additions & 11 deletions workflows/chipseq/config/sampletable.tsv
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
# Samplenames with the same "label" will be considered technical replicates
samplename antibody biological_material replicate label orig_filename
input_1 input wingdisc-1 1 input-wingdisc-1 data/example_data/chipseq_input1.fq.gz
input_2 input wingdisc-2 2 input-wingdisc-2 data/example_data/chipseq_input2.fq.gz
ip_1 gaf wingdisc-1 1 gaf-wingdisc-1 data/example_data/chipseq_ip1.fq.gz
ip_2 gaf wingdisc-2 2 gaf-wingdisc-2 data/example_data/chipseq_ip2.fq.gz

# Note here we are treating ip_3 and ip_4 as technical replicates for the sake of testing
ip_3 gaf embryo-1 1 gaf-embryo-1 data/example_data/chipseq_ip3.fq.gz
ip_4 gaf embryo-1 1 gaf-embryo-1 data/example_data/chipseq_ip4.fq.gz
input_3 input embryo-1 1 input-embryo-1 data/example_data/chipseq_input3.fq.gz
samplename label biological_material group GEO_Run LibraryLayout source_name Treatment antibody replicate orig_filename
BRD4_dBET6_1 BRD4_dBET6_1 HAP1_B1 BRD4_dBET6 SRR6202977 SINGLE ChIP-seq for BRD4 in HAP1 cell line treated with dBET6 dBET6 BRD4 1 ../../raw_files/SRR6202977.fastq.gz
BRD4_dBET6_2 BRD4_dBET6_2 HAP1_B2 BRD4_dBET6 SRR6202978 SINGLE ChIP-seq for BRD4 in HAP1 cell line treated with dBET6 dBET6 BRD4 2 ../../raw_files/SRR6202978.fastq.gz
BRD4_DMSO_1 BRD4_DMSO_1 HAP1_D1 BRD4_DMSO SRR6202979 SINGLE ChIP-seq for BRD4 in HAP1 cell line treated with DMSO DMSO BRD4 1 ../../raw_files/SRR6202979.fastq.gz
BRD4_DMSO_2 BRD4_DMSO_2 HAP1_D2 BRD4_DMSO SRR6202980 SINGLE ChIP-seq for BRD4 in HAP1 cell line treated with DMSO DMSO BRD4 2 ../../raw_files/SRR6202980.fastq.gz
mockIgG_dBET6_1 mockIgG_dBET6_1 HAP1_B1 mockIgG_dBET6 SRR6202981 SINGLE ChIP-seq with mock IgG antibody in HAP1 cell line treated with dBET6 dBET6 mockIgG 1 ../../raw_files/SRR6202981.fastq.gz
mockIgG_dBET6_2 mockIgG_dBET6_2 HAP1_B2 mockIgG_dBET6 SRR6202982 SINGLE ChIP-seq with mock IgG antibody in HAP1 cell line treated with dBET6 dBET6 mockIgG 2 ../../raw_files/SRR6202982.fastq.gz
mockIgG_DMSO_1 mockIgG_DMSO_1 HAP1_D1 mockIgG_DMSO SRR6202983 SINGLE ChIP-seq with mock IgG antibody in HAP1 cell line treated with DMSO DMSO mockIgG 1 ../../raw_files/SRR6202983.fastq.gz
mockIgG_DMSO_2 mockIgG_DMSO_2 HAP1_D2 mockIgG_DMSO SRR6202984 SINGLE ChIP-seq with mock IgG antibody in HAP1 cell line treated with DMSO DMSO mockIgG 2 ../../raw_files/SRR6202984.fastq.gz
input_dBET6_1 input_dBET6_1 HAP1_B1 input_dBET6 SRR6202985 SINGLE ChIP-seq for Input in HAP1 cell line treated with dBET6 dBET6 input 1 ../../raw_files/SRR6202985.fastq.gz
input_dBET6_2 input_dBET6_2 HAP1_B2 input_dBET6 SRR6202986 SINGLE ChIP-seq for Input in HAP1 cell line treated with dBET6 dBET6 input 2 ../../raw_files/SRR6202986.fastq.gz
input_DMSO_1 input_DMSO_1 HAP1_D1 input_DMSO SRR6202987 SINGLE ChIP-seq for Input in HAP1 cell line treated with DMSO DMSO input 1 ../../raw_files/SRR6202987.fastq.gz
input_DMSO_2 input_DMSO_2 HAP1_D2 input_DMSO SRR6202988 SINGLE ChIP-seq for Input in HAP1 cell line treated with DMSO DMSO input 2 ../../raw_files/SRR6202988.fastq.gz
MTHFD1_dBET6_1 MTHFD1_dBET6_1 HAP1_B1 MTHFD1_dBET6 SRR6202989 SINGLE ChIP-seq for MTHFD1 in HAP1 cell line treated with dBET6 dBET6 MTHFD1 1 ../../raw_files/SRR6202989.fastq.gz
MTHFD1_dBET6_2 MTHFD1_dBET6_2 HAP1_B2 MTHFD1_dBET6 SRR6202990 SINGLE ChIP-seq for MTHFD1 in HAP1 cell line treated with dBET6 dBET6 MTHFD1 2 ../../raw_files/SRR6202990.fastq.gz
MTHFD1_DMSO_1 MTHFD1_DMSO_1 HAP1_D1 MTHFD1_DMSO SRR6202991 SINGLE ChIP-seq for MTHFD1 in HAP1 cell line treated with DMSO DMSO MTHFD1 1 ../../raw_files/SRR6202991.fastq.gz
MTHFD1_DMSO_2 MTHFD1_DMSO_2 HAP1_D2 MTHFD1_DMSO SRR6202992 SINGLE ChIP-seq for MTHFD1 in HAP1 cell line treated with DMSO DMSO MTHFD1 2 ../../raw_files/SRR6202992.fastq.gz
Loading