diff --git a/Dockerfile b/Dockerfile index bcc4606..472054c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,18 +19,18 @@ RUN pip3 install --ignore-installed \ html5lib RUN cd /tmp \ - && wget https://github.com/lh3/bwa/releases/download/v0.7.13/bwa-0.7.13.tar.bz2 \ - && echo "559b3c63266e5d5351f7665268263dbb9592f3c1c4569e7a4a75a15f17f0aedc *bwa-0.7.13.tar.bz2" | sha256sum --check \ - && tar xf bwa-0.7.13.tar.bz2 \ - && cd bwa-0.7.13 \ + && wget https://github.com/lh3/bwa/releases/download/v0.7.17/bwa-0.7.17.tar.bz2 \ + && echo "de1b4d4e745c0b7fc3e107b5155a51ac063011d33a5d82696331ecf4bed8d0fd *bwa-0.7.17.tar.bz2" | sha256sum --check \ + && tar xf bwa-0.7.17.tar.bz2 \ + && cd bwa-0.7.17 \ && make -j$(nproc) \ && mv bwa /usr/local/bin RUN cd /tmp \ - && wget https://github.com/alexdobin/STAR/archive/2.7.1a.tar.gz \ - && echo "9a35bf4e8a12bec505e11132bc53f94671f596584a6a0dd8f237120dd0df740e *2.7.1a.tar.gz" | sha256sum --check \ - && tar xf 2.7.1a.tar.gz \ - && mv STAR-2.7.1a/bin/Linux_x86_64_static/STAR /usr/local/bin + && wget https://github.com/alexdobin/STAR/archive/refs/tags/2.7.10a.tar.gz \ + && echo "af0df8fdc0e7a539b3ec6665dce9ac55c33598dfbc74d24df9dae7a309b0426a *2.7.10a.tar.gz" | sha256sum --check \ + && tar xf 2.7.10a.tar.gz \ + && mv STAR-2.7.10a/bin/Linux_x86_64_static/STAR /usr/local/bin # bz2 and lzma support is for CRAM files. curses is for `samtools tview`. 
RUN cd /tmp \ @@ -101,6 +101,4 @@ COPY --chmod=755 --from=builder /opt/picard /opt/picard COPY --chmod=755 --from=builder /opt/xenocp /opt/xenocp COPY --chmod=755 --from=builder /opt/xenocp/bin/* /usr/local/bin/ -COPY --chmod=755 cwl /opt/xenocp/cwl - -ENTRYPOINT ["cwl-runner", "--parallel", "--outdir", "results", "--no-container", "/opt/xenocp/cwl/xenocp.cwl"] +COPY --chmod=755 cwl /opt/xenocp/cwl \ No newline at end of file diff --git a/README.md b/README.md index 6c28d83..ba03578 100755 --- a/README.md +++ b/README.md @@ -1,5 +1,30 @@ # XenoCP +- [XenoCP](#xenocp) + - [Quick Start](#quick-start) + - [Introduction to XenoCP](#introduction-to-xenocp) + - [Reference Files](#reference-files) + - [BWA for DNA Reads](#bwa-for-dna-reads) + - [STAR for RNA Reads](#star-for-rna-reads) + - [Local Usage without Docker](#local-usage-without-docker) + - [Prerequisites](#prerequisites) + - [Obtain and Build XenoCP](#obtain-and-build-xenocp) + - [Inputs](#inputs) + - [Run](#run) + - [Local Usage with Docker](#local-usage-with-docker) + - [Build Docker image](#build-docker-image) + - [Run](#run-1) + - [Singularity as a Docker alternative](#singularity-as-a-docker-alternative) + - [WDL workflow](#wdl-workflow) + - [WDL reference files](#wdl-reference-files) + - [Running WDL](#running-wdl) + - [Evaluate test data results](#evaluate-test-data-results) + - [St. Jude Cloud](#st-jude-cloud) + - [Availability](#availability) + - [Seeking help](#seeking-help) + - [Citing XenoCP](#citing-xenocp) + - [Common Issues](#common-issues) + XenoCP is a tool for cleansing mouse reads in xenograft BAMs. XenoCP can be easily incorporated into any workflow, as it takes a BAM file as input and efficiently cleans up the mouse contamination. 
The output is a clean @@ -141,8 +166,8 @@ aligner: "bwa aln" For example, a prefix of `MGSCv37.fa` would assume for bwa alignment that the following files in the same directory exist: `MGSCv37.fa.amb`, `MGSCv37.fa.ann`, `MGSCv37.fa.bwt`, -`MGSCv37.fa.pac`, and `MGSCv37.fa.sa`. -For STAR alignment, `ref_db_prefix` should be a directory and +`MGSCv37.fa.pac`, and `MGSCv37.fa.sa`. `index` should be the path to that folder. +For STAR alignment, `index` should be a directory and it would assume the following files exist in the directory: `chrLength.txt`, `chrNameLength.txt`, `chrName.txt`, `chrStart.txt`, `exonGeTrInfo.tab`, `exonInfo.tab`, `geneInfo.tab`, `Genome`, @@ -195,10 +220,10 @@ $ docker build --tag xenocp . ### Run -The Docker image uses `cwl-runner cwl/xenocp.cwl` as its entrypoint. +The Docker image does not provide an entrypoint. -The image assumes three working directories: `/data` for inputs, `/references` for -reference files, and `/results` for outputs. `/data` and `/references` can be +The image assumes three working directories: `/data` for inputs, `/reference` for +reference files, and `/results` for outputs. `/data` and `/reference` can be read-only, where as `/results` needs write access. The paths given in the input parameters file must be from inside the @@ -208,13 +233,16 @@ container, not the host, e.g., bam: class: File path: /data/sample.bam -ref_db_prefix: /reference/ref.fa +ref_db_prefix: ref.fa +index: + class: Directory + path: /reference aligner: "bwa aln" ``` -The following is an example `run` command where files are stored in `test/{data,reference}`. Outputs are saved in `test/results`. +The following is an example `run` command where the data files are stored in the current directory under `sample_data/input_data`. Outputs are saved in `results` in the current directory. The path to the reference files on the host machine needs to be provided. -This example assumes you are running against Mus musculus (genome build MGSCv37). 
Set the path to the folder containing your reference data +This example assumes you are running against *Mus musculus* (genome build MGSCv37). Set the path to the folder containing your reference data and run the following command to produce output from the included sample data. Test output for comparison is located at `sample_data/output_data`. ``` @@ -223,7 +251,33 @@ $ docker run \ --mount type=bind,source=$(pwd)/sample_data/input_data,target=/data,readonly \ --mount type=bind,source=/path/to/reference,target=/reference,readonly \ --mount type=bind,source=$(pwd)/results,target=/results \ - xenocp \ + ghcr.io/stjude/xenocp:latest \ + cwl-runner \ + --parallel \ + --outdir results \ + --no-container \ + /opt/xenocp/cwl/xenocp.cwl \ + /data/inputs.yml +``` + +### Singularity as a Docker alternative + +Singularity is an experimental container solution that is an HPC-friendly alternative to Docker. For many reasons, `singularity` is not a drop-in replacement for Docker. Many applications require modification to fully run with `singularity`. This alternative is provided on a best-effort basis. If issues are encountered, please open an issue on this repository with details and the maintainers will try to provide support where possible. In the command below, `--containall` isolates the container from the host and `-W` provides a directory with sufficient space to use as the working directory. + +``` +$ mkdir $(pwd)/results +$ singularity run \ + --containall \ + -W /path/to/directory \ + -B $(pwd)/sample_data/input_data:/data \ + -B /path/to/reference:/reference \ + -B $(pwd)/results:/results \ + docker://ghcr.io/stjude/xenocp:latest \ + cwl-runner \ + --parallel \ + --outdir results \ + --no-container \ + /opt/xenocp/cwl/xenocp.cwl \ /data/inputs.yml ``` @@ -232,8 +286,30 @@ default temporary file location, /tmp, is small. To solve this, include `-W ` when executing via Singularity to redirect temp files to a larger directory ``. +Note: By default, `singularity` makes many host resources available inside the container. 
This is in contrast with Docker's native isolation. This also tends to cause conflicts and errors when running Docker-based workflows. Therefore, we recommend always using the `--containall` option to Singularity. + [Dockerfile]: ./Dockerfile +## WDL workflow + +XenoCP includes a [WDL](https://github.com/openwdl/wdl) workflow implementation. This can be run locally or on a supported HPC system. It can also use Docker or Singularity for containerization. + +### WDL reference files + +As of v1.2, WDL does not support directory inputs. Therefore the reference files provided to the WDL workflow must be compressed (`.tar.gz`) before running. The compressed reference files can be downloaded from [Zenodo](https://zenodo.org/records/10162103). + +### Running WDL + +To run the WDL workflow, you will need a WDL engine. We suggest [miniwdl](https://github.com/chanzuckerberg/miniwdl); the [Cromwell](https://github.com/broadinstitute/cromwell/) engine should also work, but is untested with XenoCP. + +After acquiring the reference files for your chosen aligner, you can run the sample data through the WDL workflow with the following command. + +``` +miniwdl run https://raw.githubusercontent.com/stjude/XenoCP/main/wdl/workflows/xenocp.wdl input_bam=https://github.com/stjude/XenoCP/raw/main/sample_data/input_data/SJRB001_X.subset.bam input_bai=https://github.com/stjude/XenoCP/raw/main/sample_data/input_data/SJRB001_X.subset.bam.bai reference_tar_gz=MGSCv37_bwa.tar.gz aligner='bwa aln' +``` + +This will run all of the steps on the local machine with Docker. The WDL runner `miniwdl` supports alternative execution modes, such as the [Singularity](https://miniwdl.readthedocs.io/en/latest/runner_backends.html#singularity-beta) container engine, or [Slurm](https://github.com/miniwdl-ext/miniwdl-slurm) and [LSF](https://github.com/adthrasher/miniwdl-lsf) for batch systems. 
Alternative execution modes can be specified using `miniwdl`'s [configuration system](https://miniwdl.readthedocs.io/en/latest/runner_reference.html#configuration). + ## Evaluate test data results If you have [bcftools] and a [GRCh37-lite] reference file, the following will show two variants in the input file. diff --git a/cwl/bwa_alignse_onlymapped.cwl b/cwl/bwa_alignse_onlymapped.cwl index 0a66ef4..6eaf03e 100755 --- a/cwl/bwa_alignse_onlymapped.cwl +++ b/cwl/bwa_alignse_onlymapped.cwl @@ -11,12 +11,18 @@ hints: specs: ["bwa aln", "bwa samse"] tweak_sam: specs: ["java.sh org.stjude.compbio.sam.TweakSam"] + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" inputs: ref_db_prefix: type: string inputBinding: position: 1 + valueFrom: | + ${ + return inputs.index.path + "/" + self; + } input_fastq: type: File inputBinding: @@ -26,6 +32,8 @@ inputs: label: Must be an output bam file name, not an absolute path inputBinding: position: 3 + index: + type: Directory outputs: bam: diff --git a/cwl/bwa_mem_onlymapped.cwl b/cwl/bwa_mem_onlymapped.cwl index 1e9db9d..aaef44a 100644 --- a/cwl/bwa_mem_onlymapped.cwl +++ b/cwl/bwa_mem_onlymapped.cwl @@ -11,6 +11,8 @@ hints: specs: ["bwa mem"] tweak_sam: specs: ["java.sh org.stjude.compbio.sam.TweakSam"] + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" requirements: ResourceRequirement: @@ -22,6 +24,10 @@ inputs: type: string inputBinding: position: 1 + valueFrom: | + ${ + return inputs.index.path + "/" + self; + } input_fastq: type: File inputBinding: @@ -31,6 +37,8 @@ inputs: label: Must be an output bam file name, not an absolute path inputBinding: position: 3 + index: + type: Directory outputs: bam: diff --git a/cwl/cat.cwl b/cwl/cat.cwl index 87dd395..c3aab65 100644 --- a/cwl/cat.cwl +++ b/cwl/cat.cwl @@ -6,7 +6,11 @@ doc: | Merge a set of files into file using the cat utility. 
requirements: - - class: InlineJavascriptRequirement + InlineJavascriptRequirement: {} + +hints: + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" baseCommand: cat diff --git a/cwl/create_contam_lists.cwl b/cwl/create_contam_lists.cwl index 4f78c08..bf64706 100755 --- a/cwl/create_contam_lists.cwl +++ b/cwl/create_contam_lists.cwl @@ -9,6 +9,8 @@ hints: packages: create_contam_list: specs: [ "java.sh org.stjude.compbio.xenocp.CreateContamLists" ] + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" inputs: input_bam: diff --git a/cwl/extract.cwl b/cwl/extract.cwl index aeac96d..a65d50c 100644 --- a/cwl/extract.cwl +++ b/cwl/extract.cwl @@ -37,6 +37,9 @@ steps: out: [out_bam] run: class: CommandLineTool + hints: + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" stdout: other.bam inputs: bam: @@ -62,6 +65,9 @@ steps: scatter: chroms run: class: CommandLineTool + hints: + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" inputs: chroms: type: string @@ -88,6 +94,9 @@ steps: out: [unmapped_bam] run: class: CommandLineTool + hints: + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" stdout: unmapped.bam inputs: bam: diff --git a/cwl/get_chroms.cwl b/cwl/get_chroms.cwl index 52717f4..ac71ea2 100755 --- a/cwl/get_chroms.cwl +++ b/cwl/get_chroms.cwl @@ -7,6 +7,8 @@ hints: packages: bam_to_chr: specs: ["bam_to_chrs.sh"] + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" baseCommand: bam_to_chrs.sh diff --git a/cwl/merge_markdup_index.cwl b/cwl/merge_markdup_index.cwl index 2c8458f..640a8bb 100755 --- a/cwl/merge_markdup_index.cwl +++ b/cwl/merge_markdup_index.cwl @@ -10,6 +10,10 @@ doc: | requirements: - class: InlineJavascriptRequirement +hints: + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" + baseCommand: merge_markdup_index.sh inputs: diff --git a/cwl/qc_bam.cwl b/cwl/qc_bam.cwl index ab4225b..5b23fdc 100755 --- a/cwl/qc_bam.cwl +++ b/cwl/qc_bam.cwl @@ -10,6 +10,8 
@@ hints: packages: qclib: specs: ["qclib.sh"] + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" inputs: bam: diff --git a/cwl/sort_flagstat.cwl b/cwl/sort_flagstat.cwl index 303b254..c40070d 100755 --- a/cwl/sort_flagstat.cwl +++ b/cwl/sort_flagstat.cwl @@ -18,6 +18,8 @@ hints: samtools: specs: ["samtools flagstat"] version: ["1.3.1"] + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" inputs: input_bam: diff --git a/cwl/split_sam.cwl b/cwl/split_sam.cwl index 7c445f1..368dc24 100755 --- a/cwl/split_sam.cwl +++ b/cwl/split_sam.cwl @@ -9,6 +9,8 @@ hints: packages: SplitSam: specs: [ "SplitSam.java" ] + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" inputs: suffix_length: diff --git a/cwl/star_onlymapped.cwl b/cwl/star_onlymapped.cwl index 60c31b8..2e2ecb3 100644 --- a/cwl/star_onlymapped.cwl +++ b/cwl/star_onlymapped.cwl @@ -16,12 +16,14 @@ hints: specs: ["STAR"] tweak_sam: specs: ["java.sh org.stjude.compbio.sam.TweakSam"] + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" inputs: ref_db_prefix: type: string inputBinding: - position: 1 + position: 4 input_fastq: type: File inputBinding: @@ -31,6 +33,10 @@ inputs: label: Must be an output bam file name, not an absolute path inputBinding: position: 3 + index: + type: Directory + inputBinding: + position: 1 outputs: bam: diff --git a/cwl/tweak_sam.cwl b/cwl/tweak_sam.cwl index 82c9d9d..fa6a49c 100755 --- a/cwl/tweak_sam.cwl +++ b/cwl/tweak_sam.cwl @@ -9,6 +9,8 @@ hints: packages: create_contam_list: specs: [ "java.sh org.stjude.compbio.sam.TweakSam" ] + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" inputs: input_bam: diff --git a/cwl/view_awk_picard.cwl b/cwl/view_awk_picard.cwl index 64ee101..01eaa50 100755 --- a/cwl/view_awk_picard.cwl +++ b/cwl/view_awk_picard.cwl @@ -15,6 +15,8 @@ hints: samtools: specs: ["samtools view"] version: ["1.3.1"] + DockerRequirement: + dockerPull: "ghcr.io/stjude/xenocp:latest" inputs: input_bam: diff --git 
a/cwl/xenocp.cwl b/cwl/xenocp.cwl index b741354..d36e53b 100755 --- a/cwl/xenocp.cwl +++ b/cwl/xenocp.cwl @@ -23,6 +23,8 @@ inputs: type: enum symbols: ["bwa aln", "bwa mem", "star"] name: aligner + index: + type: Directory # See doc in split_sam.cwl for the meaning of the following arguments suffix_length: type: int? @@ -84,6 +86,7 @@ steps: in: aligner: aligner ref_db_prefix: ref_db_prefix + index: index input_fastq: mapped-fastq/fastq output_bam: valueFrom: $(inputs.input_fastq.nameroot).contam.bam @@ -101,6 +104,7 @@ steps: in: aligner: aligner ref_db_prefix: ref_db_prefix + index: index input_fastq: mapped-fastq/fastq output_bam: valueFrom: $(inputs.input_fastq.nameroot).contam.bam @@ -118,6 +122,7 @@ steps: in: aligner: aligner ref_db_prefix: ref_db_prefix + index: index input_fastq: mapped-fastq/fastq output_bam: valueFrom: $(inputs.input_fastq.nameroot).contam.bam @@ -160,7 +165,7 @@ steps: out: [cleaned_bam] # Step05b: sort tie BAMs prior to merge - sort-bams: + sort_tie_bams: run: bio-cwl-tools:picard/picard_SortSam.cwl in: alignments: contamination/output_tie_bam @@ -190,7 +195,7 @@ steps: run: merge_markdup_index.cwl in: input_bams: - source: [cleanse/cleaned_bam, split/unmapped] + source: sort_tie_bams/sorted_alignments linkMerge: merge_flattened output_bam: source: bam diff --git a/sample_data/input_data/inputs.yml b/sample_data/input_data/inputs.yml index c1e4b88..005869f 100644 --- a/sample_data/input_data/inputs.yml +++ b/sample_data/input_data/inputs.yml @@ -1,5 +1,8 @@ bam: class: File path: /data/SJRB001_X.subset.bam -ref_db_prefix: /reference/MGSCv37/MGSCv37.fa +ref_db_prefix: MGSCv37.fa +index: + class: Directory + path: /reference/MGSCv37 aligner: "bwa aln" diff --git a/sample_data/input_data/inputs_local.yml b/sample_data/input_data/inputs_local.yml index 6bf28ad..4e133a6 100644 --- a/sample_data/input_data/inputs_local.yml +++ b/sample_data/input_data/inputs_local.yml @@ -1,5 +1,8 @@ bam: class: File path: SJRB001_X.subset.bam 
-ref_db_prefix: /path/to/reference +ref_db_prefix: MGSCv37.fa +index: + class: Directory + path: /path/to/reference aligner: "bwa aln" diff --git a/sample_data/output_data/SJRB001_X.subset.contam.txt b/sample_data/output_data/SJRB001_X.subset.contam.txt new file mode 100644 index 0000000..b2d2350 --- /dev/null +++ b/sample_data/output_data/SJRB001_X.subset.contam.txt @@ -0,0 +1,130 @@ +HWI-EAS404_103648183:1:17:7195:5142#0 +HWI-EAS404_103648183:1:8:18635:18783#0 +HWI-EAS404_103648183:2:120:5769:17386#0 +HWI-EAS404_103648183:2:57:2690:15572#0 +HWI-EAS404_103648183:3:86:16268:18479#0 +HWI-EAS404_103648183:6:20:14232:13962#0 +HWI-EAS404_103648183:6:64:9292:14561#0 +HWI-EAS404_103648183:7:20:2615:20823#0 +HWI-EAS404_103648183:8:55:17901:2212#0 +HWUSI-EAS1593_103644290:1:68:15529:14754#0 +HWUSI-EAS1593_103644290:2:106:3512:18732#0 +HWUSI-EAS1593_103644290:2:115:18104:3792#0 +HWUSI-EAS1593_103644290:2:13:18495:11792#0 +HWUSI-EAS1593_103644290:2:13:18495:11792#0 +HWUSI-EAS1593_103644290:2:93:2932:7997#0 +HWUSI-EAS1593_103644290:3:105:15017:15875#0 +HWUSI-EAS1593_103644290:3:114:6759:18783#0 +HWUSI-EAS1593_103644290:3:24:12959:9428#0 +HWUSI-EAS1593_103644290:3:28:18379:2887#0 +HWUSI-EAS1593_103644290:3:2:11478:3785#0 +HWUSI-EAS1593_103644290:3:2:11478:3785#0 +HWUSI-EAS1593_103644290:3:48:6929:11812#0 +HWUSI-EAS1593_103644290:3:48:8464:18534#0 +HWUSI-EAS1593_103644290:4:5:6998:4700#0 +HWUSI-EAS1593_103644290:4:62:5447:8358#0 +HWUSI-EAS1593_103644290:6:17:11906:4800#0 +HWUSI-EAS1593_103644290:6:17:11920:4786#0 +HWUSI-EAS1593_103644290:6:32:8034:18123#0 +HWUSI-EAS1593_103644290:6:77:13698:7102#0 +HWUSI-EAS1593_103644290:7:105:1371:7959#0 +HWUSI-EAS1593_103644290:7:94:17795:10319#0 +HWUSI-EAS664_103011728:1:67:6364:11171#0 +HWUSI-EAS664_103011728:5:14:4183:1273#0 +HWUSI-EAS664_103011728:5:19:13881:19082#0 +HWUSI-EAS664_103011728:5:21:13554:13485#0 +HWUSI-EAS664_103011728:7:6:8927:20508#0 +HWUSI-EAS664_103011728:7:78:2953:11032#0 +HWUSI-EAS664_103011728:7:94:14536:1660#0 
+HWI-EAS404_103648183:1:61:9311:9733#0 +HWI-EAS404_103648183:3:116:5388:9971#0 +HWI-EAS404_103648183:3:3:19441:4741#0 +HWI-EAS404_103648183:3:54:18420:9690#0 +HWI-EAS404_103648183:6:82:8801:7194#0 +HWI-EAS404_103648183:7:105:19635:14338#0 +HWI-EAS404_103648183:7:34:10144:13066#0 +HWI-EAS404_103648183:7:57:16200:10376#0 +HWI-EAS404_103648183:8:102:1342:6149#0 +HWUSI-EAS1593_103644290:2:106:15525:6513#0 +HWUSI-EAS1593_103644290:2:106:15525:6513#0 +HWUSI-EAS1593_103644290:2:107:19669:12584#0 +HWUSI-EAS1593_103644290:3:66:1758:9599#0 +HWUSI-EAS1593_103644290:3:8:18128:8348#0 +HWUSI-EAS1593_103644290:4:21:1649:9240#0 +HWUSI-EAS1593_103644290:4:26:5546:12091#0 +HWUSI-EAS1593_103644290:5:106:16837:12361#0 +HWUSI-EAS1593_103644290:5:21:19432:10478#0 +HWUSI-EAS1593_103644290:5:21:19432:10478#0 +HWUSI-EAS1593_103644290:6:18:14289:1442#0 +HWUSI-EAS1593_103644290:6:27:17283:1907#0 +HWUSI-EAS1593_103644290:6:27:17283:1907#0 +HWUSI-EAS1593_103644290:7:12:7103:13607#0 +HWUSI-EAS1593_103644290:8:57:9808:6218#0 +HWUSI-EAS664_103011728:3:14:16392:19813#0 +HWUSI-EAS664_103011728:4:17:19639:18970#0 +HWUSI-EAS664_103011728:6:2:2250:2628#0 +HWI-EAS404_103648183:2:44:9543:19907#0 +HWI-EAS404_103648183:2:97:10256:15065#0 +HWI-EAS404_103648183:5:60:18172:20626#0 +HWI-EAS404_103648183:5:63:12751:5076#0 +HWI-EAS404_103648183:5:67:4873:9007#0 +HWI-EAS404_103648183:6:102:18244:18298#0 +HWI-EAS404_103648183:6:64:1659:14184#0 +HWI-EAS404_103648183:7:104:14751:8248#0 +HWI-EAS404_103648183:7:95:10127:18355#0 +HWI-EAS404_103648183:8:23:7051:1309#0 +HWI-EAS404_103648183:8:31:17167:13377#0 +HWI-EAS404_103648183:8:59:8152:5014#0 +HWUSI-EAS1593_103644290:1:24:15080:5186#0 +HWUSI-EAS1593_103644290:2:76:12687:6105#0 +HWUSI-EAS1593_103644290:2:76:12687:6105#0 +HWUSI-EAS1593_103644290:3:73:11409:6472#0 +HWUSI-EAS1593_103644290:4:108:10769:3121#0 +HWUSI-EAS1593_103644290:4:33:17694:6022#0 +HWUSI-EAS1593_103644290:4:43:15940:7379#0 +HWUSI-EAS1593_103644290:6:24:10224:20407#0 
+HWUSI-EAS1593_103644290:6:35:2854:9544#0 +HWUSI-EAS1593_103644290:7:102:16530:8855#0 +HWUSI-EAS1593_103644290:8:19:16783:12931#0 +HWUSI-EAS1593_103644290:8:19:16783:12931#0 +HWUSI-EAS1593_103644290:8:98:12433:18845#0 +HWUSI-EAS664_103011728:1:25:13348:20151#0 +HWUSI-EAS664_103011728:1:3:15736:15591#0 +HWUSI-EAS664_103011728:1:44:8871:12237#0 +HWUSI-EAS664_103011728:1:77:12982:15802#0 +HWUSI-EAS664_103011728:1:85:12261:8585#0 +HWUSI-EAS664_103011728:2:116:18392:19542#0 +HWUSI-EAS664_103011728:3:115:16959:19349#0 +HWUSI-EAS664_103011728:3:8:3427:16166#0 +HWUSI-EAS664_103011728:4:53:1149:18181#0 +HWUSI-EAS664_103011728:4:61:16145:16349#0 +HWUSI-EAS664_103011728:4:8:6572:13120#0 +HWUSI-EAS664_103011728:4:93:17161:5740#0 +HWUSI-EAS664_103011728:5:64:17127:9523#0 +HWUSI-EAS664_103011728:6:97:5148:16443#0 +HWI-EAS404_103648183:1:108:12456:8453#0 +HWI-EAS404_103648183:1:79:4193:16738#0 +HWI-EAS404_103648183:4:32:2968:2378#0 +HWI-EAS404_103648183:6:82:14435:7768#0 +HWI-EAS404_103648183:6:91:13996:19404#0 +HWI-EAS404_103648183:7:113:19082:9785#0 +HWI-EAS404_103648183:8:45:19659:17922#0 +HWUSI-EAS1593_103644290:1:12:11820:3481#0 +HWUSI-EAS1593_103644290:1:12:11820:3481#0 +HWUSI-EAS1593_103644290:5:108:15174:8419#0 +HWUSI-EAS1593_103644290:6:107:13567:17165#0 +HWUSI-EAS1593_103644290:6:107:13567:17165#0 +HWUSI-EAS1593_103644290:6:38:3272:3793#0 +HWUSI-EAS1593_103644290:6:7:16811:18321#0 +HWUSI-EAS1593_103644290:6:7:16811:18321#0 +HWUSI-EAS1593_103644290:7:16:10905:1912#0 +HWUSI-EAS1593_103644290:7:61:11369:20313#0 +HWUSI-EAS1593_103644290:8:79:3464:8962#0 +HWUSI-EAS1593_103644290:8:79:3464:8962#0 +HWUSI-EAS664_103011728:1:98:5168:11039#0 +HWUSI-EAS664_103011728:2:21:3790:3261#0 +HWUSI-EAS664_103011728:4:89:6229:18405#0 +HWUSI-EAS664_103011728:6:106:15535:17603#0 +HWUSI-EAS664_103011728:6:106:15535:17603#0 +HWUSI-EAS664_103011728:6:117:12300:20088#0 +HWUSI-EAS664_103011728:6:12:16393:4129#0 diff --git a/sample_data/output_data/SJRB001_X.subset.xenocp.bam 
b/sample_data/output_data/SJRB001_X.subset.xenocp.bam index a4e0ee9..4562219 100644 Binary files a/sample_data/output_data/SJRB001_X.subset.xenocp.bam and b/sample_data/output_data/SJRB001_X.subset.xenocp.bam differ diff --git a/sample_data/output_data/SJRB001_X.subset.xenocp.bam.bai b/sample_data/output_data/SJRB001_X.subset.xenocp.bam.bai index e44d2a0..e08027e 100644 Binary files a/sample_data/output_data/SJRB001_X.subset.xenocp.bam.bai and b/sample_data/output_data/SJRB001_X.subset.xenocp.bam.bai differ diff --git a/sample_data/output_data/SJRB001_X.subset.xenocp.bam.md5 b/sample_data/output_data/SJRB001_X.subset.xenocp.bam.md5 index cf438b7..49e5056 100644 --- a/sample_data/output_data/SJRB001_X.subset.xenocp.bam.md5 +++ b/sample_data/output_data/SJRB001_X.subset.xenocp.bam.md5 @@ -1 +1 @@ -e8ae9322439ed84ecc11432e2691b1b6 \ No newline at end of file +f0b03c1f3ac36111155229b70d409141 SJRB001_X.subset.xenocp.bam diff --git a/sample_data/output_data/SJRB001_X.subset.xenocp.flagstat.txt b/sample_data/output_data/SJRB001_X.subset.xenocp.flagstat.txt index 11c5bde..337055a 100644 --- a/sample_data/output_data/SJRB001_X.subset.xenocp.flagstat.txt +++ b/sample_data/output_data/SJRB001_X.subset.xenocp.flagstat.txt @@ -2,12 +2,12 @@ 0 + 0 secondary 0 + 0 supplementary 3 + 0 duplicates -572 + 0 mapped (81.37% : N/A) +572 + 0 mapped (81.37%:N/A) 694 + 0 paired in sequencing 352 + 0 read1 342 + 0 read2 -563 + 0 properly paired (81.12% : N/A) +563 + 0 properly paired (81.12%:N/A) 563 + 0 with itself and mate mapped -2 + 0 singletons (0.29% : N/A) +2 + 0 singletons (0.29%:N/A) 0 + 0 with mate mapped to a different chr 0 + 0 with mate mapped to a different chr (mapQ>=5) diff --git a/sample_data/output_data/SJRB001_X.subset.xenocp.tie.bam b/sample_data/output_data/SJRB001_X.subset.xenocp.tie.bam new file mode 100644 index 0000000..3bfbf1e Binary files /dev/null and b/sample_data/output_data/SJRB001_X.subset.xenocp.tie.bam differ diff --git 
a/sample_data/output_data/SJRB001_X.subset.xenocp.tie.bam.bai b/sample_data/output_data/SJRB001_X.subset.xenocp.tie.bam.bai new file mode 100644 index 0000000..2045f46 Binary files /dev/null and b/sample_data/output_data/SJRB001_X.subset.xenocp.tie.bam.bai differ diff --git a/sample_data/output_data/SJRB001_X.subset.xenocp.tie.bam.md5 b/sample_data/output_data/SJRB001_X.subset.xenocp.tie.bam.md5 new file mode 100644 index 0000000..dba8192 --- /dev/null +++ b/sample_data/output_data/SJRB001_X.subset.xenocp.tie.bam.md5 @@ -0,0 +1 @@ +83606b82af1694b0d1eb3bed4d528c89 SJRB001_X.subset.xenocp.tie.bam diff --git a/sample_data/output_data/xenocp-000.contam.txt b/sample_data/output_data/xenocp-000.contam.txt deleted file mode 100644 index b73bbff..0000000 --- a/sample_data/output_data/xenocp-000.contam.txt +++ /dev/null @@ -1,10 +0,0 @@ -HWI-EAS404_103648183:3:54:18420:9690#0 -HWI-EAS404_103648183:6:102:18244:18298#0 -HWI-EAS404_103648183:6:64:1659:14184#0 -HWI-EAS404_103648183:6:82:14435:7768#0 -HWUSI-EAS1593_103644290:3:105:15017:15875#0 -HWUSI-EAS1593_103644290:4:26:5546:12091#0 -HWUSI-EAS1593_103644290:7:16:10905:1912#0 -HWUSI-EAS1593_103644290:8:98:12433:18845#0 -HWUSI-EAS664_103011728:4:61:16145:16349#0 -HWUSI-EAS664_103011728:5:14:4183:1273#0 diff --git a/sample_data/output_data/xenocp-000.tie.bam b/sample_data/output_data/xenocp-000.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-000.tie.bam and /dev/null differ diff --git a/sample_data/output_data/xenocp-001.contam.txt b/sample_data/output_data/xenocp-001.contam.txt deleted file mode 100644 index fc6457c..0000000 --- a/sample_data/output_data/xenocp-001.contam.txt +++ /dev/null @@ -1,8 +0,0 @@ -HWI-EAS404_103648183:7:105:19635:14338#0 -HWI-EAS404_103648183:8:31:17167:13377#0 -HWUSI-EAS1593_103644290:1:24:15080:5186#0 -HWUSI-EAS1593_103644290:2:76:12687:6105#0 -HWUSI-EAS1593_103644290:2:76:12687:6105#0 -HWUSI-EAS1593_103644290:3:114:6759:18783#0 
-HWUSI-EAS1593_103644290:5:106:16837:12361#0 -HWUSI-EAS664_103011728:1:44:8871:12237#0 diff --git a/sample_data/output_data/xenocp-001.tie.bam b/sample_data/output_data/xenocp-001.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-001.tie.bam and /dev/null differ diff --git a/sample_data/output_data/xenocp-002.contam.txt b/sample_data/output_data/xenocp-002.contam.txt deleted file mode 100644 index cbbba7f..0000000 --- a/sample_data/output_data/xenocp-002.contam.txt +++ /dev/null @@ -1,8 +0,0 @@ -HWI-EAS404_103648183:2:120:5769:17386#0 -HWI-EAS404_103648183:5:63:12751:5076#0 -HWI-EAS404_103648183:6:20:14232:13962#0 -HWUSI-EAS1593_103644290:1:12:11820:3481#0 -HWUSI-EAS1593_103644290:1:12:11820:3481#0 -HWUSI-EAS1593_103644290:3:48:6929:11812#0 -HWUSI-EAS1593_103644290:4:43:15940:7379#0 -HWUSI-EAS1593_103644290:7:12:7103:13607#0 diff --git a/sample_data/output_data/xenocp-002.tie.bam b/sample_data/output_data/xenocp-002.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-002.tie.bam and /dev/null differ diff --git a/sample_data/output_data/xenocp-003.contam.txt b/sample_data/output_data/xenocp-003.contam.txt deleted file mode 100644 index 7368e48..0000000 --- a/sample_data/output_data/xenocp-003.contam.txt +++ /dev/null @@ -1,8 +0,0 @@ -HWI-EAS404_103648183:6:91:13996:19404#0 -HWI-EAS404_103648183:7:20:2615:20823#0 -HWI-EAS404_103648183:8:55:17901:2212#0 -HWUSI-EAS1593_103644290:3:8:18128:8348#0 -HWUSI-EAS1593_103644290:7:105:1371:7959#0 -HWUSI-EAS1593_103644290:8:79:3464:8962#0 -HWUSI-EAS1593_103644290:8:79:3464:8962#0 -HWUSI-EAS664_103011728:1:25:13348:20151#0 diff --git a/sample_data/output_data/xenocp-003.tie.bam b/sample_data/output_data/xenocp-003.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-003.tie.bam and /dev/null differ diff --git a/sample_data/output_data/xenocp-004.contam.txt 
b/sample_data/output_data/xenocp-004.contam.txt deleted file mode 100644 index 78070ff..0000000 --- a/sample_data/output_data/xenocp-004.contam.txt +++ /dev/null @@ -1,13 +0,0 @@ -HWI-EAS404_103648183:1:79:4193:16738#0 -HWI-EAS404_103648183:7:113:19082:9785#0 -HWI-EAS404_103648183:7:95:10127:18355#0 -HWUSI-EAS1593_103644290:2:106:3512:18732#0 -HWUSI-EAS1593_103644290:5:21:19432:10478#0 -HWUSI-EAS1593_103644290:5:21:19432:10478#0 -HWUSI-EAS1593_103644290:6:17:11920:4786#0 -HWUSI-EAS1593_103644290:6:27:17283:1907#0 -HWUSI-EAS1593_103644290:6:27:17283:1907#0 -HWUSI-EAS1593_103644290:7:102:16530:8855#0 -HWUSI-EAS664_103011728:1:67:6364:11171#0 -HWUSI-EAS664_103011728:6:12:16393:4129#0 -HWUSI-EAS664_103011728:7:94:14536:1660#0 diff --git a/sample_data/output_data/xenocp-004.tie.bam b/sample_data/output_data/xenocp-004.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-004.tie.bam and /dev/null differ diff --git a/sample_data/output_data/xenocp-005.contam.txt b/sample_data/output_data/xenocp-005.contam.txt deleted file mode 100644 index 04ff5f0..0000000 --- a/sample_data/output_data/xenocp-005.contam.txt +++ /dev/null @@ -1,14 +0,0 @@ -HWI-EAS404_103648183:1:17:7195:5142#0 -HWI-EAS404_103648183:1:61:9311:9733#0 -HWI-EAS404_103648183:4:32:2968:2378#0 -HWI-EAS404_103648183:5:60:18172:20626#0 -HWI-EAS404_103648183:6:82:8801:7194#0 -HWI-EAS404_103648183:7:34:10144:13066#0 -HWI-EAS404_103648183:8:59:8152:5014#0 -HWUSI-EAS1593_103644290:1:68:15529:14754#0 -HWUSI-EAS1593_103644290:2:115:18104:3792#0 -HWUSI-EAS1593_103644290:3:48:8464:18534#0 -HWUSI-EAS1593_103644290:3:73:11409:6472#0 -HWUSI-EAS1593_103644290:6:35:2854:9544#0 -HWUSI-EAS1593_103644290:7:61:11369:20313#0 -HWUSI-EAS664_103011728:4:93:17161:5740#0 diff --git a/sample_data/output_data/xenocp-005.tie.bam b/sample_data/output_data/xenocp-005.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-005.tie.bam and /dev/null 
differ diff --git a/sample_data/output_data/xenocp-006.contam.txt b/sample_data/output_data/xenocp-006.contam.txt deleted file mode 100644 index c2aa3a3..0000000 --- a/sample_data/output_data/xenocp-006.contam.txt +++ /dev/null @@ -1,18 +0,0 @@ -HWI-EAS404_103648183:2:44:9543:19907#0 -HWI-EAS404_103648183:2:97:10256:15065#0 -HWI-EAS404_103648183:3:86:16268:18479#0 -HWI-EAS404_103648183:6:64:9292:14561#0 -HWUSI-EAS1593_103644290:2:106:15525:6513#0 -HWUSI-EAS1593_103644290:2:106:15525:6513#0 -HWUSI-EAS1593_103644290:2:107:19669:12584#0 -HWUSI-EAS1593_103644290:6:18:14289:1442#0 -HWUSI-EAS1593_103644290:6:32:8034:18123#0 -HWUSI-EAS1593_103644290:6:77:13698:7102#0 -HWUSI-EAS1593_103644290:8:57:9808:6218#0 -HWUSI-EAS664_103011728:1:3:15736:15591#0 -HWUSI-EAS664_103011728:2:21:3790:3261#0 -HWUSI-EAS664_103011728:3:14:16392:19813#0 -HWUSI-EAS664_103011728:6:2:2250:2628#0 -HWUSI-EAS664_103011728:6:97:5148:16443#0 -HWUSI-EAS664_103011728:7:6:8927:20508#0 -HWUSI-EAS664_103011728:7:78:2953:11032#0 diff --git a/sample_data/output_data/xenocp-006.tie.bam b/sample_data/output_data/xenocp-006.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-006.tie.bam and /dev/null differ diff --git a/sample_data/output_data/xenocp-007.contam.txt b/sample_data/output_data/xenocp-007.contam.txt deleted file mode 100644 index 84312ac..0000000 --- a/sample_data/output_data/xenocp-007.contam.txt +++ /dev/null @@ -1,16 +0,0 @@ -HWI-EAS404_103648183:1:8:18635:18783#0 -HWI-EAS404_103648183:3:3:19441:4741#0 -HWUSI-EAS1593_103644290:2:13:18495:11792#0 -HWUSI-EAS1593_103644290:2:13:18495:11792#0 -HWUSI-EAS1593_103644290:3:24:12959:9428#0 -HWUSI-EAS1593_103644290:3:66:1758:9599#0 -HWUSI-EAS1593_103644290:4:108:10769:3121#0 -HWUSI-EAS1593_103644290:4:21:1649:9240#0 -HWUSI-EAS1593_103644290:4:33:17694:6022#0 -HWUSI-EAS1593_103644290:4:5:6998:4700#0 -HWUSI-EAS1593_103644290:4:62:5447:8358#0 -HWUSI-EAS1593_103644290:6:17:11906:4800#0 
-HWUSI-EAS1593_103644290:8:19:16783:12931#0 -HWUSI-EAS1593_103644290:8:19:16783:12931#0 -HWUSI-EAS664_103011728:4:53:1149:18181#0 -HWUSI-EAS664_103011728:5:19:13881:19082#0 diff --git a/sample_data/output_data/xenocp-007.tie.bam b/sample_data/output_data/xenocp-007.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-007.tie.bam and /dev/null differ diff --git a/sample_data/output_data/xenocp-008.contam.txt b/sample_data/output_data/xenocp-008.contam.txt deleted file mode 100644 index ba25ffa..0000000 --- a/sample_data/output_data/xenocp-008.contam.txt +++ /dev/null @@ -1,14 +0,0 @@ -HWI-EAS404_103648183:2:57:2690:15572#0 -HWI-EAS404_103648183:3:116:5388:9971#0 -HWI-EAS404_103648183:8:102:1342:6149#0 -HWUSI-EAS1593_103644290:2:93:2932:7997#0 -HWUSI-EAS1593_103644290:5:108:15174:8419#0 -HWUSI-EAS1593_103644290:6:38:3272:3793#0 -HWUSI-EAS664_103011728:1:77:12982:15802#0 -HWUSI-EAS664_103011728:1:85:12261:8585#0 -HWUSI-EAS664_103011728:1:98:5168:11039#0 -HWUSI-EAS664_103011728:2:116:18392:19542#0 -HWUSI-EAS664_103011728:3:115:16959:19349#0 -HWUSI-EAS664_103011728:3:8:3427:16166#0 -HWUSI-EAS664_103011728:4:89:6229:18405#0 -HWUSI-EAS664_103011728:5:64:17127:9523#0 diff --git a/sample_data/output_data/xenocp-008.tie.bam b/sample_data/output_data/xenocp-008.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-008.tie.bam and /dev/null differ diff --git a/sample_data/output_data/xenocp-009.contam.txt b/sample_data/output_data/xenocp-009.contam.txt deleted file mode 100644 index 9ad1af0..0000000 --- a/sample_data/output_data/xenocp-009.contam.txt +++ /dev/null @@ -1,8 +0,0 @@ -HWI-EAS404_103648183:7:104:14751:8248#0 -HWI-EAS404_103648183:7:57:16200:10376#0 -HWI-EAS404_103648183:8:23:7051:1309#0 -HWI-EAS404_103648183:8:45:19659:17922#0 -HWUSI-EAS1593_103644290:3:2:11478:3785#0 -HWUSI-EAS1593_103644290:3:2:11478:3785#0 -HWUSI-EAS1593_103644290:7:94:17795:10319#0 
-HWUSI-EAS664_103011728:6:117:12300:20088#0 diff --git a/sample_data/output_data/xenocp-009.tie.bam b/sample_data/output_data/xenocp-009.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-009.tie.bam and /dev/null differ diff --git a/sample_data/output_data/xenocp-010.contam.txt b/sample_data/output_data/xenocp-010.contam.txt deleted file mode 100644 index 74fdeb6..0000000 --- a/sample_data/output_data/xenocp-010.contam.txt +++ /dev/null @@ -1,13 +0,0 @@ -HWI-EAS404_103648183:1:108:12456:8453#0 -HWI-EAS404_103648183:5:67:4873:9007#0 -HWUSI-EAS1593_103644290:3:28:18379:2887#0 -HWUSI-EAS1593_103644290:6:107:13567:17165#0 -HWUSI-EAS1593_103644290:6:107:13567:17165#0 -HWUSI-EAS1593_103644290:6:24:10224:20407#0 -HWUSI-EAS1593_103644290:6:7:16811:18321#0 -HWUSI-EAS1593_103644290:6:7:16811:18321#0 -HWUSI-EAS664_103011728:4:17:19639:18970#0 -HWUSI-EAS664_103011728:4:8:6572:13120#0 -HWUSI-EAS664_103011728:5:21:13554:13485#0 -HWUSI-EAS664_103011728:6:106:15535:17603#0 -HWUSI-EAS664_103011728:6:106:15535:17603#0 diff --git a/sample_data/output_data/xenocp-010.tie.bam b/sample_data/output_data/xenocp-010.tie.bam deleted file mode 100644 index 0af432d..0000000 Binary files a/sample_data/output_data/xenocp-010.tie.bam and /dev/null differ diff --git a/src/main/scripts/star_onlymapped.sh b/src/main/scripts/star_onlymapped.sh index 8727833..26c99da 100755 --- a/src/main/scripts/star_onlymapped.sh +++ b/src/main/scripts/star_onlymapped.sh @@ -31,19 +31,19 @@ name=$(basename $BAM ".bam") # Ensure reads are not compressed reads=$(readlink -f ${FASTQ}) +read_cmd="" if file ${reads} | grep -c "gzip compressed data" then - gzip -dc ${reads} > reads.fq - reads="reads.fq" + read_cmd="--readFilesCommand zcat" elif file ${reads} | grep -c "bzip2 compressed data" then - bzip2 -dc ${reads} > reads.fq - reads="reads.fq" + read_cmd="--readFilesCommand bzcat" fi # STAR n_cores=$(nproc) cmd="STAR --genomeDir ${GENOMEDIR} \ + ${read_cmd} \ 
--readFilesIn ${reads} \ --runMode alignReads \ --runThreadN ${n_cores} \