diff --git a/.github/workflows/render-puml.yaml b/.github/workflows/render-puml.yaml new file mode 100644 index 00000000..23e86213 --- /dev/null +++ b/.github/workflows/render-puml.yaml @@ -0,0 +1,20 @@ +--- +name: PlantUML Generation + +on: + push: + paths: + - '**.puml' + workflow_dispatch: + +jobs: + plantuml: + runs-on: ubuntu-latest + + steps: + - name: Generate PUML diagrams + uses: uclahs-cds/tool-PlantUML-action@v1.0.0 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + ghcr-username: ${{ github.actor }} + ghcr-password: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index bf5d1176..c58a74aa 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ python submit_nextflow_pipeline.py \ ## Flow Diagram -![recalibrate-BAM flow diagram](docs/recalibrate-BAM.png) +![recalibrate-BAM flow diagram](docs/recalibrate-bam-flow.svg) --- diff --git a/docs/recalibrate-bam-flow.puml b/docs/recalibrate-bam-flow.puml new file mode 100644 index 00000000..3b877f32 --- /dev/null +++ b/docs/recalibrate-bam-flow.puml @@ -0,0 +1,114 @@ +@startuml + +skinparam SwimlaneTitleFontStyle bold + + +|s| Parallelized by sample +|i| Parallelized by interval + + +|s| + +start + +partition "**Unparallelized Setup**\nThis block is run once\nregardless of sample count" { + if (Parallelize by\nchromosome?) 
is (Yes) then + :==run_SplitIntervals_GATK + ---- + Split reference genome into + interval lists by chromosome + (1-22, X, Y, M, nonassembled); + else (No) + :==run_SplitIntervals_GATK + ---- + Split reference genome into + **scatter_count** interval lists; + endif +} + +:==run_validate_PipeVal +---- +Validate the input BAM and index file; + + +|i| + +:==run_RealignerTargetCreator_GATK +---- +Split input BAMs by interval and identify +potentially misaligned sub-intervals to +target across all input samples; + +:==run_IndelRealigner_GATK +---- +Realign indels across all input +samples simultaneously; + +|s| + +:==run_BaseRecalibrator_GATK +---- +Generate base quality score recalibration +(BQSR) table based on read group, reported +quality score, machine cycle, and nucleotide +context; + +|i| + +:==run_ApplyBQSR_GATK +---- +Apply the recalibration to each input +sample sequentially; + +|s| + +:==run_MergeSamFiles_Picard +---- +Merge interval BAMs into recalibrated BAM; + +if (Parallelize by\nchromosome?) is (No) then + :==deduplicate_records_SAMtools + ---- + Remove duplicate reads due to + overlap on interval splitting sites; +else (Yes) +endif + +:==run_index_SAMtools +---- +Create index file for recalibrated BAM; + +:==calculate_sha512 +---- +Generate sha512 checksum for +recalibrated BAM and index file; + +split + :==run_GetPileupSummaries_GATK + ---- + Summarize counts of reads that support + reference, alternate, and other alleles + for given sites; + + :==run_CalculateContamination_GATK + ---- + Calculate the fraction of reads coming + from cross-sample contamination. + + If the input is a paired sample, run + again in matched normal mode; +split again + if (Compute depth\nof coverage?) is (Yes) then + :==run_DepthOfCoverage_GATK + ---- + Assess sequence coverage by a wide array + of metrics, partitioned by sample, read + group, and library; + endif +end split + +stop + + +@enduml +