config/methods.config

import nextflow.util.SysHelper

includeConfig "${projectDir}/external/pipeline-Nextflow-config/config/schema/schema.config"
includeConfig "${projectDir}/external/pipeline-Nextflow-config/config/retry/retry.config"
includeConfig "${projectDir}/external/pipeline-Nextflow-config/config/methods/common_methods.config"
includeConfig "${projectDir}/external/pipeline-Nextflow-config/config/bam/bam_parser.config"
includeConfig "${projectDir}/external/pipeline-Nextflow-config/config/store_object_as_json/store_object_as_json.config"

methods {
    // Set the process-level `cache` directive from the user-supplied
    // `params.cache_intermediate_pipeline_steps` value.
    set_process = {
        process.cache = params.cache_intermediate_pipeline_steps
    }

    /**
     * Apply user-requested adjustments to the base resource allocations.
     *
     * Nothing happens unless `params.base_resource_update` is present and truthy.
     * Otherwise every (resource -> updates) entry is walked, and each
     * (processes, multiplier) pair inside it is forwarded to
     * `methods.update_base_resource_allocation`. A single process name given as
     * a string is wrapped in a one-element list before being forwarded.
     */
    modify_base_allocations = {
        // containsKey is checked first so the key is never read when it is absent
        def update_requested = params.containsKey('base_resource_update') && params.base_resource_update
        if (update_requested) {
            params.base_resource_update.each { resource_name, resource_updates ->
                resource_updates.each { target, factor ->
                    def target_list = target
                    if (custom_schema_types.is_string(target)) {
                        target_list = [target]
                    }
                    methods.update_base_resource_allocation(resource_name, factor, target_list)
                }
            }
        }
    }

    /**
     * Sanitize a raw identifier so it only contains a restricted character set.
     *
     * Every character other than ASCII letters, digits, `/`, `_`, `.` and `-` is
     * removed, and then each underscore is replaced with a hyphen.
     *
     * @param raw the value to sanitize; must be a non-empty String or GString
     * @return the sanitized string
     * @throws Exception when `raw` is not a String/GString (including null) or is empty
     */
    sanitize_string = { raw ->
        def is_string = [String, GString].any{ raw in it }
        // The error message promises a non-empty string, so reject empty input too
        // instead of silently returning an empty identifier.
        if (!is_string || raw.length() == 0) {
            throw new Exception("Input to sanitize is either empty or not a string! Provide a non-empty string.")
        }
        def disallowed_characters = /[^a-zA-Z\d\/_.-]/
        return raw.replaceAll(disallowed_characters, '').replace('_', '-')
    }

    /**
     * Build `params.samples_to_process` from the BAM inputs.
     *
     * For every sample map under each key of `params.input`, the BAM header is
     * parsed with `bam_parser.parse_bam_header` and the distinct `SM` values of
     * its read groups are collected. Each BAM must carry exactly one sample
     * name; that name is sanitized with `methods.sanitize_string` and recorded
     * together with the original name, the BAM path, the sample's
     * `contamination_table` entry and the `params.input` key it was listed under
     * (stored as `sample_type`).
     *
     * @throws Exception when a BAM contains more than one distinct SM tag, or
     *         when no SM tag can be found in its read groups
     */
    get_ids_from_bams = {
        params.samples_to_process = [] as Set
        params.input.each { k, v ->
            v.each { sampleMap ->
                def bam_path = sampleMap['BAM']
                def bam_header = bam_parser.parse_bam_header(bam_path)
                def sm_tags = bam_header['read_group'].collect{ it['SM'] }.unique()
                if (sm_tags.size() > 1) {
                    throw new Exception("${bam_path} contains multiple samples! Please run pipeline with single sample BAMs.")
                }
                // Fail with a specific message instead of letting a missing SM tag
                // surface as a generic sanitize_string error.
                if (sm_tags.isEmpty() || !sm_tags[0]) {
                    throw new Exception("${bam_path} does not contain an SM tag in its read groups! Please provide a BAM with a sample name set.")
                }
                // `def` keeps the variable local; without it the assignment leaks
                // out of the closure into the enclosing script/config scope.
                def sm_tag = methods.sanitize_string(sm_tags[0])
                params.samples_to_process.add([
                    'orig_id': sm_tags[0],
                    'id': sm_tag,
                    'path': bam_path,
                    'contamination_table': sampleMap['contamination_table'],
                    'sample_type': k]
                )
            }
        }
    }

    /**
     * Prepare sample IDs and the list of inputs when the pipeline is given VCFs.
     *
     * Sanitizes `params.input_tumor_id` and `params.input_normal_id` into
     * `params.tumor_id` / `params.normal_id`, uses the tumor ID as
     * `params.sample_id`, and records one entry per `params.input` item in
     * `params.samples_to_process`, with the item's key stored as `algorithm`
     * and its value stored as `path`.
     */
    get_vcfs_to_process = {
        def clean_tumor_id = methods.sanitize_string(params.input_tumor_id)
        params.tumor_id = clean_tumor_id
        params.sample_id = clean_tumor_id
        params.normal_id = methods.sanitize_string(params.input_normal_id)

        params.samples_to_process = [] as Set
        params.input.each { algorithm_name, vcf_path ->
            def vcf_entry = ['path': vcf_path, 'algorithm': algorithm_name]
            params.samples_to_process << vcf_entry
        }
    }

    /**
     * Derive sample-level parameters from `params.samples_to_process`.
     *
     * `params.single_NT_paired` is true only when there is exactly one tumor and
     * exactly one normal sample; in that case `params.tumor_id` and
     * `params.normal_id` are set to those samples' IDs. `params.sample_id` is
     * `params.patient_id` when there are several tumors, otherwise the ID of the
     * first tumor in the list.
     */
    set_sample_params = {
        params.single_NT_paired = false

        def all_samples = params.samples_to_process
        def tumor_ids = all_samples.findAll { sample -> sample['sample_type'] == 'tumor' }.collect { it['id'] }
        def normal_ids = all_samples.findAll { sample -> sample['sample_type'] == 'normal' }.collect { it['id'] }

        def is_single_pair = (tumor_ids.size() == 1) && (normal_ids.size() == 1)
        if (is_single_pair) {
            params.single_NT_paired = true
            // these are used directly for all non-mutect2 tools
            params.tumor_id = tumor_ids[0]
            params.normal_id = normal_ids[0]
        }

        // patient_id is only read when more than one tumor is present
        params.sample_id = (tumor_ids.size() > 1) ? params.patient_id : tumor_ids[0]
    }

    /**
     * Fill in the Mutect2-related parameters.
     *
     * `params.germline` records whether `params.germline_resource_gnomad_vcf`
     * was supplied with a truthy value. When it was not, the parameter is set to
     * a `NO_FILE.vcf.gz` path under `params.work_dir`. The index path is always
     * the VCF path with `.tbi` appended. `params.panel_of_normals_vcf` defaults
     * to an empty string when the key is absent.
     */
    set_mutect2_params = {
        // containsKey is checked first so the key is never read when it is absent
        def gnomad_supplied = params.containsKey("germline_resource_gnomad_vcf") && params.germline_resource_gnomad_vcf
        if (!gnomad_supplied) {
            params.germline_resource_gnomad_vcf = "${params.work_dir}/NO_FILE.vcf.gz"
        }
        params.germline = gnomad_supplied
        params.germline_resource_gnomad_vcf_index = "${params.germline_resource_gnomad_vcf}.tbi"

        def pon_declared = params.containsKey("panel_of_normals_vcf")
        if (!pon_declared) {
            params.panel_of_normals_vcf = ""
        }
    }

    /**
     * Verify that every entry of `params.algorithm` is allowed for the inputs.
     *
     * With a single tumor/normal pair (`params.single_NT_paired`) the allowed
     * algorithms are somaticsniper, strelka2, mutect2 and muse; otherwise only
     * mutect2 is allowed.
     *
     * @throws Exception on the first entry of `params.algorithm` that is not allowed
     */
    check_valid_algorithms = {
        // `def` keeps the list local; without it the assignment leaks out of the
        // closure into the enclosing script/config scope.
        def valid_algorithms = params.single_NT_paired ? ['somaticsniper', 'strelka2', 'mutect2', 'muse'] : ['mutect2']
        for (algo in params.algorithm) {
            if (!(algo in valid_algorithms)) {
                throw new Exception("ERROR: params.algorithm ${params.algorithm} contains an invalid value. Valid algorithms for given inputs: ${valid_algorithms}")
            }
        }
    }

    /**
     * Compute the output locations for this run.
     *
     * `params.output_dir_base` is `<output_dir>/<manifest name>-<manifest version>/<sample_id>`.
     * `params.log_output_dir` is a `log-<manifest name>-<manifest version>-<timestamp>`
     * directory beneath it, where the timestamp is the current time formatted
     * in UTC as `yyyyMMdd'T'HHmmss'Z'`.
     */
    set_output_directory = {
        def utc_stamp = new Date().format("yyyyMMdd'T'HHmmss'Z'", TimeZone.getTimeZone("UTC"))
        params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample_id}"
        params.log_output_dir = "${params.output_dir_base}/log-${manifest.name}-${manifest.version}-${utc_stamp}"
    }

    /**
     * Fill in the intersect-regions parameters.
     *
     * `params.use_intersect_regions` records whether `params.intersect_regions`
     * was supplied with a truthy value. When it was, the index path is the
     * regions path with `.tbi` appended. When it was not, both the regions file
     * and its index are set to `NO_FILE.bed` paths under `params.work_dir`.
     */
    set_intersect_regions_params = {
        // containsKey is checked first so the key is never read when it is absent
        def regions_supplied = params.containsKey("intersect_regions") && params.intersect_regions
        if (!regions_supplied) {
            params.intersect_regions = "${params.work_dir}/NO_FILE.bed"
        }
        params.intersect_regions_index = regions_supplied
            ? "${params.intersect_regions}.tbi"
            : "${params.work_dir}/NO_FILE.bed.tbi"
        params.use_intersect_regions = regions_supplied
    }

    // Enable the trace, timeline and report outputs and direct each of them to a
    // file under "${params.log_output_dir}/nextflow-log". Reads
    // params.log_output_dir, which set_output_directory assigns.
    set_pipeline_log = {
        // Trace file
        trace.enabled = true
        trace.file = "${params.log_output_dir}/nextflow-log/trace.txt"

        // Timeline file
        timeline.enabled = true
        timeline.file = "${params.log_output_dir}/nextflow-log/timeline.html"

        // Report file
        report.enabled = true
        report.file = "${params.log_output_dir}/nextflow-log/report.html"
    }

    // Entry point that prepares the whole configuration: loads the custom schema
    // types, validates the parameters, derives the sample parameters for the
    // given input type, and then runs the remaining setup steps in order.
    // Statement order matters: later steps read params assigned by earlier ones
    // (e.g. set_output_directory reads params.sample_id, and set_pipeline_log
    // reads params.log_output_dir).
    setup = {
        schema.load_custom_types("${projectDir}/config/custom_schema_types.config")
        schema.validate()
        if (params.input_type == 'bam') {
            // BAM input: sample IDs come from the BAM headers
            methods.get_ids_from_bams()
            methods.set_sample_params()
            methods.set_mutect2_params()
            methods.check_valid_algorithms()
        } else {
            // Any other input type is handled as VCF input
            methods.get_vcfs_to_process()
        }
        methods.set_process()
        // NOTE(review): set_resources_allocation, set_env, setup_docker_cpus and
        // setup_process_afterscript are not defined in this file; presumably they
        // come from the included common_methods.config — confirm.
        methods.set_resources_allocation()
        methods.modify_base_allocations()
        retry.setup_retry()
        methods.set_env()
        methods.set_output_directory()
        methods.set_intersect_regions_params()
        methods.set_pipeline_log()
        methods.setup_docker_cpus()
        methods.setup_process_afterscript()
        // Write params to params.json in the log directory
        json_extractor.store_object_as_json( // must be last
            params,
            new File("${params.log_output_dir}/nextflow-log/params.json")
        )
    }
}