// conf/sample_dataset1.config

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple pipeline test.

    Use as follows:
        nextflow run urlpipe -profile <docker/singularity>

----------------------------------------------------------------------------------------
*/

params {
    // --- Profile identity ---
    config_profile_name        = 'Sample dataset1'
    config_profile_description = 'Configuration using a minimal test dataset1: 6 samples from HQ50 (human cell line)'

    // --- Input samplesheet and output directory (relative paths) ---
    input  = './assets/samplesheet_dataset1.csv'
    outdir = './results_dataset1'

    // --- Reference sequence and the repeat tract inside it ---
    // NOTE(review): 69..218 covers 150 positions if both ends are inclusive,
    // i.e. 50 units of a 3-bp repeat -- confirm the coordinate convention.
    ref              = './assets/IlluminaHsQ50FibTrim_Ref.fa'
    ref_repeat_start = 69
    ref_repeat_end   = 218
    ref_repeat_unit  = 'CAG'

    // --- Analysis settings ---
    length_mode           = 'reference_align'
    // Comma-separated list, passed as a single string.
    umi_cutoffs           = '1,3,5,7,10,30,100'
    umi_correction_method = 'least_distance'
    // List of (low, high) pairs, passed as a single string.
    repeat_bins           = '[(0,50), (51,60), (61,137), (138,154), (155,1000)]'
    allele_number         = 2

    // --- Resource limits (max_* parameters) ---
    max_cpus   = 16
    max_memory = '16.GB'
    max_time   = '240.h'
}

// Below are the parameters specific to each module. Most use the module's default settings; they are listed here explicitly for clarity.
process {
    // Adapter trimming with Illumina TruSeq adapters, see:
    // https://dnatech.genomecenter.ucdavis.edu/wp-content/uploads/2019/03/illumina-adapter-sequences-2019-1000000002694-10.pdf
    // NOTE(review): the -A sequence begins at "TCGG..." whereas the -a sequence begins
    // at "AGATCGG..."; confirm that dropping the leading "AGA" is intentional.
    withName: CUTADAPT {
        ext.args = '-a AGATCGGAAGAGCACACGTCT -A TCGGAAGAGCGTCGTGTAG'
    }

    withName: CLASSIFY_LOCUS {
        // number of base pairs to examine from ref start
        ext.ref_start_bp_to_check = 20
        // number of base pairs to examine from ref end
        ext.ref_end_bp_to_check   = 20
        // allowed mismatches (including INDELs)
        ext.m                     = 2
    }

    withName: CLASSIFY_INDEL {
        // number of base pairs to examine from the ref_repeat_start site
        ext.ref_before_repeat_bp_to_check = 20
        // number of base pairs to examine from the ref_repeat_end site
        ext.ref_after_repeat_bp_to_check  = 20
        // allowed mismatches (substitutions only)
        ext.m                             = 2
        // if > 0.5 of the reads from the same UMI group are indel,
        // all reads in that group will be treated as indel
        ext.indel_cutoff                  = 0.5
    }

    withName: CLASSIFY_READTHROUGH {
        // number of base pairs to examine from the ref_repeat_start site
        ext.ref_before_repeat_bp_to_check = 20
        // number of base pairs to examine from the ref_repeat_end site
        ext.ref_after_repeat_bp_to_check  = 20
        // allowed mismatches (including INDELs)
        ext.m                             = 2
    }

    withName: PREP_REF {
        // range of the number of repeat units (e.g. "CAG") used when preparing the reference
        ext.repeat_range = '0:150'
    }

    withName: REPEAT_LENGTH_DISTRIBUTION_DEFAULT {
        // number of base pairs to examine from the ref_repeat_start site
        ext.ref_before_repeat_bp_to_check = 20
        // number of base pairs to examine from the ref_repeat_end site
        ext.ref_after_repeat_bp_to_check  = 20
        // allowed mismatches (including INDELs)
        ext.m                             = 2
    }
}