nextflow_schema.json

{
    "$schema": "http://json-schema.org/draft-07/schema",
    "$id": "git@github.com:bwh-bioinformatics-hub/nextflow-RNAseq.git/main/nextflow_schema.json",
    "title": "pipeline parameters",
    "description": "Nextflow pipeline for RNAseq analysis",
    "type": "object",
    "definitions": {
        "input_output_options": {
            "title": "Input/output options",
            "type": "object",
            "fa_icon": "fas fa-terminal",
            "description": "Define where the pipeline should find input data and write results.",
            "required": [
                "input",
                "input_type",
                "outdir"
            ],
            "properties": {
                "input": {
                    "type": "string",
                    "fa_icon": "fas fa-copy",
                    "description": "Path to directory containing FASTQ/BAM files or a CSV file containing the absolute path to FASTQ/BAM files.",
                    "help_text": "There are two ways to supply input data to nf-core/circrna:\n\n1. Provide the path to the directory containing FASTQ or BAM files, with the appropriate wildcard glob pattern *e.g:*\n```bash\n--input \"/data/*_r{1,2}.fastq.gz\"\n```\n2. Provide a CSV file containing the absolute paths to FASTQ or BAM files *e.g:*\n\n| Sample_ID    | Read1                          | Read2                          | Bam  |\n|------------- |------------------------------- |------------------------------- |----- |\n| control_rep1  | /data/control_rep1_r1.fastq.gz  | /data/control_rep1_r2.fastq.gz  | NA   |\n| control_rep2  | /data/control_rep2_r1.fastq.gz  | /data/control_rep2_r2.fastq.gz  | NA   |\n| control_rep3  | /data/control_rep3_r1.fastq.gz  | /data/control_rep3_r2.fastq.gz  | NA   |\n| lung_rep1   | /data/lung_rep1_r1.fastq.gz   | /data/lung_rep1_r2.fastq.gz   | NA   |\n| lung_rep2   | /data/lung_rep2_r1.fastq.gz   | /data/lung_rep2_r2.fastq.gz   | NA   |\n| lung_rep3   | /data/lung_rep3_r1.fastq.gz   | /data/lung_rep3_r2.fastq.gz   | NA   |\n| melanoma_rep1   | /data/melanoma_rep1_r1.fastq.gz   | /data/melanoma_rep1_r2.fastq.gz   | NA   |\n| melanoma_rep2   | /data/melanoma_rep2_r1.fastq.gz   | /data/melanoma_rep2_r2.fastq.gz   | NA   |\n| melanoma_rep3   | /data/melanoma_rep3_r1.fastq.gz   | /data/melanoma_rep3_r2.fastq.gz   | NA   |\n\n When supplying BAM files to the CSV file, set Read1 & Read2 columns to 'NA'."
                },
                "input_type": {
                    "type": "string",
                    "fa_icon": "fas fa-dna",
                    "description": "Input data type, 'fastq' or 'bam'.",
                    "enum": [
                        "fastq",
                        "bam"
                    ]
                },
                "outdir": {
                    "type": "string",
                    "description": "The output directory where the results will be saved.",
                    "default": "./results",
                    "fa_icon": "fas fa-folder-open"
                },
                "phenotype": {
                    "type": "string",
                    "description": "Phenotype CSV file specifying the experimental design for DESeq2.",
                    "fa_icon": "fas fa-journal-whills",
                    "help_text": "The response variable containing the phenotype of primary interest in the experiment must have the column name `condition`. An example phenotype file is given below:\n\n| Sample_ID | condition | replicates |\n|---------|-----------|------------|\n| control_rep1  | control   | 1          |\n| control_rep2  | control   | 2          |\n| control_rep3  | control   | 3          |\n| lung_rep1  | lung      | 1          |\n| lung_rep2  | lung      | 2          |\n| lung_rep3  | lung      | 3          |\n| melanoma_rep1  | melanoma  | 1          |\n| melanoma_rep2  | melanoma  | 2          |\n| melanoma_rep3  | melanoma  | 3          |\n\nThis will produce the DESeq2 design formula '~ replicates + condition', i.e. all columns not named `condition` will be controlled for as covariates in the DESeq2 model.",
                    "pattern": "\\.csv$"
                }
            }
        },
        "reference_genome_files": {
            "title": "Reference genome files",
            "type": "object",
            "fa_icon": "fas fa-dna",
            "description": "Reference genome files supplied to the workflow.",
            "help_text": "The workflow has been configured to use iGenomes, which is therefore the recommended route for nf-core/circrna.\n\nAlternatively, the user must provide, at a minimum, reference FASTA and GTF files, a miRBase mature.fa file for miRNA prediction, and a species ID for differential expression analysis.",
            "properties": {
                "genome": {
                    "type": "string",
                    "fa_icon": "fas fa-clone",
                    "description": "iGenome version to use.",
                    "help_text": "Required if --fasta, --gtf set to null."
                },
                "fasta": {
                    "type": "string",
                    "format": "file-path",
                    "mimetype": "text/plain",
                    "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
                    "description": "Path to FASTA genome file.",
                    "help_text": "Must be provided if --genome null"
                },
                "gtf": {
                    "type": "string",
                    "fa_icon": "fas fa-address-book",
                    "description": "Path to reference GTF file.",
                    "help_text": "If left empty, the parameter `--genome` must be supplied and the reference GTF file will be automatically downloaded.\n```bash\n--gtf \"/reference/GRCh38.gtf\"\n```\n\n*N.B:* The pipeline has been developed using reference files; UCSC/ENSEMBL files have not been tested.",
                    "pattern": "\\.gtf$"
                },
                "mature": {
                    "type": "string",
                    "description": "Path to FASTA file with mature miRNAs.",
                    "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.",
                    "fa_icon": "fas fa-wheelchair"
                },
                "species": {
                    "type": "string",
                    "fa_icon": "fas fa-dog",
                    "description": "String identifying species.",
                    "help_text": "Check the conf/igenomes.config file and bin/ensemblDatabase_map.txt for inspiration."
                },
                "bowtie": {
                    "type": "string",
                    "fa_icon": "fas fa-bold",
                    "description": "Path to Bowtie index files.",
                    "help_text": "Automatically generated if set to null. Alternatively, provide the absolute path to Bowtie indices directory e.g:\n/data/reference_genome/BowtieIndex"
                },
                "bowtie2": {
                    "type": "string",
                    "fa_icon": "fas fa-bold",
                    "description": "Path to Bowtie2 index files.",
                    "help_text": "Automatically generated if left empty. Alternatively, provide the absolute path to the Bowtie2 indices directory e.g:\n/data/reference_genome/Bowtie2Index"
                },
                "bwa": {
                    "type": "string",
                    "fa_icon": "fas fa-bold",
                    "description": "Path to BWA index directory.",
                    "help_text": "Automatically generated if left empty. Alternatively, provide the absolute path to BWA indices directory e.g:\n/data/reference_genome/BWAIndex\n"
                },
                "fasta_fai": {
                    "type": "string",
                    "description": "Path to SAMtools index file.",
                    "fa_icon": "fab fa-stripe-s"
                },
                "segemehl": {
                    "type": "string",
                    "fa_icon": "fab fa-stripe-s",
                    "description": "Path to Segemehl index file.",
                    "help_text": "Automatically generated if set to null. Alternatively, provide the path to the Segemehl index file."
                },
                "star": {
                    "type": "string",
                    "fa_icon": "far fa-star",
                    "description": "Path to STAR index directory.",
                    "help_text": "Automatically generated if left empty. Alternatively, provide the absolute path to STAR indices directory e.g:\n/data/reference_genome/STARIndex"
                },
                "igenomes_base": {
                    "type": "string",
                    "description": "Directory / URL base for iGenomes references.",
                    "default": "s3://ngi-igenomes/igenomes",
                    "fa_icon": "fas fa-cloud-download-alt",
                    "hidden": false
                },
                "igenomes_ignore": {
                    "type": "boolean",
                    "description": "Do not load the iGenomes reference config.",
                    "fa_icon": "fas fa-ban",
                    "hidden": false,
                    "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
                }
            }
        },
        "star_general": {
            "title": "STAR",
            "type": "object",
            "description": "Define parameters for STAR 2 pass mode",
            "help_text": "STAR 2 pass mode is performed to identify novel splice sites in *all* samples. STAR takes the novel splice sites into account when performing re-alignment during the second pass. STAR is used for CIRCexplorer2, circRNA_finder & DCC.",
            "properties": {
                "alignIntronMax": {
                    "type": "integer",
                    "default": 1000000,
                    "description": "The maximum intron length is set to 1,000,000",
                    "fa_icon": "fas fa-sliders-h"
                },
                "alignIntronMin": {
                    "type": "integer",
                    "default": 20,
                    "description": "The minimum intron length is set to 20. If the genomic gap is smaller than this value, it is considered as a deletion",
                    "fa_icon": "fas fa-sliders-h"
                },
                "alignMatesGapMax": {
                    "type": "integer",
                    "default": 1000000,
                    "description": "The maximum genomic distance between mates is 1,000,000",
                    "fa_icon": "fas fa-sliders-h"
                },
                "alignSJDBoverhangMin": {
                    "type": "integer",
                    "default": 1,
                    "description": "The number of minimum overhang for annotated junctions",
                    "fa_icon": "fas fa-sliders-h"
                },
                "alignSJoverhangMin": {
                    "type": "integer",
                    "default": 1,
                    "description": "The number of minimum overhang for unannotated junctions",
                    "fa_icon": "fas fa-sliders-h"
                },
                "alignSoftClipAtReferenceEnds": {
                    "type": "string",
                    "default": "No",
                    "description": "Allow the soft-clipping of the alignments past the end of chromosomes",
                    "fa_icon": "fas fa-sliders-h"
                },
                "alignTranscriptsPerReadNmax": {
                    "type": "integer",
                    "default": 10000,
                    "description": "Max number of different alignments per read to consider",
                    "fa_icon": "fas fa-sliders-h"
                },
                "chimJunctionOverhangMin": {
                    "type": "integer",
                    "default": 15,
                    "description": "Minimum overhang for a chimeric junction",
                    "fa_icon": "fas fa-sliders-h"
                },
                "chimScoreMin": {
                    "type": "integer",
                    "default": 15,
                    "description": "Minimum total (summed) score of the chimeric segments",
                    "fa_icon": "fas fa-sliders-h"
                },
                "chimScoreSeparation": {
                    "type": "integer",
                    "default": 15,
                    "description": "Minimum difference (separation) between the best chimeric score and the next one",
                    "fa_icon": "fas fa-sliders-h"
                },
                "chimSegmentMin": {
                    "type": "integer",
                    "default": 10,
                    "description": "Minimum length of a chimeric segment; if set to 0, no chimeric output is produced",
                    "fa_icon": "fas fa-sliders-h",
                    "help_text": "Do not set to 0, this will disable outputs compatible with circRNA quantification."
                },
                "genomeLoad": {
                    "type": "string",
                    "default": "NoSharedMemory",
                    "description": "Mode of shared memory usage for the genome files",
                    "fa_icon": "fas fa-sliders-h",
                    "help_text": "Users can select a variety of options depending on their resource configuration:\n\n1. `LoadAndKeep`: load genome into shared and keep it in memory after run\n2. `LoadAndRemove`: load genome into shared but remove it after run\n3. `LoadAndExit`: load genome into shared memory and exit, keeping the genome in memory for future runs\n4. `Remove`: do not map anything, just remove loaded genome from memory\n5. `NoSharedMemory`: do not use shared memory, each job will have its own private copy of the genome",
                    "enum": [
                        "LoadAndKeep",
                        "LoadAndRemove",
                        "LoadAndExit",
                        "Remove",
                        "NoSharedMemory"
                    ]
                },
                "limitSjdbInsertNsj": {
                    "type": "integer",
                    "default": 1000000,
                    "description": "Maximum number of junctions to be inserted into the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run",
                    "fa_icon": "fas fa-sliders-h"
                },
                "outFilterMatchNminOverLread": {
                    "type": "number",
                    "default": 0.33,
                    "description": "Alignment output if ratio of matched bases relative to read length is equal to or higher than this value",
                    "fa_icon": "fas fa-sliders-h",
                    "help_text": "Consider 75bp paired end reads with sum matched bases of 120bp. Matched bp is summed over combined read length (120/150 = 0.8). Simply put, lowering this ratio reduces the number of required matches in reads.\n\n`outFilterMatchNminOverLread` is preferred over `outFilterMatchNmin` as it considers read length, suitable for experiments with varying read length."
                },
                "outFilterMismatchNoverLmax": {
                    "type": "number",
                    "default": 0.05,
                    "description": "Alignment output if ratio of mismatched bases relative to **mapped** read length is lower than value",
                    "fa_icon": "fas fa-sliders-h",
                    "help_text": "For example, for reads <20b no mismatches are allowed (1/19 = 0.0526), 20-39b: 1 mismatch, 40-59b 2 mismatches and so on. Simply put, increasing this value will allow for more mismatches in the mapped reads.\n\n`outFilterMismatchNoverLmax` is preferred over `outFilterMismatchNmax` as it considers the mapped read length, suitable for experiments with varying read length."
                },
                "outFilterMultimapNmax": {
                    "type": "integer",
                    "default": 20,
                    "description": "Max number of multiple alignments allowed for a read: if exceeded, the read is considered unmapped",
                    "fa_icon": "fas fa-sliders-h"
                },
                "outFilterMultimapScoreRange": {
                    "type": "integer",
                    "default": 1,
                    "description": "Score range below the maximum score for multimapping alignments",
                    "fa_icon": "fas fa-sliders-h"
                },
                "outFilterScoreMinOverLread": {
                    "type": "number",
                    "default": 0.33,
                    "description": "Alignment will be output only if its score relative to read length is higher than or equal to this value",
                    "fa_icon": "fas fa-sliders-h"
                },
                "outSJfilterOverhangMin": {
                    "type": "string",
                    "default": "15 15 15 15",
                    "description": "Minimum overhang length for novel splice junctions",
                    "fa_icon": "fas fa-sliders-h",
                    "help_text": "4 integers: minimum overhang length for splice junctions on both sides for:\n1. non-canonical motifs\n2. GT/AG and CT/AC motif\n3. GC/AG and CT/GC motif\n4. AT/AC and GT/AT motif\n\n-1 means no output for that motif"
                },
                "sjdbOverhang": {
                    "type": "integer",
                    "default": 100,
                    "description": "Option to specify the length of the donor/acceptor sequence on each side of the junctions used in constructing the splice junctions database",
                    "fa_icon": "fas fa-sliders-h",
                    "help_text": "By default the option is set to 100. However, we recommend setting a value depending on the read length: read/mate length - 1"
                },
                "sjdbScore": {
                    "type": "integer",
                    "default": 2,
                    "description": "Alignment score for alignments that cross database junctions",
                    "fa_icon": "fas fa-sliders-h"
                },
                "winAnchorMultimapNmax": {
                    "type": "integer",
                    "default": 999,
                    "description": "The maximum number of loci anchors that are allowed to map. By default, the pipeline uses a large number 999 to switch this filter off.",
                    "fa_icon": "fas fa-sliders-h"
                }
            },
            "fa_icon": "fas fa-star"
        },
        "generic_options": {
            "title": "Generic options",
            "type": "object",
            "fa_icon": "fas fa-file-import",
            "description": "Less common options for the pipeline, typically set in a config file.",
            "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
            "properties": {
                "help": {
                    "type": "boolean",
                    "description": "Display help text.",
                    "hidden": true,
                    "fa_icon": "fas fa-question-circle"
                },
                "publish_dir_mode": {
                    "type": "string",
                    "default": "copy",
                    "hidden": true,
                    "description": "Method used to save pipeline results to output directory.",
                    "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                    "fa_icon": "fas fa-copy",
                    "enum": [
                        "symlink",
                        "rellink",
                        "link",
                        "copy",
                        "copyNoFollow",
                        "move"
                    ]
                },
                "validate_params": {
                    "type": "boolean",
                    "description": "Boolean whether to validate parameters against the schema at runtime",
                    "default": true,
                    "fa_icon": "fas fa-check-square",
                    "hidden": true
                },
                "name": {
                    "type": "string",
                    "description": "Workflow name.",
                    "fa_icon": "fas fa-fingerprint",
                    "hidden": true,
                    "help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. Passed through to steps such as MultiQC and used for things like report filenames and titles."
                },
                "email": {
                    "type": "string",
                    "description": "Email address for completion summary.",
                    "fa_icon": "fas fa-envelope",
                    "hidden": true,
                    "help_text": "An email address to send a summary email to when the pipeline is completed.",
                    "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
                },
                "email_on_fail": {
                    "type": "string",
                    "description": "Email address for completion summary, only when pipeline fails.",
                    "fa_icon": "fas fa-exclamation-triangle",
                    "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
                    "hidden": true,
                    "help_text": "This works exactly as with `--email`, except emails are only sent if the workflow is not successful."
                },
                "plaintext_email": {
                    "type": "boolean",
                    "description": "Send plain-text email instead of HTML.",
                    "fa_icon": "fas fa-remove-format",
                    "hidden": true,
                    "help_text": "Set to receive plain-text e-mails instead of HTML formatted."
                },
                "max_multiqc_email_size": {
                    "type": "string",
                    "description": "File size limit when attaching MultiQC reports to summary emails.",
                    "default": "25.MB",
                    "fa_icon": "fas fa-file-upload",
                    "hidden": true,
                    "help_text": "If file generated by pipeline exceeds the threshold, it will not be attached."
                },
                "monochrome_logs": {
                    "type": "boolean",
                    "description": "Do not use coloured log outputs.",
                    "fa_icon": "fas fa-palette",
                    "hidden": true,
                    "help_text": "Set to disable colourful command line output and live life in monochrome."
                },
                "multiqc_config": {
                    "type": "string",
                    "description": "Custom config file to supply to MultiQC.",
                    "fa_icon": "fas fa-cog",
                    "hidden": true
                },
                "tracedir": {
                    "type": "string",
                    "description": "Directory to keep pipeline Nextflow logs and reports.",
                    "default": "${params.outdir}/pipeline_info",
                    "fa_icon": "fas fa-cogs",
                    "hidden": true
                },
                "show_hidden_params": {
                    "type": "boolean",
                    "fa_icon": "far fa-eye-slash",
                    "description": "Show all params when using `--help`",
                    "hidden": true,
                    "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
                }
            }
        },
        "max_job_request_options": {
            "title": "Max job request options",
            "type": "object",
            "fa_icon": "fab fa-acquisitions-incorporated",
            "description": "Set the top limit for requested resources for any single job.",
            "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you cannot _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.",
            "properties": {
                "max_cpus": {
                    "type": "integer",
                    "description": "Maximum number of CPUs that can be requested for any single job.",
                    "default": 16,
                    "fa_icon": "fas fa-microchip",
                    "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
                },
                "max_memory": {
                    "type": "string",
                    "description": "Maximum amount of memory that can be requested for any single job.",
                    "default": "128.GB",
                    "fa_icon": "fas fa-memory",
                    "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
                    "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
                },
                "max_time": {
                    "type": "string",
                    "description": "Maximum amount of time that can be requested for any single job.",
                    "default": "240.h",
                    "fa_icon": "far fa-clock",
                    "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$",
                    "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
                }
            }
        }
    },
    "allOf": [
        {
            "$ref": "#/definitions/input_output_options"
        },
        {
            "$ref": "#/definitions/pipeline_options"
        },
        {
            "$ref": "#/definitions/save_intermediates"
        },
        {
            "$ref": "#/definitions/reference_genome_files"
        },
        {
            "$ref": "#/definitions/read_trimming_and_adapter_removal"
        },
        {
            "$ref": "#/definitions/star_general"
        },
        {
            "$ref": "#/definitions/generic_options"
        },
        {
            "$ref": "#/definitions/max_job_request_options"
        },
        {
            "$ref": "#/definitions/institutional_config_options"
        }
    ]
}