-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsample_dataset1.config
70 lines (57 loc) · 3 KB
/
sample_dataset1.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run urlpipe -profile <docker/singularity>
----------------------------------------------------------------------------------------
*/
params {
    // --- Profile identification ---
    config_profile_name        = 'Sample dataset1'
    config_profile_description = 'Configuration using a minimal test dataset1: 6 samples from HQ50 (human cell line)'

    // --- Input samplesheet and output directory (paths relative to the launch directory) ---
    input                      = "./assets/samplesheet_dataset1.csv"
    outdir                     = "./results_dataset1"

    // --- Reference sequence and the repeat region within it ---
    ref                        = "./assets/IlluminaHsQ50FibTrim_Ref.fa"
    ref_repeat_start           = 69
    ref_repeat_end             = 218
    ref_repeat_unit            = "CAG"

    // --- Analysis settings ---
    length_mode                = "reference_align"
    umi_cutoffs                = "1,3,5,7,10,30,100"                                   // comma-separated list, passed as a single string
    umi_correction_method      = "least_distance"
    repeat_bins                = "[(0,50), (51,60), (61,137), (138,154), (155,1000)]"  // list of (low, high) pairs, passed as a single string
    allele_number              = 2

    // --- Resource settings ---
    // NOTE(review): presumably upper bounds applied to each process's resource requests; confirm against the pipeline's base config.
    max_memory                 = "16.GB"
    max_cpus                   = 16
    max_time                   = "240.h"
}
// Below are the parameters specific to each module. Most use the module default settings; they are listed explicitly here for clarity.
process {

    // Adapter trimming.
    withName: 'CUTADAPT' {
        // Illumina TruSeq adapter: https://dnatech.genomecenter.ucdavis.edu/wp-content/uploads/2019/03/illumina-adapter-sequences-2019-1000000002694-10.pdf
        ext.args = '-a AGATCGGAAGAGCACACGTCT -A TCGGAAGAGCGTCGTGTAG'
    }

    // Locus classification: checks both ends of the reference.
    withName: 'CLASSIFY_LOCUS' {
        // number of base pairs to examine from the reference start
        ext.ref_start_bp_to_check = 20
        // number of base pairs to examine from the reference end
        ext.ref_end_bp_to_check = 20
        // allowed mismatches (including INDELs)
        ext.m = 2
    }

    // Indel classification: checks the sequence flanking the repeat region.
    withName: 'CLASSIFY_INDEL' {
        // number of base pairs to examine from the ref_repeat_start site
        ext.ref_before_repeat_bp_to_check = 20
        // number of base pairs to examine from the ref_repeat_end site
        ext.ref_after_repeat_bp_to_check = 20
        // allowed mismatches (substitutions only)
        ext.m = 2
        // if more than 0.5 of the reads in a UMI group are indel, all reads in that group are treated as indel
        ext.indel_cutoff = 0.5
    }

    // Read-through classification: checks the sequence flanking the repeat region.
    withName: 'CLASSIFY_READTHROUGH' {
        // number of base pairs to examine from the ref_repeat_start site
        ext.ref_before_repeat_bp_to_check = 20
        // number of base pairs to examine from the ref_repeat_end site
        ext.ref_after_repeat_bp_to_check = 20
        // allowed mismatches (including INDELs)
        ext.m = 2
    }

    // Reference preparation.
    withName: 'PREP_REF' {
        // range of the number of repeat units (e.g. "CAG") used when preparing the reference
        ext.repeat_range = "0:150"
    }

    // Default repeat-length distribution.
    withName: 'REPEAT_LENGTH_DISTRIBUTION_DEFAULT' {
        // number of base pairs to examine from the ref_repeat_start site
        ext.ref_before_repeat_bp_to_check = 20
        // number of base pairs to examine from the ref_repeat_end site
        ext.ref_after_repeat_bp_to_check = 20
        // allowed mismatches (including INDELs)
        ext.m = 2
    }
}