-
Notifications
You must be signed in to change notification settings - Fork 2
/
config.yaml
86 lines (67 loc) · 3.49 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# External tool locations. Every value below is a placeholder and must be
# replaced with a real path on the machine running the pipeline.

# Path to the STRique.py script; the placeholder locates it inside the cloned strline repo.
strique:
/path/to/cloned/repo/strline/scripts/STRique/scripts/STRique.py
# Pore model to use with STRique.
strique_pore_model:
/path/to/pore/model/for/strique
# Path to the bonito executable.
bonito:
/path/to/bonito/executable
# Path to the "strline" environment folder under the conda envs directory.
strline_env:
/path/to/conda/folder/envs/strline
# NOTE(review): the key is named `_dir`, but the placeholder points at the
# straglr.py script itself rather than a directory; confirm which one the pipeline expects.
straglr_dir:
/path/to/cloned/repo/strline/scripts/straglr/straglr.py
# Guppy basecall config: "fast" mode, no extra arguments.
# Selectable by name in `basecall_configs`.
guppy5_fast:
# Placeholder; replace with the path to the guppy executable.
path: "/path/to/guppy/executable"
mode: "fast"
# Presumably extra command-line arguments for guppy (sibling configs put flags here); empty for this config.
guppy_extra: ""
# Guppy basecall config: "hac" mode, no extra arguments.
guppy5_hac:
# Placeholder; replace with the path to the guppy executable.
path: "/path/to/guppy/executable"
mode: "hac"
guppy_extra: ""
# Guppy basecall config: "sup" mode, no extra arguments.
# Note that the key says "super" while the mode string is "sup".
guppy5_super:
# Placeholder; replace with the path to the guppy executable.
path: "/path/to/guppy/executable"
mode: "sup"
guppy_extra: ""
# Guppy basecall config: "modbases_5mc_hac" mode, no extra arguments.
guppy5_hac_modbases:
# Placeholder; replace with the path to the guppy executable.
path: "/path/to/guppy/executable"
mode: "modbases_5mc_hac"
guppy_extra: ""
# Same as guppy5_hac_modbases ("modbases_5mc_hac" mode), but with
# "--pt_scaling" supplied through guppy_extra.
guppy5_hac_modbases_as:
# Placeholder; replace with the path to the guppy executable.
path: "/path/to/guppy/executable"
mode: "modbases_5mc_hac"
guppy_extra: "--pt_scaling"
# Same as guppy5_hac ("hac" mode), but with "--pt_scaling" supplied
# through guppy_extra.
guppy5_hac_as:
# Placeholder; replace with the path to the guppy executable.
path: "/path/to/guppy/executable"
mode: "hac"
guppy_extra: "--pt_scaling"
# Same as guppy5_hac ("hac" mode), but with "--chunk_size 5000" supplied
# through guppy_extra.
guppy5_hac_cs5k:
# Placeholder; replace with the path to the guppy executable.
path: "/path/to/guppy/executable"
mode: "hac"
guppy_extra: "--chunk_size 5000"
# Bonito basecall configs. Unlike the guppy configs, these carry only a `mode`
# and no `path`; the bonito executable is set by the separate top-level `bonito` key.
#
# NOTE(review): all three entries below hold the identical value
# "[email protected]", so fast/hac/super are currently indistinguishable.
# The value looks like an email-obfuscation artifact, not a model name.
# Bonito model identifiers presumably have a `<model>@<version>` form;
# restore the intended model name for each entry (TODO: confirm against the
# bonito model list).
bonito_fast:
mode: "[email protected]"
bonito_hac:
mode: "[email protected]"
bonito_super:
mode: "[email protected]"
# Choose from the basecall configs defined above. For better results, run bonito and guppy separately.
basecall_configs: [ <basecall_configs_you_want_to_test> ]
# Each sample name listed here must match the name of a sample entry defined later in this config file.
samples:
[ <sample_name> ]
strique_reads_per_chunk: 2000  # Number of reads per chunk when processing reads with STRique. The smaller this number, the quicker each STRique call runs, but the more times STRique has to be called.
sample_data_type:
reference: /path/to/reference/genome
repeat_config: /path/to/repeat/config #repeat config needed by our method and STRique. please refer to configs if unsure.
fast5: "/path/to/fast5/folder" #folder where all the raw sequence data is stored.
barcoding_kit: "<barcoding kit (if any; if none, put none)>"  # Barcoding kit used, if any, e.g. SQK-RBK004.
graphaligner_mode: "<graphaligner mode> (recommended: --multiseed-DP 1 for plasmid (very high coverage), and --precise-clipping 0.502 for cell line (lower coverage))"
methods: [ <methods> ] #options include: "ga","strique","simplecount","straglr","tg","strscore"
maximum_length_for_plot: <max repeat length for plots>  # Counts are cut off at this length. Make sure it is larger than your expected maximum count; if unsure, start with a large number and reduce it if needed.
cn: <copy number of repeats in reference genome> #please provide the copy number as in the reference genome for the locus in question. eg. 3 for c9orf72.
microsat: /path/to/microsat/file/needed/by/tg #please provide microsat file as needed by tandem-genotypes. please check configs folder if unsure
straglr_config: /path/to/config/file/needed/by/straglr #must match straglr config type. please check configs folder if unsure of format.
sequencing_summary: /path/to/sequencing/summary #needed if providing your own fastq
sample_name:
fastq: "/path/to/fastq.fastq"  # Set this to supply your own fastq instead of basecalling. If you do, you must also supply a sequencing summary in the corresponding data_type.
barcode: "<barcode number>"  # Barcode number if the sample is barcoded; otherwise leave blank as "".
data_type: "<sample_data_type>" #corresponding data type of the sample
summary: /path/to/sequencing/summary/file.txt #in case you do not want to basecall, you MUST provide a sequencing summary else the pipeline WILL basecall