-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsimulation_config.yaml
48 lines (39 loc) · 1.64 KB
/
simulation_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#-----------------------------
# Choose which workflow to generate data for.
# Enable exactly one `wf` line at a time and keep the other commented out.
#-----------------------------
wf: "MG-4"
#wf: "MG-5"
# Location of the raptor binaries.
# NOTE(review): absolute, user-specific path; adjust it for your machine.
raptor_bin_dir: "/home/evelin/metagenomics/raptor_data_simulation/build/bin"
# Location of the simulated data.
# NOTE(review): absolute, user-specific path; adjust it for your machine.
data_out_dir: "/home/evelin/metagenomics/A2-job-granularity/data"
#-----------------------------
# Specifying the number of jobs.
# Each key controls a different job count depending on the selected workflow.
#-----------------------------
number_of_bins: 3 # MG-4: number of dream_FM_index jobs
# (number_of_bins, continued) MG-5: number of FM_index and yara_mapper jobs
read_file_count: 22 # MG-4: number of dream_mapper jobs
# (read_file_count, continued) MG-5: number of search_prefilter jobs
#-----------------------------
# Parameters of simulated data.
#
# Note: if you want to rerun the same workflow with NEW data, you have to
# change at least one of the following parameters:
# * number of bins
# * read error rate
# * read length
# NOTE(review): these presumably correspond to the keys `number_of_bins`,
# `errors_per_read` and `read_length` - confirm against the workflow.
#
#-----------------------------
# Size of the reference, presumably in bytes - TODO confirm the unit.
# NOTE(review): 102400 = 100 * 2^10 (100 KiB if the unit is bytes). The
# previous comment mentioned "4*2^20 = 4MiB" (4194304), which does not match
# the value configured here; confirm which one is intended.
ref_size: 102400 # 100 * 2^10
errors_per_read: 5 # number of errors per read; this is the basis for the error rate in an approximate match
# Length of each simulated read (presumably in bases - confirm).
read_length: 150
reads_per_genome: 100 # number of reads simulated from each reference sequence
#-----------------------------
# Mason variator parameters.
#
# These have been raised (presumably above the tool defaults - confirm) to
# make sure that the haplotypes in each bin differ enough from one another.
# Only if the haplotypes differ can a read map to multiple haplotypes with
# different edit distances.
#-----------------------------
# Number of haplotypes (presumably simulated per bin - confirm).
haplotype_count: 4
# SNP rate passed to the variator (presumably per base - confirm).
snp_rate: 0.01
# Small-indel rate passed to the variator (presumably per base - confirm).
small_indel_rate: 0.001