-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodelling_TEs.slim
138 lines (118 loc) · 6.79 KB
/
modelling_TEs.slim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
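// Population of size N with a burn-in of 10N generations, mutations with mean(s) = deff, and a non-neutral mutation rate of 1/3 * mu. The population splits at 10N, at which point some (1/3 * mu) mutations take mean(s) = deff in one population but not in the other, and vice versa.
// NOTE(review): this header appears to describe an earlier version of the model. The script below only creates a single subpopulation (p1), never splits it, and does not use a "deff" parameter; it simulates SNPs plus transposable-element insertions in high/low recombination regions. Confirm and update.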
initialize() {
	// Model set-up: constants, SNP mutation types, chromosome layout, recombination map
	// and the mutation types used to represent transposable elements (TEs).
	// The names sel_SNPs, dominance_coeff, sel_FL_highrec, sel_FL_lowrec and sel_TR used
	// below are not defined in this script; they must already exist when it runs
	// (e.g. supplied on the command line with -d).

	// --- Constants -----------------------------------------------------------------
	defineConstant("N", 1000);      // size of the subpopulation created in generation 1
	defineConstant("mu", 2.1e-7);   // mutation rate handed to initializeMutationRate()
	defineConstant("r1", 2e-7);     // recombination rate of the two outer (high recombination) regions
	defineConstant("r2", 2e-8);     // recombination rate of the central (low recombination) region
	defineConstant("L", 4e6);       // total chromosome length
	defineConstant("L0", 2e6);      // length of the central low-recombination region
	defineConstant("L1", 1e6);      // length of each high-recombination region
	defineConstant("teInitialCount", 10);   // number of progenitor TEs; any value works as long as the jumping rates are scaled accordingly (this yields a constant transposition rate)

	// teJumpP and teDisableP used to be defined here and are now given on the command line:
	//defineConstant("teJumpP", 1.11e-3);    // full-length jump probability: 50% of all jumps, overall prob of 1e-3, so 5e-4, multiplied by 1000 (scaling), scaled by 4e6/1.8e9
	//defineConstant("teDisableP", 1.11e-3); // probability of a disabling (truncating) jump
	// Passing parameters with -d (as done for the selection coefficients) would also be
	// convenient for building an ABC framework around this script.

	// --- SNPs ------------------------------------------------------------------------
	initializeMutationRate(mu);
	initializeMutationType("m0", 0.5, "f", sel_SNPs);   // selected SNPs, fixed effect sel_SNPs
	initializeMutationType("m1", 0.5, "f", 0);          // neutral SNPs
	// m0 is drawn 10% of the time and m1 90% of the time; further types (e.g. positively
	// selected mutations) could be added to this element type.
	initializeGenomicElementType("g1", c(m0, m1), c(0.1, 0.9));
	initializeGenomicElement(g1, 0, L - 1);             // one element covering the whole simulated fragment

	// --- Recombination map -----------------------------------------------------------
	// 1 Mb at r1, then 2 Mb at r2, then 1 Mb at r1. A species-specific recombination map
	// could be substituted here to make the script more realistic.
	recRates = c(r1, r2, r1);
	recEnds = c(L1 - 1, L1 + L0 - 1, L - 1);
	initializeRecombinationRate(recRates, recEnds);

	// --- TE mutation types -------------------------------------------------------------
	// None of them is converted to a substitution on fixation, so they remain present
	// in the genomes as mutations.

	// m6: progenitor TEs. They are fixed in all individuals at the start of the simulation
	// and have no fitness effect: they have to sit somewhere in the simulated sequence,
	// but conceptually they stand for elements located anywhere in the genome.
	initializeMutationType("m6", 0.5, "f", 0);
	m6.convertToSubstitution = F;
	m6.color = "#FF0000";

	// m2: full-length TE copy inserted in a high-recombination region.
	initializeMutationType("m2", dominance_coeff, "f", sel_FL_highrec);
	m2.convertToSubstitution = F;
	m2.color = "#FF0000";

	// m5: full-length TE copy inserted in the low-recombination region.
	initializeMutationType("m5", dominance_coeff, "f", sel_FL_lowrec);
	m5.convertToSubstitution = F;
	m5.color = "#FFFB00";

	// m3: disabled (truncated) TE copy; shown in dark red.
	initializeMutationType("m3", 0.5, "f", sel_TR);
	m3.convertToSubstitution = F;
	m3.color = "#700000";
}
///Events
1 late() {
	// Create the single subpopulation p1 of N individuals.
	sim.addSubpop("p1", N);

	// sim.tag is used as a counter holding the next unused TE tag value.
	sim.tag = 0;

	// Seed the progenitor TEs: teInitialCount positions are drawn uniformly along the
	// chromosome, and each progenitor is added to every genome at once (the target is
	// the full vector of genomes), so it starts out fixed in all individuals.
	allGenomes = sim.subpopulations.genomes;
	insertionSites = rdunif(teInitialCount, 0, L - 1);

	for (site in insertionSites)
	{
		progenitor = allGenomes.addNewDrawnMutation(m6, site);
		progenitor.tag = sim.tag;
		sim.tag = sim.tag + 1;
	}
}
transposition_start:transposition_stop late() {
	// Transposition step. For every genome of every individual, Poisson-distributed
	// numbers of new TE insertions are drawn and added as new mutations:
	//   m2 : full-length copy inserted in a high-recombination region
	//   m5 : full-length copy inserted in the low-recombination region
	//   m3 : disabled (truncated) copy, inserted in either kind of region
	// Expected counts per genome are teInitialCount * teJumpP (full length) and
	// teInitialCount * teDisableP (truncated), split between high- and low-recombination
	// targets by biasP and (1 - biasP). teJumpP, teDisableP and biasP are not defined in
	// this script and must be provided externally (e.g. with -d on the command line).
	//
	// NOTE: transposition_start and transposition_stop are placeholders; they have to be
	// substituted with actual generation numbers (e.g. with sed) before the script is run.
	// The original author reports that SLiM did not accept -d constants in this position.

	// Region boundaries, consistent with the ends passed to initializeRecombinationRate():
	// [0, L1-1] high rec, [L1, L0+L1-1] low rec, [L0+L1, L-1] high rec.
	// The previous bounds started the low-rec draw at L1-1 and the right-hand high-rec draw
	// at L0+L1-1, i.e. one base inside the neighbouring region.
	highLeftEnd = L1 - 1;
	lowStart = L1;
	lowEnd = L0 + L1 - 1;
	highRightStart = L0 + L1;
	lastPos = L - 1;

	// Poisson means; they depend only on constants, so compute them once.
	jumpMean_high = teInitialCount * teJumpP * biasP;
	truncMean_high = teInitialCount * teDisableP * biasP;
	jumpMean_low = teInitialCount * teJumpP * (1 - biasP);
	truncMean_low = teInitialCount * teDisableP * (1 - biasP);

	for (individual in sim.subpopulations.individuals)
	{
		for (genome in individual.genomes)
		{
			// Source elements are always the progenitors (m6) carried by this single genome.
			// Sampling only from the progenitors keeps the transposition rate constant.
			tes = genome.mutationsOfType(m6);

			// "cond ? a else b": draw from the Poisson only if there are progenitors at all.
			// teInitialCount is a non-zero constant here, so the draw always happens; the
			// guard is kept in case the model is later extended with TE inactivation.
			jumpCount_high = teInitialCount ? rpois(1, jumpMean_high) else 0;
			truncatedCount_high = teInitialCount ? rpois(1, truncMean_high) else 0;
			jumpCount_low = teInitialCount ? rpois(1, jumpMean_low) else 0;
			truncatedCount_low = teInitialCount ? rpois(1, truncMean_low) else 0;

			// In all four cases the source TEs are sampled WITH replacement: a progenitor
			// may jump more than once per generation, and a Poisson draw larger than the
			// number of progenitors must not make sample() fail.

			if (jumpCount_high)   // i.e. jumpCount_high > 0
			{
				jumpTEs = sample(tes, jumpCount_high, replace=T);

				for (te in jumpTEs)
				{
					// full-length copy landing in one of the two high-recombination regions
					// (one candidate position per region, then pick one of the two)
					pos = sample(c(rdunif(1, 0, highLeftEnd), rdunif(1, highRightStart, lastPos)), 1);
					jumpTE = genome.addNewDrawnMutation(m2, pos);
					jumpTE.tag = sim.tag;
					sim.tag = sim.tag + 1;
				}
			}

			if (jumpCount_low)
			{
				jumpTEs = sample(tes, jumpCount_low, replace=T);

				for (te in jumpTEs)
				{
					// full-length copy landing in the low-recombination region
					pos = rdunif(1, lowStart, lowEnd);
					jumpTE = genome.addNewDrawnMutation(m5, pos);
					jumpTE.tag = sim.tag;
					sim.tag = sim.tag + 1;
				}
			}

			if (truncatedCount_high)
			{
				jumpmutatedTEs = sample(tes, truncatedCount_high, replace=T);

				for (te in jumpmutatedTEs)
				{
					// disabled copy landing in one of the two high-recombination regions
					pos = sample(c(rdunif(1, 0, highLeftEnd), rdunif(1, highRightStart, lastPos)), 1);
					jumpTE = genome.addNewDrawnMutation(m3, pos);
					jumpTE.tag = sim.tag;
					sim.tag = sim.tag + 1;
				}
			}

			if (truncatedCount_low)
			{
				jumpmutatedTEs = sample(tes, truncatedCount_low, replace=T);

				for (te in jumpmutatedTEs)
				{
					// disabled copy landing in the low-recombination region
					pos = rdunif(1, lowStart, lowEnd);
					jumpTE = genome.addNewDrawnMutation(m3, pos);
					jumpTE.tag = sim.tag;
					sim.tag = sim.tag + 1;
				}
			}
		}
	}
}
20000 late()
{
	// Final output: draw 8 diploid individuals from p1 and write their genomes to a VCF
	// file. The sample size can be adapted to the study at hand, and the output could be
	// switched to ms-style (or any other format suited to computing summary statistics).
	sampledIndividuals = p1.sampleIndividuals(8);
	sampledGenomes = sampledIndividuals.genomes;
	sampledGenomes.outputVCF(filePath="simulation.vcf");
}