-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBSAS_analysis_2.R
440 lines (353 loc) · 21.8 KB
/
BSAS_analysis_2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
#####################################
# ANALYSIS
#####################################
setwd("/home/user/Dropbox/Projects/IN_PROGRESS/Best_sampling_across_systems/")
load("all_res_mult.Rdata")
# Composition of the results array (all_res): each value is the minimum sampling
# needed to capture 95% of the alleles in a given category.
#   rows    = scenarios (combinations of simulation parameters)
#   columns = allele category x number of copies to capture:
#             once (cols 1:9), five times (cols 10:18), then 10, 25 and 50 times
#   third dimension = presumably the replicates (it is averaged over below)
# Nine allele categories can be calculated (see below) even though only four
# are used in the paper.
# Some allele type / parameter combinations are Inf or NA for many or most
# replicates, which means that category was never caught.

# Inf means the 0.95 threshold was never reached: recode those cells as NA
all_res[is.infinite(all_res)] <- NA
# Mean over the third dimension for every scenario x column cell, ignoring NAs
means_caught <- apply(all_res, c(1, 2), mean, na.rm = TRUE)
# Number of NAs per scenario x column cell - important to report!
rowSums(is.na(all_res), dims = 2)
# Flag the scenarios whose column 3 has 15 or more NAs. Pull the parameters of
# the flagged rows and tabulate them to see in which situations (large/small
# populations, bottleneck, migration, ...) this is most likely.
rowSums(is.na(all_res), dims = 2)[, 3] >= 15
#####################################
# MAIN PLOTS BY PARAMETER #
#####################################
scenarios_ran <- list.dirs(path = "./Simulations6/", full.names = TRUE, recursive = FALSE)
# Set up the params list.
# get_params() takes one scenario directory path, keeps the 4th "/"-separated
# component (the directory name, e.g. "b1LBSAS_Np_10_mim_000625_Ps_100") and
# splits it on "_". It returns a one-element list holding the character pieces.
get_params <- function(scenario) strsplit(strsplit(scenario, "/")[[1]][4], "_")
lapply(scenarios_ran, get_params)
# One row per scenario, seven name pieces per row
# (col 1 = bottleneck tag, col 3 = number of populations,
#  col 5 = migration rate digits, col 7 = population size)
sim_params <- matrix(unlist(lapply(scenarios_ran, get_params)), ncol = 7, byrow = TRUE)
# Migration rates are stored in the directory names without the leading decimal point
sim_params[, 5] <- paste0(".", sim_params[, 5])
allele_categories <- c("all alleles (including rare)", "overall common f>0.05", "overall low fr 0.10>f>0.01",
  "overall rare f<0.01", "local1", "local2", "somwhere common f>0.05", "somwhere common f>0.05", "BM overall f~0.05")
# Regress the mean sample size for column 2 on the number of populations.
# lm() needs a formula as its first argument; the predictor is stored as text
# in sim_params, so it is converted to numeric here.
summary(lm(means_caught[, 2] ~ as.numeric(sim_params[, 3])))
plot(sim_params[, 5], means_caught[, 2])
plot(sim_params[, 7], means_caught[, 2])
#Or
#sim_params<-read.csv("sim_params.csv")[,-1]
#####################################################
# FIGURE 1 AND SUPP PLOTS- GRID ALL PARAMS #
#####################################################
# PDF showing each factor for the four kinds of alleles - reproduces Figure 1.
#
# To plot a single bottleneck duration instead of pooling all of them, subset
# the rows of means_caught and sim_params inside the boxplot call below, and
# change the output file name to B1, B5 or B25 accordingly.
# The bottleneck scenarios simulated occupy these rows:
#   B1  [1:392,]
#   B25 [393:784,]
#   B5  [785:1176,]
#
# To plot the multiple-copy results (for example five copies), change the AT
# loop, i.e. substitute 10,12,17,18 for 1,3,8,9, and change the file name.
pdf(file = "BSAS_grid_final_1cop.pdf", height = 12, width = 14)
par(mfcol = c(4, 3), mar = c(2, 2, 2, 2), oma = c(6, 5, 4, 4), cex.axis = 2)
# One column of panels per parameter (P), one row per allele category (AT)
for (P in c(3, 5, 7)) {
  # The y axis is drawn only for the first parameter (P == 3)
  do_yaxis <- if (P > 3) "n" else "s"
  do_xaxis <- "n"
  for (AT in c(1, 3, 8, 9)) {
    # The x axis is drawn only for the last allele category in the column
    if (AT %in% c(9, 18)) do_xaxis <- "s"
    boxplot(means_caught[, AT] ~ as.numeric(sim_params[, P]), xaxt = do_xaxis, yaxt = do_yaxis)
    # Unlabelled tick marks on the right-hand side for the last parameter
    if (P == 7) axis(4, labels = FALSE)
  }
}
mtext("number of individuals needed", side = 2, line = 2, outer = TRUE, cex = 1.75)
mtext("Cat 4- species >0.05 Cat 3- local >0.05 Cat 2- 0.01 to 0.10 Cat 1- all alleles ", side = 4, line = 2, outer = TRUE, cex = 1.4)
mtext("Number of populations Migration rate Population size", side = 1, line = 2, outer = TRUE, cex = 1.4)
dev.off()
##############################################################
# POP SIZE AND MIGRATION COMBINED- FIGURE 2 #
##############################################################
# The relatively flat boxplots for migration rate and population size in
# Figure 1 suggest that these factors are dominated by the influence of the
# number of populations, which makes sense because the samples are divided up
# among the different populations.
# So we need to parcel out these factors to see if they have some influence.
# The following goes through every combination of migration rate and population size.

# Prefix the population-size labels with a space (labels not listed here are left as they are)
sim_params[sim_params[, 7] == "50", 7] <- " 50"
sim_params[sim_params[, 7] == "75", 7] <- " 75"
sim_params[sim_params[, 7] == "150", 7] <- " 150"
sim_params[sim_params[, 7] == "100", 7] <- " 100"
sim_params[sim_params[, 7] == "200", 7] <- " 200"
sim_params[sim_params[, 7] == "300", 7] <- " 300"
sim_params[sim_params[, 7] == "400", 7] <- " 400"
sim_params[sim_params[, 7] == "500", 7] <- " 500"

# This one is for publication - focuses on local alleles, with alternating
# grey/white runs of seven boxes to separate each population size
psize_mig <- paste0("p=", sim_params[, 7], " m=", sim_params[, 5])
pdf("vary_migr_by_psize.pdf", height = 6, width = 12)
par(mar = c(10, 5, 3, 2))
AT <- 8
boxplot(means_caught[, AT] ~ psize_mig,
  las = 2,
  col = rep(c("light grey", "white"), each = 7),
  ylab = "Ni, number of individuals to sample per population"
)
dev.off()

# This one is NOT for publication - one page per allele category, just for examining
# (column 8 of sim_params_temp is the "migration-popsize" label)
sim_params_temp <- cbind(sim_params, paste(sim_params[, 5], sim_params[, 7], sep = "-"))
pdf("vary_psize_by_migr.pdf", height = 7, width = 15)
for (AT in c(1, 2, 3, 4, 8, 9)) {
  boxplot(means_caught[, AT] ~ sim_params_temp[, 8], las = 2, main = allele_categories[AT])
}
dev.off()

# This one is NOT for publication - focuses on number of populations (i.e. there
# are separate graphs for each number of populations), to show that the large
# variation is of course due to the number of populations; not a very helpful
# set of plots.
# Switch the sim_params column used on the right-hand side of the formula
# between 7 (population size) and 5 (migration rate) to change the grouping.
# NOTE(review): the output file name says "psize" but the formula currently
# groups by column 5 - confirm which grouping is intended.
# Change the AT for Allele Type (could look at any alleles or catch multiple)
pdf(file = "BSAS_grid_final_psize.pdf", height = 12, width = 18)
par(mfcol = c(4, 7), mar = c(2, 2, 2, 2), oma = c(4, 4, 4, 4))
num_pops_char <- unique(sim_params[, 3])
# Go through population numbers (columns of panels), then allele types (rows)
for (nump in num_pops_char) {
  for (AT in 1:4) {
    boxplot(means_caught[sim_params[, 3] == nump, AT] ~ as.numeric(sim_params[sim_params[, 3] == nump, 5]), las = 2, main = nump)
  }
}
dev.off()
#################################################
# MULT FACTOR CATCH 1,5,10,25,50- FIGURE 3 #
#################################################
# What is the multiplication factor for catching different numbers of alleles?
# In other words, what is the ratio of the number of samples needed to catch
# 5 (or 10, 25, 50) copies versus 1 copy, across the types of alleles and all
# scenarios.

# Columns holding the single-copy results for the four allele categories plotted.
# The multi-copy results for the same categories sit 9 columns further along for
# each step (5 copies: +9, 10 copies: +18, 25 copies: +27, 50 copies: +36).
# The denominator previously used c(1,3,7,9), which divided category 8 by
# category 7; numerator and denominator now refer to the same categories.
one_copy_cols <- c(1, 3, 8, 9)
n_copies <- c(5, 10, 25, 50)
# Ratio matrix (scenarios x 4 categories) for the k-th entry of n_copies
copy_ratio <- function(k) {
  means_caught[, one_copy_cols + 9 * k] / means_caught[, one_copy_cols]
}

pdf(file = "mult_factor_boxplots.pdf", height = 5, width = 15)
par(oma = c(5, 3, 3, 3), mfrow = c(1, 4), cex.axis = 1.5, cex.lab = 1.7, cex.main = 1.7)
for (k in seq_along(n_copies)) {
  boxplot(copy_ratio(k), names = 1:4, main = paste(n_copies[k], "copies"), col = "light grey")
  # Dashed reference line at the naive expectation (k copies needs k times the sample)
  abline(h = n_copies[k], lty = 2, col = "salmon")
}
mtext("Category of allele", outer = TRUE, side = 1, line = -0.5, cex = 1.2)
mtext("1-all alleles, 2-low frequency (0.01-0.10), 3-locally >0.05, 4-species wide >0.05", outer = TRUE, side = 1, line = 2, cex = 1.2)
mtext("size by which collection must be increased", outer = TRUE, side = 2, cex = 1.2)
mtext("number of allele copies desired in the collection", outer = TRUE, side = 3, cex = 1.2)
dev.off()

# Calculations for reporting.
# NOTE(review): the values recorded on the right were obtained with the old
# denominator columns c(1,3,7,9); re-run and refresh them.
mean(copy_ratio(1), na.rm = TRUE) #4.16
mean(copy_ratio(2), na.rm = TRUE) #7.19
mean(copy_ratio(3), na.rm = TRUE) #15.8
mean(copy_ratio(4), na.rm = TRUE) #27.4
#################################################################################################
#############################################################
# EVERYTHING ELSE IS "CHECKS" TO MAKE SURE IT ALL WORKED #
#############################################################
#####################################
# MAKE SURE BOTTLENECK WORKED #
#####################################
# At first it seems surprising that the bottleneck length had so little effect
# on sampling, in spite of the known effect of bottlenecks on number of alleles.
# To make sure the bottlenecks worked, examine the genepop files directly:
# calculate NUMBER OF ALLELES and OBSERVED HETEROZYGOSITY over 10 reps and
# compare among bottlenecks, for 12 scenarios.
setwd("/home/user/Dropbox/Projects/IN_PROGRESS/Best_sampling_across_systems/")
load("all_res_final.Rdata")
library(adegenet); library(diveRsity)
scenarios_ran <- list.dirs(path = "./Simulations6/", full.names = TRUE, recursive = FALSE)
# Indices 1, 393, 785 are the first scenario of each of the three bottlenecks.
# Result arrays are indexed [bottleneck, scenario, replicate].
bneck_nall <- array(dim = c(3, 12, 10)); bneck_het <- array(dim = c(3, 12, 10))
bneck_base <- c(1, 393, 785); bneck_add <- seq(1, 300, 25)
#The 12 scenarios are:
# scenarios_ran[bneck_add]
# [1] "./Simulations6//b1LBSAS_Np_10_mim_000625_Ps_100" [2] "./Simulations6//b1LBSAS_Np_10_mim_005_Ps_1000"
# [3] "./Simulations6//b1LBSAS_Np_10_mim_04_Ps_150" [4] "./Simulations6//b1LBSAS_Np_14_mim_0025_Ps_200"
# [5] "./Simulations6//b1LBSAS_Np_14_mim_02_Ps_300" [6] "./Simulations6//b1LBSAS_Np_20_mim_00125_Ps_50"
# [7] "./Simulations6//b1LBSAS_Np_20_mim_01_Ps_500" [8] "./Simulations6//b1LBSAS_Np_2_mim_000625_Ps_75"
# [9] "./Simulations6//b1LBSAS_Np_2_mim_01_Ps_100" [10] "./Simulations6//b1LBSAS_Np_3_mim_000625_Ps_1000"
# [11] "./Simulations6//b1LBSAS_Np_3_mim_005_Ps_150" [12] "./Simulations6//b1LBSAS_Np_3_mim_04_Ps_200"
for (b in 1:3) {
  for (add in 1:12) {
    # bneck_base and bneck_add both start at 1, so subtract 1 to land on the
    # scenarios listed above (previously every index was shifted by one)
    this_b <- bneck_base[b] + bneck_add[add] - 1
    # Full paths, so the working directory does not have to change per scenario
    reps_ran_gen <- list.files(scenarios_ran[this_b], pattern = "gen", full.names = TRUE)
    for (r in 1:10) {
      BSAS_genind <- read.genepop(reps_ran_gen[r], ncode = 3)
      # Get alleles and heterozygosity, save.
      # Elements 4 and 6 of the genind summary are averaged; presumably these are
      # alleles per locus and observed heterozygosity - confirm against adegenet.
      genind_summary <- summary(BSAS_genind)
      bneck_nall[b, add, r] <- mean(unlist(genind_summary[4]))
      bneck_het[b, add, r] <- mean(unlist(genind_summary[6]))
    }
  }
}
# RESULTS: these plots show the ratio between the first and second bottleneck
# sets in bneck_base (rows 1 and 2 of the arrays) - we can see it ranges.
# Thus the bottlenecks are having an effect on genetic diversity.
# The explanation is that bottlenecks only cause the loss of (mostly) rare
# alleles, and those are likely below the threshold we are counting.
het_mean <- apply(bneck_het, c(1, 2), mean, na.rm = TRUE); nall_mean <- apply(bneck_nall, c(1, 2), mean, na.rm = TRUE)
pdf(file = "bneck_nall.pdf"); boxplot(t(bneck_nall[1, , ] / bneck_nall[2, , ])); dev.off()
pdf(file = "bneck_het.pdf"); boxplot(t(bneck_het[1, , ] / bneck_het[2, , ])); dev.off()
save(bneck_het, file = "bneck_het_comp.Rdat"); save(bneck_nall, file = "bneck_nall_comp.Rdat")
# Recorded with the earlier (shifted-by-one) scenario indices; re-run to refresh:
#het_mean= 1.03, max= 1.13 ; nall_mean= 1.26, max= 1.86
#########################
#COMPARE MSS AND MIM
###########################
# Most simulations use the island model (mim). This section analyzes
# simulations run under the stepping stone model (mss) and compares the
# sampling needed.
setwd("/home/user/Dropbox/Projects/IN_PROGRESS/Best_sampling_across_systems/Additional_checks/")
# Split the 4th "/"-separated component of a scenario path on "_";
# returns a one-element list of the name pieces
get_params <- function(scenario) {
  path_parts <- strsplit(scenario, "/")[[1]]
  strsplit(path_parts[4], "_")
}

# First get the MSS data, then the MIM data
scenarios_ran <- list.dirs(path = "./mss_migr_model/", full.names = TRUE, recursive = FALSE)
load("all_res_mss.Rdata")
# Remember what is in these results: the minimum sampling needed to capture
# 95% of the alleles in a given category, either once (cols 1:9) or five times
# (cols 10:18) ... etc.
all_res[is.infinite(all_res)] <- NA
means_caught_mss <- round(apply(all_res, c(1, 2), mean, na.rm = TRUE), 2)
sd_caught_mss <- apply(all_res[1:16, 8, ], 1, sd, na.rm = TRUE)
sim_params_mss <- matrix(unlist(lapply(scenarios_ran, get_params)), ncol = 7, byrow = TRUE)

setwd("/home/user/Dropbox/Projects/IN_PROGRESS/Best_sampling_across_systems/")
scenarios_ran <- list.dirs(path = "./Simulations6/", full.names = TRUE, recursive = FALSE)
load("all_res_final.Rdata")
all_res[is.infinite(all_res)] <- NA
means_caught_mim <- round(apply(all_res, c(1, 2), mean, na.rm = TRUE), 2)
sim_params_mim <- matrix(unlist(lapply(scenarios_ran, get_params)), ncol = 7, byrow = TRUE)

# Recall that the MSS model was only run for a subset of parameters, so narrow
# the MIM rows to those whose parameter columns 1, 3, 5, 7 also occur in MSS
shared_bw <- which(
  do.call(paste, as.data.frame(sim_params_mim[, c(1, 3, 5, 7)])) %in%
    do.call(paste, as.data.frame(sim_params_mss[, c(1, 3, 5, 7)]))
)
sd_caught_mim <- apply(all_res[shared_bw[1:16], 8, ], 1, sd, na.rm = TRUE)

#Analysis
# Ratio MIM / MSS for the four allele categories.
# NOTE(review): this assumes the matched MIM rows are in the same order as the MSS rows - confirm.
diff_mss <- means_caught_mim[shared_bw, c(1, 3, 8, 9)] / means_caught_mss[, c(1, 3, 8, 9)]
colMeans(diff_mss[1:16, ])
#[1] 1.0026127 1.0257867 0.7466511 1.0027878
colMeans(diff_mss[17:32, ])
#[1] 1.0017947 0.9992222 1.0003993 0.9986894 #as expected there is no difference for the two population system
colMeans(diff_mss[33:48, ])
#[1] 1.0038296 1.0010115 0.9471347 1.0083351
# Difference is highest for high gene flow - makes sense - the stepping stone
# model is very restrictive in this case. 0.4 is much different than 0 while
# 0.000625 is not much different than 0
boxplot(diff_mss[, 3] ~ as.numeric(paste(".", sim_params_mss[, 5], sep = "")))
# Difference is highest for larger population sizes - gene flow has most effect
# at larger population sizes, whereas at small sizes all populations experience
# high drift even if gene flow is high
boxplot(diff_mss[, 3] ~ as.numeric(sim_params_mss[, 7]))

# Which scenarios are especially different?
diff_mss < .95
diff_mss > 1.05

# Create a summary table: MSS / MIM ratio of column 17 for rows 1:16 and 33:48,
# next to the migration rate and population size of the first 16 MSS scenarios
a <- unname(cbind(
  matrix(round(means_caught_mss[, 17] / means_caught_mim[shared_bw, 17], 2)[c(1:16, 33:48)], ncol = 2),
  sim_params_mss[1:16, c(5, 7)]
))
colnames(a) <- c("14 pop", "5 pop", "migr", "pop size")
mean(as.numeric(a[, 1]))
mean(as.numeric(a[, 2]))
#############################################################
# TO COUNT NUMBER OF SINGLETONS FOR DISCUSSION #
#########################################################
# To count the number of alleles occurring only rarely in a sample and thus
# likely to be lost (the code below reports alleles seen exactly once and
# alleles not seen at all)
library(adegenet); library(diveRsity)
source("sample_funcs_BSAS.R"); source("src/arp2gen_edit.R")
# Maximum of every element (column) of `data`, ignoring NAs
colMax <- function(data) sapply(data, max, na.rm = TRUE)
# NOTE(review): the directory name says Ps_1000 while the file name says Ps_500 - confirm this is the intended file
setwd("/home/user/Dropbox/Projects/IN_PROGRESS/Best_sampling_across_systems/Additional_checks/onep_no_bn/nob5LBSAS_Np_1_Ps_1000/")
BSAS_genind <- read.genepop("nob5LBSAS_Np_1_Ps_500_1_3.gen", ncode = 3)
# Draw ONE random subsample of 500 individuals and derive every statistic from
# it. Previously each line drew its own subsample, so the reported counts did
# not describe the same set of individuals.
sampled_rows <- sample(seq_len(nrow(BSAS_genind@tab)), 500)
sampled_allele_counts <- colSums(BSAS_genind@tab[sampled_rows, ])
sum(sampled_allele_counts == 1) # alleles present exactly once in the subsample
sum(sampled_allele_counts == 0) # alleles absent from the subsample
length(sampled_allele_counts) # total number of allele columns
# hist() stops with an error if any count exceeds the last break (100)
hist(sampled_allele_counts, breaks = c(0, seq(5, 100, by = 5)))
#####################################
# FST CHECKING MIGR RATE #
#####################################
# Now compute FSTs based on migration rate.
# It seems odd that the migration rate has little effect on how to sample for
# allele capture, so we want to make sure the migration rate is actually
# affecting the FST.
# We look at several population sizes, for several numbers of populations,
# across the range of migration rates.
# RESULTS: migration does influence FST, but less than population size, i.e.
# allele frequencies don't change a lot across the range of migration rates, as
# the populations are already quite small.
# In small populations the population size will have more influence on drift
# than migration!
# (Also, again, some rare alleles might be lost or shared due to migration but
# we're ignoring rare alleles.)
# To report migration rate in the results also.
library(hierfstat)
pops_focus <- c("50", "100", "300", "500")
# scenarios_ran holds paths relative to the project directory, so set it once
# here instead of switching directories for every scenario
setwd("/home/user/Dropbox/Projects/IN_PROGRESS/Best_sampling_across_systems/")
# The Figure 2 code pads the population-size labels in sim_params[,7] with
# spaces; strip them so the comparison with pops_focus matches either way
pop_size_clean <- trimws(sim_params[, 7])
for (pnum in c("2", "3", "5", "10")) {
  # Indexed [population size, migration rate, replicate]; re-created for every
  # pnum so values from a previous pass can never leak into this one
  migr_comp <- array(dim = c(4, 7, 10))
  for (psize in 1:4) {
    # The b5LBSAS scenarios with this number of populations and population size
    # (one directory per migration rate is expected)
    scen_dirs <- scenarios_ran[sim_params[, 3] == pnum & sim_params[, 1] == "b5LBSAS" & pop_size_clean == pops_focus[psize]]
    if (length(scen_dirs) < 7) {
      stop("Expected 7 migration-rate scenarios for Np=", pnum, " Ps=", pops_focus[psize],
        " but found ", length(scen_dirs))
    }
    for (migr in 1:7) {
      reps_ran_gen <- list.files(scen_dirs[migr], pattern = "gen", full.names = TRUE)
      for (r in 1:10) {
        BSAS_genind <- read.genepop(reps_ran_gen[r], ncode = 3)
        # Mean of the pairwise FST values among populations in this replicate
        migr_comp[psize, migr, r] <- mean(pairwise.fst(BSAS_genind))
      }
    }
  }
  # Average over replicates: rows = population sizes, columns = migration rates
  fst_mean <- apply(migr_comp, c(1, 2), mean, na.rm = TRUE)
  colnames(fst_mean) <- unique(sim_params[, 5]); rownames(fst_mean) <- pops_focus
  # NOTE(review): paste() separates with spaces, so the file is written as
  # e.g. "fst_w_mig 2 .csv"; kept as is so existing outputs keep their names
  write.csv(fst_mean, file = paste("fst_w_mig", pnum, ".csv"))
}
#################################
# ANALYZING ONE POPS #
#################################
setwd("/home/user/Dropbox/Projects/IN_PROGRESS/Best_sampling_across_systems/Additional_checks")
# Two side-by-side panels: single population with, then without, a bottleneck
pdf("one_population.pdf", height = 6, width = 9)
par(mfrow = c(1, 2))

# Panel 1: with bottleneck
load("all_res_onep_bn.Rdata")
all_res[is.infinite(all_res)] <- NA
means_caught <- apply(all_res, c(1, 2), mean, na.rm = TRUE)
boxplot(means_caught[, c(1, 3, 9)],
  ylim = c(0, 250), ylab = "number plants to sample",
  names = c("all", "low fr", "0.05"), main = "bottleneck, 1 population"
)
axis(side = 4, at = seq(0, 250, by = 50), labels = FALSE)
mtext("allele category", side = 1, line = 3, cex = 1.3)

# Panel 2: without bottleneck
load("all_res_onep_nobn.Rdata")
all_res[is.infinite(all_res)] <- NA
means_caught <- apply(all_res, c(1, 2), mean, na.rm = TRUE)
boxplot(means_caught[, c(1, 3, 9)],
  ylim = c(0, 250), ylab = "number plants to sample",
  names = c("all", "low fr", "0.05"), main = "no bottleneck, 1 population"
)
axis(side = 4, at = seq(0, 250, by = 50), labels = FALSE)
mtext("allele category", side = 1, line = 3, cex = 1.3)
dev.off()

# For reporting: one population, BM category (column 9), compare to theoretical
# expectations. all_res and means_caught hold whichever file was loaded last;
# the recorded values list both runs.
summary(c(all_res[, 9, ])) #mean of 27.39 median of 28; mean of 27.9 median of 29
sd(c(all_res[, 9, ])) #sd 2.67 2.47
# Good, it does conform to expectations of approximately 28 individuals.
# And the ratio - the "big result" of the paper in terms of capturing multiple
# alleles - in this case five copies (cols 10:18) versus one copy (cols 1:9)
mean(means_caught[, 10:18] / means_caught[, 1:9], na.rm = TRUE)
#no bn then bn #[1] 4.824 [1] 4.136
boxplot(means_caught[, 10:18] / means_caught[, 1:9])
#################################################
# ALLELE FREQUENCY SHIFT and NUMBER ALLELES #
#################################################
# This looks at possible explanations for the results, in particular for
# bottleneck and population size
setwd("/home/user/Dropbox/Projects/IN_PROGRESS/Best_sampling_across_systems/")
library(adegenet); library(diveRsity)
source("sample_funcs_BSAS.R"); source("src/arp2gen_edit.R")
load("all_res_final.Rdata")
scenarios_ran <- list.dirs(path = "./Simulations6/", full.names = TRUE, recursive = FALSE)
# Set up the params list: split the 4th "/"-separated path component on "_"
get_params <- function(scenario) strsplit(strsplit(scenario, "/")[[1]][4], "_")
sim_params <- matrix(unlist(lapply(scenarios_ran, get_params)), ncol = 7, byrow = TRUE)
sim_params[, 5] <- paste0(".", sim_params[, 5])
allele_categories <- c("all alleles (including rare)", "overall common f>0.05", "overall low fr 0.10>f>0.01",
  "overall rare f<0.01", "local1", "local2", "somwhere common f>0.05", "somwhere common f>0.05", "BM overall f~0.05")
results_allele_shift <- matrix(nrow = length(scenarios_ran), ncol = 9)
# We'll do all scenarios and 3 allele groups with 3 stats each
# (number of alleles, median frequency, .95 quantile of frequency)
num_reps <- 4
for (scen in seq_along(scenarios_ran)) {
  # Start from EMPTY vectors. The previous list(0,0,0) left a spurious leading 0
  # in every pooled vector, inflating the allele count by one and pulling the
  # median and quantile down.
  concat_af <- list(numeric(0), numeric(0), numeric(0))
  # Full paths to this scenario's genepop files. Replaces
  # file.path(..., sep = ""), which appended a trailing "/" that was only
  # stripped again on unix.
  reps_ran_gen <- list.files(scenarios_ran[[scen]], pattern = "gen", full.names = TRUE)
  for (nrep in seq_len(num_reps)) {
    temp_file_name <- reps_ran_gen[nrep]
    BSAS_genind <- read.genepop(temp_file_name, ncode = 3)
    BSAS_genpop <- genind2genpop(BSAS_genind)
    n_total_indivs <- length(BSAS_genind@tab[, 1])
    n_ind_p_pop <- table(BSAS_genind@pop)
    # Overall allele frequency: allele copies divided by twice the number of
    # individuals (the data are treated as diploid)
    allele_freqs <- colSums(BSAS_genpop@tab) / (n_total_indivs * 2)
    # get.allele.cat() comes from the sourced helper file; its elements are
    # used here as indices into allele_freqs
    allele_cat <- get.allele.cat(BSAS_genpop, n_ind_p_pop, local = TRUE)
    concat_af[[1]] <- c(concat_af[[1]], allele_freqs[allele_cat[[1]]]) #all alleles
    concat_af[[2]] <- c(concat_af[[2]], allele_freqs[allele_cat[[3]]]) #low frequency
    concat_af[[3]] <- c(concat_af[[3]], allele_freqs[allele_cat[[7]]]) #somewhere common
  }
  results_allele_shift[scen, 1:3] <- unlist(lapply(concat_af, length))
  results_allele_shift[scen, 4:6] <- unlist(lapply(concat_af, median))
  results_allele_shift[scen, 7:9] <- unlist(lapply(concat_af, quantile, .95))
}
write.csv(results_allele_shift, "results_allele_shift.csv")
# Explanation of the columns of the results table:
# first three columns are number of alleles for all, low freq, and some_com alleles
# next three columns are median frequency for all, low freq, and some_com alleles
# next three columns are the .95 quantile of allele frequency for all, low freq, and some_com alleles
results_allele_shift <- read.csv("results_allele_shift.csv")
# Drop the row-name column that write.csv added
results_allele_shift <- results_allele_shift[, -1]
#########################
# With bottleneck #
#########################
# Rebuild the parameter table (unpadded, straight from the directory names)
get_params <- function(scenario) strsplit(strsplit(scenario, "/")[[1]][4], "_")
sim_params <- matrix(unlist(lapply(scenarios_ran, get_params)), ncol = 7, byrow = TRUE)
# For each of the nine result columns, print the mean for the b1, b5 and b25
# scenarios (in that order)
for (i in 1:9) {
  print(c(
    mean(results_allele_shift[sim_params[, 1] == "b1LBSAS", i]),
    mean(results_allele_shift[sim_params[, 1] == "b5LBSAS", i]),
    mean(results_allele_shift[sim_params[, 1] == "b25LBSAS", i])
  ))
}
#for i=1, all alleles ... 157486/201183= 0.783
#Results for i=4 (all alleles, allele frequency)... .0384/.032= 1.2
#################################
# With population size #
#################################
# Divisor used for each population size (presumably the number of markers -
# TODO confirm), keyed by the population-size label
# (original ordering "100" "1000" "150" "200" "300" "50" "500" "75" was double checked 1/4/2019)
num_mark_by_size <- c(
  "100" = 10000, "1000" = 1500, "150" = 9000, "200" = 8000,
  "300" = 5000, "50" = 15000, "500" = 4000, "75" = 10000
)
# Look the divisor up per scenario by its population size. This replaces
# silent recycling of a length-8 vector, which is only right while the rows
# cycle through the sizes in exactly that order.
num_mark <- unname(num_mark_by_size[sim_params[, 7]])
stopifnot(!anyNA(num_mark))
boxplot((results_allele_shift[, 1] / num_mark) ~ sim_params[, 7])
# Mean number of alleles divided by the size-specific divisor, largest versus
# smallest population size
for (i in 1:3) {
  print(mean(results_allele_shift[sim_params[, 7] == "1000", i]) / num_mark_by_size[["1000"]])
  print(mean(results_allele_shift[sim_params[, 7] == "50", i]) / num_mark_by_size[["50"]])
}
#Results for i=1 (all alleles, number of alleles per locus)... 21.01/26.96=0.779
# Median and .95 quantile columns, largest versus smallest population size
for (i in 4:9) {
  print(mean(results_allele_shift[sim_params[, 7] == "1000", i]))
  print(mean(results_allele_shift[sim_params[, 7] == "50", i]))
}
#Results for i=4 (all alleles, allele frequency)... .0308/.0403=0.764