Skip to content

Commit 9adb3ea

Browse files
committed
update
1 parent cda01c7 commit 9adb3ea

7 files changed

+374
-12
lines changed

aintrogression.py

+55-3
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@
4949
5050
R_D: (Racimo 2016)
5151
52-
U20: (Racimo 2016, Jagoda 2018)
52+
Utwenty: (Racimo 2016, Jagoda 2018)
5353
54-
Q95: (Racimo 2016, Jagoda 2018)
54+
QninetyFive: (Racimo 2016, Jagoda 2018)
5555
5656
Dp_intro: (Racimo 2016)
5757
@@ -175,12 +175,64 @@ def QninetyFive(pos, gt, out, target, bait, clen, ofreq=0.01, winSize=10000, win
175175
return(Q95)
176176

177177

178-
def Fd(pos, gt, out, target, bait, ofreq=0.01, winSize=10000, winStep=None):
178+
def F_d(pos, gt, out, target, bait, ofreq=0.01, winSize=10000, winStep=None):
179179
"""Fd from Martin 2015
180180
"""
181181
return(None)
182182

183183

184+
def Dout():
185+
"""
186+
dout: (dXO + dYO) / 2
187+
average distance between species X and the Outgroup and Species Y and the
188+
Outgroup.
189+
"""
190+
191+
192+
def Dxy():
193+
"""
194+
dxy: pairwise distance / bases; low values indicate possible introgression
195+
Taking dxy as the number of sequence differences between any 2 sequences, x and y, in two
196+
taxa, X and Y (divided by the number of sites), then dxy is the average
197+
distance between all sequences in the two species. # above assumes no
198+
variation in neutral mutation rate, low mutation rate can be mistaken for
199+
recent introgression
200+
"""
201+
202+
203+
def Dmin():
204+
"""
205+
REQUIRES PHASED DATA (HaplotypeArray)
206+
dmin: min(dxy), requires haplotypes
207+
minimum distance among all pairings of haplotypes in the 2 species. P-value
208+
by coalescent with no migration or from other parts of the genome average.
209+
# above assumes no variation in neutral mutation rate, low mutation rate
210+
can be mistaken for recent introgression
211+
"""
212+
213+
214+
def RND():
215+
"""
216+
RND (relative node depth): dxy / dout
217+
Robust to low mutation rates, like the HKA test of neutrality. # not sensitive
218+
to low-frequency migrants.
219+
calculate Dxy in windows on haplotypeArray between Species X and Y
220+
calculate Dxy between Species X and Outgroup
221+
calculate Dxy between Species Y and Outgroup
222+
"""
223+
# Dxy/ Dout
224+
225+
226+
def RNDmin():
227+
"""
228+
RNDmin: dmin / dout
229+
Similarly, like both dmin and Gmin, RNDmin should be sensitive to even rare
230+
migrant haplotypes. In addition, we expect RNDmin to be powerful even when
231+
migrants are high in frequency
232+
"""
233+
# Dmin/Dout
234+
235+
184236
def adaptPlot(Q, chrom, name, p, save=True):
185237
"""
186238
"""

ald.py

+2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
@author: scott
77
"""
88
import allel
9+
import matplotlib as mpl
910
import matplotlib.pyplot as plt
1011
import seaborn as sns
12+
mpl.rcParams['pdf.fonttype'] = 42
1113
import numpy as np
1214
import scipy
1315
from collections import defaultdict

aplot.py

+2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
"""
88
import allel
99
import numpy as np
10+
import matplotlib as mpl
1011
import matplotlib.pyplot as plt
1112
import seaborn as sns
13+
mpl.rcParams['pdf.fonttype'] = 42
1214

1315

1416
def plotvars(chrm, callset, window_size=10000, title=None, saved=False):

autil.pyc

4.3 KB
Binary file not shown.

run_main.py

+15-9
Original file line numberDiff line numberDiff line change
@@ -92,17 +92,14 @@ def loadgenome_extradata_fx(fasta_handle, gff3_handle, meta):
9292
genome, gff3, meta = loadgenome_extradata_fx(fasta_handle, gff3_handle,
9393
meta)
9494

95-
#meta = "/home/scott/Documents/Wb/Wb_sWGA/data_files/allel/WbAllpops.47.info"
96-
meta = "/home/scott/Desktop/AnopSG_liftvcf/AnopSG.55.info"
95+
meta = "AnopSG.55.info"
9796
meta = pd.read_csv(meta, delimiter=",")
98-
#var = Chr('All', 'WbAllpops.impute.allchr.47.h5')
99-
var = Chr('All', '/home/scott/Desktop/AnopSG_liftvcf/2L.SNP.recode.h5')
97+
var = Chr('All', '2L.FSG.SNP.recode.h5')
10098
popdict = autil.subpops(var, meta, bypop=True, bykary=False)
10199
pop2color = autil.popcols(popdict)
102100
chrlist = np.unique(var.chrm[:])
103-
# chrlist = del chrlist['Wb_ChrX_0']
104101
chrlen = {}
105-
with open("/home/scott/Documents/Wb/Wb_sWGA/data_files/allel/chr_info", 'r') as c:
102+
with open("chr_info", 'r') as c:
106103
for line in c:
107104
x = line.strip().split()
108105
chrlen[x[0]] = int(x[1])
@@ -202,11 +199,20 @@ def loadgenome_extradata_fx(fasta_handle, gff3_handle, meta):
202199
pop2color, var)
203200

204201

205-
206-
202+
import matplotlib.pyplot as plt
203+
# tajd histogram
207204
x = []
208205
for p in tajddict.keys():
209-
x.append((tajddict[p]["Haiti"][2][0]))
206+
x.append((tajddict[p]["Fun"][2][0]))
210207
m = np.concatenate(x).ravel()
211208
n = m[~np.isnan(m)]
212209
b,bins,patches = plt.hist(n, 50, density=True)
210+
211+
# pi histogram
212+
x = []
213+
for p in pidict.keys():
214+
x.append((pidict[p]["Par"][2][0]))
215+
m = np.concatenate(x).ravel()
216+
n = m[~np.isnan(m)]
217+
b, bins, patches = plt.hist(n, 50, density=True)
218+

supplemental_figDiversity.py

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Mon Jan 21 16:34:02 2019
5+
6+
@author: scott
7+
"""
8+
from __future__ import division
9+
from __future__ import print_function
10+
import numpy as np
11+
import pandas as pd
12+
import matplotlib as mpl
13+
# functions
14+
from allel_class import Chr
15+
import autil as autil
16+
import adiv as av
17+
import ald as ald
18+
import adxy as adxy
19+
import aplot as aplot
20+
mpl.rcParams['pdf.fonttype'] = 42
21+
22+
meta = "AnopSG.55.info"
23+
meta = pd.read_csv(meta, delimiter=",")
24+
var = Chr('All', '2L.FSG.SNP.recode.h5')
25+
popdict = autil.subpops(var, meta, bypop=True, bykary=False)
26+
pop2color = autil.popcols(popdict)
27+
28+
# chrlist = np.unique(var.chrm[:])
29+
chrlen = {}
30+
with open("chr_info", 'r') as c:
31+
for line in c:
32+
x = line.strip().split()
33+
chrlen[x[0]] = int(x[1])
34+
35+
# Pairwise Dxy per chromosome (input for RND)
36+
dxydict = {}
37+
for c in chrlen.keys():
38+
var = Chr('All', '{}.FSG.SNP.recode.h5'.format(c))
39+
var.geno(c, meta)
40+
# var.miss(var.gt, var.pos, .20)
41+
# var.mac(var.gt, var.pos, 1)
42+
print("\nStats for Chromosome {}\n".format(c))
43+
# allele count object
44+
ac_subpops = var.gt.count_alleles_subpops(popdict, max_allele=2)
45+
df_dxy = adxy.pairDxy(c, chrlen[c], ac_subpops, var.pos, plot=True)
46+
dxydict[c] = df_dxy
47+
48+
# Diversity statistics
49+
pidict = {}
50+
tajddict = {}
51+
thetadict = {}
52+
for c in chrlen.keys():
53+
var = Chr('All', '{}.FSG.SNP.recode.h5'.format(c))
54+
var.geno(c, meta)
55+
print("\nStats for Chromosome {}\n".format(c))
56+
# var.miss(var.gt, var.pos, .20)
57+
# var.mac(var.gt, var.pos, 1)
58+
# allele count object
59+
ac_subpops = var.gt.count_alleles_subpops(popdict, max_allele=1)
60+
pi = av.pi(c, chrlen[c], ac_subpops, var.pos, plot=True)
61+
pidict[c] = pi
62+
d = av.tajd(c, chrlen[c], ac_subpops, var.pos, plot=True)
63+
tajddict[c] = d
64+
t = av.theta(c, chrlen[c], ac_subpops, var.pos, plot=True)
65+
thetadict[c] = t
66+
67+
# Diversity boxplots: merge per-chromosome results with autil.catdict(), then plot with aplot.divboxplot()
68+
69+
theta = autil.catdict(thetadict)
70+
aplot.divboxplot(theta, pop2color)
71+
pi = autil.catdict(pidict)
72+
aplot.divboxplot(pi, pop2color)
73+
tajd = autil.catdict(tajddict)
74+
aplot.divboxplot(tajd, pop2color)
75+
76+
# Tajima's D histogram per population; TODO: set per-population colors and transparency (alpha)
77+
for pop in popdict.keys():
78+
b, bins, patches = mpl.pyplot.hist(tajd[pop], 50, density=True)
79+
80+
# LD decay plot
81+
lddict = {}
82+
for c in chrlen.keys():
83+
var = Chr('All', '{}.FSG.SNP.recode.h5'.format(c))
84+
var.geno(c, meta)
85+
print("\nStats for Chromosome {}\n".format(c))
86+
var.miss(var.gt, var.pos, .20)
87+
# allele count object
88+
ac_subpops = var.gt.count_alleles_subpops(popdict, max_allele=1)
89+
lddict[c] = ald.ld_decay(c, chrlen[c], ac_subpops, popdict,
90+
pop2color, var)

0 commit comments

Comments
 (0)