-
Notifications
You must be signed in to change notification settings - Fork 10
/
precheck.py
149 lines (131 loc) · 5.53 KB
/
precheck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
DFOIL: Directional introgression testing a five-taxon phylogeny
pre-dfoil - site count checking test
James B. Pease
http://www.github.com/jbpease/dfoil
"""
from __future__ import print_function, unicode_literals
_LICENSE = """
If you use this software please cite:
Pease JB and MW Hahn. 2015.
"Detection and Polarization of Introgression in a Five-taxon Phylogeny"
Systematic Biology. 64 (4): 651-662.
http://www.dx.doi.org/10.1093/sysbio/syv023
This file is part of DFOIL.
DFOIL is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
DFOIL is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DFOIL. If not, see <http://www.gnu.org/licenses/>.
"""
# Lookup from each even integer site-pattern code (0, 2, ..., 30) to its
# five-character label: the code is written as a zero-padded 5-bit binary
# number, then every 0 bit becomes 'A' and every 1 bit becomes 'B'
# (for example 2 -> 'AAABA', 16 -> 'BAAAA').
SITECODES = {
    code: "{:05b}".format(code).replace('0', 'A').replace('1', 'B')
    for code in range(0, 32, 2)
}
def pre_check(window_data, mode='dfoil', verbose=True):
    """Total the site-pattern counts over all windows and run the pre-checks.

    Arguments:
        window_data: iterable of window objects; each must expose a
                     `counts` mapping of site-pattern code -> count
        mode: passed through unchanged to each individual check
        verbose: passed through unchanged to each individual check

    Returns:
        '' (always); the checks report their findings by printing.
    """
    # Start every even code 0..30 at zero so the checks can index freely.
    totals = dict.fromkeys(range(0, 32, 2), 0)
    for window in window_data:
        counts = window.counts
        for code in counts:
            # .get() keeps codes outside the pre-seeded range from failing.
            totals[code] = totals.get(code, 0) + counts[code]
    # The checks run in a fixed order: concordance, divergence order,
    # then terminal-branch proportions.
    for run_check in (check_concordant, divergence_order, check_terminal):
        run_check(totals, mode, verbose=verbose)
    return ''
def check_concordant(sum_data, mode, verbose=True):
    """Warn when a discordant site pattern outnumbers a concordant one.

    Every concordant pattern total for the given mode is compared with
    every discordant pattern total; each pair in which the discordant
    total is strictly larger prints a WARNING line.

    Arguments:
        sum_data: dict of site-pattern code -> total count; codes that
                  are missing are treated as a count of 0
        mode: 'dfoil' or 'dstat'.  No pattern sets are defined here for
              any other mode, so the check is skipped for them.
        verbose: when True, print the banner around the check and "Pass"
                 when no warning was raised.  WARNING lines are printed
                 regardless of this flag.

    Returns:
        '' (always)
    """
    patterns_by_mode = {
        'dfoil': ((2, 4, 8, 16, 6, 24),
                  (10, 12, 14, 18, 20, 22, 26, 28)),
        'dstat': ((2, 4, 8, 12),
                  (6, 10)),
    }
    if mode not in patterns_by_mode:
        # An unrecognised mode used to leave the pattern tuples unbound and
        # crash with UnboundLocalError; skip the check instead, the same way
        # divergence_order() returns early for modes it does not handle.
        return ''
    concordant_patterns, discordant_patterns = patterns_by_mode[mode]
    banner = verbose is True
    if banner:
        print("=" * 79)
        print("Checking that concordant patterns are more common that "
              "discordant\n"
              "(Note that this is normal when introgression is extreme, "
              "but could also\n"
              "indicate the taxa are out of order):")
        print("-" * 79)
    checkok = True
    for concode in concordant_patterns:
        for discode in discordant_patterns:
            if sum_data.get(concode, 0) < sum_data.get(discode, 0):
                print(("WARNING: Total count of "
                       "discordant pattern {}={} is higher than "
                       "concordant pattern {}={}").format(
                           SITECODES.get(discode, 0),
                           sum_data.get(discode, 0),
                           SITECODES.get(concode, 0),
                           sum_data.get(concode, 0)))
                checkok = False
    if banner:
        if checkok:
            print("Pass")
        # Always close the banner that was opened above.
        print("=" * 79)
    return ''
def divergence_order(sum_data, mode, verbose=True):
    """Warn when P1/P2 terminal counts exceed P3/P4 terminal counts.

    Only meaningful for mode 'dfoil'; every other mode returns at once
    without printing.  The totals for codes 8 and 16 (reported as the
    P1/P2 terminal substitutions) are each compared with the totals for
    codes 2 and 4 (reported as the P3/P4 terminal substitutions); a
    strictly larger P1/P2 total prints a WARNING line.

    Arguments:
        sum_data: dict of site-pattern code -> total count; codes that
                  are missing are treated as a count of 0
        mode: analysis mode; the check runs only when this is 'dfoil'
        verbose: when True, print the banner around the check and "Pass"
                 when no warning was raised.  WARNING lines are printed
                 regardless of this flag.

    Returns:
        '' (always)
    """
    if mode != 'dfoil':
        return ''
    banner = verbose is True
    if banner:
        print("=" * 79)
        print("Checking that divergences are correctly ordered "
              "(P1 and P2 should diverge AFTER P3 and P4)")
        print("-" * 79)
    checkok = True
    for abcode in (8, 16):
        ab_total = sum_data.get(abcode, 0)
        for cdcode in (2, 4):
            cd_total = sum_data.get(cdcode, 0)
            if ab_total > cd_total:
                print(("WARNING: Total count of P1/P2 terminal substitutions "
                       "{}={} is higher than P3/P4 terminal substitutions "
                       "{}={}"
                       ).format(SITECODES.get(abcode, 0), ab_total,
                                SITECODES.get(cdcode, 0), cd_total))
                checkok = False
    # "Pass" and the closing rule belong to the banner, so they follow the
    # verbose flag just as check_concordant() gates its "Pass" line.
    if banner:
        if checkok:
            print("Pass")
        print("=" * 79)
    return ''
def _terminal_ratio_ok(sum_data, numcode, dencode, label, pair, verbose):
    """Report one terminal-count ratio; return True if it is in [0.8, 1.25].

    The ratio line is printed when `verbose` is True or when the ratio is
    flagged, so a warning is always accompanied by the numbers behind it.
    """
    numerator = sum_data.get(numcode, 0)
    denominator = sum_data.get(dencode, 0)
    # A zero denominator is reported as "inf" and always counts as a failure.
    ratio = (float(numerator) / denominator
             if denominator > 0 else float("inf"))
    # Flag both directions.  The earlier test `0.8 < ratio > 1.25` is a
    # chained comparison that only caught ratio > 1.25, never ratio < 0.8.
    flagged = ratio < 0.8 or ratio > 1.25
    if verbose is True or flagged:
        print("{} ratio = {} ({}/{})".format(label, ratio,
                                             numerator, denominator))
    if flagged:
        print("WARNING: {} ratio deviates more than 25% from 1.0".format(
            pair))
    return not flagged


def check_terminal(sum_data, mode, verbose=True):
    """Warn when paired terminal-branch counts are badly unbalanced.

    For mode 'dfoil' the ratios of codes 16/8 (BAAAA/ABAAA, P1/P2) and
    4/2 (AABAA/AAABA, P3/P4) are checked; for mode 'dstat' the ratio of
    codes 8/4 (BAAA/ABAA, P1/P2) is checked.  A ratio below 0.8, above
    1.25, or with a zero denominator prints a WARNING line.  Any other
    mode performs no ratio checks.

    Arguments:
        sum_data: dict of site-pattern code -> total count; codes that
                  are missing are treated as a count of 0
        mode: 'dfoil' or 'dstat'
        verbose: when True, print the banner around the check, every
                 ratio line, and "Pass" when no warning was raised.
                 Flagged ratios and their WARNING lines are printed
                 regardless of this flag.

    Returns:
        None
    """
    banner = verbose is True
    if banner:
        print("=" * 79)
        print("Checking that terminal branch pairs are "
              "proportionate approximately")
        print("-" * 79)
    # (numerator code, denominator code, ratio label, taxon pair name)
    if mode == 'dfoil':
        comparisons = ((16, 8, "BAAAA/ABAAA", "P1/P2"),
                       (4, 2, "AABAA/AAABA", "P3/P4"))
    elif mode == 'dstat':
        comparisons = ((8, 4, "BAAA/ABAA", "P1/P2"),)
    else:
        comparisons = ()
    checkok = True
    for numcode, dencode, label, pair in comparisons:
        if not _terminal_ratio_ok(sum_data, numcode, dencode,
                                  label, pair, banner):
            checkok = False
    if banner:
        if checkok:
            print("Pass")
        print("=" * 79)
if __name__ == "__main__":
    # This module is no longer a stand-alone entry point; direct anyone who
    # runs it to the dfoil.py flag that performs only the pre-check.
    for notice in (
            "pre-dfoil can no longer be run on its own.",
            "Please use dfoil.py with the --pre-check-only "
            "flag to just run a pre-check",
    ):
        print(notice)