Skip to content

Commit

Permalink
Update detect.non-causal.v2b.py
Browse files Browse the repository at this point in the history
  • Loading branch information
kofiamoah authored Nov 3, 2020
1 parent 0e5dafd commit b9a96fa
Showing 1 changed file with 0 additions and 32 deletions.
32 changes: 0 additions & 32 deletions src/detect.non-causal.v2b.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ def main(argv):
#store the snv to be tested
testing = defaultdict(set) #(chrm.pos.strd,exon): set of lines
thresh = {} #(candid,exon,tag): si thresh
# means = defaultdict(list) #(chrm.pos.strd,exon): list of peakSiMean
# stdev = defaultdict(list) #(chrm.pos.strd,exon): list of peakSiStdev
if opts.r:
with open(opts.r) as f:
for ll in f:
Expand All @@ -52,21 +50,17 @@ def main(argv):
try:
thresh[(l[0],l[3],l[4])] = (1+float(l[13]))/2
except ValueError: pass
# means[(l[0],l[3])].append(float(l[13]))
# stdev[(l[0],l[3])].append(float(l[14]))

#data[case]: [het-good,het-bad,hom-good,hom-bad]; good: si->1, bad: si->0
zero = lambda:{'hetgood':0, 'hetbad':0, 'homgood':0, 'hombad':0}
data = defaultdict(zero)
# hetgood,hetbad,homgood,hombad = 0, 0, 0, 0
for ff in glob.glob('{}/*2/{}*'.format(opts.i,opts.s)):
with open(ff) as f:
for l in f:
if l.startswith('causalCandidate'): continue
l = l.strip().split('\t')
if (l[0],l[2],l[4]) not in thresh: continue
gt,source,nt = l[1].split('|')
# data[(l[0],l[3])][gt].append(float(l[-1]))
si = float(l[-1])
if si > thresh[(l[0],l[2],l[4])]:
if gt in ('0/1','1/0'): #het
Expand All @@ -88,37 +82,11 @@ def main(argv):
hetbad = data[case]['hetbad']
hombad = data[case]['hombad']
oddsratio, pvalue = stats.fisher_exact([[hetgood, hetbad], [homgood, hombad]])
#if pvalue <= 0.05: continue
#execute below if no bias based on gt => GOOD!
for x in testing[case]:
#out.write(x)
out.write('{}\t{}\n'.format(x,pvalue))

#generate 2x2
#res[case]: [het-good,het-bad,hom-good,hom-bad]; good: si->1, bad: si->0
# for case in data.iterkeys():
# mu = np.mean(means[case])
# sigma = np.mean(stdev[case])
# norm = np.random.normal(mu, sigma, 1000)
# het = data[case]['0/1'] + data[case]['1/0']
# hom = data[case]['0/0'] + data[case]['1/1']
# ht_t, ht_p = stats.ttest_ind(het, norm, equal_var = False)
# hm_t, hm_p = stats.ttest_ind(hom, norm, equal_var = False)
# if ht_p < 0.05 and hm_p < 0.05:
# print 'uh-oh1',case
# print 'mu:', mu
# print 'het:',het,ht_p
# print 'hom:',hom,hm_p
# sys.exit()
# elif ht_p < 0.05: #ht deviates from mu Si => bad group
#
# elif hm_p < 0.05: #hm deviates from mu Si => bad group
# else: #both are insignificant...
# print 'uh-oh2',case
# print 'mu:', mu
# print 'het:',het,ht_p
# print 'hom:',hom,hm_p

print("--- %s seconds ---" % (time.time() - start_time))
print 'DONE!', strftime('%a, %d %b %Y %I:%M:%S')

Expand Down

0 comments on commit b9a96fa

Please sign in to comment.