From 106d42a4234b73125e8cc89b8d5c3b041ae079f4 Mon Sep 17 00:00:00 2001
From: blackrim
Date: Wed, 28 Feb 2024 14:02:03 -0500
Subject: [PATCH] more

---
 src/join_paftol_tax.py | 41 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/src/join_paftol_tax.py b/src/join_paftol_tax.py
index 661c0ac..f4d9c15 100644
--- a/src/join_paftol_tax.py
+++ b/src/join_paftol_tax.py
@@ -3,17 +3,49 @@
 import tree_reader
 import tree_utils
 
+"""
+TODO
+- need to do the mrca using names that includes internal nodes
+- need to go back to the node that is the MRCA but includes the clade of unsampled
+so if it is
+(a,b,c),(d,e,f) and i have (((e,f),d),a) -> ((a,b,c),(d,(e,f)))
+
+"""
+
 def process_tax(t):
     return
 
+def intersect_taxa(n,t):
+    return len(set(n).intersection(t))
+
 def get_mrca_wnms(n,t):
     if len(n) == 1:
-        for i in t.leaves():
+        for i in t.iternodes():
             if i.label == n[0]:
                 return i
     else:
         return tree_utils.get_mrca_wnms(n,t)
 
+# Walk back from an MRCA toward the root for as long as the parent clade
+# overlaps the paf tree in exactly the same number of taxa, so that clades
+# of unsampled taxa are included in the returned node.
+# nd = the mrca found from the names of interest
+# paflvsnms = the paf tree lvs nms
+# returns nd itself or one of its ancestors (stops at the root)
+def walk_back_mrca(nd,paflvsnms):
+    rnd = nd
+    intn = intersect_taxa(rnd.lvsnms(),paflvsnms)
+    # check for the root first: it has no parent to inspect
+    while rnd.parent is not None:
+        nintn = intersect_taxa(rnd.parent.lvsnms(),paflvsnms)
+        if nintn != intn:
+            break
+        rnd = rnd.parent
+    return rnd
+
+
+
+
 if __name__ == "__main__":
     if len(sys.argv) != 3:
         print("python",sys.argv[0],"paf tax")
@@ -25,7 +57,7 @@ def get_mrca_wnms(n,t):
     for i in paf.leaves():
         i.data["original_name"] = i.label
         i.label = i.label.split("_")[-1]
-    for i in tax.leaves():
+    for i in tax.iternodes():
         i.data["original_name"] = i.label
         i.label = i.label.split("_")[0]
     count= 0
@@ -40,6 +72,7 @@ def get_mrca_wnms(n,t):
             k = get_mrca_wnms(j.lvsnms(),tax)
             if k == None:
                 continue
+            k = walk_back_mrca(k,paf.lvsnms())
             chds.append(k)
         if len(chds) == 1:
             continue
@@ -50,6 +83,6 @@ def get_mrca_wnms(n,t):
             n.add_child(j)
         p.add_child(n)
         count += 1
-        if count == 21:
-            break
+        #if count == 100:
+        #    break
     print(tax.get_newick_repr(False))