From f61b19daaa9d0e705254d4205a6dfc64da1cf7c8 Mon Sep 17 00:00:00 2001 From: Max Brown Date: Tue, 3 May 2022 21:01:49 +0100 Subject: [PATCH] Add more telomeric repeats to clades. --- clades/clades.txt | 156 ++++++++++++++++++++++++---------------------- src/clades.rs | 114 +++++++++++++++++---------------- 2 files changed, 143 insertions(+), 127 deletions(-) diff --git a/clades/clades.txt b/clades/clades.txt index 8116601..a8aa130 100644 --- a/clades/clades.txt +++ b/clades/clades.txt @@ -57,6 +57,7 @@ "Hymenoptera", "Hypnales", "Isochrysidales", + "Isopoda", "Lamiales", "Lepidoptera", "Liliales", @@ -172,12 +173,12 @@ "Araneae" => TelomereSeq { clade: "Araneae", - seq: Seq(Box::new(&["AATAT", - "ACTAT", + seq: Seq(Box::new(&["AACTTGT", "AACAT", - "AAAGC", + "AATAT", "ACATG", - "AACTTGT"])), + "AAAGC", + "ACTAT"])), length: 6, }, @@ -191,8 +192,8 @@ "Asparagales" => TelomereSeq { clade: "Asparagales", - seq: Seq(Box::new(&["AACCGAGCCCAT", - "AACCCT"])), + seq: Seq(Box::new(&["AACCCT", + "AACCGAGCCCAT"])), length: 2, }, @@ -297,9 +298,9 @@ "Cheilostomatida" => TelomereSeq { clade: "Cheilostomatida", - seq: Seq(Box::new(&["AAACCCC", + seq: Seq(Box::new(&["AAGTCT", "ACAGT", - "AAGTCT"])), + "AAACCCC"])), length: 3, }, @@ -320,34 +321,34 @@ "Chlamydomonadales" => TelomereSeq { clade: "Chlamydomonadales", - seq: Seq(Box::new(&["AAGGATGGAC", - "AACCCT"])), + seq: Seq(Box::new(&["AACCCT", + "AAGGATGGAC"])), length: 2, }, "Coleoptera" => TelomereSeq { clade: "Coleoptera", - seq: Seq(Box::new(&["AGATATAT", - "AAAAATTC", - "AAGTC", - "AAGTCG", - "AAAAATAC", - "AAAGGTCACC", - "AAAGGAC", + seq: Seq(Box::new(&["AAAAATTC", + "ACCTG", + "AACCT", "AACAGACCCG", - "AAATAACT", "ACAGACTG", + "AAATAACT", "AACAT", - "AACTCC", - "ACTATG", - "ACCTG", "AAATAATT", - "AACCT", - "ACTCTG", - "AAGACAGAC", + "AGATATAT", + "AAGTC", + "AAAAATAC", "AAGTAATC", - "AACCC"])), + "AACTCC", + "AAGACAGAC", + "ACTCTG", + "AAGTCG", + "AACCC", + "AAAGGAC", + "ACTATG", + "AAAGGTCACC"])), length: 20, }, @@ -361,9 +362,9 @@ "Crassiclitellata" => TelomereSeq { clade: "Crassiclitellata", - seq: Seq(Box::new(&["AAGGAC", - "AACTC", - "AACCCT"])), + seq: Seq(Box::new(&["AACCCT", + "AAGGAC", + "AACTC"])), length: 3, }, @@ -406,16 +407,16 @@ "Ericales" => TelomereSeq { clade: "Ericales", seq: Seq(Box::new(&["AAGCATT", - "AAACCCT", - "AAGCATC"])), + "AAGCATC", + "AAACCCT"])), length: 3, }, "Eucoccidiorida" => TelomereSeq { clade: "Eucoccidiorida", - seq: Seq(Box::new(&["AAACCCT", - "AAGGAGGAGACAAT"])), + seq: Seq(Box::new(&["AAGGAGGAGACAAT", + "AAACCCT"])), length: 2, }, @@ -458,8 +459,8 @@ "Fucales" => TelomereSeq { clade: "Fucales", - seq: Seq(Box::new(&["ACACT", - "AACCCT"])), + seq: Seq(Box::new(&["AACCCT", + "ACACT"])), length: 2, }, @@ -502,19 +503,19 @@ "Hemiptera" => TelomereSeq { clade: "Hemiptera", - seq: Seq(Box::new(&["ACAGAGAGGC", - "ACCGAG", - "AACCT", + seq: Seq(Box::new(&["AAACCACCCT", "AATATAG", - "AATAC", "AACACTCCCT", - "AAACCTATCC", - "AACCTACCT", - "AAAATTGTTGATGGAGATCATAC", + "ACAGAGAGGC", + "AATAC", "AACCATCCCT", - "AAACCACCCT", + "AAGAATATAGAAT", + "AACCT", + "AAAATTGTTGATGGAGATCATAC", + "ACCGAG", + "AACCTACCT", "AAGAAT", - "AAGAATATAGAAT"])), + "AAACCTATCC"])), length: 13, }, @@ -535,38 +536,39 @@ "Hymenoptera" => TelomereSeq { clade: "Hymenoptera", - seq: Seq(Box::new(&["ACGATG", - "AATAT", - "AACGAGTCG", - "AAACGAGTC", - "AGAGAT", - "AAAACG", - "AACCCGAACCT", - "AACCCAGACCT", + seq: Seq(Box::new(&["AAACGAGTC", + "ACCTG", + "AACCT", "AACGTAT", - "AAACAC", - "AGGGATATC", + "AACGAGTCG", "ACAGAG", - "AAAAAC", "AACGAC", - "ACCTG", "AACCCAGACCC", - "AACCCAGACGC", - "AACCCTGACGC", + "AAAAAC", + "AGAGAT", "ACTCT", - "AACCT", + "ACGATG", "AACCCCAACCT", "ACATCGT", + "AACCCTGACGC", "AACCCT", - "AACCC"])), - length: 24, + "AAAGGC", + "AACCCAGACGC", + "AACCCAGACCT", + "AATAT", + "AACCCGAACCT", + "AAACAC", + "AACCC", + "AAAACG", + "AGGGATATC"])), + length: 25, }, "Hypnales" => TelomereSeq { clade: "Hypnales", - seq: Seq(Box::new(&["AAACCCT", - "AACAG"])), + seq: Seq(Box::new(&["AACAG", + "AAACCCT"])), length: 2, }, @@ -578,6 +580,13 @@ }, + "Isopoda" => TelomereSeq { + clade: "Isopoda", + seq: Seq(Box::new(&["AGGATG"])), + length: 1, + }, + + "Lamiales" => TelomereSeq { clade: "Lamiales", seq: Seq(Box::new(&["AAACCCT", @@ -588,13 +597,13 @@ "Lepidoptera" => TelomereSeq { clade: "Lepidoptera", - seq: Seq(Box::new(&["ACTCTG", + seq: Seq(Box::new(&["ACATC", + "AACGTGAT", + "ACTCTG", + "AACCATCCCT", "AACCT", "AACTCG", - "ACATC", - "AACCATCCCT", - "AAGACGGTAAGTGTGTATGTATGT", - "AACGTGAT"])), + "AAGACGGTAAGTGTGTATGTATGT"])), length: 7, }, @@ -673,16 +682,15 @@ "Nudibranchia" => TelomereSeq { clade: "Nudibranchia", - seq: Seq(Box::new(&["ACAGTAGC", - "AAACAC"])), - length: 2, + seq: Seq(Box::new(&["AAACAC"])), + length: 1, }, "Odonata" => TelomereSeq { clade: "Odonata", - seq: Seq(Box::new(&["AGATC", - "AGCCATCGCCAT", + seq: Seq(Box::new(&["AGCCATCGCCAT", + "AGATC", "AACCC"])), length: 3, }, @@ -796,9 +804,9 @@ "Ranunculales" => TelomereSeq { clade: "Ranunculales", seq: Seq(Box::new(&["AAACCCT", - "AACCCTG", "AACCCCG", - "AAACCG"])), + "AAACCG", + "AACCCTG"])), length: 4, }, diff --git a/src/clades.rs b/src/clades.rs index 64c4eee..4bcd05f 100644 --- a/src/clades.rs +++ b/src/clades.rs @@ -88,6 +88,7 @@ pub const CLADES: &[&str] = &[ "Hymenoptera", "Hypnales", "Isochrysidales", + "Isopoda", "Lamiales", "Lepidoptera", "Liliales", @@ -193,7 +194,7 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Araneae" => TelomereSeq { clade: "Araneae", seq: Seq(Box::new(&[ - "AATAT", "ACTAT", "AACAT", "AAAGC", "ACATG", "AACTTGT", + "AACTTGT", "AACAT", "AATAT", "ACATG", "AAAGC", "ACTAT", ])), length: 6, }, @@ -206,7 +207,7 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Asparagales" => TelomereSeq { clade: "Asparagales", - seq: Seq(Box::new(&["AACCGAGCCCAT", "AACCCT"])), + seq: Seq(Box::new(&["AACCCT", "AACCGAGCCCAT"])), length: 2, }, @@ -296,7 +297,7 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Cheilostomatida" => TelomereSeq { clade: "Cheilostomatida", - seq: Seq(Box::new(&["AAACCCC", "ACAGT", "AAGTCT"])), + seq: Seq(Box::new(&["AAGTCT", "ACAGT", "AAACCCC"])), length: 3, }, @@ -314,33 +315,33 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Chlamydomonadales" => TelomereSeq { clade: "Chlamydomonadales", - seq: Seq(Box::new(&["AAGGATGGAC", "AACCCT"])), + seq: Seq(Box::new(&["AACCCT", "AAGGATGGAC"])), length: 2, }, "Coleoptera" => TelomereSeq { clade: "Coleoptera", seq: Seq(Box::new(&[ - "AGATATAT", "AAAAATTC", - "AAGTC", - "AAGTCG", - "AAAAATAC", - "AAAGGTCACC", - "AAAGGAC", + "ACCTG", + "AACCT", "AACAGACCCG", - "AAATAACT", "ACAGACTG", + "AAATAACT", "AACAT", - "AACTCC", - "ACTATG", - "ACCTG", "AAATAATT", - "AACCT", - "ACTCTG", - "AAGACAGAC", + "AGATATAT", + "AAGTC", + "AAAAATAC", "AAGTAATC", + "AACTCC", + "AAGACAGAC", + "ACTCTG", + "AAGTCG", "AACCC", + "AAAGGAC", + "ACTATG", + "AAAGGTCACC", ])), length: 20, }, @@ -353,7 +354,7 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Crassiclitellata" => TelomereSeq { clade: "Crassiclitellata", - seq: Seq(Box::new(&["AAGGAC", "AACTC", "AACCCT"])), + seq: Seq(Box::new(&["AACCCT", "AAGGAC", "AACTC"])), length: 3, }, @@ -389,13 +390,13 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Ericales" => TelomereSeq { clade: "Ericales", - seq: Seq(Box::new(&["AAGCATT", "AAACCCT", "AAGCATC"])), + seq: Seq(Box::new(&["AAGCATT", "AAGCATC", "AAACCCT"])), length: 3, }, "Eucoccidiorida" => TelomereSeq { clade: "Eucoccidiorida", - seq: Seq(Box::new(&["AAACCCT", "AAGGAGGAGACAAT"])), + seq: Seq(Box::new(&["AAGGAGGAGACAAT", "AAACCCT"])), length: 2, }, @@ -431,7 +432,7 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Fucales" => TelomereSeq { clade: "Fucales", - seq: Seq(Box::new(&["ACACT", "AACCCT"])), + seq: Seq(Box::new(&["AACCCT", "ACACT"])), length: 2, }, @@ -468,19 +469,19 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Hemiptera" => TelomereSeq { clade: "Hemiptera", seq: Seq(Box::new(&[ - "ACAGAGAGGC", - "ACCGAG", - "AACCT", + "AAACCACCCT", "AATATAG", - "AATAC", "AACACTCCCT", - "AAACCTATCC", - "AACCTACCT", - "AAAATTGTTGATGGAGATCATAC", + "ACAGAGAGGC", + "AATAC", "AACCATCCCT", - "AAACCACCCT", - "AAGAAT", "AAGAATATAGAAT", + "AACCT", + "AAAATTGTTGATGGAGATCATAC", + "ACCGAG", + "AACCTACCT", + "AAGAAT", + "AAACCTATCC", ])), length: 13, }, @@ -500,37 +501,38 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Hymenoptera" => TelomereSeq { clade: "Hymenoptera", seq: Seq(Box::new(&[ - "ACGATG", - "AATAT", - "AACGAGTCG", "AAACGAGTC", - "AGAGAT", - "AAAACG", - "AACCCGAACCT", - "AACCCAGACCT", + "ACCTG", + "AACCT", "AACGTAT", - "AAACAC", - "AGGGATATC", + "AACGAGTCG", "ACAGAG", - "AAAAAC", "AACGAC", - "ACCTG", "AACCCAGACCC", - "AACCCAGACGC", - "AACCCTGACGC", + "AAAAAC", + "AGAGAT", "ACTCT", - "AACCT", + "ACGATG", "AACCCCAACCT", "ACATCGT", + "AACCCTGACGC", "AACCCT", + "AAAGGC", + "AACCCAGACGC", + "AACCCAGACCT", + "AATAT", + "AACCCGAACCT", + "AAACAC", "AACCC", + "AAAACG", + "AGGGATATC", ])), - length: 24, + length: 25, }, "Hypnales" => TelomereSeq { clade: "Hypnales", - seq: Seq(Box::new(&["AAACCCT", "AACAG"])), + seq: Seq(Box::new(&["AACAG", "AAACCCT"])), length: 2, }, @@ -540,6 +542,12 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { length: 1, }, + "Isopoda" => TelomereSeq { + clade: "Isopoda", + seq: Seq(Box::new(&["AGGATG"])), + length: 1, + }, + "Lamiales" => TelomereSeq { clade: "Lamiales", seq: Seq(Box::new(&["AAACCCT", "AACCCTAAT"])), @@ -549,13 +557,13 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Lepidoptera" => TelomereSeq { clade: "Lepidoptera", seq: Seq(Box::new(&[ + "ACATC", + "AACGTGAT", "ACTCTG", + "AACCATCCCT", "AACCT", "AACTCG", - "ACATC", - "AACCATCCCT", "AAGACGGTAAGTGTGTATGTATGT", - "AACGTGAT", ])), length: 7, }, @@ -622,13 +630,13 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Nudibranchia" => TelomereSeq { clade: "Nudibranchia", - seq: Seq(Box::new(&["ACAGTAGC", "AAACAC"])), - length: 2, + seq: Seq(Box::new(&["AAACAC"])), + length: 1, }, "Odonata" => TelomereSeq { clade: "Odonata", - seq: Seq(Box::new(&["AGATC", "AGCCATCGCCAT", "AACCC"])), + seq: Seq(Box::new(&["AGCCATCGCCAT", "AGATC", "AACCC"])), length: 3, }, @@ -724,7 +732,7 @@ pub fn return_telomere_sequence(clade: &str) -> TelomereSeq { "Ranunculales" => TelomereSeq { clade: "Ranunculales", - seq: Seq(Box::new(&["AAACCCT", "AACCCTG", "AACCCCG", "AAACCG"])), + seq: Seq(Box::new(&["AAACCCT", "AACCCCG", "AAACCG", "AACCCTG"])), length: 4, },