diff --git a/src/tola/assembly/build_assembly.py b/src/tola/assembly/build_assembly.py index 0b62c79..4ecaed9 100644 --- a/src/tola/assembly/build_assembly.py +++ b/src/tola/assembly/build_assembly.py @@ -279,25 +279,24 @@ def assemblies_with_scaffolds_fused(self) -> list[Assembly]: def scaffolds_fused_by_name(self) -> Iterator[Scaffold]: gap = self.default_gap - new_scffld = None - current_hap_chr = None, None + hap_name_scaffold = {} for scffld in self.scaffolds: if not scffld.rows: # discard_overhanging_fragments() may have removed the only # row from an OverlapResult continue - hap_chr = scffld.haplotype, scffld.name - if hap_chr != current_hap_chr: - if new_scffld: - yield new_scffld - current_hap_chr = hap_chr - new_scffld = Scaffold( - scffld.name, tag=scffld.tag, haplotype=scffld.haplotype - ) + build_scffld = hap_name_scaffold.setdefault( + (scffld.haplotype, scffld.name), + Scaffold( + scffld.name, + tag=scffld.tag, + haplotype=scffld.haplotype, + ), + ) if isinstance(scffld, OverlapResult): - new_scffld.append_scaffold(scffld.to_scaffold(), gap) + build_scffld.append_scaffold(scffld.to_scaffold(), gap) else: - new_scffld.append_scaffold(scffld) + build_scffld.append_scaffold(scffld) - if new_scffld: - yield new_scffld + for scffld in hap_name_scaffold.values(): + yield scffld diff --git a/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf.log b/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf.log index 99d950e..fcc9024 100644 --- a/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf.log +++ b/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf.log @@ -201,7 +201,7 @@ ilIthSala1_1-pretext-to-tpf_HAP1 651,870 HAP1_SCAFFOLD_54 ... ... 16,408 HAP1_SCAFFOLD_864 - 92,470,266 bp total + 92,524,843 bp total ilIthSala1_1-pretext-to-tpf_HAP2 389,828,879 bp sequence (minus gaps) @@ -223,4 +223,4 @@ ilIthSala1_1-pretext-to-tpf_Haplotigs n = 1 309,695 H_1 -Curation made 37 cuts in contigs, 87 breaks at gaps and 185 joins +Curation made 37 cuts in contigs, 87 breaks at gaps and 186 joins diff --git a/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf_HAP1.tpf b/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf_HAP1.tpf index 382c23b..8175420 100644 --- a/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf_HAP1.tpf +++ b/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf_HAP1.tpf @@ -305,6 +305,7 @@ GAP TYPE-2 200 GAP TYPE-2 200 ? HAP1_SCAFFOLD_1:18881119-21883956 Z PLUS ? HAP1_SCAFFOLD_48:409325-463901 HAP1_SCAFFOLD_48 MINUS +GAP TYPE-2 200 ? HAP1_SCAFFOLD_48:1-136441 HAP1_SCAFFOLD_48 PLUS ? HAP1_SCAFFOLD_54:1-651870 HAP1_SCAFFOLD_54 PLUS ? HAP1_SCAFFOLD_55:1-597161 HAP1_SCAFFOLD_55 PLUS diff --git a/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf_info.yaml b/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf_info.yaml index 54ac949..0090eba 100644 --- a/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf_info.yaml +++ b/tests/data/ilIthSala1_1/ilIthSala1_1-pretext-to-tpf_info.yaml @@ -1,10 +1,10 @@ assemblies: HAP1: manual_breaks: 32 - manual_joins: 97 + manual_joins: 98 HAP2: manual_breaks: 55 manual_joins: 88 manual_breaks: 87 -manual_joins: 185 +manual_joins: 186 manual_haplotig_removals: 1 diff --git a/tests/data/ngHelPoly1_1/chrs.csv b/tests/data/ngHelPoly1_1/chrs.csv index 257c02e..6679872 100644 --- a/tests/data/ngHelPoly1_1/chrs.csv +++ b/tests/data/ngHelPoly1_1/chrs.csv @@ -1,6 +1,6 @@ RL_1,RL_1_unloc_1,RL_1_unloc_2,RL_1_unloc_3,RL_1_unloc_4,RL_1_unloc_5,RL_1_unloc_6,RL_1_unloc_7,RL_1_unloc_8,RL_1_unloc_9,RL_1_unloc_10,RL_1_unloc_11,RL_1_unloc_12,RL_1_unloc_13 -RL_2,RL_2,RL_2,RL_2,RL_2_unloc_1,RL_2_unloc_2,RL_2_unloc_3,RL_2_unloc_4,RL_2_unloc_5,RL_2_unloc_6,RL_2_unloc_7,RL_2_unloc_8,RL_2_unloc_9,RL_2_unloc_10,RL_2_unloc_11,RL_2_unloc_12 +RL_2,RL_2_unloc_1,RL_2_unloc_2,RL_2_unloc_3,RL_2_unloc_4,RL_2_unloc_5,RL_2_unloc_6,RL_2_unloc_7,RL_2_unloc_8,RL_2_unloc_9,RL_2_unloc_10,RL_2_unloc_11,RL_2_unloc_12 RL_3,RL_3_unloc_1,RL_3_unloc_2,RL_3_unloc_3,RL_3_unloc_4,RL_3_unloc_5,RL_3_unloc_6,RL_3_unloc_7,RL_3_unloc_8,RL_3_unloc_9,RL_3_unloc_10 RL_4,RL_4_unloc_1,RL_4_unloc_2,RL_4_unloc_3,RL_4_unloc_4,RL_4_unloc_5,RL_4_unloc_6,RL_4_unloc_7 -RL_5,RL_5,RL_5_unloc_1,RL_5_unloc_2,RL_5_unloc_3,RL_5_unloc_4,RL_5_unloc_5,RL_5_unloc_6,RL_5_unloc_7,RL_5_unloc_8,RL_5_unloc_9,RL_5_unloc_10,RL_5_unloc_11 +RL_5,RL_5_unloc_1,RL_5_unloc_2,RL_5_unloc_3,RL_5_unloc_4,RL_5_unloc_5,RL_5_unloc_6,RL_5_unloc_7,RL_5_unloc_8,RL_5_unloc_9,RL_5_unloc_10,RL_5_unloc_11 X,X_unloc_1,X_unloc_2,X_unloc_3,X_unloc_4,X_unloc_5,X_unloc_6,X_unloc_7,X_unloc_8,X_unloc_9 diff --git a/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf.log b/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf.log index 2329273..2107791 100644 --- a/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf.log +++ b/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf.log @@ -81,4 +81,4 @@ ngHelPoly1_1-pretext-to-tpf_Haplotigs 30,757 H_24 2,164,462 bp total -Curation made 9 cuts in contigs, 210 breaks at gaps and 468 joins +Curation made 9 cuts in contigs, 210 breaks at gaps and 472 joins diff --git a/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf.tpf b/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf.tpf index cba227a..64d5415 100644 --- a/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf.tpf +++ b/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf.tpf @@ -1413,6 +1413,7 @@ GAP TYPE-2 200 ? scaffold_2:22958123-23757084 RL_2 PLUS GAP TYPE-2 200 ? scaffold_2:23757285-23952476 RL_2 PLUS +GAP TYPE-2 200 ? scaffold_2:24023605-24245085 RL_2 PLUS GAP TYPE-2 200 ? scaffold_2:24245286-24337062 RL_2 PLUS @@ -2286,6 +2287,7 @@ GAP TYPE-2 200 ? scaffold_2:84005701-84076804 RL_2 PLUS GAP TYPE-2 200 ? scaffold_2:84077005-84270004 RL_2 PLUS +GAP TYPE-2 200 ? scaffold_2:84324151-84517061 RL_2 PLUS GAP TYPE-2 200 ? scaffold_2:84517262-84628943 RL_2 PLUS @@ -2363,6 +2365,7 @@ GAP TYPE-2 200 ? scaffold_2:93382642-93420642 RL_2 PLUS GAP TYPE-2 200 ? scaffold_2:93420843-93689450 RL_2 PLUS +GAP TYPE-2 200 ? scaffold_2:93772333-93813810 RL_2 PLUS GAP TYPE-2 200 ? scaffold_2:93814011-94409008 RL_2 PLUS @@ -5070,6 +5073,7 @@ GAP TYPE-2 200 ? scaffold_378:1-30343 RL_5 PLUS GAP TYPE-2 200 ? scaffold_5:42746124-43081660 RL_5 PLUS +GAP TYPE-2 200 ? scaffold_5:43175773-43394772 RL_5 PLUS GAP TYPE-2 200 ? scaffold_471:1-24981 RL_5 PLUS diff --git a/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf_info.yaml b/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf_info.yaml index 6d11672..271d993 100644 --- a/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf_info.yaml +++ b/tests/data/ngHelPoly1_1/ngHelPoly1_1-pretext-to-tpf_info.yaml @@ -1,5 +1,5 @@ assemblies: Primary: manual_breaks: 210 - manual_joins: 468 + manual_joins: 472 manual_haplotig_removals: 24