From ee14fb1fee861ffb87f1c46bb485fae1f6cc3299 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Wed, 11 Sep 2024 16:12:23 -0500 Subject: [PATCH 1/7] fix: Update query_end after tail patching in write_merged_alignment --- src/common/wflign/src/wflign_patch.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/common/wflign/src/wflign_patch.cpp b/src/common/wflign/src/wflign_patch.cpp index e7d45ae2..f2ed66f4 100644 --- a/src/common/wflign/src/wflign_patch.cpp +++ b/src/common/wflign/src/wflign_patch.cpp @@ -1396,8 +1396,14 @@ void write_merged_alignment( query_pos = query_length; target_pos = target_length; - // Adjust target_length if we used additional sequence + // Adjust query_end and target_end if we used additional sequence + query_end = query_length; target_end += tail_aln.target_length; + + std::cerr << "After tail patching: query_end=" << query_end + << ", query_length=" << query_length + << ", target_end=" << target_end + << ", target_length=" << target_length << std::endl; } } From ddbc55414200639cce869a5c317f4a4e3638bb71 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Wed, 11 Sep 2024 16:13:47 -0500 Subject: [PATCH 2/7] fix: remove debugging statement and update query_end assignment --- src/common/wflign/src/wflign_patch.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/common/wflign/src/wflign_patch.cpp b/src/common/wflign/src/wflign_patch.cpp index f2ed66f4..fda62e70 100644 --- a/src/common/wflign/src/wflign_patch.cpp +++ b/src/common/wflign/src/wflign_patch.cpp @@ -1397,13 +1397,8 @@ void write_merged_alignment( target_pos = target_length; // Adjust query_end and target_end if we used additional sequence - query_end = query_length; + query_end += tail_aln.query_length; target_end += tail_aln.target_length; - - std::cerr << "After tail patching: query_end=" << query_end - << ", query_length=" << query_length - << ", target_end=" << target_end - << ", target_length=" << target_length << std::endl; } } From 92f13340f625a1f34c2460a590e496a57c0a8a62 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Wed, 11 Sep 2024 16:19:44 -0500 Subject: [PATCH 3/7] feat: Add tests for wfmash and wgatools --- .github/workflows/test_on_push.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.github/workflows/test_on_push.yml b/.github/workflows/test_on_push.yml index 72645261..7d34b429 100644 --- a/.github/workflows/test_on_push.yml +++ b/.github/workflows/test_on_push.yml @@ -46,3 +46,22 @@ jobs: run: ASAN_OPTIONS=detect_leaks=1:symbolize=1 LSAN_OPTIONS=verbosity=0:log_threads=1 build/bin/wfmash data/reference.fa.gz data/reads.500bps.fa.gz -s 0.5k -N -a > reads.500bps.sam && samtools view reads.500bps.sam -bS | samtools sort > reads.500bps.bam && samtools index reads.500bps.bam && samtools view reads.500bps.bam | head - name: Test mapping+alignment with short reads (255bps) (PAF output) run: ASAN_OPTIONS=detect_leaks=1:symbolize=1 LSAN_OPTIONS=verbosity=0:log_threads=1 build/bin/wfmash data/reads.255bps.fa.gz -w 16 -s 100 -L > reads.255bps.paf && head reads.255bps.paf + - name: Install Rust and Cargo + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + - name: Install wgatools + run: cargo install --git https://github.com/wjwei-handsome/wgatools.git + - name: Run wfmash and generate PAF + run: build/bin/wfmash -t 8 -n 1 -k 19 -s 5000 -p 90 -c 30k -P 50k -T SGDref -Q S288C -Y '#' data/scerevisiae8.fa.gz data/scerevisiae8.fa.gz > test.paf + - name: Convert PAF to MAF using wgatools + run: wgatools paf2maf --target data/scerevisiae8.fa.gz --query data/scerevisiae8.fa.gz test.paf > test.maf + - name: Check if MAF file is not empty + run: | + if [ -s test.maf ]; then + echo "MAF file is not empty. Test passed." + else + echo "MAF file is empty. Test failed." + exit 1 + fi From 5aed9a034ec3df1d3656dc62f96c461c9925f8ce Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Wed, 11 Sep 2024 16:20:50 -0500 Subject: [PATCH 4/7] add aider directory to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 31955a22..57d2c40f 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ test/ .idea/ cmake-build-debug/ result +.aider* From 1fb59935b4ade0a9f1441ebeb818eefcebae1e4c Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Wed, 11 Sep 2024 18:13:13 -0500 Subject: [PATCH 5/7] fix: Improve query and target end handling in do_progressive_wfa_patch_alignment --- src/common/wflign/src/wflign_patch.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/common/wflign/src/wflign_patch.cpp b/src/common/wflign/src/wflign_patch.cpp index fda62e70..c1435d58 100644 --- a/src/common/wflign/src/wflign_patch.cpp +++ b/src/common/wflign/src/wflign_patch.cpp @@ -1397,8 +1397,19 @@ void write_merged_alignment( target_pos = target_length; // Adjust query_end and target_end if we used additional sequence - query_end += tail_aln.query_length; - target_end += tail_aln.target_length; + uint64_t new_query_end = query_offset + query_length; + uint64_t new_target_end = target_offset + target_length + actual_extension; + + // Ensure we don't exceed the total lengths + query_end = std::min(new_query_end, query_total_length); + target_end = std::min(new_target_end, target_total_length); + + // Add safety checks + if (query_end > query_total_length || target_end > target_total_length) { + std::cerr << "Warning: Alignment extends beyond sequence bounds. Truncating." << std::endl; + query_end = std::min(query_end, query_total_length); + target_end = std::min(target_end, target_total_length); + } } } From da557a6f3b97e818457b1eee358bd7dcda5a62eb Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Wed, 11 Sep 2024 18:14:00 -0500 Subject: [PATCH 6/7] fix: Capture query_total_length in lambda function --- src/common/wflign/src/wflign_patch.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common/wflign/src/wflign_patch.cpp b/src/common/wflign/src/wflign_patch.cpp index c1435d58..af8120c0 100644 --- a/src/common/wflign/src/wflign_patch.cpp +++ b/src/common/wflign/src/wflign_patch.cpp @@ -939,7 +939,8 @@ void write_merged_alignment( &max_dist_threshold, &wf_aligner, &multi_patch_alns, &convex_penalties, - &chain_gap, &max_patching_score, &min_inversion_length, &erode_k + &chain_gap, &max_patching_score, &min_inversion_length, &erode_k, + &query_total_length // Add this line to capture query_total_length #ifdef WFA_PNG_TSV_TIMING ,&emit_patching_tsv, &out_patching_tsv From 7e5c5a5a27bf5c6333c5f7f537bf5a0788cb7b12 Mon Sep 17 00:00:00 2001 From: "Erik Garrison (aider)" Date: Thu, 12 Sep 2024 10:50:37 -0500 Subject: [PATCH 7/7] fix: Improve safety checks and warning messages in write_merged_alignment --- src/common/wflign/src/wflign_patch.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/common/wflign/src/wflign_patch.cpp b/src/common/wflign/src/wflign_patch.cpp index af8120c0..65d467e7 100644 --- a/src/common/wflign/src/wflign_patch.cpp +++ b/src/common/wflign/src/wflign_patch.cpp @@ -1397,20 +1397,17 @@ void write_merged_alignment( query_pos = query_length; target_pos = target_length; - // Adjust query_end and target_end if we used additional sequence + // Add safety checks first uint64_t new_query_end = query_offset + query_length; uint64_t new_target_end = target_offset + target_length + actual_extension; - // Ensure we don't exceed the total lengths + if (new_query_end > query_total_length || new_target_end > target_total_length) { + std::cerr << "[wfmash::patch] Warning: Alignment extends beyond sequence bounds. Truncating." << std::endl; + } + + // Adjust query_end and target_end, ensuring we don't exceed the total lengths query_end = std::min(new_query_end, query_total_length); target_end = std::min(new_target_end, target_total_length); - - // Add safety checks - if (query_end > query_total_length || target_end > target_total_length) { - std::cerr << "Warning: Alignment extends beyond sequence bounds. Truncating." << std::endl; - query_end = std::min(query_end, query_total_length); - target_end = std::min(target_end, target_total_length); - } } }