From ed23b6b0c56f4fb2184fff1fd528f86a45f4962d Mon Sep 17 00:00:00 2001 From: Eric Nawrocki Date: Thu, 15 Aug 2024 17:56:52 +0100 Subject: [PATCH] Removes SS_cons gaps from cmemit -c | cmalign alignment This is rare but occurs for some families if cmalign inserts a consensus nt followed by a nearby deletion. Examples for Rfam 15.0 are RF00023 and RF03057. --- Rfam/Lib/Bio/Rfam/QC.pm | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Rfam/Lib/Bio/Rfam/QC.pm b/Rfam/Lib/Bio/Rfam/QC.pm index 63c3ef0c..b6324c2a 100644 --- a/Rfam/Lib/Bio/Rfam/QC.pm +++ b/Rfam/Lib/Bio/Rfam/QC.pm @@ -261,6 +261,10 @@ sub checkCMFormat { chomp $line; if($line =~ /^#=GC\s+SS_cons\s+(\S+)/) { $cm_ss_cons = $1; + # remove any gap ('.') characters; this is rare, but gaps can occur if cmalign thinks a consensus position + # should be inserted, followed by a nearby deletion (e.g. RF00023 and RF03057 for Rfam + # 15.0 with Infernal 1.1.5) + $cm_ss_cons =~ s/\.//g; } else { warn "Failed to read SS_cons from alignment created with command $cmd\n";