diff --git a/python-core/pyproject.toml b/python-core/pyproject.toml index 9b3b845..92d1959 100644 --- a/python-core/pyproject.toml +++ b/python-core/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "deciphon-core" -version = "0.23.1" +version = "0.23.2" description = "Python wrapper around the Deciphon C library" authors = ["Danilo Horta "] license = "MIT" diff --git a/python-core/tests/test_scan.py b/python-core/tests/test_scan.py index f849264..882530e 100644 --- a/python-core/tests/test_scan.py +++ b/python-core/tests/test_scan.py @@ -52,7 +52,7 @@ def test_scan(tmp_path, files_path: Path): shutil.unpack_archive(snapfile.path, format="zip") products = snapfile.basename / "products.tsv" - assert checksum(products)[:8] == "e06f9686" + assert checksum(products)[:8] == "ee567b88" sequences = [ diff --git a/snap/deciphon_snap/prod.py b/snap/deciphon_snap/prod.py index 5364971..695eb7b 100644 --- a/snap/deciphon_snap/prod.py +++ b/snap/deciphon_snap/prod.py @@ -20,6 +20,7 @@ class Prod(BaseModel): seq_id: int window: int window_interval: Interval + hit: int hit_interval: Interval profile: str abc: str diff --git a/snap/deciphon_snap/snap_file.py b/snap/deciphon_snap/snap_file.py index 26a906e..7f2f0c8 100644 --- a/snap/deciphon_snap/snap_file.py +++ b/snap/deciphon_snap/snap_file.py @@ -36,8 +36,9 @@ def __init__(self, filesystem): for idx, row in enumerate((csv_parse(fieldnames, r) for r in rows[1:])): seq_id = int(row["sequence"]) window = int(row["window"]) + hit = int(row["hit"]) profile = str(row["profile"]) - with fs.open(f"{hmmer_dir}/{seq_id}/{window}/{profile}.h3r", "rb") as f2: + with fs.open(f"{hmmer_dir}/{seq_id}/{window}/{hit}/{profile}.h3r", "rb") as f2: h3r = H3Result(raw=read_h3result(fileno=f2.fileno())) window_start = int(row["window_start"]) window_stop = int(row["window_stop"]) @@ -51,6 +52,7 @@ def __init__(self, filesystem): seq_id=seq_id, window=window, window_interval=window_interval, + hit=hit, hit_interval=hit_interval, profile=profile, abc=row["abc"], diff --git a/snap/pyproject.toml b/snap/pyproject.toml index 7537921..210a55f 100644 --- a/snap/pyproject.toml +++ b/snap/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "deciphon-snap" -version = "0.10.2" +version = "0.11.0" description = "Reader for Deciphon snap files." authors = ["Danilo Horta "] license = "MIT" diff --git a/snap/tests/files/consensus.dcs b/snap/tests/files/consensus.dcs index 650a7ab..a86a9df 100644 Binary files a/snap/tests/files/consensus.dcs and b/snap/tests/files/consensus.dcs differ diff --git a/snap/tests/files/example.dcs b/snap/tests/files/example.dcs deleted file mode 100644 index b78f088..0000000 Binary files a/snap/tests/files/example.dcs and /dev/null differ diff --git a/snap/tests/test_alignments.py b/snap/tests/test_alignments.py new file mode 100644 index 0000000..0b3e8ef --- /dev/null +++ b/snap/tests/test_alignments.py @@ -0,0 +1,9 @@ +from pathlib import Path + +from deciphon_snap.read_snap import read_snap +from deciphon_snap.view import view_alignments + +def test_alignments(files_path: Path): + snap_file = read_snap(files_path / "consensus.dcs") + txt = view_alignments(snap_file) + print(txt) diff --git a/snap/tests/test_fasta.py b/snap/tests/test_fasta.py deleted file mode 100644 index d667c69..0000000 --- a/snap/tests/test_fasta.py +++ /dev/null @@ -1,86 +0,0 @@ -from pathlib import Path - -from deciphon_snap.prod import MatchElemName -from deciphon_snap.read_snap import read_snap - -queries = """>2 -CTCAAGGATATCAACCTGACCATTCCGGAAAAAATGGTGACCGCCTTTATCGGTCCCTCA -GGCTGCGGCAAATCCACCATGCTGCGCACCTTCAATCGCATGTATCAACTCTATCCGAAG -CAAAAAGCGACGGGTGAAGTTCTGCTGGATGGGGAAAATATTCTCGACAAAAAGCAGGAT -CTCAATACACTTCGGGCCAAGATCGGTATGGTATTCCAAAAACCGACTCCGTTCCCCATG -TCTATTTATGACAACATTGCTTTCGGTGCGAAACTCTATGAAAACCTTAGCCGCCACGAT -ATGGACGAACGGGTGGAATGGGCATTGAGGAAAGCAGCGCTATGGACGGAGGCGAAAGAT -AAACTGAAACAGAGCGGCACCGGACTTTCCGGTGGTCAGCAACAGCGCTTGTGTATCGCG -CGCGCCATCGCGGTCAAACCGCAAATCTTGTTACTCGACGAACCGACTTCT ->17 -CTACAGGGGGTCAACCTGGAACTGAAAGCAGGCGAGTCGCTTGGACTGATTGGCGAAAAT -GGTGCTGGTAAATCCACGCTGTTGAAAATCATTGCCGGGGTGGTCAAGCCCTCCACTGGC -CAAGTGGTGGTTAATGGACGTATCGGTGCTTTGCTGGAACTGGGCAGCGGGTTCCACCCC -GAATATAACGGCTTGGAAAACATCCATCTGGCGGCCGCGCTGATGGGTATGAGCAACGCA -GAAATCGACAGCAAACTGGATTCCATAATTGAATTCGCCGATATTGGTTCCCATATTGCG -GAACCGATCAAGCATTACTCGTCGGGCATGGTGGTGCGGCTGGGTTTTGCCGTAGCAACC -GCCATGCAACCGGATATTTTGATTACTGACGAA -""" - -states = """>2 -BM1M2M3M4M5M6M7M8M9M10M11M12M13M14M15M16M17M18M19M20M21M22M2 -3M24M25M26M27M28M29M30M31M32M33M34M35M36M37I37I37I37I37I37M3 -8M39M40M41M42M43M44M45M46M47M48M49M50M51I51I51M52M53M54M55M5 -6M57M58M59M60M61M62M63M64M65M66M67M68M69M70M71M72M73M74D75M7 -6M77M78M79I79I79I79I79I79I79I79I79I79I79I79I79I79I79M80M81M8 -2M83M84M85M86M87M88M89M90M91M92M93M94M95M96M97M98M99M100M101 -M102M103M104M105M106M107M108M109M110M111M112M113M114M115M116 -M117M118M119M120M121M122M123M124M125M126M127M128M129M130M131 -M132M133M134M135M136M137E ->17 -BM1M2M3M4M5M6M7M8M9M10M11M12M13M14M15M16M17M18M19M20M21M22M2 -3M24M25M26M27M28M29M30M31M32M33M34M35M36M37M38M39M40M41M42M4 -3M44M45M46M47EJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ -JJJJJJJJJJBM107M108M109M110M111M112M113M114M115M116M117M118M -119M120M121M122M123M124M125M126M127M128M129M130M131M132M133M -134E -""" - -codons = """>2 -CTCAAGGATATCAACCTGACCATTCCGGAAAAAATGGTGACCGCCTTTATCGGTCCCTCA -GGCTGCGGCAAATCCACCATGCTGCGCACCTTCAATCGCATGTATCAACTCTATCCGAAG -CAAAAAGCGACGGGTGAAGTTCTGCTGGATGGGGAAAATATTCTCGACAAAAAGCAGGAT -CTCAATACACTTCGGGCCAAGATCGGTATGGTATTCCAAAAACCGACTCCGTTCCCCATG -TCTATTTATGACAACATTGCTTTCGGTGCGAAACTCTATGAAAACCTTAGCCGCCACGAT -ATGGACGAACGGGTGGAATGGGCATTGAGGAAAGCAGCGCTATGGACGGAGGCGAAAGAT -AAACTGAAACAGAGCGGCACCGGACTTTCCGGTGGTCAGCAACAGCGCTTGTGTATCGCG -CGCGCCATCGCGGTCAAACCGCAAATCTTGTTACTCGACGAACCGACTTCT ->17 -CTACAGGGGGTCAACCTGGAACTGAAAGCAGGCGAGTCGCTTGGACTGATTGGCGAAAAT -GGTGCTGGTAAATCCACGCTGTTGAAAATCATTGCCGGGGTGGTCAAGCCCTCCACTGGC -CAAGTGGTGGTTAATGGACGTATCGGTGCTTTGCTGGAACTGGGCAGCGGGTTCCACCCC -GAATATAACGGCTTGGAAAACATCCATCTGGCGGCCGCGCTGATGGGTATGAGCAACGCA -GAAATCGACAGCAAACTGGATTCCATAATTGAATTCGCCGATATTGGTTCCCATATTGCG -GAACCGATCAAGCATTACTCGTCGGGCATGGTGGTGCGGCTGGGTTTTGCCGTAGCAACC -GCCATGCAACCGGATATTTTGATTACTGACGAA -""" - -aminos = """>2 -LKDINLTIPEKMVTAFIGPSGCGKSTMLRTFNRMYQLYPKQKATGEVLLDGENILDKKQD -LNTLRAKIGMVFQKPTPFPMSIYDNIAFGAKLYENLSRHDMDERVEWALRKAALWTEAKD -KLKQSGTGLSGGQQQRLCIARAIAVKPQILLLDEPTS ->17 -LQGVNLELKAGESLGLIGENGAGKSTLLKIIAGVVKPSTGQVVVNGRIGALLELGSGFHP -EYNGLENIHLAAALMGMSNAEIDSKLDSIIEFADIGSHIAEPIKHYSSGMVVRLGFAVAT -AMQPDILITDE -""" - - -def test_fasta(files_path: Path): - snap_file = read_snap(files_path / "example.dcs") - prods = snap_file.products[:2] - assert queries == prods.fasta_list(MatchElemName.QUERY).format(ncols=60) - assert states == prods.fasta_list(MatchElemName.STATE).format(ncols=60) - assert codons == prods.fasta_list(MatchElemName.CODON).format(ncols=60) - assert aminos == prods.fasta_list(MatchElemName.AMINO).format(ncols=60) - - -def test_empty_fasta(files_path: Path): - snap_file = read_snap(files_path / "example.dcs") - prods = snap_file.products[0:0] - assert "" == prods.fasta_list(MatchElemName.QUERY).format(ncols=60) diff --git a/snap/tests/test_gff.py b/snap/tests/test_gff.py deleted file mode 100644 index 7584e1a..0000000 --- a/snap/tests/test_gff.py +++ /dev/null @@ -1,22 +0,0 @@ -from pathlib import Path - -from deciphon_snap.read_snap import read_snap - -desired = """##gff-version 3 -2 deciphon CDS 76 546 1.3e-37 + 0 Profile=PF00005.30;Alphabet=dna;ID=1 -17 deciphon CDS 130 270 1.3e-20 + 0 Profile=PF00005.30;Alphabet=dna;ID=2 -17 deciphon CDS 439 522 1.3e-20 + 0 Profile=PF00005.30;Alphabet=dna;ID=3 -""" - - -def test_gff(files_path: Path): - snap_file = read_snap(files_path / "example.dcs") - prod = snap_file.products[0:2] - print(prod.gff_list().format()) - assert prod.gff_list().format() == desired - - -def test_empty_gff(files_path: Path): - snap_file = read_snap(files_path / "example.dcs") - prod = snap_file.products[0:0] - assert prod.gff_list().format() == "##gff-version 3\n" diff --git a/snap/tests/test_read.py b/snap/tests/test_read.py deleted file mode 100644 index 5880d2e..0000000 --- a/snap/tests/test_read.py +++ /dev/null @@ -1,22 +0,0 @@ -from pathlib import Path - - -from deciphon_snap.read_snap import read_snap - - -def test_read_snap(files_path: Path): - snap_file = read_snap(files_path / "example.dcs") - assert len(snap_file.products) == 403 - prod = snap_file.products[0] - assert prod.id == 0 - assert len(prod.match_list) == 160 - assert str(prod.match_list[0]) == "(∅,B,∅,∅)" - - assert prod.query[:4] == "CTCA" - assert prod.query[-4:] == "TTCT" - - assert prod.codon[:4] == "CTCA" - assert prod.codon[-4:] == "TTCT" - - assert prod.amino[:4] == "LKDI" - assert prod.amino[-4:] == "EPTS"