diff --git a/README.md b/README.md index 0481c1f..f4ccf96 100644 --- a/README.md +++ b/README.md @@ -15,11 +15,17 @@ Added to your `PATH` if the suggested development venv is set up. Run with Parses and reformats AGP and TPF files, converting into either format. -### [`pretext-to-tpf`](src/tola/assembly/scripts/pretext_to_tpf.py) +### [`pretext-to-asm`](src/tola/assembly/scripts/pretext_to_asm.py) Takes the AGP file output by [PretextView](https://github.com/wtsi-hpag/PretextView) -and creates TPF files containing precise coordinates of the curated assembly. +and the input assembly (usually FASTA), and produces an output assembly in +FASTA and AGP formats. The input and output file formats are determined from +the extensions of the files. FASTA input and output uses the `.fai` index +format, as produced by +[`faidx`](http://www.htslib.org/doc/samtools-faidx.html), and uses a streaming +strategy with a 250 KB buffer to keep memory usage low no matter how large +the chromosome. ## File Formats diff --git a/pyproject.toml b/pyproject.toml index 4ee01b5..b4a818a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,8 +21,8 @@ devel = [ [project.scripts] asm-format = "tola.assembly.scripts.asm_format:cli" -pretext-to-asm = "tola.assembly.scripts.pretext_to_tpf:cli" -pretext-to-tpf = "tola.assembly.scripts.pretext_to_tpf:cli" +pretext-to-asm = "tola.assembly.scripts.pretext_to_asm:cli" +pretext-to-tpf = "tola.assembly.scripts.pretext_to_asm:cli" find-overlaps = "tola.assembly.scripts.find_overlaps:cli" [tool.setuptools.packages.find] diff --git a/src/tola/assembly/scripts/pretext_to_tpf.py b/src/tola/assembly/scripts/pretext_to_asm.py similarity index 93% rename from src/tola/assembly/scripts/pretext_to_tpf.py rename to src/tola/assembly/scripts/pretext_to_asm.py index db2b916..bc687cf 100644 --- a/src/tola/assembly/scripts/pretext_to_tpf.py +++ b/src/tola/assembly/scripts/pretext_to_asm.py @@ -30,7 +30,7 @@ def ul(txt): @click.command( help=f""" Uses fragments in the 
assembly (AGP) produced by PretextView to find - matching fragments in the assembly (TPF) which was fed into Pretext and + matching fragments in the assembly which was fed into Pretext and output an assembly made from the input assembly fragments. {ul("Named Chromsomes")} @@ -78,9 +78,13 @@ def ul(txt): path_type=pathlib.Path, exists=True, readable=True, + resolve_path=True, ), required=True, - help="Assembly file from before curation, which is usually a TPF.", + help="""Assembly before curation, usually a FASTA file. + FASTA files will be indexed, creating a '.fai' and a '.agp' file + alongside the assembly if they are missing or are older than the + FASTA.""", ) @click.option( "--pretext", @@ -102,8 +106,11 @@ def ul(txt): path_type=pathlib.Path, dir_okay=False, ), - help="""Output file, usually a TPF. - If not given, prints to STDOUT in 'STR' format.""", + help="""Output file, usually a FASTA file. + If not given, prints to STDOUT in 'STR' format. + The output file type is determined from its extension. If the output is + FASTA ('.fa'), an AGP format file ('.fa.agp') is also written. Other + output files are named after the output file minus its extension.""", ) # @click.option( # "--version", diff --git a/tests/pretext_to_tpf_test.py b/tests/pretext_to_asm_test.py similarity index 97% rename from tests/pretext_to_tpf_test.py rename to tests/pretext_to_asm_test.py index 0977b49..e2a2e11 100644 --- a/tests/pretext_to_tpf_test.py +++ b/tests/pretext_to_asm_test.py @@ -6,7 +6,7 @@ import pytest from click.testing import CliRunner -from tola.assembly.scripts.pretext_to_tpf import cli +from tola.assembly.scripts.pretext_to_asm import cli def list_example_assemblies():