Skip to content

PyOpenSci REVIEW - config and download default directories #51

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ The documentation is formed by a series of markdown files located in directory [
This means that, to modify the [API reference](https://robaina.github.io/Pynteny/references/api/), all you need to do is to modify the docstring directly in the source file where the definition/class is located. And, to update the documentation pages, you just have to update the corresponding markdown file in the [docs](https://github.com/Robaina/Pynteny/tree/main/docs) directory. Note that, if you need to change the documentation structure (e.g., add new pages), you would need to tell mkdocs about this change through its [configuration file](https://github.com/Robaina/Pynteny/blob/main/mkdocs.yml). Or just open an issue and ask for help!

When all the changes are ready to deploy, just open a pull request. After reviewing and merging the changes, the documentation will be automatically deployed.

Run the documentation locally with:

> mkdocs serve

## Tests on push and pull request to main

Pynteny's repo contains a [GitHub Action](https://github.com/features/actions) that performs build and integration tests and is triggered automatically on push and pull request events to the main branch. Currently the tests include building and installing Pynteny in Ubuntu and MacOS and running the [test](tests) suite.
8 changes: 8 additions & 0 deletions src/pynteny/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,14 @@ def download() -> argparse.ArgumentParser:
action="store_true",
help="unpack originally compressed database files",
)
optional.add_argument(
"-f",
"--force",
dest="force",
default=False,
action="store_true",
help="force-download database again if already downloaded",
)
optional.add_argument(
"-l",
"--log",
Expand Down
17 changes: 11 additions & 6 deletions src/pynteny/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,17 @@
import shutil
import logging
from pathlib import Path
import wget

from pynteny.filter import SyntenyHits, filter_FASTA_by_synteny_structure
from pynteny.hmm import PGAP
import pynteny.parsers.syntenyparser as syntenyparser
from pynteny.utils import CommandArgs, ConfigParser, is_tar_file, terminal_execute
from pynteny.utils import (
CommandArgs,
ConfigParser,
is_tar_file,
terminal_execute,
download_file,
)
from pynteny.preprocessing import Database


Expand Down Expand Up @@ -212,11 +217,11 @@ def download_hmms(args) -> None:
logger = init_logger(args)
module_dir = Path(__file__).parent
config = ConfigParser.get_default_config()
if config.get_field("data_downloaded"):
if (config.get_field("data_downloaded")) and (not args.force):
logger.info("PGAP database already downloaded. Skipping download")
sys.exit(1)
if args.outdir is None:
download_dir = Path(module_dir.parent) / "data"
download_dir = module_dir / "data"
else:
download_dir = Path(args.outdir).absolute()
if not download_dir.exists():
Expand All @@ -231,8 +236,8 @@ def download_hmms(args) -> None:
try:
PGAP_file = download_dir / "hmm_PGAP.HMM.tgz"
meta_file = download_dir / "hmm_PGAP.tsv"
wget.download(data_url, PGAP_file.as_posix())
wget.download(meta_url, meta_file.as_posix())
download_file(data_url, PGAP_file)
download_file(meta_url, meta_file)
logger.info("Database dowloaded successfully\n")
config.update_config("data_downloaded", True)
config.update_config("PGAP_database", PGAP_file.as_posix())
Expand Down
20 changes: 13 additions & 7 deletions src/pynteny/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from __future__ import annotations
import os
import sys
import wget
import logging
import shutil
import subprocess
Expand Down Expand Up @@ -46,7 +47,7 @@ def initialize_config_file() -> Path:
Returns:
Path: path to generated config file.
"""
config_file = Path(__file__).parent.parent / "config.json"
config_file = Path(__file__).parent / "config.json"
if not config_file.exists():
config = {
"database_dir": "",
Expand Down Expand Up @@ -187,6 +188,16 @@ def parallelize_over_input_files(
p.join()


def download_file(url: str, output_file: Path) -> None:
    """Download the file found at a url and save it to disk.

    Args:
        url (str): url of the file to be downloaded.
        output_file (Path): path where the downloaded file is written.
    """
    # wget.download is given the destination as a POSIX-style string.
    destination = output_file.as_posix()
    wget.download(url, destination)


def is_tar_file(tar_file: Path) -> bool:
"""Check whether file is tar-compressed.

Expand All @@ -196,12 +207,7 @@ def is_tar_file(tar_file: Path) -> bool:
Returns:
bool: whether file is compressed or not.
"""
tar_file_str = tar_file.as_posix()
return (
(tar_file_str.endswith("tar.gz"))
or (tar_file_str.endswith("tgz"))
or (tar_file_str.endswith("tar"))
)
return Path(tar_file).is_file() and tarfile.is_tarfile(tar_file)


def extract_tar_file(tar_file: Path, dest_dir: Path = None) -> None:
Expand Down
11 changes: 5 additions & 6 deletions tests/test_integration_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@

from pynteny.api import Search

this_file_dir = Path(__file__).parent
tests_dir = Path(__file__).parent


class TestSyntenySearch(unittest.TestCase):
def test_search(self):
with TemporaryDirectory() as tempdir:
search = Search(
data=this_file_dir / "test_data/MG1655.fasta",
data=tests_dir / "test_data/MG1655.fasta",
synteny_struc="<TIGR00171.1 0 <TIGR00170.1 1 <TIGR00973.1",
hmm_dir=this_file_dir / "test_data/hmms",
hmm_dir=tests_dir / "test_data/hmms",
hmm_meta=None,
outdir=Path(tempdir),
prefix="",
Expand All @@ -31,12 +31,11 @@ def test_search(self):
processes=None,
unordered=False,
)
hmm_meta = this_file_dir / "test_data/hmm_meta.tsv"
hmm_meta = tests_dir / "test_data/hmm_meta.tsv"
synhits = search.run()
# synhits_df = synhits.hits
synhits = synhits.add_HMM_meta_info_to_hits(hmm_meta)
synhits_df = synhits.hits
config = Path(this_file_dir.parent) / "config.json"
config = Path(tests_dir.parent) / "config.json"
if config.exists():
config.unlink()
hit_labels = [
Expand Down