Skip to content

Commit

Permalink
Run black
Browse files: browse the repository at this point in the history
  • Loading branch information
mvdbeek committed Jan 31, 2024
1 parent d5b92a7 commit a42771d
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 32 deletions.
8 changes: 5 additions & 3 deletions src/ephemeris/_config_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
Extra,
)


StrOrPath = Union[Path, str]


Expand All @@ -30,6 +29,7 @@ class RepositoryInstallTarget(BaseModel):

class RepositoryInstallTargets(BaseModel):
""" """

api_key: Optional[str]
galaxy_instance: Optional[str]
tools: List[RepositoryInstallTarget]
Expand Down Expand Up @@ -58,7 +58,9 @@ class Genome(BaseModel):
version: Optional[str] # Any version information associated with the data

# Description of actions (data managers) to run on target genome.
indexers: Optional[List[str]] # indexers to run - keyed on repository name - see data_managers.yml for how to resolve these to tools
indexers: Optional[
List[str]
] # indexers to run - keyed on repository name - see data_managers.yml for how to resolve these to tools
skiplist: Optional[List[str]] # unimplemented: but if we implement classes of indexers, these will be ones to skip


Expand All @@ -67,7 +69,7 @@ class Genomes(BaseModel):


def _read_yaml(path: StrOrPath):
with open(path, "r") as f:
with open(path) as f:
return yaml.safe_load(f)


Expand Down
4 changes: 2 additions & 2 deletions src/ephemeris/_idc_data_managers_to_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ def _parser():
general_group = parser.add_argument_group("General options")
add_verbosity_argument(general_group)
add_log_file_argument(general_group)
parser.add_argument('--data-managers-conf', default="data_managers.yml")
parser.add_argument('--shed-install-output-conf', default="tools.yml")
parser.add_argument("--data-managers-conf", default="data_managers.yml")
parser.add_argument("--shed-install-output-conf", default="tools.yml")
return parser


Expand Down
8 changes: 5 additions & 3 deletions src/ephemeris/_idc_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


def read_yaml(path: Path):
with open(path, "r") as f:
with open(path) as f:
return yaml.safe_load(f)


Expand All @@ -25,11 +25,13 @@ def lint_idc_directory(directory: Path):
for data_manager in data_managers.values():
data_manager_tool_id = data_manager.tool_id
if not data_manager_tool_id.startswith("toolshed.g2.bx.psu.edu/"):
raise Exception(f"Expected a data manager repository from main Galaxy tool shed but discovered tool ID {data_manager_tool_id}")
raise Exception(
f"Expected a data manager repository from main Galaxy tool shed but discovered tool ID {data_manager_tool_id}"
)

for genome in genomes.genomes:
print(genome)
for indexer in (genome.indexers or []):
for indexer in genome.indexers or []:
if indexer not in data_managers:
raise Exception(f"Failed to find data manager {indexer} referenced for genome {genome}")

Expand Down
29 changes: 11 additions & 18 deletions src/ephemeris/_idc_split_data_manager_genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging
import os
import re
import xml.etree.ElementTree as ElementTree
from copy import deepcopy
from typing import (
Any,
Expand All @@ -17,7 +18,6 @@
List,
Optional,
)
import xml.etree.ElementTree as ElementTree

import requests
import yaml
Expand All @@ -32,9 +32,7 @@
DataManager,
read_data_managers_configuration,
)
from .common_parser import (
get_common_args,
)
from .common_parser import get_common_args
from .ephemeris_log import (
disable_external_library_logging,
setup_global_logger,
Expand Down Expand Up @@ -184,9 +182,7 @@ def walk_over_incomplete_runs(split_options: SplitOptions):
fetch_params.append({"sequence_name": description})
elif re.match("^[A-Z_]+[0-9.]+", source):
fetch_params.append({"reference_source|reference_source_selector": "ncbi"})
fetch_params.append(
{"reference_source|requested_identifier": source}
)
fetch_params.append({"reference_source|requested_identifier": source})
fetch_params.append({"sequence_name": genome["description"]})
fetch_params.append({"sequence.id": genome["id"]})
elif re.match("^http", source):
Expand Down Expand Up @@ -247,7 +243,6 @@ def walk_over_incomplete_runs(split_options: SplitOptions):


def split_genomes(split_options: SplitOptions) -> None:

def write_task_file(build_id: str, indexer: str, run_data_manager: RunDataManager):
split_genomes_path = split_options.split_genomes_path
if not os.path.exists(split_options.split_genomes_path):
Expand All @@ -262,7 +257,6 @@ def write_task_file(build_id: str, indexer: str, run_data_manager: RunDataManage


class GalaxyHistoryIsBuildComplete:

def __init__(self, history_names: List[str]):
self._history_names = history_names

Expand All @@ -272,7 +266,6 @@ def __call__(self, build_id: str, indexer_name: str) -> bool:


class CVMFSPublishIsComplete:

def __init__(self, records: Dict[str, List[str]]):
self.records = records

Expand All @@ -284,18 +277,18 @@ def _parser():
"""returns the parser object."""
# login required to check history...
parser = get_common_args(login_required=True, log_file=True)
parser.add_argument('--merged-genomes-path', '-m', default="genomes.yml")
parser.add_argument('--split-genomes-path', '-s', default="data_manager_tasks")
parser.add_argument('--data-managers-path', default="data_managers.yml")
parser.add_argument('--complete-check-cvmfs', default=False, action="store_true")
parser.add_argument('--cvmfs-root', default="/cvmfs/idc.galaxyproject.org")
parser.add_argument("--merged-genomes-path", "-m", default="genomes.yml")
parser.add_argument("--split-genomes-path", "-s", default="data_manager_tasks")
parser.add_argument("--data-managers-path", default="data_managers.yml")
parser.add_argument("--complete-check-cvmfs", default=False, action="store_true")
parser.add_argument("--cvmfs-root", default="/cvmfs/idc.galaxyproject.org")

parser.add_argument("--tool-id-mode", choices=["tool_shed_guid", "short"], default=DEFAULT_TOOL_ID_MODE)

# filters
parser.add_argument('--filter-stage', default=None)
parser.add_argument('--filter-data-manager', default=None)
parser.add_argument('--filter-build-id', default=None)
parser.add_argument("--filter-stage", default=None)
parser.add_argument("--filter-data-manager", default=None)
parser.add_argument("--filter-build-id", default=None)

return parser

Expand Down
17 changes: 14 additions & 3 deletions src/ephemeris/run_data_managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,10 @@ def run_jobs(jobs, skipped_jobs):
all_skipped_jobs.append(skipped_job)
for job in jobs:
started_job = self.tool_client.run_tool(
history_id=history_id, tool_id=job["tool_id"], tool_inputs=job["inputs"], data_manager_mode=data_manager_mode
history_id=history_id,
tool_id=job["tool_id"],
tool_inputs=job["inputs"],
data_manager_mode=data_manager_mode,
)
log.info(
'Dispatched job %i. Running DM: "%s" with parameters: %s'
Expand Down Expand Up @@ -357,7 +360,9 @@ def _parser():
action="store_true",
help="Do not stop running when jobs have failed.",
)
parser.add_argument("--data-manager-mode", "--data_manager_mode", choices=["bundle", "populate", "dry_run"], default="populate")
parser.add_argument(
"--data-manager-mode", "--data_manager_mode", choices=["bundle", "populate", "dry_run"], default="populate"
)
parser.add_argument("--history-name", default=None)
return parser

Expand All @@ -374,7 +379,13 @@ def main(argv=None):
gi = get_galaxy_connection(args, file=args.config, log=log, login_required=True)
config = load_yaml_file(args.config)
data_managers = DataManagers(gi, config)
data_managers.run(log, args.ignore_errors, args.overwrite, data_manager_mode=args.data_manager_mode, history_name=args.history_name)
data_managers.run(
log,
args.ignore_errors,
args.overwrite,
data_manager_mode=args.data_manager_mode,
history_name=args.history_name,
)


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions src/ephemeris/shed_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
Galaxy's configuration directory and set Galaxy configuration option
`tool_config_file` to include it.
"""

import datetime as dt
import json
import logging
Expand Down
1 change: 0 additions & 1 deletion tests/test_idc_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from ephemeris._idc_lint import lint_idc_directory
from .test_split_genomes import setup_mock_idc_dir


MISSPELLED_DATA_MANAGER_YAML_STR = """
data_manager_fetch_genome_dbkeys_all_fasta:
tool_id: 'toolshed.g2.bx.psu.edu/repos/devteam/data_manager_fetch_genome_dbkeys_all_fasta/data_manager_fetch_genome_all_fasta_dbkey/0.0.3'
Expand Down
7 changes: 5 additions & 2 deletions tests/test_split_genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def setup_mock_idc_dir(directory: Path):


def read_and_validate_run_data_manager_yaml(path):
with open(path, "r") as f:
with open(path) as f:
return RunDataManagers(**yaml.safe_load(f))


Expand Down Expand Up @@ -90,7 +90,10 @@ def test_split_genomes(tmp_path: Path):
run = read_and_validate_run_data_manager_yaml(new_task_run_yaml)
assert len(run.data_managers) == 1
data_manager = run.data_managers[0]
assert data_manager.id == "toolshed.g2.bx.psu.edu/repos/devteam/data_manager_twobit_builder/twobit_builder_data_manager/0.0.2"
assert (
data_manager.id
== "toolshed.g2.bx.psu.edu/repos/devteam/data_manager_twobit_builder/twobit_builder_data_manager/0.0.2"
)
assert data_manager.items[0]["id"] == "hg19_rCRS_pUC18_phiX174"
assert data_manager.items[0]["dbkey"] == "hg19_rCRS_pUC18_phiX174"

Expand Down

0 comments on commit a42771d

Please sign in to comment.