Skip to content

Commit

Permalink
Merge pull request #1090 from KevinMenden/refgenie
Browse files Browse the repository at this point in the history
initial working version of refgenie integration
  • Loading branch information
mirpedrol authored Jul 6, 2022
2 parents 87a633d + 8e04b61 commit 65cb765
Show file tree
Hide file tree
Showing 4 changed files with 223 additions and 1 deletion.
160 changes: 160 additions & 0 deletions nf_core/refgenie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#!/usr/bin/env python
"""
Update a nextflow.config file with refgenie genomes
"""

import logging
import os
import re
from pathlib import Path
from textwrap import dedent

# import refgenconf
from warnings import warn

import rich
from rich.logging import RichHandler

import nf_core.utils

# Set up logging
# Module-level logger named after this module; this logger drops records below INFO.
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)

# Set up rich tracebacks
# Runs at import time: tracebacks are rendered by rich on a console bound to stderr.
# Whether terminal output is forced is decided by nf_core.utils.rich_force_colors().
stderr = rich.console.Console(stderr=True, force_terminal=nf_core.utils.rich_force_colors())
rich.traceback.install(console=stderr, width=200, word_wrap=True, extra_lines=1)

# Template for the generated nextflow config file.
# It is filled in with str.format(content=...) by _print_nf_config, so '{content}' is the
# only placeholder and every literal brace of the nextflow syntax is doubled ('{{' / '}}').
NF_CFG_TEMPLATE = """
// This is a read-only config file managed by refgenie. Manual changes to this file will be overwritten
// To make changes here, use refgenie to update the reference genome data
params {{
genomes {{
{content}
}}
}}
"""


def _print_nf_config(rgc):
    """
    Generate the contents of a nextflow config file with the genomes
    from the refgenie config file.

    Adapted from: https://github.com/refgenie/refgenie_nfcore

    Args:
        rgc: A RefGenConf object. Only ``list_assets_by_genome()`` and
            ``seek(genome, asset)`` are called on it.

    Returns:
        str: ``NF_CFG_TEMPLATE`` filled with one entry per genome. Each entry
        holds one ``asset = "path"`` line for every asset whose path could be
        resolved; assets for which ``seek`` raises are skipped with a warning.
    """
    # Collect the pieces in a list and join once instead of growing a string.
    config_parts = []
    for genome, asset_list in rgc.list_assets_by_genome().items():
        config_parts.append(f" '{genome}' {{\n")
        for asset in asset_list:
            try:
                pth = rgc.seek(genome, asset)
            # Catch general exception instead of refgenconf exception --> no refgenconf import needed
            except Exception:
                # Logger.warn is a deprecated alias (removed in Python 3.13); use warning()
                log.warning(f"{genome}/{asset} is incomplete, ignoring...")
            else:
                config_parts.append(f' {asset.ljust(20, " ")} = "{pth}"\n')
        config_parts.append(" }\n")

    return NF_CFG_TEMPLATE.format(content="".join(config_parts))


def _update_nextflow_home_config(refgenie_genomes_config_file, nxf_home):
    """
    Update the $NXF_HOME/config file by adding an includeConfig statement to it
    for the 'refgenie_genomes_config_file' if not already defined.

    Args:
        refgenie_genomes_config_file: Path to the generated genomes config. It is
            converted to an absolute path before being written or searched for.
        nxf_home: Path to the nextflow home directory; the file updated is
            ``<nxf_home>/config``.

    Returns:
        None. The file is created if it does not exist, appended to if it exists
        without a matching includeConfig line, and left untouched otherwise.
    """
    genomes_config_path = os.path.abspath(refgenie_genomes_config_file)
    include_config_string = dedent(
        f"""
        ///// >>> nf-core + RefGenie >>> /////
        // !! Contents within this block are managed by 'nf-core/tools' !!
        // Includes auto-generated config file with RefGenie genome assets
        includeConfig '{genomes_config_path}'
        ///// <<< nf-core + RefGenie <<< /////
        """
    )
    nxf_home_config = Path(nxf_home) / "config"

    if not os.path.exists(nxf_home_config):
        # create new config and add include statement
        # (make sure the nextflow home directory itself exists first)
        nxf_home_config.parent.mkdir(parents=True, exist_ok=True)
        with open(nxf_home_config, "w") as fh:
            fh.write(include_config_string)
        log.info(f"Created new nextflow config file: {nxf_home_config}")
        return

    # look for include statement in config
    # The path is escaped so that regex metacharacters in it (e.g. '+', '(', '\')
    # are matched literally; otherwise the block would be appended again on every run.
    include_pattern = re.compile(rf"\s*includeConfig\s*'{re.escape(genomes_config_path)}'")
    with open(nxf_home_config, "r") as fh:
        has_include_statement = any(include_pattern.match(line) for line in fh)

    # if include statement is missing, add it to the last line
    if not has_include_statement:
        with open(nxf_home_config, "a") as fh:
            fh.write(include_config_string)
        log.info(f"Included refgenie_genomes.config to {nxf_home_config}")


def update_config(rgc):
    """
    Update the genomes.config file after a local refgenie database has been updated.

    This function is executed after running 'refgenie pull <genome>/<asset>'.
    The refgenie config file is transformed into a nextflow.config file, which is used to
    overwrite the 'refgenie_genomes.config' file.
    The path to the target config file is inferred from the following options, in order:
    - the 'nextflow_config' attribute in the refgenie config file
    - the NXF_REFGENIE_PATH environment variable
    - otherwise defaults to: $NXF_HOME/nf-core/refgenie_genomes.config
    Additionally, an 'includeConfig' statement is added to the file $NXF_HOME/config

    Args:
        rgc: A RefGenConf object describing the local refgenie database.

    Returns:
        bool: True if the genomes config was written, False if no target path
        could be determined or the file could not be written.
    """

    # Compile nextflow refgenie_genomes.config from refgenie config
    refgenie_genomes = _print_nf_config(rgc)

    # Get the path to NXF_HOME
    # If NXF_HOME is not set, create it at $HOME/.nextflow
    # If $HOME is not set, set nxf_home to false
    nxf_home = os.environ.get("NXF_HOME")
    if not nxf_home:
        try:
            nxf_home = Path.home() / ".nextflow"
            if not os.path.exists(nxf_home):
                log.info(f"Creating NXF_HOME directory at {nxf_home}")
                os.makedirs(nxf_home, exist_ok=True)
        except RuntimeError:
            nxf_home = False

    # Get the path for storing the updated refgenie_genomes.config
    if hasattr(rgc, "nextflow_config"):
        refgenie_genomes_config_file = rgc.nextflow_config
    elif "NXF_REFGENIE_PATH" in os.environ:
        refgenie_genomes_config_file = os.environ["NXF_REFGENIE_PATH"]
    elif nxf_home:
        refgenie_genomes_config_file = Path(nxf_home) / "nf-core/refgenie_genomes.config"
    else:
        log.info("Could not determine path to 'refgenie_genomes.config' file.")
        return False

    # Save the updated genome config
    try:
        # The target directory (e.g. $NXF_HOME/nf-core) may not exist yet on first use
        os.makedirs(os.path.dirname(os.path.abspath(refgenie_genomes_config_file)), exist_ok=True)
        with open(refgenie_genomes_config_file, "w") as fh:
            fh.write(refgenie_genomes)
    except OSError:
        # Covers a missing or unwritable location; report it instead of failing the hook
        log.warning(f"Could not write to {refgenie_genomes_config_file}")
        return False
    log.info(f"Updated nf-core genomes config: {refgenie_genomes_config_file}")

    # Add include statement to NXF_HOME/config
    if nxf_home:
        _update_nextflow_home_config(refgenie_genomes_config_file, nxf_home)

    return True
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pytest-datafiles
pytest-cov
mock
black
refgenie
Sphinx
sphinx_rtd_theme
isort
Expand Down
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@
author_email="[email protected]",
url="https://github.com/nf-core/tools",
license="MIT",
entry_points={"console_scripts": ["nf-core=nf_core.__main__:run_nf_core"]},
entry_points={
"console_scripts": ["nf-core=nf_core.__main__:run_nf_core"],
"refgenie.hooks.post_update": ["nf-core-refgenie=nf_core.refgenie:update_config"],
},
install_requires=required,
packages=find_packages(exclude=("docs")),
include_package_data=True,
Expand Down
58 changes: 58 additions & 0 deletions tests/test_refgenie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python
""" Tests covering the refgenie integration code
"""

import os
import shlex
import subprocess
import tempfile
import unittest

import nf_core.refgenie


class TestRefgenie(unittest.TestCase):
    """Class for refgenie tests"""

    def setUp(self):
        """
        Prepare a refgenie config file inside a temporary directory and point
        the NXF_HOME environment variable into that directory
        """
        # TemporaryDirectory gives us a cleanup() that does not need a shell
        self._tmp_dir_handle = tempfile.TemporaryDirectory()
        self.tmp_dir = self._tmp_dir_handle.name
        self.NXF_HOME = os.path.join(self.tmp_dir, ".nextflow")
        self.NXF_REFGENIE_PATH = os.path.join(self.NXF_HOME, "nf-core", "refgenie_genomes.config")
        self.REFGENIE = os.path.join(self.tmp_dir, "genomes_config.yaml")
        # Set NXF_HOME environment variable
        # avoids adding includeConfig statement to config file outside the current tmpdir
        self.NXF_HOME_ORIGINAL = os.environ.get("NXF_HOME")
        os.environ["NXF_HOME"] = self.NXF_HOME

        # create NXF_HOME and nf-core directories
        os.makedirs(os.path.join(self.NXF_HOME, "nf-core"), exist_ok=True)

        # Initialize a refgenie config
        # Arguments are passed as a list so the path needs no shell quoting
        try:
            subprocess.run(["refgenie", "init", "-c", self.REFGENIE], check=True)
        except (OSError, subprocess.CalledProcessError):
            # unittest does not call tearDown when setUp raises, so undo our changes here
            self.tearDown()
            raise

        # Add NXF_REFGENIE_PATH to refgenie config
        with open(self.REFGENIE, "a") as fh:
            fh.write(f"nextflow_config: {self.NXF_REFGENIE_PATH}\n")

    def tearDown(self) -> None:
        """Remove the temporary directory and restore the NXF_HOME environment variable"""
        # Remove the tempdir again
        self._tmp_dir_handle.cleanup()
        # Reset NXF_HOME environment variable
        if self.NXF_HOME_ORIGINAL is None:
            os.environ.pop("NXF_HOME", None)
        else:
            os.environ["NXF_HOME"] = self.NXF_HOME_ORIGINAL

    def test_update_refgenie_genomes_config(self):
        """Test that pulling an asset with refgenie updates the nf-core genomes config"""
        # Populate the config with a genome
        cmd = ["refgenie", "pull", "t7/fasta", "-c", self.REFGENIE]
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT)

        # check_output returns bytes; decode before comparing against text
        assert "Updated nf-core genomes config" in out.decode(errors="replace")

0 comments on commit 65cb765

Please sign in to comment.