Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option --fix to update the meta.yml file of subworkflows. #3077

Merged
merged 14 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
- The `modules_nfcore` tag in the `main.nf.test` file of modules/subworkflows now displays the organization name in custom modules repositories ([#3005](https://github.com/nf-core/tools/pull/3005))
- Add `--migrate_pytest` option to `nf-core <modules|subworkflows> test` command ([#3085](https://github.com/nf-core/tools/pull/3085))
- Components: allow spaces at the beginning of include statements ([#3115](https://github.com/nf-core/tools/pull/3115))
- Add option `--fix` to update the `meta.yml` file of subworkflows ([#3077](https://github.com/nf-core/tools/pull/3077))

### General

Expand Down
7 changes: 5 additions & 2 deletions nf_core/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1479,11 +1479,14 @@ def command_subworkflows_list_local(ctx, keywords, json, directory): # pylint:
help="Sort lint output by subworkflow or test name.",
show_default=True,
)
def command_subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by):
@click.option("--fix", is_flag=True, help="Fix all linting tests if possible.")
def command_subworkflows_lint(
ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix
):
"""
Lint one or more subworkflows in a directory.
"""
subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by)
subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix)


# nf-core subworkflows info
Expand Down
3 changes: 2 additions & 1 deletion nf_core/commands_subworkflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def subworkflows_list_local(ctx, keywords, json, directory): # pylint: disable=
sys.exit(1)


def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by):
def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix):
"""
Lint one or more subworkflows in a directory.

Expand All @@ -121,6 +121,7 @@ def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warn
subworkflow_lint = SubworkflowLint(
directory,
fail_warned=fail_warned,
fix=fix,
registry=ctx.params["registry"],
remote_url=ctx.obj["modules_repo_url"],
branch=ctx.obj["modules_repo_branch"],
Expand Down
76 changes: 55 additions & 21 deletions nf_core/components/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,25 @@ def get_remote_yaml(self) -> Optional[dict]:
self.remote_location = self.modules_repo.remote_url
return yaml.safe_load(file_contents)

def generate_params_table(self, type) -> Table:
    """Generate a rich table for inputs and outputs.

    Args:
        type: Table heading, e.g. ``"Inputs"`` or ``"Outputs"``.

    Returns:
        A ``rich.table.Table`` with a name column, a description column and a
        "Pattern" (modules) or "Structure" (subworkflows) column.
    """
    table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
    # Pick the emoji matching the channel direction: the previous version
    # hard-coded :inbox_tray:, so the Outputs table showed the inbox icon.
    emoji = ":inbox_tray:" if type == "Inputs" else ":outbox_tray:"
    table.add_column(f"{emoji} {type}")
    table.add_column("Description")
    if self.component_type == "modules":
        table.add_column("Pattern", justify="right", style="green")
    elif self.component_type == "subworkflows":
        table.add_column("Structure", justify="right", style="green")
    return table

def get_channel_structure(self, structure: dict) -> str:
    """Render a channel's structure as a single string.

    Each entry is formatted as ``name (type - pattern)``; the pattern part is
    omitted when absent. Entries are concatenated without a separator, exactly
    as produced before.
    """
    parts = []
    for name, meta in structure.items():
        suffix = f" - {meta['pattern']}" if meta.get("pattern") else ""
        parts.append(f"{name} ({meta['type']}{suffix})")
    return "".join(parts)

def generate_component_info_help(self):
"""Take the parsed meta.yml and generate rich help.

Expand Down Expand Up @@ -277,33 +296,48 @@ def generate_component_info_help(self):

# Inputs
if self.meta.get("input"):
inputs_table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
inputs_table.add_column(":inbox_tray: Inputs")
inputs_table.add_column("Description")
inputs_table.add_column("Pattern", justify="right", style="green")
for input in self.meta["input"]:
for key, info in input.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
inputs_table = self.generate_params_table("Inputs")
for i, input in enumerate(self.meta["input"]):
inputs_table.add_row(f"[italic]input[{i}][/]", "", "")
if self.component_type == "modules":
for element in input:
for key, info in element.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
elif self.component_type == "subworkflows":
for key, info in input.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i]",
Markdown(info["description"] if info["description"] else ""),
self.get_channel_structure(info["structure"]) if info.get("structure") else "",
)

renderables.append(inputs_table)

# Outputs
if self.meta.get("output"):
outputs_table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
outputs_table.add_column(":outbox_tray: Outputs")
outputs_table.add_column("Description")
outputs_table.add_column("Pattern", justify="right", style="green")
outputs_table = self.generate_params_table("Outputs")
for output in self.meta["output"]:
for key, info in output.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
if self.component_type == "modules":
for ch_name, elements in output.items():
outputs_table.add_row(f"{ch_name}", "", "")
for element in elements:
for key, info in element.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
elif self.component_type == "subworkflows":
for key, info in output.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i]",
Markdown(info["description"] if info["description"] else ""),
self.get_channel_structure(info["structure"]) if info.get("structure") else "",
)

renderables.append(outputs_table)

Expand Down
143 changes: 87 additions & 56 deletions nf_core/components/nfcore_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
import re
from pathlib import Path
from typing import List, Optional, Tuple, Union
from typing import Any, List, Optional, Tuple, Union

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -41,6 +41,7 @@ def __init__(
remote_component (bool): Whether the module is to be treated as a
nf-core or local component
"""
self.component_type = component_type
self.component_name = component_name
self.repo_url = repo_url
self.component_dir = component_dir
Expand Down Expand Up @@ -170,65 +171,95 @@ def _get_included_components_in_chained_tests(self, main_nf_test: Union[Path, st

def get_inputs_from_main_nf(self) -> None:
    """Collect all inputs from the main.nf file and store them on ``self.inputs``.

    For modules the result is a list of channels, where each channel is a list
    of ``{element_name: {}}`` dicts parsed from the ``input:`` section. For
    subworkflows the result is a flat list of channel names taken from the
    ``take:`` block. If no input section is found, ``self.inputs`` is left
    untouched.
    """
    # Shape depends on the component type: nested element dicts for modules,
    # plain channel-name strings for subworkflows.
    inputs: Union[list[list[dict[str, dict[str, str]]]], list[str]] = []
    with open(self.main_nf) as f:
        data = f.read()
    if self.component_type == "modules":
        # Inputs appear after "input:" and before "output:", and can be
        # formatted as e.g.:
        #   val(foo)
        #   path(bar)
        #   val foo
        #   path bar
        if "input:" not in data:
            log.debug(f"Could not find any inputs in {self.main_nf}")
            return
        input_data = data.split("input:")[1].split("output:")[0]
        regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
        for line in input_data.split("\n"):
            # One line corresponds to one channel (e.g. a tuple of elements).
            channel_elements: list[dict[str, dict[str, str]]] = []
            for match in re.finditer(regex, line):
                input_val = None
                if match.group(3):
                    input_val = match.group(3).split(",")[0]  # handle `files, stageAs: "inputs/*"` cases
                elif match.group(4):
                    input_val = match.group(4).split(",")[0]  # handle `files, stageAs: "inputs/*"` cases
                if input_val:
                    channel_elements.append({input_val: {}})
            if len(channel_elements) > 0:
                inputs.append(channel_elements)
        log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
        self.inputs = inputs
    elif self.component_type == "subworkflows":
        # Subworkflow inputs are the channel names listed in the "take:"
        # block, which ends at "main:" or "emit:".
        if "take:" not in data:
            log.debug(f"Could not find any inputs in {self.main_nf}")
            return
        input_data = data.split("take:")[1].split("main:")[0].split("emit:")[0]
        for line in input_data.split("\n"):
            try:
                inputs.append(line.split()[0])
            except IndexError:
                pass  # Empty lines
        log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
        self.inputs = inputs

def get_outputs_from_main_nf(self):
    """Collect all outputs from the main.nf file and store them on ``self.outputs``.

    For modules, outputs are parsed from the ``output:`` section and returned as
    a list of ``{emit_name: [{element_name: {}}, ...]}`` dicts. For
    subworkflows, outputs are the names listed in the ``emit:`` block, returned
    as a flat list of strings. Returns the (possibly empty) list early if no
    output section is found.
    """
    outputs = []
    with open(self.main_nf) as f:
        data = f.read()
    if self.component_type == "modules":
        # get output values from main.nf after "output:". the names are always after "emit:"
        if "output:" not in data:
            log.debug(f"Could not find any outputs in {self.main_nf}")
            return outputs
        # Output section ends at the optional "when:" block.
        output_data = data.split("output:")[1].split("when:")[0]
        # Channel name declared after "emit:", e.g. `emit: bam`.
        regex_emit = r"emit:\s*([^)\s,]+)"
        # Channel elements such as val(x), path(y), env Z or stdout.
        regex_elements = r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
        for line in output_data.split("\n"):
            match_emit = re.search(regex_emit, line)
            matches_elements = re.finditer(regex_elements, line)
            # Lines without an "emit:" name are skipped entirely.
            if not match_emit:
                continue
            output_channel = {match_emit.group(1): []}
            for _, match_element in enumerate(matches_elements, start=1):
                output_val = None
                if match_element.group(3):
                    # Parenthesised form, e.g. path("*.bam") -> "*.bam".
                    output_val = match_element.group(3)
                elif match_element.group(4):
                    # Bare form, e.g. `val foo` -> "foo".
                    output_val = match_element.group(4)
                if output_val:
                    output_val = output_val.strip("'").strip('"')  # remove quotes
                    output_channel[match_emit.group(1)].append({output_val: {}})
            outputs.append(output_channel)
        log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
        self.outputs = outputs
    elif self.component_type == "subworkflows":
        # get output values from main.nf after "emit:". Can be named outputs or not.
        if "emit:" not in data:
            log.debug(f"Could not find any outputs in {self.main_nf}")
            return outputs
        # NOTE(review): assumes the emit block is terminated by the workflow's
        # closing "}" — verify against multi-block main.nf files.
        output_data = data.split("emit:")[1].split("}")[0]
        for line in output_data.split("\n"):
            try:
                # Keep only the channel name, dropping any "= expression" part.
                outputs.append(line.split("=")[0].split()[0])
            except IndexError:
                # Empty lines
                pass
        log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
        self.outputs = outputs
10 changes: 5 additions & 5 deletions nf_core/module-template/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,6 @@ input:
## TODO nf-core: Add a description of all of the variables used as output
{% endif -%}
output:
- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"
- {{ 'bam:' if not_empty_template else "output:" }}
#{% if has_meta -%} Only when we have meta
- meta:
Expand All @@ -81,6 +76,11 @@ output:
{% else -%}
- edam: ""
{%- endif %}
- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"

authors:
- "{{ author }}"
Expand Down
17 changes: 17 additions & 0 deletions nf_core/modules/lint/meta_yml.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,23 @@ def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None

# Confirm that all input and output channels are correctly specified
if valid_meta_yml:
# confirm that the name matches the process name in main.nf
if meta_yaml["name"].upper() == module.process_name:
module.passed.append(
(
"meta_name",
"Correct name specified in `meta.yml`.",
module.meta_yml,
)
)
else:
module.failed.append(
(
"meta_name",
f"Conflicting `process` name between meta.yml (`{meta_yaml['name']}`) and main.nf (`{module.process_name}`)",
module.meta_yml,
)
)
# Check that inputs are specified in meta.yml
if len(module.inputs) > 0 and "input" not in meta_yaml:
module.failed.append(
Expand Down
4 changes: 2 additions & 2 deletions nf_core/pipeline-template/modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
{%- if fastqc %}
"fastqc": {
"branch": "master",
"git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
}{% endif %}{%- if multiqc %}{% if fastqc %},{% endif %}
"multiqc": {
"branch": "master",
"git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
}
{%- endif %}
Expand Down
5 changes: 4 additions & 1 deletion nf_core/pipeline-template/modules/nf-core/fastqc/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading