From a26cd89e8a02904513e2a9df6f3b3b21bc923031 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Fri, 12 Apr 2024 12:41:21 +0100 Subject: [PATCH] Fix linting problems. --- .github/CONTRIBUTING.md | 9 ++- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/branch.yml | 2 +- .github/workflows/linting.yml | 10 +-- .github/workflows/linting_comment.yml | 4 +- .gitignore | 3 +- .nf-core.yml | 63 +++++++++++-------- README.md | 16 ++--- modules.json | 3 +- .../sourmash/sketch/sourmash-sketch.diff | 14 +++++ nextflow.config | 7 ++- nextflow_schema.json | 26 ++------ pyproject.toml | 8 ++- 13 files changed, 91 insertions(+), 76 deletions(-) create mode 100644 modules/nf-core/sourmash/sketch/sourmash-sketch.diff diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 98259e6..86c95fe 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -23,8 +23,11 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests -You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to -receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -82,7 +85,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. 
Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 249cf1f..a9a56d6 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -17,7 +17,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/ebi-metageno - [ ] If you've fixed a bug or added code that should be tested, add tests! - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/ebi-metagenomics/shallowmapping/tree/master/.github/CONTRIBUTING.md) - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. 
- [ ] Output Documentation in `docs/output.md` is updated. diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index e52a3c8..0228046 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v2 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 81cd098..073e187 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,10 +14,10 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Set up Python 3.11 - uses: actions/setup-python@v5 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: 3.11 cache: "pip" @@ -32,12 +32,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v4 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.11" architecture: "x64" @@ -60,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 147bcd1..b706875 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest 
steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v3 + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.gitignore b/.gitignore index 378073a..5124c9a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,7 @@ .nextflow* work/ +data/ results/ -.nf-test/ -.nf-test.log .DS_Store testing/ testing* diff --git a/.nf-core.yml b/.nf-core.yml index 2b2842f..3205ff8 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,34 +1,47 @@ lint: files_exist: - - CODE_OF_CONDUCT.md - - assets/nf-core-shallowmapping_logo_light.png - - docs/ - - pyproject.toml - - .gitignore - - .github/ISSUE_TEMPLATE/config.yml - - .github/workflows/* - - .github/CONTRIBUTING.md - - .github/PULL_REQUEST_TEMPLATE.md - - conf/igenomes.config - - conf/igenomes.config - - conf/test.config - - conf/test_full.config + - CODE_OF_CONDUCT.md + - assets/nf-core-shallowmapping_logo_light.png + - docs/ + - docs/output.md + - docs/README.md + - docs/README.md + - docs/usage.md + - docs/images/nf-core-shallowmapping_logo_dark.png + - docs/images/nf-core-shallowmapping_logo_light.png + - pyproject.toml + - .gitignore + - .github/ISSUE_TEMPLATE/config.yml + - .github/workflows/* + - .github/CONTRIBUTING.md + - .github/PULL_REQUEST_TEMPLATE.md + - conf/igenomes.config + - conf/igenomes.config + - conf/test.config + - conf/test_full.config files_unchanged: - - CODE_OF_CONDUCT.md - - assets/nf-core-shallowmapping_logo_light.png - - .github/ISSUE_TEMPLATE/bug_report.yml + - CODE_OF_CONDUCT.md + - 
assets/nf-core-shallowmapping_logo_light.png + - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/CONTRIBUTING.md + - .github/PULL_REQUEST_TEMPLATE.md + - .github/workflows/branch.yml + - .github/workflows/linting_comment.yml + - .github/workflows/linting.yml + - .gitignore + - pyproject.toml multiqc_config: - - report_comment + - report_comment nextflow_config: - - manifest.name - - manifest.homePage - - process.cpus - - process.memory - - process.time - - custom_config + - manifest.name + - manifest.homePage + - process.cpus + - process.memory + - process.time + - custom_config repository_type: pipeline template: prefix: ebi-metagenomics skip: - - igenomes - - nf_core_configs + - igenomes + - nf_core_configs diff --git a/README.md b/README.md index cf27f0b..1461bf5 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,12 @@ ## Introduction -**ebi-metagenomics/shallowmapping** is a bioinformatics pipeline that generates taxonomic and functional profiles for low-yield (shallow shotgun: < 10 M reads) short raw-reads using [`MGnify biome-specific genome catalogues`](https://www.ebi.ac.uk/metagenomics/browse/genomes) as a reference. +**ebi-metagenomics/shallowmapping** is a bioinformatics pipeline that generates taxonomic and functional profiles for low-yield (shallow shotgun: < 10 M reads) short raw-reads using [`MGnify biome-specific genome catalogues`](https://www.ebi.ac.uk/metagenomics/browse/genomes) as a reference. At the moment, the biome selection is limited to the precomputed databases available to downloading (chicken-gut-v1-0-1 and mouse-gut-v1-0). Other databases can be build for any of the [`MGnify genome catalogues`](https://www.ebi.ac.uk/metagenomics/browse/genomes) under request by opening an issue in this repo. 
The main sections of the pipeline includes the following steps: + 1. Raw-reads quality control ([`fastp`](https://github.com/OpenGene/fastp)) 2. HQ reads decontamination versus human, phyX, and host ([`bwa-mem2`](https://github.com/bwa-mem2/bwa-mem2)) 3. QC report of decontaminated reads ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) @@ -22,7 +23,6 @@ The final output includes a species relative abundance table, Pfam and KEGG Orth

- ## Install and dependencies This workflow was built using [Nextflow](https://www.nextflow.io/) and follows the [nf-core guidelines](https://nf-co.re/docs/contributing/guidelines). It uses Singularity containers making installation trivial and results highly reproducible. To run the pipeline in your system you need: @@ -36,12 +36,11 @@ Clone the Shallow-mapping pipeline github repo: git clone https://github.com/EBI-Metagenomics/shallowmapping.git ``` - ### Required reference databases The first time you run the pipeline you need to put available indexed databases for the decontamination step, MGnify genomes catalogue tables, and some external tables for DRAM visuals generation. MGnify host most of the databases and setting up can be done in a single step by providing the location for decontamination and MGnify databases where the new files will be added. The directories have to already exists. Please provide full paths. -Consider that decontamination reference genomes require ~15-20G of storage. +Consider that decontamination reference genomes require ~15-20G of storage. MGnify catalogue genomes db occupy ~1G. ```bash @@ -55,7 +54,6 @@ bash bin/setup_script.sh \ Running the pipeline using bwamem2 is optional. If you want to run the pipeline with this option set the `--download_bwa true`. Consider that this database will occupy >15G of storage in your system. - ### Usage Prepare a samplesheet with your input data that looks as follows: @@ -85,8 +83,8 @@ At the moment, the biome selection is limited to the precomputed databases avail The central location for the databases can be set in the config file. 
- Optional arguments includes: + ```bash --run_bwa default = `false` # To generate results using bwamem2 besides sourmash --core_mode default = `false` # To use core functions instead of pangenome functions @@ -94,8 +92,7 @@ Optional arguments includes: Use `--core_mode true` for large catalogues like the mouse-gut to avoid over-prediction due to an extremely large number of accessory genes in the pangenome. Nextflow option `-profile` can be use to select a suitable config for your computational resources. -Nextflow option `-resume` can be use to re-run the pipeline from the last successfully finished step. - +Nextflow option `-resume` can be use to re-run the pipeline from the last successfully finished step. ## Credits @@ -104,10 +101,9 @@ ebi-metagenomics/shallowmapping pipeline was originally written by @Ales-ibt. We thank the following people for their extensive assistance in the development of this pipeline: @mberacochea, @ebi-jlu8 - ## Citations -If you use ebi-metagenomics/shallowmapping for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) +If you use ebi-metagenomics/shallowmapping for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE). 
diff --git a/modules.json b/modules.json index 5491d2e..8f634a3 100644 --- a/modules.json +++ b/modules.json @@ -39,7 +39,8 @@ "sourmash/sketch": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/sourmash/sketch/sourmash-sketch.diff" } } } diff --git a/modules/nf-core/sourmash/sketch/sourmash-sketch.diff b/modules/nf-core/sourmash/sketch/sourmash-sketch.diff new file mode 100644 index 0000000..04ea22d --- /dev/null +++ b/modules/nf-core/sourmash/sketch/sourmash-sketch.diff @@ -0,0 +1,14 @@ +Changes in module 'nf-core/sourmash/sketch' +--- modules/nf-core/sourmash/sketch/main.nf ++++ modules/nf-core/sourmash/sketch/main.nf +@@ -19,7 +19,7 @@ + + script: + // required defaults for the tool to run, but can be overridden +- def args = task.ext.args ?: "dna --param-string 'scaled=1000,k=21,k=31,k=51,abund'" ++ def args = task.ext.args ?: "dna --param-string 'scaled=1000,k=51,abund'" + def prefix = task.ext.prefix ?: "${meta.id}" + """ + sourmash sketch \\ + +************************************************************ diff --git a/nextflow.config b/nextflow.config index b9b3983..d84f372 100644 --- a/nextflow.config +++ b/nextflow.config @@ -47,8 +47,8 @@ params { /* * Biome options: * chicken-gut-v1-0-1 - * mouse-gut-v1-0 - * + * mouse-gut-v1-0 + * */ // MultiQC options @@ -110,6 +110,9 @@ profiles { memory = '1 GB' } } + test { + // TODO: add testing profile + } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile diff --git a/nextflow_schema.json b/nextflow_schema.json index 2f51299..ead61f9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,9 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input" - ], + "required": ["input"], "properties": { "input": { "type": "string", @@ 
-108,14 +106,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -218,10 +209,7 @@ "biome": { "type": "string", "description": "This can be any of the MGnify catalogues for which shallow-mapping databases are currently available", - "enum": [ - "chicken-gut-v1-0-1", - "mouse-gut-v1-0" - ] + "enum": ["chicken-gut-v1-0-1", "mouse-gut-v1-0"] }, "run_bwa": { "type": "boolean", @@ -247,9 +235,5 @@ "type": "string" } }, - "required": [ - "biome", - "shallow_dbs_path", - "decont_reference_paths" - ] -} \ No newline at end of file + "required": ["biome", "shallow_dbs_path", "decont_reference_paths"] +} diff --git a/pyproject.toml b/pyproject.toml index 7d08e1c..5611062 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,11 +3,13 @@ [tool.ruff] line-length = 120 target-version = "py38" -select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] cache-dir = "~/.cache/ruff" -[tool.ruff.isort] +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] known-first-party = ["nf_core"] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["E402", "F401"]