diff --git a/bio/seqtk/mergepe/environment.yaml b/bio/seqtk/mergepe/environment.yaml new file mode 100644 index 00000000000..80a74836fd3 --- /dev/null +++ b/bio/seqtk/mergepe/environment.yaml @@ -0,0 +1,6 @@ +channels: + - bioconda + - conda-forge +dependencies: + - seqtk =1.3 + - pigz =2.3 diff --git a/bio/seqtk/mergepe/meta.yaml b/bio/seqtk/mergepe/meta.yaml new file mode 100644 index 00000000000..742a0334bb7 --- /dev/null +++ b/bio/seqtk/mergepe/meta.yaml @@ -0,0 +1,19 @@ +name: seqtk mergepe +description: Interleave two paired-end FASTA/Q files +url: https://github.com/lh3/seqtk +authors: + - Michael Hall +input: + - paired fastq files - can be compressed in gzip format (``*.gz``). +output: + - > + a single, interleaved FASTA/Q file. By default, the output will be compressed, + use the param ``compress_lvl`` to change this. +params: + compress_lvl: > + Regulate the speed of compression using the specified digit, + where 1 indicates the fastest compression method (less compression) + and 9 indicates the slowest compression method (best compression). + 0 is no compression. 11 gives a few percent better compression at a severe cost + in execution time, using the zopfli algorithm. The default is 6. +notes: Multiple threads can be used during compression of the output file with ``pigz``. 
"""Snakemake wrapper for interleaving reads from paired FASTA/Q files using seqtk."""

__author__ = "Michael Hall"
__copyright__ = "Copyright 2021, Michael Hall"
__email__ = "michael@mbh.sh"
__license__ = "MIT"

from snakemake.shell import shell

# Compression level handed to pigz as ``-<level>``; falls back to 6 when the
# rule does not set ``params.compress_lvl``.
# NOTE: ``compress_lvl`` and ``log`` are referenced by name through the
# ``{compress_lvl}`` / ``{log}`` placeholders in the command template below,
# so these variable names must not be changed.
compress_lvl = int(snakemake.params.get("compress_lvl", 6))

# Log redirection fragment: stderr only (stdout carries the data stream),
# not appending to an existing log.
log = snakemake.log_fmt_shell(stdout=False, stderr=True, append=False)

# Interleave the input files with seqtk, compress the stream with pigz using
# the rule's thread count, and write the result to the rule's output.
shell(
    "(seqtk mergepe {snakemake.input}"
    " | pigz -{compress_lvl} -c -p {snakemake.threads})"
    " > {snakemake.output} {log}"
)
b/bio/seqtk/subsample/se/test/logs/seqtk_subsample/a.log deleted file mode 100644 index b8bc04ebd90..00000000000 --- a/bio/seqtk/subsample/se/test/logs/seqtk_subsample/a.log +++ /dev/null @@ -1 +0,0 @@ -/bin/bash: pigz: command not found diff --git a/docs/_templates/wrapper.rst b/docs/_templates/wrapper.rst index 21a7f99b75e..efcced1262b 100644 --- a/docs/_templates/wrapper.rst +++ b/docs/_templates/wrapper.rst @@ -5,6 +5,7 @@ {{ description }} +**URL**: {{ url }} Example ------- @@ -16,6 +17,7 @@ This wrapper can be used in the following way: {{ snakefile }} Note that input, output and log file paths can be chosen freely. + When running with .. code-block:: bash @@ -53,6 +55,25 @@ Input/Output {% endfor %} {% endif %} +{% if params %} + +Params +------ + +{# Parse the params section of .yaml #} +{% for key in params %} + {% if key is mapping %} + {% for k, value in key.items() %} +* ``{{ k }}``: {{ value }} + {% endfor %} + {% else %} +* ``{{ key }}``: {{ params[key] }} + {% endif %} + +{% endfor %} + +{% endif %} + {% if notes %} Notes diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 00000000000..296ef2d5249 --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1,143 @@ +.. _contributing: + +Contributing +============ + +We invite anybody to contribute to the Snakemake Wrapper Repository. +If you want to contribute we suggest the following procedure: + +#. Fork the repository: https://github.com/snakemake/snakemake-wrappers +#. Clone your fork locally. +#. Locally, create a new branch: ``git checkout -b my-new-snakemake-wrapper`` +#. Commit your contributions to that branch and push them to your fork: ``git push -u origin my-new-snakemake-wrapper`` +#. Create a pull request. + +The pull request will be reviewed and included as fast as possible. 
+If your pull request does not get a review quickly, you can `@mention ` previous contributors to a particular wrapper (``git blame``) or regular contributors that you think might be able to give a review. +Contributions should follow the coding style of the already present examples, i.e.: + +* provide a ``meta.yaml`` that describes the wrapper (see the `meta.yaml documentation below `_) +* provide an ``environment.yaml`` which lists all required software packages and follows + `the respective best practices `_. The + packages should be available for installation via the + `default anaconda channels `_ or via the + `conda`_ channels + `bioconda `_ or + `conda-forge `_. + Other sustainable community maintained channels are possible as well. +* add a ``wrapper.py`` or ``wrapper.R`` file that can deal with arbitrary ``input:`` and ``output:`` paths. +* provide a minimal test case in a subfolder called ``test``, with an example + ``Snakefile`` that shows how to use the wrapper (rule names should be descriptive and written in `snake_case `_), some minimal testing data + (also check existing wrappers for suitable data) and add an invocation of the + test in ``test.py`` +* ensure consistent `formatting`_ of Python files and `linting`_ of Snakefiles. + +.. _meta: + +``meta.yaml`` file +------------------- + +The following fields are available to use in the wrapper ``meta.yaml`` file. All, except +those marked optional, should be provided. + +* **name**: The name of the wrapper. +* **description**: a description of what the wrapper does. +* **url**: URL to the wrapper tool webpage. +* **authors**: A `sequence`_ of names of the people who have contributed to the wrapper. +* **input**: A `mapping`_ or `sequence`_ of required inputs for the wrapper. +* **output**: A `mapping`_ or `sequence`_ of output(s) from the wrapper. +* **params** (optional): A `mapping`_ of parameters that can be used in the wrapper's ``params`` directive. 
If no parameters are used for the wrapper, this field can be omitted. +* **notes** (optional): Anything of note that does not fit into the scope of the other fields. + +Example +^^^^^^^ + +.. code-block:: yaml + + name: seqtk mergepe + description: Interleave two paired-end FASTA/Q files + url: https://github.com/lh3/seqtk + authors: + - Michael Hall + input: + - paired fastq files - can be compressed. + output: + - > + a single, interleaved FASTA/Q file. By default, the output will be compressed, + use the param ``compress_lvl`` to change this. + params: + compress_lvl: > + Regulate the speed of compression using the specified digit, + where 1 indicates the fastest compression method (less compression) + and 9 indicates the slowest compression method (best compression). + 0 is no compression. 11 gives a few percent better compression at a severe cost + in execution time, using the zopfli algorithm. The default is 6. + notes: Multiple threads can be used during compression of the output file with ``pigz``. + + + +.. _sequence: https://yaml.org/spec/1.2/spec.html#id2759963 +.. _mapping: https://yaml.org/spec/1.2/spec.html#id2759963 + +.. _formatting: + +Formatting +---------- + +Please ensure Python files such as ``test.py`` and ``wrapper.py`` are formatted with +|black|_. Additionally, please format your test ``Snakefile`` with |snakefmt|_. + +.. |black| replace:: ``black`` +.. _black: https://github.com/psf/black +.. |snakefmt| replace:: ``snakefmt`` +.. _snakefmt: https://github.com/snakemake/snakefmt + +.. _linting: + +Linting +------- + +Please `lint`_ your test ``Snakefile`` with:: + + snakemake -s --lint + +.. _lint: https://snakemake.readthedocs.io/en/stable/snakefiles/writing_snakefiles.html#best-practices + +Testing locally +--------------- + +If you want to debug your contribution locally (before creating a pull request), you +can install all dependencies with |mamba|_ (or |conda|_). 
`Install miniconda with the +channels as described for bioconda `_ and +set up an environment with the necessary dependencies and activate it:: + + mamba create -n test-snakemake-wrappers snakemake pytest conda snakefmt black + conda activate test-snakemake-wrappers + +Afterwards, from the main directory of the repo, you can run the test(s) for your +contribution by `specifying an expression `_ +that matches the name(s) of your test(s) via the ``-k`` option of ``pytest``:: + + pytest test.py -v -k your_test + + +If you also want to test the docs generation locally, create another environment +and activate it:: + + mamba create -n test-snakemake-wrapper-docs sphinx sphinx_rtd_theme pyyaml sphinx-copybutton + conda activate test-snakemake-wrapper-docs + +Then, enter the respective directory and build the docs:: + + cd docs + make html + +If it runs through, you can open the main page at ``docs/_build/html/index.html`` +in a web browser. If you want to start fresh, you can clean up the build +with ``make clean``. + + +.. |mamba| replace:: ``mamba`` +.. _mamba: https://github.com/mamba-org/mamba +.. |conda| replace:: ``conda`` +.. _conda: https://conda.io diff --git a/docs/index.rst b/docs/index.rst index 62e52b8c808..9cb6184f189 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -55,69 +55,12 @@ For the above example, the explicit GitHub URL to specify would need to be the ` "https://github.com/snakemake/snakemake-wrappers/raw/0.2.0/bio/samtools/sort" -Contribute ----------- +Contributing +------------ We invite anybody to contribute to the Snakemake Wrapper Repository. -If you want to contribute we suggest the following procedure: +If you want to contribute refer to the :ref:`contributing guide `. -#. Fork the repository: https://github.com/snakemake/snakemake-wrappers -#. Clone your fork locally. -#. Locally, create a new branch: ``git checkout -b my-new-snakemake-wrapper`` -#. 
Commit your contributions to that branch and push them to your fork: ``git push -u origin my-new-snakemake-wrapper`` -#. Create a pull request. - -The pull request will be reviewed and included as fast as possible. -Contributions should follow the coding style of the already present examples, i.e.: - -* provide a ``meta.yaml`` with name, description and author(s) of the wrapper -* provide an ``environment.yaml`` which lists all required software packages (the - packages should be available for installation via the - `default anaconda channels `_ or via the - `conda `_ channels - `bioconda `_ or - `conda-forge `_. - Other sustainable community maintained channels are possible as well.) -* provide a minimal test case in a subfolder called ``test``, with an example - ``Snakefile`` that shows how to use the wrapper, some minimal testing data - (also check existing wrappers for suitable data) and add an invocation of the - test in ``test.py`` -* follow the python `style guide `_, - using 4 spaces for indentation. - -Testing locally -^^^^^^^^^^^^^^^ - -If you want to debug your contribution locally, before creating a pull request, -we recommend adding your test case to the start of the list in ``test.py``, so -that it runs first. Then, `install miniconda with the channels as described for -bioconda `_ and set up an -environment with the necessary dependencies and activate it:: - - conda create -n test-snakemake-wrappers snakemake pytest conda - conda activate test-snakemake-wrappers - -Afterwards, from the main directory of the repo, you can run the tests with:: - - pytest test.py -v - -If you use a keyboard interrupt after your test has failed, you will get all -the relevant stdout and stderr messages printed. 
@skip_if_not_modified
def test_seqtk_mergepe():
    """Run the ``bio/seqtk/mergepe`` test workflow, forcing ``a.merged.fastq.gz`` to be rebuilt."""
    snakemake_cmd = [
        "snakemake",
        "--cores",
        "1",
        "--use-conda",
        "-F",
        "a.merged.fastq.gz",
    ]
    run("bio/seqtk/mergepe", snakemake_cmd)