docs, config

pirovc · Feb 11, 2024 · c2528d4
1 parent 4a0044c
commit c2528d4
Show file tree

Hide file tree

Showing 4 changed files with 18 additions and 20 deletions.
diff --git a/docs/custom_databases.md b/docs/custom_databases.md
@@ -68,8 +68,8 @@ The classification max. level against this database will depend on the value set
 #### Files, taxonomy and specialization
 
 ```
-sequences.fasta  FILE_A  562  ID44444  Escherichia coli TW10119
-others.fasta     FILE_B  623  ID55555  Shigella flexneri 1a
+sequences.fasta  sequences  562  ID44444  Escherichia coli TW10119
+others.fasta     others     623  ID55555  Shigella flexneri 1a
 ```
 
 The classification max. level against this database will depend on the value set for `--level`:

diff --git a/docs/index.md b/docs/index.md
@@ -49,8 +49,10 @@ python3 -V
 # Install packages via pip or conda:
 # PIP
 python3 -m pip install "pandas>=1.2.0" "multitax>=1.3.1"
+wget --quiet --show-progress https://raw.githubusercontent.com/pirovc/genome_updater/master/genome_updater.sh && chmod +x genome_updater.sh
+
 # Conda/Mamba (alternative)
-conda install "pandas>=1.2.0" "multitax>=1.3.1"
+conda install -c bioconda -c conda-forge "pandas>=1.2.0" "multitax>=1.3.1" "genome_updater>=0.6.3"
 ```
 ### C++ dependencies
 
@@ -90,7 +92,7 @@ sudo make install  # optional
 
 ### Installing raptor
 
-The easiest way to install [raptor](https://github.com/seqan/raptor) is via conda with `conda install -c bioconda -c conda-forge "raptor>=3.0.1"` (already included in ganon install via conda).
+The easiest way to install [raptor](https://github.com/seqan/raptor) is via conda with `conda install -c bioconda -c conda-forge "raptor=3.0.1"` (already included in ganon install via conda).
 
 !!! Note
     raptor is required to build databases with the Hierarchical Interleaved Bloom Filter (`ganon build --filter-type hibf`)
@@ -131,7 +133,7 @@ ganon -h
 #### Running tests
 
 ```bash
-python3 -m pip install "parameterized>=0.9.0"
+python3 -m pip install "parameterized>=0.9.0" # Alternative: conda install -c conda-forge "parameterized>=0.9.0"
 python3 -m unittest discover -s tests/ganon/integration/
 python3 -m unittest discover -s tests/ganon/integration_online/  # optional - downloads large files
 cd build/
@@ -147,7 +149,7 @@ usage: ganon [-h] [-v]
 - - - - - - - - - -
    _  _  _  _  _   
   (_|(_|| |(_)| |  
-   _|   v. 2.0.1
+   _|   v. 2.1.0
 - - - - - - - - - -
 
 positional arguments:
@@ -271,18 +273,15 @@ required arguments:
                         Database output prefix (default: None)
 
 custom arguments:
-  -n , --input-file     Manually set information for input files: file <tab> [target <tab> node <tab> specialization
-                        <tab> specialization name]. target is the sequence identifier if --input-target sequence (file
-                        can be repeated for multiple sequences). if --input-target file and target is not set, filename
-                        is used. node is the taxonomic identifier. Mutually exclusive --input (default: None)
-  -a , --input-target   Target to use [file, sequence]. By default: 'file' if multiple input files are provided or
-                        --input-file is set, 'sequence' if a single file is provided. Using 'file' is recommended and
-                        will speed-up the building process (default: None)
-  -l , --level          Use a specialized target to build the database. By default, --level is the --input-target.
-                        Options: any available taxonomic rank [species, genus, ...] or 'leaves' (requires --taxonomy).
-                        Further specialization options [assembly, custom]. assembly will retrieve and use the assembly
-                        accession and name. custom requires and uses the specialization field in the --input-file.
-                        (default: None)
+  -n , --input-file     Tab-separated file with all necessary file/sequence information. Fields: file [<tab> target
+                        <tab> node <tab> specialization <tab> specialization name]. For details:
+                        https://pirovc.github.io/ganon/custom_databases/. Mutually exclusive --input (default: None)
+  -a , --input-target   Target to use [file, sequence]. Parse input by file or by sequence. Using 'file' is recommended
+                        and will speed-up the building process (default: file)
+  -l , --level          Max. level to build the database. By default, --level is the --input-target. Options: any
+                        available taxonomic rank [species, genus, ...] or 'leaves' (requires --taxonomy). Further
+                        specialization options [assembly, custom]. assembly will retrieve and use the assembly accession
+                        and name. custom requires and uses the specialization field in the --input-file. (default: None)
   -m [ ...], --taxonomy-files [ ...]
                         Specific files for taxonomy - otherwise files will be downloaded (default: None)
   -z [ ...], --genome-size-files [ ...]

diff --git a/src/ganon/config.py b/src/ganon/config.py
@@ -88,7 +88,7 @@ def __init__(self, which: str=None, **kwargs):
         build_custom_required_args.add_argument("-c", "--input-recursive", action="store_true",                             help="Look for files recursively in folder(s) provided with --input")
 
         build_custom_args = build_custom_parser.add_argument_group("custom arguments")
-        build_custom_args.add_argument("-n", "--input-file",        type=file_exists,                 metavar="", help="Manually set information for input files: file <tab> [target <tab> node <tab> specialization <tab> specialization name]. target is the sequence identifier if --input-target sequence (file can be repeated for multiple sequences). if --input-target file and target is not set, filename is used. node is the taxonomic identifier. Mutually exclusive --input")
+        build_custom_args.add_argument("-n", "--input-file",        type=file_exists,                 metavar="", help="Tab-separated file with all necessary file/sequence information. Fields: file [<tab> target <tab> node <tab> specialization <tab> specialization name]. For details: https://pirovc.github.io/ganon/custom_databases/. Mutually exclusive --input")
         build_custom_args.add_argument("-a", "--input-target",      type=str,         default="file", metavar="", help="Target to use [file, sequence]. Parse input by file or by sequence. Using 'file' is recommended and will speed-up the building process", choices=self.choices_input_target)
         build_custom_args.add_argument("-l", "--level",             type=str,                         metavar="", help="Max. level to build the database. By default, --level is the --input-target. Options: any available taxonomic rank [species, genus, ...] or 'leaves' (requires --taxonomy). Further specialization options [" + ", ".join(self.choices_level) + "]. assembly will retrieve and use the assembly accession and name. custom requires and uses the specialization field in the --input-file.")
         build_custom_args.add_argument("-m", "--taxonomy-files",    type=file_exists, nargs="*",      metavar="", help="Specific files for taxonomy - otherwise files will be downloaded")

diff --git a/tests/ganon/integration/test_classify.py b/tests/ganon/integration/test_classify.py
@@ -144,7 +144,6 @@ def test_multiple_matches_em(self):
         params["rel_cutoff"] = 0.001
         params["rel_filter"] = 1
 
-        print(params)
         # Build config from params
         cfg = Config("classify", **params)
         # Run