# 07-multismash.yaml
---

#------------------------<  Set these for every job  >------------------------#

# Number of cores to use in parallel
cores: 3

# Input directory containing the data
in_dir: /scratch/ad14556/pangenome-large/results/antismash

# Input file extension (no leading period).
# Keep this null for antiSMASH result folders.
in_ext: null

# Output directory to store the results
out_dir: /scratch/ad14556/pangenome-large/results/multismash

# Desired analyses - antiSMASH will always be run unless existing results are given
run_tabulation: true
run_bigscape: false


#------------<  Change these if the defaults don't match your needs  >------------#

# Snakemake flags are normally given on the command line, but they can also be
# listed here, one per line. The folded scalar (>-) joins the lines with single
# spaces, so keep comments outside the block or they become part of the value.
#   --keep-going    Go on with independent jobs if a job fails
snakemake_flags: >-
  --keep-going


## Note: The following flags are set by multiSMASH and cannot be used directly:
#     --snakefile --cores --use-conda --configfile --conda-prefix


#####  run_antismash  #####

## The sequence argument plus --output-dir, --cpus, and --logfile are set automatically.
## Flags currently disabled (move one into the block below to enable it):
##     --genefinding-tool none
##     --no-abort-on-invalid-records
## Comments must stay outside the folded block, otherwise they become part of the value.
antismash_flags: >-
  --minimal

# For paired fasta/gff inputs, multiSMASH sets the --genefinding-gff3 flag itself.
#     Give the annotation extension here (e.g. gff or gff3); the basename must match the fasta!
antismash_annotation_ext: null    # e.g. gff3

# Should downstream steps (tabulation and/or BiG-SCAPE) run if jobs fail?
antismash_accept_failure: false

# Should multiSMASH set the --reuse-results flag? (for antiSMASH JSON inputs)
antismash_reuse_results: false


#####  run_tabulation  #####

# Count regions per individual contig (true) rather than per assembly (false)?
count_per_contig: false

# Count a hybrid once for each BGC class it contains (true),
#     rather than once as a separate "hybrid" BGC class (false)?
# Caution: true artificially inflates total BGC counts
split_hybrids: false


#####  run_bigscape  #####

## [--inputdir], [--outputdir], [--pfam-dir] and [--cores] are set automatically.
## Disabled flag (move it into the block below to enable it):
##     --mibig
## Comments must stay outside the folded block, otherwise they become part of the value.
bigscape_flags: >-
  --mix
  --no_classify
  --include_singletons
  --clans-off
  --cutoffs 0.5

# Should the final BiG-SCAPE results be compressed?
zip_bigscape: true


#-----------<  Change these if you have a non-standard installation  >-----------#

## antiSMASH: name the conda environment only if antiSMASH is in a different
## environment from multiSMASH.
antismash_conda_env_name: null
antismash_command: antismash       # Or maybe `python /path/to/run_antismash.py`

# BiG-SCAPE: by default, a new BiG-SCAPE conda environment is automatically
#   installed the first time multiSMASH is run with the flag [run_bigscape: True].
#   To use a BiG-SCAPE environment you already have, put its name here.
bigscape_conda_env_name: null
bigscape_command: null             # Maybe "bigscape.py" for some versions

# BiG-SCAPE also requires a hmmpress'd Pfam database (Pfam-A.hmm plus .h3* files).
#   By default, multiSMASH uses antiSMASH's Pfam directory. If antiSMASH isn't installed,
#   or multiSMASH instructs you to do so, set this to the directory containing Pfam-A.hmm.
pfam_dir: null                     # Relative paths are relative to THIS file!