diff --git a/.github/workflows/docker-pipeline.yml b/.github/workflows/docker-pipeline.yml
index f39ae26..2475791 100644
--- a/.github/workflows/docker-pipeline.yml
+++ b/.github/workflows/docker-pipeline.yml
@@ -2,14 +2,19 @@ name: Build and push docker image for pipeline
 on:
   push:
-    tags: [ '*.*.*' ]
+    branches:
+      - "*"
+    tags:
+      - "*.*.*"
+  schedule:
+    - cron: '0 6 15 * *'  # 15th of each month at 6am
 
 env:
   REGISTRY: ghcr.io
   IMAGE_NAME: ${{ github.repository }}
 
 jobs:
-  build_pipeline_image:
+  build_and_test_pipeline_image:
     runs-on: ubuntu-latest
 
     steps:
@@ -29,9 +34,31 @@ jobs:
         with:
           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
 
+      - name: Build image
+        uses: docker/build-push-action@v6
+        with:
+          annotations: ${{ steps.meta.outputs.annotations }}
+          build-args: |
+            MEFETCH_EMAIL=${{ secrets.MEFETCH_EMAIL }}
+            MEFETCH_API_KEY=${{ secrets.MEFETCH_API_KEY }}
+            YA16SDB_VERSION=${{ steps.meta.outputs.version }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          labels: ${{ steps.meta.outputs.labels }}
+          load: true
+          push: false
+          tags: |
+            ${{ steps.meta.outputs.tags }}
+            gha_image
+
+      - name: Test run and unit tests
+        run: docker run gha_image /bin/bash -c "scons settings=testfiles/settings.conf && python -m unittest"
+
       - name: Build and push tag
+        if: github.ref_type == 'tag'
         uses: docker/build-push-action@v6
         with:
+          annotations: ${{ steps.meta.outputs.annotations }}
           cache-from: type=gha
           cache-to: type=gha,mode=max
           labels: ${{ steps.meta.outputs.labels }}
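The new test step exercises the pipeline against the bundled test settings inside the freshly built image before anything is pushed. A minimal sketch for reproducing it locally (assumes the image was built and tagged gha_image, e.g. with `docker build -t gha_image .`):

#!/usr/bin/env python3
"""Reproduce the CI test step outside of GitHub Actions (sketch)."""
import subprocess

# mirror the workflow: run scons with the test settings, then the unit tests
subprocess.run(
    ['docker', 'run', '--rm', 'gha_image', '/bin/bash', '-c',
     'scons settings=testfiles/settings.conf && python -m unittest'],
    check=True)  # raises CalledProcessError if either command fails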
diff --git a/Dockerfile b/Dockerfile
index b643370..0254a34 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,23 +1,20 @@
 FROM python:3.11-bookworm
-
-RUN apt-get update && apt-get upgrade -y && apt-get install --assume-yes --no-install-recommends \
-    ca-certificates git wget
-
-ADD requirements.txt /usr/local/share/ya16sdb/
-ADD bin/bootstrap.sh /usr/local/share/ya16sdb/bin/
-
+ARG MEFETCH_API_KEY MEFETCH_EMAIL YA16SDB_VERSION
+ENV \
+MEFETCH_API_KEY=${MEFETCH_API_KEY} \
+MEFETCH_EMAIL=${MEFETCH_EMAIL} \
+PIP_ROOT_USER_ACTION=ignore \
+SCONSFLAGS="--file /usr/local/share/ya16sdb/SConstruct" \
+YA16SDB_VERSION=${YA16SDB_VERSION}
+RUN apt-get update && apt-get upgrade -y && apt-get install -y ca-certificates wget
 WORKDIR /usr/local/share/ya16sdb/
-RUN ["/bin/bash", "-c", "bin/bootstrap.sh /usr/local/"]
-
-ADD .git/ /usr/local/share/ya16sdb/.git/
-ADD data/ /usr/local/share/ya16sdb/data/
-ADD bin/ /usr/local/share/ya16sdb/bin/
-ADD SConstruct ncbi.conf /usr/local/share/ya16sdb/
-
-RUN find /usr/local/share/ya16sdb/ -type f -exec chmod 644 {} \; && \
-find /usr/local/share/ya16sdb/ -type d -exec chmod 755 {} \; && \
-find /usr/local/share/ya16sdb/bin/ -type f -exec chmod 755 {} \;
-
-ENV SCONSFLAGS="--file /usr/local/share/ya16sdb/SConstruct"
-
+COPY requirements.txt SConstruct ncbi.conf ./
+COPY testfiles/ ./testfiles/
+COPY tests/ ./tests/
+COPY bin/ ./bin/
+COPY data/ ./data/
+RUN bin/bootstrap.sh /usr/local/
+RUN find . -type f -exec chmod 644 {} \; && \
+find . -type d -exec chmod 755 {} \; && \
+find ./bin/ -type f -exec chmod 755 {} \;
 
 CMD ["scons", "--dry-run"]
diff --git a/SConstruct b/SConstruct
index ad579ad..e56ef50 100644
--- a/SConstruct
+++ b/SConstruct
@@ -8,57 +8,32 @@ import configparser
 import csv
 import errno
 import os
-import SCons
 import sys
 import time
 import warnings
 
-from SCons.Script import ARGUMENTS, GetBuildFailures, Depends
+from SCons.Script import ARGUMENTS, Environment, GetBuildFailures, Depends
 
+this_dir = os.path.dirname((lambda x: x).__code__.co_filename)
 venv = os.environ.get('VIRTUAL_ENV')
 if not venv:
     warnings.warn('No active virtualenv detected, using system environment')
 
-if not os.path.exists('settings.conf'):
-    sys.exit("Can't find settings.conf")
+settings_file = ARGUMENTS.get('settings', 'settings.conf')
+if not os.path.isfile(settings_file):
+    sys.exit("Can't find " + settings_file)
-
-
-class Environment(SCons.Environment.Environment):
-    def __init__(self, singularity=None, verbosity=0, **kws):
-        self.singularity = singularity
-        self.verbosity = verbosity
-        SCons.Environment.Environment.__init__(self, **kws)
-
-    def Command(self,
-                target,
-                source,
-                action,
-                singularity=None,
-                options=None,
-                **kws):
-        # if docker and self.docker:  # TODO: implement this
-        if singularity and self.singularity:
-            self.Depends(target, singularity)
-        sactions = []
-        for a in self.Flatten(action):
-            sa = self.singularity
-            sa = '{} --bind $pipeline'.format(sa)
-            if options:
-                sa = '{} {}'.format(sa, options)
-            sa = '{} {} {}'.format(sa, singularity, a)
-            sactions.append(VirtualAction(a, sa, self.verbosity))
-        action = sactions
-        return SCons.Environment.Environment.Command(
-            self, target, source, action, **kws)
-
-
-class VirtualAction(SCons.Action.CommandAction):
-    def __init__(self, command, singularity, verbosity, **kw):
-        self.command = singularity if verbosity else command
-        SCons.Action.CommandAction.__init__(self, singularity, **kw)
-
-    def print_cmd_line(self, _, target, source, env):
-        c = env.subst(self.command, SCons.Subst.SUBST_RAW, target, source)
-        SCons.Action.CommandAction.print_cmd_line(self, c, target, source, env)
+conf = configparser.ConfigParser()
+conf.optionxform = str  # preserve case of option names
+conf.read(settings_file)
+settings = conf['DEFAULT']
+if 'ncbi_conf' in settings:
+    conf.read(settings['ncbi_conf'])
+else:
+    conf.read(os.path.join(this_dir, 'ncbi.conf'))
+true_vals = ['t', 'y', '1']
+release = ARGUMENTS.get('release', 'no').lower()[0] in true_vals
+out = os.path.join(settings['outdir'], time.strftime('%Y%m%d'))
+log = os.path.join(out, 'log.txt')
+cachedir = settings['cachedir']
 
 
 def blast_db(env, sequence_file, output_base):
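The settings handling above leans on two configparser details: values in [DEFAULT] interpolate into one another, and optionxform = str keeps option names like MEFETCH_API_KEY case-sensitive (configparser lowercases them by default). A small illustration with inline placeholder values:

import configparser

conf = configparser.ConfigParser()
conf.optionxform = str  # keep option names case-sensitive
conf.read_string("""
[DEFAULT]
outdir=test_output
cachedir=%(outdir)s/.cache

[ENV]
MEFETCH_WORKERS=100
""")
settings = conf['DEFAULT']
assert settings['cachedir'] == 'test_output/.cache'  # %(outdir)s interpolated
assert 'MEFETCH_WORKERS' in conf['ENV']              # case preserved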
@@ -69,9 +44,8 @@ def blast_db(env, sequence_file, output_base):
     blast_out = env.Command(
         target=[output_base + ext for ext in extensions],
         source=sequence_file,
-        action=('makeblastdb -dbtype nucl '
-                '-in $SOURCE -out ' + output_base),
-        singularity=settings['blast'])
+        action='makeblastdb -dbtype nucl '
+               '-in $SOURCE -out ' + output_base)
     env.Command(
         target=output_base,
         source=blast_out,
@@ -86,12 +60,10 @@ def taxonomy(fa, info, path):
     taxtable = env.Command(
         target=os.path.join(path, 'taxonomy.csv'),
         source=info,
-        action=['taxit taxtable '
-                '--seq-info $SOURCE '
-                '--out $TARGET '
-                '$tax_url'],
-        singularity=settings['taxit'],
-        options='--bind $tax_url')
+        action='taxit taxtable '
+               '--seq-info $SOURCE '
+               '--out $TARGET '
+               '$tax_url')
 
     """
     Taxtable output replacing tax_ids with taxnames
@@ -99,8 +71,7 @@ def taxonomy(fa, info, path):
     """
     lineages = env.Command(
         target=os.path.join(path, 'lineages.csv'),
         source=[taxtable, info],
-        action='taxit lineage_table --csv-table $TARGET $SOURCES',
-        singularity=settings['taxit'])
+        action='taxit lineage_table --csv-table $TARGET $SOURCES')
 
     """
     Mothur output - https://mothur.org/wiki/Taxonomy_File
@@ -108,24 +79,13 @@ def taxonomy(fa, info, path):
     """
     mothur = env.Command(
         target=os.path.join(path, 'lineages.txt'),
         source=[taxtable, info],
-        action='taxit lineage_table --taxonomy-table $TARGET $SOURCES',
-        singularity=settings['taxit'])
+        action='taxit lineage_table --taxonomy-table $TARGET $SOURCES')
 
     blast = blast_db(env, fa, os.path.join(path, 'blast'))
 
     return taxtable, lineages, mothur, blast
 
 
-true_vals = ['t', 'y', '1']
-release = ARGUMENTS.get('release', 'no').lower()[0] in true_vals
-test = ARGUMENTS.get('test', 'no').lower()[0] in true_vals
-absolute_dir = os.path.dirname((lambda x: x).__code__.co_filename)
-conf = configparser.SafeConfigParser()
-conf.read(['settings.conf', os.path.join(absolute_dir, 'ncbi.conf')])
-settings = conf['TEST'] if test else conf['DEFAULT']
-out = os.path.join(settings['outdir'], time.strftime('%Y%m%d'))
-cachedir = settings['cachedir']
-
 cfiles = {
     'genbank_cache': os.path.join(cachedir, 'records.gb'),
     'outliers_cache': os.path.join(cachedir, 'filter_outliers.csv'),
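With the Singularity wrapper classes gone, the helpers above are plain SCons env.Command calls: each declares targets, sources, and a shell action, and SCons re-runs the action only when a source changes. A minimal standalone sketch of the same pattern (hypothetical hello.txt target, not part of the pipeline):

from SCons.Script import Environment

env = Environment()
env.Decider('MD5-timestamp')  # only checksum a source if its timestamp changed
env.Command(
    target='hello.txt',
    source=None,
    action='echo hello > $TARGET')  # $TARGET is substituted by SCons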
@@ -145,46 +105,32 @@ for k, v in cfiles.items():
         os.makedirs(d)
     open(v, 'w').close()
 
-environment_variables = dict(
-    os.environ,
-    PATH=':'.join([
-        os.path.join(absolute_dir, 'bin'),
-        (os.path.join(venv, 'bin') if venv else ''),
-        '/usr/local/bin',
-        '/usr/bin',
-        '/bin']),
-)
-
+# use the default SCons ENV and extend it below
 env = Environment(
-    ENV=environment_variables,
-    log=os.path.join(out, 'log.txt'),
+    log=log,
     out=out,
-    pipeline=absolute_dir,
-    shell='bash',
+    pipeline=this_dir,
     tax_url=os.path.abspath(settings['taxonomy']),
-    verbosity=1,
     **settings
 )
+env.PrependENVPath('PATH', os.path.join(this_dir, 'bin'))
 
-env.EnsureSConsVersion(3, 0, 5)
-env.Decider('MD5')
+for k, v in os.environ.items():
+    if k.startswith('MEFETCH_'):
+        env['ENV'][k] = v
+env['ENV'].update(conf['ENV'])
+env['ENV']['MEFETCH_DB'] = 'nucleotide'
+env['ENV']['MEFETCH_LOG'] = log
+env['ENV']['MEFETCH_MODE'] = 'text'
 
-mefetch = ('mefetch -vv '
-           '-api-key $api_key '  # FIXME: what happens if no $api_key?
-           '-db nucleotide '
-           '-email $email '
-           '-log $log '
-           '-max-retry -1 '
-           '-mode text '
-           '-proc $nreq '
-           '-retry $retry')
+env.Decider('MD5-timestamp')
 
 classified = env.Command(
     source=None,
     target='$out/ncbi/classified.txt',
-    action=['esearch -db nucleotide -query "$classified" | ' +
-            mefetch + ' -format acc -out $TARGET'],
-    singularity=settings['eutils'])
+    action='esearch -db nucleotide -query "$classified" | '
+           'mefetch -vv -format acc -out $TARGET -reqs 3'
+    )
 
 """
 Candidatus Saccharibacteria
@@ -193,9 +139,8 @@ https://gitlab.labmed.uw.edu/molmicro/mkrefpkg/issues/36
 """
 tm7 = env.Command(
     source=None,
     target='$out/ncbi/tm7.txt',
-    action=['esearch -db nucleotide -query "$tm7" | ' +
-            mefetch + ' -format acc -out $TARGET'],
-    singularity=settings['eutils'])
+    action='esearch -db nucleotide -query "$tm7" | '
+           'mefetch -vv -format acc -out $TARGET')
 
 """
 Check the cache for last download_date and download list of modified
@@ -211,11 +156,9 @@ else:
     modified = env.Command(
         source=None,
         target='$out/ncbi/modified.txt',
-        action=[
-            'esearch -db nucleotide -query "($classified OR $tm7) '
-            'AND $download_date[Modification Date] : 3000[Modification Date]" | ' +
-            mefetch + ' -format acc -out $TARGET'],
-        singularity=settings['eutils'])
+        action='esearch -db nucleotide -query "($classified OR $tm7) AND '
+               '$download_date[Modification Date] : 3000[Modification Date]" | '
+               'mefetch -vv -format acc -out $TARGET -reqs 3')
 
 """
 type strains records
@@ -225,9 +168,8 @@ http://www.ncbi.nlm.nih.gov/news/01-21-2014-sequence-by-type/
 """
 types = env.Command(
     source=None,
     target='$out/ncbi/types.txt',
-    action=['esearch -db nucleotide -query "$types" | ' +
-            mefetch + ' -format acc -out $TARGET'],
-    singularity=settings['eutils'])
+    action='esearch -db nucleotide -query "$types" | '
+           'mefetch -vv -format acc -out $TARGET -reqs 3')
 
 """
 Trim accession2taxid with 16s records and update taxids
@@ -240,12 +182,10 @@ accession2taxid = env.Command(
 accession2taxid = env.Command(
     target='$out/ncbi/accession2update_taxids.csv',
     source=accession2taxid,
-    action=['taxit update_taxids '
-            '--outfile $TARGET '
-            '--unknown-action drop '
-            '$SOURCE $tax_url'],
-    singularity=settings['taxit'],
-    options='--bind $tax_url')
+    action='taxit update_taxids '
+           '--outfile $TARGET '
+           '--unknown-action drop '
+           '$SOURCE $tax_url')
 
 """
 Create a list of cached records removing:
@@ -273,18 +213,32 @@ download = env.Command(
     source=[accession2taxid, cache, settings['do_not_download']],
     action='download.py --out $TARGET $SOURCES')
 
+coordinates = env.Command(
+    target='$out/ncbi/genbank.csv',
+    source=download,
+    action='mefetch -vv '
+           '-failed $out/ncbi/ft_failed.txt '
+           '-format ft '
+           '-id $SOURCE '
+           '-retmax 10 '
+           '| ftract -feature "rrna:product:16S ribosomal RNA" -out $TARGET'
+    )
+env.Precious(coordinates)
+
 """
 download genbank records
+
+-retmax 1 when reading from -csv file
 """
 gbs = env.Command(
     target='$out/ncbi/download.gb',
-    source=download,
-    action=[mefetch + ' -format ft -id $SOURCE -retmax 1 | '
-            'ftract -feature "rrna:product:16S ribosomal RNA" '
-            '-log $log '
-            '-on-error continue '
-            '-min-length 1200 | ' +
-            mefetch + ' -csv -format gbwithparts -out $TARGET -retmax 1'])
+    source=coordinates,
+    action='mefetch -vv '
+           '-csv '
+           '-failed $out/ncbi/failed.gb '
+           '-format gb '
+           '-id $SOURCE '
+           '-out $TARGET')
 
 today = time.strftime('%d-%b-%Y')
 fa, seq_info, pubmed_info, references, refseq_info = env.Command(
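Note that the esearch | mefetch actions above no longer pass -db, -email, -log, or retry flags; the SConstruct instead exports MEFETCH_* variables through the command environment (the env['ENV'] block). A sketch of that layering, under the assumption that mefetch falls back to MEFETCH_* environment variables for unset flags:

import os

ENV = {}  # stand-in for SCons env['ENV']
# 1. MEFETCH_* values from the calling environment (e.g. the Docker ENV
#    baked in from the MEFETCH_EMAIL/MEFETCH_API_KEY build args)
ENV.update({k: v for k, v in os.environ.items() if k.startswith('MEFETCH_')})
# 2. the [ENV] section of the settings file
ENV.update({'MEFETCH_MAX_RETRY': '-1', 'MEFETCH_RETRY': '10000',
            'MEFETCH_WORKERS': '100'})
# 3. values the pipeline always enforces win last
ENV.update({'MEFETCH_DB': 'nucleotide', 'MEFETCH_MODE': 'text'})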
@@ -322,12 +276,10 @@ fa, seq_info = env.Command(
     target=['$out/ncbi/extract_genbank/update_taxids/seqs.fasta',
             '$out/ncbi/extract_genbank/update_taxids/seq_info.csv'],
     source=[fa, seq_info],
-    action=['taxit update_taxids '
-            '--unknown-action drop '
-            '${SOURCES[1]} $tax_url | '
-            'partition_refs.py ${SOURCES[0]} - $TARGETS'],
-    singularity=settings['taxit'],
-    options='--bind $tax_url')
+    action='taxit update_taxids '
+           '--unknown-action drop '
+           '${SOURCES[1]} $tax_url | '
+           'partition_refs.py ${SOURCES[0]} - $TARGETS')
 
 """
 cmsearch new sequences against rfam model
@@ -335,9 +287,8 @@ cmsearch = env.Command(
 cmsearch = env.Command(
     target='$out/ncbi/extract_genbank/update_taxids/cmsearch/table.tsv',
     source=['$pipeline/data/SSU_rRNA_bacteria.cm', fa],
-    action=('cmsearch --cpu 14 -E 0.01 --hmmonly -o /dev/null '
-            '--tblout $TARGET $SOURCES || true'),
-    singularity=settings['infernal'])
+    action='cmsearch --cpu 14 -E 0.01 --hmmonly -o /dev/null '
+           '--tblout $TARGET $SOURCES || true')
 
 """
 Fix record orientations
@@ -397,9 +348,7 @@ env.Command(
 taxtable = env.Command(
     target='$out/taxonomy.csv',
     source=seq_info,
-    action='taxit -v taxtable --seq-info $SOURCE --out $TARGET $tax_url',
-    singularity=settings['taxit'],
-    options='--bind $tax_url')
+    action='taxit -v taxtable --seq-info $SOURCE --out $TARGET $tax_url')
 
 """
 Map for WGS records without a refseq assembly accession
@@ -407,13 +356,13 @@ asm = env.Command(
 asm = env.Command(
     target='$out/ani/assembly_summary_genbank.txt',
     source=None,
-    action=('wget '
-            '--output-document $TARGET '
-            '--quiet '
-            '--retry-on-http-error 403 '
-            '--tries 100 '
-            'https://ftp.ncbi.nlm.nih.gov/'
-            'genomes/genbank/assembly_summary_genbank.txt'))
+    action='wget '
+           '--output-document $TARGET '
+           '--quiet '
+           '--retry-on-http-error 403 '
+           '--tries 100 '
+           'https://ftp.ncbi.nlm.nih.gov/'
+           'genomes/genbank/assembly_summary_genbank.txt')
 
 """
 The ANI tax check report
@@ -421,13 +370,13 @@ ani = env.Command(
 ani = env.Command(
     target='$out/ani/ANI_report_prokaryotes.txt',
     source=None,
-    action=('wget '
-            '--output-document $TARGET '
-            '--quiet '
-            '--retry-on-http-error 403 '
-            '--tries 100 '
-            'https://ftp.ncbi.nlm.nih.gov/'
-            'genomes/ASSEMBLY_REPORTS/ANI_report_prokaryotes.txt'))
+    action='wget '
+           '--output-document $TARGET '
+           '--quiet '
+           '--retry-on-http-error 403 '
+           '--tries 100 '
+           'https://ftp.ncbi.nlm.nih.gov/'
+           'genomes/ASSEMBLY_REPORTS/ANI_report_prokaryotes.txt')
 
 """
 Create feather file and initial columns
@@ -475,14 +424,14 @@ type_fa, type_info = env.Command(
     target=['$out/dedup/1200bp/types/seqs.fasta',
             '$out/dedup/1200bp/types/seq_info.csv'],
     source=[fa, feather],
-    action=['partition_refs.py '
-            '--drop-duplicate-sequences '
-            '--is_species '
-            '--is_type '
-            '--is_valid '
-            '--min-length 1200 '
-            '--prop-ambig-cutoff 0.01 '
-            '$SOURCES $TARGETS'])
+    action='partition_refs.py '
+           '--drop-duplicate-sequences '
+           '--is_species '
+           '--is_type '
+           '--is_valid '
+           '--min-length 1200 '
+           '--prop-ambig-cutoff 0.01 '
+           '$SOURCES $TARGETS')
 
 type_tax, type_lineages, types_mothur, type_blast = taxonomy(
     type_fa, type_info, '$out/dedup/1200bp/types/')
@@ -494,14 +443,14 @@ fa, seq_info = env.Command(
     target=['$out/dedup/1200bp/named/seqs.fasta',
             '$out/dedup/1200bp/named/seq_info.csv'],
     source=[fa, feather],
-    action=('partition_refs.py '
-            '--drop-duplicate-sequences '
-            '--is_species '
-            '--is_valid '
-            '--min-length 1200 '
-            '--prop-ambig-cutoff 0.01 '
-            '--species-cap %(species_cap)s '
-            '${SOURCES[:2]} $TARGETS' % settings))
+    action='partition_refs.py '
+           '--drop-duplicate-sequences '
+           '--is_species '
+           '--is_valid '
+           '--min-length 1200 '
+           '--prop-ambig-cutoff 0.01 '
+           '--species-cap %(species_cap)s '
+           '${SOURCES[:2]} $TARGETS' % settings)
 
 named = fa
@@ -539,8 +488,7 @@ fa, details = env.Command(
         '--strategy cluster '
         '--threads-per-job 14 '
         '${SOURCES[:3]}',
-        'cp ${TARGETS[1]} ' + cfiles['outliers_cache']],
-    singularity=settings['deenurp'])
+        'cp ${TARGETS[1]} ' + cfiles['outliers_cache']])
 
 """
 add distance metrics to feather file
@@ -567,16 +515,15 @@ filtered_type_tax, filtered_type_lineages, filtered_type_mothur, _ = taxonomy(
 filtered_type_hits = env.Command(
     target='$out/dedup/1200bp/named/types_vsearch.tsv',
     source=[named, filtered_type_fa],
-    action=('vsearch --usearch_global ${SOURCES[0]} '
-            '--blast6out $TARGET '
-            '--db ${SOURCES[1]} '
-            '--id 0.75 '
-            '--maxaccepts 5 '
-            '--self '  # reject same sequence hits
-            '--strand plus '
-            '--threads 14 '
-            '--top_hits_only'),
-    singularity=settings['vsearch'])
+    action='vsearch --usearch_global ${SOURCES[0]} '
+           '--blast6out $TARGET '
+           '--db ${SOURCES[1]} '
+           '--id 0.75 '
+           '--maxaccepts 5 '
+           '--self '  # reject same sequence hits
+           '--strand plus '
+           '--threads 14 '
+           '--top_hits_only')
 
 """
 This output will be used in the filter plots
@@ -587,13 +534,13 @@ filtered_type_classifications = env.Command(
     filtered_type_info,
     filtered_type_tax,
     os.path.join('$pipeline', 'data/classifier_thresholds.csv')],
-    action=['classify -vv '
-            '--lineages ${SOURCES[2]} '
-            '--rank-thresholds ${SOURCES[3]} '
-            '--seq-info ${SOURCES[1]} '
-            '--starred 101 '
-            '--out $TARGET '
-            '${SOURCES[0]}'])
+    action='classify -vv '
+           '--lineages ${SOURCES[2]} '
+           '--rank-thresholds ${SOURCES[3]} '
+           '--seq-info ${SOURCES[1]} '
+           '--starred 101 '
+           '--out $TARGET '
+           '${SOURCES[0]}')
 
 """
 Adds type_classification to feather file for Dash application
@@ -610,9 +557,7 @@ expand taxids into descendants
 """
 trusted_taxids = env.Command(
     target='$out/dedup/1200bp/named/filtered/trusted/taxids.txt',
     source=settings['trust'],
-    action='taxit get_descendants --out $TARGET $tax_url $SOURCE',
-    singularity=settings['taxit'],
-    options='--bind $tax_url')
+    action='taxit get_descendants --out $TARGET $tax_url $SOURCE')
 
 """
 expand taxids into descendants
@@ -620,9 +565,7 @@ dnt_ids = env.Command(
 """
 dnt_ids = env.Command(
     target='$out/dedup/1200bp/named/filtered/trusted/dnt_ids.txt',
     source=settings['do_not_trust'],
-    action='taxit get_descendants --out $TARGET $tax_url $SOURCE',
-    singularity=settings['taxit'],
-    options='--bind $tax_url')
+    action='taxit get_descendants --out $TARGET $tax_url $SOURCE')
 
 trusted = env.Command(
     target='$out/dedup/1200bp/named/filtered/trusted/trust_ids.txt',
@@ -641,17 +584,17 @@ fa, seq_info = env.Command(
     target=['$out/dedup/1200bp/named/filtered/trusted/seqs.fasta',
             '$out/dedup/1200bp/named/filtered/trusted/seq_info.csv'],
     source=[named, feather, trusted, dnt],
-    action=['partition_refs.py '
-            '--do_not_trust ${SOURCES[3]} '
-            '--drop-duplicate-sequences '
-            '--inliers '  # filter_outliers = True & is_out = False
-            '--is_species '
-            '--is_valid '
-            '--min-length 1200 '
-            '--prop-ambig-cutoff 0.01 '
-            '--species-cap %(species_cap)s '
-            '--trusted ${SOURCES[2]} '
-            '${SOURCES[:2]} $TARGETS' % settings])
+    action='partition_refs.py '
+           '--do_not_trust ${SOURCES[3]} '
+           '--drop-duplicate-sequences '
+           '--inliers '  # filter_outliers = True & is_out = False
+           '--is_species '
+           '--is_valid '
+           '--min-length 1200 '
+           '--prop-ambig-cutoff 0.01 '
+           '--species-cap %(species_cap)s '
+           '--trusted ${SOURCES[2]} '
+           '${SOURCES[:2]} $TARGETS' % settings)
 
 Depends([fa, seq_info], filter_outliers)
 
 taxtable, lineages, mothur, blast = taxonomy(
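The %(species_cap)s placeholders above are resolved by printf-style mapping interpolation before SCons ever sees the action string; the configparser section serves as the mapping, while $-style tokens such as ${SOURCES[:2]} are left for SCons to substitute. A short illustration:

import configparser

conf = configparser.ConfigParser()
conf.read_string('[DEFAULT]\nspecies_cap=5000\n')
# SectionProxy supports __getitem__, so %-interpolation works directly
action = '--species-cap %(species_cap)s ${SOURCES[:2]}' % conf['DEFAULT']
assert action == '--species-cap 5000 ${SOURCES[:2]}'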
@@ -681,16 +624,15 @@ FIXME: use named types not trusted types
 """
 named_type_hits = env.Command(
     target='$out/dedup/1200bp/named/trusted_vsearch.tsv',
     source=[named, fa],
-    action=('vsearch --usearch_global ${SOURCES[0]} '
-            '--blast6out $TARGET '
-            '--db ${SOURCES[1]} '
-            '--id 0.75 '
-            '--maxaccepts 5 '
-            '--self '  # reject same sequence hits
-            '--strand plus '
-            '--threads 14 '
-            '--top_hits_only'),
-    singularity=settings['deenurp'])
+    action='vsearch --usearch_global ${SOURCES[0]} '
+           '--blast6out $TARGET '
+           '--db ${SOURCES[1]} '
+           '--id 0.75 '
+           '--maxaccepts 5 '
+           '--self '  # reject same sequence hits
+           '--strand plus '
+           '--threads 14 '
+           '--top_hits_only')
 
 """
 Creates match_seqname, match_pct, match_version, match_species and
@@ -730,9 +672,8 @@ git version used to generate output
 """
 commit = env.Command(
     target='$out/git_version.txt',
-    source=os.path.join('$pipeline', '.git/objects'),
-    action=['(echo $$(hostname):$pipeline;'
-            'git --git-dir $pipeline/.git describe --tags --dirty) > $TARGET'])
+    source=None,
+    action='(echo $$(hostname):$pipeline;version.py) > $TARGET')
 
 
 def write_build_status():
diff --git a/bin/cmsearch.py b/bin/cmsearch.py
index f792546..f43b32b 100755
--- a/bin/cmsearch.py
+++ b/bin/cmsearch.py
@@ -39,11 +39,11 @@ def main():
     else:
         cmsearch = pandas.read_csv(
             args.cmsearch,
-            delim_whitespace=True,
             comment='#',
             dtype=dtypes,
             header=None,
             names=CMSEARCH_COLS,
+            sep='\\s+',
             usecols=dtypes.keys())
     # alignments are already sorted by bitscore quality
     cmsearch = cmsearch.drop_duplicates(subset='seqname', keep='first')
diff --git a/bin/version.py b/bin/version.py
new file mode 100755
index 0000000..c2f695e
--- /dev/null
+++ b/bin/version.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import subprocess
+import sys
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--out', type=argparse.FileType('w'), default=sys.stdout)
+    return parser.parse_args()
+
+
+def main():
+    args = get_args()
+    if 'YA16SDB_VERSION' in os.environ:
+        ver = os.environ['YA16SDB_VERSION']
+    else:
+        try:
+            cmd = ['git', 'describe', '--tags', '--dirty']
+            ver = subprocess.check_output(cmd, text=True).strip()
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            ver = ''
+    args.out.write(ver)
+
+
+if __name__ == '__main__':
+    sys.exit(main())
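In bin/cmsearch.py, sep='\s+' replaces delim_whitespace=True, which is deprecated in recent pandas releases (2.2+). The new bin/version.py lets the image report a version without git or a .git directory (neither is present in the rebuilt image): YA16SDB_VERSION wins when set, otherwise `git describe` is attempted. A usage sketch run from the repository root (the version string here is hypothetical):

import os
import subprocess

# in CI the YA16SDB_VERSION build arg carries the tag-derived version
env = dict(os.environ, YA16SDB_VERSION='2024.1')
result = subprocess.run(['bin/version.py'], env=env,
                        capture_output=True, text=True, check=True)
assert result.stdout == '2024.1'  # the env var takes precedence over git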
diff --git a/ncbi.conf b/ncbi.conf
index 133837d..fb67ff1 100644
--- a/ncbi.conf
+++ b/ncbi.conf
@@ -5,10 +5,3 @@
 classified=%(16s)s NOT(environmental samples[Organism] OR unclassified Bacteria[Organism])
 tm7=%(16s)s AND Candidatus Saccharibacteria[Organism]
 types=%(16s)s AND sequence_from_type[Filter]
-
-[TEST]
-classified=%(16s)s AND (%(test_ids)s)
-test_ids=txid104100[Organism] OR txid198480[Organism] OR txid333297[Organism] OR txid487838[Organism] OR txid1978400[Organism] OR txid749509[Organism] OR txid1138917[Organism] OR txid1140002[Organism] OR txid393538[Organism] OR txid1676683[Organism] OR txid1736139[Organism]
-test_tm7=txid1805375[Organism] OR txid1476577[Organism]
-tm7=%(16s)s AND %(test_tm7)s
-types=%(16s)s AND sequence_from_type[Filter] AND (%(test_ids)s OR %(test_tm7)s)
diff --git a/settings-example.conf b/settings-example.conf
index ed8f702..4d6f060 100644
--- a/settings-example.conf
+++ b/settings-example.conf
@@ -2,46 +2,31 @@
 outdir=output
 cachedir=%(outdir)s/.cache
 
-# sequences searching and downloading
-# api_key=
-email=
-nreq=3
-retry=10000
-
 # sequence sorting and maximum representative capping
 sort_by=is_type,is_published,is_refseq,ambig_count,modified_date,download_date,seqhash
 species_cap=5000
 
-path=
-
-# sequence selection filter lists
-do_not_download=%(path)s/lists/do_not_download.txt
-do_not_trust=%(path)s/lists/do_not_trust.txt
-trust=%(path)s/lists/trust.txt
+# optional sequence selection filter lists
+do_not_download=path/to/do_not_download.txt
+do_not_trust=path/to/do_not_trust.txt
+trust=path/to/trust.txt
 
 # taxonomy
-accession2taxid=%(path)s/ncbi/taxonomy/LATEST/accession2taxid.gz
-taxdmp=%(path)s/ncbi/taxonomy/LATEST/taxdmp.zip
-taxonomy=%(path)s/credentials/postgresql-ncbi_taxonomy-taxonomy_user.conf
-
-# virtual container runtime commands
-# docker=/user/bin/docker run  # TODO: implement
-# singularity=/usr/local/bin/singularity run
-
-# images
-blast=%(path)s/singularity/blast-2.10.1-singularity3.6.1.img
-deenurp=%(path)s/singularity/deenurp-v0.2.7-singularity3.4.1-dist.img
-eutils=%(path)s/singularity/ncbi-edirect-15.6-singularity3.7.3.img
-infernal=%(path)s/singularity/infernal-1.1.4-singularity3.7.1.img
-taxit=%(path)s/singularity/taxtastic-0.9.1-singularity3.6.1.img
-vsearch=%(path)s/singularity/vsearch-2.13.4.img
-
-[TEST]
-testfiles=testfiles
-accession2taxid=%(testfiles)s/accession2taxid.gz
-do_not_download=%(testfiles)s/do_not_download.txt
-do_not_trust=%(testfiles)s/do_not_trust.txt
-trust=%(testfiles)s/trust.txt
-outdir=test_output
-taxdmp=%(testfiles)s/taxdmp.zip
-taxonomy=%(testfiles)s/taxonomy.conf
+accession2taxid=path/to/accession2taxid.gz
+taxdmp=path/to/taxdmp.zip
+taxonomy=path/to/taxonomy.conf
+
+# this var is for dev purposes only
+# ncbi_conf=testfiles/ncbi.conf
+
+[ENV]
+
+# optional: an NCBI API key allows a higher mefetch request rate (-reqs)
+# MEFETCH_API_KEY=
+
+# MEFETCH_EMAIL must be set here if it is not already in os.environ
+# MEFETCH_EMAIL=
+
+MEFETCH_MAX_RETRY=-1
+MEFETCH_RETRY=10000
+MEFETCH_WORKERS=100
diff --git a/testfiles/ncbi.conf b/testfiles/ncbi.conf
new file mode 100644
index 0000000..2927bdc
--- /dev/null
+++ b/testfiles/ncbi.conf
@@ -0,0 +1,7 @@
+[DEFAULT]
+16s=16s[All Fields] AND rRNA[Feature Key] AND Bacteria[Organism] AND 1200 : 99999999999[Sequence Length]
+classified=%(16s)s AND (%(test_ids)s)
+test_ids=txid104100[Organism] OR txid198480[Organism] OR txid333297[Organism] OR txid487838[Organism] OR txid1978400[Organism] OR txid749509[Organism] OR txid1138917[Organism] OR txid1140002[Organism] OR txid393538[Organism] OR txid1676683[Organism] OR txid1736139[Organism]
+test_tm7=txid1805375[Organism] OR txid1476577[Organism]
+tm7=%(16s)s AND %(test_tm7)s
+types=%(16s)s AND sequence_from_type[Filter] AND (%(test_ids)s OR %(test_tm7)s)
diff --git a/testfiles/settings.conf b/testfiles/settings.conf
new file mode 100644
index 0000000..eb129d0
--- /dev/null
+++ b/testfiles/settings.conf
@@ -0,0 +1,31 @@
+[DEFAULT]
+outdir=test_output
+cachedir=%(outdir)s/.cache
+
+# sequence sorting and maximum representative capping
+sort_by=is_type,is_published,is_refseq,ambig_count,modified_date,download_date,seqhash
+species_cap=5000
+
+# optional sequence selection filter lists
+do_not_download=testfiles/do_not_download.txt
+do_not_trust=testfiles/do_not_trust.txt
+trust=testfiles/trust.txt
+
+accession2taxid=testfiles/accession2taxid.gz
+taxdmp=testfiles/taxdmp.zip
+taxonomy=testfiles/taxonomy.conf
+
+# only set this var for dev or testing purposes
+ncbi_conf=testfiles/ncbi.conf
+
+[ENV]
+
+# optional: an NCBI API key allows a higher mefetch request rate (-reqs)
+# MEFETCH_API_KEY=
+
+# MEFETCH_EMAIL must be set here if it is not already in os.environ
+# MEFETCH_EMAIL=
+
+MEFETCH_MAX_RETRY=-1
+MEFETCH_RETRY=10000
+MEFETCH_WORKERS=100
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..c2443ab
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,52 @@
+import unittest
+import os
+
+
+class TestSConsPipelineOutput(unittest.TestCase):
+    def test_pipeline_output(self):
+        output_dir = 'test_output'
+
+        output_files = [
+            'blast',
+            'lineages.csv',
+            'seqs.fasta',
+            'seq_info.csv',
+            'taxonomy.csv',
+        ]
+
+        expected = {}
+        for date_dir in os.listdir(output_dir):
+            if len(date_dir) == 8 and date_dir.isnumeric():
+                out = f'{output_dir}/{date_dir}'
+                self.assertTrue(os.path.isfile(out + '/SUCCESS'))
+                expected.update({
+                    f'{out}/dedup/1200bp/types': output_files,
+                    f'{out}/dedup/1200bp/named': output_files,
+                    f'{out}/dedup/1200bp/named/filtered': [
+                        'blast',
+                        'taxonomy.csv',
+                        'outliers.csv',
+                        'unsorted.fasta'],
+                    f'{out}/dedup/1200bp/named/filtered/trusted': output_files,
+                    f'{out}/dedup/1200bp/named/filtered/types': output_files,
+                })
+        self.assertTrue(expected, 'no dated output directory found')
+
+        # Verify the folder structure and contents
+        for folder_path, contents in expected.items():
+            self.assertTrue(
+                os.path.isdir(folder_path),
+                f"Folder {folder_path} does not exist")
+            actual_contents = os.listdir(folder_path)
+            for expected_file in contents:
+                self.assertIn(
+                    expected_file,
+                    actual_contents,
+                    f"{expected_file} not found in {folder_path}")
+                file = os.path.join(folder_path, expected_file)
+                self.assertTrue(os.path.isfile(file))
+                self.assertTrue(os.stat(file).st_size > 0)  # not empty
+
+
+if __name__ == '__main__':
+    unittest.main()
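Because tests/ is a package, the suite above is picked up by plain `python -m unittest` (which is what the workflow runs after the scons pass). The equivalent explicit invocation, e.g. for running it under a debugger or with more verbosity:

import unittest

# discover tests/__init__.py from the repository root and run it verbosely
suite = unittest.defaultTestLoader.discover('.')
unittest.TextTestRunner(verbosity=2).run(suite)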