diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..677ea10 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,7 @@ +[report] +show_missing = True +omit = + claspy/_version.py + +[run] +branch = True diff --git a/.github/workflows/cibuild.yml b/.github/workflows/cibuild.yml new file mode 100644 index 0000000..7c348a2 --- /dev/null +++ b/.github/workflows/cibuild.yml @@ -0,0 +1,26 @@ +name: CI Build + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: ["3.10", "3.11"] + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install + run: | + python -m pip install --upgrade pip + pip install . + claspy_db --dest=/home/runner/work/claspy/claspy/claspy/cellosaurus.json + - name: Test with pytest + run: make test + - name: Style check + run: make style diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..06776b7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.vscode/ +__pycache__/ +.coverage +sandbox/ +claspy.egg-info/ +claspy/cellosaurus.txt +claspy/cellosaurus.json +.DS_Store diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..d21b901 --- /dev/null +++ b/.pylintrc @@ -0,0 +1 @@ +good-names=fh,db diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..30042f6 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,36 @@ +# Change Log + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ + +## Unreleased + +### Changed +- Improvements to loading profile and database objects (!9) +- Database search is now restricted based on species inferred from markers in the query profile, not by user-specified species (!8) +- Summary report is displayed in terminal, full report to a CSV file (!11, !12) + +### Fixed +- Added names of additional valid markers present in ForenSeq but not in Cellosaurus; includes four autosomal, seven X chromosome, and 21 Y chromosome STR markers (!8) +- Rank order of results with the same score but different numbers of shared alleles (!10) + + +## [0.0.2] 2023-05-25 + +### Fixed +- Divide by zero bug when query and reference have no shared alleles (!6) +- Marker name validation and standardization for human, mouse, and dog (!6) +- Rank order of results with the same score but different numbers of shared alleles (!6) +- Handling of string alleles, e.g. X and Y for Amelogenin (!7) +- Smart natural (not lexicographical) sorting of alleles for display (!7) + + +## [0.0.1] 2023-05-22 + +Initial release! Includes: + +- `claspy_db` for downloading and formatting the Cellosaurus database +- `claspy` for searching a profile against Cellosaurus and reporting the best results diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..866e2c3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,34 @@ +Copyright (c) 2023, DHS; all rights reserved. + +This Software was prepared for the Department of Homeland Security +(DHS) by the Battelle National Biodefense Institute, LLC (BNBI) as +part of contract HSHQDC-15-C-00064 to manage and operate the National +Biodefense Analysis and Countermeasures Center (NBACC), a Federally +Funded Research and Development Center. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..a98abca
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+recursive-include claspy/tests/data/ *
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2c9b2e9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,23 @@
+## #===== development tasks =====#
+
+## help: print this help message and exit
+help: Makefile
+	@sed -n 's/^## //p' Makefile
+
+## test: run automated test suite
+test:
+	pytest --cov=claspy claspy
+
+## format: autoformat Python code
+format:
+	black --line-length=99 setup.py claspy/*.py claspy/tests/*.py
+
+## style: check code style
+style:
+	black --line-length=99 --check setup.py claspy/*.py claspy/tests/*.py
+
+## hooks: deploy git pre-commit hooks for development
+hooks:
+	echo "set -eo pipefail" > .git/hooks/pre-commit
+	echo "make style" >> .git/hooks/pre-commit
+	chmod 755 .git/hooks/pre-commit
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f3a5928
--- /dev/null
+++ b/README.md
@@ -0,0 +1,32 @@
+# Claspy: cell line authentication with STRs in Python
+
+Documentation for Claspy is pending.
+In the meantime, see the following hints.
+
+```
+claspy_db # Run one time to install Cellosaurus database
+claspy query.csv # Run to find closest profile to the query in the database
+```
+
+STR profiles should be in tabular/CSV format and look something like this.
+ +```csv +Sample,Marker,Allele1,Allele2 +sample1,CSF1PO,12,13 +sample1,D13S317,12, +sample1,D16S539,9,11 +sample1,D18S51,12,15 +sample1,D19S433,13,15 +sample1,D21S11,29,32.2 +sample1,D2S1338,20,23 +sample1,D3S1358,16,17 +sample1,D5S818,10,11 +sample1,D7S820,10,11 +sample1,D8S1179,13,15 +sample1,FGA,18,24 +sample1,Penta D,9, +sample1,Penta E,17, +sample1,TH01,9,9.3 +sample1,TPOX,8, +sample1,vWA,18,19 +``` diff --git a/claspy/__init__.py b/claspy/__init__.py new file mode 100644 index 0000000..3d7b58a --- /dev/null +++ b/claspy/__init__.py @@ -0,0 +1,18 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. +# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from .markers import validate_names +from .str_profile import Profile +from . import db +from .cli import main, db_main +from ._version import get_versions + +__version__ = get_versions()["version"] +del get_versions diff --git a/claspy/_version.py b/claspy/_version.py new file mode 100644 index 0000000..1680332 --- /dev/null +++ b/claspy/_version.py @@ -0,0 +1,646 @@ +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. 
Generated by
+# versioneer-0.20 (https://github.com/python-versioneer/python-versioneer)
+
+"""Git implementation of _version.py."""
+
+import errno
+import os
+import re
+import subprocess
+import sys
+
+
+def get_keywords():
+    """Get the keywords needed to look up the version information."""
+    # these strings will be replaced by git during git-archive.
+    # setup.py/versioneer.py will grep for the variable names, so they must
+    # each be defined on a line of their own. _version.py will just call
+    # get_keywords().
+    git_refnames = "$Format:%d$"
+    git_full = "$Format:%H$"
+    git_date = "$Format:%ci$"
+    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
+    return keywords
+
+
+class VersioneerConfig:  # pylint: disable=too-few-public-methods
+    """Container for Versioneer configuration parameters."""
+
+
+def get_config():
+    """Create, populate and return the VersioneerConfig() object."""
+    # these strings are filled in when 'setup.py versioneer' creates
+    # _version.py
+    cfg = VersioneerConfig()
+    cfg.VCS = "git"
+    cfg.style = "pep440"
+    cfg.tag_prefix = ""
+    cfg.parentdir_prefix = "claspy-"
+    cfg.versionfile_source = "claspy/_version.py"
+    cfg.verbose = False
+    return cfg
+
+
+class NotThisMethod(Exception):
+    """Exception raised if a method is not valid for the current scenario."""
+
+
+LONG_VERSION_PY = {}
+HANDLERS = {}
+
+
+def register_vcs_handler(vcs, method):  # decorator
+    """Create decorator to mark a method as the handler of a VCS."""
+
+    def decorate(f):
+        """Store f in HANDLERS[vcs][method]."""
+        if vcs not in HANDLERS:
+            HANDLERS[vcs] = {}
+        HANDLERS[vcs][method] = f
+        return f
+
+    return decorate
+
+
+# pylint:disable=too-many-arguments,consider-using-with # noqa
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None):
+    """Call the given command(s)."""
+    assert isinstance(commands, list)
+    process = None
+    for command in commands:
+        try:
+            dispcmd = str([command] + args)
+            # remember shell=False, so use
git.cmd on windows, not just git + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r"\d", r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r"\d", r): + continue + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner( + GITS, + ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. 
+ branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post0.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post0.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. 
+ + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split("/"): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/claspy/cli.py b/claspy/cli.py new file mode 100644 index 0000000..05a6ba0 --- /dev/null +++ b/claspy/cli.py @@ -0,0 +1,131 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. +# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. 
+# ------------------------------------------------------------------------------------------------- + +from .db import CellosaurusDB +from .str_profile import Profile +from argparse import ArgumentParser +from claspy.db import CellosaurusDB +import claspy +import pandas as pd +import sys + + +def main(arglist=None): + if arglist: + arglist = map(str, arglist) + args = get_parser().parse_args(arglist) + db = CellosaurusDB.load(args.db) + all_summaries = list() + all_reports = list() + for query in Profile.load(args.query): + results = db.search( + query, + algorithm=args.algorithm, + mode=args.mode, + taxid=query.taxid, + amel=args.amel, + minscore=args.min_score, + maxhits=args.max_hits, + ) + all_summaries.append(results.summary) + all_reports.append(results.full_report) + pd.concat(all_summaries).to_markdown(sys.stdout, index=False, floatfmt=".3f") + print("") + if args.out: + pd.concat(all_reports).to_csv(args.out, index=False) + print(f"\nFull report written to {args.out}", file=sys.stderr) + + +def get_parser(): + parser = ArgumentParser(description="Claspy: cell line authentication with STRs in Python") + parser.add_argument("query", help="query STR profile") + parser.add_argument( + "-v", "--version", action="version", version=f"Claspy v{claspy.__version__}" + ) + parser.add_argument( + "-d", + "--db", + metavar="PATH", + default=CellosaurusDB.default_path(), + help=f"path to Cellosaurus database; default is {CellosaurusDB.default_path()}", + ) + parser.add_argument( + "-a", + "--algorithm", + metavar="A", + choices=("Tanabe", "reference", "query"), + default="Tanabe", + help="scoring algorithm; available options are Tanabe (2S/(Q+R)), query (S/Q), and reference (S/R); default is Tanabe", + ) + parser.add_argument( + "-m", + "--mode", + metavar="M", + choices=("intersect", "reference", "query"), + default="intersect", + help="mode for handling missing data; available options are query (all query markers), reference (all reference markers), and intersect 
(only shared markers); default is intersect", + ) + parser.add_argument( + "-s", + "--min-score", + type=float, + metavar="S", + default=0.0, + help="do not report candidate matches with a score < S; by default S=0 (filter disabled)", + ) + parser.add_argument( + "-x", + "--max-hits", + type=int, + metavar="X", + default=20, + help="do not report more than X candidate matches; by default X=20; set X<=0 to disable this filter", + ) + parser.add_argument( + "--amel", + action="store_true", + help="include the Amelogenin marker, if present, in scoring calculations; by default it is excluded", + ) + parser.add_argument( + "-o", + "--out", + metavar="FILE", + help="write a full report in CSV format to FILE; by default only a summary report is printed to the terminal", + ) + return parser + + +def db_main(arglist=None): + args = get_db_parser().parse_args(arglist) + if args.path is None: + records = CellosaurusDB.convert_from_download() + else: + records = CellosaurusDB.convert_from_path(args.path) + records.to_json(args.dest) + print(f"Database written to {args.dest}", file=sys.stderr) + + +def get_db_parser(): + parser = ArgumentParser( + description="Retrieve, format, and install the Cellosaurus database for Claspy" + ) + parser.add_argument( + "-p", + "--path", + help="install the Cellosaurus database from local file PATH rather than a remote URL", + ) + parser.add_argument( + "-d", + "--dest", + metavar="PATH", + default=CellosaurusDB.default_path(), + help=f"destination for the Cellosaurus database in JSON format; by default PATH={CellosaurusDB.default_path()}", + ) + return parser diff --git a/claspy/db.py b/claspy/db.py new file mode 100644 index 0000000..7d85c47 --- /dev/null +++ b/claspy/db.py @@ -0,0 +1,198 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. 
+# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from .str_profile import Profile +from .result import ProfileResult, SearchResult +from importlib.resources import files +import json +from pathlib import Path +import re +import sys +from tqdm import tqdm +from urllib.request import urlretrieve + + +class CellosaurusDB(list): + def search( + self, + query, + algorithm="Tanabe", + mode="intersect", + amel=False, + taxid=9606, + minscore=0.0, + maxhits=20, + ): + result = SearchResult(query, minscore=minscore, maxhits=maxhits) + for reference in self: + if taxid is not None and not reference.taxid_match(taxid): + continue + score, num_shared_alleles = Profile.score( + query, reference, algorithm=algorithm, mode=mode, amel=amel + ) + proresult = ProfileResult(query._meta["sample"], score, num_shared_alleles, reference) + result.add_profile_result(proresult) + return result + + @classmethod + def load(cls, path=None): + if path is None: + path = cls.default_path() + with open(path, "r") as instream: + return cls.from_json(instream) + + @staticmethod + def default_path(): + return files("claspy") / "cellosaurus.json" + + @classmethod + def from_json(cls, instream): + payload = json.load(instream) + if not isinstance(payload, dict) and not isinstance(payload, list): + raise ValueError(f"unexpected data type '{type(payload)}'") + if isinstance(payload, dict): + payload = [payload] + records = cls() + for profile in payload: + metadata = profile["meta"] + alleles = profile["alleles"] + records.append(Profile(alleles, metadata)) + 
return records + + @classmethod + def convert_from_download(cls, url=None): + if url is None: + url = "https://ftp.expasy.org/databases/cellosaurus/cellosaurus.txt" + path = files("claspy") / "cellosaurus.txt" + with ProgressBar(unit="B", unit_scale=True, miniters=1, desc=Path(url).name) as pb: + urlretrieve(url, path, reporthook=pb.update_to) + return cls.convert_from_path(path) + + @classmethod + def convert_from_path(cls, path=None): + profiles = cls() + with open(path, "r") as instream: + parser = cls.parse_cellosaurus_records(instream) + for n, profile in enumerate(parser): + profiles.append(profile) + print(f"[CellosaurusDB] parsed {n+1} distinct cell line STR profiles", file=sys.stderr) + return profiles + + @staticmethod + def parse_cellosaurus_records(instream): + parser = CellosaurusDB.parse_cellosaurus_into_blocks(instream) + for n, block in enumerate(parser): + entry = CellosaurusEntry(block) + for alleles, meta in entry.profiles: + yield Profile(alleles, meta) + print(f"[CellosaurusDB] parsed {n+1} database records", file=sys.stderr) + + @staticmethod + def parse_cellosaurus_into_blocks(instream): + block = list() + for line in instream: + if line.startswith("ID"): + break + block.append(line.strip()) + for line in instream: + line = line.strip() + if line == "//": + yield block + block = list() + block.append(line) + + def to_json(self, output): + if isinstance(output, str) or isinstance(output, Path): + with open(output, "w") as outstream: + json.dump([profile.payload for profile in self], outstream, indent=4) + else: + json.dump([profile.payload for profile in self], output, indent=4) + + +class CellosaurusEntry: + ATTRIBUTES = { + "ID": "identifier", + "AC": "accession", + "SY": "synonyms", + } + + def __init__(self, data): + self._data = data + self.meta = dict() + self.alleles = dict() + for line in data: + self.parse_meta(line) + self.parse_sources(line) + self.parse_alleles(line) + + def parse_meta(self, line): + if line.startswith(("ID", "AC", 
"SY")): + key, value = re.split(r"\s+", line, 1) + assert key not in self.meta, key + self.meta[self.ATTRIBUTES[key]] = value + elif line.startswith("OX"): + match = re.match(r"OX NCBI_TaxID=(\d+); ! ([^\n]+)", line) + if not match: + raise ValueError(f"cannot parse species of origin: {line}") + taxid, organism = match.groups() + if "taxid" not in self.meta: + self.meta["taxid"] = list() + self.meta["organism"] = list() + self.meta["taxid"].append(int(taxid)) + self.meta["organism"].append(organism) + + def parse_sources(self, line): + if line.startswith("ST") and "Source" in line: + match = re.match(r"ST Source\(s\): ([^\n]+)", line) + if not match: + raise ValueError(f"could not parse sources: {line}") + source_string = match.group(1) + for source in source_string.split("; "): + self.alleles[source] = dict() + + def parse_alleles(self, line): + if line.startswith("ST") and "Source" not in line and "Not_detected" not in line: + match = re.match(r"^ST ([^:]+): ([\dXY,\. ]+)(.+)?", line) + if not match: + raise ValueError(f"could not parse STR profile data: {line}") + marker, allele_str, sources = match.groups() + if sources is None: + for marker_alleles in self.alleles.values(): + marker_alleles[marker] = allele_str.strip() + else: + sources = sources.replace("(", "").replace(")", "") + for source in sources.split("; "): + if source not in self.alleles: + print( + "[CellosaurusDB] WARNING:", + f"Source '{source}' not defined for cell line {self.meta['identifier']}", + file=sys.stderr, + ) + else: + self.alleles[source][marker] = allele_str.strip() + + @property + def profiles(self): + for source, marker_alleles in self.alleles.items(): + metadata = dict(self.meta) + if len(metadata["taxid"]) == 1: + metadata["taxid"] = metadata["taxid"][0] + metadata["organism"] = metadata["organism"][0] + metadata["source"] = source + yield marker_alleles, metadata + + +class ProgressBar(tqdm): + """Stolen shamelessly from https://stackoverflow.com/a/53877507/459780.""" + + def 
update_to(self, b=1, bsize=1, tsize=None): + if tsize is not None: + self.total = tsize + self.update(b * bsize - self.n) diff --git a/claspy/markers.py b/claspy/markers.py new file mode 100644 index 0000000..ab5763d --- /dev/null +++ b/claspy/markers.py @@ -0,0 +1,158 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. +# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + + +valid_names = { + 9606: [ + "Amelogenin", + "CSF1PO", + "D10S1248", + "D12S391", + "D13S317", + "D16S539", + "D17S1301", + "D18S51", + "D19S433", + "D1S1656", + "D20S482", + "D21S11", + "D22S1045", + "D2S1338", + "D2S441", + "D3S1358", + "D4S2408", + "D5S818", + "D6S1043", + "D7S820", + "D8S1179", + "D9S1122", + "DXS10074", + "DXS101", + "DXS10103", + "DXS10135", + "DXS7132", + "DXS7423", + "DXS8378", + "DYF387S1", + "DYS19", + "DYS385a-b", + "DYS389I", + "DYS389II", + "DYS390", + "DYS391", + "DYS391", + "DYS392", + "DYS437", + "DYS438", + "DYS439", + "DYS448", + "DYS460", + "DYS481", + "DYS505", + "DYS522", + "DYS533", + "DYS549", + "DYS570", + "DYS570", + "DYS576", + "DYS576", + "DYS612", + "DYS635", + "DYS643", + "F13A01", + "F13B", + "FESFPS", + "FGA", + "HPRTB", + "LPL", + "Penta C", + "Penta D", + "Penta E", + "SE33", + "TH01", + "TPOX", + "Y-GATA-H4", + "vWA", + ], + 10090: [ + "Mouse STR 1-1", + "Mouse STR 1-2", + "Mouse STR 2-1", + "Mouse STR 3-2", + "Mouse STR 4-2", + "Mouse STR 5-5", + "Mouse STR 6-4", + "Mouse STR 6-7", + "Mouse STR 7-1", + "Mouse STR 8-1", + 
"Mouse STR 9-2", + "Mouse STR 11-2", + "Mouse STR 12-1", + "Mouse STR 13-1", + "Mouse STR 15-3", + "Mouse STR 17-2", + "Mouse STR 18-3", + "Mouse STR 19-2", + "Mouse STR X-1", + ], + 9615: [ + "Dog FHC2010", + "Dog FHC2054", + "Dog FHC2079", + "Dog PEZ1", + "Dog PEZ3", + "Dog PEZ5", + "Dog PEZ6", + "Dog PEZ8", + "Dog PEZ12", + "Dog PEZ20", + ], +} + +species_by_taxid = { + 9606: "human", + 10090: "mouse", + 9615: "dog", +} + + +def validate_names(marker_names): + """Validate marker names + + For each given marker name, determine the standardized form. Determine the species associated + with this list of marker names. Raise an exception if any marker name cannot be validated, or + if the list contains marker names from multiple species. + """ + taxids = set() + valid = dict() + for name in marker_names: + valid[name], taxid = standardize_name(name) + taxids.add(taxid) + if None in taxids: + invalid = [name for name, valid_name in valid.items() if valid_name is None] + invalid = ", ".join(invalid) + raise ValueError(f"invalid marker name(s): {invalid}") + if len(taxids) > 1: + species = sorted([species_by_taxid[taxid] for taxid in taxids]) + species = ", ".join(species) + message = f"list of marker names includes markers from different species: {species}" + raise ValueError(message) + taxid = taxids.pop() + return valid, taxid + + +def standardize_name(name): + candidate = name.replace(" ", "").lower() + for taxid, species_names in valid_names.items(): + for species_name in species_names: + species_candidate = species_name.replace(" ", "").lower() + if candidate == species_candidate: + return species_name, taxid + return None, None diff --git a/claspy/result.py b/claspy/result.py new file mode 100644 index 0000000..c8c841d --- /dev/null +++ b/claspy/result.py @@ -0,0 +1,152 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. 
+# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from collections import defaultdict, namedtuple +import pandas as pd + + +class SearchResult: + """Result for database search of a single query profile + + The SearchResult includes a score for every profile of the relevant species in the database. + Distinct profiles for the same cell line are stored in a single CellLineResult object, + accessible by that cell line's identifier. + """ + + def __init__(self, query, minscore=0.0, maxhits=20): + self.query = query + self.minscore = minscore + self.maxhits = maxhits + self.results_by_cell_line = defaultdict(CellLineResult) + + def add_profile_result(self, result): + self.results_by_cell_line[result.reference.identifier].append(result) + + @property + def summary(self): + colnames = ["Sample", "CellLine", "Score", "SharedAlleles", "Source"] + summary = pd.DataFrame([result.summary for result in self], columns=colnames) + return summary + + @property + def full_report(self): + entries = list() + markers = self.all_markers + entry = ( + self.query._meta["sample"], + self.query._meta["sample"], + "query", + pd.NA, + pd.NA, + pd.NA, + *self.query.marker_alleles(markers), + ) + entries.append(entry) + for result in self: + for entry in result.full_report(markers): + entries.append(entry) + colnames = ["Sample", "CellLine", "Status", "Score", "SharedAlleles", "Source"] + markers + return pd.DataFrame(entries, columns=colnames) + + def __iter__(self): + for n, identifier in enumerate(self.ids_by_score): + if self.maxhits > 0 and n >= 
self.maxhits: + return + result = self.results_by_cell_line[identifier] + if result.top_score < self.minscore: + return + yield result + + @property + def ids_by_score(self): + sorted_results = sorted( + self.results_by_cell_line.values(), + key=lambda result: (result.top_score, result.top_score_shared_alleles), + reverse=True, + ) + for result in sorted_results: + yield result.identifier + + @property + def all_markers(self): + """Determine all markers to report + + This includes any marker for which allele data is present in the query or at least one of + the database profiles to be included in the final full report. + """ + markers = set() + for marker, allele in self.query.alleles(): + markers.add(marker) + for result in self: + for subresult in result: + for marker, allele in subresult.reference.alleles(): + markers.add(marker) + return sorted(markers) + + +class CellLineResult(list): + """A list of query search scores and database profiles from the same cell line + + This class is a list of ProfileResult objects, and essentially provides some convenience + functions for handling one or more scored profiles for a cell line from a database search. 
+ """ + + @property + def top_score(self): + return max([single_result.score for single_result in self]) + + @property + def top_score_shared_alleles(self): + return max( + [ + single_result.shared_alleles + for single_result in self + if single_result.score == self.top_score + ] + ) + + @property + def identifier(self): + ids = [single_result.reference.identifier for single_result in self] + assert len(set(ids)) == 1 + return ids[0] + + @property + def sample(self): + samples = [single_result.sample for single_result in self] + assert len(set(samples)) == 1 + return samples[0] + + @property + def summary(self): + results = sorted(self, reverse=True) + best = results[0] + best = (best.score, best.shared_alleles, best.reference.source) + return self.sample, self.identifier, *best + + def full_report(self, markers): + results = sorted(self, reverse=True) + best = results[0] + status = "best" if len(results) > 1 else "only" + yield self.sample, self.identifier, status, *best.full_report(markers) + if len(results) > 1: + worst = results[-1] + yield self.sample, self.identifier, "worst", *worst.full_report(markers) + + +class ProfileResult(namedtuple("ProfileResult", "sample score shared_alleles reference")): + """Score from comparing a query profile to a single database reference profile""" + + @property + def summary(self): + return self.score, self.shared_alleles, self.reference.source + + def full_report(self, markers): + return (*self.summary, *self.reference.marker_alleles(markers)) diff --git a/claspy/str_profile.py b/claspy/str_profile.py new file mode 100644 index 0000000..77ab6a8 --- /dev/null +++ b/claspy/str_profile.py @@ -0,0 +1,227 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. 
+# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from .markers import validate_names +import pandas as pd +import re + + +class Profile: + """Class for handling STR profiles + + Includes methods for loading, saving, and scoring genetic profiles based on short tandem repeat + (STR) markers. Each profile contains a set of STR alleles. In mammalian cell lines, these are + expected to be diploid; that is, there should be at most observed two alleles for each marker. + """ + + def __init__(self, alleles, meta): + self._meta = meta + valid_names, taxid = validate_names(alleles.keys()) + self.taxid = taxid + self._alleles = dict() + for marker, marker_alleles in alleles.items(): + marker = valid_names[marker] + marker_alleles = Profile.parse_allele_string(marker_alleles) + self._alleles[marker] = marker_alleles + + @classmethod + def load(cls, path): + types = {f"Allele{i+1}": str for i in range(10)} + data = pd.read_csv(path, sep=None, engine="python", dtype=types) + for column in ("Sample", "Marker", "Allele1"): + if column not in data.columns: + raise ValueError(f"expected column '{column}' missing") + for sample_name, sample_data in data.groupby("Sample"): + numalleles = Profile.num_alleles_from_table(sample_data) + metadata = {"sample": sample_data.Sample.iloc[0]} + alleles = dict() + for i, row in sample_data.iterrows(): + marker_alleles = list() + for n in range(numalleles): + allele = row[f"Allele{n+1}"] + if not pd.isna(allele): + marker_alleles.append(allele) + alleles[row.Marker] = ",".join(sorted(marker_alleles)) + 
yield Profile(alleles, metadata) + + @staticmethod + def num_alleles_from_table(table): + count = 1 + for column in table: + if column.startswith("Allele"): + try: + number = int(column[6:]) + except ValueError as verr: + raise ValueError(f"invalid table header '{column}'") from verr + if number > count: + count = number + if count > 10: + raise ValueError(f"found {count} allele columns, well above expected limit") + return count + + @staticmethod + def parse_allele_string(alleles): + return set(alleles.replace(" ", "").split(",")) + + @staticmethod + def allele_repr(allele_set): + alleles = [Profile.allele_transform(a) for a in allele_set] + alleles = sorted(alleles) + alleles = [str(a) for a in alleles] + return ",".join(alleles) + + @staticmethod + def allele_transform(allele): + if "." in allele: + return float(allele) + elif re.match(r"^\d+$", allele): + return int(allele) + elif allele in ("X", "Y"): + return allele + else: + raise ValueError(f"unexpected allele '{allele}'") + + @property + def table(self): + sample = self._meta["sample"] if "sample" in self._meta else "sample" + alleles = list() + for marker, marker_alleles in self._alleles.items(): + allele_repr = Profile.allele_repr(marker_alleles) + sorted_alleles = allele_repr.split(",") + entry = [sample, marker, *sorted_alleles] + while len(entry) < self.max_num_alleles + 2: + entry.append(None) + alleles.append(entry) + colnames = ["Sample", "Marker"] + [f"Allele{i+1}" for i in range(self.max_num_alleles)] + return pd.DataFrame(alleles, columns=colnames) + + @property + def max_num_alleles(self): + return max([len(allele_set) for allele_set in self._alleles.values()]) + + def marker_alleles(self, markers): + for marker in markers: + if marker not in self._alleles: + yield pd.NA + else: + yield Profile.allele_repr(self._alleles[marker]) + + def __iter__(self): + for marker, alleles in self._alleles.items(): + for allele in sorted(alleles): + yield marker, allele + + def __len__(self): + return 
len([allele for allele in self])
+
+    @staticmethod
+    def score(query, reference, algorithm="Tanabe", mode="intersect", amel=False):
+        """Compute a similarity score between two profiles
+
+        The score is based on the number of alleles shared between the query profile and the
+        reference profile. Three scoring algorithms are implemented as described below: "Tanabe" is
+        the default (Q=# query alleles, R=# reference alleles, S=# shared alleles).
+
+        - "Tanabe": 2S / (Q+R)
+        - "query": S / Q
+        - "reference": S / R
+
+        There are also three modes for handling missing allele data in one or both profiles: the
+        "intersect" mode is used by default.
+
+        - "intersect": consider alleles only at markers present in both profiles
+        - "query": consider alleles for markers present in the query profile, even if missing from
+        the reference profile
+        - "reference": consider alleles for markers present in the reference profile, even if
+        missing from the query profile
+
+        The Amelogenin marker (amel) is used for sex determination and is typically excluded from
+        scoring. Set `amel=True` to include. 
+ + The Tanabe algorithm is described in doi:10.11418/jtca1981.18.4_329, while the query and + reference algorithms are described in doi:10.1073/pnas.121616198 + """ + if algorithm not in ("Tanabe", "query", "reference"): + raise ValueError(f"unsupported scoring algorithm '{algorithm}'") + if mode not in ("intersect", "query", "reference"): + raise ValueError(f"unsupported scoring mode '{mode}'") + markers = Profile.markers_for_scoring(query, reference, mode=mode, amel=amel) + query_alleles = len(query.alleles(markers=markers)) + refr_alleles = len(reference.alleles(markers=markers)) + shared_alleles = len(query.alleles(markers=markers) & reference.alleles(markers=markers)) + score = 0.0 + if shared_alleles > 0: + if algorithm == "Tanabe": + score = (2 * shared_alleles) / (query_alleles + refr_alleles) + elif algorithm == "query": + score = shared_alleles / query_alleles + else: + score = shared_alleles / refr_alleles + return score, shared_alleles + + @staticmethod + def markers_for_scoring(query, reference, mode="intersect", amel=False): + if mode == "intersect": + markers = query.markers & reference.markers + elif mode == "query": + markers = query.markers + else: + markers = reference.markers + if not amel: + markers = [m for m in markers if m != "Amelogenin"] + return markers + + @property + def markers(self): + return set(self._alleles.keys()) + + def alleles(self, markers=None): + if markers is None: + markers = self.markers + return {(marker, allele) for marker, allele in self if marker in markers} + + def __str__(self): + return self.table.to_csv(index=False) + + def taxid_match(self, taxid): + if isinstance(self._meta["taxid"], list): + for testid in self._meta["taxid"]: + if int(testid) == int(taxid): + return True + return False + else: + return int(self._meta["taxid"]) == int(taxid) + + @property + def identifier(self): + return self._meta["identifier"] + + @property + def source(self): + return self._meta["source"] + + @property + def payload(self): + 
return {"meta": self._meta, "alleles": self.allele_dict} + + @property + def allele_dict(self): + return {marker: ",".join(sorted(alleles)) for marker, alleles in self._alleles.items()} + + @property + def slug(self): + return self.identifier, self._meta["accession"], self.source + + def __lt__(self, other): + return self.slug < other.slug + + @property + def payload(self): + return {"meta": self._meta, "alleles": self.allele_dict} diff --git a/claspy/tests/__init__.py b/claspy/tests/__init__.py new file mode 100644 index 0000000..dc66b71 --- /dev/null +++ b/claspy/tests/__init__.py @@ -0,0 +1,15 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. +# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. 
+# ------------------------------------------------------------------------------------------------- + +from importlib.resources import files + + +def data_file(path): + return files("claspy") / "tests" / "data" / path diff --git a/claspy/tests/data/db-cvcl-1085.csv b/claspy/tests/data/db-cvcl-1085.csv new file mode 100644 index 0000000..55d1b18 --- /dev/null +++ b/claspy/tests/data/db-cvcl-1085.csv @@ -0,0 +1,18 @@ +Sample,Marker,Allele1,Allele2,Allele3 +refr1,CSF1PO,12,13, +refr1,D13S317,12,, +refr1,D16S539,9,11, +refr1,D18S51,12,15, +refr1,D19S433,13,15, +refr1,D21S11,29,32.2, +refr1,D2S1338,20,23, +refr1,D3S1358,16,17, +refr1,D5S818,10,11, +refr1,D7S820,10,11, +refr1,D8S1179,13,14,15 +refr1,FGA,18,24, +refr1,Penta D,9,, +refr1,Penta E,10,17, +refr1,TH01,9,9.3, +refr1,TPOX,8,, +refr1,vWA,18,19, diff --git a/claspy/tests/data/examples.json b/claspy/tests/data/examples.json new file mode 100644 index 0000000..049300b --- /dev/null +++ b/claspy/tests/data/examples.json @@ -0,0 +1,32 @@ +[ + { + "meta": { + "description": "Example 1" + }, + "alleles": { + "CSF1PO": "13,14", + "D5S818": "13", + "D7S820": "8", + "D13S317": "12", + "FGA": "24", + "TH01": "8", + "TPOX": "11", + "vWA": "16" + } + }, + { + "meta": { + "description": "Example 2" + }, + "alleles": { + "CSF1PO": "13", + "D5S818": "13, 14", + "D7S820": "8, 19", + "D13S317": "11, 12", + "FGA": "24", + "TH01": "8", + "TPOX": "11", + "vWA": "15" + } + } +] diff --git a/claspy/tests/data/mock-cvcl-1085.csv b/claspy/tests/data/mock-cvcl-1085.csv new file mode 100644 index 0000000..5e7dc4f --- /dev/null +++ b/claspy/tests/data/mock-cvcl-1085.csv @@ -0,0 +1,18 @@ +Sample,Marker,Allele1,Allele2 +mock0,CSF1PO,12,13 +mock0,D13S317,12, +mock0,D16S539,9,11 +mock0,D18S51,12,15 +mock0,D19S433,13,15 +mock0,D21S11,29,32.2 +mock0,D2S1338,20,23 +mock0,D3S1358,16,17 +mock0,D5S818,10,11 +mock0,D7S820,10,11 +mock0,D8S1179,13,15 +mock0,FGA,18,24 +mock0,Penta D,9, +mock0,Penta E,17, +mock0,TH01,9,9.3 +mock0,TPOX,8, +mock0,vWA,18,19 
diff --git a/claspy/tests/data/mock-cvcl-1085.json b/claspy/tests/data/mock-cvcl-1085.json new file mode 100644 index 0000000..e228af2 --- /dev/null +++ b/claspy/tests/data/mock-cvcl-1085.json @@ -0,0 +1,24 @@ +{ + "meta": { + "id": 0 + }, + "alleles": { + "CSF1PO": "12,13", + "D13S317": "12", + "D16S539": "9,11", + "D18S51": "12,15", + "D19S433": "13,15", + "D21S11": "29,32.2", + "D2S1338": "20,23", + "D3S1358": "16,17", + "D5S818": "10,11", + "D7S820": "10,11", + "D8S1179": "13,15", + "FGA": "18,24", + "Penta D": "9", + "Penta E": "17", + "TH01": "9,9.3", + "TPOX": "8", + "vWA": "18,19" + } +} diff --git a/claspy/tests/data/mock-sk-hep-1-2samples.csv b/claspy/tests/data/mock-sk-hep-1-2samples.csv new file mode 100644 index 0000000..cb67524 --- /dev/null +++ b/claspy/tests/data/mock-sk-hep-1-2samples.csv @@ -0,0 +1,37 @@ +Sample,Marker,Allele1,Allele2 +mock_1,Amelogenin,X, +mock_1,CSF1PO,11,12 +mock_1,D12S391,18, +mock_1,D13S317,8,12 +mock_1,D16S539,12, +mock_1,D18S51,13,15 +mock_1,D21S11,29,31 +mock_1,D2S1338,20,23 +mock_1,D3S1358,16, +mock_1,D5S818,10,13 +mock_1,D6S1043,11, +mock_1,D7S820,8,11 +mock_1,D8S1179,14 +mock_1,FGA,17, +mock_1,Penta D,13,14 +mock_1,TH01,7,9 +mock_1,TPOX,9, +mock_1,vWA,14,17 +mock_2,Amelogenin,X, +mock_2,CSF1PO,11,12 +mock_2,D12S391,18, +mock_2,D13S317,8,12 +mock_2,D16S539,12, +mock_2,D18S51,13,15 +mock_2,D21S11,29,31 +mock_2,D2S1338,20,23 +mock_2,D3S1358,16, +mock_2,D5S818,10,13 +mock_2,D6S1043,11, +mock_2,D7S820,8,11 +mock_2,D8S1179,14 +mock_2,FGA,17, +mock_2,Penta D,13,14 +mock_2,TH01,7,9 +mock_2,TPOX,9, +mock_2,vWA,14,17 \ No newline at end of file diff --git a/claspy/tests/data/mock-sk-hep-1.csv b/claspy/tests/data/mock-sk-hep-1.csv new file mode 100644 index 0000000..3b9efab --- /dev/null +++ b/claspy/tests/data/mock-sk-hep-1.csv @@ -0,0 +1,19 @@ +Sample,Marker,Allele1,Allele2 +mock,Amelogenin,X, +mock,CSF1PO,11,12 +mock,D12S391,18, +mock,D13S317,8,12 +mock,D16S539,12, +mock,D18S51,13,15 +mock,D21S11,29,31 +mock,D2S1338,20,23 
+mock,D3S1358,16, +mock,D5S818,10,13 +mock,D6S1043,11, +mock,D7S820,8,11 +mock,D8S1179,14 +mock,FGA,17, +mock,Penta D,13,14 +mock,TH01,7,9 +mock,TPOX,9, +mock,vWA,14,17 diff --git a/claspy/tests/data/query-bad-allele-1.csv b/claspy/tests/data/query-bad-allele-1.csv new file mode 100644 index 0000000..89b2aa6 --- /dev/null +++ b/claspy/tests/data/query-bad-allele-1.csv @@ -0,0 +1,4 @@ +Sample,Marker,AlleleOne,AlleleTwo +query,Amelogenin,X,Y +query,CSF1PO,7.3,13 +query,D13S317,8,12 diff --git a/claspy/tests/data/query-bad-allele-2.csv b/claspy/tests/data/query-bad-allele-2.csv new file mode 100644 index 0000000..9911cb2 --- /dev/null +++ b/claspy/tests/data/query-bad-allele-2.csv @@ -0,0 +1,4 @@ +Sample,Marker,Allele1,AlleleTwo +query,Amelogenin,X,Y +query,CSF1PO,7.3,13 +query,D13S317,8,12 diff --git a/claspy/tests/data/query-multi-sample.csv b/claspy/tests/data/query-multi-sample.csv new file mode 100644 index 0000000..9e56aeb --- /dev/null +++ b/claspy/tests/data/query-multi-sample.csv @@ -0,0 +1,7 @@ +Sample,Marker,Allele1,Allele2 +query1,Amelogenin,X,Y +query1,CSF1PO,7.3,13 +query1,D13S317,8,12 +query2,Amelogenin,X,Y +query2,CSF1PO,7.3,13 +query2,D13S317,8,12 diff --git a/claspy/tests/data/query-wide.csv b/claspy/tests/data/query-wide.csv new file mode 100644 index 0000000..d1f0630 --- /dev/null +++ b/claspy/tests/data/query-wide.csv @@ -0,0 +1,4 @@ +Sample,Marker,Allele1,Allele2,Allele3,Allele4,Allele5,Allele6,Allele7,Allele8,Allele9,Allele10,Allele11 +query,Amelogenin,X,Y,,,,,,,,, +query,CSF1PO,6,7,7.3,8.3,9,9.3,10,10.1,10.3,11,14 +query,D13S317,8,12,,,,,,,,, diff --git a/claspy/tests/data/report-cvcl-1085.csv b/claspy/tests/data/report-cvcl-1085.csv new file mode 100644 index 0000000..4bec2b4 --- /dev/null +++ b/claspy/tests/data/report-cvcl-1085.csv @@ -0,0 +1,23 @@ +Sample,CellLine,Status,Score,SharedAlleles,Source,Amelogenin,CSF1PO,D13S317,D16S539,D18S51,D19S433,D21S11,D2S1338,D3S1358,D5S818,D7S820,D8S1179,FGA,Penta D,Penta E,TH01,TPOX,vWA 
+mock0,mock0,query,,,,,"12,13",12,"9,11","12,15","13,15","29,32.2","20,23","16,17","10,11","10,11","13,15","18,24",9,17,"9,9.3",8,"18,19" +mock0,BHT-101,best,0.9655172413793104,28,PubMed=18713817,X,,12,"9,11","12,15","13,15","29,32.2","20,23","16,17","10,11","10,11","13,14,15","18,24",9,"10,17","9,9.3",8,"18,19" +mock0,BHT-101,worst,0.9310344827586207,27,DSMZ,X,12,12,"9,11","12,15","13,15","29,32.2","20,23","16,17","10,11","10,11",15,"18,24",9,"10,17","9,9.3",8,19 +mock0,FGH,only,0.7586206896551724,11,BCRJ,X,"10,12",12,"9,11",,,,,,"10,11","10,11",,,,,"7,9","8,11","17,18" +mock0,KR-12,only,0.75,12,ATCC,"X,Y","10,12","11,12","9,11",,,,,,"10,11,13","9,10",,,,,"8,9,9.3",8,"16,18,19" +mock0,CCD-1059Sk,only,0.7407407407407407,10,ATCC,X,"11,12",12,"9,11",,,,,,11,"11,12",,,,,"9,9.3",8,"16,18" +mock0,AML14.3D10/CCCKR3 Clone 16,only,0.7142857142857143,10,ATCC,"X,Y",13,12,"9,12",,,,,,"10,11","10,14",,,,,"9,9.3","9,11","18,19" +mock0,CCD-1127Sk,only,0.7142857142857143,10,ATCC,"X,Y","12,13",12,"10,11",,,,,,11,"8,10",,,,,"9,9.3","8,10","17,18" +mock0,CG0161,only,0.7142857142857143,10,BCRC,X,"12,13",12,"9,11",,,,,,"9,11","8,11",,,,,9,"8,11","14,18" +mock0,340-RPE-11tv,only,0.6923076923076923,9,ATCC,X,"12,14","11,12",11,,,,,,11,"10,11",,,,,9,8,"17,18" +mock0,CCD-944Sk,only,0.6923076923076923,9,ATCC,X,12,12,12,,,,,,"10,11","7,10",,,,,"9,9.3",8,"16,18" +mock0,HuNS1,only,0.6923076923076923,9,ATCC,"X,Y","10,12",12,11,,,,,,"10,12",10,,,,,"9,9.3",8,"16,19" +mock0,HE31,only,0.6896551724137931,10,RCB,"X,Y","10,12",12,"9,11",,,,,,"10,11","8,10",,,,,"6,9","9,11","18,19" +mock0,OX-CO-3,only,0.6896551724137931,10,PubMed=25926053,X,12,"11,12",9,,,28,,,11,"10,11",,,,,"9,9.3",8,"16,18" +mock0,SS23,only,0.6896551724137931,10,JCRB,X,"12,13","8,12","10,11",,,,,,"10,11","10,11",,,,,"7,9","8,11",14 +mock0,TE 159.T,only,0.6896551724137931,10,ATCC,"X,Y","12,13","12,14","9,11",,,,,,"9,11","10,11",,,,,"6,9.3","8,11",17 
+mock0,Yub632,only,0.6896551724137931,10,JCRB,"X,Y",12,"8,10","9,11",,,,,,"10,11","8,10",,,,,"6,9","7,8","18,19" +mock0,NCC-DMM1-C1,only,0.6666666666666666,10,PubMed=35069873,"X,Y",12,12,"9,12",,,"29,32.2",,,"9,11",8,,,,,9,"8,11","18,19" +mock0,AIDHC-NMC8,only,0.6666666666666666,9,PubMed=28284873,"X,Y",11,"11,12","11,12",,,,,,11,"10,11",,,,,9.3,"8,9","18,19" +mock0,CA,only,0.6666666666666666,9,TKG,X,12,"10,12",11,,,,,,"11,15",11,,,,,"6,9","8,11","18,19" +mock0,CCD-1074Sk,only,0.6666666666666666,9,ATCC,X,"11,12",9,11,,,,,,"10,11","10,11",,,,,"9,9.3",8,"16,17" +mock0,CLC18,only,0.6666666666666666,9,PubMed=31378681,X,12,"10,11","9,11",,,,,,"10,11","11,12",,,,,9,8,"16,18" diff --git a/claspy/tests/data/skhep1-db.json b/claspy/tests/data/skhep1-db.json new file mode 100644 index 0000000..86150f7 --- /dev/null +++ b/claspy/tests/data/skhep1-db.json @@ -0,0 +1,611 @@ +[ + { + "meta": { + "identifier": "SK-HEP-1", + "accession": "CVCL_0525", + "synonyms": "SK-Hep-1; SK HEP-1; SK HEP 01; SK-Hep1; Sk-Hep1; SK Hep1; SKHEP-1; SKHEP1; SKHep1; SK_HEP1", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "ATCC" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta D": "13,14", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1", + "accession": "CVCL_0525", + "synonyms": "SK-Hep-1; SK HEP-1; SK HEP 01; SK-Hep1; Sk-Hep1; SK Hep1; SKHEP-1; SKHEP1; SKHep1; SK_HEP1", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + 
"D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta D": "13,14", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1", + "accession": "CVCL_0525", + "synonyms": "SK-Hep-1; SK HEP-1; SK HEP 01; SK-Hep1; Sk-Hep1; SK Hep1; SKHEP-1; SKHEP1; SKHep1; SK_HEP1", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CLS" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta D": "13,14", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1", + "accession": "CVCL_0525", + "synonyms": "SK-Hep-1; SK HEP-1; SK HEP 01; SK-Hep1; Sk-Hep1; SK Hep1; SKHEP-1; SKHEP1; SKHep1; SK_HEP1", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "Cosmic-CLP" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta D": "13,14", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1", + "accession": "CVCL_0525", + "synonyms": "SK-Hep-1; SK HEP-1; SK HEP 01; SK-Hep1; Sk-Hep1; SK Hep1; SKHEP-1; SKHEP1; SKHep1; SK_HEP1", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "DSMZ" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + 
"D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta D": "13,14", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1", + "accession": "CVCL_0525", + "synonyms": "SK-Hep-1; SK HEP-1; SK HEP 01; SK-Hep1; Sk-Hep1; SK Hep1; SKHEP-1; SKHEP1; SKHep1; SK_HEP1", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "ECACC" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta D": "13,14", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1", + "accession": "CVCL_0525", + "synonyms": "SK-Hep-1; SK HEP-1; SK HEP 01; SK-Hep1; Sk-Hep1; SK Hep1; SKHEP-1; SKHEP1; SKHep1; SK_HEP1", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "KCLB" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta D": "13,14", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1", + "accession": "CVCL_0525", + "synonyms": "SK-Hep-1; SK HEP-1; SK HEP 01; SK-Hep1; Sk-Hep1; SK Hep1; SKHEP-1; SKHEP1; SKHep1; SK_HEP1", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "MSKCC" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + 
"D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta D": "13,14", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1", + "accession": "CVCL_0525", + "synonyms": "SK-Hep-1; SK HEP-1; SK HEP 01; SK-Hep1; Sk-Hep1; SK Hep1; SKHEP-1; SKHEP1; SKHep1; SK_HEP1", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "PubMed=11416159" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta D": "13,14", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1", + "accession": "CVCL_0525", + "synonyms": "SK-Hep-1; SK HEP-1; SK HEP 01; SK-Hep1; Sk-Hep1; SK Hep1; SKHEP-1; SKHEP1; SKHep1; SK_HEP1", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "PubMed=25877200" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta D": "13,14", + "Penta E": "13", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1-Cas9-726", + "accession": "CVCL_XD84", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31,32", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta E": "13,21", + "TH01": "7,9", + 
"TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1-Cas9-727", + "accession": "CVCL_XD85", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1-Cas9-728", + "accession": "CVCL_XD86", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31,32", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1-Cas9-729", + "accession": "CVCL_XD87", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31,32", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1-Cas9-730", + "accession": "CVCL_XD88", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": 
"12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31,32", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1-Cas9-731", + "accession": "CVCL_XD89", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31,32", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1-Cas9-732", + "accession": "CVCL_XD90", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31,32", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1-Cas9-733", + "accession": "CVCL_XD91", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31,32", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": 
{ + "identifier": "SK-HEP-1-Cas9-734", + "accession": "CVCL_XD92", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31,32", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + }, + { + "meta": { + "identifier": "SK-HEP-1-Cas9-735", + "accession": "CVCL_XD93", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "CCRID" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "11,12", + "D12S391": "18", + "D13S317": "12,8", + "D16S539": "12", + "D18S51": "13,15", + "D19S433": "12,15.2", + "D21S11": "29,31,32", + "D2S1338": "20,23", + "D3S1358": "16", + "D5S818": "10,13", + "D6S1043": "11", + "D7S820": "11,8", + "D8S1179": "13,14", + "FGA": "17", + "Penta E": "13,21", + "TH01": "7,9", + "TPOX": "9", + "vWA": "14,17" + } + } +] \ No newline at end of file diff --git a/claspy/tests/data/snu-db.json b/claspy/tests/data/snu-db.json new file mode 100644 index 0000000..0975578 --- /dev/null +++ b/claspy/tests/data/snu-db.json @@ -0,0 +1,96 @@ +[ + { + "meta": { + "identifier": "SNU-1033-1", + "accession": "CVCL_5002", + "synonyms": "SNU1033", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "Genomics_Center_BCF_Technion" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "10", + "D10S1248": "13,15", + "D12S391": "18,21", + "D13S317": "10", + "D16S539": "9", + "D18S51": "13", + "D19S433": "13,14", + "D1S1656": "13", + "D21S11": "29,30", + "D22S1045": "17", + "D2S1338": "18,26", + "D2S441": "10,12", + "D3S1358": "17", + "D5S818": "13", + "D7S820": "11,12", + "D8S1179": "12,13", + "FGA": "23", + "Penta D": "11,12", + "Penta E": "11", + "TH01": "9", + "TPOX": "11", + "vWA": "17,19" + 
} + }, + { + "meta": { + "identifier": "SNU-1033-2", + "accession": "CVCL_5002", + "synonyms": "SNU1033", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "PubMed=25926053" + }, + "alleles": { + "Amelogenin": "X", + "D10S1248": "13,15", + "D12S391": "18,21", + "D13S317": "10", + "D16S539": "9", + "D18S51": "13", + "D19S433": "13,14", + "D1S1656": "13", + "D21S11": "29,30", + "D22S1045": "17", + "D2S1338": "18,26", + "D2S441": "10,12", + "D5S818": "13", + "D7S820": "11,12", + "TH01": "9", + "TPOX": "11" + } + }, + { + "meta": { + "identifier": "SNU-1033-3", + "accession": "CVCL_5002", + "synonyms": "SNU1033", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "KCLB" + }, + "alleles": { + "Amelogenin": "X", + "CSF1PO": "10", + "D10S1248": "13,15", + "D12S391": "18,21", + "D13S317": "10", + "D19S433": "13,14", + "D1S1656": "13", + "D21S11": "29,30", + "D22S1045": "17", + "D2S1338": "18,26", + "D2S441": "10,12", + "D5S818": "13", + "D8S1179": "12,13", + "FGA": "23", + "Penta D": "11,12", + "Penta E": "11", + "TH01": "9", + "TPOX": "11", + "vWA": "17,19" + } + } +] diff --git a/claspy/tests/data/snu-query.csv b/claspy/tests/data/snu-query.csv new file mode 100644 index 0000000..fe9353f --- /dev/null +++ b/claspy/tests/data/snu-query.csv @@ -0,0 +1,22 @@ +Sample,Marker,Allele1,Allele2 +sample,Amelogenin,X, +sample,CSF1PO,10, +sample,D10S1248,13,15 +sample,D12S391,18,21 +sample,D13S317,10, +sample,D16S539,9, +sample,D18S51,13, +sample,D19S433,13,14 +sample,D1S1656,13, +sample,D21S11,29,30 +sample,D22S1045,17, +sample,D2S1338,18,26 +sample,D2S441,10,12 +sample,D5S818,13, +sample,D8S1179,12,13 +sample,FGA,23, +sample,Penta D,11,12 +sample,Penta E,11, +sample,TH01,9, +sample,TPOX,11, +sample,vWA,17,19 diff --git a/claspy/tests/data/upci-scc-077-db.json b/claspy/tests/data/upci-scc-077-db.json new file mode 100644 index 0000000..512cb9a --- /dev/null +++ b/claspy/tests/data/upci-scc-077-db.json @@ -0,0 +1,54 @@ +[ + { + "meta": { + "sample": 
"query" + }, + "alleles": { + "Amelogenin": "X,Y", + "CSF1PO": "10", + "D13S317": "11", + "D16S539": "11,13", + "D19S433": "12,14", + "D21S11": "27,29", + "D3S1358": "14", + "D5S818": "9", + "D7S820": "7", + "D8S1179": "14,15", + "FGA": "21", + "Penta D": "13", + "Penta E": "13,7", + "TH01": "9.3", + "TPOX": "11", + "vWA": "17,18" + } + }, + { + "meta": { + "identifier": "UPCI-SCC-077", + "accession": "CVCL_C043", + "synonyms": "UPCI:SCC077; SCC077", + "taxid": 9606, + "organism": "Homo sapiens (Human)", + "source": "DSMZ" + }, + "alleles": { + "Amelogenin": "X,Y", + "CSF1PO": "10", + "D16S539": "11,13", + "D18S51": "13", + "D19S433": "12,14", + "D21S11": "27,29", + "D2S1338": "17", + "D3S1358": "14", + "D5S818": "9", + "D7S820": "8", + "D8S1179": "14,15", + "FGA": "21", + "Penta D": "13", + "Penta E": "13,7", + "TH01": "9.3", + "TPOX": "11,12", + "vWA": "17,18" + } + } +] diff --git a/claspy/tests/test_cli.py b/claspy/tests/test_cli.py new file mode 100644 index 0000000..73f6211 --- /dev/null +++ b/claspy/tests/test_cli.py @@ -0,0 +1,24 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. +# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. 
+# ------------------------------------------------------------------------------------------------- + +import claspy +from claspy.tests import data_file +import pytest + + +def test_search_report_sorting(tmp_path): + report = tmp_path / "report.csv" + arglist = [data_file("mock-cvcl-1085.csv"), "--out", report] + claspy.cli.main(arglist=arglist) + assert report.is_file() + with open(report, "r") as fh1, open(data_file("report-cvcl-1085.csv"), "r") as fh2: + observed = fh1.read().strip() + expected = fh2.read().strip() + assert observed == expected diff --git a/claspy/tests/test_db.py b/claspy/tests/test_db.py new file mode 100644 index 0000000..7b2beae --- /dev/null +++ b/claspy/tests/test_db.py @@ -0,0 +1,37 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. +# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. 
+# ------------------------------------------------------------------------------------------------- +from claspy.db import CellosaurusDB +from claspy import Profile +from claspy.tests import data_file +from io import StringIO +import pytest + + +def test_search_report_sorting(): + db = CellosaurusDB.load(data_file("snu-db.json")) + query = next(Profile.load(data_file("snu-query.csv"))) + report = db.search(query).full_report + print(report.to_string()) + assert report.CellLine.to_list() == ["sample", "SNU-1033-1", "SNU-1033-3", "SNU-1033-2"] + for score in report.Score[1:]: + assert score == pytest.approx(1.0) + assert report.SharedAlleles[1:].to_list() == [29, 27, 20] + + +def test_db_round_trip(tmp_path): + db = CellosaurusDB.load() + db1 = CellosaurusDB([profile for profile in db if "SK-HEP-1" in profile.identifier]) + db1.to_json(tmp_path / "db.json") + db2 = CellosaurusDB.load(tmp_path / "db.json") + assert len(db1) == len(db2) + json1, json2 = StringIO(), StringIO() + db1.to_json(json1) + db2.to_json(json2) + assert json1.getvalue() == json2.getvalue() diff --git a/claspy/tests/test_markers.py b/claspy/tests/test_markers.py new file mode 100644 index 0000000..f3dfbc6 --- /dev/null +++ b/claspy/tests/test_markers.py @@ -0,0 +1,54 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. +# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. 
+# ------------------------------------------------------------------------------------------------- + +from claspy.markers import validate_names +import pytest + + +@pytest.mark.parametrize( + "input_names,expected_names,expected_taxid", + [ + ( + ("PENTAD", "AmeLoGenIN", "D21S11"), + {"PENTAD": "Penta D", "AmeLoGenIN": "Amelogenin", "D21S11": "D21S11"}, + 9606, + ), + ( + ("d8s1179", "PentaE", "Se33", "Tpox"), + {"d8s1179": "D8S1179", "PentaE": "Penta E", "Se33": "SE33", "Tpox": "TPOX"}, + 9606, + ), + ( + ("CSF1PO", "fgA", "D3S1358"), + {"CSF1PO": "CSF1PO", "fgA": "FGA", "D3S1358": "D3S1358"}, + 9606, + ), + ( + (" mousestr1-2 ", "MouseSTR8-1"), + {" mousestr1-2 ": "Mouse STR 1-2", "MouseSTR8-1": "Mouse STR 8-1"}, + 10090, + ), + ], +) +def test_validate_names_basic(input_names, expected_names, expected_taxid): + observed_names, observed_taxid = validate_names(input_names) + assert observed_names == expected_names + assert observed_taxid == expected_taxid + + +def test_validate_names_invalid_marker(): + with pytest.raises(ValueError, match=r"invalid marker name\(s\): Penta G"): + validate_names(("CSF1PO", "Penta G", "D2S1338")) + + +def test_validate_names_mixed_species(): + message = r"list of marker names includes markers from different species: dog, human" + with pytest.raises(ValueError, match=message): + validate_names(("vWA", "DogPEZ8")) diff --git a/claspy/tests/test_profile.py b/claspy/tests/test_profile.py new file mode 100644 index 0000000..4791122 --- /dev/null +++ b/claspy/tests/test_profile.py @@ -0,0 +1,164 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. 
+# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from claspy import Profile +from claspy.db import CellosaurusDB +from claspy.tests import data_file +import pytest + + +def test_profile_basic(capsys): + alleles = { + "CSF1PO": "13,14", + "D5S818": "13", + "D7S820": "8", + "D13S317": "12", + "FGA": "24", + "TH01": "8", + "TPOX": "11", + "vWA": "16", + } + meta = {"sample": "sample1"} + profile = Profile(alleles, meta) + assert len(profile) == 9 + assert next(iter(profile)) == ("CSF1PO", "13") + assert profile.taxid == 9606 + profile._meta["taxid"] = [9606, 10116] + assert profile.taxid_match(9606) is True + assert profile.taxid_match(10116) is True + assert profile.taxid_match(10090) is False + score, num_shared_alleles = Profile.score(profile, profile) + assert score == pytest.approx(1.0) + assert num_shared_alleles == 9 + print(profile) + terminal = capsys.readouterr() + observed = terminal.out + expected = """ +Sample,Marker,Allele1,Allele2 +sample1,CSF1PO,13,14 +sample1,D5S818,13, +sample1,D7S820,8, +sample1,D13S317,12, +sample1,FGA,24, +sample1,TH01,8, +sample1,TPOX,11, +sample1,vWA,16, +""" + assert observed.strip() == expected.strip() + + +@pytest.mark.parametrize( + "path,message", + [ + ("query-wide.csv", r"found 11 allele columns, well above expected limit"), + ("query-bad-allele-1.csv", r"expected column 'Allele1' missing"), + ("query-bad-allele-2.csv", r"invalid table header 'AlleleTwo'"), + ], +) +def test_load_failure_modes(path, message): + with pytest.raises(ValueError, match=message): + 
next(Profile.load(data_file(path))) + + +@pytest.mark.parametrize( + "input,expected", + [ + ("7", {"7"}), + ("7,8", {"7", "8"}), + ("7, 8", {"7", "8"}), + ("10.2,13", {"10.2", "13"}), + ("11.1", {"11.1"}), + ], +) +def test_parse_allele_string(input, expected): + assert Profile.parse_allele_string(input) == expected + + +@pytest.mark.parametrize( + "input,expected", + [ + ({"13", "7"}, "7,13"), + ({"13"}, "13"), + ({"19", "21", "9"}, "9,19,21"), + ], +) +def test_allele_representation_is_sorted(input, expected): + assert Profile.allele_repr(input) == expected + + +def test_allele_transform_failure_mode(): + with pytest.raises(ValueError, match=r"unexpected allele 'Z'"): + Profile.allele_transform("Z") + + +@pytest.mark.parametrize( + "algorithm,mode,amel,exp_alleles,exp_score", + [ + ("Tanabe", "intersect", True, 21, 0.933333), + ("Tanabe", "intersect", False, 19, 0.9268293), + ("Tanabe", "query", False, 19, 0.904762), + ("Tanabe", "reference", False, 19, 0.883721), + ("query", "intersect", False, 19, 0.95), + ("reference", "intersect", False, 19, 0.904762), + ], +) +def test_score_basic(algorithm, mode, amel, exp_alleles, exp_score): + profiles = CellosaurusDB.load(path=data_file("upci-scc-077-db.json")) + assert len(profiles) == 2 + query, reference = profiles + score, shared_alleles = Profile.score( + query, reference, algorithm=algorithm, mode=mode, amel=amel + ) + assert score == pytest.approx(exp_score) + assert shared_alleles == exp_alleles + + +@pytest.mark.parametrize( + "algorithm,mode,message", + [ + ("Tanabe", "lizard", r"unsupported scoring mode 'lizard'"), + ("AI", "intersect", r"unsupported scoring algorithm 'AI'"), + ], +) +def test_score_failure_modes(algorithm, mode, message): + query = next(Profile.load(data_file("mock-cvcl-1085.csv"))) + reference = next(Profile.load(data_file("db-cvcl-1085.csv"))) + with pytest.raises(ValueError, match=message): + Profile.score(query, reference, algorithm=algorithm, mode=mode) + + +def test_parse_and_score(): 
+ profiles = CellosaurusDB.load(path=data_file("examples.json")) + assert len(profiles) == 2 + score, num_shared_alleles = Profile.score(profiles[0], profiles[1]) + assert score == pytest.approx(0.7) + assert num_shared_alleles == 7 + + +def test_score(): + query = next(Profile.load(data_file("mock-cvcl-1085.csv"))) + reference = next(Profile.load(data_file("db-cvcl-1085.csv"))) + score, num_shared_alleles = Profile.score(query, reference) + assert score == pytest.approx(0.9677, abs=1e-4) + assert num_shared_alleles == 30 + + +@pytest.mark.parametrize( + "allele_set,expected", + [ + ({"9", "13"}, "9,13"), + ({"3", "21"}, "3,21"), + ({"7"}, "7"), + ({"Y", "X"}, "X,Y"), + ({"9.3", "13"}, "9.3,13"), + ], +) +def test_allele_repr(allele_set, expected): + assert Profile.allele_repr(allele_set) == expected diff --git a/claspy/tests/test_result.py b/claspy/tests/test_result.py new file mode 100644 index 0000000..ca26cf0 --- /dev/null +++ b/claspy/tests/test_result.py @@ -0,0 +1,109 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. +# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. 
+# ------------------------------------------------------------------------------------------------- +from claspy.db import CellosaurusDB +from claspy.str_profile import Profile +from claspy.tests import data_file +from io import StringIO +import pandas as pd +import pytest + + +@pytest.fixture(scope="session") +def skhep_result(): + db = CellosaurusDB.load(data_file("skhep1-db.json")) + query = next(Profile.load(data_file("mock-sk-hep-1.csv"))) + return db.search(query, maxhits=5, minscore=0.9) + + +@pytest.fixture(scope="session") +def skhep_result_multi_sample(): + db = CellosaurusDB.load(data_file("skhep1-db.json")) + all_results = list() + for query in Profile.load(data_file("mock-sk-hep-1-2samples.csv")): + all_results.append(db.search(query, maxhits=3, minscore=0.9)) + return all_results + + +def test_search_result_basic(skhep_result): + assert skhep_result.maxhits == 5 + assert skhep_result.minscore == pytest.approx(0.9) + assert len(skhep_result.results_by_cell_line) == 11 + + +def test_search_result_summary(skhep_result): + observed = skhep_result.summary + assert len(observed) == 5 + exp_data = StringIO( + """ +Sample,CellLine,Score,SharedAlleles,Source +mock,SK-HEP-1,0.981818,27,PubMed=25877200 +mock,SK-HEP-1-Cas9-727,0.980392,25,CCRID +mock,SK-HEP-1-Cas9-726,0.961538,25,CCRID +mock,SK-HEP-1-Cas9-728,0.961538,25,CCRID +mock,SK-HEP-1-Cas9-729,0.961538,25,CCRID""" + ) + expected = pd.read_csv(exp_data) + pd.testing.assert_frame_equal(observed, expected, check_exact=False, rtol=1e-6) + + +def test_search_result_full_report(skhep_result): + observed = skhep_result.full_report + assert len(observed) == 7 + observed = observed.to_csv(sep=";", index=False) + expected = """ +Sample;CellLine;Status;Score;SharedAlleles;Source;Amelogenin;CSF1PO;D12S391;D13S317;D16S539;D18S51;D19S433;D21S11;D2S1338;D3S1358;D5S818;D6S1043;D7S820;D8S1179;FGA;Penta D;Penta E;TH01;TPOX;vWA 
+mock;mock;query;;;;X;11,12;18;8,12;12;13,15;;29,31;20,23;16;10,13;11;8,11;14;17;13,14;;7,9;9;14,17 +mock;SK-HEP-1;best;0.9818181818181818;27;PubMed=25877200;X;11,12;18;8,12;12;13,15;12,15.2;29,31;20,23;16;10,13;11;8,11;13,14;17;13,14;13;7,9;9;14,17 +mock;SK-HEP-1;worst;0.9818181818181818;27;ATCC;X;11,12;18;8,12;12;13,15;;29,31;20,23;16;10,13;11;8,11;13,14;17;13,14;;7,9;9;14,17 +mock;SK-HEP-1-Cas9-727;only;0.9803921568627451;25;CCRID;X;11,12;18;8,12;12;13,15;12,15.2;29,31;20,23;16;10,13;11;8,11;13,14;17;;13,21;7,9;9;14,17 +mock;SK-HEP-1-Cas9-726;only;0.9615384615384616;25;CCRID;X;11,12;18;8,12;12;13,15;12,15.2;29,31,32;20,23;16;10,13;11;8,11;13,14;17;;13,21;7,9;9;14,17 +mock;SK-HEP-1-Cas9-728;only;0.9615384615384616;25;CCRID;X;11,12;18;8,12;12;13,15;12,15.2;29,31,32;20,23;16;10,13;11;8,11;13,14;17;;13,21;7,9;9;14,17 +mock;SK-HEP-1-Cas9-729;only;0.9615384615384616;25;CCRID;X;11,12;18;8,12;12;13,15;12,15.2;29,31,32;20,23;16;10,13;11;8,11;13,14;17;;13,21;7,9;9;14,17""" + assert observed.strip() == expected.strip() + + +def test_search_result_summary_multisamples(skhep_result_multi_sample): + observed = pd.concat( + [result.summary for result in skhep_result_multi_sample], ignore_index=True + ) + assert len(observed) == 6 + exp_data = StringIO( + """ +Sample,CellLine,Score,SharedAlleles,Source +mock_1,SK-HEP-1,0.981818,27,PubMed=25877200 +mock_1,SK-HEP-1-Cas9-727,0.980392,25,CCRID +mock_1,SK-HEP-1-Cas9-726,0.961538,25,CCRID +mock_2,SK-HEP-1,0.981818,27,PubMed=25877200 +mock_2,SK-HEP-1-Cas9-727,0.980392,25,CCRID +mock_2,SK-HEP-1-Cas9-726,0.961538,25,CCRID""" + ) + expected = pd.read_csv(exp_data) + pd.testing.assert_frame_equal(observed, expected, check_exact=False, rtol=1e-6) + + +def test_search_result_full_report_multisamples(skhep_result_multi_sample): + observed = pd.concat([result.full_report for result in skhep_result_multi_sample]) + assert len(observed) == 10 + observed = observed.to_csv(sep=";", index=False) + expected = """ 
+Sample;CellLine;Status;Score;SharedAlleles;Source;Amelogenin;CSF1PO;D12S391;D13S317;D16S539;D18S51;D19S433;D21S11;D2S1338;D3S1358;D5S818;D6S1043;D7S820;D8S1179;FGA;Penta D;Penta E;TH01;TPOX;vWA +mock_1;mock_1;query;;;;X;11,12;18;8,12;12;13,15;;29,31;20,23;16;10,13;11;8,11;14;17;13,14;;7,9;9;14,17 +mock_1;SK-HEP-1;best;0.9818181818181818;27;PubMed=25877200;X;11,12;18;8,12;12;13,15;12,15.2;29,31;20,23;16;10,13;11;8,11;13,14;17;13,14;13;7,9;9;14,17 +mock_1;SK-HEP-1;worst;0.9818181818181818;27;ATCC;X;11,12;18;8,12;12;13,15;;29,31;20,23;16;10,13;11;8,11;13,14;17;13,14;;7,9;9;14,17 +mock_1;SK-HEP-1-Cas9-727;only;0.9803921568627451;25;CCRID;X;11,12;18;8,12;12;13,15;12,15.2;29,31;20,23;16;10,13;11;8,11;13,14;17;;13,21;7,9;9;14,17 +mock_1;SK-HEP-1-Cas9-726;only;0.9615384615384616;25;CCRID;X;11,12;18;8,12;12;13,15;12,15.2;29,31,32;20,23;16;10,13;11;8,11;13,14;17;;13,21;7,9;9;14,17 +mock_2;mock_2;query;;;;X;11,12;18;8,12;12;13,15;;29,31;20,23;16;10,13;11;8,11;14;17;13,14;;7,9;9;14,17 +mock_2;SK-HEP-1;best;0.9818181818181818;27;PubMed=25877200;X;11,12;18;8,12;12;13,15;12,15.2;29,31;20,23;16;10,13;11;8,11;13,14;17;13,14;13;7,9;9;14,17 +mock_2;SK-HEP-1;worst;0.9818181818181818;27;ATCC;X;11,12;18;8,12;12;13,15;;29,31;20,23;16;10,13;11;8,11;13,14;17;13,14;;7,9;9;14,17 +mock_2;SK-HEP-1-Cas9-727;only;0.9803921568627451;25;CCRID;X;11,12;18;8,12;12;13,15;12,15.2;29,31;20,23;16;10,13;11;8,11;13,14;17;;13,21;7,9;9;14,17 +mock_2;SK-HEP-1-Cas9-726;only;0.9615384615384616;25;CCRID;X;11,12;18;8,12;12;13,15;12,15.2;29,31,32;20,23;16;10,13;11;8,11;13,14;17;;13,21;7,9;9;14,17 +""" + + assert observed.strip() == expected.strip() diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..03ad3be --- /dev/null +++ b/setup.cfg @@ -0,0 +1,7 @@ +[versioneer] +VCS = git +style = pep440 +versionfile_source = claspy/_version.py +versionfile_build = claspy/_version.py +tag_prefix = +parentdir_prefix = claspy- diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..12eb1ca --- 
/dev/null +++ b/setup.py @@ -0,0 +1,48 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2023, DHS. +# This file is part of claspy: https://github.com/bioforensics/claspy +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from setuptools import setup +import versioneer + + +with open("README.md", "r") as infile: + longdesc = infile.read() + +setup( + name="claspy", + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), + description="Clapsy: cell line authentication with STRs in Python", + long_description=longdesc, + long_description_content_type="text/markdown", + url="https://github.com/bioforensics/claspy", + packages=["claspy", "claspy.tests"], + package_data={"claspy": ["claspy/tests/data/*"]}, + include_package_data=True, + install_requires=[ + "black==24.3", + "pandas>=2.0", + "pytest>=6.0", + "pytest-cov>=3.0", + "tabulate>=0.9", + "tqdm>=3.0", + ], + entry_points={"console_scripts": ["claspy = claspy:main", "claspy_db = claspy:db_main"]}, + classifiers=[ + "Environment :: Console", + "Framework :: IPython", + "Framework :: Jupyter", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Bio-Informatics", + ], + zip_safe=True, +) diff --git a/versioneer.py b/versioneer.py new file mode 100644 index 0000000..9713007 --- /dev/null +++ b/versioneer.py @@ -0,0 +1,2064 @@ + +# Version: 0.20 + +"""The Versioneer - like a rocketeer, but for versions. 
+ +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/python-versioneer/python-versioneer +* Brian Warner +* License: Public Domain +* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) +* run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. 
"myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 
+ +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. 
+ +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. + +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). + +## Known Limitations + +Some situations are known to cause problems for Versioneer. This details the +most significant ones. More can be found on Github +[issues page](https://github.com/python-versioneer/python-versioneer/issues). + +### Subprojects + +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. `setup.py` and `.git/` are *not* siblings). 
There are +two common reasons why `setup.py` might not be in the root: + +* Source trees which contain multiple subprojects, such as + [Buildbot](https://github.com/buildbot/buildbot), which contains both + "master" and "slave" subprojects, each with their own `setup.py`, + `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI + distributions (and upload multiple independently-installable tarballs). +* Source trees whose main purpose is to contain a C library, but which also + provide bindings to Python (and perhaps other languages) in subdirectories. + +Versioneer will look for `.git` in parent directories, and most operations +should get the right version string. However `pip` and `setuptools` have bugs +and implementation details which frequently cause `pip install .` from a +subproject directory to fail to find a correct version string (so it usually +defaults to `0+unknown`). + +`pip install --editable .` should work correctly. `setup.py install` might +work too. + +Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in +some later version. + +[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking +this issue. The discussion in +[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the +issue from the Versioneer side in more detail. +[pip PR#3176](https://github.com/pypa/pip/pull/3176) and +[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve +pip to let Versioneer work correctly. + +Versioneer-0.16 and earlier only looked for a `.git` directory next to the +`setup.cfg`, so subprojects were completely unsupported with those releases. + +### Editable installs with setuptools <= 18.5 + +`setup.py develop` and `pip install --editable .` allow you to install a +project into a virtualenv once, then continue editing the source code (and +test) without re-installing after every change. 
+ +"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a +convenient way to specify executable scripts that should be installed along +with the python package. + +These both work as expected when using modern setuptools. When using +setuptools-18.5 or earlier, however, certain operations will cause +`pkg_resources.DistributionNotFound` errors when running the entrypoint +script, which must be resolved by re-installing the package. This happens +when the install happens with one version, then the egg_info data is +regenerated while a different version is checked out. Many setup.py commands +cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into +a different virtualenv), so this can be surprising. + +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes +this one, but upgrading to a newer version of setuptools should probably +resolve it. + + +## Updating Versioneer + +To upgrade your project to a new release of Versioneer, do the following: + +* install the new Versioneer (`pip install -U versioneer` or equivalent) +* edit `setup.cfg`, if necessary, to include any new configuration settings + indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install` in your source tree, to replace + `SRC/_version.py` +* commit any changed files + +## Future Directions + +This tool is designed to make it easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . 
def get_root():
    """Get the project root directory.

    We require that all commands are run from the project root, i.e. the
    directory that contains setup.py, setup.cfg, and versioneer.py .
    """
    def has_project_files(candidate):
        # A plausible root holds setup.py or versioneer.py.
        return (os.path.exists(os.path.join(candidate, "setup.py"))
                or os.path.exists(os.path.join(candidate, "versioneer.py")))

    root = os.path.realpath(os.path.abspath(os.getcwd()))
    if not has_project_files(root):
        # allow 'python path/to/setup.py COMMAND'
        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
        if not has_project_files(root):
            err = ("Versioneer was unable to run the project root directory. "
                   "Versioneer requires setup.py to be executed from "
                   "its immediate directory (like 'python setup.py COMMAND'), "
                   "or in a way that lets it use sys.argv[0] to find the root "
                   "(like 'python path/to/setup.py COMMAND').")
            raise VersioneerBadRootError(err)
    versioneer_py = os.path.join(root, "versioneer.py")
    try:
        # Certain runtime workflows (setup.py install/develop in a setuptools
        # tree) execute all dependencies in a single python process, so
        # "versioneer" may be imported multiple times, and python's shared
        # module-import table will cache the first one.  So we can't rely on
        # os.path.dirname(__file__): it may point at whichever versioneer.py
        # was imported first, even in a different project.  Warn if so.
        my_path = os.path.realpath(os.path.abspath(__file__))
        me_dir = os.path.normcase(os.path.splitext(my_path)[0])
        vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])
        if me_dir != vsr_dir:
            print("Warning: build in %s is using versioneer.py from %s"
                  % (os.path.dirname(my_path), versioneer_py))
    except NameError:
        pass
    return root
def get_config_from_root(root):
    """Read the project setup.cfg file to determine Versioneer config."""
    # This might raise EnvironmentError (if setup.cfg is missing), or
    # configparser.NoSectionError (if it lacks a [versioneer] section), or
    # configparser.NoOptionError (if it lacks "VCS=").  See the docstring at
    # the top of versioneer.py for instructions on writing your setup.cfg .
    parser = configparser.ConfigParser()
    with open(os.path.join(root, "setup.cfg"), "r") as cfg_file:
        parser.read_file(cfg_file)
    VCS = parser.get("versioneer", "VCS")  # mandatory

    # Dict-like interface for non-mandatory entries
    section = parser["versioneer"]

    # pylint:disable=attribute-defined-outside-init # noqa
    cfg = VersioneerConfig()
    cfg.VCS = VCS
    cfg.style = section.get("style", "")
    cfg.versionfile_source = section.get("versionfile_source")
    cfg.versionfile_build = section.get("versionfile_build")
    raw_prefix = section.get("tag_prefix")
    # An explicitly quoted empty prefix ('' or "") means "no tag prefix".
    cfg.tag_prefix = "" if raw_prefix in ("''", '""') else raw_prefix
    cfg.parentdir_prefix = section.get("parentdir_prefix")
    cfg.verbose = section.get("verbose")
    return cfg


class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


# these dictionaries contain VCS-specific tools
LONG_VERSION_PY = {}
HANDLERS = {}


def register_vcs_handler(vcs, method):  # decorator
    """Create decorator to mark a method as the handler of a VCS."""
    def decorate(func):
        """Store func in HANDLERS[vcs][method]."""
        HANDLERS.setdefault(vcs, {})[method] = func
        return func
    return decorate


# pylint:disable=too-many-arguments,consider-using-with # noqa
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
                env=None):
    """Call the given command(s), returning (stdout, returncode).

    Each name in *commands* is tried in turn until one can be launched;
    (None, None) is returned if none of them could be started at all.
    """
    assert isinstance(commands, list)
    process = None
    dispcmd = None
    for command in commands:
        dispcmd = str([command] + args)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen(
                [command] + args, cwd=cwd, env=env,
                stdout=subprocess.PIPE,
                stderr=(subprocess.PIPE if hide_stderr else None))
            break
        except EnvironmentError:
            exc = sys.exc_info()[1]
            if exc.errno == errno.ENOENT:
                # this candidate isn't installed; try the next one
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(exc)
            return None, None
    else:
        # loop exhausted without a successful Popen
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
    stdout = process.communicate()[0].strip().decode()
    if process.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, process.returncode
    return stdout, process.returncode
+ git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: # pylint: disable=too-few-public-methods + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +# pylint:disable=too-many-arguments,consider-using-with # noqa +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" 
%% (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%%s*" %% tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. 
+ branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post0.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post0.dev%%d" %% pieces["distance"] + else: + # exception #1 + rendered = "0.post0.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. 
+ + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%s*" % tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. 
+ branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + my_path = __file__ + if my_path.endswith(".pyc") or my_path.endswith(".pyo"): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except EnvironmentError: + pass + if not present: + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.20) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. + +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except EnvironmentError: + raise NotThisMethod("unable to read _version.py") + mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, + indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if 
"+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post0.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post0.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. 
+ """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. + + get_keywords_f = handlers.get("get_keywords") + from_keywords_f = handlers.get("keywords") + if get_keywords_f and from_keywords_f: + try: + keywords = get_keywords_f(versionfile_abs) + ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) + if verbose: + print("got version from expanded keyword %s" % ver) + return ver + except NotThisMethod: + pass + + try: + ver = versions_from_file(versionfile_abs) + if verbose: + print("got version from file %s %s" % (versionfile_abs, ver)) + return ver + except NotThisMethod: + pass + + from_vcs_f = handlers.get("pieces_from_vcs") + if from_vcs_f: + try: + pieces = from_vcs_f(cfg.tag_prefix, root, verbose) + ver = render(pieces, cfg.style) + if verbose: + print("got version from VCS %s" % ver) + return ver + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + if verbose: + print("got version from parentdir %s" % ver) + return ver + except NotThisMethod: + pass + + if verbose: + 
print("unable to compute version") + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, "error": "unable to compute version", + "date": None} + + +def get_version(): + """Get the short version string for this project.""" + return get_versions()["version"] + + +def get_cmdclass(cmdclass=None): + """Get the custom setuptools/distutils subclasses used by Versioneer. + + If the package uses a different cmdclass (e.g. one from numpy), it + should be provide as an argument. + """ + if "versioneer" in sys.modules: + del sys.modules["versioneer"] + # this fixes the "python setup.py develop" case (also 'install' and + # 'easy_install .'), in which subdependencies of the main project are + # built (using setup.py bdist_egg) in the same python process. Assume + # a main project A and a dependency B, which use different versions + # of Versioneer. A's setup.py imports A's Versioneer, leaving it in + # sys.modules by the time B's setup.py is executed, causing B to run + # with the wrong versioneer. Setuptools wraps the sub-dep builds in a + # sandbox that restores sys.modules to it's pre-build state, so the + # parent is protected against the child's "import versioneer". By + # removing ourselves from sys.modules here, before the child build + # happens, we protect the child from the parent's versioneer too. 
+ # Also see https://github.com/python-versioneer/python-versioneer/issues/52 + + cmds = {} if cmdclass is None else cmdclass.copy() + + # we add "version" to both distutils and setuptools + from distutils.core import Command + + class cmd_version(Command): + description = "report generated version string" + user_options = [] + boolean_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + vers = get_versions(verbose=True) + print("Version: %s" % vers["version"]) + print(" full-revisionid: %s" % vers.get("full-revisionid")) + print(" dirty: %s" % vers.get("dirty")) + print(" date: %s" % vers.get("date")) + if vers["error"]: + print(" error: %s" % vers["error"]) + cmds["version"] = cmd_version + + # we override "build_py" in both distutils and setuptools + # + # most invocation pathways end up running build_py: + # distutils/build -> build_py + # distutils/install -> distutils/build ->.. + # setuptools/bdist_wheel -> distutils/install ->.. + # setuptools/bdist_egg -> distutils/install_lib -> build_py + # setuptools/install -> bdist_egg ->.. + # setuptools/develop -> ? + # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? 
+ + # we override different "build_py" commands for both environments + if 'build_py' in cmds: + _build_py = cmds['build_py'] + elif "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if 'build_ext' in cmds: + _build_ext = cmds['build_ext'] + elif "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 
+ # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? + from py2exe.distutils_buildexe import py2exe as _py2exe + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if 'sdist' in cmds: + _sdist = cmds['sdist'] + elif "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + # pylint:disable=attribute-defined-outside-init # noqa + self._versioneer_generated_versions = versions + # 
unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +OLD_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + +INIT_PY_SNIPPET = """ +from . 
import {0} +__version__ = {0}.get_versions()['version'] +""" + + +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (EnvironmentError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (EnvironmentError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except EnvironmentError: + old = "" + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(snippet) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. 
+ manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except EnvironmentError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print(" appending versionfile_source ('%s') to MANIFEST.in" % + cfg.versionfile_source) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. + do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). 
Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1)