diff --git a/.github/workflows/requirements/unit-tests.txt b/.github/workflows/requirements/unit-tests.txt
index 1393afd..2de0d46 100644
--- a/.github/workflows/requirements/unit-tests.txt
+++ b/.github/workflows/requirements/unit-tests.txt
@@ -1,2 +1,3 @@
-pytest==7.4.3
-pytest-asyncio==0.23.2
+pytest==8.0.1
+pytest-aiohttp==1.0.5
+pytest-mock==3.12.0
\ No newline at end of file
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 551403a..c48dfed 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -9,17 +9,22 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.8', '3.11']
+        python-version: ['3.11']
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
     - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c
       with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
-       cache-dependency-path: '.github/workflows/requirements/unit-tests.txt'
+       cache-dependency-path: |
+         'requirements.txt'
+         '.github/workflows/requirements/unit-tests.txt'
+
    - name: Install Python dependencies
      run: |
+       pip install -r requirements.txt
        pip install -r .github/workflows/requirements/unit-tests.txt
+
    - name: Run Unit Tests with Pytest
      run: |
-       pytest
+       python -m pytest gantry
diff --git a/.gitignore b/.gitignore
index 372e265..944f826 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ __pycache__
 spack.lock
 .spack-env
 db/*.db
+.coverage
+htmlcov
\ No newline at end of file
diff --git a/db/schema.sql b/db/schema.sql
index bba3549..a9dc104 100644
--- a/db/schema.sql
+++ b/db/schema.sql
@@ -1,3 +1,5 @@
+PRAGMA foreign_keys = ON;
+
 CREATE TABLE nodes (
     id INTEGER PRIMARY KEY,
     uuid TEXT NOT NULL UNIQUE,
@@ -27,7 +29,7 @@ CREATE TABLE jobs (
     stack TEXT NOT NULL,
     build_jobs INTEGER NOT NULL,
     cpu_request REAL NOT NULL,
-    cpu_limit REAL, -- this can be null becasue it's currently not set
+    cpu_limit REAL, -- this can be null because it's currently not set
     cpu_mean REAL NOT NULL,
    cpu_median REAL NOT NULL,
    cpu_max REAL NOT NULL,
diff --git a/gantry/clients/prometheus/prometheus.py b/gantry/clients/prometheus/prometheus.py
index 1d92503..424d1b1 100644
--- a/gantry/clients/prometheus/prometheus.py
+++ b/gantry/clients/prometheus/prometheus.py
@@ -39,7 +39,7 @@ async def query_single(self, query: str | dict, time: int) -> list:
         query = util.process_query(query)
         url = f"{self.base_url}/query?query={query}&time={time}"
 
-        return await self._query(url)
+        return self.prettify_res(await self._query(url))
 
     async def query_range(self, query: str | dict, start: int, end: int) -> list:
         """Query Prometheus for a range of values
@@ -64,7 +64,7 @@ async def query_range(self, query: str | dict, start: int, end: int) -> list:
             f"end={end}&"
             f"step={step}s"
         )
-        return await self._query(url)
+        return self.prettify_res(await self._query(url))
 
     async def _query(self, url: str) -> list:
         """Query Prometheus with a query string"""
@@ -72,7 +72,7 @@ async def _query(self, url: str) -> list:
             # submit cookie with request
             async with session.get(url, cookies=self.cookies) as resp:
                 try:
-                    return self.prettify_res(await resp.json())
+                    return await resp.json()
                 except aiohttp.ContentTypeError:
                     logger.error(
                         """Prometheus query failed with unexpected response.
@@ -81,7 +81,7 @@ async def _query(self, url: str) -> list:
                     return {}
 
     def prettify_res(self, response: dict) -> list:
-        """Process Prometheus response into an arrray of dicts with {label: value}"""
+        """Process Prometheus response into a list of dicts with {label: value}"""
         result_type = response.get("data", {}).get("resultType")
         values_dict = {
             "matrix": "values",
diff --git a/gantry/db/get.py b/gantry/db/get.py
index 362829c..735f1a8 100644
--- a/gantry/db/get.py
+++ b/gantry/db/get.py
@@ -26,15 +26,3 @@ async def job_exists(db: aiosqlite.Connection, gl_id: int) -> bool:
             return True
 
     return False
-
-
-async def ghost_exists(db: aiosqlite.Connection, gl_id: int) -> bool:
-    """return if the ghost job exists in the database"""
-
-    async with db.execute(
-        "select id from ghost_jobs where gitlab_id = ?", (gl_id,)
-    ) as cursor:
-        if await cursor.fetchone():
-            return True
-
-    return False
diff --git a/gantry/db/insert.py b/gantry/db/insert.py
index 7564ad9..c000d4b 100644
--- a/gantry/db/insert.py
+++ b/gantry/db/insert.py
@@ -1,7 +1,11 @@
+import logging
+
 import aiosqlite
 
 from gantry.db.get import get_node
 
+logger = logging.getLogger(__name__)
+
 
 def insert_dict(table: str, input: dict, ignore=False) -> tuple[str, tuple]:
     """
@@ -35,15 +39,20 @@ async def insert_node(db: aiosqlite.Connection, node: dict) -> int:
             "nodes",
             node,
             # deal with races
+            # this also ignores the not-null constraint
+            # so we need to make sure the node is valid before inserting
             ignore=True,
         )
     ) as cursor:
-        pk = cursor.lastrowid
-
-        if pk == 0:
-            # the ignore part of the query was triggered, some other call
-            # must have inserted the node before this one
-            pk = await get_node(db, node["uuid"])
+        # this check ensures that something was inserted
+        # and not relying on lastrowid, which could be anything
+        if cursor.rowcount > 0:
+            return cursor.lastrowid
+
+    pk = await get_node(db, node["uuid"])
+
+    if pk is None:
+        logger.error(f"node not inserted: {node}. data is likely missing")
 
     return pk
 
@@ -60,4 +69,8 @@ async def insert_job(db: aiosqlite.Connection, job: dict) -> int:
             ignore=True,
         )
     ) as cursor:
-        return cursor.lastrowid
+        if cursor.rowcount > 0:
+            return cursor.lastrowid
+
+    logger.error(f"job not inserted: {job}. data is likely missing")
+    return None
diff --git a/gantry/models/job.py b/gantry/models/job.py
index 3c3a794..5dd8649 100644
--- a/gantry/models/job.py
+++ b/gantry/models/job.py
@@ -15,8 +15,11 @@ def __init__(
         self.status = status
         self.name = name
         self.gl_id = gl_id
-        self.start = datetime.fromisoformat(start).timestamp()
-        self.end = datetime.fromisoformat(end).timestamp()
+        # handle jobs that haven't started or finished
+        if start:
+            self.start = datetime.fromisoformat(start).timestamp()
+        if end:
+            self.end = datetime.fromisoformat(end).timestamp()
         self.ref = ref
 
     @property
diff --git a/gantry/routes/collection.py b/gantry/routes/collection.py
index f8fd695..aeb6602 100644
--- a/gantry/routes/collection.py
+++ b/gantry/routes/collection.py
@@ -45,10 +45,11 @@ async def fetch_job(
     if (
         job.status != "success"
         or not job.valid_build_name  # is not a build job
+        # some jobs don't have runners..?
+ or payload["runner"] is None # uo runners are not in Prometheus or payload["runner"]["description"].startswith("uo") or await db.job_exists(db_conn, job.gl_id) # job already in the database - or await db.ghost_exists(db_conn, job.gl_id) # ghost already in db ): return @@ -56,6 +57,7 @@ async def fetch_job( job_log = await gitlab.job_log(job.gl_id) is_ghost = "No need to rebuild" in job_log if is_ghost: + logger.warning(f"job {job.gl_id} is a ghost, skipping") return try: @@ -70,7 +72,7 @@ async def fetch_job( logger.error(f"{e} job={job.gl_id}") return - await db.insert_job( + job_id = await db.insert_job( db_conn, { "node": node_id, @@ -89,7 +91,7 @@ async def fetch_job( # we don't accidentally commit a node without a job await db_conn.commit() - return + return job_id async def fetch_node( diff --git a/gantry/tests/conftest.py b/gantry/tests/conftest.py new file mode 100644 index 0000000..5c1bdc2 --- /dev/null +++ b/gantry/tests/conftest.py @@ -0,0 +1,16 @@ +# fixtures shared among all tests + +import aiosqlite +import pytest + + +@pytest.fixture +async def db_conn(): + """ + In-memory sqlite connection ensures that the database is clean for each test + """ + db = await aiosqlite.connect(":memory:") + with open("db/schema.sql") as f: + await db.executescript(f.read()) + yield db + await db.close() diff --git a/gantry/tests/defs/__init__.py b/gantry/tests/defs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gantry/tests/defs/collection.py b/gantry/tests/defs/collection.py new file mode 100644 index 0000000..92fcab8 --- /dev/null +++ b/gantry/tests/defs/collection.py @@ -0,0 +1,37 @@ +# flake8: noqa +# fmt: off + +INVALID_JOB_NAME = "invalid job name" +# uo runners are not supported +INVALID_RUNNER = {"description": "uo-blabla1821"} +INVALID_JOB_STATUS = "failure" +GHOST_JOB_LOG = "No need to rebuild" +VALID_JOB_LOG = "some log" + +VALID_JOB = { + "build_status": "success", + "build_name": "gmsh@4.8.4 /jcchwaj %gcc@11.4.0 arch=linux-ubuntu20.04-x86_64_v3 E4S", + "build_id": 9892514, # not used in testing unless it already exists in the db + "build_started_at": "2024-01-24T17:24:06.000Z", + "build_finished_at": "2024-01-24T17:47:00.000Z", + "ref": "pr42264_bugfix/mathomp4/hdf5-appleclang15", + "runner": {"description": "aws"}, +} + +# used to compare successful insertions +# run SELECT * FROM table_name WHERE id = 1; from python sqlite api and grab fetchone() result +INSERTED_JOB = (1, 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 1, 1706117046, 1706118420, 9892514, 'success', 'pr42264_bugfix/mathomp4/hdf5-appleclang15', 'gmsh', '4.8.4', '{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}', 'gcc', '11.4.0', 'linux-ubuntu20.04-x86_64_v3', 'e4s', 16, 0.75, None, 1.899768349523097, 0.2971597591741076, 4.128116379389054, 0.2483743618267752, 1.7602635378120381, 2000000000.0, 48000000000.0, 143698407.6190476, 2785280.0, 594620416.0, 2785280.0, 252073065.82263485) +INSERTED_NODE = (1, 'ec253b04-b1dc-f08b-acac-e23df83b3602', 'ip-192-168-86-107.ec2.internal', 24.0, 196608000000.0, 'amd64', 'linux', 'i3en.6xlarge') + +# these were obtained by executing the respective queries to 
+# or the raw output of PrometheusClient._query
+VALID_ANNOTATIONS = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_pod_annotations', 'annotation_gitlab_ci_job_id': '9892514', 'annotation_metrics_spack_ci_stack_name': 'e4s', 'annotation_metrics_spack_job_spec_arch': 'linux-ubuntu20.04-x86_64_v3', 'annotation_metrics_spack_job_spec_compiler_name': 'gcc', 'annotation_metrics_spack_job_spec_compiler_version': '11.4.0', 'annotation_metrics_spack_job_spec_pkg_name': 'gmsh', 'annotation_metrics_spack_job_spec_pkg_version': '4.8.4', 'annotation_metrics_spack_job_spec_variants': '+alglib~cairo+cgns+compression~eigen~external+fltk+gmp~hdf5~ipo+med+metis+mmg+mpi+netgen+oce~opencascade~openmp~petsc~privateapi+shared~slepc+tetgen+voropp build_system=cmake build_type=Release generator=make', 'container': 'kube-state-metrics', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'namespace': 'pipeline', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kube-state-metrics', 'uid': 'd7aa13e0-998c-4f21-b1d6-62781f4980b0'}, 'value': [1706117733, '1']}]}}
+VALID_RESOURCE_REQUESTS = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_pod_container_resource_requests', 'container': 'build', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'resource': 'cpu', 'service': 'kube-prometheus-stack-kube-state-metrics', 'uid': 'd7aa13e0-998c-4f21-b1d6-62781f4980b0', 'unit': 'core'}, 'value': [1706117733, '0.75']}, {'metric': {'__name__': 'kube_pod_container_resource_requests', 'container': 'build', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'resource': 'memory', 'service': 'kube-prometheus-stack-kube-state-metrics', 'uid': 'd7aa13e0-998c-4f21-b1d6-62781f4980b0', 'unit': 'byte'}, 'value': [1706117733, '2000000000']}]}}
+VALID_RESOURCE_LIMITS = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_pod_container_resource_limits', 'container': 'build', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'resource': 'memory', 'service': 'kube-prometheus-stack-kube-state-metrics', 'uid': 'd7aa13e0-998c-4f21-b1d6-62781f4980b0', 'unit': 'byte'}, 'value': [1706117733, '48000000000']}]}}
+VALID_MEMORY_USAGE = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'__name__': 'container_memory_working_set_bytes', 'container': 'build', 'endpoint': 'https-metrics', 'id': '/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podd7aa13e0_998c_4f21_b1d6_62781f4980b0.slice/cri-containerd-48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1.scope', 'image': 'ghcr.io/spack/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01', 'instance': '192.168.86.107:10250', 'job': 'kubelet', 'metrics_path': '/metrics/cadvisor', 'name': '48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kubelet'}, 'values': [[1706117115, '2785280'], [1706117116, '2785280'], [1706117117, '2785280'], [1706117118, '2785280'], [1706117119, '2785280'], [1706117120, '2785280'], [1706117121, '2785280'], [1706117122, '2785280'], [1706117123, '2785280'], [1706117124, '2785280'], [1706117125, '2785280'], [1706117126, '2785280'], [1706117127, '2785280'], [1706117128, '2785280'], [1706117129, '2785280'], [1706117130, '2785280'], [1706118416, '594620416'], [1706118417, '594620416'], [1706118418, '594620416'], [1706118419, '594620416'], [1706118420, '594620416']]}]}}
+VALID_CPU_USAGE = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'container': 'build', 'cpu': 'total', 'endpoint': 'https-metrics', 'id': '/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podd7aa13e0_998c_4f21_b1d6_62781f4980b0.slice/cri-containerd-48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1.scope', 'image': 'ghcr.io/spack/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01', 'instance': '192.168.86.107:10250', 'job': 'kubelet', 'metrics_path': '/metrics/cadvisor', 'name': '48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kubelet'}, 'values': [[1706117145, '0.2483743618267752'], [1706117146, '0.25650526138466395'], [1706117147, '0.26463616094255266'], [1706117148, '0.2727670605004414'], [1706117149, '0.28089796005833007'], [1706117150, '0.2890288596162188'], [1706117151, '0.2971597591741076'], [1706117357, '3.7319005481816236'], [1706117358, '3.7319005481816236'], [1706117359, '3.7319005481816236'], [1706117360, '3.7319005481816245'], [1706117361, '3.7319005481816245'], [1706118420, '4.128116379389054']]}]}}
+VALID_NODE_INFO = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_node_info', 'container': 'kube-state-metrics', 'container_runtime_version': 'containerd://1.7.2', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'internal_ip': '192.168.86.107', 'job': 'kube-state-metrics', 'kernel_version': '5.10.205-195.804.amzn2.x86_64', 'kubelet_version': 'v1.27.9-eks-5e0fdde', 'kubeproxy_version': 'v1.27.9-eks-5e0fdde', 'namespace': 'monitoring', 'node': 'ip-192-168-86-107.ec2.internal', 'os_image': 'Amazon Linux 2', 'pod': 'kube-prometheus-stack-kube-state-metrics-dbd66d8c7-6ftw8', 'provider_id': 'aws:///us-east-1c/i-0fe9d9c99fdb3631d', 'service': 'kube-prometheus-stack-kube-state-metrics', 'system_uuid': 'ec253b04-b1dc-f08b-acac-e23df83b3602'}, 'value': [1706117733, '1']}]}}
+VALID_NODE_LABELS = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_node_labels', 'container': 'kube-state-metrics', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'label_beta_kubernetes_io_arch': 'amd64', 'label_beta_kubernetes_io_instance_type': 'i3en.6xlarge', 'label_beta_kubernetes_io_os': 'linux', 'label_failure_domain_beta_kubernetes_io_region': 'us-east-1', 'label_failure_domain_beta_kubernetes_io_zone': 'us-east-1c', 'label_k8s_io_cloud_provider_aws': 'ceb9f9cc8e47252a6f7fe7d6bded2655', 'label_karpenter_k8s_aws_instance_category': 'i', 'label_karpenter_k8s_aws_instance_cpu': '24', 'label_karpenter_k8s_aws_instance_encryption_in_transit_supported': 'true', 'label_karpenter_k8s_aws_instance_family': 'i3en', 'label_karpenter_k8s_aws_instance_generation': '3', 'label_karpenter_k8s_aws_instance_hypervisor': 'nitro', 'label_karpenter_k8s_aws_instance_local_nvme': '15000', 'label_karpenter_k8s_aws_instance_memory': '196608', 'label_karpenter_k8s_aws_instance_network_bandwidth': '25000', 'label_karpenter_k8s_aws_instance_pods': '234', 'label_karpenter_k8s_aws_instance_size': '6xlarge', 'label_karpenter_sh_capacity_type': 'spot', 'label_karpenter_sh_initialized': 'true', 'label_karpenter_sh_provisioner_name': 'glr-x86-64-v4', 'label_kubernetes_io_arch': 'amd64', 'label_kubernetes_io_hostname': 'ip-192-168-86-107.ec2.internal', 'label_kubernetes_io_os': 'linux', 'label_node_kubernetes_io_instance_type': 'i3en.6xlarge', 'label_spack_io_pipeline': 'true', 'label_spack_io_x86_64': 'v4', 'label_topology_ebs_csi_aws_com_zone': 'us-east-1c', 'label_topology_kubernetes_io_region': 'us-east-1', 'label_topology_kubernetes_io_zone': 'us-east-1c', 'namespace': 'monitoring', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'kube-prometheus-stack-kube-state-metrics-dbd66d8c7-6ftw8', 'service': 'kube-prometheus-stack-kube-state-metrics'}, 'value': [1706117733, '1']}]}}
+
+# modified version of VALID_MEMORY_USAGE to make the mean/stddev 0
+INVALID_MEMORY_USAGE = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'__name__': 'container_memory_working_set_bytes', 'container': 'build', 'endpoint': 'https-metrics', 'id': '/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podd7aa13e0_998c_4f21_b1d6_62781f4980b0.slice/cri-containerd-48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1.scope', 'image': 'ghcr.io/spack/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01', 'instance': '192.168.86.107:10250', 'job': 'kubelet', 'metrics_path': '/metrics/cadvisor', 'name': '48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kubelet'}, 'values': [[1706117115, '0']]}]}}
diff --git a/gantry/tests/defs/db.py b/gantry/tests/defs/db.py
new file mode 100644
index 0000000..d4e5290
--- /dev/null
+++ b/gantry/tests/defs/db.py
@@ -0,0 +1,5 @@
+# flake8: noqa
+# fmt: off
+
+# valid input into insert_node
+NODE_INSERT_DICT = {"uuid": "ec253b04-b1dc-f08b-acac-e23df83b3602", "hostname": "ip-192-168-86-107.ec2.internal", "cores": 24.0, "mem": 196608000000.0, "arch": "amd64", "os": "linux", "instance_type": "i3en.6xlarge"}
diff --git a/gantry/tests/defs/prometheus.py b/gantry/tests/defs/prometheus.py
new file mode 100644
index 0000000..eeec4d3
--- /dev/null
+++ b/gantry/tests/defs/prometheus.py
@@ -0,0 +1,12 @@
+# flake8: noqa
+# fmt: off
+
+QUERY_DICT = {"metric": "kube_pod_annotations","filters": {"annotation_gitlab_ci_job_id": 1}}
+QUERY_STR = "rate(container_cpu_usage_seconds_total{pod='1', container='build'}[90s])"
+
+# encoded versions of the above that were put through the original version of process_query
+ENCODED_QUERY_DICT = "kube_pod_annotations%7Bannotation_gitlab_ci_job_id%3D%221%22%7D"
+ENCODED_QUERY_STR = "rate%28container_cpu_usage_seconds_total%7Bpod%3D%271%27%2C%20container%3D%27build%27%7D%5B90s%5D%29"
+
+# this will not be parsed as a query
+INVALID_QUERY = 1
diff --git a/gantry/tests/sql/insert_job.sql b/gantry/tests/sql/insert_job.sql
new file mode 100644
index 0000000..3008da6
--- /dev/null
+++ b/gantry/tests/sql/insert_job.sql
@@ -0,0 +1 @@
+INSERT INTO jobs VALUES(1,'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z',2,1706117046,1706118420,9892514,'success','pr42264_bugfix/mathomp4/hdf5-appleclang15','gmsh','4.8.4','{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',16,0.75,NULL,4.12532286694540495,3.15805864677520409,11.6038107294648877,0.248374361826775191,3.34888880339475214,2000000000.0,48000000000.0,1649868862.72588062,999763968.0,5679742976.0,2785280.0,1378705563.21018671);
\ No newline at end of file
diff --git a/gantry/tests/sql/insert_node.sql b/gantry/tests/sql/insert_node.sql
new file mode 100644
index 0000000..cad50ee
--- /dev/null
+++ b/gantry/tests/sql/insert_node.sql
@@ -0,0 +1,2 @@
+--- primary key is set to 2 to set up the test that checks for race conditions
+INSERT INTO nodes VALUES(2,'ec253b04-b1dc-f08b-acac-e23df83b3602','ip-192-168-86-107.ec2.internal',24.0,196608000000.0,'amd64','linux','i3en.6xlarge');
\ No newline at end of file
diff --git a/gantry/tests/test_collection.py b/gantry/tests/test_collection.py
new file mode 100644
index 0000000..7e35d11
--- /dev/null
+++ b/gantry/tests/test_collection.py
@@ -0,0 +1,145 @@
+import pytest
+
+from gantry.clients.gitlab import GitlabClient
+from gantry.clients.prometheus import PrometheusClient
+from gantry.routes.collection import fetch_job, fetch_node
+from gantry.tests.defs import collection as defs
+
+# mapping of prometheus request shortcuts
+# to raw values that would be returned by resp.json()
+
+# note: the ordering of this dict indicates the order of the calls
+# if the order in which PrometheusClient._query is called changes,
+# this dict must be updated
+PROMETHEUS_REQS = {
+    "job_annotations": defs.VALID_ANNOTATIONS,
+    "job_resources": defs.VALID_RESOURCE_REQUESTS,
+    "job_limits": defs.VALID_RESOURCE_LIMITS,
+    "job_memory_usage": defs.VALID_MEMORY_USAGE,
+    "job_cpu_usage": defs.VALID_CPU_USAGE,
+    "node_info": defs.VALID_NODE_INFO,
+    "node_labels": defs.VALID_NODE_LABELS,
+}
+
+
+@pytest.fixture
+async def gitlab(mocker):
+    """Returns GitlabClient with some default (mocked) behavior"""
+
+    # mock the request to the gitlab api
+    # default is to return normal log that wouldn't be detected as a ghost job
+    mocker.patch.object(GitlabClient, "_request", return_value=defs.VALID_JOB_LOG)
+    return GitlabClient("", "")
+
+
+@pytest.fixture
+async def prometheus(mocker):
+    """Returns PrometheusClient with some default (mocked) behavior"""
+
+    # use dict value iterable to mock multiple calls
+    mocker.patch.object(
+        PrometheusClient, "_query", side_effect=PROMETHEUS_REQS.values()
+    )
+    return PrometheusClient("", "")
+
+
+@pytest.mark.parametrize(
+    "key, value",
+    [
+        ("build_status", defs.INVALID_JOB_STATUS),
+        ("build_name", defs.INVALID_JOB_NAME),
+        ("runner", defs.INVALID_RUNNER),
+    ],
+)
+async def test_invalid_gitlab_fields(db_conn, gitlab, prometheus, key, value):
+    """Tests behavior when invalid data from Gitlab is passed to fetch_job"""
+    payload = defs.VALID_JOB.copy()
+    payload[key] = value
+
+    assert await fetch_job(payload, db_conn, gitlab, prometheus) is None
+
+
+async def test_job_exists(db_conn):
+    """
+    Tests that fetch_job returns None when the job already exists in the database.
+    The return value of fetch_job is only used to indicate when a job is inserted,
+    not if it's found in the database.
+    """
+    # node must be inserted before job to avoid foreign key constraint
+    with open("gantry/tests/sql/insert_node.sql") as f:
+        await db_conn.executescript(f.read())
+    with open("gantry/tests/sql/insert_job.sql") as f:
+        await db_conn.executescript(f.read())
+
+    assert await fetch_job(defs.VALID_JOB, db_conn, None, None) is None
+
+
+async def test_ghost_job(db_conn, gitlab, mocker):
+    """Tests that a ghost job is detected"""
+
+    mocker.patch.object(gitlab, "_request", return_value=defs.GHOST_JOB_LOG)
+    assert await fetch_job(defs.VALID_JOB, db_conn, gitlab, None) is None
+
+
+@pytest.mark.parametrize(
+    "req",
+    [
+        "job_annotations",
+        "job_resources",
+        "job_limits",
+        "job_memory_usage",
+        "job_cpu_usage",
+        "node_info",
+        "node_labels",
+    ],
+)
+async def test_missing_data(db_conn, gitlab, prometheus, req):
+    """Tests behavior when Prometheus data is missing for certain requests"""
+
+    p = PROMETHEUS_REQS.copy()
+    # for each req in PROMETHEUS_REQS, set it to an empty dict
+    p[req] = {}
+    prometheus._query.side_effect = p.values()
+    assert await fetch_job(defs.VALID_JOB, db_conn, gitlab, prometheus) is None
+
+
+async def test_invalid_usage(db_conn, gitlab, prometheus):
+    """Test that when resource usage is invalid (eg mean=0), the job is not inserted"""
+
+    p = PROMETHEUS_REQS.copy()
+    # could also be cpu usage
+    p["job_memory_usage"] = defs.INVALID_MEMORY_USAGE
+    prometheus._query.side_effect = p.values()
+    assert await fetch_job(defs.VALID_JOB, db_conn, gitlab, prometheus) is None
+
+
+async def test_job_node_inserted(db_conn, gitlab, prometheus):
+    """Tests that the job and node are in the database after calling fetch_job"""
+
+    await fetch_job(defs.VALID_JOB, db_conn, gitlab, prometheus)
+    # as the first records in the database, the ids should be 1
+    async with db_conn.execute("SELECT * FROM jobs WHERE id=?", (1,)) as cursor:
+        job = await cursor.fetchone()
+    async with db_conn.execute("SELECT * FROM nodes WHERE id=?", (1,)) as cursor:
+        node = await cursor.fetchone()
+    assert job == defs.INSERTED_JOB
+    assert node == defs.INSERTED_NODE
+
+
+async def test_node_exists(db_conn, prometheus):
+    """Tests that fetch_node returns the existing node id when the node
+    is already in the database"""
+
+    # when fetch_node is called, only two prometheus requests are made
+    # (see comment above PROMETHEUS_REQS)
+    prometheus._query.side_effect = [
+        PROMETHEUS_REQS["node_info"],
+        PROMETHEUS_REQS["node_labels"],
+    ]
+
+    # in the inserted row, the node id is 2 because if the fetch_node call
+    # inserts a new node, the id would be set to 1
+    with open("gantry/tests/sql/insert_node.sql") as f:
+        await db_conn.executescript(f.read())
+
+    assert await fetch_node(db_conn, prometheus, None, None) == 2
diff --git a/gantry/tests/test_db.py b/gantry/tests/test_db.py
new file mode 100644
index 0000000..8d265c5
--- /dev/null
+++ b/gantry/tests/test_db.py
@@ -0,0 +1,30 @@
+from gantry.db.insert import insert_job, insert_node
+from gantry.tests.defs import db as defs
+
+
+async def test_node_insert_race(db_conn):
+    """
+    Tests the situation where two identical jobs are inserted around the same time.
+    The first call should insert the node and the second should return the id
+    of the first.
+ """ + # the id of this row is 1 + with open("gantry/tests/sql/insert_node.sql") as f: + await db_conn.executescript(f.read()) + + # the id of NODE_INSERT_DICT is 2, but the output should be 1 + assert await insert_node(db_conn, defs.NODE_INSERT_DICT) == 2 + + +async def test_insert_node_incomplete(db_conn): + """ + Tests that when missing data is passed to the insert_node function, it returns None. + Issues around using lastrowid to get the id of the inserted row were returning + old inserted rows, so this ensures that the function returns None when it should. + """ + assert await insert_node(db_conn, {"uuid": None}) is None + + +async def test_insert_job_incomplete(db_conn): + """See test_insert_node_incomplete""" + assert await insert_job(db_conn, {"node": None}) is None diff --git a/gantry/tests/test_prometheus.py b/gantry/tests/test_prometheus.py new file mode 100644 index 0000000..ecf82e2 --- /dev/null +++ b/gantry/tests/test_prometheus.py @@ -0,0 +1,19 @@ +import pytest + +from gantry.clients.prometheus import util +from gantry.clients.prometheus.prometheus import PrometheusClient +from gantry.tests.defs import prometheus as defs + + +def test_cookie_set(): + """Test that a cookie is set when specified""" + p = PrometheusClient("", "cookie") + assert p.cookies == {"_oauth2_proxy": "cookie"} + + +def test_process_query(): + """Test that a query is parsed and encoded properly, from both dict and string""" + assert util.process_query(defs.QUERY_DICT) == defs.ENCODED_QUERY_DICT + assert util.process_query(defs.QUERY_STR) == defs.ENCODED_QUERY_STR + with pytest.raises(ValueError): + util.process_query(defs.INVALID_QUERY) diff --git a/gantry/tests/test_spec.py b/gantry/tests/test_spec.py new file mode 100644 index 0000000..b91ca86 --- /dev/null +++ b/gantry/tests/test_spec.py @@ -0,0 +1,21 @@ +from gantry.util.spec import spec_variants + + +def test_valid_spec(): + """Test a valid spec string to be parsed""" + assert spec_variants( + "+adios2~advanced_debug patches=02253c7,acb3805,b724e6a use_vtkm=on" + ) == { + "adios2": True, + "advanced_debug": False, + "patches": ["02253c7", "acb3805", "b724e6a"], + "use_vtkm": "on", + } + + +def test_invalid_spec(): + """Test some invalid specs""" + assert spec_variants("fefj!@#$%^&eifejifeifeij---5893843$%^&*()") == {} + assert spec_variants("fifheife") == {} + assert spec_variants("++++++") == {} + assert spec_variants("+~~++") == {} diff --git a/pyproject.toml b/pyproject.toml index 4620ee1..fe739ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,3 +2,11 @@ profile = "black" skip_gitignore = true color_output = true + +[tool.pytest.ini_options] +asyncio_mode = "auto" + +[tool.coverage.run] +include = [ + "gantry/*" +] diff --git a/spack.yaml b/spack.yaml index 44863c0..a36b502 100644 --- a/spack.yaml +++ b/spack.yaml @@ -3,7 +3,9 @@ spack: - python - py-aiohttp - py-pytest - - py-pytest-asyncio + - py-pytest-aiohttp + - py-pytest-mock + - py-coverage - py-flake8 - py-black - py-isort