diff --git a/.github/workflows/requirements/unit-tests.txt b/.github/workflows/requirements/unit-tests.txt
index 1393afd..2de0d46 100644
--- a/.github/workflows/requirements/unit-tests.txt
+++ b/.github/workflows/requirements/unit-tests.txt
@@ -1,2 +1,3 @@
-pytest==7.4.3
-pytest-asyncio==0.23.2
+pytest==8.0.1
+pytest-aiohttp==1.0.5
+pytest-mock==3.12.0
\ No newline at end of file
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 551403a..c48dfed 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -9,17 +9,22 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.8', '3.11']
+        python-version: ['3.11']
     steps:
     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
     - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c
       with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
-       cache-dependency-path: '.github/workflows/requirements/unit-tests.txt'
+       cache-dependency-path: |
+         'requirements.txt'
+         '.github/workflows/requirements/unit-tests.txt'
+
    - name: Install Python dependencies
      run: |
+       pip install -r requirements.txt
        pip install -r .github/workflows/requirements/unit-tests.txt
+
    - name: Run Unit Tests with Pytest
      run: |
-       pytest
+       python -m pytest gantry
diff --git a/.gitignore b/.gitignore
index 372e265..944f826 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ __pycache__
 spack.lock
 .spack-env
 db/*.db
+.coverage
+htmlcov
\ No newline at end of file
diff --git a/db/schema.sql b/db/schema.sql
index bba3549..a9dc104 100644
--- a/db/schema.sql
+++ b/db/schema.sql
@@ -1,3 +1,5 @@
+PRAGMA foreign_keys = ON;
+
 CREATE TABLE nodes (
     id INTEGER PRIMARY KEY,
     uuid TEXT NOT NULL UNIQUE,
@@ -27,7 +29,7 @@ CREATE TABLE jobs (
     stack TEXT NOT NULL,
     build_jobs INTEGER NOT NULL,
     cpu_request REAL NOT NULL,
-    cpu_limit REAL, -- this can be null becasue it's currently not set
+    cpu_limit REAL, -- this can be null because it's currently not set
     cpu_mean REAL NOT NULL,
    cpu_median REAL NOT NULL,
    cpu_max REAL NOT NULL,
diff --git a/gantry/clients/prometheus/prometheus.py b/gantry/clients/prometheus/prometheus.py
index 1d92503..424d1b1 100644
--- a/gantry/clients/prometheus/prometheus.py
+++ b/gantry/clients/prometheus/prometheus.py
@@ -39,7 +39,7 @@ async def query_single(self, query: str | dict, time: int) -> list:
         query = util.process_query(query)
         url = f"{self.base_url}/query?query={query}&time={time}"
 
-        return await self._query(url)
+        return self.prettify_res(await self._query(url))
 
     async def query_range(self, query: str | dict, start: int, end: int) -> list:
         """Query Prometheus for a range of values
@@ -64,7 +64,7 @@ async def query_range(self, query: str | dict, start: int, end: int) -> list:
             f"end={end}&"
             f"step={step}s"
         )
-        return await self._query(url)
+        return self.prettify_res(await self._query(url))
 
     async def _query(self, url: str) -> list:
         """Query Prometheus with a query string"""
@@ -72,7 +72,7 @@ async def _query(self, url: str) -> list:
             # submit cookie with request
             async with session.get(url, cookies=self.cookies) as resp:
                 try:
-                    return self.prettify_res(await resp.json())
+                    return await resp.json()
                 except aiohttp.ContentTypeError:
                     logger.error(
                         """Prometheus query failed with unexpected response.
@@ -81,7 +81,7 @@ async def _query(self, url: str) -> list:
                     return {}
 
     def prettify_res(self, response: dict) -> list:
-        """Process Prometheus response into an arrray of dicts with {label: value}"""
+        """Process Prometheus response into a list of dicts with {label: value}"""
         result_type = response.get("data", {}).get("resultType")
         values_dict = {
             "matrix": "values",
diff --git a/gantry/db/get.py b/gantry/db/get.py
index 362829c..735f1a8 100644
--- a/gantry/db/get.py
+++ b/gantry/db/get.py
@@ -26,15 +26,3 @@ async def job_exists(db: aiosqlite.Connection, gl_id: int) -> bool:
             return True
 
     return False
-
-
-async def ghost_exists(db: aiosqlite.Connection, gl_id: int) -> bool:
-    """return if the ghost job exists in the database"""
-
-    async with db.execute(
-        "select id from ghost_jobs where gitlab_id = ?", (gl_id,)
-    ) as cursor:
-        if await cursor.fetchone():
-            return True
-
-    return False
diff --git a/gantry/db/insert.py b/gantry/db/insert.py
index 7564ad9..c000d4b 100644
--- a/gantry/db/insert.py
+++ b/gantry/db/insert.py
@@ -1,7 +1,11 @@
+import logging
+
 import aiosqlite
 
 from gantry.db.get import get_node
 
+logger = logging.getLogger(__name__)
+
 
 def insert_dict(table: str, input: dict, ignore=False) -> tuple[str, tuple]:
     """
@@ -35,15 +39,20 @@ async def insert_node(db: aiosqlite.Connection, node: dict) -> int:
             "nodes",
             node,
             # deal with races
+            # this also ignores the not-null constraint
+            # so we need to make sure the node is valid before inserting
             ignore=True,
         )
     ) as cursor:
-        pk = cursor.lastrowid
-
-        if pk == 0:
-            # the ignore part of the query was triggered, some other call
-            # must have inserted the node before this one
-            pk = await get_node(db, node["uuid"])
+        # this check ensures that something was inserted
+        # and not relying on lastrowid, which could be anything
+        if cursor.rowcount > 0:
+            return cursor.lastrowid
+
+    pk = await get_node(db, node["uuid"])
+
+    if pk is None:
+        logger.error(f"node not inserted: {node}. data is likely missing")
 
     return pk
 
@@ -60,4 +69,8 @@ async def insert_job(db: aiosqlite.Connection, job: dict) -> int:
             ignore=True,
         )
     ) as cursor:
-        return cursor.lastrowid
+        if cursor.rowcount > 0:
+            return cursor.lastrowid
+
+    logger.error(f"job not inserted: {job}. data is likely missing")
+    return None
diff --git a/gantry/models/job.py b/gantry/models/job.py
index 3c3a794..5dd8649 100644
--- a/gantry/models/job.py
+++ b/gantry/models/job.py
@@ -15,8 +15,11 @@ def __init__(
         self.status = status
         self.name = name
         self.gl_id = gl_id
-        self.start = datetime.fromisoformat(start).timestamp()
-        self.end = datetime.fromisoformat(end).timestamp()
+        # handle jobs that haven't started or finished
+        if start:
+            self.start = datetime.fromisoformat(start).timestamp()
+        if end:
+            self.end = datetime.fromisoformat(end).timestamp()
         self.ref = ref
 
     @property
diff --git a/gantry/routes/collection.py b/gantry/routes/collection.py
index f8fd695..aeb6602 100644
--- a/gantry/routes/collection.py
+++ b/gantry/routes/collection.py
@@ -45,10 +45,11 @@ async def fetch_job(
     if (
         job.status != "success"
         or not job.valid_build_name  # is not a build job
+        # some jobs don't have runners..?
+ or payload["runner"] is None # uo runners are not in Prometheus or payload["runner"]["description"].startswith("uo") or await db.job_exists(db_conn, job.gl_id) # job already in the database - or await db.ghost_exists(db_conn, job.gl_id) # ghost already in db ): return @@ -56,6 +57,7 @@ async def fetch_job( job_log = await gitlab.job_log(job.gl_id) is_ghost = "No need to rebuild" in job_log if is_ghost: + logger.warning(f"job {job.gl_id} is a ghost, skipping") return try: @@ -70,7 +72,7 @@ async def fetch_job( logger.error(f"{e} job={job.gl_id}") return - await db.insert_job( + job_id = await db.insert_job( db_conn, { "node": node_id, @@ -89,7 +91,7 @@ async def fetch_job( # we don't accidentally commit a node without a job await db_conn.commit() - return + return job_id async def fetch_node( diff --git a/gantry/tests/conftest.py b/gantry/tests/conftest.py new file mode 100644 index 0000000..5c1bdc2 --- /dev/null +++ b/gantry/tests/conftest.py @@ -0,0 +1,16 @@ +# fixtures shared among all tests + +import aiosqlite +import pytest + + +@pytest.fixture +async def db_conn(): + """ + In-memory sqlite connection ensures that the database is clean for each test + """ + db = await aiosqlite.connect(":memory:") + with open("db/schema.sql") as f: + await db.executescript(f.read()) + yield db + await db.close() diff --git a/gantry/tests/defs/__init__.py b/gantry/tests/defs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gantry/tests/defs/collection.py b/gantry/tests/defs/collection.py new file mode 100644 index 0000000..92fcab8 --- /dev/null +++ b/gantry/tests/defs/collection.py @@ -0,0 +1,37 @@ +# flake8: noqa +# fmt: off + +INVALID_JOB_NAME = "invalid job name" +# uo runners are not supported +INVALID_RUNNER = {"description": "uo-blabla1821"} +INVALID_JOB_STATUS = "failure" +GHOST_JOB_LOG = "No need to rebuild" +VALID_JOB_LOG = "some log" + +VALID_JOB = { + "build_status": "success", + "build_name": "gmsh@4.8.4 /jcchwaj %gcc@11.4.0 arch=linux-ubuntu20.04-x86_64_v3 E4S", + "build_id": 9892514, # not used in testing unless it already exists in the db + "build_started_at": "2024-01-24T17:24:06.000Z", + "build_finished_at": "2024-01-24T17:47:00.000Z", + "ref": "pr42264_bugfix/mathomp4/hdf5-appleclang15", + "runner": {"description": "aws"}, +} + +# used to compare successful insertions +# run SELECT * FROM table_name WHERE id = 1; from python sqlite api and grab fetchone() result +INSERTED_JOB = (1, 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 1, 1706117046, 1706118420, 9892514, 'success', 'pr42264_bugfix/mathomp4/hdf5-appleclang15', 'gmsh', '4.8.4', '{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}', 'gcc', '11.4.0', 'linux-ubuntu20.04-x86_64_v3', 'e4s', 16, 0.75, None, 1.899768349523097, 0.2971597591741076, 4.128116379389054, 0.2483743618267752, 1.7602635378120381, 2000000000.0, 48000000000.0, 143698407.6190476, 2785280.0, 594620416.0, 2785280.0, 252073065.82263485) +INSERTED_NODE = (1, 'ec253b04-b1dc-f08b-acac-e23df83b3602', 'ip-192-168-86-107.ec2.internal', 24.0, 196608000000.0, 'amd64', 'linux', 'i3en.6xlarge') + +# these were obtained by executing the respective queries to 
+# or the raw output of PrometheusClient._query
+VALID_ANNOTATIONS = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_pod_annotations', 'annotation_gitlab_ci_job_id': '9892514', 'annotation_metrics_spack_ci_stack_name': 'e4s', 'annotation_metrics_spack_job_spec_arch': 'linux-ubuntu20.04-x86_64_v3', 'annotation_metrics_spack_job_spec_compiler_name': 'gcc', 'annotation_metrics_spack_job_spec_compiler_version': '11.4.0', 'annotation_metrics_spack_job_spec_pkg_name': 'gmsh', 'annotation_metrics_spack_job_spec_pkg_version': '4.8.4', 'annotation_metrics_spack_job_spec_variants': '+alglib~cairo+cgns+compression~eigen~external+fltk+gmp~hdf5~ipo+med+metis+mmg+mpi+netgen+oce~opencascade~openmp~petsc~privateapi+shared~slepc+tetgen+voropp build_system=cmake build_type=Release generator=make', 'container': 'kube-state-metrics', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'namespace': 'pipeline', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kube-state-metrics', 'uid': 'd7aa13e0-998c-4f21-b1d6-62781f4980b0'}, 'value': [1706117733, '1']}]}}
+VALID_RESOURCE_REQUESTS = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_pod_container_resource_requests', 'container': 'build', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'resource': 'cpu', 'service': 'kube-prometheus-stack-kube-state-metrics', 'uid': 'd7aa13e0-998c-4f21-b1d6-62781f4980b0', 'unit': 'core'}, 'value': [1706117733, '0.75']}, {'metric': {'__name__': 'kube_pod_container_resource_requests', 'container': 'build', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'resource': 'memory', 'service': 'kube-prometheus-stack-kube-state-metrics', 'uid': 'd7aa13e0-998c-4f21-b1d6-62781f4980b0', 'unit': 'byte'}, 'value': [1706117733, '2000000000']}]}}
+VALID_RESOURCE_LIMITS = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_pod_container_resource_limits', 'container': 'build', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'resource': 'memory', 'service': 'kube-prometheus-stack-kube-state-metrics', 'uid': 'd7aa13e0-998c-4f21-b1d6-62781f4980b0', 'unit': 'byte'}, 'value': [1706117733, '48000000000']}]}}
+VALID_MEMORY_USAGE = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'__name__': 'container_memory_working_set_bytes', 'container': 'build', 'endpoint': 'https-metrics', 'id': '/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podd7aa13e0_998c_4f21_b1d6_62781f4980b0.slice/cri-containerd-48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1.scope', 'image': 'ghcr.io/spack/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01', 'instance': '192.168.86.107:10250', 'job': 'kubelet', 'metrics_path': '/metrics/cadvisor', 'name': '48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kubelet'}, 'values': [[1706117115, '2785280'], [1706117116, '2785280'], [1706117117, '2785280'], [1706117118, '2785280'], [1706117119, '2785280'], [1706117120, '2785280'], [1706117121, '2785280'], [1706117122, '2785280'], [1706117123, '2785280'], [1706117124, '2785280'], [1706117125, '2785280'], [1706117126, '2785280'], [1706117127, '2785280'], [1706117128, '2785280'], [1706117129, '2785280'], [1706117130, '2785280'], [1706118416, '594620416'], [1706118417, '594620416'], [1706118418, '594620416'], [1706118419, '594620416'], [1706118420, '594620416']]}]}}
+VALID_CPU_USAGE = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'container': 'build', 'cpu': 'total', 'endpoint': 'https-metrics', 'id': '/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podd7aa13e0_998c_4f21_b1d6_62781f4980b0.slice/cri-containerd-48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1.scope', 'image': 'ghcr.io/spack/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01', 'instance': '192.168.86.107:10250', 'job': 'kubelet', 'metrics_path': '/metrics/cadvisor', 'name': '48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kubelet'}, 'values': [[1706117145, '0.2483743618267752'], [1706117146, '0.25650526138466395'], [1706117147, '0.26463616094255266'], [1706117148, '0.2727670605004414'], [1706117149, '0.28089796005833007'], [1706117150, '0.2890288596162188'], [1706117151, '0.2971597591741076'], [1706117357, '3.7319005481816236'], [1706117358, '3.7319005481816236'], [1706117359, '3.7319005481816236'], [1706117360, '3.7319005481816245'], [1706117361, '3.7319005481816245'], [1706118420, '4.128116379389054']]}]}}
+VALID_NODE_INFO = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_node_info', 'container': 'kube-state-metrics', 'container_runtime_version': 'containerd://1.7.2', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'internal_ip': '192.168.86.107', 'job': 'kube-state-metrics', 'kernel_version': '5.10.205-195.804.amzn2.x86_64', 'kubelet_version': 'v1.27.9-eks-5e0fdde', 'kubeproxy_version': 'v1.27.9-eks-5e0fdde', 'namespace': 'monitoring', 'node': 'ip-192-168-86-107.ec2.internal', 'os_image': 'Amazon Linux 2', 'pod': 'kube-prometheus-stack-kube-state-metrics-dbd66d8c7-6ftw8', 'provider_id': 'aws:///us-east-1c/i-0fe9d9c99fdb3631d', 'service': 'kube-prometheus-stack-kube-state-metrics', 'system_uuid': 'ec253b04-b1dc-f08b-acac-e23df83b3602'}, 'value': [1706117733, '1']}]}}
+VALID_NODE_LABELS = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_node_labels', 'container': 'kube-state-metrics', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'label_beta_kubernetes_io_arch': 'amd64', 'label_beta_kubernetes_io_instance_type': 'i3en.6xlarge', 'label_beta_kubernetes_io_os': 'linux', 'label_failure_domain_beta_kubernetes_io_region': 'us-east-1', 'label_failure_domain_beta_kubernetes_io_zone': 'us-east-1c', 'label_k8s_io_cloud_provider_aws': 'ceb9f9cc8e47252a6f7fe7d6bded2655', 'label_karpenter_k8s_aws_instance_category': 'i', 'label_karpenter_k8s_aws_instance_cpu': '24', 'label_karpenter_k8s_aws_instance_encryption_in_transit_supported': 'true', 'label_karpenter_k8s_aws_instance_family': 'i3en', 'label_karpenter_k8s_aws_instance_generation': '3', 'label_karpenter_k8s_aws_instance_hypervisor': 'nitro', 'label_karpenter_k8s_aws_instance_local_nvme': '15000', 'label_karpenter_k8s_aws_instance_memory': '196608', 'label_karpenter_k8s_aws_instance_network_bandwidth': '25000', 'label_karpenter_k8s_aws_instance_pods': '234', 'label_karpenter_k8s_aws_instance_size': '6xlarge', 'label_karpenter_sh_capacity_type': 'spot', 'label_karpenter_sh_initialized': 'true', 'label_karpenter_sh_provisioner_name': 'glr-x86-64-v4', 'label_kubernetes_io_arch': 'amd64', 'label_kubernetes_io_hostname': 'ip-192-168-86-107.ec2.internal', 'label_kubernetes_io_os': 'linux', 'label_node_kubernetes_io_instance_type': 'i3en.6xlarge', 'label_spack_io_pipeline': 'true', 'label_spack_io_x86_64': 'v4', 'label_topology_ebs_csi_aws_com_zone': 'us-east-1c', 'label_topology_kubernetes_io_region': 'us-east-1', 'label_topology_kubernetes_io_zone': 'us-east-1c', 'namespace': 'monitoring', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'kube-prometheus-stack-kube-state-metrics-dbd66d8c7-6ftw8', 'service': 'kube-prometheus-stack-kube-state-metrics'}, 'value': [1706117733, '1']}]}}
+
+# modified version of VALID_MEMORY_USAGE to make the mean/stddev 0
+INVALID_MEMORY_USAGE = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'__name__': 'container_memory_working_set_bytes', 'container': 'build', 'endpoint': 'https-metrics', 'id': '/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podd7aa13e0_998c_4f21_b1d6_62781f4980b0.slice/cri-containerd-48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1.scope', 'image': 'ghcr.io/spack/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01', 'instance': '192.168.86.107:10250', 'job': 'kubelet', 'metrics_path': '/metrics/cadvisor', 'name': '48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kubelet'}, 'values': [[1706117115, '0']]}]}}
diff --git a/gantry/tests/defs/db.py b/gantry/tests/defs/db.py
new file mode 100644
index 0000000..d4e5290
--- /dev/null
+++ b/gantry/tests/defs/db.py
@@ -0,0 +1,5 @@
+# flake8: noqa
+# fmt: off
+
+# valid input into insert_node
+NODE_INSERT_DICT = {"uuid": "ec253b04-b1dc-f08b-acac-e23df83b3602", "hostname": "ip-192-168-86-107.ec2.internal", "cores": 24.0, "mem": 196608000000.0, "arch": "amd64", "os": "linux", "instance_type": "i3en.6xlarge"}
diff --git a/gantry/tests/defs/prometheus.py b/gantry/tests/defs/prometheus.py
new file mode 100644
index 0000000..eeec4d3
--- /dev/null
+++ b/gantry/tests/defs/prometheus.py
@@ -0,0 +1,12 @@
+# flake8: noqa
+# fmt: off
+
+QUERY_DICT = {"metric": "kube_pod_annotations","filters": {"annotation_gitlab_ci_job_id": 1}}
+QUERY_STR = "rate(container_cpu_usage_seconds_total{pod='1', container='build'}[90s])"
+
+# encoded versions of the above that were put through the original version of process_query
+ENCODED_QUERY_DICT = "kube_pod_annotations%7Bannotation_gitlab_ci_job_id%3D%221%22%7D"
+ENCODED_QUERY_STR = "rate%28container_cpu_usage_seconds_total%7Bpod%3D%271%27%2C%20container%3D%27build%27%7D%5B90s%5D%29"
+
+# this will not be parsed as a query
+INVALID_QUERY = 1
diff --git a/gantry/tests/sql/insert_job.sql b/gantry/tests/sql/insert_job.sql
new file mode 100644
index 0000000..3008da6
--- /dev/null
+++ b/gantry/tests/sql/insert_job.sql
@@ -0,0 +1 @@
+INSERT INTO jobs VALUES(1,'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z',2,1706117046,1706118420,9892514,'success','pr42264_bugfix/mathomp4/hdf5-appleclang15','gmsh','4.8.4','{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',16,0.75,NULL,4.12532286694540495,3.15805864677520409,11.6038107294648877,0.248374361826775191,3.34888880339475214,2000000000.0,48000000000.0,1649868862.72588062,999763968.0,5679742976.0,2785280.0,1378705563.21018671);
\ No newline at end of file
diff --git a/gantry/tests/sql/insert_node.sql b/gantry/tests/sql/insert_node.sql
new file mode 100644
index 0000000..cad50ee
--- /dev/null
+++ b/gantry/tests/sql/insert_node.sql
@@ -0,0 +1,2 @@
+--- primary key is set to 2 to set up the test that checks for race conditions
+INSERT INTO nodes VALUES(2,'ec253b04-b1dc-f08b-acac-e23df83b3602','ip-192-168-86-107.ec2.internal',24.0,196608000000.0,'amd64','linux','i3en.6xlarge');
\ No newline at end of file
diff --git a/gantry/tests/test_collection.py b/gantry/tests/test_collection.py
new file mode 100644
index 0000000..7e35d11
--- /dev/null
+++ b/gantry/tests/test_collection.py
@@ -0,0 +1,145 @@
+import pytest
+
+from gantry.clients.gitlab import GitlabClient
+from gantry.clients.prometheus import PrometheusClient
+from gantry.routes.collection import fetch_job, fetch_node
+from gantry.tests.defs import collection as defs
+
+# mapping of prometheus request shortcuts
+# to raw values that would be returned by resp.json()
+
+# note: the ordering of this dict indicates the order of the calls
+# if the order in which PrometheusClient._query is called changes,
+# this dict must be updated
+PROMETHEUS_REQS = {
+    "job_annotations": defs.VALID_ANNOTATIONS,
+    "job_resources": defs.VALID_RESOURCE_REQUESTS,
+    "job_limits": defs.VALID_RESOURCE_LIMITS,
+    "job_memory_usage": defs.VALID_MEMORY_USAGE,
+    "job_cpu_usage": defs.VALID_CPU_USAGE,
+    "node_info": defs.VALID_NODE_INFO,
+    "node_labels": defs.VALID_NODE_LABELS,
+}
+
+
+@pytest.fixture
+async def gitlab(mocker):
+    """Returns GitlabClient with some default (mocked) behavior"""
+
+    # mock the request to the gitlab api
+    # default is to return normal log that wouldn't be detected as a ghost job
+    mocker.patch.object(GitlabClient, "_request", return_value=defs.VALID_JOB_LOG)
+    return GitlabClient("", "")
+
+
+@pytest.fixture
+async def prometheus(mocker):
+    """Returns PrometheusClient with some default (mocked) behavior"""
+
+    # use dict value iterable to mock multiple calls
+    mocker.patch.object(
+        PrometheusClient, "_query", side_effect=PROMETHEUS_REQS.values()
+    )
+    return PrometheusClient("", "")
+
+
+@pytest.mark.parametrize(
+    "key, value",
+    [
+        ("build_status", defs.INVALID_JOB_STATUS),
+        ("build_name", defs.INVALID_JOB_NAME),
+        ("runner", defs.INVALID_RUNNER),
+    ],
+)
+async def test_invalid_gitlab_fields(db_conn, gitlab, prometheus, key, value):
+    """Tests behavior when invalid data from Gitlab is passed to fetch_job"""
+    payload = defs.VALID_JOB.copy()
+    payload[key] = value
+
+    assert await fetch_job(payload, db_conn, gitlab, prometheus) is None
+
+
+async def test_job_exists(db_conn):
+    """
+    Tests that fetch_job returns None when the job already exists in the database.
+    The return value of fetch_job is only used to indicate when a job is inserted,
+    not if it's found in the database.
+    """
+    # node must be inserted before job to avoid foreign key constraint
+    with open("gantry/tests/sql/insert_node.sql") as f:
+        await db_conn.executescript(f.read())
+    with open("gantry/tests/sql/insert_job.sql") as f:
+        await db_conn.executescript(f.read())
+
+    assert await fetch_job(defs.VALID_JOB, db_conn, None, None) is None
+
+
+async def test_ghost_job(db_conn, gitlab, mocker):
+    """Tests that a ghost job is detected"""
+
+    mocker.patch.object(gitlab, "_request", return_value=defs.GHOST_JOB_LOG)
+    assert await fetch_job(defs.VALID_JOB, db_conn, gitlab, None) is None
+
+
+@pytest.mark.parametrize(
+    "req",
+    [
+        "job_annotations",
+        "job_resources",
+        "job_limits",
+        "job_memory_usage",
+        "job_cpu_usage",
+        "node_info",
+        "node_labels",
+    ],
+)
+async def test_missing_data(db_conn, gitlab, prometheus, req):
+    """Tests behavior when Prometheus data is missing for certain requests"""
+
+    p = PROMETHEUS_REQS.copy()
+    # for each req in PROMETHEUS_REQS, set it to an empty dict
+    p[req] = {}
+    prometheus._query.side_effect = p.values()
+    assert await fetch_job(defs.VALID_JOB, db_conn, gitlab, prometheus) is None
+
+
+async def test_invalid_usage(db_conn, gitlab, prometheus):
+    """Test that when resource usage is invalid (eg mean=0), the job is not inserted"""
+
+    p = PROMETHEUS_REQS.copy()
+    # could also be cpu usage
+    p["job_memory_usage"] = defs.INVALID_MEMORY_USAGE
+    prometheus._query.side_effect = p.values()
+    assert await fetch_job(defs.VALID_JOB, db_conn, gitlab, prometheus) is None
+
+
+async def test_job_node_inserted(db_conn, gitlab, prometheus):
+    """Tests that the job and node are in the database after calling fetch_job"""
+
+    await fetch_job(defs.VALID_JOB, db_conn, gitlab, prometheus)
+    # as the first records in the database, the ids should be 1
+    async with db_conn.execute("SELECT * FROM jobs WHERE id=?", (1,)) as cursor:
+        job = await cursor.fetchone()
+    async with db_conn.execute("SELECT * FROM nodes WHERE id=?", (1,)) as cursor:
+        node = await cursor.fetchone()
+    assert job == defs.INSERTED_JOB
+    assert node == defs.INSERTED_NODE
+
+
+async def test_node_exists(db_conn, prometheus):
+    """Tests that fetch_node returns the existing node id when the node
+    is already in the database"""
+
+    # when fetch_node is called, only two prometheus requests are made
+    # (see comment above PROMETHEUS_REQS)
+    prometheus._query.side_effect = [
+        PROMETHEUS_REQS["node_info"],
+        PROMETHEUS_REQS["node_labels"],
+    ]
+
+    # in the inserted row, the node id is 2 because if the fetch_node call
+    # inserts a new node, the id would be set to 1
+    with open("gantry/tests/sql/insert_node.sql") as f:
+        await db_conn.executescript(f.read())
+
+    assert await fetch_node(db_conn, prometheus, None, None) == 2
diff --git a/gantry/tests/test_db.py b/gantry/tests/test_db.py
new file mode 100644
index 0000000..8d265c5
--- /dev/null
+++ b/gantry/tests/test_db.py
@@ -0,0 +1,30 @@
+from gantry.db.insert import insert_job, insert_node
+from gantry.tests.defs import db as defs
+
+
+async def test_node_insert_race(db_conn):
+    """
+    Tests the situation where two identical jobs are inserted around the same time.
+    The first call should insert the node and the second should return the id
+    of the first.
+ """ + # the id of this row is 1 + with open("gantry/tests/sql/insert_node.sql") as f: + await db_conn.executescript(f.read()) + + # the id of NODE_INSERT_DICT is 2, but the output should be 1 + assert await insert_node(db_conn, defs.NODE_INSERT_DICT) == 2 + + +async def test_insert_node_incomplete(db_conn): + """ + Tests that when missing data is passed to the insert_node function, it returns None. + Issues around using lastrowid to get the id of the inserted row were returning + old inserted rows, so this ensures that the function returns None when it should. + """ + assert await insert_node(db_conn, {"uuid": None}) is None + + +async def test_insert_job_incomplete(db_conn): + """See test_insert_node_incomplete""" + assert await insert_job(db_conn, {"node": None}) is None diff --git a/gantry/tests/test_prometheus.py b/gantry/tests/test_prometheus.py new file mode 100644 index 0000000..ecf82e2 --- /dev/null +++ b/gantry/tests/test_prometheus.py @@ -0,0 +1,19 @@ +import pytest + +from gantry.clients.prometheus import util +from gantry.clients.prometheus.prometheus import PrometheusClient +from gantry.tests.defs import prometheus as defs + + +def test_cookie_set(): + """Test that a cookie is set when specified""" + p = PrometheusClient("", "cookie") + assert p.cookies == {"_oauth2_proxy": "cookie"} + + +def test_process_query(): + """Test that a query is parsed and encoded properly, from both dict and string""" + assert util.process_query(defs.QUERY_DICT) == defs.ENCODED_QUERY_DICT + assert util.process_query(defs.QUERY_STR) == defs.ENCODED_QUERY_STR + with pytest.raises(ValueError): + util.process_query(defs.INVALID_QUERY) diff --git a/gantry/tests/test_spec.py b/gantry/tests/test_spec.py new file mode 100644 index 0000000..b91ca86 --- /dev/null +++ b/gantry/tests/test_spec.py @@ -0,0 +1,21 @@ +from gantry.util.spec import spec_variants + + +def test_valid_spec(): + """Test a valid spec string to be parsed""" + assert spec_variants( + "+adios2~advanced_debug patches=02253c7,acb3805,b724e6a use_vtkm=on" + ) == { + "adios2": True, + "advanced_debug": False, + "patches": ["02253c7", "acb3805", "b724e6a"], + "use_vtkm": "on", + } + + +def test_invalid_spec(): + """Test some invalid specs""" + assert spec_variants("fefj!@#$%^&eifejifeifeij---5893843$%^&*()") == {} + assert spec_variants("fifheife") == {} + assert spec_variants("++++++") == {} + assert spec_variants("+~~++") == {} diff --git a/pyproject.toml b/pyproject.toml index 4620ee1..fe739ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,3 +2,11 @@ profile = "black" skip_gitignore = true color_output = true + +[tool.pytest.ini_options] +asyncio_mode = "auto" + +[tool.coverage.run] +include = [ + "gantry/*" +] diff --git a/spack.yaml b/spack.yaml index 44863c0..a36b502 100644 --- a/spack.yaml +++ b/spack.yaml @@ -3,7 +3,9 @@ spack: - python - py-aiohttp - py-pytest - - py-pytest-asyncio + - py-pytest-aiohttp + - py-pytest-mock + - py-coverage - py-flake8 - py-black - py-isort