Skip to content

Commit

Permalink
Add cost per job metrics
Browse files Browse the repository at this point in the history
Closes #75

Computes and stores the following metrics:

- cpu_cost: cost of using CPU resources on the node, based on the CPU request of the job
- mem_cost: cost of using memory resources on the node, based on the memory request of the job
- cpu_penalty: penalty factor that represents the over or under allocation of CPU resources
- mem_penalty: penalty factor that represents the over or under allocation of memory resources
  • Loading branch information
cmelone committed Oct 10, 2024
1 parent 25cadab commit b3c5b7b
Show file tree
Hide file tree
Showing 11 changed files with 216 additions and 11 deletions.
3 changes: 3 additions & 0 deletions gantry/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ async def apply_migrations(db: aiosqlite.Connection):
# and not inadvertently added to the migrations folder
("001_initial.sql", 1),
("002_spec_index.sql", 2),
("003_job_cost.sql", 3),
]

# apply migrations that have not been applied
Expand All @@ -45,6 +46,8 @@ async def apply_migrations(db: aiosqlite.Connection):
# Set up the application's database connection and close it again afterwards.
# NOTE(review): the bare `yield` suggests this is registered as an aiohttp
# cleanup context -- confirm where it is hooked into the app.
async def init_db(app: web.Application):
# open the database file named by the DB_FILE environment variable
db = await aiosqlite.connect(os.environ["DB_FILE"])
# apply pending migrations before the connection is handed to the app
await apply_migrations(db)
# ensure foreign key constraints are enabled
await db.execute("PRAGMA foreign_keys = ON")
# make the connection available to the rest of the app under the "db" key
app["db"] = db
# the statements after the yield only run once the generator is resumed
yield
await db.close()
Expand Down
112 changes: 112 additions & 0 deletions gantry/clients/prometheus/job.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json

import aiosqlite

from gantry.clients.prometheus import util
from gantry.util.spec import spec_variants

Expand Down Expand Up @@ -152,3 +154,113 @@ async def get_usage(self, pod: str, start: float, end: float) -> dict:
"mem_min": mem_usage["min"],
"mem_stddev": mem_usage["stddev"],
}

async def get_costs(
    self,
    db: aiosqlite.Connection,
    resources: dict,
    usage: dict,
    start: float,
    end: float,
    node_id: int,
) -> dict:
    """
    Calculates the costs associated with a job.

    Objectives:
        - we want to measure the cost of a job's submission and execution
        - measure efficiency of resource usage to discourage wasted cycles

    The cost should be independent of other activity on the node in order
    to be comparable against other jobs.

    To normalize the cost of resources within instance types, we calculate
    the cost of each CPU and memory unit in the node during the lifetime
    of the job.

    Rather than using real usage as a factor in the cost, we use the requests,
    as they block other jobs from using resources. In this case, jobs will be
    incentivized to make lower requests, while also factoring in the runtime.

    To account for instances where jobs do not use their requested resources
    (+/-), we compute a penalty factor that can be used to understand the cost
    imposed on the rest of the node, or jobs that could have been scheduled on
    the machine. The penalty is max(1 / ratio, ratio) of usage to request, so
    it is 1 for a perfectly sized request and grows in either direction.

    Job cost and the penalties are stored separately for each resource to allow
    for flexibility. When analyzing these costs, instance type should be
    factored in, as the cost per job is influenced by the cost per resource,
    which will vary.

    args:
        db: a database connection
        resources: job requests and limits (reads cpu_request, mem_request)
        usage: job memory and cpu usage (reads cpu_mean, mem_mean)
        start: job start time
        end: job end time
        node_id: the node that the job ran on

    returns:
        dict of: cpu_cost, mem_cost, cpu_penalty, mem_penalty

    raises:
        util.IncompleteData: if the node or its price data cannot be found, or
            if any value the cost formulas divide by is missing or zero
    """
    node_query = (
        "\nselect capacity_type, instance_type, zone, cores, mem\n"
        "from nodes where id = ?\n"
    )
    async with db.execute(node_query, (node_id,)) as cursor:
        node = await cursor.fetchone()

    if not node:
        # this is a temporary condition expected during the transition
        # to collecting node instance metadata
        raise util.IncompleteData(
            f"node instance metadata is missing from db. node={node_id}"
        )

    capacity_type, instance_type, zone, cores, mem = node

    # both values are used as divisors below
    if not cores or not mem:
        raise util.IncompleteData(
            f"node cores or memory is missing. node={node_id}"
        )

    # spot instance prices can change, so we avg the cost over the job's runtime
    price_series = await self.client.query_range(
        query={
            "metric": "karpenter_cloudprovider_instance_type_offering_price_estimate",  # noqa: E501
            "filters": {
                "capacity_type": capacity_type,
                "instance_type": instance_type,
                "zone": zone,
            },
        },
        start=start,
        end=end,
    )

    if not price_series:
        raise util.IncompleteData(f"node cost is missing. node={node_id}")

    prices = [float(value) for _, value in price_series[0]["values"]]
    if not prices:
        # a series without samples would otherwise divide by zero below
        raise util.IncompleteData(f"node cost is missing. node={node_id}")

    cpu_request = resources["cpu_request"]
    mem_request = resources["mem_request"]
    if not cpu_request or not mem_request:
        raise util.IncompleteData(
            f"job resource requests are missing or zero. node={node_id}"
        )

    # these utilization ratios determine the extent to which resources
    # were misallocated for this job
    cpu_util_ratio = usage["cpu_mean"] / cpu_request
    mem_util_ratio = usage["mem_mean"] / mem_request
    if cpu_util_ratio <= 0 or mem_util_ratio <= 0:
        # the penalty takes the reciprocal of each ratio, so a job with no
        # recorded usage cannot be scored; skip it instead of crashing
        raise util.IncompleteData(
            f"job usage is zero, cannot compute penalty. node={node_id}"
        )

    # average hourly cost of the instance over the job's lifetime
    instance_cost = sum(prices) / len(prices)
    # start/end are in seconds; convert the duration to hours so that it
    # matches the hourly instance price
    node_cost = instance_cost * ((end - start) / 60 / 60)

    # we assume that the cost of the node is split evenly between cpu and memory
    # cost of each CPU in the node during the lifetime of the job
    cost_per_cpu = (node_cost * 0.5) / cores
    # cost of each unit of memory (byte)
    cost_per_mem = (node_cost * 0.5) / mem

    return {
        # compute separate costs for cpu and memory usage
        # using requests instead of actual usage because requests are *guaranteed*
        "cpu_cost": cost_per_cpu * cpu_request,
        "mem_cost": cost_per_mem * mem_request,
        # penalty factors meant to discourage underallocation, which slows down
        # jobs, or overallocation, which prevents jobs from being scheduled on
        # the same node
        "cpu_penalty": max(1 / cpu_util_ratio, cpu_util_ratio),
        "mem_penalty": max(1 / mem_util_ratio, mem_util_ratio),
    }
2 changes: 2 additions & 0 deletions gantry/clients/prometheus/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,6 @@ async def get_labels(self, hostname: str, time: float) -> dict:
"arch": labels["label_kubernetes_io_arch"],
"os": labels["label_kubernetes_io_os"],
"instance_type": labels["label_node_kubernetes_io_instance_type"],
"capacity_type": labels["label_karpenter_sh_capacity_type"],
"zone": labels["label_topology_kubernetes_io_zone"],
}
7 changes: 7 additions & 0 deletions gantry/routes/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ async def fetch_job(
)
usage = await prometheus.job.get_usage(annotations["pod"], job.start, job.end)
node_id = await fetch_node(db_conn, prometheus, node_hostname, job.midpoint)
costs = await prometheus.job.get_costs(
db_conn, resources, usage, job.start, job.end, node_id
)

except IncompleteData as e:
# missing data, skip this job
logger.error(f"{e} job={job.gl_id}")
Expand All @@ -87,6 +91,7 @@ async def fetch_job(
**annotations,
**resources,
**usage,
**costs,
},
)

Expand Down Expand Up @@ -133,5 +138,7 @@ async def fetch_node(
"arch": node_labels["arch"],
"os": node_labels["os"],
"instance_type": node_labels["instance_type"],
"capacity_type": node_labels["capacity_type"],
"zone": node_labels["zone"],
},
)
5 changes: 3 additions & 2 deletions gantry/tests/defs/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@

# used to compare successful insertions
# run SELECT * FROM table_name WHERE id = 1; from python sqlite api and grab fetchone() result
INSERTED_JOB = (1, 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 1, 1706117046, 1706118420, 9892514, 'success', 'pr42264_bugfix/mathomp4/hdf5-appleclang15', 'gmsh', '4.8.4', '{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}', 'gcc', '11.4.0', 'linux-ubuntu20.04-x86_64_v3', 'e4s', 16, 0.75, None, 1.899768349523097, 0.2971597591741076, 4.128116379389054, 0.2483743618267752, 1.7602635378120381, 2000000000.0, 48000000000.0, 143698407.6190476, 2785280.0, 594620416.0, 2785280.0, 252073065.82263485)
INSERTED_NODE = (1, 'ec253b04-b1dc-f08b-acac-e23df83b3602', 'ip-192-168-86-107.ec2.internal', 24.0, 196608000000.0, 'amd64', 'linux', 'i3en.6xlarge')
INSERTED_JOB = (1, 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 1, 1706117046, 1706118420, 9892514, 'success', 'pr42264_bugfix/mathomp4/hdf5-appleclang15', 'gmsh', '4.8.4', '{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}', 'gcc', '11.4.0', 'linux-ubuntu20.04-x86_64_v3', 'e4s', 16, 0.75, None, 1.899768349523097, 0.2971597591741076, 4.128116379389054, 0.2483743618267752, 1.7602635378120381, 2000000000.0, 48000000000.0, 143698407.6190476, 2785280.0, 594620416.0, 2785280.0, 252073065.82263485, 0.002981770833333333, 0.0009706285264756945, 2.533024466030796, 13.918038711341255)
INSERTED_NODE = (1, 'ec253b04-b1dc-f08b-acac-e23df83b3602', 'ip-192-168-86-107.ec2.internal', 24.0, 196608000000.0, 'amd64', 'linux', 'i3en.6xlarge', 'us-east-1c', 'spot')

# these were obtained by executing the respective queries to Prometheus and capturing the JSON output
# or the raw output of PrometheusClient._query
Expand All @@ -32,6 +32,7 @@
VALID_CPU_USAGE = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'container': 'build', 'cpu': 'total', 'endpoint': 'https-metrics', 'id': '/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podd7aa13e0_998c_4f21_b1d6_62781f4980b0.slice/cri-containerd-48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1.scope', 'image': 'ghcr.io/spack/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01', 'instance': '192.168.86.107:10250', 'job': 'kubelet', 'metrics_path': '/metrics/cadvisor', 'name': '48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kubelet'}, 'values': [[1706117145, '0.2483743618267752'], [1706117146, '0.25650526138466395'], [1706117147, '0.26463616094255266'], [1706117148, '0.2727670605004414'], [1706117149, '0.28089796005833007'], [1706117150, '0.2890288596162188'], [1706117151, '0.2971597591741076'], [1706117357, '3.7319005481816236'], [1706117358, '3.7319005481816236'], [1706117359, '3.7319005481816236'], [1706117360, '3.7319005481816245'], [1706117361, '3.7319005481816245'], [1706118420, '4.128116379389054']]}]}}
VALID_NODE_INFO = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_node_info', 'container': 'kube-state-metrics', 'container_runtime_version': 'containerd://1.7.2', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'internal_ip': '192.168.86.107', 'job': 'kube-state-metrics', 'kernel_version': '5.10.205-195.804.amzn2.x86_64', 'kubelet_version': 'v1.27.9-eks-5e0fdde', 'kubeproxy_version': 'v1.27.9-eks-5e0fdde', 'namespace': 'monitoring', 'node': 'ip-192-168-86-107.ec2.internal', 'os_image': 'Amazon Linux 2', 'pod': 'kube-prometheus-stack-kube-state-metrics-dbd66d8c7-6ftw8', 'provider_id': 'aws:///us-east-1c/i-0fe9d9c99fdb3631d', 'service': 'kube-prometheus-stack-kube-state-metrics', 'system_uuid': 'ec253b04-b1dc-f08b-acac-e23df83b3602'}, 'value': [1706117733, '1']}]}}
VALID_NODE_LABELS = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_node_labels', 'container': 'kube-state-metrics', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'label_beta_kubernetes_io_arch': 'amd64', 'label_beta_kubernetes_io_instance_type': 'i3en.6xlarge', 'label_beta_kubernetes_io_os': 'linux', 'label_failure_domain_beta_kubernetes_io_region': 'us-east-1', 'label_failure_domain_beta_kubernetes_io_zone': 'us-east-1c', 'label_k8s_io_cloud_provider_aws': 'ceb9f9cc8e47252a6f7fe7d6bded2655', 'label_karpenter_k8s_aws_instance_category': 'i', 'label_karpenter_k8s_aws_instance_cpu': '24', 'label_karpenter_k8s_aws_instance_encryption_in_transit_supported': 'true', 'label_karpenter_k8s_aws_instance_family': 'i3en', 'label_karpenter_k8s_aws_instance_generation': '3', 'label_karpenter_k8s_aws_instance_hypervisor': 'nitro', 'label_karpenter_k8s_aws_instance_local_nvme': '15000', 'label_karpenter_k8s_aws_instance_memory': '196608', 'label_karpenter_k8s_aws_instance_network_bandwidth': '25000', 'label_karpenter_k8s_aws_instance_pods': '234', 'label_karpenter_k8s_aws_instance_size': '6xlarge', 'label_karpenter_sh_capacity_type': 'spot', 'label_karpenter_sh_initialized': 'true', 'label_karpenter_sh_provisioner_name': 'glr-x86-64-v4', 'label_kubernetes_io_arch': 'amd64', 'label_kubernetes_io_hostname': 'ip-192-168-86-107.ec2.internal', 'label_kubernetes_io_os': 'linux', 'label_node_kubernetes_io_instance_type': 'i3en.6xlarge', 'label_spack_io_pipeline': 'true', 'label_spack_io_x86_64': 'v4', 'label_topology_ebs_csi_aws_com_zone': 'us-east-1c', 'label_topology_kubernetes_io_region': 'us-east-1', 'label_topology_kubernetes_io_zone': 'us-east-1c', 'namespace': 'monitoring', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'kube-prometheus-stack-kube-state-metrics-dbd66d8c7-6ftw8', 'service': 'kube-prometheus-stack-kube-state-metrics'}, 'value': [1706117733, '1']}]}}
VALID_NODE_COST = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'__name__': 'karpenter_cloudprovider_instance_type_offering_price_estimate', 'capacity_type': 'spot', 'container': 'controller', 'endpoint': 'http-metrics', 'instance': '192.168.240.113:8000', 'instance_type': 'i3en.6xlarge', 'job': 'karpenter', 'namespace': 'karpenter', 'pod': 'karpenter-8488f7f6dc-ml7q8', 'region': 'us-east-1', 'service': 'karpenter', 'zone': 'us-east-1c'}, 'values': [[1723838829, '0.5']]}]}}

# modified version of VALID_MEMORY_USAGE to make the mean/stddev 0
INVALID_MEMORY_USAGE = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'__name__': 'container_memory_working_set_bytes', 'container': 'build', 'endpoint': 'https-metrics', 'id': '/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podd7aa13e0_998c_4f21_b1d6_62781f4980b0.slice/cri-containerd-48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1.scope', 'image': 'ghcr.io/spack/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01', 'instance': '192.168.86.107:10250', 'job': 'kubelet', 'metrics_path': '/metrics/cadvisor', 'name': '48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kubelet'}, 'values': [[1706117115, '0']]}]}}
2 changes: 1 addition & 1 deletion gantry/tests/defs/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# fmt: off

# valid input into insert_node
NODE_INSERT_DICT = {"uuid": "ec253b04-b1dc-f08b-acac-e23df83b3602", "hostname": "ip-192-168-86-107.ec2.internal", "cores": 24.0, "mem": 196608000000.0, "arch": "amd64", "os": "linux", "instance_type": "i3en.6xlarge"}
NODE_INSERT_DICT = {"uuid": "ec253b04-b1dc-f08b-acac-e23df83b3602", "hostname": "ip-192-168-86-107.ec2.internal", "cores": 24.0, "mem": 196608000000.0, "arch": "amd64", "os": "linux", "instance_type": "i3en.6xlarge", "zone": "us-east-1c", "capacity_type": "spot"}
2 changes: 1 addition & 1 deletion gantry/tests/sql/insert_job.sql
Original file line number Diff line number Diff line change
@@ -1 +1 @@
INSERT INTO jobs VALUES(1,'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z',2,1706117046,1706118420,9892514,'success','pr42264_bugfix/mathomp4/hdf5-appleclang15','gmsh','4.8.4','{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',16,0.75,NULL,4.12532286694540495,3.15805864677520409,11.6038107294648877,0.248374361826775191,3.34888880339475214,2000000000.0,48000000000.0,1649868862.72588062,999763968.0,5679742976.0,2785280.0,1378705563.21018671);
INSERT INTO jobs VALUES(1,'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z',2,1706117046,1706118420,9892514,'success','pr42264_bugfix/mathomp4/hdf5-appleclang15','gmsh','4.8.4','{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',16,0.75,NULL,4.12532286694540495,3.15805864677520409,11.6038107294648877,0.248374361826775191,3.34888880339475214,2000000000.0,48000000000.0,1649868862.72588062,999763968.0,5679742976.0,2785280.0,1378705563.21018671,0.002981770833333333, 0.0009706285264756945, 2.533024466030796, 13.918038711341255);
2 changes: 1 addition & 1 deletion gantry/tests/sql/insert_node.sql
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
--- primary key is set to 2 to set up the test that checks for race conditions
INSERT INTO nodes VALUES(2,'ec253b04-b1dc-f08b-acac-e23df83b3602','ip-192-168-86-107.ec2.internal',24.0,196608000000.0,'amd64','linux','i3en.6xlarge');
INSERT INTO nodes VALUES(2,'ec253b04-b1dc-f08b-acac-e23df83b3602','ip-192-168-86-107.ec2.internal',24.0,196608000000.0,'amd64','linux','i3en.6xlarge','us-east-1c','spot');
Loading

0 comments on commit b3c5b7b

Please sign in to comment.