From 5078d540de0031485a72460eac56c7268cee2b52 Mon Sep 17 00:00:00 2001 From: Phil Budne <phil@regressive.org> Date: Sun, 22 Dec 2024 16:39:23 -0500 Subject: [PATCH] indexer/scripts/elastic-stats.py: get node cpu/loadavg from node.stats() --- indexer/scripts/elastic-stats.py | 51 ++++++++++++-------------------- 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/indexer/scripts/elastic-stats.py b/indexer/scripts/elastic-stats.py index 76c3f113..4083e7c6 100644 --- a/indexer/scripts/elastic-stats.py +++ b/indexer/scripts/elastic-stats.py @@ -111,6 +111,25 @@ def node_stats(self) -> None: labels=node_labels + [("name", breaker_name)], ) + os_data = node_data["os"] + cpu_data = os_data["cpu"] + + cpu_pct = cpu_data["percent"] + self.g("node.os.cpu.percent", cpu_pct, labels=node_labels) + + # report in old location too, for now + # (can be removed after one week in production) + self.g("cat.nodes.cpu", cpu_pct, labels=node_labels) + + lavg = cpu_data["load_average"] + for m in (1, 5, 15): + value = lavg[f"{m}m"] + self.g(f"node.os.cpu.load_average.{m}m", value, labels=node_labels) + + # report in old location too, for now + # (can be removed after one week in production) + self.g(f"cat.nodes.load_{m}m", value, labels=node_labels) + def cluster_health(self) -> None: es = self.elasticsearch_client() cluster_health = cast(Dict[str, Any], es.cluster.health().raw) @@ -149,33 +168,6 @@ def cluster_health(self) -> None: ]: self.g(f"cluster.health.pending_tasks.{short}", cluster_health[attr]) - def cat_nodes(self) -> None: - """ - Docs say: - - cat (Compact and aligned text) APIs are only intended for - human consumption using the Kibana console or command - line. They are not intended for use by applications. For - application consumption, we recommend using a corresponding - JSON API. - - BUT I can't find the CPU/loadvg data elsewere, and while CPU% and - loadavg info is available on a per-server basis (by server name) - not by stack name / realm. - """ - - es = self.elasticsearch_client() - nodes = cast(list[dict[str, Any]], es.cat.nodes(format="json").raw) - keys = ["cpu", "load_1m", "load_5m", "load_15m"] - for node in nodes: - name = node["name"].split(".")[0] - labels = [("node", name)] - for key in keys: - try: - self.g(f"cat.nodes.{key}", node[key], labels=labels) - except KeyError: - pass - def main_loop(self) -> None: while True: try: @@ -193,11 +185,6 @@ def main_loop(self) -> None: except (ConnectionError, ConnectionTimeout, KeyError) as e: logger.warning("cluster.health: %r", e) - try: - self.cat_nodes() - except (ConnectionError, ConnectionTimeout, KeyError) as e: - logger.warning("cat.nodes: %r", e) - # sleep until top of next period: self.interval_sleep()