indexer/scripts/elastic-stats.py: get node cpu/loadavg from node.stats() #362

Open · wants to merge 1 commit into main

51 changes: 19 additions & 32 deletions indexer/scripts/elastic-stats.py
@@ -111,6 +111,25 @@ def node_stats(self) -> None:
labels=node_labels + [("name", breaker_name)],
)

os_data = node_data["os"]
cpu_data = os_data["cpu"]
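# expected shape, per the nodes stats API: cpu_data carries "percent"
# plus (on Linux/Unix nodes) a "load_average" dict keyed "1m"/"5m"/"15m"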

cpu_pct = cpu_data["percent"]
self.g("node.os.cpu.percent", cpu_pct, labels=node_labels)

# report in old location too, for now
# (can be removed after one week in production)
self.g("cat.nodes.cpu", cpu_pct, labels=node_labels)

lavg = cpu_data["load_average"]
for m in (1, 5, 15):
value = lavg[f"{m}m"]
self.g(f"node.os.cpu.load_average.{m}m", value, labels=node_labels)

# report in old location too, for now
# (can be removed after one week in production)
self.g(f"cat.nodes.load_{m}m", value, labels=node_labels)

def cluster_health(self) -> None:
es = self.elasticsearch_client()
cluster_health = cast(Dict[str, Any], es.cluster.health().raw)
@@ -149,33 +168,6 @@ def cluster_health(self) -> None:
]:
self.g(f"cluster.health.pending_tasks.{short}", cluster_health[attr])

def cat_nodes(self) -> None:
"""
Docs say:

cat (Compact and aligned text) APIs are only intended for
human consumption using the Kibana console or command
line. They are not intended for use by applications. For
application consumption, we recommend using a corresponding
JSON API.

BUT I can't find the CPU/loadavg data elsewhere, and while CPU% and
loadavg info is available on a per-server basis (by server name),
it is not broken out by stack name / realm.
"""

es = self.elasticsearch_client()
nodes = cast(list[dict[str, Any]], es.cat.nodes(format="json").raw)
keys = ["cpu", "load_1m", "load_5m", "load_15m"]
for node in nodes:
name = node["name"].split(".")[0]
labels = [("node", name)]
for key in keys:
try:
self.g(f"cat.nodes.{key}", node[key], labels=labels)
except KeyError:
pass

def main_loop(self) -> None:
while True:
try:
@@ -193,11 +185,6 @@ def main_loop(self) -> None:
except (ConnectionError, ConnectionTimeout, KeyError) as e:
logger.warning("cluster.health: %r", e)

try:
self.cat_nodes()
except (ConnectionError, ConnectionTimeout, KeyError) as e:
logger.warning("cat.nodes: %r", e)

# sleep until top of next period:
self.interval_sleep()
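
For anyone reviewing the new metrics, a minimal sketch (not part of this diff) of how the same payload can be inspected with the elasticsearch Python client; the localhost URL is a placeholder, `.raw` simply mirrors the access pattern the script already uses, and `load_average` only appears on Linux/Unix nodes:

from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # placeholder address
stats = es.nodes.stats(metric="os").raw  # restrict the response to the "os" section
for node_id, node_data in stats["nodes"].items():
    cpu = node_data["os"]["cpu"]
    print(node_data["name"], cpu["percent"], cpu.get("load_average"))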
