forked from tym-xqo/pg_dba_metrics
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add Alert script - yaml config file for setting check and threshold per metric - automatically updates status on failure, so won't alert again until clear - added args for invoking Scheduler and quieting alerts
- Loading branch information
Showing
29 changed files
with
570 additions
and
290 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,7 @@ | ||
DATABASE_URL=postgres://localhost:5432/yardstick | ||
STORE_DB_URL=postgres://localhost:5432/metrics | ||
METRIC_ENV=development | ||
DATABASE_URL=postgres://some_user@localhost:5432/cool_facts | ||
STORE_DB_URL=postgres://dba_user@localhost:5432/metrics | ||
SLACK_TOKEN=xxxx-0000000 | ||
CHANNEL=AB123456 | ||
HOSTNAME=localhost |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -112,4 +112,6 @@ venv.bak/ | |
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
.pyre/ | ||
|
||
query_bak/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
#pg_dba_metrics | ||
# pg_dba_metrics | ||
|
||
Simple Python app that executes arbitrary queries against a database, returns the results as timestamped JSON suitable for insertion into a time-series table, and checks the results against configurable thresholds for alerting via a Slack bot. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
- check: test_data_point | ||
  name: test_metric | ||
  status: clear | ||
  threshold: 0 | ||
- check: foo | ||
  name: test | ||
  status: clear | ||
  threshold: 5 | ||
- check: duration | ||
  name: long-connection | ||
  status: clear | ||
  threshold: 600 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
import os | ||
from itertools import cycle | ||
|
||
import yaml | ||
from dotenv import find_dotenv, load_dotenv | ||
from slack_post import slack_post | ||
|
||
# In development, values from the .env file take precedence over variables
# already present in the process environment; otherwise the environment wins.
override = os.getenv("METRIC_ENV", "development") == "development"

load_dotenv(find_dotenv(), override=override)

HOSTNAME = os.getenv("HOSTNAME", "localhost")

# Alert definitions: a list of mappings with name/check/threshold/status.
# The file is read through a context manager so the handle is closed right
# away, and an empty file (which parses to None) is treated as "no alerts
# configured" instead of breaking later iteration over CONFIG.
with open("config.yaml", "r") as _config_file:
    CONFIG = yaml.safe_load(_config_file) or []
|
||
|
||
def send_alert(metric, value):
    """Post a Slack message describing a metric's current alert status.

    Args:
        metric: Mapping providing "name", "check", "threshold" and "status".
            The whole mapping is also dumped as YAML into the message body.
        value: The observed value that triggered this alert.

    Returns:
        Whatever ``slack_post`` returns for the posted message.
    """
    # TODO: include raw metric JSON as attachment
    name, check, threshold, status = (
        metric[key] for key in ("name", "check", "threshold", "status")
    )
    full_metric = yaml.safe_dump(metric)

    # "danger" marks a failure; every other status is shown as "good".
    color = "danger" if status == "failure" else "good"

    message = (
        f"Metric *{name}* {check} is {value}\nThreshold is {threshold}\n"
        f"```{full_metric}```"
    )

    return slack_post(title=f"{HOSTNAME} {status}", message=message, color=color)
|
||
|
||
def update_config(metric):
    """Persist a metric's alert settings to config.yaml and to CONFIG.

    The entry whose name matches ``metric["name"]`` is replaced by a fresh
    entry built from the metric's "name", "check", "threshold" and "status"
    keys; the new entry is appended after all other entries.

    Args:
        metric: Mapping that contains at least the four keys listed above.

    Raises:
        KeyError: If ``metric`` lacks one of the required keys.
    """
    name = metric["name"]
    new_config = [entry for entry in CONFIG if entry["name"] != name]
    new_config.append(
        {key: metric[key] for key in ("name", "check", "threshold", "status")}
    )
    with open("config.yaml", "w") as config_file:
        config_file.write(yaml.safe_dump(new_config))

    # Keep the in-memory copy in step with the file. Without this, a
    # long-running (scheduled) process keeps evaluating metrics against the
    # statuses loaded at import time, so it re-alerts on every run and later
    # writes revert other metrics' statuses to their stale values.
    CONFIG[:] = new_config
|
||
|
||
def swap_status(status):
    """Return the opposite alert status.

    Args:
        status: Current status string.

    Returns:
        "failure" when ``status`` is "clear"; "clear" for any other value
        (including "failure").
    """
    # A two-state toggle needs no iterator; the result is the same as
    # stepping through a clear/failure cycle until the value differs.
    return "failure" if status == "clear" else "clear"
|
||
|
||
def check_metric(metric):
    """Compare each data row of a metric with its threshold and alert on change.

    A metric whose status is not "failure" trips when a row's value is
    greater than or equal to the threshold; a metric in "failure" trips
    (recovers) when a row's value is below the threshold. Each tripping row
    flips ``metric["status"]``, sends an alert and rewrites the config.

    The status is read once before the loop, so every row is tested against
    the status the metric had on entry.

    Args:
        metric: Mapping with "data" (iterable of row mappings), "status",
            "check" (key to read from each row) and "threshold".

    Returns:
        The result of the last alert sent, or None if no row tripped.
    """
    # TODO: Support failure modes other than `> threshold`
    rows = metric["data"]
    entry_status = metric["status"]
    field = metric["check"]
    limit = metric["threshold"]
    in_failure = entry_status == "failure"
    last_alert = None

    for row in rows:
        observed = row[field]
        tripped = observed < limit if in_failure else observed >= limit
        if not tripped:
            continue
        metric["status"] = swap_status(entry_status)
        last_alert = send_alert(metric, observed)
        update_config(metric)
    return last_alert
|
||
|
||
def alert_check(metric):
    """Run the threshold check for a metric if it has an entry in CONFIG.

    The first CONFIG entry with the same name is merged with ``metric``
    (keys in ``metric`` win) and the merged mapping is passed to
    ``check_metric``.

    Args:
        metric: Mapping with at least a "name" key.

    Returns:
        The result of ``check_metric`` for a configured metric, otherwise
        None.
    """
    wanted = metric["name"]
    for entry in CONFIG:
        if entry["name"] == wanted:
            return check_metric({**entry, **metric})
    return None
|
||
|
||
if __name__ == "__main__":
    # When executed as a script, run one hard-coded sample metric through
    # alert_check.
    metric = {"data": [{"test_data_point": -1}], "name": "test_metric"}
    alert_check(metric)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import argparse | ||
import glob | ||
import os | ||
|
||
from apscheduler.schedulers.blocking import BlockingScheduler | ||
from check import store_db, store_metric | ||
from dotenv import find_dotenv, load_dotenv | ||
|
||
# In development, values from the .env file take precedence over variables
# already present in the process environment; otherwise the environment wins.
override = os.getenv("METRIC_ENV", "development") == "development"
load_dotenv(find_dotenv(), override=override)

# Seconds between scheduled collection runs. os.getenv returns a str whenever
# INTERVAL is actually set, so coerce to int to give the scheduler a number
# in both the default and the configured case.
INTERVAL = int(os.getenv("INTERVAL", 60))
|
||
|
||
def get_metrics(as_json=False, quiet=False):
    """Call ``store_metric`` once for every entry under ``query_files/``.

    Args:
        as_json: Passed through to ``store_metric`` as its second argument.
        quiet: Passed through to ``store_metric`` as its third argument.
    """
    metric_names = map(os.path.basename, glob.glob("query_files/*"))
    for metric_name in metric_names:
        store_metric(metric_name, as_json, quiet)
|
||
|
||
def create_table():
    """Create the ``perf_metric`` table through ``store_db`` if not present.

    Columns: ``metric_id`` (bigserial primary key), ``stamp`` (timestamp
    with time zone), ``payload`` (jsonb) and ``name`` (text).
    """
    sql = (
        "create table if not exists perf_metric( "
        # The comma after the primary-key column was missing, which made the
        # whole statement a syntax error.
        "metric_id bigserial primary key, "
        "stamp timestamp with time zone, "
        "payload jsonb, "
        "name text)"
    )
    store_db.query(sql)
|
||
|
||
def schedule(as_json=False, quiet=False):
    """Run ``get_metrics`` repeatedly on a blocking interval scheduler.

    The job fires every ``INTERVAL`` seconds and the call blocks until the
    process is interrupted.

    Args:
        as_json: Forwarded to ``get_metrics`` as its first argument.
        quiet: Forwarded to ``get_metrics`` as its second argument.
    """
    job_args = [as_json, quiet]
    runner = BlockingScheduler(timezone="UTC")
    runner.add_job(get_metrics, trigger="interval", args=job_args, seconds=INTERVAL)
    print("Press Ctrl+C to exit")

    try:
        # Blocks here until Ctrl+C is pressed or the interpreter exits.
        runner.start()
    except (KeyboardInterrupt, SystemExit):
        # Interruption is the expected way to stop; exit quietly.
        pass
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: -j/--json, -q/--no-alerts and -s/--schedule
    # are all boolean switches that default to off.
    parser = argparse.ArgumentParser()
    for short_flag, long_flag in (
        ("-j", "--json"),
        ("-q", "--no-alerts"),
        ("-s", "--schedule"),
    ):
        parser.add_argument(short_flag, long_flag, action="store_true", default=False)
    args = parser.parse_args()

    # The storage table is only ensured when --json is not given.
    if not args.json:
        create_table()

    # Either keep collecting on a schedule or collect once and exit.
    run = schedule if args.schedule else get_metrics
    run(as_json=args.json, quiet=args.no_alerts)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import os | ||
|
||
import slack | ||
from dotenv import find_dotenv, load_dotenv | ||
|
||
# In development, values from the .env file take precedence over variables
# already present in the process environment; otherwise the environment wins.
override = os.getenv("METRIC_ENV", "development") == "development"

load_dotenv(find_dotenv(), override=override)

TOKEN = os.getenv("SLACK_TOKEN")
CHANNEL = os.getenv("CHANNEL")
# Default to "localhost", the same fallback the alert module uses, so the
# bot username never renders as "None DBA alert" when HOSTNAME is unset.
HOSTNAME = os.getenv("HOSTNAME", "localhost")

# Shared Slack client used by slack_post.
client = slack.WebClient(TOKEN)
|
||
|
||
def slack_post(title="Test", message="Hello world!", color="#999999"):
    """Post a single-attachment message to the configured Slack channel.

    Args:
        title: Attachment title.
        message: Attachment text; also used as the attachment's fallback.
        color: Attachment color value.

    Returns:
        The result of ``client.chat_postMessage``.
    """
    attachment = {
        "fallback": message,
        "title": title,
        "text": message,
        "color": color,
    }
    return client.chat_postMessage(
        channel=CHANNEL,
        attachments=[attachment],
        username=f"{HOSTNAME} DBA alert",
    )
|
||
|
||
if __name__ == "__main__":
    # When executed as a script, post one sample message using the default
    # title and color.
    slack_post(message="Something blah blah")
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.