Skip to content

Commit

Permalink
Add monitor features
Browse files Browse the repository at this point in the history
  • Loading branch information
idrissneumann committed Sep 11, 2024
1 parent 4131c96 commit 5731e2c
Show file tree
Hide file tree
Showing 8 changed files with 186 additions and 1 deletion.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,26 @@ service:
exporters: [logging]
```

## Monitor features

Imalive can also check HTTP endpoints, log the result of each check, and export the corresponding metrics (status and duration).

To enable this feature, override the `/app/imalive.yml` file with content like the following:

```yaml
---
monitors:
- type: http
name: imalive
url: http://localhost:8081
method: GET # optional (GET by default, only POST and GET are supported)
expected_http_code: 200 # optional (200 by default)
expected_contain: "\"status\":\"ok\"" # optional (no check on the body response if not present)
timeout: 30 # optional (30 seconds if not present)
username: changeit # optional (no basic auth if not present)
password: changeit # optional (no basic auth if not present)
```

## Development / contributions

Go see this [documentation](./CONTRIBUTING.md)
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.6.6
4.0.0
2 changes: 2 additions & 0 deletions imalive.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
---
monitors: {}
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ opentelemetry-api
opentelemetry-sdk
opentelemetry-instrumentation-fastapi
opentelemetry-exporter-otlp
pyyaml
2 changes: 2 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from utils.cid import get_current_cid
from utils.manifests import get_manifest_as_dict
from utils.heartbit import heartbit
from utils.monitor import monitors
from utils.otel import init_otel_tracer, init_otel_metrics, init_otel_logger

version = "unkown"
Expand Down Expand Up @@ -41,6 +42,7 @@
init_otel_logger()

heartbit()
monitors()

instrumentator.instrument(app, metric_namespace='imalive', metric_subsystem='imalive')
instrumentator.expose(app, endpoint='/v1/prom')
Expand Down
7 changes: 7 additions & 0 deletions src/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ def is_empty_key(vdict, key):
def is_not_empty_key(vdict, key):
    """Return the boolean negation of ``is_empty_key(vdict, key)``."""
    if is_empty_key(vdict, key):
        return False
    return True

def remove_key_safely(vdict, key):
    """Delete ``key`` from ``vdict`` in place, doing nothing when the key is considered empty.

    "Empty" is whatever ``is_empty_key`` reports for ``(vdict, key)``; in that
    case the mapping is left untouched.
    """
    if is_empty_key(vdict, key):
        return
    del vdict[key]

def get_or_else(vdict, key, default):
    """Return ``vdict[key]``, or ``default`` when ``is_empty_key`` reports the key as empty."""
    if is_empty_key(vdict, key):
        return default
    return vdict[key]

def is_numeric (var):
if (isinstance(var, int)):
return True
Expand Down
1 change: 1 addition & 0 deletions src/utils/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def all_metrics():
vdate = datetime.now()
return {
"status": "ok",
"type": "heartbit",
'name': os.environ['IMALIVE_NODE_NAME'],
'time': vdate.isoformat(),
"disk_usage": disk_usage(),
Expand Down
152 changes: 152 additions & 0 deletions src/utils/monitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import os
import yaml
import requests
import asyncio
import threading

import requests
import yaml

from datetime import datetime
from time import sleep
from requests.auth import HTTPBasicAuth

from utils.common import is_empty_key, get_or_else, is_not_empty, remove_key_safely
from utils.gauge import create_gauge, set_gauge
from utils.heartbit import WAIT_TIME
from utils.logger import log_msg
from utils.otel import get_otel_tracer

def check_http_monitor(monitor, gauges):
    """Run one HTTP check described by ``monitor`` and publish its outcome.

    The ``result`` gauge is set to 1 when the endpoint answers with the
    expected status code (and, if configured, the expected body fragment),
    and to 0 in every other case. The ``duration`` gauge receives the
    response time in seconds whenever a response was obtained.

    Args:
        monitor: dict describing the check. Keys read here: ``type``, ``url``,
            ``method`` (GET by default, only GET and POST are supported),
            ``timeout`` (30 by default), ``expected_http_code`` (200 by
            default), ``expected_contain``, ``username`` and ``password``.
            The dict is NOT modified.
        gauges: mapping holding the ``result`` and ``duration`` gauges of
            this monitor.

    Returns:
        None. The outcome is reported through the gauges and ``log_msg``.
    """
    vdate = datetime.now()

    # Log a copy without the password instead of deleting the key from
    # `monitor`: the caller reuses the same dict for every polling cycle, so
    # removing the key would silently disable basic auth after the first check.
    loggable_monitor = {k: v for k, v in monitor.items() if k != 'password'}

    def report(level, status, message, **extra):
        payload = {
            "status": status,
            "type": "monitor",
            "time": vdate.isoformat(),
        }
        payload.update(extra)
        payload["message"] = message
        payload["monitor"] = loggable_monitor
        log_msg(level, payload)

    def fail(message, level="ERROR", status="ko", **extra):
        set_gauge(gauges['result'], 0)
        report(level, status, message, **extra)

    # `.get` rather than indexing: a monitor without `type` must not raise a
    # KeyError here, outside the try block, and take the whole polling loop down.
    if monitor.get('type') != 'http':
        fail("Not an http monitor", level="DEBUG", status="ok")
        return

    if is_empty_key(monitor, 'url'):
        fail("Missing mandatory url")
        return

    # Accept lowercase spellings such as "get" or "post" from the YAML file.
    method = str(get_or_else(monitor, 'method', 'GET')).upper()
    timeout = get_or_else(monitor, 'timeout', 30)
    expected_http_code = get_or_else(monitor, 'expected_http_code', 200)
    expected_contain = get_or_else(monitor, 'expected_contain', None)
    username = get_or_else(monitor, 'username', None)
    password = get_or_else(monitor, 'password', None)

    if method not in ("GET", "POST"):
        fail("Not supported http method")
        return

    auth = None
    if is_not_empty(username) and is_not_empty(password):
        auth = HTTPBasicAuth(username, password)

    try:
        response = requests.request(method, monitor['url'], timeout=timeout, auth=auth)
        duration = response.elapsed.total_seconds()
        set_gauge(gauges['duration'], duration)

        if response.status_code != expected_http_code:
            fail(
                "Unexpected http code {} (expected {})".format(response.status_code, expected_http_code),
                duration=duration
            )
            return

        if is_not_empty(expected_contain) and expected_contain not in response.text:
            fail("Response not contain {}".format(expected_contain), duration=duration)
            return

        set_gauge(gauges['result'], 1)
        report("INFO", "ok", "Monitor is healthy", duration=duration)

    except Exception as e:
        # Deliberately broad: any failure (connection error, timeout, bad
        # configuration value...) marks the monitor as unhealthy instead of
        # propagating to the caller.
        fail("Unexpected error", error="{}".format(e))

gauges = {}
def monitors():
    """Start a daemon thread that periodically runs the monitors declared in ``imalive.yml``.

    The configuration file is read once, two gauges (``result`` and
    ``duration``) are registered per named monitor in the module-level
    ``gauges`` dict, then every monitor is checked, with a pause of
    ``WAIT_TIME`` between two rounds. Entries without a ``name`` are ignored.

    Returns:
        None. The work happens in the background thread.
    """
    def load_named_monitors():
        config_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '..', '..', 'imalive.yml'))
        with open(config_path, "r") as stream:
            loaded_data = yaml.safe_load(stream)

        # An empty YAML document loads as None, and the default configuration
        # declares `monitors: {}`; only a list of dicts carries usable monitors.
        declared = loaded_data.get('monitors') if isinstance(loaded_data, dict) else None
        if not isinstance(declared, list):
            return []

        return [m for m in declared if isinstance(m, dict) and not is_empty_key(m, 'name')]

    def loop_monitors():
        named_monitors = load_named_monitors()
        if not named_monitors:
            # Nothing to check: let the thread end instead of waking up
            # forever to do nothing.
            return

        for monitor in named_monitors:
            gauges[monitor['name']] = {
                'result': create_gauge("monitor_{}_result".format(monitor['name']), "monitor {} result".format(monitor['name'])),
                'duration': create_gauge("monitor_{}_duration".format(monitor['name']), "monitor {} duration".format(monitor['name']))
            }

        while True:
            with get_otel_tracer().start_as_current_span("imalive-monitors"):
                for monitor in named_monitors:
                    check_http_monitor(monitor, gauges[monitor['name']])
            sleep(WAIT_TIME)

    # loop_monitors is a plain blocking function, not a coroutine, so it is
    # used directly as the thread target rather than being handed to an
    # asyncio event loop.
    monitor_thread = threading.Thread(target=loop_monitors, daemon=True)
    monitor_thread.start()

0 comments on commit 5731e2c

Please sign in to comment.