Skip to content

Commit

Permalink
Implement Prometheus observability (#119)
Browse files Browse the repository at this point in the history
  • Loading branch information
No767 committed May 14, 2024
1 parent d05c403 commit 7018ff8
Show file tree
Hide file tree
Showing 9 changed files with 300 additions and 3 deletions.
2 changes: 2 additions & 0 deletions bot/cogs/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,7 @@ async def blocklist_add(
await tr.rollback()
await ctx.send("Unable to block user")
else:
self.bot.metrics.features.blocked_users.inc()
await tr.commit()
self.bot.blocklist.replace(blocklist)

Expand Down Expand Up @@ -680,6 +681,7 @@ async def blocklist_remove(self, ctx: GuildContext, entity: discord.Member) -> N
await tr.rollback()
await ctx.send("Unable to block user")
else:
self.bot.metrics.features.blocked_users.dec()
await tr.commit()
self.bot.blocklist.replace(blocklist)
await block_ticket.cog.soft_unlock_ticket(
Expand Down
122 changes: 122 additions & 0 deletions bot/cogs/ext/prometheus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from __future__ import annotations

import platform
from typing import TYPE_CHECKING

import discord
from discord.ext import commands, tasks

try:

from prometheus_async.aio.web import start_http_server
from prometheus_client import Counter, Enum, Gauge, Info, Summary
except ImportError:
raise RuntimeError(
"Prometheus libraries are required to be installed. "
"Either install those libraries or disable Prometheus extension"
)

if TYPE_CHECKING:
from bot.rodhaj import Rodhaj

METRIC_PREFIX = "discord_"


class FeatureCollector:
    """Container for the Prometheus metrics describing ticket/blocklist features.

    Each attribute is a ``prometheus_client`` metric whose name starts with
    ``METRIC_PREFIX``. The collector only creates the metrics; it does not
    update them itself.
    """

    __slots__ = (
        "bot",
        "active_tickets",
        "closed_tickets",
        "locked_tickets",
        "blocked_users",
    )

    def __init__(self, bot: Rodhaj):
        """Create the feature metrics.

        Args:
            bot: The bot instance these metrics belong to.
        """
        self.bot = bot

        # Creation order is kept stable: active, closed, locked, blocked.
        self.active_tickets = Gauge(
            name=METRIC_PREFIX + "active_tickets",
            documentation="Amount of active tickets",
        )
        self.closed_tickets = Counter(
            name=METRIC_PREFIX + "closed_tickets",
            documentation="Number of closed tickets in this session",
        )
        self.locked_tickets = Gauge(
            name=METRIC_PREFIX + "locked_tickets",
            documentation="Number of soft locked tickets in this session",
        )
        self.blocked_users = Gauge(
            name=METRIC_PREFIX + "blocked_users",
            documentation="Number of currently blocked users",
        )


# Maybe load all of these from a JSON file next time
class Metrics:
    """Top-level holder of every Prometheus metric exported by the bot.

    Besides the general metrics (connection state, latency, command count,
    version info) it owns a ``FeatureCollector`` exposed as ``features``.
    """

    __slots__ = ("bot", "connected", "latency", "commands", "version", "features")

    def __init__(self, bot: Rodhaj):
        """Create all metrics.

        Args:
            bot: The bot instance the metrics are collected for.
        """
        self.bot = bot

        # Both per-shard metrics carry a single "shard" label.
        shard_labels = ["shard"]

        self.connected = Enum(
            name=f"{METRIC_PREFIX}connected",
            documentation="Connected to Discord",
            labelnames=shard_labels,
            states=["connected", "disconnected"],
        )
        self.latency = Gauge(
            name=f"{METRIC_PREFIX}latency",
            documentation="Latency to Discord",
            labelnames=["shard"],
        )
        self.commands = Summary(
            name=f"{METRIC_PREFIX}commands",
            documentation="Total commands executed",
        )
        self.version = Info(
            name=f"{METRIC_PREFIX}version",
            documentation="Versions of the bot",
        )
        self.features = FeatureCollector(self.bot)

    def get_commands(self) -> int:
        """Return how many commands ``bot.walk_commands()`` yields.

        Parent (group) commands are yielded as well, so they are part of
        the total alongside their subcommands.
        """
        return sum(1 for _ in self.bot.walk_commands())

    def fill(self) -> None:
        """Populate the version info metric and record the command count."""
        version_details = {
            "build_version": self.bot.version,
            "dpy_version": discord.__version__,
            "python_version": platform.python_version(),
        }
        self.version.info(version_details)

        command_total = self.get_commands()
        self.commands.observe(command_total)

    async def start(self, host: str, port: int) -> None:
        """Start the metrics HTTP server.

        Args:
            host: Address the server binds to.
            port: Port the server listens on.
        """
        await start_http_server(addr=host, port=port)


class Prometheus(commands.Cog):
    """Prometheus exporter extension for Rodhaj"""

    def __init__(self, bot: Rodhaj):
        self.bot = bot
        # Resolve the labelled child of the "connected" metric once so the
        # gateway listeners below can reuse it. The shard label value is None.
        connected_metric = self.bot.metrics.connected
        self._connected_label = connected_metric.labels(None)

    def _set_connection_state(self, state: str) -> None:
        # Shared by the gateway listeners to flip the connection enum.
        self._connected_label.state(state)

    async def cog_load(self) -> None:
        # Begin periodic latency reporting when the cog is loaded.
        self.latency_loop.start()

    async def cog_unload(self) -> None:
        # Stop periodic latency reporting when the cog is unloaded.
        self.latency_loop.stop()

    @tasks.loop(seconds=5)
    async def latency_loop(self) -> None:
        # Runs every 5 seconds: publish the bot's current latency.
        shard_latency = self.bot.metrics.latency.labels(None)
        shard_latency.set(self.bot.latency)

    @commands.Cog.listener()
    async def on_connect(self) -> None:
        self._set_connection_state("connected")

    @commands.Cog.listener()
    async def on_resumed(self) -> None:
        self._set_connection_state("connected")

    @commands.Cog.listener()
    async def on_disconnect(self) -> None:
        self._set_connection_state("disconnected")


async def setup(bot: Rodhaj) -> None:
    """Extension entry point: register the ``Prometheus`` cog on the bot."""
    exporter_cog = Prometheus(bot)
    await bot.add_cog(exporter_cog)
5 changes: 5 additions & 0 deletions bot/cogs/tickets.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ async def lock_ticket(
async def soft_lock_ticket(
self, thread: discord.Thread, reason: Optional[str] = None
) -> discord.Thread:
self.bot.metrics.features.locked_tickets.inc()
tags = thread.applied_tags
locked_tag = self.get_locked_tag(thread.parent)

Expand All @@ -162,6 +163,7 @@ async def soft_lock_ticket(
async def soft_unlock_ticket(
self, thread: discord.Thread, reason: Optional[str] = None
) -> discord.Thread:
self.bot.metrics.features.locked_tickets.dec()
tags = thread.applied_tags
locked_tag = self.get_locked_tag(thread.parent)

Expand All @@ -176,6 +178,8 @@ async def close_ticket(
connection: Union[asyncpg.Pool, asyncpg.Connection],
author: Optional[Union[discord.User, discord.Member]] = None,
) -> Optional[discord.Thread]:
self.bot.metrics.features.closed_tickets.inc()
self.bot.metrics.features.active_tickets.dec()
if isinstance(user, int):
user = self.bot.get_user(user) or (await self.bot.fetch_user(user))

Expand Down Expand Up @@ -280,6 +284,7 @@ async def create_ticket(self, ticket: TicketThread) -> Optional[TicketOutput]:
status=False, ticket=created_ticket, msg="Could not create ticket"
)
else:
self.bot.metrics.features.active_tickets.inc()
await tr.commit()
return TicketOutput(
status=True,
Expand Down
13 changes: 13 additions & 0 deletions bot/rodhaj.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from aiohttp import ClientSession
from cogs import EXTENSIONS, VERSION
from cogs.config import Blocklist, GuildWebhookDispatcher
from cogs.ext.prometheus import Metrics
from discord.ext import commands
from libs.tickets.structs import PartialConfig, ReservedTags, StatusChecklist
from libs.tickets.utils import get_cached_thread, get_partial_ticket
Expand Down Expand Up @@ -56,6 +57,7 @@ def __init__(
self.blocklist = Blocklist(self)
self.default_prefix = "r>"
self.logger = logging.getLogger("rodhaj")
self.metrics = Metrics(self)
self.session = session
self.partial_config: Optional[PartialConfig] = None
self.pool = pool
Expand All @@ -65,6 +67,7 @@ def __init__(
)
self._dev_mode = config.rodhaj.get("dev_mode", False)
self._reloader = Reloader(self, Path(__file__).parent)
self._prometheus = config.rodhaj.get("prometheus", {})

### Ticket related utils
async def fetch_partial_config(self) -> Optional[PartialConfig]:
Expand Down Expand Up @@ -214,6 +217,16 @@ async def setup_hook(self) -> None:
await self.blocklist.load()
self.partial_config = await self.fetch_partial_config()

if self._prometheus.get("enabled", False):
await self.load_extension("cogs.ext.prometheus")
prom_host = self._prometheus.get("host", "127.0.0.1")
prom_port = self._prometheus.get("port", 8555)

await self.metrics.start(host=prom_host, port=prom_port)
self.logger.info("Prometheus Server started on %s:%s", prom_host, prom_port)

self.metrics.fill()

if self._dev_mode:
self.logger.info("Dev mode is enabled. Loading Reloader")
self._reloader.start()
Expand Down
23 changes: 22 additions & 1 deletion docs/dev-guide/intro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,25 @@ pre-built Docker Compose file is provided. Setup instructions are as follows:

.. code-block:: bash
docker compose -f docker-compose-dev.yml up -d
docker compose -f docker-compose-dev.yml up -d
Extensions
==========

Rodhaj includes the following extensions as noted:

Prometheus Exporter
^^^^^^^^^^^^^^^^^^^

Rodhaj currently includes a `Prometheus <https://prometheus.io/>`_ exporter.
This exporter is intended to be used in production environments, where
metrics surrounding ticket usage, bot health, and others would provide
valuable insight. This exporter can be enabled by setting the
``rodhaj.prometheus.enabled`` key within ``config.yml``.

.. note::

Prometheus client libraries are listed within the
``requirements.txt`` file and are installed by default.
Disabling the exporter does not affect the usage
of these libraries.
15 changes: 14 additions & 1 deletion docs/user-guide/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,17 @@ Blocklist
This feature acts very similarly to a block/unblock feature. All blocked users
as of writing will not get a message from the bot. Planned features with this feature
include a timer to automatically remove those who are on the blocklist and
a history feature to track past incidents.
a history feature to track past incidents.

Prometheus Extension
--------------------

In order to aid in observability, Rodhaj includes a `Prometheus <https://prometheus.io/>`_ exporter.
This is included as an extension to Rodhaj, which, when used, provides valuable information
regarding usage and other metrics. This extension is designed primarily to be used in
production environments.

.. note::

Disabling this extension will have no effect
on the bot itself.
Loading

0 comments on commit 7018ff8

Please sign in to comment.