diff --git a/.gitignore b/.gitignore index 0c63512..9e523a9 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,8 @@ venv/ ENV/ env.bak/ venv.bak/ +.direnv/ +.envrc # mypy .mypy_cache/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..112d239 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,34 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## 0.2.0 + +### 🚀 Features + +- Use the chat history to build messages compatible with the openai api [{role, content}]. +- Support for reply in thread. +- Improve conversation history management. +- Manage reply in conversation + improved albert messaging. +- Add a minimal system prompt in norag mode +- Improve albert command and response format. +- Add command aliases. +- Add a grist table for user management (implement an minimalistic async grist client) + +### 🐛 Bug Fixes + +- Pyalbert version for gemma-2 support +- Better error management + +### Refacto + +- Github actions (#72) +- Bot commands refactorization +- Add all bot custom messages in a dedicated AlbertMsg classe. + +### Scripts + +- Dump users state list +- Update users table from list +- Send error message demo + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..7eaf849 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,11 @@ + +Le code est linté et les imports trié avec [Ruff](https://docs.astral.sh/ruff/) : +```bash +ruff check --fix --select I . +``` + + +Ruff s'intégre dans la plupart des éditeurs de code. Vous pouvez automatiser le linter avec les _hooks_ de _pre-commit_ de git si vous préférez : +```bash +pre-commit install +`` diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a1387dc --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Etalab + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 3eb8cdd..9f7c1f9 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Le projet est un fork de [tchap_bot](https://code.peren.fr/open-source/tchapbot) Contient : - `app/.` : la codebase pour le Tchap bot Albert -- `app/matrix_bot` : une bibliothèque pour pouvoir faire des bots Matrix +- `app/matrix_bot` : une bibliothèque qui encapsule [matrix-nio](https://github.com/matrix-nio/matrix-nio) faire des bots Matrix ### Installation locale @@ -52,41 +52,14 @@ Créez le fichier d'environnement `app/.env` avec les informations de connexion cp app/.env.example app/.env ``` -Les variables d'environnement à renseigner sont les suivantes : +L'ensemble des variables d'environements disponibles est documenté dans le fichier suivant : [app/config.py](./app/config.py) -- `JOIN_ON_INVITE` : booléen facultatif pour activer ou non l'acceptation automatique des invitations dans les salons (exemple : `JOIN_ON_INVITE=True`. Par défaut, `False`) -- `SALT` : il est conseillé de changer la valeur du salt pour ne pas avoir celle par défaut. Il faudra en revanche qu'elle de change pas entre deux sessions. -- `MATRIX_HOME_SERVER` : l'URL du serveur Matrix à utiliser (exemple : `MATRIX_HOME_SERVER="https://matrix.agent.ministere_example.tchap.gouv.fr"`) -- `MATRIX_BOT_USERNAME` : le nom d'utilisateur du bot Matrix (exemple : `MATRIX_BOT_USERNAME="tchapbot@ministere_example.gouv.fr"`) -- `MATRIX_BOT_PASSWORD` : le mot de passe du bot Matrix -- `ERRORS_ROOM_ID` : l'identifiant du salon Tchap où les erreurs seront envoyées (exemple : `ERRORS_ROOM_ID="!roomid:matrix.agent.ministere_example.tchap.gouv.fr"`). **Attention** : le bot doit être invité dans ce salon pour pouvoir y envoyer ses messages d'erreur ! -Pour que le bot se connecte à l'API d'Albert, il faut également renseigner les variables suivantes : -- `USER_ALLOWED_DOMAINS` : liste des domaines d'email autorisés pour les utilisateurs Tchap pour qu'ils puissent interagir avec le bot (exemple : `USER_ALLOWED_DOMAINS='["ministere1.gouv.fr", "ministere2.gouv.fr"]'`. Par défaut : `["*"]` (tous les domaines sont autorisés)) -- `GROUPS_USED=['albert']` : permet, dans cet exemple, d'activer toutes les commandes qui font partie du groupe "albert" -- `ALBERT_API_URL` : l'url de l'API Albert à consommer -- `ALBERT_API_TOKEN` : le token API utilisé pour authoriser le bot a consommer l'API Albert. Pour plus d'informations, consultez la documentation de l'API Albert -- `ALBERT_MODEL_NAME` : le nom du modèle Albert à utiliser pour le bot (exemple : `ALBERT_MODEL_NAME='AgentPublic/albertlight-7b'`). Pour plus d'informations, consultez la documentation de l'API Albert et le [hub des modèles Albert de HuggingFace](https://huggingface.co/collections/AgentPublic/albert-662a1d95c93a47aca5cecc82) -- `ALBERT_MODE` : le mode d'Albert à utiliser pour le bot (exemple : `ALBERT_MODE='rag'`). Pour plus d'informations, consultez la documentation de l'API Albert -- `CONVERSATION_OBSOLESCENCE` : le temps en secondes après lequel une conversation se remet automatiquement à zéro (exemple : `CONVERSATION_OBSOLESCENCE=3600` pour une heure). Par défaut : `3600` (une heure) +### Lancer le bot - -### Utilisation en dehors de Docker - -Pour lancer le bot en dehors de Docker : -```bash -cd app -./.venv/bin/python3 . -``` - - -### Utilisation avec Docker - -1. Créez un fichier `.env` à la racine du projet avec les variables d'environnement mentionnées dans [app/.env.example](./app/.env.example) y compris celles mentionnées dans la section *"For docker-compose deployment"* - -2. Lancer le container du bot à la racine du projet : +Pour lancer le bot executez : ```bash -docker compose up --detach +python app ``` @@ -99,15 +72,7 @@ Le premier sync est assez long, et a priori non bloquant. Si vous avez une inter Le projet est en open source, sous [licence MIT](LICENSES/MIT.txt). Toutes les contributions sont bienvenues, sous forme de pull requests ou d'ouvertures d'issues sur le [repo officiel GitHub](https://github.com/etalab-ia/albert-tchapbot). -Avant de contribuer au dépôt, il est nécessaire d'initialiser les _hooks_ de _pre-commit_ : -```bash -pre-commit install -``` - -Si vous ne pouvez pas utiliser de pre-commit, il est nécessaire de formatter, linter et trier les imports avec [Ruff](https://docs.astral.sh/ruff/) : -```bash -ruff check --fix --select I . -``` +Pour commencer, consultez [CONTRIBUTING.md](CONTRIBUTING.md). ### Licence @@ -137,7 +102,7 @@ The project is a fork of [tchap_bot](https://code.peren.fr/open-source/tchapbot) Contains: - `app/.`: the codebase for the Albert Tchap bot -- `app/matrix_bot`: a library to be able to make Matrix bots +- `app/matrix_bot`: a library that wraps [matrix-nio](https://github.com/matrix-nio/matrix-nio) to make Matrix bots ### Local Installation @@ -165,40 +130,15 @@ Create the environment file `app/.env` with the connection information (or provi cp app/.env.example app/.env ``` -The following environment variables must be entered: - -- `JOIN_ON_INVITE`: optional boolean to enable or disable automatic acceptance of invitations to Tchap rooms (example: `JOIN_ON_INVITE=True`. Default: `False`). -- `SALT`: it is advisable to change the salt value to avoid having the default one. However, it must not change between sessions. -- `MATRIX_HOME_SERVER`: the URL of the Matrix server to be used (example: `MATRIX_HOME_SERVER=“https://matrix.agent.ministere_example.tchap.gouv.fr”`). -- `MATRIX_BOT_USERNAME`: the Matrix bot username (example: `MATRIX_BOT_USERNAME=“tchapbot@ministere_example.gouv.fr”`) -- `MATRIX_BOT_PASSWORD`: the Matrix bot user password -- `ERRORS_ROOM_ID`: the Tchap room ID where errors will be sent (example: `ERRORS_ROOM_ID=“!roomid:matrix.agent.ministere_example.tchap.gouv.fr”`). **Warning**: the bot must be invited to this room to be able to send error messages! - -For the bot to connect to Albert API, you also need to provide the following variables: -- `USER_ALLOWED_DOMAINS`: list of allowed email domains for Tchap users to interact with the bot (example: `USER_ALLOWED_DOMAINS='["ministere.gouv.fr"]'`. Default: `["*"]` (all domains are allowed)) -- `GROUPS_USED=['albert']`: allows, in this example, to activate all commands that are part of the albert group -- `ALBERT_API_URL`: the URL of the Albert API to consume -- `ALBERT_API_TOKEN`: the API token used to authorize the bot to consume the Albert API. For more info, check the Albert API documentation -- `ALBERT_MODEL_NAME`: the name of the model to use for the bot (example: `ALBERT_MODEL_NAME='AgentPublic/albertlight-7b'`). For more info, check the Albert API documentation and the [Albert models hub on HuggingFace](https://huggingface.co/collections/AgentPublic/albert-662a1d95c93a47aca5cecc82). -- `ALBERT_MODE`: the mode of Albert to use for the bot (example: `ALBERT_MODE='rag'`). For more info, check the Albert API documentation -- `CONVERSATION_OBSOLESCENCE` : the time in seconds after which a conversation automatically resets (example: `CONVERSATION_OBSOLESCENCE=3600` for one hour). Default: `3600` (one hour) +The set of available environment variables is documented in the following file: [app/config.py](./app/config.py) -### Usage outside of Docker +### Run the bot -To launch the bot outside of Docker: +To launch the bot: ```bash -cd app -./.venv/bin/python3 . +python app ``` -### Usage with Docker - -1. Create a `.env` file at the root of the project with the environment variables mentioned in [app/.env.example](./app/.env.example), including those mentionned in the *"For docker-compose deployment"* section - -2. Launch the bot container at the root of the project: -```bash -docker compose up --detach -``` ### Troubleshooting @@ -208,15 +148,7 @@ The first sync is quite long, and apparently non-blocking. If you interact with This project is open source, under the [MIT license](LICENSES/MIT.txt). All contributions are welcome, in the form of pull requests or issue openings on the [repo officiel GitHub](https://github.com/etalab-ia/albert-tchapbot). -Before contributing to the repository, it is necessary to initialize the pre-commit hooks: -```bash -pre-commit install -``` - -If you cannot use pre-commit, it is necessary to format, lint, and sort imports with [Ruff](https://docs.astral.sh/ruff/) before committing: -```bash -ruff check --fix --select I . -``` +To get started, take a look at [CONTRIBUTING.md](CONTRIBUTING.md). ### License diff --git a/app/.env.example b/app/.env.example index 562e311..b236669 100644 --- a/app/.env.example +++ b/app/.env.example @@ -3,8 +3,7 @@ # # SPDX-License-Identifier: CC0-1.0 -VERBOSE=False -SYSTEMD_LOGGING=True +LOG_LEVEL=10 JOIN_ON_INVITE=True SALT=b"\xce,\xa1\xc6lY\x80\xe3X}\x91\xa60m\xa8N" MATRIX_HOME_SERVER="https://matrix.agent.ministere_example.tchap.gouv.fr" @@ -15,5 +14,5 @@ USER_ALLOWED_DOMAINS='["ministere_example.gouv.fr", "ministere_example2.gouv.fr" GROUPS_USED='["basic", "albert"]' ALBERT_API_URL="https://albert-server-url.example.com" ALBERT_API_TOKEN="INSERT_YOUR_TOKEN" -ALBERT_MODEL_NAME="AgentPublic/guillaumetell-7b" +ALBERT_MODEL="AgentPublic/llama3-instruct-8b" ALBERT_MODE="rag" diff --git a/app/_version.py b/app/_version.py new file mode 100755 index 0000000..d3ec452 --- /dev/null +++ b/app/_version.py @@ -0,0 +1 @@ +__version__ = "0.2.0" diff --git a/app/bot.py b/app/bot.py index 6d36716..9cd1324 100755 --- a/app/bot.py +++ b/app/bot.py @@ -3,11 +3,14 @@ # # SPDX-License-Identifier: MIT -from commands import command_registry -from config import env_config +import time + from matrix_bot.bot import MatrixBot from matrix_bot.config import logger +from commands import command_registry +from config import env_config + # TODO/IMPROVE: # - if albert-bot is invited in a salon, make it answer only when if it is tagged. # - !models: show available models. @@ -37,4 +40,14 @@ def main(): # await tchap_bot.matrix_client.send_markdown_message(room_id, command_registry.get_help()) # tchap_bot.callbacks.register_on_startup(startup_action) - tchap_bot.run() + n_tries = 4 + err = None + for i in range(n_tries): + try: + tchap_bot.run() + except Exception as err: + logger.error(f"Bot startup failed with error: {err}") + time.sleep(3) + + if err: + raise err diff --git a/app/bot_msg.py b/app/bot_msg.py new file mode 100755 index 0000000..5609fd7 --- /dev/null +++ b/app/bot_msg.py @@ -0,0 +1,74 @@ +from config import APP_VERSION, COMMAND_PREFIX, Config + + +class AlbertMsg: + common_msg_prefixes = [ + "👋 Bonjour, je suis **Albert**", + "🤖 Configuration actuelle", + "\u26a0\ufe0f **Erreur**", + "\u26a0\ufe0f **Commande inconnue**", + "**La conversation a été remise à zéro**", + "🤖 Albert a échoué", + ] + shorts = { + "help": f"Pour retrouver ce message informatif, tapez `{COMMAND_PREFIX}aide`. Pour les geek tapez `{COMMAND_PREFIX}aide -v`.", + "reset": f"Pour ré-initialiser notre conversation, tapez `{COMMAND_PREFIX}reset`", + "conversation": f"Pour activer/désactiver le mode conversation, tapez `{COMMAND_PREFIX}conversation`", + "debug": f"Pour afficher des informations sur la configuration actuelle, `{COMMAND_PREFIX}debug`", + "model": f"Pour modifier le modèle, tapez `{COMMAND_PREFIX}model MODEL_NAME`", + "mode": f"Pour modifier le mode du modèle (c'est-à-dire le modèle de prompt utilisé), tapez `{COMMAND_PREFIX}mode MODE`", + "sources": f"Pour obtenir les sources utilisées pour générer ma dernière réponse, tapez `{COMMAND_PREFIX}sources`", + } + + failed = "🤖 Albert a échoué à répondre. Veuillez réessayez dans un moment." + + reset = "**La conversation a été remise à zéro**. Vous pouvez néanmoins toujours répondre dans un fil de discussion." + + user_not_allowed = "Albert est en phase de test et n'est pas encore disponible pour votre utilisateur. Contactez albert-contact@data.gouv.fr pour demander un accès." + + domain_not_allowed = "Albert n'est pas encore disponible pour votre domaine. Merci de rester en contact, il sera disponible après une phase beta test." + + def error_debug(reason, config): + msg = f"\u26a0\ufe0f **Albert API error**\n\n{reason}\n\n- Albert API URL: {config.albert_api_url}\n- Matrix server: {config.matrix_home_server}" + return msg + + def help(model_url, model_short_name, cmds): + msg = "👋 Bonjour, je suis **Albert**, votre **assistant automatique dédié aux questions légales et administratives** mis à disposition par la **DINUM**. Je suis actuellement en phase de **test**.\n\n" + msg += f"J'utilise le modèle de langage _[{model_short_name}]({model_url})_ et j'ai été alimenté par des bases de connaissances gouvernementales, comme les fiches pratiques de service-public.fr éditées par la Direction de l'information légale et administrative (DILA).\n\n" + msg += "Maintenant que nous avons fait plus connaissance, quelques **règles pour m'utiliser** :\n\n" + msg += "🔮 Ne m'utilisez pas pour élaborer une décision administrative individuelle.\n\n" + msg += "❌ **Ne me transmettez pas** :\n" + msg += "- des **fichiers** (pdf, images, etc.) ;\n" + msg += "- des données permettant de **vous** identifier ou **d'autres personnes** ;\n" + msg += "- des données **confidentielles** ;\n\n" + msg += "Enfin, quelques informations pratiques :\n\n" + msg += "🛠️ **Pour gérer notre conversation** :\n" + msg += "- " + "\n- ".join(cmds) + msg += "\n\n" + msg += "📁 **Sur l'usage des données**\nLes conversations sont stockées de manière anonyme. Elles me permettent de contextualiser les conversations et l'équipe qui me développe les utilise pour m'évaluer et analyser mes performances.\n\n" + msg += "📯 Nous contacter : albert-contact@data.gouv.fr" + + return msg + + def commands(cmds): + msg = "Les commandes spéciales suivantes sont disponibles :\n\n" + msg += "- " + "\n- ".join(cmds) + return msg + + def unknown_command(cmds_msg): + msg = f"\u26a0\ufe0f **Commande inconnue**\n\n{cmds_msg}" + return msg + + def reset_notif(delay_min): + msg = f"Comme vous n'avez pas continué votre conversation avec Albert depuis plus de {delay_min} minutes, **la conversation a été automatiquement remise à zéro**. Vous pouvez néanmoins toujours répondre dans un fil de discussion.\n\n" + msg += "Entrez **!aide** pour obtenir plus d'informatin sur ma paramétrisatiion." + return msg + + def debug(config: Config): + msg = "🤖 Configuration actuelle :\n\n" + msg += f"- Version: {APP_VERSION}\n" + msg += f"- API: {config.albert_api_url}\n" + msg += f"- Model: {config.albert_model}\n" + msg += f"- Mode: {config.albert_mode}\n" + msg += f"- With history: {config.albert_with_history}\n" + return msg diff --git a/app/commands.py b/app/commands.py index 1bd0800..c1c7bc6 100755 --- a/app/commands.py +++ b/app/commands.py @@ -3,22 +3,27 @@ # # SPDX-License-Identifier: MIT -import time +import asyncio +import traceback from collections import defaultdict from dataclasses import dataclass +from functools import wraps -from config import APP_VERSION, COMMAND_PREFIX, Config from matrix_bot.client import MatrixClient -from matrix_bot.config import bot_lib_config, logger +from matrix_bot.config import logger from matrix_bot.eventparser import EventNotConcerned, EventParser from nio import Event, RoomMemberEvent, RoomMessageText -from pyalbert_utils import ( + +from bot_msg import AlbertMsg +from config import COMMAND_PREFIX, Config +from core_llm import ( generate, generate_sources, get_available_models, get_available_modes, - new_chat, ) +from iam import TchapIam +from tchap_utils import get_cleanup_body, get_previous_messages, get_thread_messages, isa_reply_to @dataclass @@ -32,19 +37,24 @@ def add_command( group: str, onEvent: Event, command: str | None, + aliases: list[str] | None, prefix: str | None, help_message: str | None, - hidden: bool, + for_geek: bool, func, ): + commands = [command] if command else None + if aliases: + commands += aliases + self.function_register[name] = { "name": name, "group": group, "onEvent": onEvent, - "command": command, + "commands": commands, "prefix": prefix, "help": help_message, - "hidden": hidden, + "for_geek": for_geek, "func": func, } @@ -57,74 +67,74 @@ def activate_and_retrieve_group(self, group_name: str) -> list: return features def is_valid_command(self, command) -> bool: - return command in [ - feature["command"] - for name, feature in self.function_register.items() - if name in self.activated_functions - ] - - def get_help(self, config: Config) -> str: - cmds = self._get_cmds(config) - - model_url = f"https://huggingface.co/{config.albert_model_name}" - model_short_name = config.albert_model_name.split("/")[-1] - - help_message = "👋 Bonjour, je suis **Albert**, votre **assistant automatique dédié aux questions légales et administratives** mis à disposition par la **DINUM**. Je suis actuellement en phase de **test**.\n\n" - help_message += f"J'utilise le modèle de langage _[{model_short_name}]({model_url})_ et j'ai été alimenté par des bases de connaissances gouvernementales, comme les fiches pratiques de service-public.fr éditées par la Direction de l'information légale et administrative (DILA).\n\n" - - help_message += "Maintenant que nous avons fait plus connaissance, quelques **règles pour m'utiliser** :\n\n" - - help_message += ( - "🔮 Ne m'utilisez pas pour élaborer une décision administrative individuelle.\n\n" - ) - help_message += "❌ **Ne me transmettez pas** :\n" - help_message += "- des **fichiers** (pdf, images, etc.) ;\n" - help_message += ( - "- des données permettant de **vous** identifier ou **d'autres personnes** ;\n" - ) - help_message += "- des données **confidentielles** ;\n\n" - - help_message += "Enfin, quelques informations pratiques :\n\n" - - help_message += "🛠️ **Pour gérer notre conversation** :\n" - help_message += "- " + "\n- ".join(cmds) - help_message += "\n\n" - - help_message += "📁 **Sur l'usage des données**\nLes conversations sont stockées de manière anonyme. Elles me permettent de contextualiser les conversations et l'équipe qui me développe les utilise pour m'évaluer et analyser mes performances.\n\n" - - help_message += "📯 Nous contacter : albert-contact@data.gouv.fr" + valid_commands = [] + for name, feature in self.function_register.items(): + if name in self.activated_functions: + if feature.get("commands"): + valid_commands += feature["commands"] + return command in valid_commands - return help_message + def get_help(self, config: Config, verbose: bool = False) -> str: + cmds = self._get_cmds(config, verbose) + model_url = f"https://huggingface.co/{config.albert_model}" + model_short_name = config.albert_model.split("/")[-1] + return AlbertMsg.help(model_url, model_short_name, cmds) def show_commands(self, config: Config) -> str: cmds = self._get_cmds(config) - available_cmd = "Les commandes spéciales suivantes sont disponibles :\n\n" - available_cmd += "- " + "\n- ".join(cmds) - return available_cmd + return AlbertMsg.commands(cmds) - def _get_cmds(self, config: Config) -> list[str]: + def _get_cmds(self, config: Config, verbose: bool = False) -> list[str]: cmds = set( feature["help"] for name, feature in self.function_register.items() if name in self.activated_functions and feature["help"] - and not feature["hidden"] - and not (feature.get("command") == "sources" and config.albert_mode == "norag") + and (not feature["for_geek"] or verbose) + and not ("sources" in feature.get("commands") and config.albert_mode == "norag") ) return sorted(list(cmds)) +# ================================================================================ +# Globals lifespan +# ================================================================================ + command_registry = CommandRegistry({}, set()) user_configs = defaultdict(lambda: Config()) +tiam = TchapIam(Config()) + + +async def log_not_allowed(msg: str, ep: EventParser, matrix_client: MatrixClient): + """Send feedback message for unauthorized user""" + config = user_configs[ep.sender] + await matrix_client.send_markdown_message(ep.room.room_id, msg, msgtype="m.notice") + + # If user is new to the pending list, send a notification for a new pending user + if await tiam.add_pending_user(config, ep.sender): + if config.errors_room_id: + try: + await matrix_client.send_markdown_message( + config.errors_room_id, + f"\u26a0\ufe0f **New Albert Tchap user access request**\n\n{ep.sender}\n\nMatrix server: {config.matrix_home_server}", + ) + except: + print("Failed to find error room ?!") + + +# ================================================================================ +# Decorators +# ================================================================================ def register_feature( group: str, onEvent: Event, command: str | None = None, + aliases: list[str] | None = None, prefix: str = COMMAND_PREFIX, help: str | None = None, - hidden: bool = False, + for_geek: bool = False, ): def decorator(func): command_registry.add_command( @@ -132,9 +142,10 @@ def decorator(func): group=group, onEvent=onEvent, command=command, + aliases=aliases, prefix=prefix, help_message=help, - hidden=hidden, + for_geek=for_geek, func=func, ) return func @@ -142,34 +153,72 @@ def decorator(func): return decorator +def only_allowed_user(func): + """decorator to use with async function using EventParser""" + + @wraps(func) + async def wrapper(ep: EventParser, matrix_client: MatrixClient): + ep.do_not_accept_own_message() # avoid infinite loop + ep.only_on_direct_message() # Only in direct room for now (need a spec for "saloon" conversation) + + config = user_configs[ep.sender] + is_allowed, msg = await tiam.is_user_allowed(config, ep.sender, refresh=True) + if is_allowed: + return await func(ep, matrix_client) + + if not msg or ep.is_command(COMMAND_PREFIX): + # Only send back the message for the generic albert_answer method + # ignoring other callbacks. + raise EventNotConcerned + + await log_not_allowed(msg, ep, matrix_client) + + return wrapper + + +# ================================================================================ +# Bot commands +# ================================================================================ + + @register_feature( group="basic", onEvent=RoomMessageText, command="aide", - help=f"Pour retrouver ce message informatif, utilisez **{COMMAND_PREFIX}aide**", + aliases=["help", "aiuto"], + help=AlbertMsg.shorts["help"], ) +@only_allowed_user async def help(ep: EventParser, matrix_client: MatrixClient): config = user_configs[ep.sender] - await matrix_client.room_typing(ep.room.room_id) - await matrix_client.send_markdown_message(ep.room.room_id, command_registry.get_help(config)) + + commands = ep.get_command() + verbose = False + if len(commands) > 1 and commands[1] in ["-v", "--verbose", "--more", "-a", "--all"]: + verbose = True + await matrix_client.send_markdown_message(ep.room.room_id, command_registry.get_help(config, verbose)) # fmt: off @register_feature( group="albert", - onEvent=RoomMemberEvent, # @DEBUG: RoomCreateEvent is not captured ? + onEvent=RoomMemberEvent, help=None, ) +@only_allowed_user async def albert_welcome(ep: EventParser, matrix_client: MatrixClient): """ Receive the join/invite event and send the welcome/help message """ config = user_configs[ep.sender] - ep.only_on_direct_message() + ep.only_on_join() + config.update_last_activity() - time.sleep(3) # wait for the room to be ready - otherwise the encryption seems to be not ready await matrix_client.room_typing(ep.room.room_id) + await asyncio.sleep( + 3 + ) # wait for the room to be ready - otherwise the encryption seems to be not ready await matrix_client.send_markdown_message(ep.room.room_id, command_registry.get_help(config)) @@ -177,131 +226,144 @@ async def albert_welcome(ep: EventParser, matrix_client: MatrixClient): group="albert", onEvent=RoomMessageText, command="reset", - help=f"Pour ré-initialiser notre conversation, utilisez **{COMMAND_PREFIX}reset**", + help=AlbertMsg.shorts["reset"], ) +@only_allowed_user async def albert_reset(ep: EventParser, matrix_client: MatrixClient): config = user_configs[ep.sender] if config.albert_with_history: config.update_last_activity() - await matrix_client.room_typing(ep.room.room_id) - config.albert_chat_id = new_chat(config) - reset_message = "**La conversation a été remise à zéro.**\n\n" - reset_message += command_registry.show_commands(config) + config.albert_history_lookup = 0 + reset_message = AlbertMsg.reset + # reset_message += command_registry.show_commands(config) await matrix_client.send_markdown_message( ep.room.room_id, reset_message, msgtype="m.notice" ) + else: + await matrix_client.send_markdown_message( + ep.room.room_id, + "Le mode conversation n'est pas activé. tapez !conversation pour l'activer.", + msgtype="m.notice", + ) @register_feature( group="albert", onEvent=RoomMessageText, command="conversation", - help=f"Pour activer/désactiver le mode conversation, utilisez **{COMMAND_PREFIX}conversation**", - hidden=True, + help=AlbertMsg.shorts["conversation"], + for_geek=True, ) +@only_allowed_user async def albert_conversation(ep: EventParser, matrix_client: MatrixClient): config = user_configs[ep.sender] - await matrix_client.room_typing(ep.room.room_id) + config.albert_history_lookup = 0 if config.albert_with_history: config.albert_with_history = False - config.albert_chat_id = None message = "Le mode conversation est désactivé." else: config.update_last_activity() config.albert_with_history = True message = "Le mode conversation est activé." - await matrix_client.send_text_message(ep.room.room_id, message) + await matrix_client.send_markdown_message(ep.room.room_id, message, msgtype="m.notice") @register_feature( group="albert", onEvent=RoomMessageText, command="debug", - help=f"Pour afficher des informations sur la configuration actuelle, **{COMMAND_PREFIX}debug**", - hidden=True, + help=AlbertMsg.shorts["debug"], + for_geek=True, ) +@only_allowed_user async def albert_debug(ep: EventParser, matrix_client: MatrixClient): config = user_configs[ep.sender] - await matrix_client.room_typing(ep.room.room_id) - debug_message = f"Configuration actuelle :\n\n" - debug_message += f"- Version: {APP_VERSION}\n" - debug_message += f"- API: {config.albert_api_url}\n" - debug_message += f"- Model: {config.albert_model_name}\n" - debug_message += f"- Mode: {config.albert_mode}\n" - debug_message += f"- With history: {config.albert_with_history}\n" - debug_message += f"- Chat ID: {config.albert_chat_id}\n" - debug_message += f"- Stream ID: {config.albert_stream_id}\n" - await matrix_client.send_markdown_message(ep.room.room_id, debug_message) + debug_message = AlbertMsg.debug(config) + await matrix_client.send_markdown_message(ep.room.room_id, debug_message, msgtype="m.notice") @register_feature( group="albert", onEvent=RoomMessageText, command="model", - help=f"Pour modifier le modèle, utilisez **{COMMAND_PREFIX}model** MODEL_NAME", - hidden=True, + aliases=["models"], + help=AlbertMsg.shorts["model"], + for_geek=True, ) +@only_allowed_user async def albert_model(ep: EventParser, matrix_client: MatrixClient): config = user_configs[ep.sender] await matrix_client.room_typing(ep.room.room_id) - commands = ep.event.body.split() + command = ep.get_command() # Get all available models all_models = get_available_models(config) - if len(commands) <= 1: - message = ( - f"La commande !model nécessite de donner un modèle parmi : {', '.join(all_models)}" - ) + all_models = [k for k, v in all_models.items() if v["type"] == "text-generation"] + models_list = "\n\n- " + "\n- ".join( + map(lambda x: x + (" *" if x == config.albert_model else ""), all_models) + ) + if len(command) <= 1: + message = "La commande !model nécessite de donner un modèle parmi :" + models_list + message += "\n\nExemple: `!model " + all_models[-1] + "`" else: - model = commands[1] + model = command[1] if model not in all_models: - message = f"Modèle inconnu. Les modèles disponibles sont : {', '.join(all_models)}" + message = "La commande !model nécessite de donner un modèle parmi :" + models_list + message += "\n\nExemple: `!model " + all_models[-1] + "`" else: - previous_model = config.albert_model_name - config.albert_model_name = model + previous_model = config.albert_model + config.albert_model = model message = f"Le modèle a été modifié : {previous_model} -> {model}" - await matrix_client.send_text_message(ep.room.room_id, message) + await matrix_client.send_markdown_message(ep.room.room_id, message, msgtype="m.notice") @register_feature( group="albert", onEvent=RoomMessageText, command="mode", - help=f"Pour modifier le mode du modèle (c'est-à-dire le modèle de prompt utilisé), utilisez **{COMMAND_PREFIX}mode** MODE", - hidden=True, + aliases=["modes"], + help=AlbertMsg.shorts["mode"], + for_geek=True, ) +@only_allowed_user async def albert_mode(ep: EventParser, matrix_client: MatrixClient): config = user_configs[ep.sender] await matrix_client.room_typing(ep.room.room_id) - commands = ep.event.body.split() + command = ep.get_command() # Get all available mode for the current model all_modes = get_available_modes(config) all_modes += ["norag"] - if len(commands) <= 1: - message = f"La commande !mode nécessite de donner un mode parmi : {', '.join(all_modes)}" + mode_list = "\n\n- " + "\n- ".join( + map(lambda x: x + (" *" if x == config.albert_mode else ""), all_modes) + ) + if len(command) <= 1: + message = "La commande !mode nécessite de donner un mode parmi :" + mode_list + message += "\n\nExemple: `!mode " + all_modes[-1] + "`" else: - mode = commands[1] + mode = command[1] if mode not in all_modes: - message = f"Mode inconnu. Les modes disponibles sont : {', '.join(all_modes)}" + message = "La commande !mode nécessite de donner un mode parmi :" + mode_list + message += "\n\nExemple: `!mode " + all_modes[-1] + "`" else: old_mode = config.albert_mode config.albert_mode = mode message = f"Le mode a été modifié : {old_mode} -> {mode}" - await matrix_client.send_text_message(ep.room.room_id, message) + await matrix_client.send_markdown_message(ep.room.room_id, message, msgtype="m.notice") @register_feature( group="albert", onEvent=RoomMessageText, command="sources", - help=f"Pour obtenir les sources utilisées pour générer ma dernière réponse, utilisez **{COMMAND_PREFIX}sources**", + help=AlbertMsg.shorts["sources"], ) +@only_allowed_user async def albert_sources(ep: EventParser, matrix_client: MatrixClient): config = user_configs[ep.sender] - await matrix_client.room_typing(ep.room.room_id) try: - if config.albert_stream_id: - sources = generate_sources(config=config, stream_id=config.albert_stream_id) + if config.last_rag_references: + await matrix_client.room_typing(ep.room.room_id) + sources = generate_sources(config, config.last_rag_references) sources_msg = "" for source in sources: extra_context = "" @@ -310,13 +372,12 @@ async def albert_sources(ep: EventParser, matrix_client: MatrixClient): sources_msg += f'- {source["title"]} {extra_context}: {source["url"]} \n' else: sources_msg = "Aucune source trouvée, veuillez me poser une question d'abord." - except Exception as albert_exception: - await matrix_client.send_markdown_message( - ep.room.room_id, f"\u26a0\ufe0f **Serveur erreur**\n\n{albert_exception}" - ) + except Exception: + traceback.print_exc() + await matrix_client.send_markdown_message(ep.room.room_id, AlbertMsg.failed, msgtype="m.notice") # fmt: off return - await matrix_client.send_text_message(ep.room.room_id, sources_msg) + await matrix_client.send_markdown_message(ep.room.room_id, sources_msg) @register_feature( @@ -324,24 +385,23 @@ async def albert_sources(ep: EventParser, matrix_client: MatrixClient): onEvent=RoomMessageText, help=None, ) +@only_allowed_user async def albert_answer(ep: EventParser, matrix_client: MatrixClient): """ Receive a message event which is not a command, send the prompt to Albert API and return the response to the user """ - # user_prompt: str = await ep.hl() config = user_configs[ep.sender] - user_prompt = ep.event.body - if user_prompt.startswith(COMMAND_PREFIX): - raise EventNotConcerned - ep.only_on_direct_message() - query = user_prompt + initial_history_lookup = config.albert_history_lookup + + user_query = ep.event.body.strip() + if ep.is_command(COMMAND_PREFIX): + raise EventNotConcerned if config.albert_with_history and config.is_conversation_obsolete: - config.albert_chat_id = new_chat(config) - obsolescence_in_minutes = str(bot_lib_config.conversation_obsolescence // 60) - reset_message = f"Comme vous n'avez pas continué votre conversation avec Albert depuis plus de {obsolescence_in_minutes} minutes, **la conversation a été automatiquement remise à zéro.**\n\n" - reset_message += command_registry.show_commands(config) + config.albert_history_lookup = 0 + obsolescence_in_minutes = str(config.conversation_obsolescence // 60) + reset_message = AlbertMsg.reset_notif(obsolescence_in_minutes) await matrix_client.room_typing(ep.room.room_id) await matrix_client.send_markdown_message( ep.room.room_id, reset_message, msgtype="m.notice" @@ -350,28 +410,85 @@ async def albert_answer(ep: EventParser, matrix_client: MatrixClient): config.update_last_activity() await matrix_client.room_typing(ep.room.room_id, typing_state=True, timeout=180_000) try: - answer = generate(config=config, query=query) - except Exception as albert_exception: + # Build the messages history + # -- + is_reply_to = isa_reply_to(ep.event) + if is_reply_to: + # Use the targeted thread history + # -- + message_events = await get_thread_messages( + config, ep, max_rewind=config.albert_max_rewind + ) + else: + # Use normal history + # -- + # Add the current user query in the history count + config.albert_history_lookup += 1 + message_events = await get_previous_messages( + config, + ep, + history_lookup=config.albert_history_lookup, + max_rewind=config.albert_max_rewind, + ) + + # Map event to list of message {role, content} and cleanup message body + # @TODO: If bot should answer in multi-user canal, we could catch is own name here. + messages = [ + {"role": "user", "content": get_cleanup_body(event)} + if event.source["sender"] == ep.sender + else {"role": "assistant", "content": get_cleanup_body(event)} + for event in message_events + ] + + # Empty chunk (i.e at startup) + if not messages: + messages = [{"role": "user", "content": user_query}] + + answer = generate(config, messages) + + except Exception as albert_err: + logger.error(f"{albert_err}") + traceback.print_exc() # Send an error message to the user await matrix_client.send_markdown_message( - ep.room.room_id, - f"\u26a0\ufe0f **Erreur**\n\nAlbert est actuellement en maintenance, étant encore en phase de test. Réessayez plus tard.", + ep.room.room_id, AlbertMsg.failed, msgtype="m.notice" ) # Redirect the error message to the errors room if it exists if config.errors_room_id: - await matrix_client.send_markdown_message( - config.errors_room_id, - f"\u26a0\ufe0f **Albert API erreur**\n\n{albert_exception}\n\nMatrix server: {config.matrix_home_server}", - ) + try: + await matrix_client.send_markdown_message(config.errors_room_id, AlbertMsg.error_debug(albert_err, config)) # fmt: off + except: + print("Failed to find error room ?!") + + config.albert_history_lookup = initial_history_lookup return - logger.debug(f"{query=}") + logger.debug(f"{user_query=}") logger.debug(f"{answer=}") + + reply_to = None + if is_reply_to: + # "content" -> "m.mentions": {"user_ids": [ep.sender]}, + # "content" -> "m.relates_to": {"m.in_reply_to": {"event_id": ep.event.event_id}}, + reply_to = ep.event.event_id + try: # sometimes the async code fail (when input is big) with random asyncio errors - await matrix_client.send_markdown_message(ep.room.room_id, answer) + await matrix_client.send_markdown_message(ep.room.room_id, answer, reply_to=reply_to) + await tiam.increment_user_question(ep.sender) except Exception as llm_exception: # it seems to work when we retry logger.error(f"asyncio error when sending message {llm_exception=}. retrying") - await matrix_client.send_markdown_message(ep.room.room_id, answer) + await asyncio.sleep(1) + try: + # Try once more + await matrix_client.send_markdown_message(ep.room.room_id, answer, reply_to=reply_to) + await tiam.increment_user_question(ep.sender) + except: + config.albert_history_lookup = initial_history_lookup + return + + # Add agent answer in the history count + if not is_reply_to: + config.albert_history_lookup += 1 @register_feature( @@ -380,12 +497,26 @@ async def albert_answer(ep: EventParser, matrix_client: MatrixClient): help=None, ) async def albert_wrong_command(ep: EventParser, matrix_client: MatrixClient): + """Special handler to catch invalid command""" config = user_configs[ep.sender] - user_prompt = ep.event.body - command = user_prompt.split()[0][1:] - if not user_prompt.startswith(COMMAND_PREFIX) or command_registry.is_valid_command(command): + + ep.do_not_accept_own_message() # avoid infinite loop + ep.only_on_direct_message() # Only in direct room for now (need a spec for "saloon" conversation) + + command = ep.event.body.strip().lstrip(COMMAND_PREFIX).split() + if not ep.is_command(COMMAND_PREFIX): + # Not a command raise EventNotConcerned + elif command_registry.is_valid_command(command[0]): + # Valid command + raise EventNotConcerned + + is_allowed, msg = await tiam.is_user_allowed(config, ep.sender, refresh=True) + if not is_allowed: + await log_not_allowed(msg, ep, matrix_client) + return + + cmds_msg = command_registry.show_commands(config) await matrix_client.send_markdown_message( - ep.room.room_id, - f"\u26a0\ufe0f **Commande inconnue**\n\n{command_registry.show_commands(config)}", + ep.room.room_id, AlbertMsg.unknown_command(cmds_msg), msgtype="m.notice" ) diff --git a/app/config.py b/app/config.py index 4a04577..511fcde 100755 --- a/app/config.py +++ b/app/config.py @@ -5,22 +5,16 @@ import logging import time -import tomllib from pathlib import Path -from matrix_bot.config import bot_lib_config from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict +from _version import __version__ + COMMAND_PREFIX = "!" -APP_VERSION = "unknown" -try: - with open("pyproject.toml", "rb") as f: - pyproject: dict = tomllib.load(f) - APP_VERSION = pyproject["project"]["version"] -except Exception as e: - logging.warning(f"Error while reading pyproject.toml: {e}") +APP_VERSION = __version__ class BaseConfig(BaseSettings): @@ -32,7 +26,7 @@ class BaseConfig(BaseSettings): class Config(BaseConfig): - verbose: bool = Field(False, description="Enable / disable verbose logging") + # General systemd_logging: bool = Field( True, description="Enable / disable logging with systemd.journal.JournalHandler" ) @@ -46,27 +40,43 @@ class Config(BaseConfig): ) groups_used: list[str] = Field(["basic"], description="List of commands groups to use") last_activity: int = Field(int(time.time()), description="Last activity timestamp") + + # Grist Api Key + grist_api_server: str = Field("", description="Grist Api Server") + grist_api_key: str = Field("", description="Grist API Key") + grist_users_table_id: str = Field("", description="Grist Users doc ID") + grist_users_table_name: str = Field("", description="Grist Users table name/ID") + # Albert API settings - albert_api_url: str = Field("http://localhost:8090/api/v2", description="Albert API base URL") + albert_api_url: str = Field("http://localhost:8090", description="Albert API base URL") albert_api_token: str = Field("", description="Albert API Token") - albert_model_name: str = Field( + + # Albert Conversation settings + # ============================ + # PER USER SETTINGS ! + # ============================ + albert_model: str = Field( "AgentPublic/albertlight-7b", description="Albert model name to use (see Albert models hub on HuggingFace)", ) albert_mode: str = Field("rag", description="Albert API mode") - ## Albert Conversational settings albert_with_history: bool = Field(True, description="Conversational mode") - albert_chat_id: int | None = Field(None, description="Current chat id") - albert_stream_id: int | None = Field(None, description="Current stream id") + albert_history_lookup: int = Field(0, description="How far we lookup in the history") + albert_max_rewind: int = Field(20, description="Max history rewind for stability purposes") + conversation_obsolescence: int = Field( + 15 * 60, description="time after which a conversation is considered obsolete, in seconds" + ) + last_rag_references: list[dict] | None = Field(None, description="Last sources used for the RAG.") @property def is_conversation_obsolete(self) -> bool: - return int(time.time()) - self.last_activity > bot_lib_config.conversation_obsolescence + return int(time.time()) - self.last_activity > self.conversation_obsolescence def update_last_activity(self) -> None: self.last_activity = int(time.time()) +# Default config env_config = Config() diff --git a/app/core_llm.py b/app/core_llm.py new file mode 100755 index 0000000..e1cb749 --- /dev/null +++ b/app/core_llm.py @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: 2024 Etalab +# +# SPDX-License-Identifier: MIT + +import requests +from pyalbert.clients import LlmClient +from pyalbert.utils import log_and_raise_for_status + +from config import Config + +# FIX/FUTURE: with pyalbert v0.7 ? +API_PREFIX_V1 = "/api" +API_PREFIX_V2 = "/api/v2" + + +def get_available_models(config: Config) -> dict: + """Fetch available models""" + api_key = config.albert_api_token + url = config.albert_api_url + API_PREFIX_V2 + headers = {"Authorization": f"Bearer {api_key}"} + response = requests.get(f"{url}/models", headers=headers) + log_and_raise_for_status(response) + model_prompts = response.json() + return model_prompts or {} + + +def get_available_modes(config: Config) -> list[str]: + """Fetch available modes for the current model""" + model = config.albert_model + model_prompts = get_available_models(config) + model_config = model_prompts.get(model, {}) + if not model_config: + return [] + + modes = [x["mode"] for x in model_config.get("prompts", []) if "mode" in x] + return modes + + +def generate_sources(config: Config, rag_references: list[str]) -> list[dict]: + """Fetch chunks sources from references""" + api_key = config.albert_api_token + url = config.albert_api_url + API_PREFIX_V2 + headers = {"Authorization": f"Bearer {api_key}"} + data = {"uids": rag_references} + response = requests.post(f"{url}/get_chunks", headers=headers, json=data) + log_and_raise_for_status(response) + sources = response.json() + return sources + + +def generate(config: Config, messages: list, limit=7) -> str: + api_key = config.albert_api_token + url = config.albert_api_url + API_PREFIX_V1 + model = config.albert_model + mode = None if config.albert_mode == "norag" else config.albert_mode + if not config.albert_with_history: + messages = messages[-1:] + + if mode is None: + system_prompt = "Tu es Albert, un bot de l'état français en charge d'informer les agents." + messages = [ + { + "role": "system", + "content": system_prompt, + } + ] + messages + + # Query LLM API + # -- + rag_params = {"strategy": "last", "mode": mode, "limit": 7} + aclient = LlmClient(model, base_url=url, api_key=api_key) + result = aclient.generate(messages=messages, rag=rag_params) + answer = result.choices[0].message.content + if getattr(result, "rag_context"): + config.last_rag_references = result.rag_context[0].references + + return answer.strip() diff --git a/app/iam.py b/app/iam.py new file mode 100755 index 0000000..7294697 --- /dev/null +++ b/app/iam.py @@ -0,0 +1,174 @@ +import asyncio +import json +import re +from collections import namedtuple +from datetime import datetime, timedelta, timezone +from urllib.parse import urlencode + +import aiohttp + +from bot_msg import AlbertMsg + + +# from grist_api import GristDocAPI => is not async +class AsyncGristDocAPI: + def __init__(self, doc_id: str, server: str, api_key: str): + self.doc_id = doc_id + self.server = server + self.api_key = api_key + self.base_url = f"{server}/api" + + async def _request(self, method, endpoint, json_data=None): + headers = {"Authorization": f"Bearer {self.api_key}"} + async with aiohttp.ClientSession() as session: + if method in ["GET"]: + data = {"params": json_data} + else: + headers["Content-Type"] = "application/json" + data = {"json": json_data} + + async with session.request( + method, self.base_url + endpoint, headers=headers, **data + ) as response: + response.raise_for_status() + return await response.json() + + async def fetch_table(self, table_id, filters=None) -> list[namedtuple]: + endpoint = f"/docs/{self.doc_id}/tables/{table_id}/records" + data = {} + if filters: + data["filter"] = json.dumps(filters) + result = await self._request("GET", endpoint, data) + + if not result["records"]: + return [] + Record = namedtuple("Record", ["id"] + list(result["records"][0]["fields"].keys())) + records = [Record(**{"id": r["id"], **r["fields"]}) for r in result["records"]] + return records + + async def add_records(self, table_id, records): + endpoint = f"/docs/{self.doc_id}/tables/{table_id}/records" + data = {"records": [{"fields": r} for r in records]} + result = await self._request("POST", endpoint, data) + return result + + async def update_records(self, table_id, records): + endpoint = f"/docs/{self.doc_id}/tables/{table_id}/records" + records = [r.copy() for r in records] + data = {"records": [{"id": r.pop("id"), "fields": r} for r in records]} + result = await self._request("PATCH", endpoint, data) + return result + + +class TchapIam: + REFRESH_DELTA = 3600 + TZ = timezone(timedelta(hours=2)) + + def __init__(self, config): + self.config = config + self.users_table_id = config.grist_users_table_id + self.users_table_name = config.grist_users_table_name + self.iam_client = AsyncGristDocAPI( + self.users_table_id, + server=config.grist_api_server, + api_key=config.grist_api_key, + ) + + # White-listed users + self.users_allowed = {} + # Users that have been adde to the pendings list. + # Used to send notification for new pending users. + self.users_not_allowed = {} + + self.last_refresh = None + asyncio.run(self._refresh()) + + @staticmethod + def domain_from_sender(sender: str) -> str: + """ + Sender IDs are formatted like this: "@-: + e.g. @john.doe-ministere_example.gouv.fr1:agent.ministere_example.tchap.gouv.frmerci + """ + match = re.search( + r"(?<=\-)[^\-\:]+[0-9]*(?=\:)", sender + ) # match the domain name (between the last "-" and ":", with optional numbers to ignore at the end of the domain) WARNING: this regex is not perfect and doesn't work for domain names with dashes in it like "developpement-durable.gouv.fr" + if match: + return match.group(0) + + print("Could not extract domain from sender: %s" % sender) + + async def _refresh(self): + ttl = datetime.utcnow() - timedelta(seconds=self.REFRESH_DELTA) + if not self.last_refresh or self.last_refresh < ttl: + # Build allowed users list + users_table = await self.iam_client.fetch_table( + self.users_table_name, filters={"status": ["allowed"]} + ) + self.users_allowed.clear() + for record in users_table: + self.users_allowed[record.tchap_user] = record + + # Build not allowed users list + users_table = await self.iam_client.fetch_table( + self.users_table_name, filters={"status": ["pending", "forbidden"]} + ) + self.users_not_allowed.clear() + for record in users_table: + self.users_not_allowed[record.tchap_user] = record + + self.last_refresh = datetime.utcnow() + print("User table (IAM) updated") + + async def is_user_allowed(self, config, username, refresh=False) -> tuple[bool, str]: + """Check if user is allowed to use the tchap bot: + 1. User should be in the whitelist, otherwise send user_not_allowed message + 2. User should be in allowed_domain, otherwise domain_not_allowed_message message + """ + if refresh: + await self._refresh() + is_allowed = False + msg = "" + + # 1. check user + is_allowed = username in self.users_allowed + if not is_allowed: + msg = AlbertMsg.user_not_allowed + + # 2. Check domains + if is_allowed: + if "*" in config.user_allowed_domains: + is_allowed = True + elif self.domain_from_sender(username) in config.user_allowed_domains: + is_allowed = True + else: + is_allowed = False + msg = AlbertMsg.domain_not_allowed + + return is_allowed, msg + + async def add_pending_user(self, config, username) -> bool: + """Return True if the used as been added to the list""" + if username in list(self.users_allowed) + list(self.users_not_allowed): + return False + + record = { + "tchap_user": username, + "status": "pending", + "domain": self.domain_from_sender(username), + } + await self.iam_client.add_records(self.users_table_name, [record]) + return True + + async def increment_user_question(self, username, n=1, update_last_activity=True): + try: + record = self.users_allowed[username] + except Exception as err: + raise ValueError("User not found in grist") from err + + updates = {"n_questions": record.n_questions + n} + if update_last_activity: + updates["last_activity"] = str(datetime.now(self.TZ)) + + await self.iam_client.update_records(self.users_table_name, [{"id": record.id, **updates}]) + + self.users_allowed[username] = record._replace(**updates) diff --git a/app/matrix_bot/bot.py b/app/matrix_bot/bot.py old mode 100644 new mode 100755 index 2cea667..fd26c57 --- a/app/matrix_bot/bot.py +++ b/app/matrix_bot/bot.py @@ -26,7 +26,7 @@ def __init__(self, homeserver: str, username: str, password: str): async def main(self): await self.matrix_client.automatic_login() - sync = await self.matrix_client.sync(timeout=bot_lib_config.timeout, full_state=False) # Ignore prior messages + sync = await self.matrix_client.sync(timeout=bot_lib_config.timeout, full_state=True) # Ignore prior messages self.print_sync_response(sync) await self.callbacks.setup_callbacks() diff --git a/app/matrix_bot/callbacks.py b/app/matrix_bot/callbacks.py index 047a8f6..15fb1c7 100755 --- a/app/matrix_bot/callbacks.py +++ b/app/matrix_bot/callbacks.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: 2023 Pôle d'Expertise de la Régulation Numérique # # SPDX-License-Identifier: MIT +import traceback from functools import wraps from nio import ( @@ -14,6 +15,8 @@ UnknownEvent, ) +from bot_msg import AlbertMsg + from .client import MatrixClient from .config import bot_lib_config, logger from .eventparser import ( @@ -23,8 +26,9 @@ ) -def properly_fail(matrix_client): - """use this decorator so that your async callback never crash, log the error and return a message to the room""" +def properly_fail(matrix_client, error_msg=AlbertMsg.failed): + """use this decorator so that your async callback never crash, + log the error and return a message to the room""" def decorator(func): @wraps(func) @@ -32,8 +36,9 @@ async def wrapper(room, event): try: return await func(room, event) except Exception as unexpected_exception: - await matrix_client.send_text_message(room.room_id, "failed to answer") + await matrix_client.send_text_message(room.room_id, error_msg, msgtype="m.notice") logger.warning(f"command failed with exception: {unexpected_exception}") + traceback.print_exc() finally: await matrix_client.room_typing(room.room_id, typing_state=False) @@ -74,15 +79,13 @@ async def wrapped_func(room, event): ep = MessageEventParser( room=room, event=event, matrix_client=self.matrix_client, log_usage=True ) - if feature.get("command"): - ep.command(feature["command"], prefix=feature["prefix"]) + if feature.get("commands"): + ep.parse_command(feature["commands"], prefix=feature["prefix"]) else: ep = EventParser( room=room, event=event, matrix_client=self.matrix_client, log_usage=True ) - ep.do_not_accept_own_message() # avoid infinite loop - await ep.only_allowed_sender() # only allowed senders await func(ep=ep, matrix_client=self.matrix_client) self.client_callback.append((wrapped_func, onEvent)) diff --git a/app/matrix_bot/config.py b/app/matrix_bot/config.py old mode 100644 new mode 100755 index 13e91a0..1006058 --- a/app/matrix_bot/config.py +++ b/app/matrix_bot/config.py @@ -37,9 +37,6 @@ class BotLibConfig(BaseSettings): default=b"\xce,\xa1\xc6lY\x80\xe3X}\x91\xa60m\xa8N", description="Salt to store your session credentials. Should not change between two runs", ) - conversation_obsolescence: int = Field( - 60 * 60, description="time after which a conversation is considered obsolete, in seconds" - ) model_config = SettingsConfigDict(env_file=Path(".matrix_bot_env")) diff --git a/app/matrix_bot/eventparser.py b/app/matrix_bot/eventparser.py index 574ba4e..97a8e21 100755 --- a/app/matrix_bot/eventparser.py +++ b/app/matrix_bot/eventparser.py @@ -3,10 +3,8 @@ # SPDX-FileCopyrightText: 2024 Etalab # # SPDX-License-Identifier: MIT -import re from dataclasses import dataclass -from config import env_config from nio import Event, MatrixRoom, RoomMessageText from .client import MatrixClient @@ -40,11 +38,6 @@ def is_from_userid(self, userid: str) -> bool: def is_from_this_bot(self) -> bool: return self.is_from_userid(self.matrix_client.user_id) - def is_sender_allowed(self) -> bool: - if "*" in env_config.user_allowed_domains: - return True - return self.sender_domain() in env_config.user_allowed_domains - def room_is_direct_message(self) -> bool: return room_is_direct_message(self.room) @@ -54,18 +47,6 @@ def sender_id(self) -> str: def sender_username(self) -> str: return self.room.users[self.event.sender].name - def sender_domain(self) -> str | None: - """ - Sender IDs are formatted like this: "@-: - e.g. @john.doe-ministere_example.gouv.fr1:agent.ministere_example.tchap.gouv.frmerci - """ - match: re.Match[str] | None = re.search( - r"(?<=\-)[^\-\:]+[0-9]*(?=\:)", self.event.sender - ) # match the domain name (between the last "-" and ":", with optional numbers to ignore at the end of the domain) WARNING: this regex is not perfect and doesn't work for domain names with dashes in it like "developpement-durable.gouv.fr" - if match: - return match.group(0) - logger.warning("Could not extract domain from sender ID", sender_id=self.sender_id) - def do_not_accept_own_message(self) -> None: """ :raise EventNotConcerned: if the message is written by the bot. @@ -94,46 +75,47 @@ def only_on_join(self) -> None: if not self.event.source.get("content", {}).get("membership") == "invite": raise EventNotConcerned - async def only_allowed_sender(self) -> None: - """ - :raise EventNotConcerned: if the sender is not allowed to send messages - """ - if not self.is_sender_allowed(): - await self.matrix_client.send_markdown_message( - self.room.room_id, - "Albert n'est pas encore disponible pour votre domaine. Merci de rester en contact, il sera disponible après un beta test !", - ) - raise EventNotConcerned + def is_command(self): + return False class MessageEventParser(EventParser): event: RoomMessageText + command: list[str] | None = None - def _command(self, command: str, prefix: str, body=None, command_name: str = "") -> str: - command_prefix = f"{prefix}{command}" - if body.split()[0] != command_prefix: - raise EventNotConcerned - command_payload = body.removeprefix(command_prefix) - if self.log_usage: - logger.info( - "Handling command", command=command_name or command, command_payload=command_payload - ) - return command_payload - - def command(self, command: str, prefix: str, command_name: str = "") -> str: + def parse_command(self, commands: str | list[str], prefix: str, command_name: str = ""): """ - if the event is concerned by the command, returns the text after the command. Raise EventNotConcerned otherwise + if the event is concerned by the command, returns the command line as a list. + Raise EventNotConcerned otherwise. :param command: the command that is to be recognized. :param prefix: the prefix for this command (default is !). - :param command_name: name of the command, for logging purposes. + :param command_name: name(s) of the command, for logging purposes. :return: the text after the command :raise EventNotConcerned: if the current event is not concerned by the command. """ - return self._command( - command=command, prefix=prefix, command_name=command_name, body=self.event.body - ) + commands = [commands] if isinstance(commands, str) else commands + body = self.event.body.strip() + user_command = body.split() + command = [commands[0]] + user_command[1:] + + if not any([f"{prefix}{c}" == user_command[0] for c in commands]): + raise EventNotConcerned + + if self.log_usage: + logger.info( + "Handling command", command=command_name or command[0], command_payload=command[1:] + ) + + self.command = command + def is_command(self, prefix: str) -> bool: + return self.event.body.strip().startswith(prefix) + + def get_command(self) -> list[str] | None: + return self.command + + # @deprecated: Not used/tested async def hl(self, consider_hl_when_direct_message=True) -> str: """ if the event is a hl (highlight, i.e begins with the name of the bot), @@ -145,10 +127,12 @@ async def hl(self, consider_hl_when_direct_message=True) -> str: """ display_name = await self.matrix_client.get_display_name() if consider_hl_when_direct_message and self.room_is_direct_message(): - return self._command( + return self.get_command_line( "", prefix="", body=self.event.body.removeprefix(display_name).removeprefix(": "), command_name="mention", ) - return self.command(display_name, prefix="", command_name="mention").removeprefix(": ") + return self.get_command_line(display_name, prefix="", command_name="mention").removeprefix( + ": " + ) diff --git a/app/matrix_bot/room_utils.py b/app/matrix_bot/room_utils.py index 1b96e75..8dbeb08 100755 --- a/app/matrix_bot/room_utils.py +++ b/app/matrix_bot/room_utils.py @@ -13,5 +13,6 @@ def room_is_direct_message(room: MatrixRoom) -> bool: """Returns True if the room is a direct message room""" - # @DEBUG: the users length is sometimes empty when the bot restart... + # @DEBUG: If full_state is false whein initializing the matrix client, + # the romm mebrers can be empty here event if the room is not. return len(room.users) == 2 or len(room.users) == 0 diff --git a/app/pyalbert_utils.py b/app/pyalbert_utils.py deleted file mode 100755 index 559a009..0000000 --- a/app/pyalbert_utils.py +++ /dev/null @@ -1,138 +0,0 @@ -# SPDX-FileCopyrightText: 2024 Etalab -# -# SPDX-License-Identifier: MIT - -import json - -import requests -from config import Config -from matrix_bot.config import logger - - -def log_and_raise_for_status(response: requests.Response) -> None: - if not response.ok: - try: - error_detail = response.json().get("detail") - except Exception: - error_detail = response.text - logger.error(f"Albert API Error Detail: {error_detail}") - response.raise_for_status() - - -def get_available_models(config: Config) -> list[str] | None: - api_token = config.albert_api_token - url = config.albert_api_url - headers = {"Authorization": f"Bearer {api_token}"} - response = requests.get(f"{url}/models", headers=headers) - log_and_raise_for_status(response) - model_prompts = response.json() - return list(model_prompts.keys()) - - -def get_available_modes(config: Config) -> list[str] | None: - api_token = config.albert_api_token - url = config.albert_api_url - headers = {"Authorization": f"Bearer {api_token}"} - response = requests.get(f"{url}/models", headers=headers) - log_and_raise_for_status(response) - model_prompts = response.json() - - api_model = config.albert_model_name - model_config = model_prompts.get(api_model, {}) - if not model_config: - return None - - modes = [x["mode"] for x in model_config.get("config", {}).get("prompts", []) if "mode" in x] - return modes - - -def new_chat(config: Config) -> int: - api_token = config.albert_api_token - url = config.albert_api_url - headers = {"Authorization": f"Bearer {api_token}"} - - data = { - "chat_type": "qa", - } - response = requests.post(f"{url}/chat", headers=headers, json=data) - log_and_raise_for_status(response) - chat_id = response.json()["id"] - return chat_id - - -def generate(config: Config, query: str) -> str: - api_token = config.albert_api_token - api_model = config.albert_model_name - api_mode = None if config.albert_mode == "norag" else config.albert_mode - api_url = config.albert_api_url - with_history = config.albert_with_history - - # Create Stream: - headers = {"Authorization": f"Bearer {api_token}"} - data = { - "query": query, - "model_name": api_model, - "mode": api_mode, - "with_history": with_history, - # "postprocessing": ["check_url", "check_mail", "check_number"], - } - if with_history: - if not config.albert_chat_id: - config.albert_chat_id = new_chat(config) - response = requests.post( - f"{api_url}/stream/chat/{config.albert_chat_id}", headers=headers, json=data - ) - else: - response = requests.post(f"{api_url}/stream", headers=headers, json=data) - log_and_raise_for_status(response) - - stream_id = response.json()["id"] - config.albert_stream_id = stream_id - - # Start Stream: - # @TODO: implement non-streaming response - data = {"stream_id": stream_id} - response = requests.get( - f"{api_url}/stream/{stream_id}/start", headers=headers, json=data, stream=True - ) - log_and_raise_for_status(response) - - answer = "" - for line in response.iter_lines(): - if not line: - continue - - decoded_line = line.decode("utf-8") - _, _, data = decoded_line.partition("data: ") - try: - text = json.loads(data) - if text == "[DONE]": - break - answer += text - except json.decoder.JSONDecodeError as e: - # Should never happen... - print("\nDATA: " + data) - print("\nERROR:") - raise e - - return answer - - -def generate_sources(config: Config, stream_id: int) -> list[dict]: - api_token = config.albert_api_token - api_url = config.albert_api_url - - # Create Stream: - headers = {"Authorization": f"Bearer {api_token}"} - response = requests.get(f"{api_url}/stream/{stream_id}", headers=headers) - log_and_raise_for_status(response) - stream = response.json() - - # Fetch chunks sources - if not stream.get("rag_sources"): - return [] - data = {"uids": stream["rag_sources"]} - response = requests.post(f"{api_url}/get_chunks", headers=headers, json=data) - log_and_raise_for_status(response) - sources = response.json() - return sources diff --git a/app/tchap_utils.py b/app/tchap_utils.py old mode 100644 new mode 100755 index cfe65ec..7c9fb88 --- a/app/tchap_utils.py +++ b/app/tchap_utils.py @@ -3,10 +3,109 @@ # # SPDX-License-Identifier: MIT -from collections import defaultdict from typing import Optional -from nio import MatrixRoom +from matrix_bot.eventparser import EventParser +from nio import Event, MatrixRoom, MessageDirection + +from bot_msg import AlbertMsg +from config import Config + + +def has_keys_along(nested_dict: dict, keys: list[str]) -> bool: + current_level = nested_dict + for key in keys: + if isinstance(current_level, dict) and key in current_level: + current_level = current_level[key] + else: + return False + return True + + +def isa_reply_to(event) -> bool: + return has_keys_along(event.source, ["content", "m.relates_to", "m.in_reply_to", "event_id"]) + + +# +# Message management +# + + +async def get_thread_messages( + config: Config, ep: EventParser, max_rewind: int = 100 +) -> list[Event]: + matrix_client = ep.matrix_client + event = ep.event + + # Build the conversation thread + messages = [] + i = 0 + while isa_reply_to(event) and i < max_rewind: + messages.insert(0, event) + previous_event_id = event.source["content"]["m.relates_to"]["m.in_reply_to"]["event_id"] + previous = await matrix_client.room_get_event(ep.room.room_id, previous_event_id) + event = previous.event + i += 1 + + # Insert the last non original poster message + if not isa_reply_to(event) and i < max_rewind: + messages.insert(0, event) + + return messages + + +async def get_previous_messages( + config: Config, ep: EventParser, history_lookup: int = 10, max_rewind: int = 100 +) -> list[Event]: + matrix_client = ep.matrix_client + # Build the conversation history + starttoken = matrix_client.next_batch + roommessages = await matrix_client.room_messages( + ep.room.room_id, + starttoken, + limit=min(config.albert_history_lookup, config.albert_max_rewind), + direction=MessageDirection.back, + message_filter={"types": ["m.room.message", "m.room.encrypted"]}, + ) + messages = [] + decr = 0 + for i, event in enumerate(roommessages.chunk): + body = event.source["content"]["body"].strip() + # Or only accept "mesgtype" == m.text ? + if ( + isa_reply_to(event) + or event.source["content"]["msgtype"] in ["m.notice"] + or any(body.startswith(msg) for msg in AlbertMsg.common_msg_prefixes) + ): + decr += 1 + continue + messages.insert(0, event) + if i - decr >= min(history_lookup, max_rewind): + break + + return messages + + +def get_cleanup_body(event: Event) -> str: + body = event.source["content"]["body"].strip() + + # Remove quoted text in reply to avoid unnecesserilly text + if body.startswith("> <@"): + line_start = 0 + lines = body.split("\n") + for line in lines: + if line.startswith("> "): + line_start += 1 + else: + break + body = "\n".join(lines[line_start:]) + + return body.strip() + + +# +# User management +# default_power_to_title = { 0: "utilisateur", @@ -15,6 +114,11 @@ } +def user_name_to_non_hl_user(complete_user_name: str) -> str: + """get the string of the user""" + return complete_user_name.split("[")[0].strip() + + def get_user_to_power_level(salon: MatrixRoom) -> dict: users = {user_id: user.name for user_id, user in salon.users.items()} return { @@ -23,16 +127,6 @@ def get_user_to_power_level(salon: MatrixRoom) -> dict: } -def get_salon_users_str(salon: MatrixRoom) -> str: - user_to_power_level = get_user_to_power_level(salon) - user_to_print = defaultdict(list) - for user_name, power_level in user_to_power_level.items(): - key = default_power_to_title.get(power_level, "utilisateur") - # remove the originating server to remove the highlight - user_to_print[key] += [user_name] - return "\n".join(f" - **{key}** : {','.join(value) } " for key, value in user_to_print.items()) - - def get_salon_moderators( salon: MatrixRoom, *, fomo_user_name=None, kick_user_name=None ) -> Optional[list[str]]: @@ -49,13 +143,3 @@ def get_salon_moderators( for user_name, power_level in user_to_power_level.items() if power_level >= minimum_power_level ] - - -def user_name_to_non_hl_user(complete_user_name: str) -> str: - """get the string of the user""" - return complete_user_name.split("[")[0].strip() - - -def users_print(matrix_user_name: list[str]) -> str: - """Print a list of user without highlighting them in the tchap case""" - return ", ".join(user_name for user_name in matrix_user_name) diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index f30c0db..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,31 +0,0 @@ -version: "3.8" - -services: - app: - image: albert/tchapbot - build: - context: . - dockerfile: Dockerfile - command: python3 . - environment: - - VERBOSE=true - - SYSTEMD_LOGGING=True - - SESSION_PATH=/data/session.txt - - JOIN_ON_INVITE=${JOIN_ON_INVITE} - - SALT=${SALT} - - MATRIX_HOME_SERVER=${MATRIX_HOME_SERVER} - - MATRIX_BOT_USERNAME=${MATRIX_BOT_USERNAME} - - MATRIX_BOT_PASSWORD=${MATRIX_BOT_PASSWORD} - - ERRORS_ROOM_ID=${ERRORS_ROOM_ID} - - USER_ALLOWED_DOMAINS=${USER_ALLOWED_DOMAINS} - - GROUPS_USED=${GROUPS_USED} - - ALBERT_API_URL=${ALBERT_API_URL} - - ALBERT_API_TOKEN=${ALBERT_API_TOKEN} - - ALBERT_MODEL_NAME=${ALBERT_MODEL_NAME} - - ALBERT_MODE=${ALBERT_MODE} - ports: - - 443:443 - - 80:80 - volumes: - - /data/tchapbot:/data - restart: always diff --git a/pyproject.toml b/pyproject.toml index c7f8594..b5d70b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,36 +1,39 @@ [project] name = "albert-tchap" -version = "0.1.9" +version = "0.2.0" description = "Albert Tchap" authors = [ { name = "Pôle d'Expertise de la Régulation Numérique", email = "contact.peren@finances.gouv.fr" }, { name = "Etalab", email = "etalab@modernisation.gouv.fr" }, ] -requires-python = ">= 3.11" +requires-python = ">= 3.10" readme = "README.md" license = { text = "MIT" } dependencies = [ - "pillow==10.2.0", + "pillow==10.4.0", "markdown==3.6", - "requests==2.31.0", - "pydantic==2.6.4", - "pydantic-settings==2.2.1", + "requests==2.32.3", + "pydantic==2.8.2", + "pydantic-settings==2.3.4", "matrix-nio[e2e]==0.24.0", - "cryptography==42.0.5", - "structlog==24.1.0", + "cryptography==42.0.8", + "structlog==24.2.0", + "grist_api==0.1.0", + "pyalbert==0.6.20", ] -[tool.pdm] -distribution = false -dev-dependencies = { dev = ["ruff==0.4.4"] } +# Packaging -[tool.rye] -dev-dependencies = ["ruff==0.4.4"] +[tool.setuptools] +py-modules = [] + +# Linter [tool.ruff] line-length = 100 -lint = { ignore-init-module-imports = true, select = ["I"] } +src = ["app"] + +[tool.ruff.isort] +known-third-party = ["nio", "matrix_bot"] -[tool.setuptools] -py-modules = [] diff --git a/scripts/dump_users_state.py b/scripts/dump_users_state.py new file mode 100755 index 0000000..e01495d --- /dev/null +++ b/scripts/dump_users_state.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +import asyncio +import json +import os +from collections import namedtuple + +from nio import AsyncClient, InviteMemberEvent, JoinedRoomsResponse, SyncResponse + +# Matrix Config +config = { + "server": os.getenv("MATRIX_HOME_SERVER"), + "username": os.getenv("MATRIX_BOT_USERNAME"), + "password": os.getenv("MATRIX_BOT_PASSWORD"), + "errors_room_id": os.getenv("ERRORS_ROOM_ID"), +} +Config = namedtuple("Config", config.keys()) +config = Config(**config) + + +async def get_users_state(config: Config, client: AsyncClient): + # Sync with the server to get the latest state + sync_response = await client.sync(full_state=True) + if not isinstance(sync_response, SyncResponse): + raise ValueError("Failed to sync the client") + + direct_chat_users = set() + pending_chat_users = set() + + # Get the direct chats from the account data + response = await client.joined_rooms() + if isinstance(response, JoinedRoomsResponse): + rooms = response.rooms + else: + raise ValueError("Failed to fetch joined room") + + for room_id in rooms: + room = client.rooms.get(room_id) + if room: + # Check if the room is a direct chat (has exactly two members) + members = room.users.values() + if len(members) == 2: + # Add the other user (not the client) to the set + other_user = next(member for member in members if member.user_id != client.user_id) + direct_chat_users.add(other_user.user_id) + + # Check for pending invitations + for room_id, invite_state in sync_response.rooms.invite.items(): + for event in invite_state.invite_state: + if isinstance(event, InviteMemberEvent) and event.sender != client.user_id: + pending_chat_users.add(event.sender) + + # Due to invitation on room (not DM) ? + pending_chat_users = pending_chat_users - direct_chat_users + return {"active": list(direct_chat_users), "pending": list(pending_chat_users)} + + +async def main(config: Config): + client = AsyncClient(config.server, config.username) + await client.login(config.password) + + users_state = await get_users_state(config, client) + with open("users_state.json", "w") as f: + json.dump(users_state, f, indent=2) + + await client.close() + + +asyncio.get_event_loop().run_until_complete(main(config)) diff --git a/scripts/send_msg_to_errors.py b/scripts/send_msg_to_errors.py new file mode 100755 index 0000000..41e2d82 --- /dev/null +++ b/scripts/send_msg_to_errors.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +import asyncio +import os +from collections import namedtuple + +from nio import AsyncClient, LoginResponse + +# Matrix Config +config = { + "server": os.getenv("MATRIX_HOME_SERVER"), + "username": os.getenv("MATRIX_BOT_USERNAME"), + "password": os.getence("MATRIX_BOT_PASSWORD"), + "errors_room_id": os.getence("ERRORS_ROOM_ID"), +} +Config = namedtuple("Config", config.keys()) +config = Config(**config) + +message = "Hi, this is a test." + + +async def send_to_room(config: Config, message: str): + # Create an instance of the AsyncClient + client = AsyncClient(config.server, config.username) + + # Log in to the Matrix server + response = await client.login(config.password) + if isinstance(response, LoginResponse): + print("Logged in successfully") + + # Send a message to the room + await client.room_send( + room_id=config.errors_room_id, + message_type="m.room.message", + content={"msgtype": "m.text", "body": message}, + ) + print("Message sent successfully") + + # Log out from the Matrix server + await client.logout() + else: + print(f"Failed to log in: {response}") + + # Close the client session + await client.close() + + +# Run the main function using asyncio +asyncio.get_event_loop().run_until_complete(send_to_room(config, message)) diff --git a/scripts/update_users_table.py b/scripts/update_users_table.py new file mode 100755 index 0000000..42cc05b --- /dev/null +++ b/scripts/update_users_table.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python + +import asyncio +import json +import os +import re +import sys +from collections import namedtuple + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../app"))) + +from iam import AsyncGristDocAPI + +# Matrix Config +config = { + "users_table_id": os.getenv("GRIST_USERS_TABLE_ID"), + "users_table_name": os.getenv("GRIST_USERS_TABLE_NAME"), + "api_server": os.getenv("GRIST_API_SERVER"), + "api_key": os.getenv("GRIST_API_KEY"), +} +Config = namedtuple("Config", config.keys()) +config = Config(**config) + + +def domain_from_sender(sender: str) -> str: + """ + Sender IDs are formatted like this: "@-: + e.g. @john.doe-ministere_example.gouv.fr1:agent.ministere_example.tchap.gouv.frmerci + """ + match = re.search( + r"(?<=\-)[^\-\:]+[0-9]*(?=\:)", sender + ) # match the domain name (between the last "-" and ":", with optional numbers to ignore at the end of the domain) WARNING: this regex is not perfect and doesn't work for domain names with dashes in it like "developpement-durable.gouv.fr" + if match: + return match.group(0) + + +async def update_users_table(config): + grclient = AsyncGristDocAPI(config.users_table_id, config.api_server, config.api_key) + + with open("../tchap_users_state.prod.json") as f: + users_state = json.load(f) + + def map_status(status): + map_dict = {"active": "allowed"} + if status in map_dict: + return map_dict[status] + return status + + users_state = [ + {"tchap_user": name, "status": map_status(status), "domain":domain_from_sender(name)} + for status, ids in users_state.items() + for name in ids + ] + users_table = await grclient.fetch_table(config.users_table_name) + users_table = {r.tchap_user: r for r in users_table} + + new_records = [] + for r in users_state: + if r["tchap_user"] in users_table: + continue + new_records.append(r) + + await grclient.add_records(config.users_table_name, new_records) + + +asyncio.get_event_loop().run_until_complete(update_users_table(config))