Skip to content

Commit

Permalink
first antispam commit
Browse files Browse the repository at this point in the history
  • Loading branch information
abesmon committed Sep 21, 2024
1 parent f80e7e8 commit eec2812
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 3 deletions.
51 changes: 48 additions & 3 deletions bot/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import random

import random
from telegram import Update
from telegram import Update, Chat, Message, Bot
from telegram.ext import (
Application,
CommandHandler,
Expand All @@ -14,6 +14,7 @@

from markov_gen import generate_markov_text
from draw_func import circle_picture, face_picture
from spam_validator import validate_spam_text

async def start_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
start_called = context.user_data.get("start_called", False)
Expand Down Expand Up @@ -78,6 +79,49 @@ async def track_updates(update: Update, context: ContextTypes.DEFAULT_TYPE):
if update.message and update.message.text:
user_data["messages_count"] = user_data.get("messages_count", 0) + 1

async def validate_spam_updates(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Delete a newcomer's message when the LLM validator rates it as likely spam.

    Only senders with fewer than two counted messages in this chat are
    checked. The counter is read from ``context.chat_data`` under the key
    ``user_data:<user id>``. When the spam probability reaches 0.65 the
    message is deleted and the chat administrators are notified.

    Args:
        update: Incoming Telegram update.
        context: Handler context; provides ``chat_data`` and ``bot``.
    """
    user = update.effective_user
    message = update.message
    # Updates without a sender or without a regular message (edited messages,
    # channel posts, membership changes, ...) carry nothing we can check.
    if not user or message is None:
        return

    user_data_key = f"user_data:{user.id}"
    user_data: dict = context.chat_data.setdefault(user_data_key, dict())

    # A user who has already written at least two messages is assumed not to
    # be a spammer, so the (slow) LLM check is skipped for them.
    # NOTE(review): the counter appears to be incremented by another handler
    # even for messages deleted here — confirm that this is intended.
    if user_data.get("messages_count", 0) >= 2:
        return

    text = message.text
    # No text (photo, sticker, ...): nothing to classify.
    if not text:
        return

    # validate_spam_text is a coroutine function, so it has to be awaited;
    # comparing the bare coroutine object with a float raises TypeError.
    spam_probability = await validate_spam_text(text)
    if spam_probability < 0.65:
        return

    print(f"It's very probably spam!!!\nmessage:{text}\nfrom: {user.name}")

    chat = update.effective_chat
    try:
        # delete_message is a coroutine as well; without ``await`` the
        # request is never sent.
        await chat.delete_message(message_id=message.message_id)
    except Exception as e:
        print(f"Error deleting message: {e}")
        # Nothing was removed, so there is nothing to report to the admins.
        return

    try:
        await notify_admins_about_delete(chat, message, context.bot, "потенциально спам")
    except Exception as e:
        print(f"Error notifying admins about deleted message: {e}")

async def notify_admins_about_delete(chat: Chat, message: Message, bot: Bot, reason: str):
    """Send every administrator of *chat* a private note about a deleted message.

    Delivery is best-effort: a failure to reach one administrator is printed
    and does not stop the remaining notifications.

    Args:
        chat: Chat the message was deleted from.
        message: The deleted message; its text is quoted in the notification.
        bot: Bot instance used to send the private messages.
        reason: Human-readable deletion reason included in the notification.
    """
    # get_administrators is a coroutine method: it has to be *called* and then
    # awaited. Awaiting the bound method itself raises TypeError.
    admins = await chat.get_administrators()

    notification = (
        f"Из чата {chat.title} было удалено сообщение по причине: {reason}\n"
        f"Контент сообщения: {message.text}\n\n"
        "Если вы считаете, что это ошибка, восстановите сообщение через настройки чата. В случае, если это не ошибка, возможно, стоит забанить пользователя и удалить пользователя"
    )

    for admin in admins:
        try:
            await bot.send_message(
                chat_id=admin.user.id,
                text=notification,
            )
        except Exception as e:
            # Best-effort delivery: log the failure and continue with the
            # remaining administrators.
            print(f"Failed to send notification to admin {admin.user.id}: {e}")

def main():
token = os.environ.get('TELEGRAM_BOT_TOKEN')
if not token:
Expand All @@ -93,8 +137,9 @@ def main():

application.add_handler(CommandHandler("markov", markov_command))
application.add_handler(CommandHandler("sus", sus_command))

# Add a update handler

# SPAM tracker
application.add_handler(MessageHandler(filters.ALL, validate_spam_updates), -2)
application.add_handler(MessageHandler(filters.ALL, track_updates), -1)

application.run_polling(allowed_updates=Update.ALL_TYPES)
Expand Down
1 change: 1 addition & 0 deletions bot/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
python-telegram-bot
Pillow
markovify
openai
53 changes: 53 additions & 0 deletions bot/spam_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import os
import json

from openai import OpenAI

async def validate_spam_text(text: str) -> float:
    """Ask the configured LLM how likely *text* is to be spam.

    The endpoint and model are read from the ``ANTISPAM_LLM_HOST`` and
    ``ANTISPAM_LLM_MODEL`` environment variables. The model is asked to
    answer with a JSON object of the form ``{"spam_probability": <number>}``.

    Args:
        text: Message text to classify.

    Returns:
        The reported probability limited to the range [0, 1], or 0.5 when the
        answer is missing or cannot be interpreted.

    Raises:
        KeyError: If one of the required environment variables is not set.
    """
    # Local import: the async client keeps the event loop free while the
    # request is in flight. The synchronous client would block every other
    # handler for the whole duration of the LLM call.
    from openai import AsyncOpenAI

    llm_host = os.environ["ANTISPAM_LLM_HOST"]
    llm_model = os.environ["ANTISPAM_LLM_MODEL"]

    # The context manager closes the underlying HTTP connections once the
    # request has finished, since a new client is created for every call.
    async with AsyncOpenAI(base_url=llm_host, api_key="dummy") as llm_client:
        chat_completion = await llm_client.chat.completions.create(
            model=llm_model,
            messages=[
                {
                    "role": "system",
                    "content": "Ты антиспам система, которая просматривает сообщение и вычисляет вероятность того, что сообщение является спамом. Когда пользователь предоставит тебе текст, тебе нужно выдать вероятность того, что этот текст спам. Вероятность это число с плавающей точкой от 0 до 1. 0 если сообщение не спам, 1 если это сообщение спам. Далее идет сообщение для проверки:"
                },
                {
                    "role": "user",
                    "content": "<message_to_validate>" + str(text) + "</message_to_validate>"
                }
            ],
            temperature=0.5,
            max_tokens=-1,
            stream=False,
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": "spam_detection",
                    "strict": "true",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "spam_probability": {
                                "type": "number"
                            }
                        },
                        "required": [
                            "spam_probability"
                        ]
                    }
                }
            }
        )

    choices = chat_completion.choices
    answer = choices[0].message.content if choices else None
    if not answer:
        return 0.5

    try:
        parsed_answer = json.loads(answer)
        # The schema asks for an object, but the model is not guaranteed to
        # comply; anything else is treated as "unknown".
        if not isinstance(parsed_answer, dict):
            return 0.5
        probability = float(parsed_answer.get("spam_probability", 0.5))
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError; float() raises ValueError or
        # TypeError for non-numeric values such as "high" or null.
        return 0.5

    # Keep the result inside the documented probability range.
    return min(1.0, max(0.0, probability))
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ services:
command: /bin/sh -c "pip install -r requirements.txt && python bot.py"
environment:
- TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN}
- ANTISPAM_LLM_HOST=${ANTISPAM_LLM_HOST}
- ANTISPAM_LLM_MODEL=${ANTISPAM_LLM_MODEL}
restart: always

0 comments on commit eec2812

Please sign in to comment.