Add kiepscy and kiepscyurl command support. Now you can search through Kiepscy episodes and get urls to ipla!

KMoszczyc · KMoszczyc · commit 614e3218aba2 · 2025-03-12T00:58:35.000+01:00
diff --git a/data/misc/commands.txt b/data/misc/commands.txt
@@ -12,6 +12,8 @@ fun - (period[optional]) fun metric for all users
 funchart - (username[optional], period[optional]) fun metric chart
 handlowa - (--all) shopping sundays in Poland
 help - a list of commands
+kiepscy - (filter[with & and | operators]) Search Kiepscy episodes either by title or description
+kiepscyurl - (number) Get a Kiepscy episode url by the episode number
 lastmessages - (username, number[optional]) display last x messages from chat history
 likechart - (username[optional], period[optional]) display number of reactions received per day per user
 monologuechart - (username[optional], period[optional], --acc) calculate monologue index (acc stands for accumulative, meaning it's counted from the beginning no matter the filter period)
diff --git a/data/misc/kiepscy.parquet b/data/misc/kiepscy.parquet
diff --git a/definitions.py b/definitions.py
@@ -68,6 +68,7 @@ def read_str_file(path):
 SHOPPING_SUNDAYS_PATH = os.path.join(DATA_DIR, 'misc/niedziele.txt')
 EUROPEJSKAFIRMA_PATH = os.path.join(DATA_DIR, 'misc/europejskafirma.txt')
 BOCZEK_PATH = os.path.join(DATA_DIR, 'misc/boczek.txt')
+KIEPSCY_PATH = os.path.join(DATA_DIR, 'misc/kiepscy.parquet')  # parquet file, read into kiepscy_df with pd.read_parquet
 
 # Load text files with funny phrases
 tvp_headlines = read_str_file(TVP_HEADLINES_PATH)
@@ -80,7 +81,7 @@ def read_str_file(path):
 shopping_sundays = read_str_file(SHOPPING_SUNDAYS_PATH)
 europejskafirma_phrases = read_str_file(EUROPEJSKAFIRMA_PATH)
 boczek_phrases = read_str_file(BOCZEK_PATH)
-
+kiepscy_df = pd.read_parquet(KIEPSCY_PATH)  # DataFrame (not a phrase list) read at import; the kiepscy commands use its 'nr', 'title', 'description', 'url' columns
 
 class PeriodFilterMode(Enum):
     """Mode used for filtering the chat data for:
diff --git a/src/core/misc_commands.py b/src/core/misc_commands.py
@@ -1,6 +1,7 @@
 import logging
 from datetime import datetime
 
+import pandas as pd
 import telegram
 from telegram import Update
 from telegram.ext import ContextTypes
@@ -10,7 +11,7 @@
 from src.models.bot_state import BotState
 from src.models.command_args import CommandArgs
 from definitions import ozjasz_phrases, bartosiak_phrases, tvp_headlines, tvp_latest_headlines, commands, bible_df, ArgType, shopping_sundays, USERS_PATH, arguments_help, europejskafirma_phrases, \
-    boczek_phrases
+    boczek_phrases, kiepscy_df
 import src.core.utils as core_utils
 import src.stats.utils as stats_utils
 
@@ -246,3 +247,47 @@ async def cmd_remind_me(self, update: Update, context: ContextTypes.DEFAULT_TYPE
         self.job_persistance.save_job(job_queue=context.job_queue, dt=dt, func=core_utils.send_response_message, args=[update.effective_chat.id, update.message.message_id, command_args.string])
         response = f"You're gonna get pinged at {core_utils.dt_to_pretty_str(dt)}."
         await context.bot.send_message(chat_id=update.effective_chat.id, text=response)
+
+    async def cmd_kiepscy(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
+        """Search Kiepscy episodes by title or description and reply with the matches.
+
+        The command text is a case-insensitive regex filter; when it contains '&', every '&'-separated part must match.
+        """
+        command_args = CommandArgs(args=context.args, expected_args=[ArgType.TEXT_MULTISPACED], min_string_length=1, max_string_length=1000)
+        command_args = core_utils.parse_args(self.users_df, command_args)
+        if command_args.error != '':
+            await context.bot.send_message(chat_id=update.effective_chat.id, text=command_args.error)
+            return
+
+        # parse_string fills `strings` and leaves `string` empty when the filter contains '&', so rebuild the phrase for the header.
+        search_phrases = command_args.strings
+        search_phrase = '&'.join(search_phrases) if search_phrases else command_args.string
+        regex = core_utils.regexify_multiword_filter(search_phrases) if search_phrases else search_phrase
+        title_mask = kiepscy_df['title'].str.contains(regex, case=False, na=False)  # na=False keeps the mask boolean on missing values
+        description_mask = kiepscy_df['description'].str.contains(regex, case=False, na=False)
+        matching_df = kiepscy_df[title_mask | description_mask]  # one combined mask: an episode matching both columns is listed once
+
+        text = f"Kiepscy episodes that match [{search_phrase}]:\n"
+        for _, row in matching_df.iterrows():
+            description = f"{row['description'][:100]}.." if len(row['description']) > 100 else row['description']
+            text += f"- *{row['nr']}: {row['title']}* - {description}\n"
+        text = stats_utils.escape_special_characters(text)
+        await context.bot.send_message(chat_id=update.effective_chat.id, text=text, parse_mode=telegram.constants.ParseMode.MARKDOWN_V2)
+
+    async def cmd_kiepscyurl(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
+        """Reply with the title and url of the Kiepscy episode whose number is given as the command argument."""
+        parsed_args = core_utils.parse_args(self.users_df, CommandArgs(args=context.args, expected_args=[ArgType.POSITIVE_INT], number_limit=1000))
+        chat_id = update.effective_chat.id
+        if parsed_args.error != '':
+            await context.bot.send_message(chat_id=chat_id, text=parsed_args.error)
+            return
+
+        # The parsed number is compared as a string against the 'nr' column.
+        episode_nr = str(parsed_args.number)
+        episodes_df = kiepscy_df[kiepscy_df['nr'] == episode_nr]
+        if episodes_df.empty:
+            await context.bot.send_message(chat_id=chat_id, text="Nie ma takiego epizodu :(")
+            return
+        episode = episodes_df.iloc[0]  # take the first row should several share the number
+        reply = stats_utils.escape_special_characters(f"*{episode_nr}: {episode['title']}* - {episode['url']}")
+        await context.bot.send_message(chat_id=chat_id, text=reply, parse_mode=telegram.constants.ParseMode.MARKDOWN_V2)
diff --git a/src/core/ozjasz_bot.py b/src/core/ozjasz_bot.py
@@ -66,6 +66,8 @@ def get_commands_map(self):
             'remind': self.chat_commands.cmd_remind,
             'commands': self.chat_commands.cmd_command_usage,
             'summary': self.chat_commands.cmd_summary,
+            'kiepscy': self.core_commands.cmd_kiepscy,
+            'kiepscyurl': self.core_commands.cmd_kiepscyurl,
             'topmessages': lambda update, context: self.chat_commands.cmd_messages_by_reactions(update, context, EmojiType.ALL),
             'sadmessages': lambda update, context: self.chat_commands.cmd_messages_by_reactions(update, context, EmojiType.NEGATIVE),
             'topmemes': lambda update, context: self.chat_commands.cmd_media_by_reactions(update, context, MessageType.IMAGE, EmojiType.ALL),
diff --git a/src/core/utils.py b/src/core/utils.py
@@ -517,7 +517,10 @@ def parse_string(command_args: CommandArgs, text: str) -> [str, CommandArgs, str
     if len(text) > command_args.max_string_length:
         error = f'{command_args.label} {text} is too long, it should have {command_args.max_string_length} characters or less.'
 
-    command_args.string = text
+    if '&' in text: # user for 'AND' filtering
+        command_args.strings = text.split('&')
+    else:
+        command_args.string = text
     return text, command_args, error
 
 
@@ -622,3 +625,9 @@ async def send_response_message(context, chat_id, message_id, message):
 
 def dt_to_pretty_str(dt):
     return dt.strftime("%d-%m-%Y %H:%M:%S")
+
+def regexify_multiword_filter(words):
+    """Return a regex matching text that contains every entry of `words` (AND filter); entries may themselves be regexes."""
+    # Strip the padding left around '&', skip empty entries, and group each entry so an inner '|' stays inside its own lookahead.
+    return '^' + ''.join(f'(?=.*(?:{w.strip()}))' for w in words if w.strip())
+
diff --git a/src/models/command_args.py b/src/models/command_args.py
@@ -29,6 +29,7 @@ class CommandArgs:
     dt_format: DatetimeFormat = None
     number: int = 5
     string: str = ''
+    strings: list[str] = field(default_factory=lambda: [])
     min_string_length: int = 0
     max_string_length: int = 20
     label: str = ''