diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..60a51682d --- /dev/null +++ b/.gitignore @@ -0,0 +1,144 @@ +# Personal files +*.session +*.session-journal +.vscode +*test.py + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..45958a26b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,54 @@
+## [Media Search bot](https://github.com/Mahesh0253/Media-Search-bot)
+
+* Index channel files for inline search.
+* When you post a file to a Telegram channel, this bot saves it in the database so that you and your subscribers can easily search for it in inline mode.
+* This bot supports document, video and audio file formats with caption.
+
+### Installation
+
+#### Easy Way
+[![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy)
+
+#### Hard Way
+
+```sh
+python3 -m venv env
+. ./env/bin/activate
+pip install -r requirements.txt
+#
+python bot.py
+```
+Check `sample_info.py` before editing the `info.py` file.
+
+#### Variables
+
+##### Required Variables
+* `BOT_TOKEN`: Create a bot using [@BotFather](https://telegram.dog/BotFather), and get the Telegram API token.
+
+* `API_ID`: Get this value from [telegram.org](https://my.telegram.org/apps)
+* `API_HASH`: Get this value from [telegram.org](https://my.telegram.org/apps)
+* `CHANNELS`: Username or id of channel. Separate multiple channels by space
+* `ADMINS`: Username or id of Admin. Separate multiple Admins by space
+* `DATABASE_URI`: [mongoDB](https://www.mongodb.com) URI. Get this value from [mongoDB](https://www.mongodb.com). 
For more help watch this [video](https://youtu.be/VQnmcBnguPY)
+* `DATABASE_NAME`: Name of the database in [mongoDB](https://www.mongodb.com). For more help watch this [video](https://youtu.be/VQnmcBnguPY)
+
+##### Optional Variable
+* `COLLECTION_NAME`: Name of the collection. Defaults to Telegram_files. If several bots share the same database, use a different collection name for each bot
+
+### How to use?
+* First, add this bot to the channel as an admin
+* Then, whenever you post a file, the bot saves it in the database so you can easily search for it whenever you want.
+
+### Admin commands
+```
+channel - Get basic information about channels
+total - Show total of saved files
+logger - Get log file
+```
+### Contributions
+Contributions are welcome.
+
+### Thanks to [Pyrogram](https://github.com/pyrogram/pyrogram)
+
+### License
+Code released under [The GNU General Public License](LICENSE).
\ No newline at end of file
diff --git a/app.json b/app.json
new file mode 100644
index 000000000..504b1df96
--- /dev/null
+++ b/app.json
@@ -0,0 +1,64 @@
+{
+    "name": "Media Search bot",
+    "description": "When you post a file to a Telegram channel, this bot saves it in the database so you can easily search for it in inline mode",
+    "keywords": [
+        "telegram",
+        "best",
+        "indian",
+        "pyrogram",
+        "media",
+        "search",
+        "channel",
+        "index",
+        "inline"
+    ],
+    "website": "https://github.com/Mahesh0253/Media-Search-bot",
+    "repository": "https://github.com/Mahesh0253/Media-Search-bot",
+    "env": {
+        "BOT_TOKEN": {
+            "description": "Your bot token.",
+            "value": ""
+        },
+        "API_ID": {
+            "description": "Get this value from https://my.telegram.org",
+            "value": ""
+        },
+        "API_HASH": {
+            "description": "Get this value from https://my.telegram.org",
+            "value": ""
+        },
+        "CHANNELS": {
+            "description": "Username or id of channel. Separate multiple channels by space.",
+            "value": ""
+        },
+        "ADMINS": {
+            "description": "Username or id of Admin. 
Separate multiple Admins by space.",
+            "value": ""
+        },
+        "DATABASE_URI": {
+            "description": "mongoDB URI. Get this value from https://www.mongodb.com. For more help watch this video - https://youtu.be/VQnmcBnguPY",
+            "value": ""
+        },
+        "DATABASE_NAME": {
+            "description": "Name of the database in mongoDB. For more help watch this video - https://youtu.be/VQnmcBnguPY",
+            "value": ""
+        },
+        "COLLECTION_NAME": {
+            "description": "Name of the collections. Defaults to Telegram_files. If you going to use same database, then use different collection name for each bot",
+            "value": "Telegram_files",
+            "required": false
+        }
+    },
+    "addons": [],
+    "buildpacks": [
+        {
+            "url": "heroku/python"
+        }
+    ],
+    "formation": {
+        "worker": {
+            "quantity": 1,
+            "size": "free"
+        }
+    }
+}
\ No newline at end of file
diff --git a/bot.py b/bot.py
new file mode 100644
index 000000000..f821b1516
--- /dev/null
+++ b/bot.py
@@ -0,0 +1,40 @@
+import logging
+import logging.config
+
+# Get logging configurations
+logging.config.fileConfig('logging.conf')
+logging.getLogger().setLevel(logging.ERROR)
+
+from pyrogram import Client, __version__
+from pyrogram.raw.all import layer
+from utils import Media
+from info import SESSION, API_ID, API_HASH, BOT_TOKEN
+
+
+class Bot(Client):  # Pyrogram client configured from info.py; handlers are loaded from the "plugins" package
+
+    def __init__(self):
+        super().__init__(
+            session_name=SESSION,
+            api_id=API_ID,
+            api_hash=API_HASH,
+            bot_token=BOT_TOKEN,
+            workers=150,
+            plugins={"root": "plugins"},
+            sleep_threshold=10,
+        )
+
+    async def start(self):
+        await super().start()
+        await Media.ensure_indexes()  # make sure the Media collection indexes exist before handling updates
+        me = await self.get_me()
+        self.username = '@' + me.username  # read back as bot.username by the inline plugin's share button
+        print(f"{me.first_name} with Pyrogram v{__version__} (Layer {layer}) started on {me.username}.")
+
+    async def stop(self, *args):
+        await super().stop()
+        print("Bot stopped. 
Bye.")
+
+
+app = Bot()
+app.run()
diff --git a/info.py b/info.py
new file mode 100644
index 000000000..9100bb7f9
--- /dev/null
+++ b/info.py
@@ -0,0 +1,26 @@
+import re
+from os import environ
+
+# Bot information
+SESSION = environ.get('SESSION', 'Media_search')
+API_ID = environ['API_ID']
+API_HASH = environ['API_HASH']
+BOT_TOKEN = environ['BOT_TOKEN']
+
+# Admins & Channels (space separated; purely numeric ids become int, anything else is kept as a username)
+ADMINS = [int(admin) if re.search(r'^\d+$', admin) else admin for admin in environ['ADMINS'].split()]
+CHANNELS = [int(channel) if re.search(r'^-100\d+$', channel) else channel for channel in environ['CHANNELS'].split()]
+
+# MongoDB information (required: a missing variable raises KeyError at import time)
+DATABASE_URI = environ['DATABASE_URI']
+DATABASE_NAME = environ['DATABASE_NAME']
+COLLECTION_NAME = environ.get('COLLECTION_NAME', 'Telegram_files')
+
+# Messages
+START_MSG = """
+**Hi, I'm Media Search bot**
+
+Here you can search files in inline mode. Just press following buttons and start searching.
+"""
+
+SHARE_BUTTON_TEXT = 'Checkout {username} for searching files'
\ No newline at end of file
diff --git a/logging.conf b/logging.conf
new file mode 100644
index 000000000..371455afc
--- /dev/null
+++ b/logging.conf
@@ -0,0 +1,32 @@
+[loggers]
+keys=root
+
+[handlers]
+keys=consoleHandler,fileHandler
+
+[formatters]
+keys=consoleFormatter,fileFormatter
+
+[logger_root]
+level=DEBUG
+handlers=consoleHandler,fileHandler
+
+[handler_consoleHandler]
+class=StreamHandler
+level=INFO
+formatter=consoleFormatter
+args=(sys.stdout,)
+
+[handler_fileHandler]
+class=FileHandler
+level=ERROR
+formatter=fileFormatter
+args=('TelegramBot.log','w',)
+
+[formatter_consoleFormatter]
+format=%(asctime)s - %(lineno)d - %(name)s - %(module)s - %(levelname)s - %(message)s
+datefmt=%I:%M:%S %p
+
+[formatter_fileFormatter]
+format=[%(asctime)s:%(name)s:%(lineno)d:%(levelname)s] %(message)s
+datefmt=%m/%d/%Y %I:%M:%S %p
\ No newline at end of file
diff --git a/plugins/channel.py b/plugins/channel.py
new file mode 100644
index 000000000..fdef34ae1
--- /dev/null
+++ 
b/plugins/channel.py
@@ -0,0 +1,20 @@
+from pyrogram import Client, filters
+from utils import save_file
+from info import CHANNELS
+
+media_filter = filters.document | filters.video | filters.audio
+
+
+@Client.on_message(filters.chat(CHANNELS) & media_filter)
+async def media(bot, message):
+    """Save every document, video or audio posted in CHANNELS to the database"""
+    for kind in ("document", "video", "audio"):
+        media = getattr(message, kind, None)
+        if media is not None:
+            break
+    else:  # no break: the message carries none of the three media kinds
+        return
+
+    media.file_type = kind  # extra attributes read later by save_file()
+    media.caption = message.caption
+    await save_file(media)
\ No newline at end of file
diff --git a/plugins/commands.py b/plugins/commands.py
new file mode 100644
index 000000000..4de5b83da
--- /dev/null
+++ b/plugins/commands.py
@@ -0,0 +1,51 @@
+from pyrogram import Client, filters
+from pyrogram.types import InlineKeyboardButton, InlineKeyboardMarkup
+from info import START_MSG, CHANNELS, ADMINS
+from utils import Media
+
+
+@Client.on_message(filters.command('start'))
+async def start(bot, message):
+    """Reply to /start with START_MSG and two buttons that open inline mode"""
+    buttons = [[
+        InlineKeyboardButton('Search Here', switch_inline_query_current_chat=''),
+        InlineKeyboardButton('Go Inline', switch_inline_query=''),
+    ]]
+    reply_markup = InlineKeyboardMarkup(buttons)
+    await message.reply(START_MSG, reply_markup=reply_markup)
+
+
+@Client.on_message(filters.command('channel') & filters.chat(ADMINS))
+async def channel_info(bot, message):
+    """Send basic information about each configured channel (admins only)"""
+
+    if isinstance(CHANNELS, (int, str)):
+        channels = [CHANNELS]
+    elif isinstance(CHANNELS, list):
+        channels = CHANNELS
+    else:
+        raise ValueError("Unexpected type of CHANNELS")
+
+    for channel in channels:
+        try:  # the lookup is inside the try so one unreachable channel does not abort the rest
+            chat = await bot.get_chat(channel)
+            await message.reply(str(chat))
+        except Exception as e:
+            await message.reply(f'Error: {e}')
+
+
+@Client.on_message(filters.command('total') & filters.chat(ADMINS))
+async def total(bot, message):
+    """Show total files in database"""
+    msg = await message.reply("Processing...⏳", quote=True) 
+    total = await Media.count_documents()
+    await msg.edit(f'📁 Saved files: {total}')  # replaces the "Processing..." placeholder message
+
+
+@Client.on_message(filters.command('logger') & filters.chat(ADMINS))
+async def log_file(bot, message):
+    """Send the TelegramBot.log file to the admin, or the error text if that fails"""
+    try:
+        await message.reply_document('TelegramBot.log')
+    except Exception as e:
+        await message.reply(str(e))
\ No newline at end of file
diff --git a/plugins/inline.py b/plugins/inline.py
new file mode 100644
index 000000000..6c36cdf40
--- /dev/null
+++ b/plugins/inline.py
@@ -0,0 +1,72 @@
+from urllib.parse import quote
+from pyrogram import Client, filters, emoji
+from pyrogram.types import InlineKeyboardButton, InlineKeyboardMarkup, InlineQueryResultCachedDocument
+from utils import get_search_results
+from info import SHARE_BUTTON_TEXT
+
+CACHE_TIME = 60  # passed as cache_time to every query.answer() call below
+
+
+@Client.on_inline_query()
+async def answer(bot, query):
+    """Answer an inline query with the files returned by get_search_results()"""
+
+    results = []
+    string = query.query
+    reply_markup = get_reply_markup(bot.username)  # one keyboard shared by every result
+    files = await get_search_results(string)
+
+    for file in files:
+        results.append(
+            InlineQueryResultCachedDocument(
+                title=file.file_name,
+                file_id=file.file_id,
+                caption=file.caption,
+                description=f'Size: {get_size(file.file_size)}\nType: {file.file_type}',
+                reply_markup=reply_markup,
+            )
+        )
+
+    if results:
+        count = len(results)
+        switch_pm_text = f"{emoji.FILE_FOLDER} {count} Result{'s' if count > 1 else ''}"  # pluralised header
+        if string:
+            switch_pm_text += f" for {string}"
+
+        await query.answer(results=results,
+                           cache_time=CACHE_TIME,
+                           switch_pm_text=switch_pm_text,
+                           switch_pm_parameter="start")
+    else:
+
+        switch_pm_text = f'{emoji.CROSS_MARK} No results'
+        if string:
+            switch_pm_text += f' for "{string}"'
+
+        await query.answer(
+            results=[],
+            cache_time=CACHE_TIME,
+            switch_pm_text=switch_pm_text,
+            switch_pm_parameter="okay")
+
+
+def get_reply_markup(username):  # builds the "Search again" / "Share bot" keyboard attached to each result
+    buttons = [[
+        InlineKeyboardButton('Search again', switch_inline_query_current_chat=''),
+        InlineKeyboardButton(
+            text='Share 
bot',
+            url='tg://msg?text='+ quote(SHARE_BUTTON_TEXT.format(username=username))),
+    ]]
+    return InlineKeyboardMarkup(buttons)
+
+
+def get_size(size):
+    """Return size (a byte count) as a readable string such as '1.50 MB'; values beyond EB stay in EB"""
+
+    units = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB"]
+    size = float(size)
+    i = 0
+    while size >= 1024.0 and i < len(units) - 1:  # stop at the last unit so units[i] can never overflow
+        i += 1
+        size /= 1024.0
+    return "%.2f %s" % (size, units[i])
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..a2bad8068
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+https://github.com/Mahesh0253/pyrogram/archive/beta.zip
+tgcrypto
+umongo
+motor==2.1.0
+dnspython
\ No newline at end of file
diff --git a/sample_info.py b/sample_info.py
new file mode 100644
index 000000000..c58496c7c
--- /dev/null
+++ b/sample_info.py
@@ -0,0 +1,23 @@
+# Bot information
+SESSION ='Media_search'
+API_ID = 12345
+API_HASH = '0123456789abcdef0123456789abcdef'
+BOT_TOKEN = '123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11'
+
+# Admins & Channels
+ADMINS = [12345789, 'admin123', 98765432]
+CHANNELS = [-10012345678, -100987654321, 'channelusername']
+
+# MongoDB information
+DATABASE_URI = "mongodb://[username:password@]host1[:port1][,...hostN[:portN]][/[defaultauthdb]?retryWrites=true&w=majority"
+DATABASE_NAME = 'Telegram'
+COLLECTION_NAME ='channel_files' # If several bots share the same database, use a different collection name for each bot
+
+# Messages
+START_MSG = """
+**Hi, I'm Media Search bot**
+
+Here you can search files in inline mode. Just press following buttons and start searching. 
+"""
+
+SHARE_BUTTON_TEXT = 'Checkout {username} for searching files'
\ No newline at end of file
diff --git a/utils.py b/utils.py
new file mode 100644
index 000000000..fbfc8c081
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,66 @@
+import re
+import logging
+from pymongo.errors import DuplicateKeyError
+from umongo import Instance, Document, fields
+from motor.motor_asyncio import AsyncIOMotorClient
+from info import DATABASE_URI, DATABASE_NAME, COLLECTION_NAME
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+client = AsyncIOMotorClient(DATABASE_URI)
+db = client[DATABASE_NAME]
+instance = Instance(db)
+
+
+@instance.register
+class Media(Document):  # one stored channel file; save_file() fills these fields from the Telegram media object
+    file_id = fields.StrField(attribute='_id')  # stored under _id, so saving the same file twice is a duplicate key
+    file_ref = fields.StrField()
+    file_name = fields.StrField(required=True)
+    file_size = fields.IntField(required=True)
+    file_type = fields.StrField()
+    mime_type = fields.StrField()
+    caption = fields.StrField()
+
+    class Meta:
+        collection_name = COLLECTION_NAME
+
+
+async def save_file(media):
+    """Save the given media object as a Media document; an already-saved file is only logged"""
+
+    file = Media(
+        file_id=media.file_id,
+        file_ref=media.file_ref,
+        file_name=media.file_name,
+        file_size=media.file_size,
+        file_type=media.file_type,
+        mime_type=media.mime_type,
+    )
+
+    caption = media.caption
+    if caption:  # leave the field unset when there is no caption
+        file.caption = caption
+
+    try:
+        await file.commit()
+    except DuplicateKeyError:
+        logger.warning(media.file_name + " is already saved in database")
+    else:
+        logger.info(media.file_name + " is saved in database")
+
+
+async def get_search_results(query, max_results=10):
+    """Return a list of up to max_results Media documents whose file_name matches query ([] for an invalid pattern)"""
+
+    raw_pattern = query.lower().strip().replace(' ', '.?')  # a space in the query matches at most one character
+    if not raw_pattern:
+        raw_pattern = '.'  # empty query: match every file name
+
+    try:
+        regex = re.compile(raw_pattern, re.IGNORECASE)
+    except re.error:  # the query is used as a regex, so an invalid pattern simply yields no results
+        return []
+
+    return await Media.find({'file_name':regex}).sort('$natural', -1).limit(max_results).to_list(length=max_results)
\ No newline at end of file