From aa66d82bf32865432d3561cc8c0c0fd918a39a7d Mon Sep 17 00:00:00 2001 From: Philipp Kessling <32732590+pekasen@users.noreply.github.com> Date: Wed, 14 Jun 2023 11:51:13 +0000 Subject: [PATCH 1/2] add: logging options --- README.md | 53 +++++++++++++++++++++++++++++++++-------------- tegracli/main.py | 18 ++++++++++++---- tests/test_cli.py | 2 +- 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 8a5b6f8..0e558ef 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,13 @@ A convenience wrapper around Telethon and the Telegram Client API for research purposes. -# Installation Instructions +## Installation Instructions `tegracli` uses Poetry and python >= 3.9 and < 4.0 for building and installing. To install using pipx, run the following command `pipx install tegracli`. -## How to get API keys +### How to get API keys If you don't have API keys for Telegram, head over to [my.telegram.org](https://my.telegram.org). Click on [API development tools](https://my.telegram.org/apps), fill the form to create yourself an app and pluck the keys into `tegracli.conf.yml`. The session name can be arbitrary. @@ -23,14 +23,39 @@ session_name: somesessionyo This template file is provided with the repository. -# Usage +## Usage `tegracli` is a terminal application to access the Telegram API for research purposes. In order to retrieve messages the configuration-file from the section before must be present in the directory you start `tegracli`. +```text +Usage: tegracli [OPTIONS] COMMAND [ARGS]... + + Tegracli!! Retrieve messages from *Te*le*gra*m with a *CLI*! + +Options: + -v, --verbose Logging verbosity. + -l, --log-file FILENAME File to log to. Defaults to STDOUT. + -s, --serialize Serialize output to JSON. + --help Show this message and exit. + +Commands: + configure Configure tegracli. + get Get messages for the specified channels by either ID or... + group Manage account groups. + hydrate Hydrate a file with messages-ids. 
+ search Searches Telegram content that is available to your account. +``` + +## Logging + +`tegracli` allows for configuring what and how it is logged. Per default logging is **disabled** and can be enabled by passing `--verbose` or `-v`, logging level can be increased by more `-vvvv`s. By default logging target is `STDOUT` but this can be redirected to a file with `--log-file yourfile.log`. Setting `--serialize` allows to be to write the entire logging information in JSON-encoded form. + +## Commands + The following commands are available: -## CONFIGURE +### configure Opens an interactive prompt for configuring API-access. Aks you to input your API id, API hash and session name and requests a 2FA code from Telegram. @@ -44,7 +69,7 @@ Options: --help Show this message and exit. ``` -## GET +### get To _get_ messages from a number of channels, use this command. @@ -82,7 +107,7 @@ Options: | **reply_to** | limit messages to replies *to* a specific user | | **reverse/forward** | flag to indicate whether messages should be retrieved in chronological or reverse chronological order. | -### Basic Examples +#### Basic Examples To retrieve the last fifty messages from a Telegram channel: @@ -106,7 +131,7 @@ To retrieve message sent before January, 1st 2022: ```bash tegracli get --reverse --offset_date 2022-01-01 corona_infokanal_bmg ``` -## SEARCH +### search To _search_ messages of your chats and groups and channels you are subscribed to, use this command. @@ -119,7 +144,7 @@ Options: --help Show this message and exit. ``` -## HYDRATE +### hydrate To rehydrate messages from the API this command accepts a file with message IDs in the format of `$channel_name/$post_number`. Both input and output file are optional, if not given, `stdin` and `stdout` are used. @@ -142,15 +167,13 @@ echo test_channel/1234 | tegracli hydrate >> {"_":"Message","id": 1234, ... 
, "restriction_reason":[],"ttl_period":null} ``` -## GROUP INIT and GROUP RUN +### groups -In order to support updatable and long-running collections `tegracli` sports an *account group* feature which retrieves -the history of a given set of accounts and is able to retrieve updates on each of these accounts. +In order to support updatable and long-running collections `tegracli` sports an *account group* feature which retrieves the history of a given set of accounts and is able to retrieve updates on each of these accounts. -Groups are initialized by calling `teracli group init`, where accounts to track are stated by either stating them as arguments -or by reading in a file. +Groups are initialized by calling `tegracli group init`, where the accounts to track are given either as arguments or by reading them from a file. -### Account Group File Format +#### Account Group File Format Account files are expected to follow these requirements: @@ -207,7 +230,7 @@ Usage: tegracli group run [OPTIONS] [GROUPS]... Messages are stored in `jsonl`-files per channel or query. For channels filename is the channel's or user's id, for searches the query. **BEWARE:** how directories and files are structured is subject to active development and prone to changes in the near future. -# Developer Installation +## Developer Installation 1. Install [poetry](https://python-poetry.org/docs/#installation), 2. 
Clone repository and unzip, if necessary, diff --git a/tegracli/main.py b/tegracli/main.py index 3b1e0c7..5582b7d 100644 --- a/tegracli/main.py +++ b/tegracli/main.py @@ -39,14 +39,24 @@ async def _handle_auth(client: TelegramClient): await client.send_code_request(phone_number) await client.sign_in(phone_number, click.prompt("Enter 2FA code")) +log_levels = { + 1: "CRITIAL", + 2: "ERROR", + 3: "WARNING", + 4: "INFO", + 5: "DEBUG", +} @click.group() -@click.option("--debug/--no-debug", default=False) +@click.option("--verbose", "-v", count=True, help="Logging verbosity.", default=0) +@click.option("--log-file", "-l", help="File to log to. Defaults to STDOUT.", type=click.File("w", encoding="UTF-8"), default="-") +@click.option("--serialize", "-s", help="Serialize output to JSON.", is_flag=True, default=False) @click.pass_context -def cli(ctx: click.Context, debug: bool): +def cli(ctx: click.Context, verbose: int, log_file: click.File, serialize: bool) -> None: """Tegracli!! Retrieve messages from *Te*le*gra*m with a *CLI*!""" - if debug is True: - log.add("tegracli.log.json", serialize=True) + if verbose != 0: + log.remove() + log.add(log_file, level=log_levels[verbose], serialize=serialize) if ctx.obj is None: ctx.obj = {} diff --git a/tests/test_cli.py b/tests/test_cli.py index c575cfd..35f6369 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -197,7 +197,7 @@ def test_configure(runner: CliRunner, tmp_path: Path): with runner.isolated_filesystem(temp_dir=tmp_path): result = runner.invoke( cli, - ["--no-debug", "configure"], + ["configure"], input="12345678\n123042jdsnfsisnfkr\ntestytest", ) assert result.exit_code == 0 From 95eef761a855a089f168e8256414ceeb56b2837c Mon Sep 17 00:00:00 2001 From: Philipp Kessling <32732590+pekasen@users.noreply.github.com> Date: Thu, 15 Jun 2023 07:12:21 +0000 Subject: [PATCH 2/2] reenable `--debug` flag --- README.md | 4 +++- tegracli/main.py | 50 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 39 
insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 0e558ef..904d7e5 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,8 @@ Usage: tegracli [OPTIONS] COMMAND [ARGS]... Tegracli!! Retrieve messages from *Te*le*gra*m with a *CLI*! Options: + -d, --debug Enable legacy debugging, is overwritten by the + other options. Defaults to False. -v, --verbose Logging verbosity. -l, --log-file FILENAME File to log to. Defaults to STDOUT. -s, --serialize Serialize output to JSON. @@ -49,7 +51,7 @@ Commands: ## Logging -`tegracli` allows for configuring what and how it is logged. Per default logging is **disabled** and can be enabled by passing `--verbose` or `-v`, logging level can be increased by more `-vvvv`s. By default logging target is `STDOUT` but this can be redirected to a file with `--log-file yourfile.log`. Setting `--serialize` allows to be to write the entire logging information in JSON-encoded form. +`tegracli` allows for configuring what is logged and how. By default logging is **disabled**; it is enabled by passing `--verbose` or `-v`, and the logging level can be increased by passing more `-v`s (up to `-vvvvv` for `DEBUG`). By default the logging target is `STDOUT`, but this can be redirected to a file with `--log-file yourfile.log`. Setting `--serialize` writes the entire logging information in JSON-encoded form. `--debug` is the legacy option used by `tegracli` <= 0.2.5; it enables serialized logging into `tegracli.log.jsonl` at the `DEBUG` level, and that level can be overridden by also setting the `--verbose` option. 
## Commands diff --git a/tegracli/main.py b/tegracli/main.py index 5582b7d..6b44967 100644 --- a/tegracli/main.py +++ b/tegracli/main.py @@ -39,24 +39,48 @@ async def _handle_auth(client: TelegramClient): await client.send_code_request(phone_number) await client.sign_in(phone_number, click.prompt("Enter 2FA code")) + log_levels = { - 1: "CRITIAL", + 1: "CRITICAL", 2: "ERROR", 3: "WARNING", 4: "INFO", 5: "DEBUG", } + @click.group() -@click.option("--verbose", "-v", count=True, help="Logging verbosity.", default=0) -@click.option("--log-file", "-l", help="File to log to. Defaults to STDOUT.", type=click.File("w", encoding="UTF-8"), default="-") -@click.option("--serialize", "-s", help="Serialize output to JSON.", is_flag=True, default=False) +@click.option( + "--debug", + "-d", + is_flag=True, + help="Enable legacy debugging, is overwritten by the other options. Defaults to False.", + default=False, +) +@click.option("--verbose", "-v", count=True, help="Logging verbosity.", default=0) +@click.option( + "--log-file", + "-l", + help="File to log to. Defaults to STDOUT.", + type=click.File("w", encoding="UTF-8"), + default="-", +) +@click.option( + "--serialize", "-s", help="Serialize output to JSON.", is_flag=True, default=False +) @click.pass_context -def cli(ctx: click.Context, verbose: int, log_file: click.File, serialize: bool) -> None: +def cli( + ctx: click.Context, debug: bool, verbose: int, log_file: click.File, serialize: bool +) -> None: """Tegracli!! 
Retrieve messages from *Te*le*gra*m with a *CLI*!""" - if verbose != 0: - log.remove() - log.add(log_file, level=log_levels[verbose], serialize=serialize) + log.remove() + + serialize = debug or serialize + log_file = "tegracli.log.jsonl" if debug else log_file + log_level = log_levels[verbose] if verbose != 0 else "DEBUG" + + if verbose != 0 or debug: + log.add(log_file, level=log_level, serialize=serialize) if ctx.obj is None: ctx.obj = {} @@ -273,7 +297,7 @@ def reset(groups: Tuple[str]): @click.pass_context def run(ctx: click.Context, groups: Tuple[str]): """Load a group configuration and run the groups operations. - + GROUPS are subdirectories with a valid group configuration. If the special keyword all is given, all subdirectories are considered. """ @@ -366,13 +390,11 @@ def run_group(client: TelegramClient, groups: Tuple[str]): """Runs the required operations for the specified groups.""" cwd = Path() - if groups == ("all", ): + if groups == ("all",): groups = [ path - for path - in Path().iterdir() - if path.is_dir() - and not path.name.startswith(".") + for path in Path().iterdir() + if path.is_dir() and not path.name.startswith(".") ] # iterate groups