From a4e4b9a3610fe2b21c31b940fed7ec165787be50 Mon Sep 17 00:00:00 2001 From: Jan Willhaus Date: Sat, 30 Mar 2024 15:54:16 +0100 Subject: [PATCH] refactor: Simplify settings parsing --- podcast_archiver/base.py | 31 ++-- podcast_archiver/cli.py | 156 ++++++++----------- podcast_archiver/config.py | 266 ++++++++++++++------------------- podcast_archiver/constants.py | 3 + podcast_archiver/download.py | 20 ++- podcast_archiver/exceptions.py | 20 +-- podcast_archiver/logging.py | 16 +- podcast_archiver/processor.py | 8 +- podcast_archiver/utils.py | 7 +- tests/conftest.py | 27 ++++ tests/test_base.py | 4 +- tests/test_config.py | 19 +-- tests/test_download.py | 22 +-- tests/test_filenames.py | 11 +- tests/test_happy_path.py | 64 ++++---- tests/test_init.py | 4 +- tests/test_main.py | 2 +- 17 files changed, 331 insertions(+), 349 deletions(-) diff --git a/podcast_archiver/base.py b/podcast_archiver/base.py index 6b5d385..5260880 100755 --- a/podcast_archiver/base.py +++ b/podcast_archiver/base.py @@ -4,9 +4,10 @@ from pathlib import Path from typing import TYPE_CHECKING -from pydantic import AnyHttpUrl +from pydantic import AnyHttpUrl, ValidationError from podcast_archiver.console import console +from podcast_archiver.exceptions import InvalidFeed from podcast_archiver.logging import logger from podcast_archiver.processor import FeedProcessor @@ -22,27 +23,33 @@ class PodcastArchiver: def __init__(self, settings: Settings): self.settings = settings - self.processor = FeedProcessor(settings=self.settings) + self.processor = FeedProcessor(settings=settings) logger.debug("Initializing with settings: %s", settings) - self.feeds = set() - for feed in self.settings.feeds: - self.add_feed(feed) - for opml in self.settings.opml_files: - self.add_from_opml(opml) + try: + self.feeds = set() + for feed in self.settings.feeds: + self.add_feed(feed) + for opml in self.settings.opml_files: + self.add_from_opml(opml) + except ValidationError as exc: + raise InvalidFeed(feed=exc.errors()[0]["input"]) from exc def register_cleanup(self, ctx: click.RichContext) -> None: @ctx.call_on_close def _cleanup() -> None: self.processor.shutdown() - def add_feed(self, feed: Path | AnyHttpUrl) -> None: + def add_feed(self, feed: Path | AnyHttpUrl | str) -> None: if isinstance(feed, Path): with open(feed, "r") as fp: - self.feeds.union(set(fp.read().strip().splitlines())) - else: - self.feeds.add(feed) + for f in fp.read().strip().splitlines(): + self.add_feed(f) + return + if isinstance(feed, str): + feed = AnyHttpUrl(feed) + self.feeds.add(feed) def add_from_opml(self, opml: Path) -> None: with opml.open("r") as file: @@ -51,7 +58,7 @@ def add_from_opml(self, opml: Path) -> None: # TODO: Move parsing to pydantic for elem in tree.findall(".//outline[@type='rss'][@xmlUrl!='']"): if url := elem.get("xmlUrl"): - self.add_feed(AnyHttpUrl(url)) + self.add_feed(url) def run(self) -> int: failures = 0 diff --git a/podcast_archiver/cli.py b/podcast_archiver/cli.py index bbaaf62..7079f45 100644 --- a/podcast_archiver/cli.py +++ b/podcast_archiver/cli.py @@ -1,17 +1,22 @@ import pathlib -from os import PathLike, getenv -from typing import Any, cast +from typing import Any import rich_click as click -from click.core import Context, Parameter from podcast_archiver import __version__ as version +from podcast_archiver import constants from podcast_archiver.base import PodcastArchiver -from podcast_archiver.config import DEFAULT_SETTINGS, Settings +from podcast_archiver.config import ( + ConfigPath, + Settings, + get_default_config_path, + print_default_config, +) from podcast_archiver.console import console from podcast_archiver.constants import ENVVAR_PREFIX, PROG_NAME -from podcast_archiver.exceptions import InvalidSettings +from podcast_archiver.exceptions import InvalidFeed, InvalidSettings from podcast_archiver.logging import configure_logging +from podcast_archiver.models import ALL_FIELD_TITLES_STR click.rich_click.USE_RICH_MARKUP = True click.rich_click.USE_MARKDOWN = True @@ -46,64 +51,6 @@ } -class ConfigPath(click.Path): - def __init__(self) -> None: - return super().__init__( - exists=True, - readable=True, - file_okay=True, - dir_okay=False, - resolve_path=True, - path_type=pathlib.Path, - ) - - def convert( # type: ignore[override] - self, value: str | PathLike[str], param: Parameter | None, ctx: Context | None - ) -> str | bytes | PathLike[str] | None: - if value is None: - return None - if ( - ctx - and param - and isinstance(value, pathlib.Path) - and value == param.get_default(ctx, call=True) - and not value.exists() - ): - try: - value.parent.mkdir(exist_ok=True, parents=True) - with value.open("w") as fp: - Settings.generate_default_config(file=fp) - except (OSError, FileNotFoundError): - return None - - filepath = cast(pathlib.Path, super().convert(value, param, ctx)) - if not ctx or ctx.resilient_parsing: - return filepath - - try: - ctx.default_map = ctx.default_map or {} - settings = Settings.load_from_yaml(filepath) - ctx.default_map.update(settings.model_dump(exclude_unset=True, exclude_none=True, by_alias=True)) - except InvalidSettings as exc: - self.fail(f"{self.name.title()} {click.format_filename(filepath)!r} is invalid: {exc}", param, ctx) - - return filepath - - -def get_default_config_path() -> pathlib.Path | None: - if getenv("TESTING", "0").lower() in ("1", "true"): - return None - return pathlib.Path(click.get_app_dir(PROG_NAME)) / "config.yaml" # pragma: no cover - - -def generate_default_config(ctx: click.Context, param: click.Parameter, value: bool) -> None: - if not value or ctx.resilient_parsing: - return - - Settings.generate_default_config() - ctx.exit() - - @click.command( context_settings={ "auto_envvar_prefix": ENVVAR_PREFIX, @@ -114,25 +61,37 @@ def generate_default_config(ctx: click.Context, param: click.Parameter, value: b @click.option( "-f", "--feed", + "feeds", + default=[], multiple=True, show_envvar=True, - help=Settings.model_fields["feeds"].description + " Use repeatedly for multiple feeds.", # type: ignore[operator] + help="Feed URLs to archive. Use repeatedly for multiple feeds.", ) @click.option( "-o", "--opml", + "opml_files", + type=click.Path( + exists=True, + file_okay=True, + dir_okay=False, + resolve_path=True, + path_type=pathlib.Path, + ), + default=[], multiple=True, show_envvar=True, help=( - Settings.model_fields["opml_files"].description # type: ignore[operator] - + " Use repeatedly for multiple files." + "OPML files containing feed URLs to archive. OPML files can be exported from a variety of podcatchers." + "Use repeatedly for multiple files." ), ) @click.option( "-d", "--dir", + "archive_directory", type=click.Path( - exists=False, + exists=True, writable=True, file_okay=False, dir_okay=True, @@ -141,9 +100,12 @@ def generate_default_config(ctx: click.Context, param: click.Parameter, value: b ), show_default=True, required=False, - default=DEFAULT_SETTINGS.archive_directory, + default=pathlib.Path("."), show_envvar=True, - help=Settings.model_fields["archive_directory"].description, + help=( + "Directory to which to download the podcast archive. " + "By default, the archive will be created in the current working directory ('.')." + ), ) @click.option( "-F", @@ -151,75 +113,83 @@ def generate_default_config(ctx: click.Context, param: click.Parameter, value: b type=str, show_default=True, required=False, - default=DEFAULT_SETTINGS.filename_template, + default=constants.DEFAULT_FILENAME_TEMPLATE, show_envvar=True, - help=Settings.model_fields["filename_template"].description, + help=( + "Template to be used when generating filenames. Available template variables are: " + f"{ALL_FIELD_TITLES_STR}, and 'ext' (the filename extension)." + ), ) @click.option( "-u", "--update", + "update_archive", type=bool, - default=DEFAULT_SETTINGS.update_archive, is_flag=True, show_envvar=True, - help=Settings.model_fields["update_archive"].description, + help=( + "Update the feeds with newly added episodes only. " + "Adding episodes ends with the first episode already present in the download directory." + ), ) @click.option( "--write-info-json", type=bool, - default=DEFAULT_SETTINGS.write_info_json, is_flag=True, show_envvar=True, - help=Settings.model_fields["write_info_json"].description, + help="Write episode metadata to a .info.json file next to the media file itself.", ) @click.option( "-q", "--quiet", type=bool, - default=DEFAULT_SETTINGS.quiet, is_flag=True, show_envvar=True, - help=Settings.model_fields["quiet"].description, + help="Print only minimal progress information. Errors will always be emitted.", ) @click.option( "-C", "--concurrency", type=int, - default=DEFAULT_SETTINGS.concurrency, + default=constants.DEFAULT_CONCURRENCY, show_envvar=True, - help=Settings.model_fields["concurrency"].description, + help="Maximum number of simultaneous downloads.", ) @click.option( "--debug-partial", type=bool, - default=DEFAULT_SETTINGS.debug_partial, is_flag=True, show_envvar=True, - help=Settings.model_fields["debug_partial"].description, + help=f"Download only the first {constants.DEBUG_PARTIAL_SIZE} bytes of episodes for debugging purposes.", ) @click.option( "-v", "--verbose", count=True, show_envvar=True, - default=DEFAULT_SETTINGS.verbose, - help=Settings.model_fields["verbose"].description, + is_eager=True, + callback=configure_logging, + help="Increase the level of verbosity while downloading.", ) @click.option( "-S", "--slugify", + "slugify_paths", type=bool, - default=DEFAULT_SETTINGS.slugify_paths, is_flag=True, show_envvar=True, - help=Settings.model_fields["slugify_paths"].description, + help="Format filenames in the most compatible way, replacing all special characters.", ) @click.option( "-m", "--max-episodes", + "maximum_episode_count", type=int, - default=DEFAULT_SETTINGS.maximum_episode_count, - help=Settings.model_fields["maximum_episode_count"].description, + default=0, + help=( + "Only download the given number of episodes per podcast feed. " + "Useful if you don't really need the entire backlog." + ), ) @click.version_option( version, @@ -233,7 +203,7 @@ def generate_default_config(ctx: click.Context, param: click.Parameter, value: b expose_value=False, is_flag=True, is_eager=True, - callback=generate_default_config, + callback=print_default_config, help="Emit an example YAML config file to stdout and exit.", ) @click.option( @@ -248,12 +218,10 @@ def generate_default_config(ctx: click.Context, param: click.Parameter, value: b help="Path to a config file. Command line arguments will take precedence.", ) @click.pass_context -def main(ctx: click.RichContext, /, **kwargs: Any) -> int: - configure_logging(kwargs["verbose"]) - console.quiet = kwargs["quiet"] or kwargs["verbose"] > 1 +def main(ctx: click.RichContext, **kwargs: Any) -> int: + settings = Settings(**kwargs) + console.quiet = settings.quiet or settings.verbose > 1 try: - settings = Settings.load_from_dict(kwargs) - # Replicate click's `no_args_is_help` behavior but only when config file does not contain feeds/OPMLs if not (settings.feeds or settings.opml_files): click.echo(ctx.command.get_help(ctx)) @@ -262,6 +230,8 @@ def main(ctx: click.RichContext, /, **kwargs: Any) -> int: pa = PodcastArchiver(settings=settings) pa.register_cleanup(ctx) pa.run() + except InvalidFeed as exc: + raise click.BadParameter(f"Cannot parse feed '{exc.feed}'") from exc except InvalidSettings as exc: raise click.BadParameter(f"Invalid settings: {exc}") from exc except KeyboardInterrupt as exc: diff --git a/podcast_archiver/config.py b/podcast_archiver/config.py index df4ee2f..e34fab0 100644 --- a/podcast_archiver/config.py +++ b/podcast_archiver/config.py @@ -2,23 +2,94 @@ import pathlib import textwrap +from contextlib import suppress +from dataclasses import dataclass from datetime import datetime -from functools import cached_property -from typing import IO, Any, Text - -import pydantic -from pydantic import AnyHttpUrl, BaseModel, BeforeValidator, DirectoryPath, Field, FilePath -from pydantic import ConfigDict as _ConfigDict -from pydantic_core import to_json -from typing_extensions import Annotated +from os import PathLike, getenv +from typing import Any, cast + +import click +from click.core import Context, Parameter +from pydantic_core import Url, to_json from yaml import YAMLError, safe_load from podcast_archiver import __version__ as version from podcast_archiver import constants from podcast_archiver.console import console from podcast_archiver.exceptions import InvalidSettings -from podcast_archiver.models import ALL_FIELD_TITLES_STR -from podcast_archiver.utils import FilenameFormatter +from podcast_archiver.logging import logger + + +@dataclass +class Settings: + feeds: list[str | Url] + opml_files: list[pathlib.Path] + archive_directory: pathlib.Path + + update_archive: bool + write_info_json: bool + maximum_episode_count: int + filename_template: str + slugify_paths: bool + + quiet: bool + verbose: int + concurrency: int + debug_partial: bool + + +def get_default_config_path() -> pathlib.Path | None: + if getenv("TESTING", "0").lower() in ("1", "true"): + return None + return pathlib.Path(click.get_app_dir(constants.PROG_NAME)) / "config.yaml" # pragma: no cover + + +def generate_default_config(ctx: click.Context) -> str: + now = datetime.now().replace(microsecond=0).astimezone() + wrapper = textwrap.TextWrapper(width=80, initial_indent="# ", subsequent_indent="# ") + + lines = [ + f"## {constants.PROG_NAME.title()} configuration", + f"## Generated with {constants.PROG_NAME} {version} at {now}", + ] + + for cli_param in cast(list[Parameter | click.Option], ctx.command.params): + if (name := cli_param.name) in ("help", "config", "config_generate", "version"): + continue + + param_value = cli_param.get_default(ctx, call=True) + param_help = "" + if _help := getattr(cli_param, "help", ""): + param_help = f": {_help}" + lines += [ + "", + *wrapper.wrap(f"Field '{name}'{param_help}"), + "#", + *wrapper.wrap(f"Equivalent command line option: {', '.join(cli_param.opts)}"), + "#", + f"{name}: {to_json(param_value).decode()}", + ] + + return "\n".join(lines).strip() + + +def print_default_config(ctx: click.Context, param: click.Parameter, value: bool = True) -> None: + if not value or ctx.resilient_parsing: + return + console.print(generate_default_config(ctx), highlight=False) + ctx.exit() + + +def write_default_config(ctx: click.Context, param: click.Parameter, value: str | PathLike[str]) -> None: + if not value or ctx.resilient_parsing: + return + if not isinstance(value, pathlib.Path) or value != param.get_default(ctx, call=True) or value.exists(): + return + + with suppress(OSError, FileNotFoundError): + value.parent.mkdir(exist_ok=True, parents=True) + with value.open("w") as fh: + fh.write(generate_default_config(ctx) + "\n") def expanduser(v: pathlib.Path) -> pathlib.Path: @@ -27,154 +98,43 @@ def expanduser(v: pathlib.Path) -> pathlib.Path: return v.expanduser() -UserExpandedDir = Annotated[DirectoryPath, BeforeValidator(expanduser)] -UserExpandedFile = Annotated[FilePath, BeforeValidator(expanduser)] - - -class Settings(BaseModel): - model_config = _ConfigDict(populate_by_name=True) - - feeds: list[AnyHttpUrl] = Field( - default_factory=list, - alias="feed", - description="Feed URLs to archive.", - ) - - opml_files: list[UserExpandedFile] = Field( - default_factory=list, - alias="opml", - description=( - "OPML files containing feed URLs to archive. OPML files can be exported from a variety of podcatchers." - ), - ) - - archive_directory: UserExpandedDir = Field( - default=UserExpandedDir("."), - alias="dir", - description=( - "Directory to which to download the podcast archive. " - "By default, the archive will be created in the current working directory ('.')." - ), - ) - - update_archive: bool = Field( - default=False, - alias="update", - description=( - "Update the feeds with newly added episodes only. " - "Adding episodes ends with the first episode already present in the download directory." - ), - ) - - write_info_json: bool = Field( - default=False, - alias="write_info_json", - description="Write episode metadata to a .info.json file next to the media file itself.", - ) - - quiet: bool = Field( - default=False, - alias="quiet", - description="Print only minimal progress information. Errors will always be emitted.", - ) - - verbose: int = Field( - default=0, - alias="verbose", - description="Increase the level of verbosity while downloading.", - ) - - slugify_paths: bool = Field( - default=False, - alias="slugify", - description="Format filenames in the most compatible way, replacing all special characters.", - ) - - filename_template: str = Field( - alias="filename_template", - default="{show.title}/{episode.published_time:%Y-%m-%d} - {episode.title}.{ext}", - description=( - "Template to be used when generating filenames. Available template variables are: " - f"{ALL_FIELD_TITLES_STR}, and 'ext' (the filename extension)" - ), - ) - - maximum_episode_count: int = Field( - default=0, - alias="max_episodes", - description=( - "Only download the given number of episodes per podcast feed. " - "Useful if you don't really need the entire backlog." - ), - ) - - concurrency: int = Field( - default=4, - alias="concurrency", - description="Maximum number of simultaneous downloads.", - ) - - debug_partial: bool = Field( - default=False, - alias="debug_partial", - description=f"Download only the first {constants.DEBUG_PARTIAL_SIZE} bytes of episodes for debugging purposes.", - ) - - @classmethod - def load_from_dict(cls, value: dict[str, Any]) -> Settings: +class ConfigPath(click.Path): + def __init__(self) -> None: + return super().__init__( + exists=True, + readable=True, + file_okay=True, + dir_okay=False, + resolve_path=True, + path_type=pathlib.Path, + ) + + def convert( # type: ignore[override] + self, value: str | PathLike[str], param: Parameter | None, ctx: Context | None + ) -> str | bytes | PathLike[str] | None: + if value is None: + return None + if not ctx or ctx.resilient_parsing or not param: + return value + write_default_config(ctx, param, value) + value = cast(pathlib.Path, super().convert(value, param, ctx)) + logger.info("Loading configuration from %s", value) try: - return cls.model_validate(value) - except pydantic.ValidationError as exc: - raise InvalidSettings(errors=exc.errors()) from exc + ctx.default_map = self.load_yaml(value) + except InvalidSettings as exc: + self.fail(f"{self.name.title()} {click.format_filename(value)!r} is invalid: {exc}", param, ctx) + return value - @classmethod - def load_from_yaml(cls, path: pathlib.Path) -> Settings: + @staticmethod + def load_yaml(path: pathlib.Path) -> dict[str, Any]: try: with path.open("r") as filep: content = safe_load(filep) except YAMLError as exc: raise InvalidSettings("Not a valid YAML document") from exc - if content: - return cls.load_from_dict(content) - return cls() - - @classmethod - def generate_default_config(cls, file: IO[Text] | None = None) -> None: - now = datetime.now().replace(microsecond=0).astimezone() - wrapper = textwrap.TextWrapper(width=80, initial_indent="# ", subsequent_indent="# ") - - lines = [ - f"## {constants.PROG_NAME.title()} configuration", - f"## Generated with {constants.PROG_NAME} {version} at {now}", - ] - - for name, field in cls.model_fields.items(): - cli_opt = ( - wrapper.wrap(f"Equivalent command line option: --{field.alias.replace('_', '-')}") - if field.alias - else [] - ) - value = field.get_default(call_default_factory=True) - lines += [ - "", - *wrapper.wrap(f"Field '{name}': {field.description}"), - "#", - *cli_opt, - "#", - f"{name}: {to_json(value).decode()}", - ] - - contents = "\n".join(lines).strip() - if not file: - console.print(contents, highlight=False) - return - with file: - file.write(contents + "\n") - - @cached_property - def filename_formatter(self) -> FilenameFormatter: - return FilenameFormatter(self) - - -DEFAULT_SETTINGS = Settings() + if content is None: + return {} + if isinstance(content, dict): + return content + raise InvalidSettings("Not a valid YAML document") diff --git a/podcast_archiver/constants.py b/podcast_archiver/constants.py index 8458d9e..2a4e263 100644 --- a/podcast_archiver/constants.py +++ b/podcast_archiver/constants.py @@ -13,3 +13,6 @@ DEBUG_PARTIAL_SIZE = DOWNLOAD_CHUNK_SIZE * 4 MAX_TITLE_LENGTH = 96 + +DEFAULT_FILENAME_TEMPLATE = "{show.title}/{episode.published_time:%Y-%m-%d} - {episode.title}.{ext}" +DEFAULT_CONCURRENCY = 4 diff --git a/podcast_archiver/download.py b/podcast_archiver/download.py index ac952eb..d4265d5 100644 --- a/podcast_archiver/download.py +++ b/podcast_archiver/download.py @@ -4,7 +4,6 @@ from typing import IO, TYPE_CHECKING, Any from podcast_archiver import constants -from podcast_archiver.config import DEFAULT_SETTINGS, Settings from podcast_archiver.enums import DownloadResult from podcast_archiver.logging import logger from podcast_archiver.session import session @@ -16,6 +15,7 @@ from requests import Response from rich import progress as rich_progress + from podcast_archiver.config import Settings from podcast_archiver.models import Episode, FeedInfo @@ -26,6 +26,9 @@ class DownloadJob: target: Path stop_event: Event + _debug_partial: bool + _write_info_json: bool + _progress: rich_progress.Progress | None = None _task_id: rich_progress.TaskID | None = None @@ -33,18 +36,19 @@ def __init__( self, episode: Episode, *, + target: Path, feed_info: FeedInfo, - settings: Settings = DEFAULT_SETTINGS, + debug_partial: bool = False, + write_info_json: bool = False, progress: rich_progress.Progress | None = None, stop_event: Event | None = None, ) -> None: self.episode = episode + self.target = target self.feed_info = feed_info - self.settings = settings + self._debug_partial = debug_partial + self._write_info_json = write_info_json self._progress = progress - self.target = self.settings.filename_formatter.format(episode=self.episode, feed_info=self.feed_info) - if settings.debug_partial: - self.target = self.target.with_suffix(".partial" + self.target.suffix) self.stop_event = stop_event or Event() self.init_progress() @@ -124,7 +128,7 @@ def receive_data(self, fp: IO[str], response: Response) -> bool: total_written += fp.write(chunk) self.update_progress(completed=total_written) - if self.settings.debug_partial and total_written >= constants.DEBUG_PARTIAL_SIZE: + if self._debug_partial and total_written >= constants.DEBUG_PARTIAL_SIZE: logger.debug("Partial download completed.") return True if self.stop_event.is_set(): @@ -134,7 +138,7 @@ def receive_data(self, fp: IO[str], response: Response) -> bool: return True def write_info_json(self) -> None: - if not self.settings.write_info_json: + if not self._write_info_json: return logger.info("Writing episode metadata to %s", self.infojsonfile.name) with atomic_write(self.infojsonfile) as fp: diff --git a/podcast_archiver/exceptions.py b/podcast_archiver/exceptions.py index e6665b4..9898a70 100644 --- a/podcast_archiver/exceptions.py +++ b/podcast_archiver/exceptions.py @@ -1,28 +1,20 @@ from typing import Any -import pydantic_core - class PodcastArchiverException(Exception): pass class InvalidSettings(PodcastArchiverException): - errors: list[pydantic_core.ErrorDetails] + pass - def __init__(self, *args: Any, errors: list[pydantic_core.ErrorDetails] | None = None) -> None: - self.errors = errors or [] - super().__init__(*args) - @staticmethod - def _format_error(err: pydantic_core.ErrorDetails) -> str: - return f"Field '{'.'.join(str(loc) for loc in err['loc'])}': {err['msg']}" +class InvalidFeed(PodcastArchiverException): + feed: Any - def __str__(self) -> str: - msg = super().__str__() - if not self.errors: - return msg - return msg + "\n" + "\n".join("* " + self._format_error(err) for err in self.errors) + def __init__(self, *args: Any, feed: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.feed = feed class MissingDownloadUrl(ValueError): diff --git a/podcast_archiver/logging.py b/podcast_archiver/logging.py index 735e140..e2b608a 100644 --- a/podcast_archiver/logging.py +++ b/podcast_archiver/logging.py @@ -2,20 +2,27 @@ import logging import logging.config +from typing import TYPE_CHECKING from rich.logging import RichHandler from podcast_archiver.console import console +if TYPE_CHECKING: + import click + logger = logging.getLogger("podcast_archiver") -def configure_logging(verbosity: int) -> None: - if verbosity > 2: +def configure_logging(ctx: click.Context, param: click.Parameter, value: int | None) -> int | None: + if value is None or ctx.resilient_parsing: + return None + + if value > 2: level = logging.DEBUG - elif verbosity == 2: + elif value == 2: level = logging.INFO - elif verbosity == 1: + elif value == 1: level = logging.WARNING else: level = logging.ERROR @@ -37,3 +44,4 @@ def configure_logging(verbosity: int) -> None: ], ) logger.debug("Running in debug mode.") + return value diff --git a/podcast_archiver/processor.py b/podcast_archiver/processor.py index 1caf526..d17ea19 100644 --- a/podcast_archiver/processor.py +++ b/podcast_archiver/processor.py @@ -14,6 +14,7 @@ from podcast_archiver.enums import DownloadResult, QueueCompletionType from podcast_archiver.logging import logger from podcast_archiver.models import Feed +from podcast_archiver.utils import FilenameFormatter if TYPE_CHECKING: from podcast_archiver.config import Settings @@ -40,12 +41,14 @@ class ProcessingResult: class FeedProcessor: settings: Settings + filename_formatter: FilenameFormatter pool_executor: ThreadPoolExecutor progress: rich_progress.Progress stop_event: Event def __init__(self, settings: Settings) -> None: self.settings = settings + self.filename_formatter = FilenameFormatter(settings) self.pool_executor = ThreadPoolExecutor(max_workers=self.settings.concurrency) self.progress = rich_progress.Progress( *PROGRESS_COLUMNS, @@ -82,10 +85,13 @@ def process(self, url: AnyHttpUrl) -> ProcessingResult: def _process_episodes(self, feed: Feed) -> tuple[list[Future[DownloadResult]], QueueCompletionType]: futures: list[Future[DownloadResult]] = [] for idx, episode in enumerate(feed.episode_iter(self.settings.maximum_episode_count), 1): + target = self.filename_formatter.format(episode=episode, feed_info=feed.info) download_job = DownloadJob( episode, + target=target, feed_info=feed.info, - settings=self.settings, + debug_partial=self.settings.debug_partial, + write_info_json=self.settings.write_info_json, progress=self.progress, stop_event=self.stop_event, ) diff --git a/podcast_archiver/utils.py b/podcast_archiver/utils.py index 34bdf85..795d9f2 100644 --- a/podcast_archiver/utils.py +++ b/podcast_archiver/utils.py @@ -75,6 +75,7 @@ class FilenameFormatter(Formatter): _template: str _slugify: bool _path_root: Path + _partial: bool _parsed: list[tuple[str, str | None, str | None, str | None]] @@ -82,6 +83,7 @@ def __init__(self, settings: Settings) -> None: self._template = settings.filename_template self._slugify = settings.slugify_paths self._path_root = settings.archive_directory + self._partial = settings.debug_partial def parse( # type: ignore[override] self, @@ -104,7 +106,10 @@ def format(self, episode: Episode, feed_info: FeedInfo) -> Path: # type: ignore "show": feed_info, "ext": episode.ext, } - return self._path_root / self.vformat(self._template, args=(), kwargs=kwargs) + path = self._path_root / self.vformat(self._template, args=(), kwargs=kwargs) + if self._partial: + path = path.with_suffix(".partial" + path.suffix) + return path @contextmanager diff --git a/tests/conftest.py b/tests/conftest.py index 78605f0..1f382a5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,9 @@ import pytest from pydantic_core import Url +from podcast_archiver.config import Settings +from podcast_archiver.constants import DEFAULT_FILENAME_TEMPLATE + if TYPE_CHECKING: from responses import RequestsMock @@ -64,3 +67,27 @@ def tmp_path_cd(request: pytest.FixtureRequest, tmp_path: str) -> Iterable[str]: os.chdir(tmp_path) yield tmp_path os.chdir(request.config.invocation_params.dir) + + +@pytest.fixture +def default_settings_no_feeds(tmp_path: Path) -> Settings: + return Settings( + feeds=[], + opml_files=[], + archive_directory=tmp_path, + update_archive=False, + write_info_json=False, + maximum_episode_count=0, + filename_template=DEFAULT_FILENAME_TEMPLATE, + slugify_paths=False, + quiet=False, + verbose=1, + concurrency=2, + debug_partial=False, + ) + + +@pytest.fixture +def default_settings(default_settings_no_feeds: Settings, feed_lautsprecher: Url) -> Settings: + default_settings_no_feeds.feeds = [feed_lautsprecher] + return default_settings_no_feeds diff --git a/tests/test_base.py b/tests/test_base.py index 57758ce..ff6e136 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -14,8 +14,8 @@ FIXTURES_DIR / "opml_downcast_valid.xml", ], ) -def test_add_opml(opml_file: Path) -> None: - pa = PodcastArchiver(Settings()) +def test_add_opml(opml_file: Path, default_settings_no_feeds: Settings) -> None: + pa = PodcastArchiver(default_settings_no_feeds) pa.add_from_opml(opml_file) assert [str(f) for f in pa.feeds] == [FEED_URL] diff --git a/tests/test_config.py b/tests/test_config.py index ee4f59e..9b53a34 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,9 +1,9 @@ from pathlib import Path import pytest -from pydantic_core import Url +from click import BadParameter -from podcast_archiver.config import Settings +from podcast_archiver.cli import main from podcast_archiver.exceptions import InvalidSettings DUMMY_FEED = "http://localhost/feed.rss" @@ -13,17 +13,18 @@ def test_load(tmp_path_cd: Path) -> None: configfile = tmp_path_cd / "configtmp.yaml" configfile.write_text(f"feeds: [{DUMMY_FEED}]") - settings = Settings.load_from_yaml(configfile) + ctx = main.make_context("under_test", ["-c", str(configfile)]) - assert Url(DUMMY_FEED) in settings.feeds + assert ctx.default_map + assert DUMMY_FEED in ctx.default_map["feeds"] def test_load_invalid_yaml(tmp_path_cd: Path) -> None: configfile = tmp_path_cd / "configtmp.yaml" configfile.write_text("!randomgibberish") - with pytest.raises(InvalidSettings, match="Not a valid YAML document"): - Settings.load_from_yaml(configfile) + with pytest.raises(BadParameter, match="Not a valid YAML document"): + main.make_context("under_test", ["-c", str(configfile)]) def test_load_invalid_type(tmp_path_cd: Path) -> None: @@ -31,11 +32,11 @@ def test_load_invalid_type(tmp_path_cd: Path) -> None: configfile.write_text("feeds: 7") with pytest.raises(InvalidSettings, match="Input should be a valid list"): - Settings.load_from_yaml(configfile) + main.make_context("under_test", ["-c", str(configfile)]) def test_load_nonexistent(tmp_path_cd: Path) -> None: configfile = tmp_path_cd / "configtmp.yaml" - with pytest.raises(FileNotFoundError): - Settings.load_from_yaml(configfile) + with pytest.raises(BadParameter, match="does not exist."): + main.make_context("under_test", ["-c", str(configfile)]) diff --git a/tests/test_download.py b/tests/test_download.py index 3158408..d25ead1 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -2,6 +2,7 @@ import logging from functools import partial +from pathlib import Path from typing import TYPE_CHECKING, Any, Protocol from unittest import mock @@ -9,14 +10,11 @@ from requests import HTTPError from podcast_archiver import download, utils -from podcast_archiver.config import Settings from podcast_archiver.enums import DownloadResult from podcast_archiver.models import FeedPage from tests.conftest import MEDIA_URL if TYPE_CHECKING: - from pathlib import Path - from responses import RequestsMock @@ -29,7 +27,7 @@ def test_download_job(tmp_path_cd: Path, feedobj_lautsprecher: dict[str, Any]) - "update.return_value": None, } ) - job = download.DownloadJob(episode=episode, feed_info=feed.feed, progress=mock_progress) + job = download.DownloadJob(episode=episode, target=Path("file.tmp"), feed_info=feed.feed, progress=mock_progress) result = job() assert result == DownloadResult.COMPLETED_SUCCESSFULLY @@ -41,8 +39,8 @@ def test_download_already_exists(tmp_path_cd: Path, feedobj_lautsprecher_notcons feed = FeedPage.model_validate(feedobj_lautsprecher_notconsumed) episode = feed.episodes[0] - job = download.DownloadJob(episode=episode, feed_info=feed.feed) - job.target.parent.mkdir() + job = download.DownloadJob(episode=episode, target=Path("file.tmp"), feed_info=feed.feed) + job.target.parent.mkdir(exist_ok=True) job.target.touch() result = job() @@ -53,7 +51,7 @@ def test_download_aborted(tmp_path_cd: Path, feedobj_lautsprecher: dict[str, Any feed = FeedPage.model_validate(feedobj_lautsprecher) episode = feed.episodes[0] - job = download.DownloadJob(episode=episode, feed_info=feed.feed) + job = download.DownloadJob(episode=episode, target=Path("file.tmp"), feed_info=feed.feed) job.stop_event.set() result = job() @@ -86,7 +84,7 @@ def test_download_failed( if should_download: responses.add(responses.GET, MEDIA_URL, b"BLOB") - job = download.DownloadJob(episode=episode, feed_info=feed.feed) + job = download.DownloadJob(episode=episode, target=Path("file.tmp"), feed_info=feed.feed) with failure_mode(side_effect=side_effect), caplog.at_level(logging.ERROR): result = job() @@ -107,8 +105,12 @@ def test_download_failed( def test_download_info_json(tmp_path_cd: Path, feedobj_lautsprecher: dict[str, Any], write_info_json: bool) -> None: feed = FeedPage.model_validate(feedobj_lautsprecher) episode = feed.episodes[0] - settings = Settings(write_info_json=write_info_json) - job = download.DownloadJob(episode=episode, feed_info=feed.feed, settings=settings) + job = download.DownloadJob( + episode=episode, + target=Path("file.tmp"), + feed_info=feed.feed, + write_info_json=write_info_json, + ) result = job() assert result == DownloadResult.COMPLETED_SUCCESSFULLY diff --git a/tests/test_filenames.py b/tests/test_filenames.py index 56b4745..bf5af47 100644 --- a/tests/test_filenames.py +++ b/tests/test_filenames.py @@ -1,4 +1,5 @@ from datetime import datetime, timezone +from pathlib import Path import pytest @@ -50,9 +51,13 @@ ), ], ) -def test_filename_formatting(fname_tmpl: str, slugify: bool, expected_fname: str) -> None: - settings = Settings(filename_template=fname_tmpl, slugify_paths=slugify) - formatter = FilenameFormatter(settings=settings) +def test_filename_formatting( + fname_tmpl: str, slugify: bool, expected_fname: str, default_settings_no_feeds: Settings +) -> None: + default_settings_no_feeds.archive_directory = Path("") + default_settings_no_feeds.filename_template = fname_tmpl + default_settings_no_feeds.slugify_paths = slugify + formatter = FilenameFormatter(settings=default_settings_no_feeds) result = formatter.format(EPISODE, feed_info=FEED_INFO) diff --git a/tests/test_happy_path.py b/tests/test_happy_path.py index b782027..13cd239 100644 --- a/tests/test_happy_path.py +++ b/tests/test_happy_path.py @@ -7,18 +7,21 @@ from podcast_archiver.config import Settings -def test_happy_path(tmp_path: Path, feed_lautsprecher: Url) -> None: - settings = Settings(archive_directory=tmp_path, feeds=[feed_lautsprecher], quiet=True) - pa = PodcastArchiver(settings) +def test_happy_path(tmp_path: Path, default_settings: Settings) -> None: + default_settings.quiet = True + + pa = PodcastArchiver(default_settings) pa.run() files = list(tmp_path.glob("**/*.m4a")) assert len(files) == 5 -def test_happy_path_info_json(tmp_path: Path, feed_lautsprecher: Url) -> None: - settings = Settings(archive_directory=tmp_path, feeds=[feed_lautsprecher], quiet=True, write_info_json=True) - pa = PodcastArchiver(settings) +def test_happy_path_info_json(tmp_path: Path, default_settings: Settings) -> None: + default_settings.quiet = True + default_settings.write_info_json = True + + pa = PodcastArchiver(default_settings) pa.run() files = list(tmp_path.glob("**/*.m4a")) @@ -27,15 +30,14 @@ def test_happy_path_info_json(tmp_path: Path, feed_lautsprecher: Url) -> None: assert len(files) == 5 -def test_happy_path_max_episodes(tmp_path: Path, feed_lautsprecher: Url, capsys: pytest.CaptureFixture[str]) -> None: - settings = Settings( - archive_directory=tmp_path, - feeds=[feed_lautsprecher], - maximum_episode_count=2, - ) - pa = PodcastArchiver(settings) - pa.add_feed(feed_lautsprecher) +def test_happy_path_max_episodes( + tmp_path: Path, feed_lautsprecher: Url, capsys: pytest.CaptureFixture[str], default_settings: Settings +) -> None: + default_settings.archive_directory = tmp_path + default_settings.maximum_episode_count = 2 + pa = PodcastArchiver(default_settings) + pa.add_feed(feed_lautsprecher) pa.run() files = list(tmp_path.glob("**/*.m4a")) @@ -44,31 +46,23 @@ def test_happy_path_max_episodes(tmp_path: Path, feed_lautsprecher: Url, capsys: assert "Maximum episode count reached" in outerr.out -def test_happy_path_files_exist(tmp_path: Path, feed_lautsprecher: Url) -> None: +def test_happy_path_files_exist(tmp_path: Path, default_settings: Settings) -> None: (tmp_path / "LS015 Der Sender bin ich.m4a").touch() - settings = Settings( - archive_directory=tmp_path, - feeds=[feed_lautsprecher], - filename_template="{episode.title}.{ext}", - ) - pa = PodcastArchiver(settings) + default_settings.filename_template = "{episode.title}.{ext}" + pa = PodcastArchiver(default_settings) pa.run() files = list(tmp_path.glob("**/*.m4a")) assert len(files) == 5 -def test_happy_path_update(tmp_path: Path, feed_lautsprecher: Url) -> None: +def test_happy_path_update(tmp_path: Path, default_settings: Settings) -> None: (tmp_path / "LS017 Podcastverzeichnisse.m4a").touch() # cspell: disable-line - settings = Settings( - archive_directory=tmp_path, - feeds=[feed_lautsprecher], - update_archive=True, - filename_template="{episode.title}.{ext}", - ) - pa = PodcastArchiver(settings) + default_settings.update_archive = True + default_settings.filename_template = "{episode.title}.{ext}" + pa = PodcastArchiver(default_settings) pa.run() files = list(tmp_path.glob("**/*.m4a")) @@ -77,14 +71,12 @@ def test_happy_path_update(tmp_path: Path, feed_lautsprecher: Url) -> None: assert not list(tmp_path.glob("LS015*.m4a")) -def test_happy_path_empty_feed(tmp_path: Path, feed_lautsprecher_empty: Url) -> None: - settings = Settings( - archive_directory=tmp_path, - feeds=[feed_lautsprecher_empty], - update_archive=True, - ) - pa = PodcastArchiver(settings) +def test_happy_path_empty_feed( + tmp_path: Path, feed_lautsprecher_empty: Url, default_settings_no_feeds: Settings +) -> None: + default_settings_no_feeds.feeds = [feed_lautsprecher_empty] + pa = PodcastArchiver(default_settings_no_feeds) pa.run() files = list(tmp_path.glob("*.m4a")) diff --git a/tests/test_init.py b/tests/test_init.py index 377d1e0..e8e4a22 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -2,8 +2,8 @@ from podcast_archiver.config import Settings -def test_instantiate() -> None: - pa = PodcastArchiver(settings=Settings()) +def test_instantiate(default_settings_no_feeds: Settings) -> None: + pa = PodcastArchiver(settings=default_settings_no_feeds) pa.run() assert len(pa.feeds) == 0 diff --git a/tests/test_main.py b/tests/test_main.py index 2f63c1d..62b2a1d 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -30,7 +30,7 @@ def test_main_nonexistent_dir(feed_lautsprecher_notconsumed: Url) -> None: def test_main_nonexistent_opml(tmp_path_cd: Path, feed_lautsprecher_notconsumed: Url) -> None: - with pytest.raises(click.BadParameter, match="Field 'opml.0': Path does not point to a file"): + with pytest.raises(click.BadParameter, match="File '/nonexistent.xml' does not exist."): cli.main(["--opml", "/nonexistent.xml"], standalone_mode=False)