feat: Add continuous mode with --sleep-seconds (#192)

janw authored Oct 27, 2024
1 parent 43b2c95 · commit 02ff00e

Showing 18 changed files with 399 additions and 168 deletions.
19 changes: 19 additions & 0 deletions README.md
@@ -72,6 +72,25 @@ Feeds can also be "fetched" from a local file:
podcast-archiver -f file:/Users/janw/downloaded_feed.xml
```

#### Continuous mode

When the `--sleep-seconds` option is set to a non-zero value, Podcast Archiver operates in continuous mode. After successfully populating the archive, it does not terminate; instead it sleeps for the given number of seconds, then refreshes the feeds again and downloads any episodes published in the meantime.

If no new episodes have been published, no download attempts are made, and the archiver goes back to sleep. This mode of operation is well suited to a containerized setup, for example using [docker compose](https://docs.docker.com/compose/install/):

```yaml
services:
  podcast-archiver:
    restart: always
    image: ghcr.io/janw/podcast-archiver
    volumes:
      - ./archive:/archive
    command:
      - --sleep-seconds=3600 # sleep for 1 hour between updates
      - --feed=https://feeds.feedburner.com/TheAnthropoceneReviewed
      - --feed=https://feeds.megaphone.fm/heavyweight-spot
```
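
With the compose file above in place, `docker compose up -d` starts the archiver in the background; `--sleep-seconds=3600` makes it refresh the feeds once per hour, and `restart: always` additionally brings the container back up after a crash or reboot.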
### Changing the filename format

Podcast Archiver has a `--filename-template` option that allows you to change the naming scheme of the archive. The default value for `--filename-template` is shown in `podcast-archiver --help`, along with all the available variables. The basic ones are:
1 change: 1 addition & 0 deletions cspell.config.yaml
@@ -44,6 +44,7 @@ words:
  - PYTHONUNBUFFERED
  - pyyaml
  - rprint
  - signum
  - subdirs
  - tini
  - tmpl
17 changes: 17 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,17 @@
services:
  podcast-archiver:
    restart: always
    image: ghcr.io/janw/podcast-archiver:v1
    build:
      context: .
      dockerfile: Dockerfile
      cache_from:
        - ghcr.io/janw/podcast-archiver:edge
        - ghcr.io/janw/podcast-archiver:latest
    volumes:
      - ./archive:/archive
    command:
      - --sleep-seconds=3600
      - --ignore-database
      - --feed=https://feeds.feedburner.com/TheAnthropoceneReviewed
      - --feed=https://feeds.megaphone.fm/heavyweight-spot
14 changes: 11 additions & 3 deletions podcast_archiver/base.py
@@ -1,7 +1,9 @@
from __future__ import annotations

import signal
import sys
import xml.etree.ElementTree as etree
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any

from podcast_archiver.logging import logger, rprint
from podcast_archiver.processor import FeedProcessor
@@ -31,9 +33,15 @@ def __init__(self, settings: Settings):
            self.add_from_opml(opml)

    def register_cleanup(self, ctx: click.RichContext) -> None:
        @ctx.call_on_close
        def _cleanup() -> None:
        def _cleanup(signum: int, *args: Any) -> None:
            logger.debug("Signal %s received", signum)
            rprint("[error]Terminating.[/]")
            self.processor.shutdown()
            ctx.close()
            sys.exit(0)

        signal.signal(signal.SIGINT, _cleanup)
        signal.signal(signal.SIGTERM, _cleanup)

    def add_feed(self, feed: Path | str) -> None:
        new_feeds = [feed] if isinstance(feed, str) else feed.read_text().strip().splitlines()
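The cleanup hook moves from click's `ctx.call_on_close` to OS-level signal handlers, which also catches the SIGTERM that `docker stop` sends, not just Ctrl+C. A minimal, standard-library-only sketch of the pattern (the names here are illustrative, not the project's):

```python
import signal
import sys
from types import FrameType


def _cleanup(signum: int, frame: FrameType | None) -> None:
    # Handlers registered via signal.signal receive (signal number, stack frame).
    print(f"Signal {signum} received, terminating.")
    sys.exit(0)  # raises SystemExit so `with` blocks and finally clauses unwind


signal.signal(signal.SIGINT, _cleanup)   # Ctrl+C in an interactive session
signal.signal(signal.SIGTERM, _cleanup)  # default signal sent by `docker stop`

signal.pause()  # block until a signal arrives (POSIX only)
```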
20 changes: 16 additions & 4 deletions podcast_archiver/cli.py
@@ -3,18 +3,18 @@
import os
import pathlib
import stat
import time
from os import getenv
from typing import TYPE_CHECKING, Any

import rich_click as click
from rich import get_console

from podcast_archiver import __version__ as version
from podcast_archiver import constants
from podcast_archiver.base import PodcastArchiver
from podcast_archiver.config import Settings, in_ci
from podcast_archiver.exceptions import InvalidSettings
from podcast_archiver.logging import configure_logging
from podcast_archiver.logging import configure_logging, rprint

if TYPE_CHECKING:
    from click.shell_completion import CompletionItem
@@ -49,6 +49,7 @@
"--update",
"--max-episodes",
"--ignore-database",
"--sleep",
],
},
]
@@ -215,6 +216,7 @@ def generate_default_config(ctx: click.Context, param: click.Parameter, value: b
"-v",
"--verbose",
count=True,
metavar="",
show_envvar=True,
help=Settings.model_fields["verbose"].description,
)
@@ -281,10 +283,16 @@ def generate_default_config(ctx: click.Context, param: click.Parameter, value: b
    show_envvar=True,
    help=Settings.model_fields["ignore_database"].description,
)
@click.option(
    "--sleep-seconds",
    type=int,
    default=0,
    show_envvar=True,
    help=Settings.model_fields["sleep_seconds"].description,
)
@click.pass_context
def main(ctx: click.RichContext, /, **kwargs: Any) -> int:
    get_console().quiet = kwargs["quiet"]
    configure_logging(kwargs["verbose"])
    configure_logging(kwargs["verbose"], kwargs["quiet"])
    try:
        settings = Settings.load_from_dict(kwargs)

@@ -296,6 +304,10 @@ def main(ctx: click.RichContext, /, **kwargs: Any) -> int:
        pa = PodcastArchiver(settings=settings)
        pa.register_cleanup(ctx)
        pa.run()
        while settings.sleep_seconds > 0:
            rprint(f"Sleeping for {settings.sleep_seconds} seconds.")
            time.sleep(settings.sleep_seconds)
            pa.run()
    except InvalidSettings as exc:
        raise click.BadParameter(f"Invalid settings: {exc}") from exc
    except KeyboardInterrupt as exc:  # pragma: no cover
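One detail worth noting about the loop above: a SIGINT or SIGTERM arriving during `time.sleep()` interrupts the sleep and runs the handler registered in `register_cleanup`, which exits via `sys.exit(0)`, so the infinite loop needs no break condition of its own.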
14 changes: 13 additions & 1 deletion podcast_archiver/config.py
@@ -87,7 +87,11 @@ class Settings(BaseModel):

    verbose: int = Field(
        default=0,
        description="Increase the level of verbosity while downloading.",
        description=(
            "Increase the level of verbosity while downloading. Can be passed multiple times. Increased verbosity and "
            "non-interactive execution (in a cronjob, docker compose, etc.) will disable progress bars. "
            "Non-interactive execution also always raises the verbosity unless --quiet is passed."
        ),
    )

    slugify_paths: bool = Field(
@@ -136,6 +140,14 @@ class Settings(BaseModel):
        ),
    )

    sleep_seconds: int = Field(
        default=0,
        description=(
            f"Run {constants.PROG_NAME} continuously. Set to a non-zero number of seconds to sleep after all available "
            "episodes have been downloaded. Otherwise the application exits after all downloads have been completed."
        ),
    )

    config: FilePath | None = Field(
        default=None,
        exclude=True,
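As the cli.py hunk earlier shows, the new option's help text is pulled straight from the model via `Settings.model_fields["sleep_seconds"].description`, keeping the CLI and config-file documentation in one place. A minimal sketch of that mechanism, assuming pydantic v2 (the field and description here are stand-ins):

```python
from pydantic import BaseModel, Field


class Settings(BaseModel):
    sleep_seconds: int = Field(
        default=0,
        description="Seconds to sleep between feed refreshes; 0 disables continuous mode.",
    )


# pydantic v2 exposes per-field metadata on the class itself, no instance needed:
print(Settings.model_fields["sleep_seconds"].description)
```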
2 changes: 2 additions & 0 deletions podcast_archiver/constants.py
@@ -15,6 +15,8 @@

MAX_TITLE_LENGTH = 96


DEFAULT_DATETIME_FORMAT = "%Y-%m-%d"
DEFAULT_ARCHIVE_DIRECTORY = pathlib.Path(".")
DEFAULT_FILENAME_TEMPLATE = "{show.title}/{episode.published_time:%Y-%m-%d} - {episode.title}.{ext}"
DEFAULT_CONCURRENCY = 4
97 changes: 37 additions & 60 deletions podcast_archiver/download.py
@@ -1,15 +1,13 @@
from __future__ import annotations

from contextlib import nullcontext
from contextlib import contextmanager
from threading import Event
from typing import IO, TYPE_CHECKING, NoReturn

from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
from typing import IO, TYPE_CHECKING, Generator

from podcast_archiver import constants
from podcast_archiver.enums import DownloadResult
from podcast_archiver.logging import logger
from podcast_archiver.exceptions import NotCompleted
from podcast_archiver.logging import logger, wrapped_tqdm
from podcast_archiver.session import session
from podcast_archiver.types import EpisodeResult
from podcast_archiver.utils import atomic_write
@@ -28,38 +26,31 @@ class DownloadJob:
    target: Path
    stop_event: Event

    _debug_partial: bool
    _max_download_bytes: int | None = None
    _write_info_json: bool
    _no_progress: bool

    def __init__(
        self,
        episode: Episode,
        *,
        target: Path,
        debug_partial: bool = False,
        max_download_bytes: int | None = None,
        write_info_json: bool = False,
        no_progress: bool = False,
        stop_event: Event | None = None,
    ) -> None:
        self.episode = episode
        self.target = target
        self._debug_partial = debug_partial
        self._max_download_bytes = max_download_bytes
        self._write_info_json = write_info_json
        self._no_progress = no_progress
        self.stop_event = stop_event or Event()

    def __repr__(self) -> str:
        return f"EpisodeDownload({self})"

    def __str__(self) -> str:
        return str(self.episode)

    def __call__(self) -> EpisodeResult:
        try:
            return self.run()
        except NotCompleted:
            return EpisodeResult(self.episode, DownloadResult.ABORTED)
        except Exception as exc:
            logger.error(f"Download failed: {exc}")
            logger.error("Download failed: %s; %s", self.episode, exc)
            logger.debug("Exception while downloading", exc_info=exc)
            return EpisodeResult(self.episode, DownloadResult.FAILED)

@@ -68,58 +59,44 @@ def run(self) -> EpisodeResult:
            return EpisodeResult(self.episode, DownloadResult.ALREADY_EXISTS)

        self.target.parent.mkdir(parents=True, exist_ok=True)
        self.write_info_json()

        response = session.get(
            self.episode.enclosure.href,
            stream=True,
            allow_redirects=True,
        )
        response.raise_for_status()
        total_size = int(response.headers.get("content-length", "0"))
        with (
            logging_redirect_tqdm() if not self._no_progress else nullcontext(),
            tqdm(
                desc=f"{self.episode.title} ({self.episode.published_time:%Y-%m-%d})",
                total=total_size,
                unit_scale=True,
                unit="B",
                disable=self._no_progress,
            ) as progresser,
        ):
            with atomic_write(self.target, mode="wb") as fp:
                receive_complete = self.receive_data(fp, response, progresser=progresser)

            if not receive_complete:
                self.target.unlink(missing_ok=True)
                return EpisodeResult(self.episode, DownloadResult.ABORTED)
        logger.info("Downloading: %s", self.episode)
        response = session.get_and_raise(self.episode.enclosure.href, stream=True)
        with self.write_info_json(), atomic_write(self.target, mode="wb") as fp:
            self.receive_data(fp, response)

        logger.info("Completed download of %s", self.target)
        logger.info("Completed: %s", self.episode)
        return EpisodeResult(self.episode, DownloadResult.COMPLETED_SUCCESSFULLY)

    @property
    def infojsonfile(self) -> Path:
        return self.target.with_suffix(".info.json")

    def receive_data(self, fp: IO[str], response: Response, progresser: tqdm[NoReturn]) -> bool:
    def receive_data(self, fp: IO[bytes], response: Response) -> None:
        total_size = int(response.headers.get("content-length", "0"))
        total_written = 0
        for chunk in response.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
            written = fp.write(chunk)
            total_written += written
            progresser.update(written)

            if self._debug_partial and total_written >= constants.DEBUG_PARTIAL_SIZE:
                logger.debug("Partial download completed.")
                return True
            if self.stop_event.is_set():
                logger.debug("Stop event is set, bailing.")
                return False
        max_bytes = self._max_download_bytes
        for chunk in wrapped_tqdm(
            response.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE),
            desc=str(self.episode),
            total=total_size,
        ):
            total_written += fp.write(chunk)

        return True
            if max_bytes and total_written >= max_bytes:
                fp.truncate(max_bytes)
                logger.debug("Partial download of first %s bytes completed.", max_bytes)
                return

            if self.stop_event.is_set():
                logger.debug("Stop event is set, bailing on %s.", self.episode)
                raise NotCompleted

    def write_info_json(self) -> None:
    @contextmanager
    def write_info_json(self) -> Generator[None, None, None]:
        if not self._write_info_json:
            yield
            return
        logger.info("Writing episode metadata to %s", self.infojsonfile.name)
        with atomic_write(self.infojsonfile) as fp:
            fp.write(self.episode.model_dump_json(indent=2) + "\n")
        yield
        logger.debug("Wrote episode metadata to %s", self.infojsonfile.name)
6 changes: 3 additions & 3 deletions podcast_archiver/enums.py
@@ -7,9 +7,9 @@ def __str__(self) -> str:


class QueueCompletionType(StrEnum):
    COMPLETED = "Archived all episodes."
    FOUND_EXISTING = "Archive is up to date."
    MAX_EPISODES = "Maximum episode count reached."
    COMPLETED = "Archived all episodes"
    FOUND_EXISTING = "Archive is up to date"
    MAX_EPISODES = "Maximum episode count reached"


class DownloadResult(StrEnum):
22 changes: 20 additions & 2 deletions podcast_archiver/exceptions.py
@@ -1,6 +1,11 @@
from typing import Any
from __future__ import annotations

import pydantic_core
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    import pydantic_core

    from podcast_archiver.models import FeedInfo


class PodcastArchiverException(Exception):
@@ -27,3 +32,16 @@ def __str__(self) -> str:

class MissingDownloadUrl(ValueError):
    pass


class NotCompleted(RuntimeError):
    pass


class NotModified(PodcastArchiverException):
    info: FeedInfo
    last_modified: str | None = None

    def __init__(self, info: FeedInfo, *args: object) -> None:
        super().__init__(*args)
        self.info = info