Skip to content

Commit

Permalink
Now we check the "episode.firstrun" and "program.title" as well as th…
Browse files Browse the repository at this point in the history
…e "episode.id" to determine whether we have downloaded an episode before. This seems to give more accurate results than relying on the id alone.
  • Loading branch information
HaukurPall committed May 9, 2022
1 parent 0ebf802 commit 4df0e0c
Show file tree
Hide file tree
Showing 6 changed files with 229 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
downloads/
orgranized/
organized/

# Poetry
poetry.lock
Expand Down
6 changes: 4 additions & 2 deletions src/ruv_dl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ruv_dl.organize import organize as _organize
from ruv_dl.ruv_client import Program, Programs, load_programs
from ruv_dl.search import get_all_programs_by_pattern
from ruv_dl.storage import EpisodeDownload
from ruv_dl.storage import EpisodeDownload, filter_downloaded_episodes

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -81,7 +81,9 @@ def download_program(
for episode in program["episodes"]
]
previously_downloaded_episodes = read_downloaded_episodes(config.download_log)
episodes_to_download = [episode for episode in selected_episodes if episode not in previously_downloaded_episodes]
episodes_to_download = filter_downloaded_episodes(
downloaded_episodes=previously_downloaded_episodes, episodes_to_download=selected_episodes
)
log.info(f"Will download {len(episodes_to_download)} episodes")
tqdm_iter = tqdm(episodes_to_download)
try:
Expand Down
10 changes: 8 additions & 2 deletions src/ruv_dl/ruv_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,21 @@


class Episode(TypedDict):
    """A single episode"""

    # Episode identifier.
    id: str
    # Episode title.
    title: str
    # NOTE(review): presumably the location of the episode's media; confirm against the API.
    file: str
    # NOTE(review): presumably the first broadcast time; confirm against the API.
    firstrun: str  # 2009-01-01 22:10:00


class Program(TypedDict):
    """A single program together with its episodes."""

    # Program identifier.
    id: str
    # Program title.
    title: str
    # Optional: the value may be None.
    foreign_title: Optional[str]
    # Optional: the value may be None.
    short_description: Optional[str]
    # Episodes of this program; the list may be empty.
    episodes: List[Episode]


Expand Down Expand Up @@ -111,6 +116,7 @@ async def _query_all_programs(session: AsyncClientSession) -> List[Program]:
id
title
file
firstrun
}
title
foreign_title
Expand Down
28 changes: 27 additions & 1 deletion src/ruv_dl/storage.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import json
from dataclasses import asdict, dataclass
from typing import Optional
from typing import List, Optional

from ruv_dl.ruv_client import Episode, Program


@dataclass
class EpisodeDownload:
"""A downloaded episode"""

id: str
program_id: str
program_title: str
title: Optional[str]
foreign_title: Optional[str]
quality_str: str
url: str
firstrun: Optional[str] = None

@staticmethod
def from_episode_and_program(episode: Episode, program: Program, quality: str) -> "EpisodeDownload":
Expand All @@ -25,6 +28,7 @@ def from_episode_and_program(episode: Episode, program: Program, quality: str) -
foreign_title=program["foreign_title"],
quality_str=quality,
url=episode["file"],
firstrun=episode["firstrun"],
)

@staticmethod
Expand All @@ -43,3 +47,25 @@ def file_name_regexp(extension: str) -> str:
r"^(?P<program_title>.+?) \|\|\| (?P<title>.+?) \|\|\| (?P<foreign_title>.+?)(?P<quality_str> \[.+?\])?"
+ f".{extension}"
)


def filter_downloaded_episodes(
    downloaded_episodes: List[EpisodeDownload], episodes_to_download: List[EpisodeDownload]
) -> List[EpisodeDownload]:
    """Filter out episodes that are already downloaded.

    An episode counts as already downloaded when either

    * its ``id`` equals the ``id`` of a downloaded episode, or
    * it has a ``firstrun`` and a downloaded episode has the same
      ``program_title`` and the same ``firstrun``.

    If ``firstrun`` is not present (``None`` or empty) only the ``id`` is
    compared, so two different episodes of one program that both lack a
    ``firstrun`` are never mistaken for each other.

    Args:
        downloaded_episodes: Episodes that have been downloaded before.
        episodes_to_download: Candidate episodes.

    Returns:
        The candidates that are not downloaded yet, in their original order.
    """
    # Index the downloaded episodes once so that every candidate costs two
    # set lookups instead of a scan over the whole download history.
    known_ids = set()
    known_airings = set()
    for downloaded in downloaded_episodes:
        known_ids.add(downloaded.id)
        # A missing firstrun carries no information, never match on it.
        if downloaded.firstrun:
            known_airings.add((downloaded.program_title, downloaded.firstrun))

    return [
        episode
        for episode in episodes_to_download
        if episode.id not in known_ids
        and not (episode.firstrun and (episode.program_title, episode.firstrun) in known_airings)
    ]
22 changes: 22 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from tkinter import W

from ruv_dl import ruv_client


def test_fields():
    """Check that all programs and episodes returned by the client carry the expected fields."""
    all_programs = ruv_client.RUVClient().get_all_programs()
    assert len(all_programs) > 0
    for program in all_programs.values():
        assert program["title"], f"Bad program: {program}"
        # Most Icelandic shows do not have a foreign title
        assert "foreign_title" in program, f"Bad program: {program}"
        assert program["id"], f"Bad program: {program}"
        # Some audio shows do not have a short description,
        # so only the presence of the key is required.
        for program_key in ("short_description", "episodes"):
            assert program_key in program, f"Bad program: {program}"
        # Some programs have no episodes
        for episode in program["episodes"]:
            for episode_key in ("id", "title", "firstrun", "file"):
                assert episode_key in episode, f"Bad episode: {episode}"
167 changes: 167 additions & 0 deletions tests/test_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
from ruv_dl.storage import EpisodeDownload, filter_downloaded_episodes


def test_filter_downloaded_all_equal():
    """A candidate that equals a downloaded episode in every field is filtered out."""
    fields = dict(
        id="1",
        program_id="1",
        program_title="Program 1",
        title="Episode 1",
        foreign_title="Foreign title 1",
        quality_str="Quality 1",
        url="Url 1",
        firstrun="Firstrun 1",
    )
    already_downloaded = [EpisodeDownload(**fields)]
    candidates = [EpisodeDownload(**fields)]
    assert filter_downloaded_episodes(already_downloaded, candidates) == []


def test_filter_downloaded_same_title_and_firstrun():
    """Same program title and firstrun is enough to filter, even when the id differs."""
    fields = dict(
        id="1",
        program_id="1",
        program_title="Program 1",
        title="Episode 1",
        foreign_title="Foreign title 1",
        quality_str="Quality 1",
        url="Url 1",
        firstrun="Firstrun 1",
    )
    already_downloaded = [EpisodeDownload(**fields)]
    # Different id
    candidates = [EpisodeDownload(**{**fields, "id": "2"})]
    assert filter_downloaded_episodes(already_downloaded, candidates) == []


def test_filter_downloaded_same_id():
    """A matching id filters the candidate even when the program title differs."""
    fields = dict(
        id="1",
        program_id="1",
        program_title="Program 1",
        title="Episode 1",
        foreign_title="Foreign title 1",
        quality_str="Quality 1",
        url="Url 1",
        firstrun="Firstrun 1",
    )
    already_downloaded = [EpisodeDownload(**fields)]
    candidates = [EpisodeDownload(**{**fields, "program_title": "Program 2"})]
    assert filter_downloaded_episodes(already_downloaded, candidates) == []


def test_filter_downloaded_not_same_title_id():
    """A candidate whose id and program title both differ is kept."""
    fields = dict(
        id="1",
        program_id="1",
        program_title="Program 1",
        title="Episode 1",
        foreign_title="Foreign title 1",
        quality_str="Quality 1",
        url="Url 1",
        firstrun="Firstrun 1",
    )
    already_downloaded = [EpisodeDownload(**fields)]
    # Different id and different program_title
    candidates = [EpisodeDownload(**{**fields, "id": "2", "program_title": "Program 2"})]
    assert filter_downloaded_episodes(already_downloaded, candidates) == candidates


def test_filter_downloaded_same_id_missing_firstrun():
    """Without a firstrun on the downloaded episode, a matching id still filters the candidate."""
    fields_without_firstrun = dict(
        id="1",
        program_id="1",
        program_title="Program 1",
        title="Episode 1",
        foreign_title="Foreign title 1",
        quality_str="Quality 1",
        url="Url 1",
    )
    already_downloaded = [EpisodeDownload(**fields_without_firstrun)]
    candidates = [EpisodeDownload(**{**fields_without_firstrun, "firstrun": "Firstrun 1"})]
    assert filter_downloaded_episodes(already_downloaded, candidates) == []


def test_filter_downloaded_not_same_id_missing_firstrun():
    """Without a firstrun on the downloaded episode, a candidate with another id is kept."""
    fields_without_firstrun = dict(
        id="1",
        program_id="1",
        program_title="Program 1",
        title="Episode 1",
        foreign_title="Foreign title 1",
        quality_str="Quality 1",
        url="Url 1",
    )
    already_downloaded = [EpisodeDownload(**fields_without_firstrun)]
    candidates = [EpisodeDownload(**{**fields_without_firstrun, "id": "2", "firstrun": "Firstrun 1"})]
    assert filter_downloaded_episodes(already_downloaded, candidates) == candidates

0 comments on commit 4df0e0c

Please sign in to comment.