Skip to content

Commit

Permalink
Merge pull request #53 from debakarr/try-fixing-what-is-broken
Browse files Browse the repository at this point in the history
Try fixing what is broken
  • Loading branch information
debakarr committed Sep 15, 2024
2 parents 2df38d1 + 3bb4f31 commit 64bcc9b
Show file tree
Hide file tree
Showing 12 changed files with 1,322 additions and 1,660 deletions.
181 changes: 123 additions & 58 deletions README.md

Large diffs are not rendered by default.

2,283 changes: 871 additions & 1,412 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ markdownify = "^0.11.6"
validators = "^0.20.0"
click = "^8.1.3"
yt-dlp = "^2023.3.4"
pydantic = "^2.9.1"

[tool.poetry.dev-dependencies]
datamodel-code-generator = { extras = ["http"], version = "^0.17.1" }
flake8 = { version = "^6.0.0", python = ">=3.8.1,<4.0" }
flake8-bugbear = "^23.2.13"
flake8-black = "^0.3.6"
Expand Down
15 changes: 10 additions & 5 deletions src/kodekloud_downloader/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@

from kodekloud_downloader.enums import Quality
from kodekloud_downloader.helpers import select_courses
from kodekloud_downloader.main import download_course, download_quiz
from kodekloud_downloader.models import get_all_course
from kodekloud_downloader.main import (
download_course,
download_quiz,
parse_course_from_url,
)
from kodekloud_downloader.models.helper import collect_all_courses


@click.group()
Expand Down Expand Up @@ -59,19 +63,20 @@ def dl(
max_duplicate_count: int,
):
if course_url is None:
courses = get_all_course()
courses = collect_all_courses()
selected_courses = select_courses(courses)
for selected_course in selected_courses:
download_course(
url=selected_course.link,
course=selected_course,
cookie=cookie,
quality=quality,
output_dir=output_dir,
max_duplicate_count=max_duplicate_count,
)
elif validators.url(course_url):
course_detail = parse_course_from_url(course_url)
download_course(
url=course_url,
course=course_detail,
cookie=cookie,
quality=quality,
output_dir=output_dir,
Expand Down
51 changes: 35 additions & 16 deletions src/kodekloud_downloader/helpers.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import logging
import re
import string
from pathlib import Path
from typing import List
from typing import List, Optional

import prettytable
import requests
import yt_dlp

from kodekloud_downloader.models import Course
from kodekloud_downloader.models.courses import Course

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -55,7 +56,12 @@ def select_courses(courses: List[Course]) -> List[Course]:

for i, course in enumerate(courses):
table.add_row(
[i + 1, course.name, course.course_type, ", ".join(course.categories)]
[
i + 1,
course.title,
course.plan,
", ".join([category.name for category in course.categories]),
]
)

table.align["No."] = "l"
Expand Down Expand Up @@ -96,6 +102,9 @@ def download_video(url: str, output_path: Path, cookie: str, quality: str) -> No
:param cookie: The user's authentication cookie
:param quality: The video quality (e.g. "720p")
"""
headers = {
"Referer": "https://learn.kodekloud.com/",
}
ydl_opts = {
"format": f"bestvideo[height<={quality[:-1]}]+bestaudio/best[height<={quality[:-1]}]/best",
"concurrent_fragment_downloads": 15,
Expand All @@ -105,6 +114,7 @@ def download_video(url: str, output_path: Path, cookie: str, quality: str) -> No
"merge_output_format": "mkv",
"writesubtitles": True,
"no_write_sub": True,
"http_headers": headers,
}
logger.debug(f"Calling download with following options: {ydl_opts}")
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
Expand Down Expand Up @@ -140,17 +150,26 @@ def download_all_pdf(content, download_path: Path, cookie: str) -> None:
file_name.write_bytes(response.content)


def get_video_info(url: str, cookie: str):
ydl_opts = {
"skip_download": True,
"print_json": True,
"quiet": True,
"extract_flat": True,
"simulate": True,
"no_warnings": True,
"cookiefile": cookie,
}
def parse_token(cookiefile: str) -> Optional[str]:
    """
    Parse the session cookie from a Netscape-format cookie file.

    Each cookie line holds seven tab-separated fields; the sixth is the
    cookie name and the seventh its value. Blank lines and ``#`` comment
    lines are skipped. Lines prefixed with ``#HttpOnly_`` are *not* comments:
    that prefix marks an HttpOnly cookie, so it is removed and the rest of
    the line is parsed like any other cookie.

    :param cookiefile: The path to the file containing cookies.
    :return: The value of the 'session-cookie' if found, otherwise None.
    :raises FileNotFoundError: If the cookie file does not exist.
    :raises IOError: If there is an error reading the file.
    """
    cookies = {}
    try:
        with open(cookiefile, "r") as fp:
            for raw_line in fp:
                # Unmask HttpOnly cookies before the comment check, otherwise
                # they would be discarded as comments.
                line = re.sub(r"^#HttpOnly_", "", raw_line.strip())
                if not line or line.startswith("#"):
                    continue
                line_fields = line.split("\t")
                if len(line_fields) > 6:
                    cookies[line_fields[5]] = line_fields[6]
    except FileNotFoundError as e:
        # Chain the original exception so the underlying errno is kept.
        raise FileNotFoundError(f"The file {cookiefile} does not exist.") from e
    except IOError as e:
        raise IOError(f"Error reading the file {cookiefile}: {e}") from e

    return cookies.get("session-cookie")
133 changes: 85 additions & 48 deletions src/kodekloud_downloader/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
from collections import defaultdict
from http.cookiejar import MozillaCookieJar
from pathlib import Path
from typing import Union

Expand All @@ -12,16 +11,31 @@
from kodekloud_downloader.helpers import (
download_all_pdf,
download_video,
get_video_info,
is_normal_content,
normalize_name,
parse_token,
)
from kodekloud_downloader.models import Quiz, Topic
from kodekloud_downloader.models.course import CourseDetail
from kodekloud_downloader.models.courses import Course
from kodekloud_downloader.models.helper import fetch_course_detail
from kodekloud_downloader.models.quiz import Quiz

logger = logging.getLogger(__name__)


def download_quiz(output_dir: str, sep: bool):
def download_quiz(output_dir: str, sep: bool) -> None:
"""
Download quizzes from the API and save them as Markdown files.
:param output_dir: The directory path where the Markdown files will be saved.
:param sep: A boolean flag indicating whether to separate each quiz into individual files.
If `True`, each quiz will be saved as a separate Markdown file. If `False`,
all quizzes will be combined into a single Markdown file.
:return: None
:raises ValueError: If `output_dir` is not a valid directory path.
:raises requests.RequestException: For errors related to the HTTP request.
:raises IOError: For file I/O errors.
"""
quiz_markdown = [] if sep else ["# KodeKloud Quiz"]
response = requests.get("https://mcq-backend-main.kodekloud.com/api/quizzes/all")
response.raise_for_status()
Expand Down Expand Up @@ -56,7 +70,7 @@ def download_quiz(output_dir: str, sep: bool):
output_file = Path(output_dir) / f"{quiz_name.replace('/', '')}.md"
markdown_text = "\n".join(quiz_markdown)

with open(output_file, 'w', encoding='utf-8') as f:
with open(output_file, "w", encoding="utf-8") as f:
f.write(markdown_text)
print(f"Quiz file written in {output_file}")

Expand All @@ -72,8 +86,21 @@ def download_quiz(output_dir: str, sep: bool):
print(f"Quiz file written in {output_file}")


def parse_course_from_url(url: str) -> CourseDetail:
    """
    Parse the course slug from the given URL and fetch the course details.

    The slug is the last non-empty segment of the URL path. Query strings
    and fragments are ignored, so ``.../courses/docker/?ref=x`` yields
    ``docker``.

    :param url: The URL from which to extract the course slug.
    :return: An instance of `CourseDetail` containing the course details.
    :raises ValueError: If the URL does not contain a valid course slug.
    """
    from urllib.parse import urlparse

    # Only the path component can hold the slug; using it avoids treating
    # the host name, query string or fragment as a slug.
    path = urlparse(url.strip()).path
    course_slug = path.strip("/").split("/")[-1]
    if not course_slug:
        raise ValueError(f"Could not extract a course slug from URL: {url!r}")
    return fetch_course_detail(course_slug)


def download_course(
url: str,
course: Union[Course, CourseDetail],
cookie: str,
quality: str,
output_dir: Union[str, Path],
Expand All @@ -82,40 +109,46 @@ def download_course(
"""
Download a course from KodeKloud.
:param url: The course URL
:param course: The Course or CourseDetail object
:param cookie: The user's authentication cookie
:param quality: The video quality (e.g. "720p")
:param output_dir: The output directory for the downloaded course
:param max_duplicate_count: Maximum number of times the same video URL may be downloaded before the cookie-expired error is raised
"""
cj = MozillaCookieJar(cookie)
cj.load(ignore_discard=True, ignore_expires=True)

page = requests.get(url, cookies=cj)
soup = BeautifulSoup(page.content, "html.parser")
course_name_tag = soup.find("h1", class_="course_title") or soup.find(
"h1", class_="entry-title"
)
course_name = course_name_tag.text.strip()
main_lesson_content = soup.find("div", class_="lessons_main__content") or soup.find(
"div", class_="ld-lesson-list"
)
topics = (
main_lesson_content.find_all("div", class_="ld-item-list-item")
or main_lesson_content.find_all("div", class_="w-dyn-item")
or main_lesson_content.find_all("div", class_="ld-item-list-items")
session = requests.Session()
session_token = parse_token(cookie)
headers = {"authorization": f"bearer {session_token}"}
params = {
"course_id": course.id,
}

course_detail = (
fetch_course_detail(course.slug) if isinstance(course, Course) else course
)
items = [Topic.make(topic) for topic in topics]

downloaded_videos = defaultdict(int)
for i, item in enumerate(items, start=1):
for j, lesson in enumerate(item.lessons, start=1):
for module_index, module in enumerate(course_detail.modules, start=1):
for lesson_index, lesson in enumerate(module.lessons, start=1):
file_path = create_file_path(
output_dir, course_name, i, item.name, j, lesson.name
output_dir,
course.title,
module_index,
module.title,
lesson_index,
lesson.title,
)

if lesson.is_video:
current_video_url = get_video_info(lesson.url, cookie=cookie).get("url")
if lesson.type == "video":
url = f"https://learn-api.kodekloud.com/api/lessons/{lesson.id}"

response = session.get(url, headers=headers, params=params)
response.raise_for_status()
lesson_video_url = response.json()["video_url"]
# TODO: If KodeKloud changes its video streaming service in the future, this section will need rework.
# Try to generalize this for future enhancement.
current_video_url = (
f"https://player.vimeo.com/video/{lesson_video_url.split('/')[-1]}"
)
if (
current_video_url in downloaded_videos
and downloaded_videos[current_video_url] > max_duplicate_count
Expand All @@ -125,79 +158,83 @@ def download_course(
"\nYour cookie might have expired or you don't have access/enrolled to the course."
"\nPlease refresh/regenerate the cookie or enroll in the course and try again."
)
download_video_lesson(lesson, file_path, cookie, quality)
download_video_lesson(current_video_url, file_path, cookie, quality)
downloaded_videos[current_video_url] += 1
else:
download_resource_lesson(lesson, file_path, cookie)
lesson_url = f"https://learn.kodekloud.com/user/courses/{course.slug}/module/{module.id}/lesson/{lesson.id}"
download_resource_lesson(lesson_url, file_path, cookie)


def create_file_path(
    output_dir: Union[str, Path],
    course_name: str,
    module_index: int,
    module_name: str,
    lesson_index: int,
    lesson_name: str,
) -> Path:
    """
    Create a file path for a lesson.

    The resulting layout is
    ``<output_dir>/KodeKloud/<course>/<module_index> - <module>/<lesson_index> - <lesson>``,
    where the course, module and lesson names are passed through
    ``normalize_name`` first.

    :param output_dir: The output directory for the downloaded course
    :param course_name: The course name
    :param module_index: The module index
    :param module_name: The module name
    :param lesson_index: The lesson index
    :param lesson_name: The lesson name
    :return: The created file path
    """
    return (
        Path(output_dir)
        / "KodeKloud"
        / normalize_name(course_name)
        / f"{module_index} - {normalize_name(module_name)}"
        / f"{lesson_index} - {normalize_name(lesson_name)}"
    )


def download_video_lesson(
    lesson_video_url: str, file_path: Path, cookie: str, quality: str
) -> None:
    """
    Download a video lesson.

    The parent directory of ``file_path`` is created if needed. yt-dlp
    ``UnsupportedError`` and ``DownloadError`` failures are logged, together
    with the underlying error text, instead of being raised.

    :param lesson_video_url: The lesson video URL
    :param file_path: The output file path for the video
    :param cookie: The user's authentication cookie
    :param quality: The video quality (e.g. "720p")
    """
    logger.info(f"Writing video file... {file_path}...")
    file_path.parent.mkdir(parents=True, exist_ok=True)
    logger.info(f"Parsing url: {lesson_video_url}")
    try:
        download_video(
            url=lesson_video_url,
            output_path=file_path,
            cookie=cookie,
            quality=quality,
        )
    except yt_dlp.utils.UnsupportedError as ex:
        # Include the exception text so the reason is not lost.
        logger.error(
            f"Could not download video in link {lesson_video_url}. "
            f"Please open link manually and verify that video exists!\n{ex}"
        )
    except yt_dlp.utils.DownloadError as ex:
        logger.error(
            f"Access denied while downloading video or audio file from link {lesson_video_url}\n{ex}"
        )


def download_resource_lesson(lesson, file_path: Path, cookie: str) -> None:
def download_resource_lesson(lesson_url, file_path: Path, cookie: str) -> None:
"""
Download a resource lesson.
:param lesson: The lesson object
:param lesson_url: The lesson url
:param file_path: The output file path for the resource
:param cookie: The user's authentication cookie
"""
page = requests.get(lesson.url)
# TODO: Did we break this? I have no idea.
page = requests.get(lesson_url)
soup = BeautifulSoup(page.content, "html.parser")
content = soup.find("div", class_="learndash_content_wrap")

Expand Down
Loading

0 comments on commit 64bcc9b

Please sign in to comment.