Merge pull request #53 from debakarr/try-fixing-what-is-broken

Try fixing what is broken
debakarr · Sep 15, 2024 · 64bcc9b · 64bcc9b
2 parents 2df38d1 + 3bb4f31
commit 64bcc9b
Show file tree

Hide file tree

Showing 12 changed files with 1,322 additions and 1,660 deletions.
diff --git a/README.md b/README.md
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,9 +18,9 @@ markdownify = "^0.11.6"
 validators = "^0.20.0"
 click = "^8.1.3"
 yt-dlp = "^2023.3.4"
+pydantic = "^2.9.1"
 
 [tool.poetry.dev-dependencies]
-datamodel-code-generator = { extras = ["http"], version = "^0.17.1" }
 flake8 = { version = "^6.0.0", python = ">=3.8.1,<4.0" }
 flake8-bugbear = "^23.2.13"
 flake8-black = "^0.3.6"

diff --git a/src/kodekloud_downloader/cli.py b/src/kodekloud_downloader/cli.py
@@ -7,8 +7,12 @@
 
 from kodekloud_downloader.enums import Quality
 from kodekloud_downloader.helpers import select_courses
-from kodekloud_downloader.main import download_course, download_quiz
-from kodekloud_downloader.models import get_all_course
+from kodekloud_downloader.main import (
+    download_course,
+    download_quiz,
+    parse_course_from_url,
+)
+from kodekloud_downloader.models.helper import collect_all_courses
 
 
 @click.group()
@@ -59,19 +63,20 @@ def dl(
     max_duplicate_count: int,
 ):
     if course_url is None:
-        courses = get_all_course()
+        courses = collect_all_courses()
         selected_courses = select_courses(courses)
         for selected_course in selected_courses:
             download_course(
-                url=selected_course.link,
+                course=selected_course,
                 cookie=cookie,
                 quality=quality,
                 output_dir=output_dir,
                 max_duplicate_count=max_duplicate_count,
             )
     elif validators.url(course_url):
+        course_detail = parse_course_from_url(course_url)
         download_course(
-            url=course_url,
+            course=course_detail,
             cookie=cookie,
             quality=quality,
             output_dir=output_dir,

diff --git a/src/kodekloud_downloader/helpers.py b/src/kodekloud_downloader/helpers.py
@@ -1,13 +1,14 @@
 import logging
+import re
 import string
 from pathlib import Path
-from typing import List
+from typing import List, Optional
 
 import prettytable
 import requests
 import yt_dlp
 
-from kodekloud_downloader.models import Course
+from kodekloud_downloader.models.courses import Course
 
 logger = logging.getLogger(__name__)
 
@@ -55,7 +56,12 @@ def select_courses(courses: List[Course]) -> List[Course]:
 
     for i, course in enumerate(courses):
         table.add_row(
-            [i + 1, course.name, course.course_type, ", ".join(course.categories)]
+            [
+                i + 1,
+                course.title,
+                course.plan,
+                ", ".join([category.name for category in course.categories]),
+            ]
         )
 
     table.align["No."] = "l"
@@ -96,6 +102,9 @@ def download_video(url: str, output_path: Path, cookie: str, quality: str) -> No
     :param cookie: The user's authentication cookie
     :param quality: The video quality (e.g. "720p")
     """
+    headers = {
+        "Referer": "https://learn.kodekloud.com/",
+    }
     ydl_opts = {
         "format": f"bestvideo[height<={quality[:-1]}]+bestaudio/best[height<={quality[:-1]}]/best",
         "concurrent_fragment_downloads": 15,
@@ -105,6 +114,7 @@ def download_video(url: str, output_path: Path, cookie: str, quality: str) -> No
         "merge_output_format": "mkv",
         "writesubtitles": True,
         "no_write_sub": True,
+        "http_headers": headers,
     }
     logger.debug(f"Calling download with following options: {ydl_opts}")
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
@@ -140,17 +150,26 @@ def download_all_pdf(content, download_path: Path, cookie: str) -> None:
             file_name.write_bytes(response.content)
 
 
-def get_video_info(url: str, cookie: str):
-    ydl_opts = {
-        "skip_download": True,
-        "print_json": True,
-        "quiet": True,
-        "extract_flat": True,
-        "simulate": True,
-        "no_warnings": True,
-        "cookiefile": cookie,
-    }
+def parse_token(cookiefile: str) -> Optional[str]:
+    """
+    Parse the session cookie from a file containing cookies.
 
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        info = ydl.extract_info(url, download=False)
-        return info
+    :param cookiefile: The path to the file containing cookies.
+    :return: The value of the 'session-cookie' if found, otherwise None.
+    :raises FileNotFoundError: If the cookie file does not exist.
+    :raises IOError: If there is an error reading the file.
+    """
+    cookies = {}
+    try:
+        with open(cookiefile, "r") as fp:
+            for line in fp:
+                if line.strip() and not re.match(r"^\#", line):
+                    line_fields = line.strip().split("\t")
+                    if len(line_fields) > 6:
+                        cookies[line_fields[5]] = line_fields[6]
+    except FileNotFoundError:
+        raise FileNotFoundError(f"The file {cookiefile} does not exist.")
+    except IOError as e:
+        raise IOError(f"Error reading the file {cookiefile}: {e}")
+
+    return cookies.get("session-cookie")
diff --git a/src/kodekloud_downloader/main.py b/src/kodekloud_downloader/main.py
@@ -1,6 +1,5 @@
 import logging
 from collections import defaultdict
-from http.cookiejar import MozillaCookieJar
 from pathlib import Path
 from typing import Union
 
@@ -12,16 +11,31 @@
 from kodekloud_downloader.helpers import (
     download_all_pdf,
     download_video,
-    get_video_info,
     is_normal_content,
     normalize_name,
+    parse_token,
 )
-from kodekloud_downloader.models import Quiz, Topic
+from kodekloud_downloader.models.course import CourseDetail
+from kodekloud_downloader.models.courses import Course
+from kodekloud_downloader.models.helper import fetch_course_detail
+from kodekloud_downloader.models.quiz import Quiz
 
 logger = logging.getLogger(__name__)
 
 
-def download_quiz(output_dir: str, sep: bool):
+def download_quiz(output_dir: str, sep: bool) -> None:
+    """
+    Download quizzes from the API and save them as Markdown files.
+
+    :param output_dir: The directory path where the Markdown files will be saved.
+    :param sep: A boolean flag indicating whether to separate each quiz into individual files.
+                 If `True`, each quiz will be saved as a separate Markdown file. If `False`,
+                 all quizzes will be combined into a single Markdown file.
+    :return: None
+    :raises ValueError: If `output_dir` is not a valid directory path.
+    :raises requests.RequestException: For errors related to the HTTP request.
+    :raises IOError: For file I/O errors.
+    """
     quiz_markdown = [] if sep else ["# KodeKloud Quiz"]
     response = requests.get("https://mcq-backend-main.kodekloud.com/api/quizzes/all")
     response.raise_for_status()
@@ -56,7 +70,7 @@ def download_quiz(output_dir: str, sep: bool):
             output_file = Path(output_dir) / f"{quiz_name.replace('/', '')}.md"
             markdown_text = "\n".join(quiz_markdown)
 
-            with open(output_file, 'w', encoding='utf-8') as f:
+            with open(output_file, "w", encoding="utf-8") as f:
                 f.write(markdown_text)
             print(f"Quiz file written in {output_file}")
 
@@ -72,8 +86,21 @@ def download_quiz(output_dir: str, sep: bool):
         print(f"Quiz file written in {output_file}")
 
 
+def parse_course_from_url(url: str) -> CourseDetail:
+    """
+    Parse the course slug from the given URL and fetch the course details.
+
+    :param url: The URL from which to extract the course slug.
+    :return: An instance of `CourseDetail` containing the course details.
+    :raises ValueError: If the URL does not contain a valid course slug.
+    """
+    url = url.strip("/")
+    course_slug = url.split("/")[-1]
+    return fetch_course_detail(course_slug)
+
+
 def download_course(
-    url: str,
+    course: Union[Course, CourseDetail],
     cookie: str,
     quality: str,
     output_dir: Union[str, Path],
@@ -82,40 +109,46 @@ def download_course(
     """
     Download a course from KodeKloud.
 
-    :param url: The course URL
+    :param course: The Course or CourseDetail object
     :param cookie: The user's authentication cookie
     :param quality: The video quality (e.g. "720p")
     :param output_dir: The output directory for the downloaded course
     :param max_duplicate_count: Maximum duplicate video before after cookie expire message will be raised
     """
-    cj = MozillaCookieJar(cookie)
-    cj.load(ignore_discard=True, ignore_expires=True)
-
-    page = requests.get(url, cookies=cj)
-    soup = BeautifulSoup(page.content, "html.parser")
-    course_name_tag = soup.find("h1", class_="course_title") or soup.find(
-        "h1", class_="entry-title"
-    )
-    course_name = course_name_tag.text.strip()
-    main_lesson_content = soup.find("div", class_="lessons_main__content") or soup.find(
-        "div", class_="ld-lesson-list"
-    )
-    topics = (
-        main_lesson_content.find_all("div", class_="ld-item-list-item")
-        or main_lesson_content.find_all("div", class_="w-dyn-item")
-        or main_lesson_content.find_all("div", class_="ld-item-list-items")
+    session = requests.Session()
+    session_token = parse_token(cookie)
+    headers = {"authorization": f"bearer {session_token}"}
+    params = {
+        "course_id": course.id,
+    }
+
+    course_detail = (
+        fetch_course_detail(course.slug) if isinstance(course, Course) else course
     )
-    items = [Topic.make(topic) for topic in topics]
 
     downloaded_videos = defaultdict(int)
-    for i, item in enumerate(items, start=1):
-        for j, lesson in enumerate(item.lessons, start=1):
+    for module_index, module in enumerate(course_detail.modules, start=1):
+        for lesson_index, lesson in enumerate(module.lessons, start=1):
             file_path = create_file_path(
-                output_dir, course_name, i, item.name, j, lesson.name
+                output_dir,
+                course.title,
+                module_index,
+                module.title,
+                lesson_index,
+                lesson.title,
             )
 
-            if lesson.is_video:
-                current_video_url = get_video_info(lesson.url, cookie=cookie).get("url")
+            if lesson.type == "video":
+                url = f"https://learn-api.kodekloud.com/api/lessons/{lesson.id}"
+
+                response = session.get(url, headers=headers, params=params)
+                response.raise_for_status()
+                lesson_video_url = response.json()["video_url"]
+                # TODO: Maybe if in future KodeKloud change the video streaming service, this area will need some working.
+                # Try to generalize this for future enhacement?
+                current_video_url = (
+                    f"https://player.vimeo.com/video/{lesson_video_url.split('/')[-1]}"
+                )
                 if (
                     current_video_url in downloaded_videos
                     and downloaded_videos[current_video_url] > max_duplicate_count
@@ -125,79 +158,83 @@ def download_course(
                         "\nYour cookie might have expired or you don't have access/enrolled to the course."
                         "\nPlease refresh/regenerate the cookie or enroll in the course and try again."
                     )
-                download_video_lesson(lesson, file_path, cookie, quality)
+                download_video_lesson(current_video_url, file_path, cookie, quality)
                 downloaded_videos[current_video_url] += 1
             else:
-                download_resource_lesson(lesson, file_path, cookie)
+                lesson_url = f"https://learn.kodekloud.com/user/courses/{course.slug}/module/{module.id}/lesson/{lesson.id}"
+                download_resource_lesson(lesson_url, file_path, cookie)
 
 
 def create_file_path(
     output_dir: Union[str, Path],
     course_name: str,
-    i: int,
-    item_name: str,
-    j: int,
+    module_index: int,
+    module_name: str,
+    lesson_index: int,
     lesson_name: str,
 ) -> Path:
     """
     Create a file path for a lesson.
 
     :param output_dir: The output directory for the downloaded course
     :param course_name: The course name
-    :param i: The topic index
-    :param item_name: The topic name
-    :param j: The lesson index
+    :param module_index: The module index
+    :param item_name: The module name
+    :param lesson_index: The lesson index
     :param lesson_name: The lesson name
     :return: The created file path
     """
     return Path(
         Path(output_dir)
         / "KodeKloud"
         / normalize_name(course_name)
-        / f"{i} - {normalize_name(item_name)}"
-        / f"{j} - {normalize_name(lesson_name)}"
+        / f"{module_index} - {normalize_name(module_name)}"
+        / f"{lesson_index} - {normalize_name(lesson_name)}"
     )
 
 
-def download_video_lesson(lesson, file_path: Path, cookie: str, quality: str) -> None:
+def download_video_lesson(
+    lesson_video_url, file_path: Path, cookie: str, quality: str
+) -> None:
     """
     Download a video lesson.
 
-    :param lesson: The lesson object
+    :param lesson_video_url: The lesson video URL
     :param file_path: The output file path for the video
     :param cookie: The user's authentication cookie
     :param quality: The video quality (e.g. "720p")
     """
     logger.info(f"Writing video file... {file_path}...")
     file_path.parent.mkdir(parents=True, exist_ok=True)
-    logger.info(f"Parsing url: {lesson.url}")
+    logger.info(f"Parsing url: {lesson_video_url}")
     try:
         download_video(
-            url=lesson.url,
+            url=lesson_video_url,
             output_path=file_path,
             cookie=cookie,
             quality=quality,
         )
     except yt_dlp.utils.UnsupportedError as ex:
         logger.error(
-            f"Could not download video in link {lesson.url}. "
+            f"Could not download video in link {lesson_video_url}. "
             "Please open link manually and verify that video exists!"
         )
     except yt_dlp.utils.DownloadError as ex:
         logger.error(
-            f"Access denied while downloading video or audio file from link {lesson.url}"
+            f"Access denied while downloading video or audio file from link {lesson_video_url}\n{ex}"
         )
 
 
-def download_resource_lesson(lesson, file_path: Path, cookie: str) -> None:
+def download_resource_lesson(lesson_url, file_path: Path, cookie: str) -> None:
     """
     Download a resource lesson.
 
-    :param lesson: The lesson object
+    :param lesson_url: The lesson url
     :param file_path: The output file path for the resource
     :param cookie: The user's authentication cookie
     """
-    page = requests.get(lesson.url)
+    # TODO: Did we break this? I have no idea.
+    page = requests.get(lesson_url)
     soup = BeautifulSoup(page.content, "html.parser")
     content = soup.find("div", class_="learndash_content_wrap")