diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 0354352..62c0eb6 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,12 +1,12 @@ name: Lint on: - push: - branches: - - main + pull_request: + types: [opened, synchronize, reopened, ready_for_review] jobs: build: + if: github.event.pull_request.draft == false runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -16,4 +16,4 @@ jobs: - name: Run linter run: | pip install ruff - ruff check \ No newline at end of file + ruff check diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index abf2479..4f5f508 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,21 +1,26 @@ name: Pytest on: - push: - branches: - - main + pull_request: + types: [opened, synchronize, reopened, ready_for_review] jobs: build: + if: github.event.pull_request.draft == false runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.11" - - name: Run tests + python-version: "3.12" + + - name: Install ffmpeg + run: sudo apt-get update && sudo apt-get install -y ffmpeg + + - name: Install Poetry and dependencies run: | pip install poetry - poetry install --with api --with dev - poetry run pytest - + poetry install --with api --with dev + + - name: Run tests + run: poetry run pytest diff --git a/poetry.lock b/poetry.lock index e849a06..8287290 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. 
[[package]] name = "altgraph" @@ -43,7 +43,7 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] trio = ["trio (>=0.26.1)"] [[package]] @@ -61,6 +61,29 @@ files = [ [package.dependencies] six = "*" +[[package]] +name = "beautifulsoup4" +version = "4.13.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.7.0" +groups = ["main"] +files = [ + {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"}, + {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"}, +] + +[package.dependencies] +soupsieve = ">1.2" +typing-extensions = ">=4.0.0" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "black" version = "24.10.0" @@ -340,7 +363,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2) ; python_version 
< \"3.11\""] +typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "fsspec" @@ -435,7 +458,7 @@ httpcore = "==1.*" idna = "*" [package.extras] -brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +brotli = ["brotli", "brotlicffi"] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -564,7 +587,7 @@ colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] -dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; python_version >= \"3.11\"", "colorama (==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version >= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", "freezegun (==1.5.0) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.13.0) ; python_version >= \"3.8\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "myst-parser (==4.0.0) ; python_version >= \"3.11\"", "pre-commit (==4.0.1) ; python_version >= \"3.9\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==8.3.2) ; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==5.0.0) ; python_version == \"3.8\"", "pytest-cov (==6.0.0) ; python_version >= \"3.9\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.1.0) ; python_version >= \"3.8\"", "sphinx-rtd-theme (==3.0.2) ; python_version >= \"3.11\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.23.2) ; python_version >= \"3.8\"", "twine (==6.0.1) ; python_version >= \"3.11\""] +dev = ["Sphinx (==8.1.3)", "build (==1.2.2)", "colorama (==0.4.5)", "colorama (==0.4.6)", 
"exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.5.0)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.13.0)", "mypy (==v1.4.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pytest (==6.1.2)", "pytest (==8.3.2)", "pytest-cov (==2.12.1)", "pytest-cov (==5.0.0)", "pytest-cov (==6.0.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.1.0)", "sphinx-rtd-theme (==3.0.2)", "tox (==3.27.1)", "tox (==4.23.2)", "twine (==6.0.1)"] [[package]] name = "macholib" @@ -729,7 +752,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] +gmpy = ["gmpy2 (>=2.1.0a4)"] tests = ["pytest (>=4.6)"] [[package]] @@ -1170,7 +1193,7 @@ typing-extensions = ">=4.12.2" [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] +timezone = ["tzdata"] [[package]] name = "pydantic-core" @@ -1732,13 +1755,13 @@ files = [ ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] -core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.8.0)"] +core = ["importlib_metadata (>=6)", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", 
"pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib_metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.14.*)", "pytest-mypy"] [[package]] name = "shellingham" @@ -1776,6 +1799,18 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "soupsieve" +version = "2.6" +description = "A modern CSS selector implementation for Beautiful Soup." 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, + {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, +] + [[package]] name = "starlette" version = "0.45.3" @@ -2009,7 +2044,7 @@ files = [ ] [package.extras] -brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -2031,7 +2066,7 @@ click = ">=7.0" h11 = ">=0.8" [package.extras] -standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] [[package]] name = "virtualenv" @@ -2052,7 +2087,7 @@ platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", 
"setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] [[package]] name = "win32-setctime" @@ -2068,9 +2103,9 @@ files = [ ] [package.extras] -dev = ["black (>=19.3b0) ; python_version >= \"3.6\"", "pytest (>=4.6.2)"] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.13" -content-hash = "53c0dedded5806e9198951110b7d9d89496058141de4223052f0450011be2a4a" +content-hash = "cbf6ba5d1d9c8b609489a6b4f90c3be1c323d07534629692b9ea7d8282ecde49" diff --git a/pyproject.toml b/pyproject.toml index af2a5ce..dafadc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ rb-doc-parser = { path = "src/rb-doc-parser", develop = true } rb-audio-transcription = { path = "src/rb-audio-transcription", develop = true } # Don't add new packages here, add them appropriately in the list above +beautifulsoup4 = "^4.13.3" diff --git a/src/rb-api/rb/api/main.py b/src/rb-api/rb/api/main.py index c85842b..23ee19d 100644 --- a/src/rb-api/rb/api/main.py +++ b/src/rb-api/rb/api/main.py @@ -1,9 +1,6 @@ -from logging.config import dictConfig import multiprocessing import os -import logging import sys -from starlette.middleware.base import BaseHTTPMiddleware from fastapi import FastAPI from fastapi.staticfiles import StaticFiles from rb.api import routes diff --git a/src/rb-api/rb/api/routes/cli.py b/src/rb-api/rb/api/routes/cli.py index d94f1f4..cf4817b 100644 --- a/src/rb-api/rb/api/routes/cli.py +++ b/src/rb-api/rb/api/routes/cli.py @@ -157,7 +157,7 @@ def wrapper(*args, **kwargs) -> ResponseBody: ) # FIXME: prefix /api to make desktop call happy for now , eventually this 
will go away # GOAL : /audio/routes is valid /api/routes should no longer work - cli_to_api_router.include_router(router,prefix=f'/api', tags=[plugin.name]) + cli_to_api_router.include_router(router,prefix='/api', tags=[plugin.name]) logger.debug(f"Registering FastAPI route for {plugin.name} desktop call: {command.callback.__name__}") else: diff --git a/src/rb-api/rb/api/utils.py b/src/rb-api/rb/api/utils.py index 0ffc201..7b1df6f 100644 --- a/src/rb-api/rb/api/utils.py +++ b/src/rb-api/rb/api/utils.py @@ -1,5 +1,6 @@ -from typing import Any, Callable import argparse +from typing import Any, Callable + from rb.api.models import FloatRangeDescriptor, IntRangeDescriptor @@ -7,7 +8,7 @@ def get_int_range_check_func_arg_parser(range: IntRangeDescriptor) -> Callable[[ def check_func(value: Any) -> int: try: value = int(value) - except: + except Exception: raise argparse.ArgumentTypeError(f"{value} is not a valid integer") if value < range.min or value > range.max: raise argparse.ArgumentTypeError(f"{value} is not in the range [{range.min}, {range.max}]") @@ -18,7 +19,7 @@ def get_float_range_check_func_arg_parser(range: FloatRangeDescriptor) -> Callab def check_func(value: Any) -> float: try: value = float(value) - except: + except Exception: raise argparse.ArgumentTypeError(f"{value} is not a valid float") if value < range.min or value > range.max: raise argparse.ArgumentTypeError(f"{value} is not in the range [{range.min}, {range.max}]") diff --git a/src/rb-audio-transcription/rb_audio_transcription/main.py b/src/rb-audio-transcription/rb_audio_transcription/main.py index c58a4a8..6bf00f9 100644 --- a/src/rb-audio-transcription/rb_audio_transcription/main.py +++ b/src/rb-audio-transcription/rb_audio_transcription/main.py @@ -1,5 +1,5 @@ """audio transcribe plugin""" -import sys, os +import sys import json import logging from pathlib import Path @@ -8,7 +8,6 @@ from rb.api.models import ( BatchTextResponse, DirectoryInput, - FileInput, FloatParameterDescriptor, 
InputSchema, InputType, @@ -21,7 +20,6 @@ TextParameterDescriptor, TextResponse, ) -from rb.api.models import BatchFileInput from rb.api.models import API_APPMETDATA, API_ROUTES, PLUGIN_SCHEMA_SUFFIX from rb.api.utils import ( get_int_range_check_func_arg_parser, @@ -190,7 +188,7 @@ def alternate_params_parser(p: str) -> ParameterSchema: # this fucntion is not used , just an example try: params = string_to_dict(p) - logger.info(f"-----DEBUG parser ---") + logger.info("-----DEBUG parser ---") range_object = IntRangeDescriptor(min=params["c"], max=params["d"]) func = get_int_range_check_func_arg_parser(range_object) if func(params["e"]): @@ -221,7 +219,7 @@ def validate_inputs(inputs: DirInputs): files = [file for file in dirpath.iterdir() if file.is_file()] logger.debug(files) if len(files) < 1: - raise HTTPException(status_code=400, detail=f"no 'files_in given directory' for transcribe command") + raise HTTPException(status_code=400, detail="no 'files_in given directory' for transcribe command") logger.debug("------validate inputs done ---") ## this return object is now ready for use in transcribe function return inputs @@ -229,7 +227,7 @@ def validate_inputs(inputs: DirInputs): logger.error("validate bad inputs: %s", e) raise HTTPException(status_code=400, detail=f"Invalid path inputs for transcribe command: {e}") -@app.command(f'transcribe') +@app.command('transcribe') def transcribe( inputs: Annotated[ DirInputs, diff --git a/src/rb-audio-transcription/tests/test_main.py b/src/rb-audio-transcription/tests/test_main.py index 8786a96..8f9d845 100644 --- a/src/rb-audio-transcription/tests/test_main.py +++ b/src/rb-audio-transcription/tests/test_main.py @@ -24,19 +24,19 @@ def test_routes_command(): ''' call typer cli to get routes''' result = runner.invoke(cli_app, [API_ROUTES]) - assert result is not "" + assert result != "" assert result.exit_code == 0 def test_metadata_command(): result = runner.invoke(cli_app, [API_APPMETDATA]) - assert result is not "" + 
assert result != "" assert result.exit_code == 0 def test_schema_command(): result = runner.invoke(cli_app, [f"task{PLUGIN_SCHEMA_SUFFIX}"]) - assert result is not "" + assert result != "" assert result.exit_code == 0 def test_negative_test(): diff --git a/src/rb-doc-parser/rb_doc_parser/main.py b/src/rb-doc-parser/rb_doc_parser/main.py index bf92700..19739d9 100644 --- a/src/rb-doc-parser/rb_doc_parser/main.py +++ b/src/rb-doc-parser/rb_doc_parser/main.py @@ -1,5 +1,5 @@ import typer -from rb.lib.docs import DOCS_GITHUB_URL, download_reference_doc # type: ignore +from rb.lib.docs import BASE_WIKI_URL, download_all_wiki_pages # type: ignore from rb.lib.ollama import use_ollama # type: ignore from rb_doc_parser.chat import load_chat_config, stream_output @@ -11,8 +11,8 @@ def open() -> str: """ Open docs in the browser """ - typer.launch(DOCS_GITHUB_URL) - return DOCS_GITHUB_URL + typer.launch(BASE_WIKI_URL) + return BASE_WIKI_URL @use_ollama @@ -23,7 +23,7 @@ def ask( """ Ask a question against the docs """ - reference_doc = download_reference_doc() + reference_doc = "\n\n".join(f"## {name}\n\n{text}" for name, text in download_all_wiki_pages().items()) # join the {page: text} dict into prompt text, not a dict repr chat_config = load_chat_config() chat_config["prompt"]["system"] = chat_config["prompt"]["system"].format( reference_doc=reference_doc diff --git a/src/rb-lib/rb/lib/docs.py b/src/rb-lib/rb/lib/docs.py index 85bb730..fe0b52d 100644 --- a/src/rb-lib/rb/lib/docs.py +++ b/src/rb-lib/rb/lib/docs.py @@ -1,11 +1,77 @@ import requests +from bs4 import BeautifulSoup -DOCS_GITHUB_URL = "https://github.com/UMass-Rescue/RescueBox/wiki" -REFERENCE_RAW_MARKDOWN_URL = ( - "https://raw.githubusercontent.com/wiki/UMass-Rescue/RescueBox/Reference.md" -) +BASE_WIKI_URL = "https://github.com/UMass-Rescue/RescueBox/wiki" +def get_wiki_page_links(): + """ + Fetches all subpage links from the GitHub Wiki main page.
+ """ + try: + response = requests.get(BASE_WIKI_URL, timeout=10) + response.raise_for_status() -def download_reference_doc(): - response = requests.get(REFERENCE_RAW_MARKDOWN_URL) - return response.text + soup = BeautifulSoup(response.text, "html.parser") + + # Find all links to subpages (they contain '/RescueBox/wiki/' in href) + wiki_links = soup.find_all("a", href=True) + + # Extract the full URLs of each wiki page + page_urls = [ + BASE_WIKI_URL + "/" + link["href"].split("/")[-1] # Append page name to base URL + for link in wiki_links + if "/RescueBox/wiki/" in link["href"] + ] + + return list(dict.fromkeys(page_urls)) # Drop repeated links (first-seen order kept) so no page is fetched twice + + except requests.RequestException as e: + print(f"Error fetching wiki page links: {e}") + return [] + +def download_wiki_page(url): + """ + Downloads and extracts markdown content from a given GitHub wiki page. + """ + try: + response = requests.get(url, timeout=10) + response.raise_for_status() + + soup = BeautifulSoup(response.text, "html.parser") + + # Extract markdown content (GitHub wikis use `markdown-body` class) + wiki_content_div = soup.find("div", class_="markdown-body") + + if not wiki_content_div: + print(f"Warning: No markdown content found on {url}") + return None + + return wiki_content_div.get_text().strip() + + except requests.RequestException as e: + print(f"Error downloading {url}: {e}") + return None + +def download_all_wiki_pages(): + """ + Fetches all wiki pages and extracts their markdown content.
+ """ + wiki_pages = get_wiki_page_links() + wiki_data = {} + + for page_url in wiki_pages: + page_name = page_url.split("/")[-1] # Extract the page name + print(f"Fetching: {page_name}") + markdown_text = download_wiki_page(page_url) + + if markdown_text: + wiki_data[page_name] = markdown_text + + return wiki_data + +# Example usage: run this module as a script to preview every wiki page. +# Guarded so that merely importing rb.lib.docs never touches the network or prints. +if __name__ == "__main__": + # Print a preview of collected content + for page, content in download_all_wiki_pages().items(): + print(f"\n=== {page} ===\n{content[:500]}...\n") # Show first 500 chars per page diff --git a/src/rb-lib/rb/lib/stdout.py b/src/rb-lib/rb/lib/stdout.py index ee5ab70..6da22fb 100644 --- a/src/rb-lib/rb/lib/stdout.py +++ b/src/rb-lib/rb/lib/stdout.py @@ -1,8 +1,6 @@ -import contextlib import io import sys from io import StringIO -from typing import Callable, Generator class Capturing(list): @@ -19,7 +17,6 @@ def __exit__(self, *args): def capture_stdout_as_generator(func, *args, **kwargs): import sys - import io old_stdout = sys.stdout sys.stdout = buffer = io.StringIO() diff --git a/src/rb-lib/rb/lib/tests/test_docs.py b/src/rb-lib/rb/lib/tests/test_docs.py index 45205f3..d863a4a 100644 --- a/src/rb-lib/rb/lib/tests/test_docs.py +++ b/src/rb-lib/rb/lib/tests/test_docs.py @@ -1,7 +1,14 @@ -from rb.lib.docs import download_reference_doc +from rb.lib.docs import download_all_wiki_pages -def test_download_reference_doc(): - reference_doc = download_reference_doc() - assert reference_doc is not None - assert "rescuebox" in reference_doc +def test_download_all_wiki_pages(): + wiki_content = download_all_wiki_pages() + + # Ensure that at least one wiki page is retrieved + assert wiki_content is not None, "Failed: No wiki content retrieved" + assert isinstance(wiki_content, dict), "Failed: Returned data is not a dictionary" + assert len(wiki_content) > 0, "Failed: No pages found in the wiki" + + # Check that at least one page contains expected keywords + found_valid_page = any("RescueBox" in content or
"UMass" in content for content in wiki_content.values()) + assert found_valid_page, "Failed: No retrieved pages contain expected content"