From 4bea8e6085c595a4c98c395247b526a56b9cf585 Mon Sep 17 00:00:00 2001
From: Jonathan Cubides
Date: Mon, 2 Dec 2024 17:01:36 +0100
Subject: [PATCH] v0.4.6 (#14)

---
 mkdocs_juvix/common/preprocesors/links.py |  36 ++--
 mkdocs_juvix/env.py                       |   9 +-
 mkdocs_juvix/links.py                     |  75 ++++----
 mkdocs_juvix/logger.py                    |  93 ++++-----
 mkdocs_juvix/main.py                      | 222 ++++++++++++++++------
 mkdocs_juvix/snippets.py                  |   9 +-
 mkdocs_juvix/utils.py                     |  10 +-
 pyproject.toml                            |   2 +-
 8 files changed, 273 insertions(+), 183 deletions(-)

diff --git a/mkdocs_juvix/common/preprocesors/links.py b/mkdocs_juvix/common/preprocesors/links.py
index 7a6081d..be37c27 100644
--- a/mkdocs_juvix/common/preprocesors/links.py
+++ b/mkdocs_juvix/common/preprocesors/links.py
@@ -5,7 +5,7 @@
 from urllib.parse import urljoin
 
 import numpy as np  # type: ignore
-from colorama import Fore, Style
+from colorama import Fore, Style  # type: ignore
 from fuzzywuzzy import fuzz  # type: ignore
 from markdown.preprocessors import Preprocessor  # type: ignore
 from ncls import NCLS  # type: ignore
@@ -101,10 +101,9 @@ def process_wikilink(config, full_text, match, md_filepath) -> Optional[WikiLink
     )
     link_page = link.page
 
-    # print white space with "X"
-    if len(config["url_for"].get(link_page, [])) > 1 and link_page in config["url_for"]:
-        possible_pages = config["url_for"][link_page]
+    if len(config.get("url_for", {}).get(link_page, [])) > 1 and link_page in config.get("url_for", {}):
+        possible_pages = config.get("url_for", {}).get(link_page, [])
         hint = link.hint if link.hint else ""
         token = hint + link_page
         coefficients = {p: fuzz.WRatio(fun_normalise(p), token) for p in possible_pages}
@@ -121,8 +120,8 @@ def process_wikilink(config, full_text, match, md_filepath) -> Optional[WikiLink
             Our choice: {link.html_path}"""
         )
-    elif link_page in config["url_for"]:
-        link.html_path = config["url_for"].get(link_page, [""])[0]
+    elif link_page in config.get("url_for", {}):
+        link.html_path = config.get("url_for", {}).get(link_page, [""])[0]
         log.debug(f"Single page found. html_path: {link.html_path}")
     else:
         log.debug("Link page not in config['url_for']")
@@ -135,11 +134,11 @@ def process_wikilink(config, full_text, match, md_filepath) -> Optional[WikiLink
     # Update links_found TODO: move this to the model
     try:
-        url_page = config["url_for"][link_page][0]
-        if url_page in config["nodes"]:
-            actuallink = config["nodes"][url_page]
+        url_page = config.get("url_for", {}).get(link_page, [""])[0]
+        if url_page in config.get("nodes", {}):
+            actuallink = config.get("nodes", {}).get(url_page, {})
             if actuallink:
-                pageName = actuallink["page"].get("names", [""])[0]
+                pageName = actuallink.get("page", {}).get("names", [""])[0]
                 html_path: str = link.html_path if link.html_path else ""
                 config.get("links_found", []).append(
                     {
@@ -192,7 +191,8 @@ def _run(self, content: str) -> str:
         # Find all code blocks, HTML comments, and script tags in a single pass
         ignore_blocks = re.compile(
-            r"((`{1,3})(?:[\s\S]*?)(\2)||)",
+            # r"((`{1,3})(?:[\s\S]*?)(\2)||)",
+            r"((`{1,3})(?:[\s\S]*?)(\2))",
             re.DOTALL,
         )
         intervals = []
@@ -205,10 +205,12 @@ def _run(self, content: str) -> str:
         except Exception as e:
             log.error(f"Error occurred while processing ignore patterns: {str(e)}")
             return content
-        intervals_where_not_to_look = None
-        if intervals:
-            starts, ends, ids = map(np.array, zip(*intervals))
-            intervals_where_not_to_look = NCLS(starts, ends, ids)
+
+        # Review this for later improvements
+        # intervals_where_not_to_look = None
+        # if intervals:
+        #     starts, ends, ids = map(np.array, zip(*intervals))
+        #     intervals_where_not_to_look = NCLS(starts, ends, ids)
 
         # Find all wikilinks
         str_wikilinks = list(WIKILINK_PATTERN.finditer(content))
@@ -218,9 +220,7 @@ def _run(self, content: str) -> str:
             start, end = m.start(), m.end()
 
             # TODO: review this
-            if intervals_where_not_to_look and not list(
-                intervals_where_not_to_look.find_overlap(start, end)
-            ):
+            if True:
                 log.debug(
                     f"{Fore.YELLOW}Processing wikilink: {m.group(0)}{Style.RESET_ALL}"
                 )
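
The recurring change in this file replaces direct subscripting such as config["url_for"][link_page] with chained .get lookups that fall back to empty defaults, so a page missing from the navigation maps degrades to an empty result instead of aborting the build with a KeyError. A minimal sketch of the behaviour, using a hypothetical config dict rather than the plugin's real data:

    # Hypothetical data, for illustration only
    config = {"url_for": {"Home": ["https://example.com/index.html"]}}

    page = "Unknown page"
    urls = config.get("url_for", {}).get(page, [])  # [] instead of KeyError
    if not urls:
        print(f"No URL recorded for {page!r}; caller can fall through gracefully")

The trade-off is that lookup failures no longer raise, which is presumably why the same patch adds log.debug calls around these code paths.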

diff --git a/mkdocs_juvix/env.py b/mkdocs_juvix/env.py
index d531aef..80d5a0d 100644
--- a/mkdocs_juvix/env.py
+++ b/mkdocs_juvix/env.py
@@ -14,14 +14,13 @@
 from colorama import Fore, Style  # type: ignore
 from mkdocs.config.defaults import MkDocsConfig
-from mkdocs.plugins import get_plugin_logger
 from semver import Version
 
 import mkdocs_juvix.utils as utils
 from mkdocs_juvix.juvix_version import MIN_JUVIX_VERSION
 from mkdocs_juvix.utils import is_juvix_markdown_file
 
-log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs] (env) {Style.RESET_ALL}")
+from mkdocs_juvix.logger import log
 
 BASE_PATH = Path(__file__).parent
 FIXTURES_PATH = BASE_PATH / "fixtures"
@@ -182,6 +181,12 @@ def __init__(self, config: Optional[MkDocsConfig] = None):
             )
             exit(1)
 
+        if not self.CACHE_ABSPATH.exists():
+            log.info(
+                f"{Fore.YELLOW}Creating cache directory {self.CACHE_ABSPATH}{Style.RESET_ALL}"
+            )
+            self.CACHE_ABSPATH.mkdir(parents=True, exist_ok=True)
+
         if (
             self.CACHE_ABSPATH.exists()
             and self.REMOVE_CACHE
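
The new block in ENV.__init__ creates the cache directory up front, before anything tries to write into it. Path.mkdir(parents=True, exist_ok=True) is idempotent, so running it on every build is harmless even when the directory already exists. A rough sketch, assuming CACHE_ABSPATH is a pathlib.Path and using a placeholder path name:

    from pathlib import Path

    cache = Path(".cache-juvix-mkdocs")  # placeholder; the plugin derives its own path
    if not cache.exists():
        print(f"Creating cache directory {cache}")
    cache.mkdir(parents=True, exist_ok=True)  # safe to repeat across builds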

diff --git a/mkdocs_juvix/links.py b/mkdocs_juvix/links.py
index b9694e3..3410948 100644
--- a/mkdocs_juvix/links.py
+++ b/mkdocs_juvix/links.py
@@ -4,13 +4,12 @@
 import json
 import re
-from concurrent.futures import ThreadPoolExecutor
 from os import getenv
 from pathlib import Path
 from typing import Dict, List, Optional
 from urllib.parse import urljoin
 
-from colorama import Fore, Style  # type: ignore
+from tqdm import tqdm as sync_tqdm  # type: ignore
 from markdown.extensions import Extension  # type: ignore
 from mkdocs.config.defaults import MkDocsConfig
 from mkdocs.structure.files import File, Files
@@ -21,7 +20,7 @@
 from mkdocs_juvix.common.preprocesors.links import WLPreprocessor
 from mkdocs_juvix.common.utils import fix_site_url, get_page_title
 from mkdocs_juvix.env import ENV
-from mkdocs_juvix.logger import log
+from mkdocs_juvix.logger import clear_line, clear_screen, log
 
 files_relation: List[ResultEntry] = []
 EXCLUDED_DIRS = [
@@ -57,7 +56,6 @@ def extendMarkdown(self, md):  # noqa: N802
 
 TOKEN_LIST_WIKILINKS: str = ""
 
-
 class WikilinksPlugin:
     env: Optional[ENV] = None
@@ -100,35 +98,40 @@ def on_pre_build(self, config: MkDocsConfig) -> None:
         config["wikilinks_issues"] = 0
         config["nodes"] = {}
         node_index = 0
-
-        for _url, page in _extract_aliases_from_nav(config["nav"]):
-            url = urljoin(config["site_url"], _url)
-
-            config["aliases_for"][url] = [page]
-            config["url_for"].setdefault(page, [])
-            config["url_for"][page].append(url)
-
-            # Create a new entry if the URL is not already present in config["nodes"]
-            if url not in config["nodes"]:
-                config["nodes"][url] = {
-                    "index": node_index,
-                    "page": {"names": [], "path": _url.replace("./", "")},
-                }
-            # Append the page to the "names" list
-            config["nodes"][url]["page"]["names"].append(page)
-            node_index += 1
+        nav_items = list(_extract_aliases_from_nav(config["nav"]))
+
+        with sync_tqdm(total=len(nav_items), desc="> processing nav items") as pbar:
+            for _url, page in nav_items:
+                url = urljoin(config["site_url"], _url)
+                config["aliases_for"][url] = [page]
+                config["url_for"].setdefault(page, [])
+                config["url_for"][page].append(url)
+
+                # Create a new entry if the URL is not already present in config["nodes"]
+                if url not in config["nodes"]:
+                    config["nodes"][url] = {
+                        "index": node_index,
+                        "page": {"names": [], "path": _url.replace("./", "")},
+                    }
+                # Append the page to the "names" list
+                config["nodes"][url]["page"]["names"].append(page)
+                node_index += 1
+                pbar.update(1)
+        clear_line()
 
         if self.NODES_JSON.exists():
             self.NODES_JSON.unlink()
-
-        with open(self.NODES_JSON, "w") as f:
-            json.dump(
+        try:
+            with open(self.NODES_JSON, "w") as f:
+                json.dump(
                 {
                     "nodes": config.get("nodes", {}),
                 },
                 f,
                 indent=2,
             )
+        except Exception as e:
+            log.error(f"Error writing nodes.json: {e}")
 
         config["current_page"] = None  # current page being processed
         return
@@ -161,18 +164,17 @@ def process_file(file: File) -> None:
             _title = _title.strip()
             _title = re.sub(r'^[\'"`]|["\'`]$', "", _title)
 
-            if _title not in config["url_for"]:
-                url = urljoin(config["site_url"], file.url)
+            if _title not in config.get("url_for", {}):
+                url = urljoin(config.get("site_url", ""), file.url)
                 config["url_for"][_title] = [url]
                 config["aliases_for"][url] = [_title]
-
-        with ThreadPoolExecutor() as executor:
-            list(
-                executor.map(
-                    process_file, filter(lambda f: f.is_documentation_page(), files)
-                )
-            )
-            executor.shutdown(wait=True)
+        clear_screen()
+        with sync_tqdm(total=len(files), desc="> processing files") as pbar:
+            for file in files:
+                if file.is_documentation_page():
+                    process_file(file)
+                pbar.update(1)
+        clear_line()
 
         if self.LINKS_JSON.exists():
             self.LINKS_JSON.unlink()
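
Both loops above drop the previous ThreadPoolExecutor fan-out in favour of a plain sequential pass wrapped in tqdm, trading parallelism for a visible progress bar; since process_file writes into the shared config["url_for"] and config["aliases_for"] dictionaries, a sequential loop also avoids concurrent mutation of those maps. The pattern, sketched with placeholder data instead of MkDocs' Files object:

    from tqdm import tqdm as sync_tqdm

    files = ["index.md", "guide.md", "reference.md"]  # placeholder file list
    with sync_tqdm(total=len(files), desc="> processing files") as pbar:
        for f in files:
            # process_file(f) would run here
            pbar.update(1)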
@@ -201,16 +203,20 @@ def on_page_content(
         frontmatter has the `list_wikilinks` flag set to true.
         """
         if "current_page" not in config or "nodes" not in config:
+            log.debug("No current_page or nodes in config")
             return html
         current_page = config["current_page"]
         url = current_page.canonical_url.replace(".html", ".md")
         if url not in config["nodes"]:
+            log.debug(f"URL {url} not found in nodes. It's probably ignored because it's not in the mkdocs.yml file.")
             return html
         if url not in config["nodes"] or "index" not in config["nodes"][url]:
+            log.debug(f"URL {url} not found in nodes or no index for URL")
             return html
         links_number: List[Dict[str, int]] = config.get("links_number", [])
         if len(links_number) > 0:
+            log.debug(f"Processing {len(links_number)} links for {url}")
             actualindex = config["nodes"][url]["index"]
             result_entry = ResultEntry(
                 file=current_page.url,
@@ -222,6 +228,7 @@ def on_page_content(
             files_relation.append(result_entry)
 
         if page.meta.get("list_wikilinks", False):
+            log.debug(f"Generating wikilinks list for {url}")
             # Creat a bullet list of links
             wrapped_links = "Relevant internal links on this page