From 9c69ef672dadbb1e5dac0c76da9623510bad270d Mon Sep 17 00:00:00 2001 From: Jonathan Cubides Date: Tue, 22 Oct 2024 00:05:51 -0500 Subject: [PATCH] Speed up SVG generation and images link replacement, plus other optimizations --- mkdocs_juvix/common/models/wikilink.py | 4 +- mkdocs_juvix/common/preprocesors/links.py | 32 ++- mkdocs_juvix/common/utils.py | 7 +- mkdocs_juvix/env.py | 47 ++-- mkdocs_juvix/images.py | 256 +++++++++++++--------- mkdocs_juvix/juvix_markdown.py | 91 +++++--- mkdocs_juvix/links.py | 31 +-- mkdocs_juvix/snippets.py | 28 ++- mkdocs_juvix/utils.py | 67 +++++- poetry.lock | 4 +- pyproject.toml | 3 +- 11 files changed, 359 insertions(+), 211 deletions(-) diff --git a/mkdocs_juvix/common/models/wikilink.py b/mkdocs_juvix/common/models/wikilink.py index 810bf1b..0474ae6 100644 --- a/mkdocs_juvix/common/models/wikilink.py +++ b/mkdocs_juvix/common/models/wikilink.py @@ -1,7 +1,9 @@ from typing import Optional -from .loc import FileLoc from mkdocs.plugins import get_plugin_logger + +from .loc import FileLoc + log = get_plugin_logger("\033[94m[wikilinks]\033[0m") diff --git a/mkdocs_juvix/common/preprocesors/links.py b/mkdocs_juvix/common/preprocesors/links.py index 24eb33a..da45c05 100644 --- a/mkdocs_juvix/common/preprocesors/links.py +++ b/mkdocs_juvix/common/preprocesors/links.py @@ -1,16 +1,17 @@ import os import re -from pathlib import Path import time +from pathlib import Path from typing import Any, Dict, List, Optional from urllib.parse import urljoin -from mkdocs.plugins import get_plugin_logger +import numpy as np # type: ignore +from colorama import Fore, Style # type: ignore from fuzzywuzzy import fuzz # type: ignore from markdown.preprocessors import Preprocessor # type: ignore +from mkdocs.plugins import get_plugin_logger from mkdocs.structure.pages import Page -from ncls import NCLS # type: ignore -import numpy as np # type: ignore +from ncls import NCLS # type: ignore from mkdocs_juvix.common.models import FileLoc, WikiLink from mkdocs_juvix.env import ENV @@ -27,7 +28,8 @@ re.VERBOSE, ) -log = get_plugin_logger("\033[94m[wikilinks_preprocessor]\033[0m") + +log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs-wikilinks]{Style.RESET_ALL}") REPORT_BROKEN_WIKILINKS = bool(os.environ.get("REPORT_BROKEN_WIKILINKS", False)) @@ -94,7 +96,7 @@ def run(self, lines) -> List[str]: cache_filepath and cache_filepath.exists() and original_filepath - and not self.env.new_or_changed_or_no_exist(original_filepath) + and not self.env.new_or_changed_or_not_exists(original_filepath) ): return cache_filepath.read_text().split("\n") @@ -102,20 +104,24 @@ def run(self, lines) -> List[str]: time_start = time.time() lines = self.snippet_preprocessor.run(lines) time_end = time.time() - log.info(f"Snippet finished in {(time_end - time_start):.5f} seconds") + log.info( + f"Snippet finished in {Fore.GREEN}{(time_end - time_start):.5f}{Style.RESET_ALL} seconds" + ) - log.info(f"Processing wikilinks on file {filepath}") + log.info( + f"Processing wikilinks on file {Fore.GREEN}{filepath}{Style.RESET_ALL}" + ) # Combine all lines into a single string full_text = "\n".join(lines) # Find all code blocks, HTML comments, and script tags in a single pass - pattern = re.compile( + ignore_blocks = re.compile( r"(```(?:[\s\S]*?)```||)", re.DOTALL ) intervals = [] time_start = time.time() try: - for match in pattern.finditer(full_text): + for match in ignore_blocks.finditer(full_text): intervals.append((match.start(), match.end(), 1)) except TimeoutError: log.error("Timeout occurred while processing ignore patterns") @@ -150,8 +156,10 @@ def run(self, lines) -> List[str]: for start, end, new_text in reversed(replacements): full_text = full_text[:start] + new_text + full_text[end:] time_end = time.time() - - log.info(f"Processing wikilinks took {(time_end - time_start):.5f} seconds") + + log.info( + f"Processing wikilinks took {Fore.GREEN}{(time_end - time_start):.5f}{Style.RESET_ALL} seconds" + ) if cache_filepath: log.debug(f"Writing wikilinks to cache for file {original_filepath}") diff --git a/mkdocs_juvix/common/utils.py b/mkdocs_juvix/common/utils.py index a0b92fb..0764b26 100644 --- a/mkdocs_juvix/common/utils.py +++ b/mkdocs_juvix/common/utils.py @@ -18,12 +18,15 @@ def fix_site_url(config: MkDocsConfig) -> MkDocsConfig: config["site_url"] += "/" return config - log.info("SITE_URL environment variable not set") + log.debug("SITE_URL environment variable not set") version = os.environ.get("MIKE_DOCS_VERSION") if version: - log.info(f"Using MIKE_DOCS_VERSION environment variable: {version}") + log.debug(f"Using MIKE_DOCS_VERSION environment variable: {version}") + + if "site_url" not in config or not config["site_url"]: + config["site_url"] = "" if not config["site_url"].endswith("/"): config["site_url"] += "/" diff --git a/mkdocs_juvix/env.py b/mkdocs_juvix/env.py index 28b8f4f..4050966 100644 --- a/mkdocs_juvix/env.py +++ b/mkdocs_juvix/env.py @@ -4,14 +4,15 @@ Juvix settings. """ -from functools import lru_cache import os import shutil import subprocess +from functools import lru_cache from os import getenv from pathlib import Path from typing import List, Optional, Tuple +from colorama import Fore, Style # type: ignore from mkdocs.config.defaults import MkDocsConfig from mkdocs.plugins import get_plugin_logger from semver import Version @@ -19,11 +20,12 @@ from mkdocs_juvix.juvix_version import MIN_JUVIX_VERSION from mkdocs_juvix.utils import compute_hash_filepath, hash_file -log = get_plugin_logger("ENV") +log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs-env]{Style.RESET_ALL}") BASE_PATH = Path(__file__).parent FIXTURES_PATH = BASE_PATH / "fixtures" + class ENV: ROOT_PATH: Path DOCS_DIRNAME: str = getenv("DOCS_DIRNAME", "docs") @@ -94,7 +96,6 @@ class ENV: "CACHE_WIKILINKS_DIRNAME", ".wikilinks" ) # The name of the file where the Juvix Markdown files are stored DOCS_IMAGES_DIRNAME: str = getenv("DOCS_IMAGES_DIRNAME", "images") - CACHE_IMAGES_DIRNAME: str = getenv("CACHE_IMAGES_DIRNAME", ".images") CACHE_JUVIX_VERSION_FILENAME: str = getenv( "CACHE_JUVIX_VERSION_FILENAME", ".juvix_version" ) # The name of the file where the Juvix version is stored @@ -120,7 +121,6 @@ class ENV: SHOW_TODOS_IN_MD: bool INDEXES_PATH: Path IMAGES_PATH: Path - CACHE_IMAGES_PATH: Path def __init__(self, config: Optional[MkDocsConfig] = None): if config: @@ -196,9 +196,6 @@ def __init__(self, config: Optional[MkDocsConfig] = None): self.CACHE_HASHES_PATH: Path = ( self.CACHE_ABSPATH / self.CACHE_HASHES_DIRNAME ) # The path where hashes are stored (not the project hash) - self.CACHE_IMAGES_PATH: Path = ( - self.CACHE_ABSPATH / self.CACHE_IMAGES_DIRNAME - ) # The path to the images cache directory self.JUVIX_FOOTER_CSS_FILEPATH: Path = ( self.DOCS_ABSPATH / "assets" / "css" / self.JUVIX_FOOTER_CSS_FILENAME @@ -224,12 +221,19 @@ def __init__(self, config: Optional[MkDocsConfig] = None): self.CACHE_HASHES_PATH, self.JUVIX_FOOTER_CSS_FILEPATH.parent, self.CACHE_WIKILINKS_PATH, - self.CACHE_IMAGES_PATH, ] for directory in directories: - if directory.exists() and self.REMOVE_CACHE: + if ( + directory.exists() + and self.REMOVE_CACHE + and config + and not config.get("env_init", False) + ): try: + log.info( + f"Removing directory {Fore.RED}{directory}{Style.RESET_ALL}" + ) shutil.rmtree(directory, ignore_errors=True) except Exception as e: log.error( @@ -281,11 +285,13 @@ def __init__(self, config: Optional[MkDocsConfig] = None): self.JUVIX_ENABLED = False self.JUVIX_AVAILABLE = False return - + self.USE_DOT = bool(getenv("USE_DOT", True)) self.DOT_BIN = getenv("DOT_BIN", "dot") self.DOT_FLAGS = getenv("DOT_FLAGS", "-Tsvg") self.IMAGES_ENABLED = bool(getenv("IMAGES_ENABLED", True)) + if config: + config["env_init"] = True @lru_cache(maxsize=128) def read_markdown_file_from_cache(self, filepath: Path) -> Optional[str]: @@ -298,19 +304,17 @@ def read_wikilinks_file_from_cache(self, filepath: Path) -> Optional[str]: if cache_ABSpath := self.get_filepath_for_wikilinks_in_cache(filepath): return cache_ABSpath.read_text() return None - + def write_wikilinks_file_to_cache(self, filepath: Path, content: str) -> None: if cache_ABSpath := self.get_filepath_for_wikilinks_in_cache(filepath): cache_ABSpath.write_text(content) - + def get_filepath_for_wikilinks_in_cache(self, filepath: Path) -> Optional[Path]: filepath = filepath.absolute() rel_to_docs = filepath.relative_to(self.DOCS_ABSPATH) - return ( - self.CACHE_WIKILINKS_PATH / rel_to_docs.parent / filepath.name - ) + return self.CACHE_WIKILINKS_PATH / rel_to_docs.parent / filepath.name - def new_or_changed_or_no_exist(self, filepath: Path) -> bool: + def new_or_changed_or_not_exists(self, filepath: Path) -> bool: content_hash = hash_file(filepath) path_hash = compute_hash_filepath(filepath, hash_dir=self.CACHE_HASHES_PATH) if not path_hash.exists(): @@ -318,7 +322,7 @@ def new_or_changed_or_no_exist(self, filepath: Path) -> bool: return True fresh_content_hash = path_hash.read_text() return content_hash != fresh_content_hash - + @lru_cache(maxsize=128) def get_filepath_for_juvix_markdown_in_cache( self, _filepath: Path @@ -326,9 +330,7 @@ def get_filepath_for_juvix_markdown_in_cache( filepath = _filepath.absolute() md_filename = filepath.name.replace(".juvix.md", ".md") rel_to_docs = filepath.relative_to(self.DOCS_ABSPATH) - return ( - self.CACHE_MARKDOWN_JUVIX_OUTPUT_PATH / rel_to_docs.parent / md_filename - ) + return self.CACHE_MARKDOWN_JUVIX_OUTPUT_PATH / rel_to_docs.parent / md_filename def unqualified_module_name(self, filepath: Path) -> Optional[str]: fposix: str = filepath.as_posix() @@ -371,9 +373,7 @@ def get_filename_module_by_extension( return module_name + extension if module_name else None def update_hash_file(self, filepath: Path) -> Optional[Tuple[Path, str]]: # noqa: F821 - filepath_hash = compute_hash_filepath( - filepath, hash_dir=self.CACHE_HASHES_PATH - ) + filepath_hash = compute_hash_filepath(filepath, hash_dir=self.CACHE_HASHES_PATH) try: with open(filepath_hash, "w") as f: content_hash = hash_file(filepath) @@ -398,4 +398,3 @@ def get_expected_filepath_for_juvix_isabelle_output_in_cache( / cache_isabelle_filename ) return cache_isabelle_filepath - \ No newline at end of file diff --git a/mkdocs_juvix/images.py b/mkdocs_juvix/images.py index 1c36b49..22dc796 100644 --- a/mkdocs_juvix/images.py +++ b/mkdocs_juvix/images.py @@ -1,23 +1,25 @@ -import logging -import os import re import shutil import subprocess +import time +from concurrent.futures import ThreadPoolExecutor from pathlib import Path from typing import Optional -from urllib.parse import urljoin -from mkdocs.plugins import BasePlugin, get_plugin_logger -from common.models.loc import FileLoc # type: ignore -from common.utils import fix_site_url # type:ignore + +import numpy as np +from colorama import Fore, Style # type: ignore from markdown.extensions import Extension # type: ignore from markdown.preprocessors import Preprocessor # type: ignore from mkdocs.config.defaults import MkDocsConfig # type: ignore +from mkdocs.plugins import BasePlugin, get_plugin_logger from mkdocs.structure.files import Files # type: ignore from mkdocs.structure.pages import Page -from mkdocs_juvix.env import ENV # type: ignore +from ncls import NCLS # type: ignore -log = get_plugin_logger("\033[94m[images]\033[0m") +from mkdocs_juvix.common.utils import fix_site_url # type:ignore +from mkdocs_juvix.env import ENV # type: ignore +log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs-images]{Style.RESET_ALL}") IMAGES_PATTERN = re.compile( r""" @@ -28,6 +30,14 @@ re.VERBOSE, ) +HTML_IMG_PATTERN = re.compile( + r""" +[^\)]+)("|') +""", + re.VERBOSE, +) + + class ImgExtension(Extension): config: MkDocsConfig env: ENV @@ -46,7 +56,8 @@ def extendMarkdown(self, md): # noqa: N802 self.md = md md.registerExtension(self) self.imgpp = ImgPreprocessor(self.config, self.env) - md.preprocessors.register(self.imgpp, "img-pp", 110) + md.preprocessors.register(self.imgpp, "img-pp", 90) + class ImgPreprocessor(Preprocessor): config: MkDocsConfig @@ -60,6 +71,8 @@ def __init__(self, config, env: Optional[ENV] = None): self.env = env def run(self, lines): + full_text = "".join(lines) + config = self.config current_page_url = None @@ -73,81 +86,83 @@ def run(self, lines): log.error("Current page URL not found. Images will not be processed.") return lines - in_html_comment = False - in_div = False - - for i, line in enumerate(lines.copy()): - if "" in line: - in_html_comment = False - if "" in line: - in_div = False - if in_html_comment or in_div: - continue - - matches = IMAGES_PATTERN.finditer(line) - - for match in matches: - _url = match.group("url") - url = Path(_url) - if url.as_posix().startswith("http"): - continue - - loc = FileLoc(current_page_url, i + 1, match.start() + 2) - - image_fname = url.name - img_cache = self.env.CACHE_IMAGES_PATH / image_fname - - if image_fname.endswith(".dot.svg") and self.env.USE_DOT: - dot_file = image_fname.replace(".dot.svg", ".dot") - dot_location = self.env.IMAGES_PATH / dot_file - log.debug(f"{loc}\nGenerating SVG from DOT file: {dot_location}") - - if not dot_location.exists(): - log.info(f"{dot_location} not found. Skipping SVG generation.") - continue - - cmd = f"{self.env.DOT_BIN} {self.env.DOT_FLAGS} {dot_location.as_posix()} -o {img_cache.absolute().as_posix()}" - - log.debug(f"Running command: {cmd}") - - output = subprocess.run(cmd, shell=True, check=True) - - if output.returncode != 0: - log.error(f"Error running graphviz: {output}") - - if not img_cache.exists(): - config["images_issues"] += 1 - log.error( - f"{loc}\n [!] Image not found. Expected location:\n==> {img_cache}" - ) - - img_expected_location = self.env.IMAGES_PATH / image_fname - - new_url = urljoin( - config["site_url"], - img_expected_location.relative_to(self.env.DOCS_ABSPATH).as_posix(), - ) + ignore_blocks = re.compile( + r"(```(?:[\s\S]*?)```||
[\s\S]*?
)", re.DOTALL + ) + intervals = [] + try: + for match in ignore_blocks.finditer(full_text): + intervals.append((match.start(), match.end(), 1)) + except Exception as e: + log.error(f"Error occurred while processing ignore patterns: {e}") + return lines - lines[i] = lines[i].replace(_url, new_url) + ignore_tree = None + if intervals: + starts, ends, ids = map(np.array, zip(*intervals)) + ignore_tree = NCLS(starts, ends, ids) - log.debug( - f"{loc}\n[!] Image URL: {_url}\nwas replaced by the following URL:\n ==> {new_url}" + def img_markdown_link(match: re.Match, img_expected_location: Path) -> str: + if match.group("caption"): + return ( + f"![{match.group('caption')}]({img_expected_location.as_posix()})" ) - return lines - - -class ImagePlugin(BasePlugin): - config: MkDocsConfig + else: + return img_expected_location.as_posix() + + full_text = "".join(lines) + + time_start = time.time() + + def process_matches(pattern, process_func): + matches = list(pattern.finditer(full_text)) + if matches: + replacements = [] + for match in matches: + start, end = match.start(), match.end() + if ignore_tree and not list(ignore_tree.find_overlap(start, end)): + url = Path(match.group("url")) + if url.as_posix().startswith("http"): + continue + image_fname = url.name + img_expected_location = self.env.IMAGES_PATH / image_fname + new_url = process_func(match, img_expected_location) + replacements.append((start, end, new_url)) + return replacements + return [] + + replacements = process_matches( + IMAGES_PATTERN, + lambda match, img_expected_location: img_markdown_link( + match, img_expected_location + ), + ) + + for start, end, new_url in reversed(replacements): + full_text = full_text[:start] + new_url + full_text[end:] + + replacements = process_matches( + HTML_IMG_PATTERN, + lambda _, + img_expected_location: f'', + ) + for start, end, new_url in reversed(replacements): + full_text = full_text[:start] + new_url + full_text[end:] + + time_end = time.time() + log.debug( + f"Path image resolution took {time_end - time_start:.5f} seconds for {current_page_url}" + ) + + return full_text.split("\n") + + +class ImagesPlugin(BasePlugin): env: ENV def on_config(self, config: MkDocsConfig) -> MkDocsConfig: config = fix_site_url(config) - if self.env is None: - self.env = ENV(config) + self.env = ENV(config) if not shutil.which(self.env.DOT_BIN): log.warning( @@ -155,36 +170,77 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig: ) self.env.USE_DOT = False + dot_files = list(self.env.IMAGES_PATH.glob("*.dot")) + + def process_dot_file(dot_file: Path): + try: + cond = self.env.new_or_changed_or_not_exists(dot_file) + svg_file = dot_file.with_suffix(".dot.svg") + if cond: + self._generate_dot_svg(dot_file) + if svg_file.exists(): + log.info(f"Generated SVG: {svg_file}") + self.env.update_hash_file(dot_file) + return svg_file + except Exception as e: + log.error(f"Error generating SVG for {dot_file}: {e}") + return None + + if dot_files: + log.info( + f"Generating {Fore.GREEN}{len(dot_files)}{Style.RESET_ALL} SVG images" + ) + for dot_file in dot_files: + process_dot_file(dot_file) + + with ThreadPoolExecutor() as executor: + results = list(executor.map(process_dot_file, dot_files)) + executor.shutdown(wait=True) + + for result in results: + if result is None: + log.error("Failed to generate SVG for one of the DOT files") + exit(1) + imgext_instance = ImgExtension(config=config, env=self.env) config.markdown_extensions.append(imgext_instance) # type: ignore config["images"] = {} # page: [image] config.setdefault("current_page", None) # current page being processed - config["images_issues"] = 0 return config - - def on_page_markdown(self, - markdown, page: Page, config: MkDocsConfig, files: Files + def _generate_dot_svg(self, dot_file: Path) -> Optional[Path]: + svg_file = dot_file.with_suffix(".dot.svg") + + if not svg_file.exists(): + self.env.IMAGES_PATH.mkdir(parents=True, exist_ok=True) + + dot_cmd = [ + self.env.DOT_BIN, + self.env.DOT_FLAGS, + dot_file.absolute().as_posix(), + "-o", + svg_file.absolute().as_posix(), + ] + + try: + time_start = time.time() + log.info(f"Generating SVG for {Fore.GREEN}{dot_file}{Style.RESET_ALL}") + output = subprocess.run(dot_cmd) + time_end = time.time() + log.info( + f"Generation took {Fore.GREEN}{time_end - time_start:.5f}{Style.RESET_ALL} seconds" + ) + if output.returncode != 0: + log.error(f"Error running graphviz: {output}") + return None + return dot_file + except Exception as e: + log.error(f"Error running graphviz: {e}") + return None + + def on_page_markdown( + self, markdown, page: Page, config: MkDocsConfig, files: Files ) -> str: config["current_page"] = page # needed for the preprocessor return markdown - - def on_post_build(self, config: MkDocsConfig) -> None: - if config["images_issues"] > 0: - log.error( - f"\n[!] {config['images_issues']} image(s) not found. Please check the logs for more details." - ) - else: - images_dir = self.env.IMAGES_PATH - if not images_dir.exists(): - log.error(f"Expected images directory {images_dir} not found.") - images_dir.mkdir(parents=True, exist_ok=True) - try: - path_images = self.env.CACHE_IMAGES_PATH - if not path_images.exists(): - log.error(f"Expected images cache directory {path_images} not found.") - return - shutil.copytree(path_images, images_dir, dirs_exist_ok=True) - except Exception as e: - log.error(f"Error copying images to site directory: {e}") diff --git a/mkdocs_juvix/juvix_markdown.py b/mkdocs_juvix/juvix_markdown.py index 1349140..2f00a3f 100644 --- a/mkdocs_juvix/juvix_markdown.py +++ b/mkdocs_juvix/juvix_markdown.py @@ -1,17 +1,18 @@ import json import shutil import subprocess +import time +from concurrent.futures import ThreadPoolExecutor, wait from functools import wraps from os import getenv from pathlib import Path -import time from typing import Any, Callable, Dict, List, Optional from urllib.parse import urljoin -from concurrent.futures import ThreadPoolExecutor import pathspec import yaml # type:ignore from bs4 import BeautifulSoup # type:ignore +from colorama import Fore, Style # type: ignore from dotenv import load_dotenv from mkdocs.config.defaults import MkDocsConfig from mkdocs.plugins import BasePlugin, get_plugin_logger @@ -22,14 +23,11 @@ from mkdocs_juvix.env import ENV, FIXTURES_PATH from mkdocs_juvix.snippets import RE_SNIPPET_SECTION -from mkdocs_juvix.utils import ( - compute_sha_over_folder, - fix_site_url, -) +from mkdocs_juvix.utils import compute_sha_over_folder, fix_site_url load_dotenv() -log = get_plugin_logger("\033[94m[juvix_mkdocs]\033[0m") +log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs-to-markdown]{Style.RESET_ALL}") _pipeline: str = """ For reference, the Mkdocs Pipeline is the following: ├── on_startup(command, dirty) @@ -135,13 +133,16 @@ def on_pre_build(self, config: MkDocsConfig) -> None: [self.env.JUVIX_BIN, "dependencies", "update"], capture_output=True ) time_end = time.time() - log.info(f"\033[92mUpdated Juvix dependencies in {time_end - time_start} seconds\033[0m") + log.info( + f"Updated Juvix dependencies in {Fore.GREEN}{time_end - time_start:.2f}{Style.RESET_ALL} seconds" + ) self.env.FIRST_RUN = False except Exception as e: log.error(f"A problem occurred while updating Juvix dependencies: {e}") return time_start = time.time() + def process_file(_file: Path) -> None: filepath: Path = _file.absolute() relative_to: Path = filepath.relative_to(self.env.DOCS_ABSPATH) @@ -159,14 +160,29 @@ def process_file(_file: Path) -> None: self._generate_output_files_for_juvix_markdown(filepath) juvix_md_files = list(self.env.DOCS_ABSPATH.rglob("*.juvix.md")) + log.info( + f"==== Processing {Fore.GREEN}{len(juvix_md_files)}{Style.RESET_ALL} Juvix Markdown files ====" + ) + with ThreadPoolExecutor() as executor: - results = list(executor.map(process_file, juvix_md_files)) - executor.shutdown(wait=True) - while len(results) != len(juvix_md_files): - log.error("Not all Juvix Markdown files were processed.") + futures = [executor.submit(process_file, file) for file in juvix_md_files] + + # Wait for all futures to complete + wait(futures) + for juvix_md_file in juvix_md_files: + if not juvix_md_file.exists(): + log.error( + f"Juvix Markdown file not found: {Fore.RED}{juvix_md_file}{Style.RESET_ALL}" + ) + exit(1) + log.info("Finished processing all Juvix Markdown files") + + # exit(1) time_end = time.time() - log.info(f"\033[92mGenerated Markdown for {len(self.juvix_md_files)} Juvix Markdown files in {time_end - time_start} seconds\033[0m") + log.info( + f"Generated Markdown for {Fore.GREEN}{len(self.juvix_md_files)}{Style.RESET_ALL} Juvix Markdown files in {Fore.GREEN}{time_end - time_start:.2f}{Style.RESET_ALL} seconds" + ) self.juvix_md_files.sort(key=lambda x: x["qualified_module_name"]) juvix_modules = self.env.CACHE_ABSPATH.joinpath("juvix_modules.json") juvix_modules.write_text(json.dumps(self.juvix_md_files, indent=4)) @@ -181,13 +197,16 @@ def process_file(_file: Path) -> None: self.env.CACHE_ORIGINAL_JUVIX_MARKDOWN_FILES_ABSPATH ) equal_hashes = current_sha == sha_filecontent - - log.info("\033[95mComputed Hash for Juvix Markdown files: %s\033[0m", current_sha) + log.info( + f"Computed Hash for Juvix Markdown files: {Fore.MAGENTA}{current_sha}{Style.RESET_ALL}" + ) if not equal_hashes: - log.info("\033[95mThe hashes are different (previous: %s)\033[0m", sha_filecontent) + log.info( + f"The hashes are different! (previous: {Fore.MAGENTA}{sha_filecontent}{Style.RESET_ALL})" + ) else: - log.info("\033[93mThe Juvix Markdown content has not changed.\033[0m") + log.info("The Juvix Markdown content has not changed.") generate: bool = ( self.env.JUVIX_ENABLED @@ -202,7 +221,9 @@ def process_file(_file: Path) -> None: ) if not generate: - log.info("\033[92mSkipping Juvix HTML generation for Juvix files.\033[0m") + log.info( + f"{Fore.GREEN}Skipping Juvix HTML generation for Juvix files.{Style.RESET_ALL}" + ) else: log.debug( "Generating auxiliary HTML for Juvix files. This may take a while... It's only generated once per session." @@ -273,7 +294,9 @@ def on_page_markdown( return markdown filepath = Path(src_path) isabelle_path = ( - self.env.get_expected_filepath_for_juvix_isabelle_output_in_cache(filepath) + self.env.get_expected_filepath_for_juvix_isabelle_output_in_cache( + filepath + ) ) if isabelle_path and not isabelle_path.exists(): log.error( @@ -394,7 +417,6 @@ def _move_html_cache_to_site_dir(self, filepath: Path, site_dir: Path) -> None: shutil.copytree(self.env.CACHE_HTML_PATH, dest_folder, dirs_exist_ok=True) return - def _generate_html(self, generate: bool = True, move_cache: bool = True) -> None: everythingJuvix = self.env.DOCS_ABSPATH.joinpath("everything.juvix.md") if not everythingJuvix.exists(): @@ -435,7 +457,9 @@ def process_file(filepath_info: dict) -> None: executor.map(process_file, files_to_process) executor.shutdown(wait=True) time_end = time.time() - log.info(f"\033[92mGenerated HTML for {len(files_to_process)} files in {time_end - time_start} seconds\033[0m") + log.info( + f"\033[92mGenerated HTML for {len(files_to_process)} files in {time_end - time_start} seconds\033[0m" + ) return @@ -495,7 +519,6 @@ def _generate_html_per_file( except Exception as e: log.error(f"Error copying folder: {e}") - def _generate_isabelle_html(self, filepath: Path) -> Optional[str]: if not filepath.as_posix().endswith(".juvix.md"): return None @@ -508,7 +531,7 @@ def _generate_isabelle_html(self, filepath: Path) -> Optional[str]: isabelle_filepath is not None and isabelle_filepath.exists() ) - if not cache_available or self.env.new_or_changed_or_no_exist(filepath): + if not cache_available or self.env.new_or_changed_or_not_exists(filepath): log.info(f"No Isabelle file in cache for {filepath}") return self._run_juvix_isabelle(filepath) @@ -524,11 +547,13 @@ def _generate_output_files_for_juvix_markdown( if not filepath.as_posix().endswith(".juvix.md"): return None - new_or_changed = self.env.new_or_changed_or_no_exist(filepath) + new_or_changed = self.env.new_or_changed_or_not_exists(filepath) if not new_or_changed: log.debug(f"Reading cached file for: {filepath}") - if cache_filepath := self.env.get_filepath_for_juvix_markdown_in_cache(filepath): + if cache_filepath := self.env.get_filepath_for_juvix_markdown_in_cache( + filepath + ): return cache_filepath.read_text() markdown_output = self._run_juvix_markdown(filepath) try: @@ -571,8 +596,6 @@ def _generate_output_files_for_juvix_markdown( return markdown_output - - def _run_juvix_isabelle(self, _filepath: Path) -> Optional[str]: filepath: Path = _filepath.absolute() fposix: str = filepath.as_posix() @@ -685,7 +708,9 @@ def _run_juvix_markdown(self, _filepath: Path) -> Optional[str]: "--no-colors", ] try: - log.info(f"Generating Markdown for '{filepath.name}'") + log.info( + f"Generating Markdown for {Fore.GREEN}'{filepath.relative_to(self.env.DOCS_ABSPATH)}'{Style.RESET_ALL}" + ) result_markdown = subprocess.run( juvix_markdown_cmd, cwd=self.env.DOCS_ABSPATH, capture_output=True ) @@ -705,9 +730,12 @@ def _run_juvix_markdown(self, _filepath: Path) -> Optional[str]: log.error(f"Error running Juvix on file: {fposix} -\n {e}") return None - cache_markdown_filename: Optional[str] = self.env.get_filename_module_by_extension( - filepath, extension=".md" + md_output: str = result_markdown.stdout.decode("utf-8") + + cache_markdown_filename: Optional[str] = ( + self.env.get_filename_module_by_extension(filepath, extension=".md") ) + if cache_markdown_filename is None: log.debug(f"Could not determine the markdown file name for: {fposix}") return None @@ -719,12 +747,12 @@ def _run_juvix_markdown(self, _filepath: Path) -> Optional[str]: ) cache_markdown_filepath.parent.mkdir(parents=True, exist_ok=True) - md_output: str = result_markdown.stdout.decode("utf-8") try: cache_markdown_filepath.write_text(md_output) except Exception as e: log.error(f"Error writing to cache markdown file: {e}") return md_output + self._update_markdown_file_as_in_docs(filepath) self.env.update_hash_file(filepath) return md_output @@ -740,7 +768,6 @@ def _update_markdown_file_as_in_docs(self, filepath: Path) -> None: except Exception as e: log.error(f"Error copying file: {e}") - def _generate_code_block_footer_css_file( self, css_file: Path, compiler_version: Optional[str] = None ) -> Optional[Path]: diff --git a/mkdocs_juvix/links.py b/mkdocs_juvix/links.py index 6a0debf..4bc30d5 100644 --- a/mkdocs_juvix/links.py +++ b/mkdocs_juvix/links.py @@ -2,17 +2,16 @@ Support for wiki-style links in MkDocs in tandem of pydownx_snippets. """ -from concurrent.futures import ThreadPoolExecutor -from functools import lru_cache import json import re +from concurrent.futures import ThreadPoolExecutor from os import getenv from pathlib import Path from typing import Dict, List, Optional from urllib.parse import urljoin -import time import mkdocs.plugins +from colorama import Fore, Style # type: ignore from markdown.extensions import Extension # type: ignore from mkdocs.config.defaults import MkDocsConfig from mkdocs.plugins import BasePlugin, get_plugin_logger @@ -22,29 +21,24 @@ from mkdocs_juvix.common.models.entry import ResultEntry from mkdocs_juvix.common.preprocesors.links import WLPreprocessor -from mkdocs_juvix.common.utils import ( - fix_site_url, - get_page_title, -) +from mkdocs_juvix.common.utils import fix_site_url, get_page_title from mkdocs_juvix.env import ENV from mkdocs_juvix.snippets import ( DEFAULT_URL_SIZE, DEFAULT_URL_TIMEOUT, SnippetPreprocessor, ) +from mkdocs_juvix.utils import get_filtered_subdirs # type: ignore -log = get_plugin_logger("\033[94m[wikilinks]\033[0m") +log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs-links]{Style.RESET_ALL}") files_relation: List[ResultEntry] = [] EXCLUDED_DIRS = [ - ".", - "__", - "site", - "env", - "venv", - ".hooks", - ".env", + ".git", ".juvix_build", + ".vscode", + ".hooks", + ".github", ] @@ -60,12 +54,6 @@ def __init__(self, config: MkDocsConfig, env: Optional[ENV] = None): else: self.env = env - for root in Path(self.env.ROOT_ABSPATH).rglob("*"): - if root.is_dir() and not any( - part.startswith(tuple(EXCLUDED_DIRS)) for part in root.parts - ): - self.base_path.append(root.as_posix()) - def __repr__(self): return "WLExtension" @@ -122,6 +110,7 @@ def on_config(self, config: MkDocsConfig, **kwargs) -> MkDocsConfig: config["markdown_extensions"].remove("mkdocs_juvix.snippets") wl_extension = WLExtension(config, self.env) + wl_extension.base_path = list(get_filtered_subdirs(self.env.ROOT_ABSPATH)) config.markdown_extensions.append(wl_extension) # type: ignore return config diff --git a/mkdocs_juvix/snippets.py b/mkdocs_juvix/snippets.py index b16542c..e3e3943 100644 --- a/mkdocs_juvix/snippets.py +++ b/mkdocs_juvix/snippets.py @@ -39,13 +39,15 @@ from pathlib import Path from typing import Any, Optional +from colorama import Fore, Style # type: ignore from markdown import Extension # type: ignore from markdown.preprocessors import Preprocessor # type: ignore from mkdocs.plugins import get_plugin_logger from mkdocs_juvix.env import ENV +from mkdocs_juvix.utils import find_file_in_subdirs # type: ignore -log = get_plugin_logger("\033[94m[snippets]\033[0m") +log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs-snippets]{Style.RESET_ALL}") MI = 1024 * 1024 # mebibyte (MiB) DEFAULT_URL_SIZE = MI * 32 @@ -93,7 +95,9 @@ class SnippetMissingError(Exception): class SnippetPreprocessor(Preprocessor): """Handle snippets in Markdown content.""" - env : ENV + + env: ENV + def __init__(self, config, md: Any, env: Optional[ENV] = None): """Initialize.""" @@ -213,8 +217,6 @@ def dedent(self, lines): return textwrap.dedent("\n".join(lines)).split("\n") - - def get_snippet_path(self, path) -> Optional[str]: """Get snippet path.""" if path in self.snippet_cache: @@ -398,12 +400,17 @@ def parse_snippets( path = path[:-4] is_isabelle = True - snippet = self.get_snippet_path(path) if not url else path + snippet = ( + find_file_in_subdirs( + self.env.ROOT_ABSPATH, self.base_path, Path(path) # type: ignore + ) + if not url + else path + ) is_juvix = False if snippet: original = snippet - if not just_raw and snippet.endswith(".juvix.md"): snippet = self.env.CACHE_MARKDOWN_JUVIX_OUTPUT_PATH / Path( snippet.replace(".juvix.md", ".md") @@ -509,7 +516,10 @@ def parse_snippets( elif self.check_paths: # print base path + log.error(f"Base path: {self.base_path}") + log.error("2. Snippet at path '{}' could not be found".format(path)) + exit(1) # Pop the current file name out of the cache if file_name: @@ -528,17 +538,15 @@ def run(self, lines): time_start = time.time() lines = self.parse_snippets(lines) time_end = time.time() - log.info( - f"Parsing snippets took {(time_end - time_start):.5f} seconds" - ) + log.info(f"Parsing snippets took {(time_end - time_start):.5f} seconds") return lines + class SnippetExtension(Extension): """Snippet extension.""" def __init__(self, *args, **kwargs): """Initialize.""" - self.config = { "base_path": [ [".", "includes"], diff --git a/mkdocs_juvix/utils.py b/mkdocs_juvix/utils.py index cc29e81..94b9763 100644 --- a/mkdocs_juvix/utils.py +++ b/mkdocs_juvix/utils.py @@ -3,11 +3,69 @@ import os from functools import lru_cache from pathlib import Path -from typing import Optional +from typing import Iterable, Optional from mkdocs.config.defaults import MkDocsConfig log = logging.getLogger("mkdocs") +EXCLUDED_DIRS = { + ".git", + ".hooks", + "env", + "venv", + ".github", + ".juvix_build", + ".vscode", + "__pycache__", + ".mypy_cache", + ".pytest_cache", + "node_modules", + "build", + "dist", + ".idea", +} + + +def is_excluded(entry): + return ( + entry.name in EXCLUDED_DIRS + or entry.name.startswith(".") + or entry.name.endswith("~") + or entry.name.endswith(".pyc") + ) + + +def get_filtered_subdirs(base_dir): + for entry in os.scandir(base_dir): + if entry.is_dir() and not is_excluded(entry): + yield entry.path + yield from get_filtered_subdirs(entry.path) + + +def get_all_subdirs(dir_path): + try: + for entry in os.scandir(dir_path): + if entry.is_dir() and not is_excluded(entry): + yield entry.path + yield from get_all_subdirs(entry.path) + except PermissionError: + log.warning(f"Permission denied: {dir_path}") + except OSError as e: + log.error(f"Error accessing {dir_path}: {e}") + + +def find_file_in_subdirs( + base_dir: Path, subdirs: Iterable[Path], filepath: Path +) -> Optional[str]: + full_path = base_dir / filepath + if full_path.exists(): + return full_path.absolute().as_posix() + subdirs = [base_dir / "images"] + list(subdirs) + for subdir in subdirs: + full_path = Path(subdir) / filepath.name + if full_path.exists(): + return full_path.absolute().as_posix() + return None def fix_site_url(config: MkDocsConfig) -> MkDocsConfig: @@ -16,11 +74,9 @@ def fix_site_url(config: MkDocsConfig) -> MkDocsConfig: if site_url: config["site_url"] = site_url else: - log.info("SITE_URL environment variable not set") - mike_docs_version = os.getenv("MIKE_DOCS_VERSION") if mike_docs_version: - log.info( + log.debug( f"Using MIKE_DOCS_VERSION environment variable: {mike_docs_version}" ) config["docs_version"] = mike_docs_version @@ -31,8 +87,7 @@ def fix_site_url(config: MkDocsConfig) -> MkDocsConfig: if not config["site_url"].endswith("/"): config["site_url"] += "/" - log.info(f"site_url: {config['site_url']}") - + log.debug(f"site_url: {config['site_url']}") os.environ["SITE_URL"] = config["site_url"] return config diff --git a/poetry.lock b/poetry.lock index f40bbac..21032fe 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1992,5 +1992,5 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" -python-versions = ">=3.10" -content-hash = "3bcb2a1398bbae52027cd21ef8351c833698a2bdd891f92a2cc64a52c07ff3b7" +python-versions = ">=3.10,<4" +content-hash = "8b637e45adbf662e15ecc3a239cd7500854ea95c9ba6142ad129ce5b240836bc" diff --git a/pyproject.toml b/pyproject.toml index ab35900..4763ebf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mkdocs-juvix-plugin" -version = "0.3.9b" +version = "0.3.9" description = "Support for Juvix Markdown files in MkDocs" authors = ["Jonathan Prieto-Cubides, and GitHub contributors"] license = "MIT" @@ -25,6 +25,7 @@ juvix = "mkdocs_juvix.juvix_markdown:JuvixPlugin" # extra plugins todos = "mkdocs_juvix.todos:TodosPlugin" wikilinks = "mkdocs_juvix.links:WikilinksPlugin" +images = "mkdocs_juvix.images:ImagesPlugin" # differ = "mkdocs_juvix.differ:DifferPlugin" [tool.poetry.dependencies]