Skip to content

Commit

Permalink
Speed up SVG generation and images link replacement, plus other optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
jonaprieto committed Oct 22, 2024
1 parent 8b80fee commit 9c69ef6
Show file tree
Hide file tree
Showing 11 changed files with 359 additions and 211 deletions.
4 changes: 3 additions & 1 deletion mkdocs_juvix/common/models/wikilink.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from typing import Optional

from .loc import FileLoc
from mkdocs.plugins import get_plugin_logger

from .loc import FileLoc

log = get_plugin_logger("\033[94m[wikilinks]\033[0m")


Expand Down
32 changes: 20 additions & 12 deletions mkdocs_juvix/common/preprocesors/links.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import os
import re
from pathlib import Path
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import urljoin

from mkdocs.plugins import get_plugin_logger
import numpy as np # type: ignore
from colorama import Fore, Style # type: ignore
from fuzzywuzzy import fuzz # type: ignore
from markdown.preprocessors import Preprocessor # type: ignore
from mkdocs.plugins import get_plugin_logger
from mkdocs.structure.pages import Page
from ncls import NCLS # type: ignore
import numpy as np # type: ignore
from ncls import NCLS # type: ignore

from mkdocs_juvix.common.models import FileLoc, WikiLink
from mkdocs_juvix.env import ENV
Expand All @@ -27,7 +28,8 @@
re.VERBOSE,
)

log = get_plugin_logger("\033[94m[wikilinks_preprocessor]\033[0m")

log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs-wikilinks]{Style.RESET_ALL}")

def _env_flag(name: str, default: bool = False) -> bool:
    """Interpret the environment variable *name* as a boolean flag.

    ``bool()`` on an environment string is True for every non-empty value,
    so ``NAME=false`` or ``NAME=0`` would switch the flag on. Here the
    usual "off" spellings are treated as False; any other non-empty value
    still counts as True, as it did before.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is not set at all.

    Returns:
        The parsed boolean value of the variable.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() not in {"", "0", "false", "no", "off"}


# Boolean flag read once, at import time, from the REPORT_BROKEN_WIKILINKS
# environment variable.
REPORT_BROKEN_WIKILINKS = _env_flag("REPORT_BROKEN_WIKILINKS")

Expand Down Expand Up @@ -94,28 +96,32 @@ def run(self, lines) -> List[str]:
cache_filepath
and cache_filepath.exists()
and original_filepath
and not self.env.new_or_changed_or_no_exist(original_filepath)
and not self.env.new_or_changed_or_not_exists(original_filepath)
):
return cache_filepath.read_text().split("\n")

if self.run_snippet_preprocessor:
time_start = time.time()
lines = self.snippet_preprocessor.run(lines)
time_end = time.time()
log.info(f"Snippet finished in {(time_end - time_start):.5f} seconds")
log.info(
f"Snippet finished in {Fore.GREEN}{(time_end - time_start):.5f}{Style.RESET_ALL} seconds"
)

log.info(f"Processing wikilinks on file {filepath}")
log.info(
f"Processing wikilinks on file {Fore.GREEN}{filepath}{Style.RESET_ALL}"
)
# Combine all lines into a single string
full_text = "\n".join(lines)
# Find all code blocks, HTML comments, and script tags in a single pass
pattern = re.compile(
ignore_blocks = re.compile(
r"(```(?:[\s\S]*?)```|<!--[\s\S]*?-->|<script>[\s\S]*?</script>)", re.DOTALL
)

intervals = []
time_start = time.time()
try:
for match in pattern.finditer(full_text):
for match in ignore_blocks.finditer(full_text):
intervals.append((match.start(), match.end(), 1))
except TimeoutError:
log.error("Timeout occurred while processing ignore patterns")
Expand Down Expand Up @@ -150,8 +156,10 @@ def run(self, lines) -> List[str]:
for start, end, new_text in reversed(replacements):
full_text = full_text[:start] + new_text + full_text[end:]
time_end = time.time()

log.info(f"Processing wikilinks took {(time_end - time_start):.5f} seconds")

log.info(
f"Processing wikilinks took {Fore.GREEN}{(time_end - time_start):.5f}{Style.RESET_ALL} seconds"
)

if cache_filepath:
log.debug(f"Writing wikilinks to cache for file {original_filepath}")
Expand Down
7 changes: 5 additions & 2 deletions mkdocs_juvix/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@ def fix_site_url(config: MkDocsConfig) -> MkDocsConfig:
config["site_url"] += "/"
return config

log.info("SITE_URL environment variable not set")
log.debug("SITE_URL environment variable not set")

version = os.environ.get("MIKE_DOCS_VERSION")

if version:
log.info(f"Using MIKE_DOCS_VERSION environment variable: {version}")
log.debug(f"Using MIKE_DOCS_VERSION environment variable: {version}")

if "site_url" not in config or not config["site_url"]:
config["site_url"] = ""

if not config["site_url"].endswith("/"):
config["site_url"] += "/"
Expand Down
47 changes: 23 additions & 24 deletions mkdocs_juvix/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,28 @@
Juvix settings.
"""

from functools import lru_cache
import os
import shutil
import subprocess
from functools import lru_cache
from os import getenv
from pathlib import Path
from typing import List, Optional, Tuple

from colorama import Fore, Style # type: ignore
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.plugins import get_plugin_logger
from semver import Version

from mkdocs_juvix.juvix_version import MIN_JUVIX_VERSION
from mkdocs_juvix.utils import compute_hash_filepath, hash_file

log = get_plugin_logger("ENV")
log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs-env]{Style.RESET_ALL}")

BASE_PATH = Path(__file__).parent
FIXTURES_PATH = BASE_PATH / "fixtures"


class ENV:
ROOT_PATH: Path
DOCS_DIRNAME: str = getenv("DOCS_DIRNAME", "docs")
Expand Down Expand Up @@ -94,7 +96,6 @@ class ENV:
"CACHE_WIKILINKS_DIRNAME", ".wikilinks"
) # The name of the file where the Juvix Markdown files are stored
DOCS_IMAGES_DIRNAME: str = getenv("DOCS_IMAGES_DIRNAME", "images")
CACHE_IMAGES_DIRNAME: str = getenv("CACHE_IMAGES_DIRNAME", ".images")
CACHE_JUVIX_VERSION_FILENAME: str = getenv(
"CACHE_JUVIX_VERSION_FILENAME", ".juvix_version"
) # The name of the file where the Juvix version is stored
Expand All @@ -120,7 +121,6 @@ class ENV:
SHOW_TODOS_IN_MD: bool
INDEXES_PATH: Path
IMAGES_PATH: Path
CACHE_IMAGES_PATH: Path

def __init__(self, config: Optional[MkDocsConfig] = None):
if config:
Expand Down Expand Up @@ -196,9 +196,6 @@ def __init__(self, config: Optional[MkDocsConfig] = None):
self.CACHE_HASHES_PATH: Path = (
self.CACHE_ABSPATH / self.CACHE_HASHES_DIRNAME
) # The path where hashes are stored (not the project hash)
self.CACHE_IMAGES_PATH: Path = (
self.CACHE_ABSPATH / self.CACHE_IMAGES_DIRNAME
) # The path to the images cache directory

self.JUVIX_FOOTER_CSS_FILEPATH: Path = (
self.DOCS_ABSPATH / "assets" / "css" / self.JUVIX_FOOTER_CSS_FILENAME
Expand All @@ -224,12 +221,19 @@ def __init__(self, config: Optional[MkDocsConfig] = None):
self.CACHE_HASHES_PATH,
self.JUVIX_FOOTER_CSS_FILEPATH.parent,
self.CACHE_WIKILINKS_PATH,
self.CACHE_IMAGES_PATH,
]

for directory in directories:
if directory.exists() and self.REMOVE_CACHE:
if (
directory.exists()
and self.REMOVE_CACHE
and config
and not config.get("env_init", False)
):
try:
log.info(
f"Removing directory {Fore.RED}{directory}{Style.RESET_ALL}"
)
shutil.rmtree(directory, ignore_errors=True)
except Exception as e:
log.error(
Expand Down Expand Up @@ -281,11 +285,13 @@ def __init__(self, config: Optional[MkDocsConfig] = None):
self.JUVIX_ENABLED = False
self.JUVIX_AVAILABLE = False
return

self.USE_DOT = bool(getenv("USE_DOT", True))
self.DOT_BIN = getenv("DOT_BIN", "dot")
self.DOT_FLAGS = getenv("DOT_FLAGS", "-Tsvg")
self.IMAGES_ENABLED = bool(getenv("IMAGES_ENABLED", True))
if config:
config["env_init"] = True

@lru_cache(maxsize=128)
def read_markdown_file_from_cache(self, filepath: Path) -> Optional[str]:
Expand All @@ -298,37 +304,33 @@ def read_wikilinks_file_from_cache(self, filepath: Path) -> Optional[str]:
if cache_ABSpath := self.get_filepath_for_wikilinks_in_cache(filepath):
return cache_ABSpath.read_text()
return None

def write_wikilinks_file_to_cache(self, filepath: Path, content: str) -> None:
    """Write *content* to the wikilinks cache entry that mirrors *filepath*.

    The destination is obtained from ``get_filepath_for_wikilinks_in_cache``.
    When that lookup yields nothing, the call is a no-op.

    Args:
        filepath: Source file whose processed text is being cached.
        content: Text to store in the cache file.
    """
    cache_abspath = self.get_filepath_for_wikilinks_in_cache(filepath)
    if not cache_abspath:
        return
    # The cache path mirrors the source file's sub-directories, which may
    # not have been created yet; without this, write_text raises
    # FileNotFoundError for nested pages.
    cache_abspath.parent.mkdir(parents=True, exist_ok=True)
    cache_abspath.write_text(content)

def get_filepath_for_wikilinks_in_cache(self, filepath: Path) -> Optional[Path]:
    """Return the wikilinks-cache path that mirrors *filepath*.

    The file's location relative to ``self.DOCS_ABSPATH`` is reproduced
    underneath ``self.CACHE_WIKILINKS_PATH``.

    Args:
        filepath: A file path; it is made absolute before comparison.

    Returns:
        The mirrored cache path, or ``None`` when *filepath* does not live
        under ``self.DOCS_ABSPATH``.
    """
    filepath = filepath.absolute()
    try:
        rel_to_docs = filepath.relative_to(self.DOCS_ABSPATH)
    except ValueError:
        # relative_to raises for paths outside the docs tree. The declared
        # Optional return lets callers handle that with a plain truthiness
        # check instead of an exception.
        return None
    return self.CACHE_WIKILINKS_PATH / rel_to_docs.parent / filepath.name

def new_or_changed_or_not_exists(self, filepath: Path) -> bool:
    """Report whether *filepath* differs from the hash recorded for it.

    Args:
        filepath: File whose current content hash is compared.

    Returns:
        ``True`` when no hash file exists for *filepath* under
        ``self.CACHE_HASHES_PATH``, or when the recorded hash differs from
        the hash of the file's current content; ``False`` otherwise.
    """
    current_hash = hash_file(filepath)
    hash_record = compute_hash_filepath(filepath, hash_dir=self.CACHE_HASHES_PATH)
    if hash_record.exists():
        # Compare the freshly computed hash with the one stored on disk.
        recorded_hash = hash_record.read_text()
        return current_hash != recorded_hash
    log.debug(f"File: {filepath} does not have a hash file.")
    return True

# NOTE(review): lru_cache on a method keys on ``self`` and keeps the instance
# alive for as long as the cache entry exists; kept so that existing
# ``cache_info``/``cache_clear`` users are unaffected.
@lru_cache(maxsize=128)
def get_filepath_for_juvix_markdown_in_cache(
    self, _filepath: Path
) -> Optional[Path]:
    """Return the cached Markdown path that mirrors a Juvix Markdown file.

    The file's location relative to ``self.DOCS_ABSPATH`` is reproduced
    underneath ``self.CACHE_MARKDOWN_JUVIX_OUTPUT_PATH``, with ``.juvix.md``
    in the file name replaced by ``.md``.

    Args:
        _filepath: A file path; it is made absolute before comparison.

    Returns:
        The mirrored cache path, or ``None`` when the file does not live
        under ``self.DOCS_ABSPATH``.
    """
    filepath = _filepath.absolute()
    try:
        rel_to_docs = filepath.relative_to(self.DOCS_ABSPATH)
    except ValueError:
        # Outside the docs tree: honour the Optional return type rather
        # than letting relative_to's ValueError escape.
        return None
    md_filename = filepath.name.replace(".juvix.md", ".md")
    return self.CACHE_MARKDOWN_JUVIX_OUTPUT_PATH / rel_to_docs.parent / md_filename

def unqualified_module_name(self, filepath: Path) -> Optional[str]:
fposix: str = filepath.as_posix()
Expand Down Expand Up @@ -371,9 +373,7 @@ def get_filename_module_by_extension(
return module_name + extension if module_name else None

def update_hash_file(self, filepath: Path) -> Optional[Tuple[Path, str]]: # noqa: F821
filepath_hash = compute_hash_filepath(
filepath, hash_dir=self.CACHE_HASHES_PATH
)
filepath_hash = compute_hash_filepath(filepath, hash_dir=self.CACHE_HASHES_PATH)
try:
with open(filepath_hash, "w") as f:
content_hash = hash_file(filepath)
Expand All @@ -398,4 +398,3 @@ def get_expected_filepath_for_juvix_isabelle_output_in_cache(
/ cache_isabelle_filename
)
return cache_isabelle_filepath

Loading

0 comments on commit 9c69ef6

Please sign in to comment.