Commit 8f10450
w.i.p
jonaprieto committed Oct 24, 2024
1 parent 60352ae commit 8f10450
Showing 11 changed files with 766 additions and 367 deletions.
2 changes: 1 addition & 1 deletion mkdocs_juvix/common/preprocesors/links.py
@@ -103,7 +103,7 @@ def run(self, lines) -> List[str]:
             cache_filepath
             and cache_filepath.exists()
             and original_filepath
-            and not self.env.new_or_changed_or_not_exists(original_filepath)
+            and not self.env.is_file_new_or_changed_for_cache(original_filepath)
         ):
             return cache_filepath.read_text().split("\n")
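The renamed helper gates the preprocessor's fast path: cached output is served only when a cache entry exists and the source file's content hash is unchanged. A minimal sketch of that gate as a standalone function (the env helper name is from this commit; the surrounding function is simplified):

from pathlib import Path
from typing import List, Optional

def cached_lines_or_none(env, original_filepath: Optional[Path], cache_filepath: Optional[Path]) -> Optional[List[str]]:
    # Serve cached preprocessed lines only if the cache file exists and the
    # source has not changed since its hash was last recorded.
    if (
        cache_filepath
        and cache_filepath.exists()
        and original_filepath
        and not env.is_file_new_or_changed_for_cache(original_filepath)
    ):
        return cache_filepath.read_text().split("\n")
    return None  # caller falls through to a fresh preprocessing pass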

52 changes: 32 additions & 20 deletions mkdocs_juvix/env.py
@@ -18,7 +18,7 @@
 from semver import Version

 from mkdocs_juvix.juvix_version import MIN_JUVIX_VERSION
-from mkdocs_juvix.utils import compute_hash_filepath, hash_file
+import mkdocs_juvix.utils as utils

 log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs-env]{Style.RESET_ALL}")

@@ -295,7 +295,7 @@ def __init__(self, config: Optional[MkDocsConfig] = None):

     @lru_cache(maxsize=128)
     def read_markdown_file_from_cache(self, filepath: Path) -> Optional[str]:
-        if cache_ABSpath := self.get_filepath_for_juvix_markdown_in_cache(filepath):
+        if cache_ABSpath := self.get_filepath_for_cache_markdown_output_of_juvix_markdown_file(filepath):
             return cache_ABSpath.read_text()
         return None
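Aside: @lru_cache here memoizes per-Path results, so repeated cache reads for the same file during a build skip disk I/O; the flip side is that memoized text can go stale if the cache file is rewritten mid-build. A standalone illustration of the pattern (hypothetical function, not from this repo):

from functools import lru_cache
from pathlib import Path

@lru_cache(maxsize=128)
def read_text_memoized(path: Path) -> str:
    # Path objects are hashable, so they work as lru_cache keys;
    # only the first call per path touches the disk.
    return path.read_text()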

@@ -313,24 +313,36 @@ def get_filepath_for_wikilinks_in_cache(self, filepath: Path) -> Optional[Path]:
         filepath = filepath.absolute()
         rel_to_docs = filepath.relative_to(self.DOCS_ABSPATH)
         return self.CACHE_WIKILINKS_PATH / rel_to_docs.parent / filepath.name

-    def new_or_changed_or_not_exists(self, filepath: Path) -> bool:
-        content_hash = hash_file(filepath)
-        path_hash = compute_hash_filepath(filepath, hash_dir=self.CACHE_HASHES_PATH)
-        if not path_hash.exists():
-            log.debug(f"File: {filepath} does not have a hash file.")
-            return True
-        fresh_content_hash = path_hash.read_text()
-        return content_hash != fresh_content_hash
+    def get_expected_filepath_for_cached_hash_for(self, filepath: Path) -> Path:
+        file_abspath = filepath.absolute()
+        return utils.get_filepath_for_cached_hash_for(file_abspath, hash_dir=self.CACHE_HASHES_PATH)
+
+    def is_file_new_or_changed_for_cache(self, filepath: Path) -> bool:
+        file_abspath = filepath.absolute()
+        hash_file = self.get_expected_filepath_for_cached_hash_for(file_abspath)
+        if not hash_file.exists():
+            return True  # File is new
+        # compute the hash of the file content to check if it has changed
+        current_hash = utils.hash_content_of(file_abspath)
+        cached_hash = hash_file.read_text().strip()
+        return current_hash != cached_hash  # File has changed if hashes are different
+
+    def update_cache_for_file(self, filepath: Path, file_content: str) -> None:
+        file_abspath = filepath.absolute()
+        cache_filepath = self.get_expected_filepath_for_cached_hash_for(file_abspath)
+        cache_filepath.parent.mkdir(parents=True, exist_ok=True)
+        cache_filepath.write_text(file_content)
+        self.update_hash_file(file_abspath)

     @lru_cache(maxsize=128)
-    def get_filepath_for_juvix_markdown_in_cache(
-        self, _filepath: Path
-    ) -> Optional[Path]:
-        filepath = _filepath.absolute()
+    def get_filepath_for_cache_markdown_output_of_juvix_markdown_file(
+        self, filepath: Path
+    ) -> Path:
+        file_abspath = filepath.absolute()
         md_filename = filepath.name.replace(".juvix.md", ".md")
-        rel_to_docs = filepath.relative_to(self.DOCS_ABSPATH)
-        return self.CACHE_MARKDOWN_JUVIX_OUTPUT_PATH / rel_to_docs.parent / md_filename
+        file_rel_to_docs = file_abspath.relative_to(self.DOCS_ABSPATH)
+        return self.CACHE_MARKDOWN_JUVIX_OUTPUT_PATH / file_rel_to_docs.parent / md_filename

     def unqualified_module_name(self, filepath: Path) -> Optional[str]:
         fposix: str = filepath.as_posix()
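Taken together, the new helpers implement content-hash cache invalidation: each source file gets a companion hash file under CACHE_HASHES_PATH, and a file counts as new or changed when that hash file is missing or its stored digest differs from a fresh hash of the content. A self-contained sketch of the same scheme (hashlib-based; the actual digest and on-disk layout used by mkdocs_juvix.utils may differ):

import hashlib
from pathlib import Path

def hash_content_of(path: Path) -> str:
    return hashlib.sha256(path.read_bytes()).hexdigest()

def cached_hash_path_for(src: Path, hash_dir: Path) -> Path:
    # One hash file per source file, keyed by a digest of its absolute path.
    return hash_dir / hashlib.sha256(src.absolute().as_posix().encode()).hexdigest()

def is_new_or_changed(src: Path, hash_dir: Path) -> bool:
    stored = cached_hash_path_for(src, hash_dir)
    if not stored.exists():
        return True  # never hashed before: treat as new
    return hash_content_of(src) != stored.read_text().strip()

def record_hash(src: Path, hash_dir: Path) -> None:
    stored = cached_hash_path_for(src, hash_dir)
    stored.parent.mkdir(parents=True, exist_ok=True)
    stored.write_text(hash_content_of(src))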
@@ -372,11 +384,11 @@ def get_filename_module_by_extension(
         module_name = self.unqualified_module_name(filepath)
         return module_name + extension if module_name else None

-    def update_hash_file(self, filepath: Path) -> Optional[Tuple[Path, str]]:  # noqa: F821
-        filepath_hash = compute_hash_filepath(filepath, hash_dir=self.CACHE_HASHES_PATH)
+    def update_hash_file(self, filepath: Path) -> Optional[Tuple[Path, str]]:
+        filepath_hash = self.get_expected_filepath_for_cached_hash_for(filepath)
         try:
             with open(filepath_hash, "w") as f:
-                content_hash = hash_file(filepath)
+                content_hash = utils.hash_content_of(filepath)
                 f.write(content_hash)
                 return (filepath_hash, content_hash)
         except Exception as e:
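Since update_hash_file returns None on any write failure, callers that need the digest should unpack defensively. A hedged usage sketch (assuming an ENV instance named env and the module-level log from this file; the path is illustrative):

from pathlib import Path

result = env.update_hash_file(Path("docs/index.juvix.md"))
if result is not None:
    hash_path, digest = result
    log.debug(f"Stored digest {digest[:8]} at {hash_path}")
else:
    log.warning("Hash update failed; the file will be treated as changed next run")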
95 changes: 33 additions & 62 deletions mkdocs_juvix/images.py
@@ -13,7 +13,7 @@
 from mkdocs.plugins import BasePlugin, get_plugin_logger
 from mkdocs.structure.files import Files  # type: ignore
 from mkdocs.structure.pages import Page
-from ncls import NCLS
+from ncls import NCLS  # type: ignore

 from mkdocs_juvix.common.utils import fix_site_url  # type:ignore
 from mkdocs_juvix.env import ENV  # type: ignore
@@ -70,88 +70,57 @@ def __init__(self, config, env: Optional[ENV] = None):
         self.env = env

     def run(self, lines):
-        full_text = "".join(lines)
-
+        full_text = "\n".join(lines)
         config = self.config
-        current_page_url = None
-
-        if "current_page" in config and isinstance(config["current_page"], Page):
-            url_relative = self.env.DOCS_PATH / Path(
-                config["current_page"].url.replace(".html", ".md")
-            )
-            current_page_url = url_relative.as_posix()
-
-        if not current_page_url:
+        if not isinstance(config.get("current_page"), Page):
             log.error("Current page URL not found. Images will not be processed.")
             return lines

-        ignore_blocks = re.compile(
-            r"(```(?:[\s\S]*?)```|<!--[\s\S]*?-->|<div>[\s\S]*?</div>)", re.DOTALL
-        )
-        intervals = []
-        try:
-            for match in ignore_blocks.finditer(full_text):
-                intervals.append((match.start(), match.end(), 1))
-        except Exception as e:
-            log.error(f"Error occurred while processing ignore patterns: {e}")
-            return lines
+        url_relative = self.env.DOCS_PATH / Path(config["current_page"].url.replace(".html", ".md"))
+        current_page_url = url_relative.as_posix()
+        log.info(f"Processing images for {url_relative}")
+
+        ignore_blocks = re.compile(r"(```(?:[\s\S]*?)```|<!--[\s\S]*?-->|<div>[\s\S]*?</div>)", re.DOTALL)
+        intervals = [(match.start(), match.end(), 1) for match in ignore_blocks.finditer(full_text)]

         ignore_tree = None
         if intervals:
             starts, ends, ids = map(np.array, zip(*intervals))
             ignore_tree = NCLS(starts, ends, ids)

-        def img_markdown_link(match: re.Match, img_expected_location: Path) -> str:
-            if match.group("caption"):
-                return (
-                    f"![{match.group('caption')}]({img_expected_location.as_posix()})"
-                )
-            else:
-                return img_expected_location.as_posix()
-
-        full_text = "".join(lines)
-
-        time_start = time.time()
-
         def process_matches(pattern, process_func):
-            matches = list(pattern.finditer(full_text))
-            if matches:
-                replacements = []
-                for match in matches:
-                    start, end = match.start(), match.end()
-                    if ignore_tree and not list(ignore_tree.find_overlap(start, end)):
-                        url = Path(match.group("url"))
-                        if url.as_posix().startswith("http"):
-                            continue
-                        image_fname = url.name
-                        img_expected_location = self.env.IMAGES_PATH / image_fname
+            replacements = []
+            for match in pattern.finditer(full_text):
+                start, end = match.span()
+                if ignore_tree and not list(ignore_tree.find_overlap(start, end)):
+                    url = Path(match.group("url"))
+                    if not url.as_posix().startswith("http"):
+                        img_expected_location = self.env.IMAGES_PATH / url.name
                         new_url = process_func(match, img_expected_location)
                         replacements.append((start, end, new_url))
-                return replacements
-            return []
+            return replacements

+        time_start = time.time()
+
         replacements = process_matches(
             IMAGES_PATTERN,
-            lambda match, img_expected_location: img_markdown_link(
-                match, img_expected_location
-            ),
+            lambda match, img_expected_location: (
+                f"![{match.group('caption')}]({img_expected_location.as_posix()})"
+                if match.group("caption")
+                else img_expected_location.as_posix()
+            )
         )

-        for start, end, new_url in reversed(replacements):
-            full_text = full_text[:start] + new_url + full_text[end:]
-
-        replacements = process_matches(
+        replacements += process_matches(
             HTML_IMG_PATTERN,
-            lambda _,
-            img_expected_location: f'<img src="{img_expected_location.absolute().as_posix()}" />',
+            lambda _, img_expected_location: f'<img src="{img_expected_location.absolute().as_posix()}" />'
        )

         for start, end, new_url in reversed(replacements):
             full_text = full_text[:start] + new_url + full_text[end:]

-        time_end = time.time()
-        log.debug(
-            f"Path image resolution took {time_end - time_start:.5f} seconds for {current_page_url}"
-        )
+        log.debug(f"Path image resolution took {time.time() - time_start:.5f} seconds for {current_page_url}")

         return full_text.split("\n")
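Two details of the rewritten run() are worth illustrating: matches that overlap an ignore interval (fenced code, HTML comments, div blocks) are skipped via an NCLS interval tree, and the surviving replacements are applied right-to-left so earlier offsets stay valid. One caveat: reversed() assumes the combined list is sorted by start offset, which is not guaranteed once HTML matches are appended after the Markdown ones; sorting first makes the order explicit. A stripped-down, self-contained sketch of both ideas (illustrative patterns, not the plugin's own):

import re

import numpy as np
from ncls import NCLS

text = "keep `![x](a.png)` untouched, resolve ![y](b.png)"

# Intervals to ignore (here: inline code spans), loaded into an interval tree.
ignore = [(m.start(), m.end(), 1) for m in re.finditer(r"`[^`]*`", text)]
starts, ends, ids = (np.array(v, dtype=np.int64) for v in zip(*ignore))
tree = NCLS(starts, ends, ids)

replacements = []
for m in re.finditer(r"!\[(?P<caption>[^\]]*)\]\((?P<url>[^)]+)\)", text):
    if not list(tree.find_overlap(m.start(), m.end())):  # outside ignore blocks
        replacements.append((m.start(), m.end(), f"<resolved {m.group('url')}>"))

# Apply right-to-left so earlier (start, end) spans are unaffected by edits;
# sorting keeps this safe even when replacements come from several passes.
for start, end, new in sorted(replacements, reverse=True):
    text = text[:start] + new + text[end:]

print(text)  # keep `![x](a.png)` untouched, resolve <resolved b.png>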

@@ -173,16 +142,18 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:

         async def process_dot_file(dot_file: Path):
             try:
-                cond = self.env.new_or_changed_or_not_exists(dot_file)
+                cond = self.env.is_file_new_or_changed_for_cache(dot_file)
                 svg_file = dot_file.with_suffix(".dot.svg")
                 if cond:
                     await self._generate_dot_svg(dot_file)
                     if svg_file.exists():
-                        log.info(f"Generated SVG: {svg_file}")
+                        log.info(f"Generated SVG: {Fore.GREEN}{svg_file}{Style.RESET_ALL}")
                     self.env.update_hash_file(dot_file)
                 return svg_file
             except Exception as e:
-                log.error(f"Error generating SVG for {dot_file}: {e}")
+                log.error(
+                    f"Error generating SVG for {Fore.GREEN}{dot_file}{Style.RESET_ALL}: {e}"
+                )
                 return None

         async def run_in_parallel(dot_files: List[Path]):
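The hunk above converts each .dot file inside its own async task; a minimal sketch of the fan-out that run_in_parallel presumably performs (its body is outside this hunk, so the gather-based version below is an assumption):

import asyncio
from pathlib import Path
from typing import List, Optional

async def process_dot_file(dot_file: Path) -> Optional[Path]:
    # Stand-in for the real dot -> SVG conversion shown above.
    await asyncio.sleep(0)
    return dot_file.with_suffix(".dot.svg")

async def run_in_parallel(dot_files: List[Path]) -> List[Optional[Path]]:
    # gather() schedules all conversions concurrently and returns results in
    # input order; each task already catches its own exceptions above.
    return await asyncio.gather(*(process_dot_file(f) for f in dot_files))

# Example: asyncio.run(run_in_parallel([Path("a.dot"), Path("b.dot")]))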
(Diffs for the remaining 8 of the 11 changed files are not shown.)
