Commit 8f10450
w.i.p
jonaprieto committed Oct 24, 2024
1 parent 60352ae commit 8f10450
Showing 11 changed files with 766 additions and 367 deletions.
2 changes: 1 addition & 1 deletion mkdocs_juvix/common/preprocesors/links.py
@@ -103,7 +103,7 @@ def run(self, lines) -> List[str]:
             cache_filepath
             and cache_filepath.exists()
             and original_filepath
-            and not self.env.new_or_changed_or_not_exists(original_filepath)
+            and not self.env.is_file_new_or_changed_for_cache(original_filepath)
         ):
             return cache_filepath.read_text().split("\n")
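The renamed helper gates the preprocessor's fast path: cached output is served only when a cache entry exists and the source file's content hash is unchanged. A minimal sketch of that gate as a standalone function (the env helper name is from this commit; the surrounding function is simplified):

from pathlib import Path
from typing import List, Optional

def cached_lines_or_none(env, original_filepath: Optional[Path], cache_filepath: Optional[Path]) -> Optional[List[str]]:
    # Serve cached preprocessed lines only if the cache file exists and the
    # source has not changed since its hash was last recorded.
    if (
        cache_filepath
        and cache_filepath.exists()
        and original_filepath
        and not env.is_file_new_or_changed_for_cache(original_filepath)
    ):
        return cache_filepath.read_text().split("\n")
    return None  # caller falls through to a fresh preprocessing pass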

52 changes: 32 additions & 20 deletions mkdocs_juvix/env.py
@@ -18,7 +18,7 @@
 from semver import Version

 from mkdocs_juvix.juvix_version import MIN_JUVIX_VERSION
-from mkdocs_juvix.utils import compute_hash_filepath, hash_file
+import mkdocs_juvix.utils as utils

 log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs-env]{Style.RESET_ALL}")

@@ -295,7 +295,7 @@ def __init__(self, config: Optional[MkDocsConfig] = None):

     @lru_cache(maxsize=128)
     def read_markdown_file_from_cache(self, filepath: Path) -> Optional[str]:
-        if cache_ABSpath := self.get_filepath_for_juvix_markdown_in_cache(filepath):
+        if cache_ABSpath := self.get_filepath_for_cache_markdown_output_of_juvix_markdown_file(filepath):
             return cache_ABSpath.read_text()
         return None
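Aside: @lru_cache here memoizes per-Path results, so repeated cache reads for the same file during a build skip disk I/O; the flip side is that memoized text can go stale if the cache file is rewritten mid-build. A standalone illustration of the pattern (hypothetical function, not from this repo):

from functools import lru_cache
from pathlib import Path

@lru_cache(maxsize=128)
def read_text_memoized(path: Path) -> str:
    # Path objects are hashable, so they work as lru_cache keys;
    # only the first call per path touches the disk.
    return path.read_text()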

@@ -313,24 +313,36 @@ def get_filepath_for_wikilinks_in_cache(self, filepath: Path) -> Optional[Path]:
         filepath = filepath.absolute()
         rel_to_docs = filepath.relative_to(self.DOCS_ABSPATH)
         return self.CACHE_WIKILINKS_PATH / rel_to_docs.parent / filepath.name

-    def new_or_changed_or_not_exists(self, filepath: Path) -> bool:
-        content_hash = hash_file(filepath)
-        path_hash = compute_hash_filepath(filepath, hash_dir=self.CACHE_HASHES_PATH)
-        if not path_hash.exists():
-            log.debug(f"File: {filepath} does not have a hash file.")
-            return True
-        fresh_content_hash = path_hash.read_text()
-        return content_hash != fresh_content_hash
+    def get_expected_filepath_for_cached_hash_for(self, filepath: Path) -> Path:
+        file_abspath = filepath.absolute()
+        return utils.get_filepath_for_cached_hash_for(file_abspath, hash_dir=self.CACHE_HASHES_PATH)
+
+    def is_file_new_or_changed_for_cache(self, filepath: Path) -> bool:
+        file_abspath = filepath.absolute()
+        hash_file = self.get_expected_filepath_for_cached_hash_for(file_abspath)
+        if not hash_file.exists():
+            return True  # File is new
+        # compute the hash of the file content to check if it has changed
+        current_hash = utils.hash_content_of(file_abspath)
+        cached_hash = hash_file.read_text().strip()
+        return current_hash != cached_hash  # File has changed if hashes are different
+
+    def update_cache_for_file(self, filepath: Path, file_content: str) -> None:
+        file_abspath = filepath.absolute()
+        cache_filepath = self.get_expected_filepath_for_cached_hash_for(file_abspath)
+        cache_filepath.parent.mkdir(parents=True, exist_ok=True)
+        cache_filepath.write_text(file_content)
+        self.update_hash_file(file_abspath)

     @lru_cache(maxsize=128)
-    def get_filepath_for_juvix_markdown_in_cache(
-        self, _filepath: Path
-    ) -> Optional[Path]:
-        filepath = _filepath.absolute()
+    def get_filepath_for_cache_markdown_output_of_juvix_markdown_file(
+        self, filepath: Path
+    ) -> Path:
+        file_abspath = filepath.absolute()
         md_filename = filepath.name.replace(".juvix.md", ".md")
-        rel_to_docs = filepath.relative_to(self.DOCS_ABSPATH)
-        return self.CACHE_MARKDOWN_JUVIX_OUTPUT_PATH / rel_to_docs.parent / md_filename
+        file_rel_to_docs = file_abspath.relative_to(self.DOCS_ABSPATH)
+        return self.CACHE_MARKDOWN_JUVIX_OUTPUT_PATH / file_rel_to_docs.parent / md_filename

     def unqualified_module_name(self, filepath: Path) -> Optional[str]:
         fposix: str = filepath.as_posix()
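Taken together, the new helpers implement content-hash cache invalidation: each source file gets a companion hash file under CACHE_HASHES_PATH, and a file counts as new or changed when that hash file is missing or its stored digest differs from a fresh hash of the content. A self-contained sketch of the same scheme (hashlib-based; the actual digest and on-disk layout used by mkdocs_juvix.utils may differ):

import hashlib
from pathlib import Path

def hash_content_of(path: Path) -> str:
    return hashlib.sha256(path.read_bytes()).hexdigest()

def cached_hash_path_for(src: Path, hash_dir: Path) -> Path:
    # One hash file per source file, keyed by a digest of its absolute path.
    return hash_dir / hashlib.sha256(src.absolute().as_posix().encode()).hexdigest()

def is_new_or_changed(src: Path, hash_dir: Path) -> bool:
    stored = cached_hash_path_for(src, hash_dir)
    if not stored.exists():
        return True  # never hashed before: treat as new
    return hash_content_of(src) != stored.read_text().strip()

def record_hash(src: Path, hash_dir: Path) -> None:
    stored = cached_hash_path_for(src, hash_dir)
    stored.parent.mkdir(parents=True, exist_ok=True)
    stored.write_text(hash_content_of(src))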
@@ -372,11 +384,11 @@ def get_filename_module_by_extension(
         module_name = self.unqualified_module_name(filepath)
         return module_name + extension if module_name else None

-    def update_hash_file(self, filepath: Path) -> Optional[Tuple[Path, str]]:  # noqa: F821
-        filepath_hash = compute_hash_filepath(filepath, hash_dir=self.CACHE_HASHES_PATH)
+    def update_hash_file(self, filepath: Path) -> Optional[Tuple[Path, str]]:
+        filepath_hash = self.get_expected_filepath_for_cached_hash_for(filepath)
         try:
             with open(filepath_hash, "w") as f:
-                content_hash = hash_file(filepath)
+                content_hash = utils.hash_content_of(filepath)
                 f.write(content_hash)
                 return (filepath_hash, content_hash)
         except Exception as e:
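Since update_hash_file returns None on any write failure, callers that need the digest should unpack defensively. A hedged usage sketch (assuming an ENV instance named env and the module-level log from this file; the path is illustrative):

from pathlib import Path

result = env.update_hash_file(Path("docs/index.juvix.md"))
if result is not None:
    hash_path, digest = result
    log.debug(f"Stored digest {digest[:8]} at {hash_path}")
else:
    log.warning("Hash update failed; the file will be treated as changed next run")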
95 changes: 33 additions & 62 deletions mkdocs_juvix/images.py
@@ -13,7 +13,7 @@
 from mkdocs.plugins import BasePlugin, get_plugin_logger
 from mkdocs.structure.files import Files  # type: ignore
 from mkdocs.structure.pages import Page
-from ncls import NCLS
+from ncls import NCLS  # type: ignore

 from mkdocs_juvix.common.utils import fix_site_url  # type:ignore
 from mkdocs_juvix.env import ENV  # type: ignore
@@ -70,88 +70,57 @@ def __init__(self, config, env: Optional[ENV] = None):
         self.env = env

     def run(self, lines):
-        full_text = "".join(lines)
-
+        full_text = "\n".join(lines)
         config = self.config
-        current_page_url = None
-
-        if "current_page" in config and isinstance(config["current_page"], Page):
-            url_relative = self.env.DOCS_PATH / Path(
-                config["current_page"].url.replace(".html", ".md")
-            )
-            current_page_url = url_relative.as_posix()
-
-        if not current_page_url:
+        if not isinstance(config.get("current_page"), Page):
             log.error("Current page URL not found. Images will not be processed.")
             return lines

-        ignore_blocks = re.compile(
-            r"(```(?:[\s\S]*?)```|<!--[\s\S]*?-->|<div>[\s\S]*?</div>)", re.DOTALL
-        )
-        intervals = []
-        try:
-            for match in ignore_blocks.finditer(full_text):
-                intervals.append((match.start(), match.end(), 1))
-        except Exception as e:
-            log.error(f"Error occurred while processing ignore patterns: {e}")
-            return lines
+        url_relative = self.env.DOCS_PATH / Path(config["current_page"].url.replace(".html", ".md"))
+        current_page_url = url_relative.as_posix()
+        log.info(f"Processing images for {url_relative}")
+
+        ignore_blocks = re.compile(r"(```(?:[\s\S]*?)```|<!--[\s\S]*?-->|<div>[\s\S]*?</div>)", re.DOTALL)
+        intervals = [(match.start(), match.end(), 1) for match in ignore_blocks.finditer(full_text)]

         ignore_tree = None
         if intervals:
             starts, ends, ids = map(np.array, zip(*intervals))
             ignore_tree = NCLS(starts, ends, ids)

-        def img_markdown_link(match: re.Match, img_expected_location: Path) -> str:
-            if match.group("caption"):
-                return (
-                    f"![{match.group('caption')}]({img_expected_location.as_posix()})"
-                )
-            else:
-                return img_expected_location.as_posix()
-
-        full_text = "".join(lines)
-
-        time_start = time.time()
-
         def process_matches(pattern, process_func):
-            matches = list(pattern.finditer(full_text))
-            if matches:
-                replacements = []
-                for match in matches:
-                    start, end = match.start(), match.end()
-                    if ignore_tree and not list(ignore_tree.find_overlap(start, end)):
-                        url = Path(match.group("url"))
-                        if url.as_posix().startswith("http"):
-                            continue
-                        image_fname = url.name
-                        img_expected_location = self.env.IMAGES_PATH / image_fname
+            replacements = []
+            for match in pattern.finditer(full_text):
+                start, end = match.span()
+                if ignore_tree and not list(ignore_tree.find_overlap(start, end)):
+                    url = Path(match.group("url"))
+                    if not url.as_posix().startswith("http"):
+                        img_expected_location = self.env.IMAGES_PATH / url.name
                         new_url = process_func(match, img_expected_location)
                         replacements.append((start, end, new_url))
-                return replacements
-            return []
+            return replacements

+        time_start = time.time()
+
         replacements = process_matches(
             IMAGES_PATTERN,
-            lambda match, img_expected_location: img_markdown_link(
-                match, img_expected_location
-            ),
+            lambda match, img_expected_location: (
+                f"![{match.group('caption')}]({img_expected_location.as_posix()})"
+                if match.group("caption")
+                else img_expected_location.as_posix()
+            )
         )

-        for start, end, new_url in reversed(replacements):
-            full_text = full_text[:start] + new_url + full_text[end:]
-
-        replacements = process_matches(
+        replacements += process_matches(
             HTML_IMG_PATTERN,
-            lambda _,
-            img_expected_location: f'<img src="{img_expected_location.absolute().as_posix()}" />',
+            lambda _, img_expected_location: f'<img src="{img_expected_location.absolute().as_posix()}" />'
        )

         for start, end, new_url in reversed(replacements):
             full_text = full_text[:start] + new_url + full_text[end:]

-        time_end = time.time()
-        log.debug(
-            f"Path image resolution took {time_end - time_start:.5f} seconds for {current_page_url}"
-        )
+        log.debug(f"Path image resolution took {time.time() - time_start:.5f} seconds for {current_page_url}")

         return full_text.split("\n")
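Two details of the rewritten run() are worth illustrating: matches that overlap an ignore interval (fenced code, HTML comments, div blocks) are skipped via an NCLS interval tree, and the surviving replacements are applied right-to-left so earlier offsets stay valid. One caveat: reversed() assumes the combined list is sorted by start offset, which is not guaranteed once HTML matches are appended after the Markdown ones; sorting first makes the order explicit. A stripped-down, self-contained sketch of both ideas (illustrative patterns, not the plugin's own):

import re

import numpy as np
from ncls import NCLS

text = "keep `![x](a.png)` untouched, resolve ![y](b.png)"

# Intervals to ignore (here: inline code spans), loaded into an interval tree.
ignore = [(m.start(), m.end(), 1) for m in re.finditer(r"`[^`]*`", text)]
starts, ends, ids = (np.array(v, dtype=np.int64) for v in zip(*ignore))
tree = NCLS(starts, ends, ids)

replacements = []
for m in re.finditer(r"!\[(?P<caption>[^\]]*)\]\((?P<url>[^)]+)\)", text):
    if not list(tree.find_overlap(m.start(), m.end())):  # outside ignore blocks
        replacements.append((m.start(), m.end(), f"<resolved {m.group('url')}>"))

# Apply right-to-left so earlier (start, end) spans are unaffected by edits;
# sorting keeps this safe even when replacements come from several passes.
for start, end, new in sorted(replacements, reverse=True):
    text = text[:start] + new + text[end:]

print(text)  # keep `![x](a.png)` untouched, resolve <resolved b.png>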

@@ -173,16 +142,18 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:

         async def process_dot_file(dot_file: Path):
             try:
-                cond = self.env.new_or_changed_or_not_exists(dot_file)
+                cond = self.env.is_file_new_or_changed_for_cache(dot_file)
                 svg_file = dot_file.with_suffix(".dot.svg")
                 if cond:
                     await self._generate_dot_svg(dot_file)
                     if svg_file.exists():
-                        log.info(f"Generated SVG: {svg_file}")
+                        log.info(f"Generated SVG: {Fore.GREEN}{svg_file}{Style.RESET_ALL}")
                     self.env.update_hash_file(dot_file)
                 return svg_file
             except Exception as e:
-                log.error(f"Error generating SVG for {dot_file}: {e}")
+                log.error(
+                    f"Error generating SVG for {Fore.GREEN}{dot_file}{Style.RESET_ALL}: {e}"
+                )
                 return None

         async def run_in_parallel(dot_files: List[Path]):
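The hunk above converts each .dot file inside its own async task; a minimal sketch of the fan-out that run_in_parallel presumably performs (its body is outside this hunk, so the gather-based version below is an assumption):

import asyncio
from pathlib import Path
from typing import List, Optional

async def process_dot_file(dot_file: Path) -> Optional[Path]:
    # Stand-in for the real dot -> SVG conversion shown above.
    await asyncio.sleep(0)
    return dot_file.with_suffix(".dot.svg")

async def run_in_parallel(dot_files: List[Path]) -> List[Optional[Path]]:
    # gather() schedules all conversions concurrently and returns results in
    # input order; each task already catches its own exceptions above.
    return await asyncio.gather(*(process_dot_file(f) for f in dot_files))

# Example: asyncio.run(run_in_parallel([Path("a.dot"), Path("b.dot")]))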
(Diffs for the remaining 8 of the 11 changed files are not shown.)
