Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework build caching #843

Merged
merged 4 commits into from
Sep 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions sphinxcontrib/confluencebuilder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def setup(app):
# Default alignment for tables, figures, etc.
cm.add_conf('confluence_default_alignment', 'confluence')
# Enablement of generated domain index documents
cm.add_conf('confluence_domain_indices')
cm.add_conf('confluence_domain_indices', 'confluence')
# Confluence editor to target for publication.
cm.add_conf('confluence_editor', 'confluence')
# File to get page header information from.
Expand All @@ -98,19 +98,19 @@ def setup(app):
# Dictionary to pass to footer when rendering template.
cm.add_conf('confluence_footer_data', 'confluence')
# Enablement of generated search documents
cm.add_conf_bool('confluence_include_search')
cm.add_conf_bool('confluence_include_search', 'confluence')
# Enablement of a "page generated" notice.
cm.add_conf_bool('confluence_page_generation_notice', 'confluence')
# Enablement of publishing pages into a hierarchy from a root toctree.
cm.add_conf_bool('confluence_page_hierarchy')
cm.add_conf_bool('confluence_page_hierarchy', 'confluence')
# Show previous/next buttons (bottom, top, both, None).
cm.add_conf('confluence_prev_next_buttons_location', 'confluence')
# Suffix to put after section numbers, before section name
cm.add_conf('confluence_secnumber_suffix', 'confluence')
# Enablement of a "Edit/Show Source" reference on each document
cm.add_conf('confluence_sourcelink', 'confluence')
# Enablement of a generated index document
cm.add_conf_bool('confluence_use_index')
cm.add_conf_bool('confluence_use_index', 'confluence')
# Enablement for toctrees for singleconfluence documents.
cm.add_conf_bool('singleconfluence_toctree', 'singleconfluence')

Expand All @@ -132,7 +132,7 @@ def setup(app):
# Explicitly prevent page notifications on update.
cm.add_conf_bool('confluence_disable_notifications')
# Define a series of labels to apply to all published pages.
cm.add_conf('confluence_global_labels')
cm.add_conf('confluence_global_labels', 'confluence')
# Enablement of configuring root as space's homepage.
cm.add_conf_bool('confluence_root_homepage')
# Translation to override parent page identifier to publish to.
Expand Down
53 changes: 33 additions & 20 deletions sphinxcontrib/confluencebuilder/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from sphinxcontrib.confluencebuilder.config.checks import validate_configuration
from sphinxcontrib.confluencebuilder.config.defaults import apply_defaults
from sphinxcontrib.confluencebuilder.config.env import apply_env_overrides
from sphinxcontrib.confluencebuilder.config.env import build_hash
from sphinxcontrib.confluencebuilder.env import ConfluenceCacheInfo
from sphinxcontrib.confluencebuilder.intersphinx import build_intersphinx
from sphinxcontrib.confluencebuilder.logger import ConfluenceLogger
from sphinxcontrib.confluencebuilder.nodes import confluence_footer
Expand Down Expand Up @@ -67,12 +69,15 @@ def __init__(self, app, env=None):
self.domain_indices = {}
self.file_suffix = '.conf'
self.info = ConfluenceLogger.info
self.legacy_assets = {}
self.legacy_pages = None
self.link_suffix = None
self.metadata = defaultdict(dict)
self.nav_next = {}
self.nav_prev = {}
self.omitted_docnames = []
self.orphan_docnames = []
self.parent_id = None
self.publish_allowlist = None
self.publish_denylist = None
self.publish_docnames = []
Expand All @@ -84,8 +89,10 @@ def __init__(self, app, env=None):
self.use_search = None
self.verbose = ConfluenceLogger.verbose
self.warn = ConfluenceLogger.warn
self._cache_info = ConfluenceCacheInfo(self)
self._cached_footer_data = None
self._cached_header_data = None
self._config_confluence_hash = None
self._original_get_doctree = None
self._verbose = self.app.verbosity

Expand Down Expand Up @@ -148,6 +155,14 @@ def init(self):
self.config.sphinx_verbosity = self._verbose
self.publisher.init(self.config, self.cloud)

# With the configuration finalized, generate a Confluence-specific
# configuration hash that is applicable to this run
self._config_confluence_hash = build_hash(config)
self.verbose('configuration hash ' + self._config_confluence_hash)

self._cache_info.load_cache()
self._cache_info.configure(self._config_confluence_hash)

self.create_template_bridge()
self.templates.init(self)

Expand Down Expand Up @@ -209,27 +224,11 @@ def get_outdated_docs(self):
"""
Return an iterable of input files that are outdated.
"""
# This method is taken from TextBuilder.get_outdated_docs()
# with minor changes to support :confval:`rst_file_transform`.

for docname in self.env.found_docs:
if docname not in self.env.all_docs:
if self._cache_info.is_outdated(docname):
yield docname
continue
sourcename = path.join(self.env.srcdir, docname +
self.file_suffix)
targetname = path.join(self.outdir, self.file_transform(docname))

try:
targetmtime = path.getmtime(targetname)
except Exception:
targetmtime = 0
try:
srcmtime = path.getmtime(sourcename)
if srcmtime > targetmtime:
yield docname
except OSError:
# source doesn't exist anymore
pass

def get_target_uri(self, docname, typ=None):
return self.link_transform(docname)
Expand Down Expand Up @@ -483,6 +482,8 @@ def write_doc(self, docname, doctree):
except OSError as err:
self.warn(f'error writing file {outfilename}: {err}')

self._cache_info.track_page_hash(docname)

def publish_doc(self, docname, output):
conf = self.config
title = self.state.title(docname)
Expand Down Expand Up @@ -519,6 +520,8 @@ def publish_doc(self, docname, output):
uploaded_id = self.publisher.store_page(title, data, parent_id)
self.state.register_upload_id(docname, uploaded_id)

self._cache_info.track_last_page_id(docname, uploaded_id)

if self.config.root_doc == docname:
self.root_doc_page_id = uploaded_id

Expand Down Expand Up @@ -751,8 +754,6 @@ def finish(self):

# publish generated output (if desired)
if self.publish:
self.legacy_assets = {}
self.legacy_pages = None
self.parent_id = self.publisher.get_base_page_id()

for docname in status_iterator(
Expand Down Expand Up @@ -802,9 +803,21 @@ def to_asset_name(asset):
except OSError as err:
self.warn(f'error reading asset {key}: {err}')

# if we have documents that were not changed (and therefore, not
# needing to be republished), assume any cached publish page ids
# are still valid and remove them from the legacy pages list
other_docs = self.env.all_docs.keys() - set(self.publish_docnames)
for unchanged_doc in other_docs:
lpid = self._cache_info.last_page_id(unchanged_doc)
if lpid is not None and lpid in self.legacy_pages:
self.legacy_pages.remove(lpid)

self.publish_cleanup()
self.publish_finalize()

# persist cache from this run
self._cache_info.save_cache()

def cleanup(self):
if self.publish:
self.publisher.disconnect()
Expand Down
40 changes: 40 additions & 0 deletions sphinxcontrib/confluencebuilder/config/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright Sphinx Confluence Builder Contributors (AUTHORS)

from sphinxcontrib.confluencebuilder.logger import ConfluenceLogger as logger
from sphinxcontrib.confluencebuilder.util import ConfluenceUtil
from sphinxcontrib.confluencebuilder.util import str2bool
import os

Expand Down Expand Up @@ -38,3 +39,42 @@ def apply_env_overrides(builder):
conf[key] = int(env_val)
else:
conf[key] = env_val


def build_hash(config):
    """
    builds a confluence configuration hash

    This call will build a hash based on Confluence-specific configuration
    entries. This hash can later be used to determine whether or not
    re-processing documents is needed based on certain configuration values
    being changed.

    Configuration names and values are placed on a work queue. Container
    values (dictionaries, lists, sets and tuples) are expanded in a sorted
    order, with their members appended to the end of the queue; any other
    value is converted with ``str()``. The resulting strings are joined
    (without a separator) and hashed.

    Args:
        config: the configuration

    Returns:
        the value returned by ``ConfluenceUtil.hash`` for the joined string
    """

    def _ordered(members, fallback_key=str):
        # Prefer the natural ordering of the members. Members which cannot
        # be compared with each other (e.g. mixed types, nested
        # dictionaries) raise a TypeError when sorted; order those by their
        # string form instead of aborting the build.
        members = list(members)
        try:
            return sorted(members)
        except TypeError:
            return sorted(members, key=fallback_key)

    # extract confluence configuration options; ordering on the option name
    # alone ensures option values are never compared against each other
    entries = []
    for opt in sorted(config.filter(['confluence']), key=lambda o: o.name):
        entries.append(opt.name)
        entries.append(opt.value)

    # compile a string to hash, sorting dictionary/list/etc. entries along
    # the way; a cursor walks the queue (rather than popping the head) so
    # that each entry is consumed in constant time while new entries can
    # still be appended to the tail
    hash_data = []
    cursor = 0
    while cursor < len(entries):
        value = entries[cursor]
        cursor += 1

        if isinstance(value, dict):
            # dictionary keys are unique, so the natural ordering of the
            # items is driven by the keys; the fallback orders on the
            # string form of each key
            items = _ordered(value.items(), lambda item: str(item[0]))
            for key, member in items:
                entries.append(key)
                entries.append(member)
        elif isinstance(value, (list, set, tuple)):
            entries.extend(_ordered(value))
        else:
            hash_data.append(str(value))

    # generate a configuration hash
    return ConfluenceUtil.hash(''.join(hash_data))
Loading