Skip to content

Commit 46767cc

Browse files
committed
Account for recent removal of self-closing tags
1 parent e80b729 commit 46767cc

File tree

1 file changed

+11
-10
lines changed

1 file changed

+11
-10
lines changed

build_docs.py

+11 -10
Original file line number | Diff line number | Diff line change
@@ -1265,25 +1265,26 @@ def proofread_canonicals(
12651265
purge(http, *paths_to_purge)
12661266

12671267

1268+
# Python 3.12 onwards doesn't use self-closing tags for <link rel="canonical">
1269+
_canonical_re = re.compile(
1270+
b"""<link rel="canonical" href="https://docs.python.org/([^"]*)"(?: /)?>"""
1271+
)
1272+
1273+
12681274
def _check_canonical_rel(file: Path, www_root: Path):
12691275
# Check for a canonical relation link in the HTML.
12701276
# If one exists, ensure that the target exists
12711277
# or otherwise remove the canonical link element.
1272-
prefix = b'<link rel="canonical" href="https://docs.python.org/'
1273-
suffix = b'" />'
1274-
pfx_len = len(prefix)
1275-
sfx_len = len(suffix)
12761278
html = file.read_bytes()
1277-
try:
1278-
start = html.index(prefix)
1279-
end = html.index(suffix, start + pfx_len)
1280-
except ValueError:
1279+
canonical = _canonical_re.search(html)
1280+
if canonical is None:
12811281
return None
1282-
target = html[start + pfx_len : end].decode(errors="surrogateescape")
1282+
target = canonical[1].decode(encoding="UTF-8", errors="surrogateescape")
12831283
if (www_root / target).exists():
12841284
return None
12851285
logging.info("Removing broken canonical from %s to %s", file, target)
1286-
file.write_bytes(html[:start] + html[end + sfx_len :])
1286+
start, end = canonical.span()
1287+
file.write_bytes(html[:start] + html[end:])
12871288
return file
12881289

12891290

0 commit comments

Comments
 (0)