File tree 1 file changed +11
-10
lines changed
1 file changed +11
-10
lines changed Original file line number Diff line number Diff line change @@ -1265,25 +1265,26 @@ def proofread_canonicals(
1265
1265
purge (http , * paths_to_purge )
1266
1266
1267
1267
1268
+ # Python 3.12 onwards doesn't use self-closing tags for <link rel="canonical">
1269
+ _canonical_re = re .compile (
1270
+ b"""<link rel="canonical" href="https://docs.python.org/([^"]*)"(?: /)?>"""
1271
+ )
1272
+
1273
+
1268
1274
def _check_canonical_rel (file : Path , www_root : Path ):
1269
1275
# Check for a canonical relation link in the HTML.
1270
1276
# If one exists, ensure that the target exists
1271
1277
# or otherwise remove the canonical link element.
1272
- prefix = b'<link rel="canonical" href="https://docs.python.org/'
1273
- suffix = b'" />'
1274
- pfx_len = len (prefix )
1275
- sfx_len = len (suffix )
1276
1278
html = file .read_bytes ()
1277
- try :
1278
- start = html .index (prefix )
1279
- end = html .index (suffix , start + pfx_len )
1280
- except ValueError :
1279
+ canonical = _canonical_re .search (html )
1280
+ if canonical is None :
1281
1281
return None
1282
- target = html [ start + pfx_len : end ].decode (errors = "surrogateescape" )
1282
+ target = canonical [ 1 ].decode (encoding = "UTF-8" , errors = "surrogateescape" )
1283
1283
if (www_root / target ).exists ():
1284
1284
return None
1285
1285
logging .info ("Removing broken canonical from %s to %s" , file , target )
1286
- file .write_bytes (html [:start ] + html [end + sfx_len :])
1286
+ start , end = canonical .span ()
1287
+ file .write_bytes (html [:start ] + html [end :])
1287
1288
return file
1288
1289
1289
1290
You can’t perform that action at this time.
0 commit comments