diff --git a/se/se_epub_lint.py b/se/se_epub_lint.py index 658fe2a7..89b66c8e 100644 --- a/se/se_epub_lint.py +++ b/se/se_epub_lint.py @@ -1517,13 +1517,16 @@ def _lint_special_file_checks(self, filename: Path, dom: se.easy_xml.EasyXmlTree loi_text_matches_figure = False for child in figure.xpath("./img|./figcaption"): figure_text = "" + loi_text_to_compare = loi_text if child.tag == "img": figure_text = child.get_attr("alt") + # Replace/remove characters that don't appear in alt attributes. + loi_text_to_compare = loi_text_to_compare.replace(se.NO_BREAK_SPACE, ' ').replace(se.WORD_JOINER, '') elif child.tag == "figcaption": # Replace tabs and newlines with a single space to better match figcaptions that contain
figure_text = regex.sub(r"[ \n\t]+", " ", child.inner_text()) - if loi_text == figure_text: + if loi_text_to_compare == figure_text: loi_text_matches_figure = True break diff --git a/tests/lint/semantic/s-041/golden/s-041-out.txt b/tests/lint/semantic/s-041/golden/s-041-out.txt index ee7259d8..2c156325 100644 --- a/tests/lint/semantic/s-041/golden/s-041-out.txt +++ b/tests/lint/semantic/s-041/golden/s-041-out.txt @@ -1,9 +1,12 @@ s-004 [Error] chapter-1.xhtml `img` element missing `alt` attribute. + s-041 [Manual Review] chapter-1.xhtml The text in `#f-5`'s LoI entry does not match either its `
<figcaption>` element or its `<img>` `alt` attribute. s-041 [Manual Review] chapter-1.xhtml The text in `#f-6`'s LoI entry does not match either its `
<figcaption>` element or its `<img>` `alt` attribute. s-041 [Manual Review] chapter-1.xhtml The text in `#f-7`'s LoI entry does not match either its `
<figcaption>` element or its `<img>` `alt` attribute. +s-041 [Manual Review] chapter-1.xhtml The text in `#f-9`'s LoI entry does not +match either its `
<figcaption>` element or its `<img>` `alt` attribute. diff --git a/tests/lint/semantic/s-041/in/src/epub/text/chapter-1.xhtml b/tests/lint/semantic/s-041/in/src/epub/text/chapter-1.xhtml index b0d6e099..05501a11 100644 --- a/tests/lint/semantic/s-041/in/src/epub/text/chapter-1.xhtml +++ b/tests/lint/semantic/s-041/in/src/epub/text/chapter-1.xhtml @@ -16,6 +16,9 @@
t-6-cap
t-7-alt.
t-7-cap
+ +
Mr. Smith 2–3 years ago.
+
Mr. Smith 2–3 years ago.
diff --git a/tests/lint/semantic/s-041/in/src/epub/text/loi.xhtml b/tests/lint/semantic/s-041/in/src/epub/text/loi.xhtml index a8703ed8..5403a169 100644 --- a/tests/lint/semantic/s-041/in/src/epub/text/loi.xhtml +++ b/tests/lint/semantic/s-041/in/src/epub/text/loi.xhtml @@ -17,6 +17,12 @@
  • x

  • x

  • x

  • + + +
  • Mr. Smith 2⁠–⁠3 years ago.

  • + + +
  • Mr. Smith 2⁠–⁠3 years ago.