diff --git a/se/se_epub_lint.py b/se/se_epub_lint.py
index ad126b80..5db15a9c 100644
--- a/se/se_epub_lint.py
+++ b/se/se_epub_lint.py
@@ -289,7 +289,7 @@
"s-038", "Illegal asterism. Section/scene breaks must be defined by an [xhtml]<hr/>[/] element."
"s-039", "[text]Ibid[/] in endnotes. “Ibid” means “The previous reference” which is meaningless with popup endnotes"
"s-040", f"[attr]#{figure_ref}[/] not found in file [path][link=file://{self.path / 'src/epub/text' / chapter_ref}]{chapter_ref}[/][/]."
-"s-041", f"The [xhtml]<figcaption>[/] element of [attr]#{figure_ref}[/] does not match the text in its LoI entry."
+"s-041", f"The text in [attr]#{figure_ref}[/]'s LoI entry does not match either its [xhtml]<figcaption>[/] element or its [xhtml]<img>[/] [attr]alt[/] attribute."
"s-042", "[xhtml]<table>[/] element without [xhtml]<tbody>[/] child."
"s-043", "[val]se:short-story[/] semantic on element that is not [xhtml]<article>[/]."
"s-044", "Element with poem or verse semantic, without descendant [xhtml]<p>[/] (stanza) element."
@@ -1504,8 +1504,6 @@ def _lint_special_file_checks(self, filename: Path, dom: se.easy_xml.EasyXmlTree
for node in dom.xpath("/html/body/nav[contains(@epub:type, 'loi')]//li//a"):
figure_ref = node.get_attr("href").split("#")[1]
chapter_ref = regex.findall(r"(.*?)#.*", node.get_attr("href"))[0]
- figure_img_alt = ""
- figcaption_text = ""
loi_text = node.inner_text()
file_dom = self.get_dom(self.content_path / "text" / chapter_ref)
@@ -1515,19 +1513,21 @@ def _lint_special_file_checks(self, filename: Path, dom: se.easy_xml.EasyXmlTree
messages.append(LintMessage("s-040", f"[attr]#{figure_ref}[/] not found in file [path][link=file://{self.path / 'src/epub/text' / chapter_ref}]{chapter_ref}[/][/].", se.MESSAGE_TYPE_ERROR, filename))
continue
- for child in figure.xpath("./*"):
+ loi_text_matches_figure = False
+ for child in figure.xpath("./img|./figcaption"):
+ figure_text = ""
if child.tag == "img":
- figure_img_alt = child.get_attr("alt")
-
- if child.tag == "figcaption":
- figcaption_text = child.inner_text()
-
+ figure_text = child.get_attr("alt")
+ elif child.tag == "figcaption":
# Replace tabs and newlines with a single space to better match figcaptions that contain <br/> elements
- figcaption_text = regex.sub(r"(\n|\t)", " ", figcaption_text)
- figcaption_text = regex.sub(r"[ ]+", " ", figcaption_text)
+ figure_text = regex.sub(r"[ \n\t]+", " ", child.inner_text())
+
+ if loi_text == figure_text:
+ loi_text_matches_figure = True
+ break
- if (figcaption_text != "" and loi_text != "" and figcaption_text != loi_text) and (figure_img_alt != "" and loi_text != "" and figure_img_alt != loi_text):
- messages.append(LintMessage("s-041", f"The [xhtml]<figcaption>[/] element of [attr]#{figure_ref}[/] does not match the text in its LoI entry.", se.MESSAGE_TYPE_WARNING, self.path / "src/epub/text" / chapter_ref))
+ if not loi_text_matches_figure:
+ messages.append(LintMessage("s-041", f"The text in [attr]#{figure_ref}[/]'s LoI entry does not match either its [xhtml]<figcaption>[/] element or its [xhtml]<img>[/] [attr]alt[/] attribute.", se.MESSAGE_TYPE_WARNING, self.path / "src/epub/text" / chapter_ref))
return messages
@@ -2445,7 +2445,9 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
img_alt_not_typogrified = []
img_alt_lacking_punctuation = []
for node in nodes:
- if "titlepage.svg" not in node.get_attr("src"):
+ img_src = node.lxml_element.get("src")
+ # Avoid crashing if the src attribute is missing
+ if img_src and "titlepage.svg" not in img_src:
ebook_flags["has_images"] = True # Save for a later check
alt = node.get_attr("alt")
@@ -2460,7 +2462,6 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
img_alt_lacking_punctuation.append(node.to_tag_string())
# Check that alt attributes match SVG titles
- img_src = node.lxml_element.get("src")
if img_src and img_src.endswith("svg"):
title_text = ""
image_ref = img_src.split("/").pop()
diff --git a/tests/lint/semantic/s-041/golden/s-041-out.txt b/tests/lint/semantic/s-041/golden/s-041-out.txt
new file mode 100644
index 00000000..ee7259d8
--- /dev/null
+++ b/tests/lint/semantic/s-041/golden/s-041-out.txt
@@ -0,0 +1,9 @@
+s-004 [Error] chapter-1.xhtml `img` element missing `alt` attribute.
+
+
+s-041 [Manual Review] chapter-1.xhtml The text in `#f-5`'s LoI entry does not
+match either its `<figcaption>` element or its `<img>` `alt` attribute.
+s-041 [Manual Review] chapter-1.xhtml The text in `#f-6`'s LoI entry does not
+match either its `<figcaption>` element or its `<img>` `alt` attribute.
+s-041 [Manual Review] chapter-1.xhtml The text in `#f-7`'s LoI entry does not
+match either its `<figcaption>` element or its `<img>` `alt` attribute.
diff --git a/tests/lint/semantic/s-041/in/src/epub/content.opf b/tests/lint/semantic/s-041/in/src/epub/content.opf
new file mode 100644
index 00000000..0af098ae
--- /dev/null
+++ b/tests/lint/semantic/s-041/in/src/epub/content.opf
@@ -0,0 +1,93 @@
+
+
+
+ url:https://standardebooks.org/ebooks/jane-austen/unknown-novel/john-doe
+ 1900-01-01T00:00:00Z
+ 1900-01-01T00:00:00Z
+ The source text and artwork in this ebook are believed to be in the United States public domain; that is, they are believed to be free of copyright restrictions in the United States. They may still be copyrighted in other countries, so users located outside of the United States must check their local laws before using this ebook. The creators of, and contributors to, this ebook dedicate their contributions to the worldwide public domain via the terms in the [CC0 1.0 Universal Public Domain Dedication](https://creativecommons.org/publicdomain/zero/1.0/).
+ Standard Ebooks
+ Standard Ebooks
+ https://standardebooks.org
+ bkd
+ mdc
+ pbl
+ The League of Moveable Type
+ League of Moveable Type, The
+ https://www.theleagueofmoveabletype.com
+ tyd
+
+ Standard Ebooks
+ textual
+ textual
+ readingOrder
+ structuralNavigation
+ tableOfContents
+ unlocked
+ none
+ This publication conforms to WCAG 2.2 Level AA.
+
+ Unknown Novel
+ Unknown Novel
+ England--Social life and customs--19th century--Fiction
+ Sisters -- Fiction
+ LCSH
+ sh2008114941
+ LCSH
+ sh2008111400
+ Fiction
+ A short test novel for lint testing.
+
+ <p>A short test novel for lint testing.</p>
+
+ en-GB
+ https://www.gutenberg.org/ebooks/161
+ https://archive.org/details/bub_gb_RtT0OLKFMHsC
+ WORD_COUNT
+ READING_EASE
+ https://en.wikipedia.org/wiki/Unknown_Jane_Austen_Novel
+ https://github.com/standardebooks/jane-austen_unknown-novel_john-doe
+ Jane Austen
+ Austen, Jane
+ https://en.wikipedia.org/wiki/Jane_Austen
+ http://id.loc.gov/authorities/names/n79032879
+ aut
+ Georg Friedrich Kersting
+ Kersting, Georg Friedrich
+ https://en.wikipedia.org/wiki/Georg_Friedrich_Kersting
+ http://id.loc.gov/authorities/names/n83319941
+ art
+ Anonymous
+ Anonymous
+ trc
+ John Doe
+ Doe, John
+ bkp
+ blw
+ cov
+ ill
+ mrk
+ pfr
+ tyg
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/lint/semantic/s-041/in/src/epub/text/chapter-1.xhtml b/tests/lint/semantic/s-041/in/src/epub/text/chapter-1.xhtml
new file mode 100644
index 00000000..b0d6e099
--- /dev/null
+++ b/tests/lint/semantic/s-041/in/src/epub/text/chapter-1.xhtml
@@ -0,0 +1,21 @@
+
+
+
+ I
+
+
+
+
Unknown Novel
+ was published in 1810 by
+ Jane Austen.
+
This ebook was produced for
+ Standard Ebooks
+ by
+ An Anonymous Volunteer,
+ and is based on a transcription produced in 2010 by
+ An Anonymous Volunteer
+ for
+ Project Gutenberg
+ and is based on digital scans from the
+ Internet Archive.
+
The cover page is adapted from
+ At the Mirror,
+ a painting completed in 1827 by
+ Georg Friedrich Kersting.
+ The cover and title pages feature the
+ League Spartan and Sorts Mill Goudy
+ typefaces created in 2014 and 2009 by
+ The League of Moveable Type.
+
The first edition of this ebook was released on
+ January 1, 1900, 12:00 a.m.
+ You can check for updates to this ebook, view its revision history, or download it for different ereading systems at
+ standardebooks.org/ebooks/jane-austen/unknown-novel/john-doe.
+
The volunteer-driven Standard Ebooks project relies on readers like you to submit typos, corrections, and other improvements. Anyone can contribute at standardebooks.org.
+
+
+
diff --git a/tests/lint/semantic/s-041/in/src/epub/text/loi.xhtml b/tests/lint/semantic/s-041/in/src/epub/text/loi.xhtml
new file mode 100644
index 00000000..a8703ed8
--- /dev/null
+++ b/tests/lint/semantic/s-041/in/src/epub/text/loi.xhtml
@@ -0,0 +1,23 @@
+
+
+
+ List of Illustrations
+
+
+
+
+