Skip to content

Commit

Permalink
Clean up regex flags
Browse files Browse the repository at this point in the history
- MULTILINE is irrelevant unless ^ is used to match line starts
- IGNORECASE is irrelevant when case-agnostic character classes are
  already being used (e.g. \w or \p{Letter}) or when only
  punctuation/spaces are being matched
- DOTALL is irrelevant unless . is used to match all characters
  • Loading branch information
apasel422 committed Jul 15, 2024
1 parent a394344 commit 8665308
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 51 deletions.
6 changes: 3 additions & 3 deletions se/commands/create_draft.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,8 +727,8 @@ def _create_draft(args: Namespace, plain_output: bool):

producers_text = regex.sub(r".+?Produced by (.+?)\s*$", "\\1", producers_text, flags=regex.DOTALL)
producers_text = regex.sub(r"\(.+?\)", "", producers_text, flags=regex.DOTALL)
producers_text = regex.sub(r"(at )?https?://www\.pgdp\.net", "", producers_text, flags=regex.DOTALL)
producers_text = regex.sub(r"[\r\n]+", " ", producers_text, flags=regex.DOTALL)
producers_text = regex.sub(r"(at )?https?://www\.pgdp\.net", "", producers_text)
producers_text = regex.sub(r"[\r\n]+", " ", producers_text)
producers_text = regex.sub(r",? and ", ", and ", producers_text)
producers_text = producers_text.replace(" and the Online", " and The Online")
producers_text = producers_text.replace(", and ", ", ").strip()
Expand Down Expand Up @@ -945,7 +945,7 @@ def _create_draft(args: Namespace, plain_output: bool):

i = i + 1

metadata_xml = regex.sub(r"\t\t<dc:contributor id=\"transcriber-1\">TRANSCRIBER</dc:contributor>\s*<meta property=\"file-as\" refines=\"#transcriber-1\">TRANSCRIBER_SORT</meta>\s*<meta property=\"se:url.homepage\" refines=\"#transcriber-1\">TRANSCRIBER_URL</meta>\s*<meta property=\"role\" refines=\"#transcriber-1\" scheme=\"marc:relators\">trc</meta>", "\t\t" + producers_xhtml.strip(), metadata_xml, flags=regex.DOTALL)
metadata_xml = regex.sub(r"\t\t<dc:contributor id=\"transcriber-1\">TRANSCRIBER</dc:contributor>\s*<meta property=\"file-as\" refines=\"#transcriber-1\">TRANSCRIBER_SORT</meta>\s*<meta property=\"se:url\.homepage\" refines=\"#transcriber-1\">TRANSCRIBER_URL</meta>\s*<meta property=\"role\" refines=\"#transcriber-1\" scheme=\"marc:relators\">trc</meta>", "\t\t" + producers_xhtml.strip(), metadata_xml)

if ebook_wiki_url:
metadata_xml = metadata_xml.replace(">EBOOK_WIKI_URL<", f">{ebook_wiki_url}<")
Expand Down
2 changes: 1 addition & 1 deletion se/commands/word_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def word_count(plain_output: bool) -> int:

else:
# We couldn't generate a dom, fall back to regex replacements
xhtml = regex.sub(r"<(pre|div|p)[^>]*?>[^<]*Project Gutenberg[^<]+?</\1>", "", xhtml, flags=regex.IGNORECASE|regex.DOTALL)
xhtml = regex.sub(r"<(pre|div|p)[^>]*?>[^<]*Project Gutenberg[^<]+?</\1>", "", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"<span class=\"pagenum\">.+?</span>", "", xhtml, flags=regex.IGNORECASE|regex.DOTALL)

total_word_count += se.formatting.get_word_count(xhtml)
Expand Down
18 changes: 9 additions & 9 deletions se/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,16 +362,16 @@ def get_word_count(xhtml: str) -> int:
xhtml = regex.sub(r"<.+?>", " ", xhtml, flags=regex.DOTALL)

# Replace some formatting characters
xhtml = regex.sub(r"[…–—― ‘’“”\{\}\(\)]", " ", xhtml, flags=regex.IGNORECASE | regex.DOTALL)
xhtml = regex.sub(r"[…–—― ‘’“”\{\}\(\)]", " ", xhtml)

	# Remove word-connecting dashes, apostrophes, commas, and slashes (and/or), they count as a word boundary but they shouldn't
xhtml = regex.sub(fr"[\p{{Letter}}0-9][\-\'\,\.\/{se.NO_BREAK_HYPHEN}{se.SHY_HYPHEN}][\p{{Letter}}0-9]", "aa", xhtml, flags=regex.IGNORECASE | regex.DOTALL)
xhtml = regex.sub(fr"[\p{{Letter}}0-9][\-\'\,\.\/{se.NO_BREAK_HYPHEN}{se.SHY_HYPHEN}][\p{{Letter}}0-9]", "aa", xhtml)

# Replace sequential spaces with one space
xhtml = regex.sub(r"\s+", " ", xhtml, flags=regex.IGNORECASE | regex.DOTALL)
xhtml = regex.sub(r"\s+", " ", xhtml)

# Get the word count
return len(regex.findall(r"\b\w+\b", xhtml, flags=regex.IGNORECASE | regex.DOTALL))
return len(regex.findall(r"\b\w+\b", xhtml))

def _replace_character_references(match_object) -> str:
"""Replace most XML character references with literal characters.
Expand Down Expand Up @@ -660,13 +660,13 @@ def format_xhtml(xhtml: str) -> str:
xhtml = regex.sub(r"&#?\w+;", _replace_character_references, xhtml)

# Remove unnecessary doctypes which can cause xmllint to hang
xhtml = regex.sub(r"<!DOCTYPE[^>]+?>", "", xhtml, flags=regex.DOTALL)
xhtml = regex.sub(r"<!DOCTYPE[^>]+?>", "", xhtml)

# Remove white space between opening/closing tag and text nodes
# We do this first so that we can still format line breaks after <br/>
# Exclude comments
xhtml = regex.sub(r"(<(?:[^!/][^>]*?[^/]|[a-z])>)\s+([^\s<])", r"\1\2", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"([^\s>])\s+(</[^>]+?>)", r"\1\2", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"([^\s>])\s+(</[^>]+?>)", r"\1\2", xhtml)

try:
tree = _format_xml_str(xhtml)
Expand Down Expand Up @@ -1088,7 +1088,7 @@ def format_css(css: str) -> str:
output = regex.sub(r"(@[\p{Letter}]+) \(", "\\1(", output)

# Remove empty rules
output = regex.sub(r"^\t*[^\{\}]+?\{\s*\}\n", "", output, flags=regex.DOTALL|regex.MULTILINE)
output = regex.sub(r"^\t*[^\{\}]+?\{\s*\}\n", "", output, flags=regex.MULTILINE)

return output

Expand All @@ -1103,7 +1103,7 @@ def remove_tags(text: str) -> str:
A string with all HTML tags removed
"""

return regex.sub(r"</?[\p{Letter}]+[^>]*?>", "", text, flags=regex.DOTALL)
return regex.sub(r"</?[\p{Letter}]+[^>]*?>", "", text)

def get_ordinal(number: str) -> str:
"""
Expand Down Expand Up @@ -1296,7 +1296,7 @@ def make_url_safe(text: str) -> str:
text = regex.sub(r"['‘’`]", "", text)

# 5. Convert any non-digit, non-letter character to a space
text = regex.sub(r"[^0-9\p{Letter}]", " ", text, flags=regex.IGNORECASE)
text = regex.sub(r"[^0-9\p{Letter}]", " ", text)

# 6. Convert any instance of one or more space to a dash
text = regex.sub(r"\s+", "-", text)
Expand Down
2 changes: 1 addition & 1 deletion se/se_epub.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ def recompose(self, output_xhtml5: bool, extra_css_file: Union[Path,None] = None
output_xhtml = output_xhtml.replace("epub|type", "data-epub-type")
output_xhtml = output_xhtml.replace("xml|lang", "lang")
output_xhtml = regex.sub(r" xmlns.+?=\".+?\"", "", output_xhtml)
output_xhtml = regex.sub(r"@namespace (epub|xml).+?\s+", "", output_xhtml, flags=regex.MULTILINE)
output_xhtml = regex.sub(r"@namespace (epub|xml).+?\s+", "", output_xhtml)

# The Nu HTML5 Validator barfs if non-void elements are self-closed (like <td/>)
# Try to un-self-close them for HTML5 output.
Expand Down
2 changes: 1 addition & 1 deletion se/se_epub_generate_toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def toc_link(self) -> str:
out_string += f"<a href=\"text/{self.file_link}\">{self.title}</a>\n"

# Replace <br/> with a single space
out_string = regex.sub(r"<br/>\s*", " ", out_string, flags=regex.DOTALL)
out_string = regex.sub(r"<br/>\s*", " ", out_string)

return out_string

Expand Down
14 changes: 7 additions & 7 deletions se/se_epub_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -1827,7 +1827,7 @@ def _lint_xhtml_syntax_checks(self, filename: Path, dom: se.easy_xml.EasyXmlTree
title = regex.sub(r"^[\s\.\,\!\?\:\;]*", "", title)

# Normalize whitespace
title = regex.sub(r"\s+", " ", title, flags=regex.DOTALL).strip()
title = regex.sub(r"\s+", " ", title).strip()

# Do we have a subtitle? If so the first letter of that must be capitalized, so we pull that out
subtitle_matches = regex.findall(r"(.*?)<span epub:type=\"subtitle\">(.*?)</span>(.*?)", title, flags=regex.DOTALL)
Expand Down Expand Up @@ -2313,7 +2313,7 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,

# Check for repeated punctuation, but first remove `&amp;` so we don't match `&amp;,`
# Remove tds with repeated ” as they are probably ditto marks
matches = regex.findall(r"[,;]{2,}.{0,20}", file_contents.replace("&amp;", "")) + regex.findall(r"(?:“\s*“|”\s*”|’ ’|‘\s*‘).{0,20}", regex.sub(r"<td>[”\s]+?(<a .+?epub:type=\"noteref\">.+?</a>)?</td>", "", file_contents)) + regex.findall(r"[\p{Letter}][,\.:;]\s[,\.:;]\s?[\p{Letter}<].{0,20}", file_contents, flags=regex.IGNORECASE)
matches = regex.findall(r"[,;]{2,}.{0,20}", file_contents.replace("&amp;", "")) + regex.findall(r"(?:“\s*“|”\s*”|’ ’|‘\s*‘).{0,20}", regex.sub(r"<td>[”\s]+?(<a .+?epub:type=\"noteref\">.+?</a>)?</td>", "", file_contents)) + regex.findall(r"[\p{Letter}][,\.:;]\s[,\.:;]\s?[\p{Letter}<].{0,20}", file_contents)
if matches:
messages.append(LintMessage("t-008", "Repeated punctuation.", se.MESSAGE_TYPE_WARNING, filename, matches))

Expand Down Expand Up @@ -2608,7 +2608,7 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
messages.append(LintMessage("t-048", "Chapter opening text in all-caps.", se.MESSAGE_TYPE_ERROR, filename, [node.to_string() for node in nodes]))

# Check for two-em-dashes used for elision instead of three-em-dashes
matches = regex.findall(fr"[^{se.WORD_JOINER}\p{{Letter}}”]⸺[^“{se.WORD_JOINER}\p{{Letter}}].*", file_contents, flags=regex.MULTILINE)
matches = regex.findall(fr"[^{se.WORD_JOINER}\p{{Letter}}”]⸺[^“{se.WORD_JOINER}\p{{Letter}}].*", file_contents)
if matches:
messages.append(LintMessage("t-049", "Two-em-dash used for eliding an entire word. Use a three-em-dash instead.", se.MESSAGE_TYPE_WARNING, filename, matches))

Expand Down Expand Up @@ -2934,7 +2934,7 @@ def _lint_xhtml_typo_checks(filename: Path, dom: se.easy_xml.EasyXmlTree, file_c
# Exclude paragraphs in blockquotes, which may have special quoting rules, and "continued" paragraphs, which may be continued dialog without an “
for node in dom_copy.xpath("/html/body//p[not(ancestor::blockquote) and not(contains(@class, 'continued'))]"):
node.set_attr("id", "lint-" + str(node_number))
temp_xhtml = temp_xhtml + f"<p id=\"lint-{node_number}\">" + regex.sub(r"[\s\n]+", " ", node.inner_text(), flags=regex.DOTALL) + "\n"
temp_xhtml = temp_xhtml + f"<p id=\"lint-{node_number}\">" + regex.sub(r"\s+", " ", node.inner_text()) + "\n"
node_number = node_number + 1

replacement_count = 1
Expand All @@ -2943,12 +2943,12 @@ def _lint_xhtml_typo_checks(filename: Path, dom: se.easy_xml.EasyXmlTree, file_c
(temp_xhtml, replacement_count) = regex.subn(r"“[^“]+?”", " ", temp_xhtml) # Remove all regular quotes

# Remove contractions to reduce rsquo for next regex
temp_xhtml = regex.sub(r"[\p{Letter}]’[\p{Letter}]", " ", temp_xhtml, flags=regex.MULTILINE)
temp_xhtml = regex.sub(r"[\p{Letter}]’[\p{Letter}]", " ", temp_xhtml)

# Remove all runs of ldquo that are likely to spill to the next <p>
replacement_count = 1
while replacement_count > 0:
(temp_xhtml, replacement_count) = regex.subn(r"“[^“”]+?$", " ", temp_xhtml, flags=regex.MULTILINE)
(temp_xhtml, replacement_count) = regex.subn(r"“[^“”]+?$", " ", temp_xhtml)

# Match problem `‘` using regex, and if found, get the actual node text from the dom to return.
typos = []
Expand Down Expand Up @@ -3009,7 +3009,7 @@ def _lint_xhtml_typo_checks(filename: Path, dom: se.easy_xml.EasyXmlTree, file_c

# Check for closing rdquo without opening ldquo.
# Remove tds in case rdquo means "ditto mark"
typos = regex.findall(r"”[^“‘]+?”", regex.sub(r"<td[^>]*?>[”\s]+?(<a .+?epub:type=\"noteref\">.+?</a>)?</td>", "", file_contents), flags=regex.DOTALL)
typos = regex.findall(r"”[^“‘]+?”", regex.sub(r"<td[^>]*?>[”\s]+?(<a .+?epub:type=\"noteref\">.+?</a>)?</td>", "", file_contents))

# We create a filter to try to exclude nested quotations
# Remove tags in case they're enclosing punctuation we want to match against at the end of a sentence.
Expand Down
56 changes: 28 additions & 28 deletions se/typography.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,18 @@ def typogrify(xhtml: str, smart_quotes: bool = True) -> str:
xhtml = xhtml.replace("——", "⸺")

# Smartypants doesn't do well on em dashes followed by open quotes. Fix that here
xhtml = regex.sub(r"—”([\p{Letter}])", r"—“\1", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"—’([\p{Letter}])", r"—‘\1", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"-“</p>", r"—”</p>", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"‘”</p>", fr"’{se.HAIR_SPACE}”</p>", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"—”([\p{Letter}])", r"—“\1", xhtml)
xhtml = regex.sub(r"—’([\p{Letter}])", r"—‘\1", xhtml)
xhtml = regex.sub(r"-“</p>", r"—”</p>", xhtml)
xhtml = regex.sub(r"‘”</p>", fr"’{se.HAIR_SPACE}”</p>", xhtml)

# Now that we've fixed Smartypants' output, put our quotes back in
xhtml = xhtml.replace("!#se:rsquo#!", "’")

# Remove spaces between en and em dashes
# Note that we match at least one character before the dashes, so that we don't catch start-of-line em dashes like in poetry.
# We do a negative lookbehind for <br/ to prevent newlines/indents after <br/>s from being included
xhtml = regex.sub(r"(?<!<br/)([^\.…\s])\s*([–—])\s*", r"\1\2", xhtml, flags=regex.DOTALL)
xhtml = regex.sub(r"(?<!<br/)([^\.…\s])\s*([–—])\s*", r"\1\2", xhtml)

# First, remove stray word joiners
xhtml = xhtml.replace(se.WORD_JOINER, "")
Expand All @@ -123,12 +123,12 @@ def typogrify(xhtml: str, smart_quotes: bool = True) -> str:
xhtml = xhtml.replace(se.SHY_HYPHEN, "")

# Fix some common em-dash transcription errors
xhtml = regex.sub(r"([:;])-([\p{Letter}])", r"\1—\2", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"([\p{Letter}])-“", r"\1—“", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"([:;])-([\p{Letter}])", r"\1—\2", xhtml)
xhtml = regex.sub(r"([\p{Letter}])-“", r"\1—“", xhtml)
xhtml = regex.sub(r":-</", fr":{se.WORD_JOINER}—</", xhtml)

# Em dashes and two-em-dashes can be broken before, so add a word joiner between letters/punctuation and the following em dash
xhtml = regex.sub(fr"([^\s{se.WORD_JOINER}{se.NO_BREAK_SPACE}{se.HAIR_SPACE}])([—⸻])", fr"\1{se.WORD_JOINER}\2", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"([^\s{se.WORD_JOINER}{se.NO_BREAK_SPACE}{se.HAIR_SPACE}])([—⸻])", fr"\1{se.WORD_JOINER}\2", xhtml)

# Add en dashes; don't replace match that is within an html tag, since ids and attrs often contain the pattern DIGIT-DIGIT
xhtml = regex.sub(r"(?<!<[^>]*)([0-9]+)\-([0-9]+)", r"\1–\2", xhtml)
Expand All @@ -146,7 +146,7 @@ def typogrify(xhtml: str, smart_quotes: bool = True) -> str:
xhtml = regex.sub(fr"([\p{{Lowercase_Letter}}]){se.WORD_JOINER}—th\b", r"\1 —th", xhtml)

# Remove word joiners from following opening tags--they're usually never correct
xhtml = regex.sub(fr"<([\p{{Letter}}]+)([^>]*?)>{se.WORD_JOINER}", r"<\1\2>", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"<([\p{{Letter}}]+)([^>]*?)>{se.WORD_JOINER}", r"<\1\2>", xhtml)

# Add a word joiner after em dashes within <cite> elements
xhtml = regex.sub(r"<cite([^>]*?)>—", fr"<cite\1>—{se.WORD_JOINER}", xhtml)
Expand Down Expand Up @@ -213,7 +213,7 @@ def typogrify(xhtml: str, smart_quotes: bool = True) -> str:
xhtml = regex.sub(r"(\s)‘a’(\s)", r"\1’a’\2", xhtml, flags=regex.IGNORECASE)

# Years
xhtml = regex.sub(r"‘([0-9]{2,}[^\p{Letter}0-9’])", r"’\1", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"‘([0-9]{2,}[^\p{Letter}0-9’])", r"’\1", xhtml)

xhtml = regex.sub(r"‘([Aa]ve|[Oo]me|[Ii]m|[Mm]idst|[Gg]ainst|[Nn]eath|[Ee]m|[Cc]os|[Tt]is|[Tt]isn’t|[Tt]was|[Tt]ain’t|[Tt]wixt|[Tt]were|[Tt]would|[Tt]wouldn|[Tt]won|[Tt]ween|[Tt]will|[Rr]ound|[Pp]on|[Uu]ns?|[Uu]d|[Cc]ept|[Oo]w|[Aa]ppen|[Ee])\b", r"’\1", xhtml)

Expand Down Expand Up @@ -245,34 +245,34 @@ def typogrify(xhtml: str, smart_quotes: bool = True) -> str:
xhtml = regex.sub(r"(?<!A\. )B\.\s+C\.", r"BC", xhtml)

# Put spacing next to close quotes
xhtml = regex.sub(fr"“[\s{se.NO_BREAK_SPACE}]*‘", fr"“{se.HAIR_SPACE}‘", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"’[\s{se.NO_BREAK_SPACE}]*”", fr"’{se.HAIR_SPACE}”", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"“[\s{se.NO_BREAK_SPACE}]*’", fr"“{se.HAIR_SPACE}’", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"‘[\s{se.NO_BREAK_SPACE}]*“", fr"‘{se.HAIR_SPACE}“", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"‘[\s{se.NO_BREAK_SPACE}]*’", fr"‘{se.HAIR_SPACE}’", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"“[\s{se.NO_BREAK_SPACE}]*‘", fr"“{se.HAIR_SPACE}‘", xhtml)
xhtml = regex.sub(fr"’[\s{se.NO_BREAK_SPACE}]*”", fr"’{se.HAIR_SPACE}”", xhtml)
xhtml = regex.sub(fr"“[\s{se.NO_BREAK_SPACE}]*’", fr"“{se.HAIR_SPACE}’", xhtml)
xhtml = regex.sub(fr"‘[\s{se.NO_BREAK_SPACE}]*“", fr"‘{se.HAIR_SPACE}“", xhtml)
xhtml = regex.sub(fr"‘[\s{se.NO_BREAK_SPACE}]*’", fr"‘{se.HAIR_SPACE}’", xhtml)

# We require a non-letter char at the end, otherwise we might match a contraction: “Hello,” ’e said.
xhtml = regex.sub(fr"”[\s{se.NO_BREAK_SPACE}]*’([^\p{{Letter}}])", fr"”{se.HAIR_SPACE}’\1", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"”[\s{se.NO_BREAK_SPACE}]*’([^\p{{Letter}}])", fr"”{se.HAIR_SPACE}’\1", xhtml)

# Fix ellipses spacing
xhtml = regex.sub(r"\s*\.\s*\.\s*\.\s*", r"…", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"[\s{se.NO_BREAK_SPACE}]?…[\s{se.NO_BREAK_SPACE}]?\.", fr".{se.HAIR_SPACE}…", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"[\s{se.NO_BREAK_SPACE}]?…[\s{se.NO_BREAK_SPACE}]?", fr"{se.HAIR_SPACE}… ", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"<p([^>]*?)>{se.HAIR_SPACE}…", r"<p\1>…", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"\s*\.\s*\.\s*\.\s*", r"…", xhtml)
xhtml = regex.sub(fr"[\s{se.NO_BREAK_SPACE}]?…[\s{se.NO_BREAK_SPACE}]?\.", fr".{se.HAIR_SPACE}…", xhtml)
xhtml = regex.sub(fr"[\s{se.NO_BREAK_SPACE}]?…[\s{se.NO_BREAK_SPACE}]?", fr"{se.HAIR_SPACE}… ", xhtml)
xhtml = regex.sub(fr"<p([^>]*?)>{se.HAIR_SPACE}…", r"<p\1>…", xhtml)

# Remove spaces between opening tags and ellipses
xhtml = regex.sub(fr"(<[\p{{Letter}}0-9]+[^<]+?>)[\s{se.NO_BREAK_SPACE}]+?…", r"\1…", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"(<[\p{{Letter}}0-9]+[^<]+?>)[\s{se.NO_BREAK_SPACE}]+?…", r"\1…", xhtml)

# Remove spaces between closing tags and ellipses
xhtml = regex.sub(fr"…[\s{se.NO_BREAK_SPACE}]?(</[\p{{Letter}}0-9]+>)", r"…\1", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"…[\s{se.NO_BREAK_SPACE}]+([\)”’])(?![\p{{Letter}}])", r"…\1", xhtml, flags=regex.IGNORECASE) # If followed by a letter, the single quote is probably a leading elision
xhtml = regex.sub(fr"([\(“‘])[\s{se.NO_BREAK_SPACE}]+…", r"\1…", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"…[\s{se.NO_BREAK_SPACE}]?([\!\?\.\;\,])", fr"…{se.HAIR_SPACE}\1", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"([\!\?\.\;”’])[\s{se.NO_BREAK_SPACE}]?…", fr"\1{se.HAIR_SPACE}…", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"\,[\s{se.NO_BREAK_SPACE}]?…", fr",{se.HAIR_SPACE}…", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(fr"…[\s{se.NO_BREAK_SPACE}]?(</[\p{{Letter}}0-9]+>)", r"…\1", xhtml)
xhtml = regex.sub(fr"…[\s{se.NO_BREAK_SPACE}]+([\)”’])(?![\p{{Letter}}])", r"…\1", xhtml) # If followed by a letter, the single quote is probably a leading elision
xhtml = regex.sub(fr"([\(“‘])[\s{se.NO_BREAK_SPACE}]+…", r"\1…", xhtml)
xhtml = regex.sub(fr"…[\s{se.NO_BREAK_SPACE}]?([\!\?\.\;\,])", fr"…{se.HAIR_SPACE}\1", xhtml)
xhtml = regex.sub(fr"([\!\?\.\;”’])[\s{se.NO_BREAK_SPACE}]?…", fr"\1{se.HAIR_SPACE}…", xhtml)
xhtml = regex.sub(fr"\,[\s{se.NO_BREAK_SPACE}]?…", fr",{se.HAIR_SPACE}…", xhtml)

# Add nbsp to ellipses that open dialog
xhtml = regex.sub(r"([“‘])…\s([\p{Letter}0-9])", fr"\1…{se.NO_BREAK_SPACE}\2", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"([“‘])…\s([\p{Letter}0-9])", fr"\1…{se.NO_BREAK_SPACE}\2", xhtml)

# Don't use . ... if within a clause
xhtml = regex.sub(r"\.(\s…\s[\p{Lowercase_Letter}])", r"\1", xhtml)
Expand Down
2 changes: 1 addition & 1 deletion se/vendor/kobo_touch_extended/kobo.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def append_kobo_spans_from_text(node, text):
return False
else:
# Split text in sentences
groups = regex.split(fr'(.*?[\.\!\?\:](?:{se.HAIR_SPACE}…)?[\'"\u201d\u2019]?(?:{se.HAIR_SPACE}\u201d)?\s*)', text, flags=regex.MULTILINE)
groups = regex.split(fr'(.*?[\.\!\?\:](?:{se.HAIR_SPACE}…)?[\'"\u201d\u2019]?(?:{se.HAIR_SPACE}\u201d)?\s*)', text)
# Remove empty strings resulting from split()
groups = [g for g in groups if g != ""]

Expand Down

0 comments on commit 8665308

Please sign in to comment.