From 5106d9133bb30586fab1fca3f7b0dcf407f33d95 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Sat, 28 Sep 2019 19:16:44 -0400 Subject: [PATCH] space out lexeme ends for known broken markup in latexml 0.8.4 --- src/dnm/node.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/dnm/node.rs b/src/dnm/node.rs index 501fe5aeb0..32c071406d 100644 --- a/src/dnm/node.rs +++ b/src/dnm/node.rs @@ -1,5 +1,10 @@ +use lazy_static::lazy_static; use libxml::readonly::RoNode; use libxml::xpath::Context; +use regex::Regex; +lazy_static! { + static ref LEXEME_END_MARKER : Regex = Regex::new(r"((?:OPFUNCTION|OPERATOR|OPEN|CLOSE|UNKNOWN|RELOP|ADDOP|MULOP|ARROW|BIGOP|BINOP|ID|OVERACCENT|UNDERACCENT):end)").unwrap(); +} /// Map math nodes to their lexemes pub fn lexematize_math(node: RoNode, context: &mut Context) -> String { @@ -20,7 +25,8 @@ pub fn lexematize_math(node: RoNode, context: &mut Context) -> String { let mut annotation_string = anno.get_content(); // offer fix for latexml 0.8.4 serialization flaw in some cases (e.g. "POSTFIX:endID:end" // instead of "POSTFIX:end ID:end") - annotation_string = annotation_string + annotation_string = LEXEME_END_MARKER + .replace_all(&annotation_string, " $1 ") .split(":end") .collect::<Vec<_>>() .join(":end ");