Skip to content
This repository has been archived by the owner on Mar 25, 2024. It is now read-only.

Commit

Permalink
space out lexeme ends for known broken markup in latexml 0.8.4
Browse files Browse the repository at this point in the history
  • Loading branch information
dginev committed Sep 29, 2019
1 parent 31de976 commit 5106d91
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion src/dnm/node.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
use lazy_static::lazy_static;
use libxml::readonly::RoNode;
use libxml::xpath::Context;
use regex::Regex;
lazy_static! {
    /// Matches a lexeme end marker: one of the listed category names
    /// (`OPFUNCTION`, `OPERATOR`, `OPEN`, `CLOSE`, `UNKNOWN`, `RELOP`, `ADDOP`,
    /// `MULOP`, `ARROW`, `BIGOP`, `BINOP`, `ID`, `OVERACCENT`, `UNDERACCENT`)
    /// immediately followed by the literal `:end`.
    ///
    /// The whole marker is captured as group 1, so a replacement template can
    /// refer to it as `$1` (e.g. `" $1 "` to pad the marker with spaces).
    /// The pattern is a fixed literal, so the `unwrap` only fails if the
    /// pattern itself is edited into something invalid.
    static ref LEXEME_END_MARKER: Regex = Regex::new(
        r"((?:OPFUNCTION|OPERATOR|OPEN|CLOSE|UNKNOWN|RELOP|ADDOP|MULOP|ARROW|BIGOP|BINOP|ID|OVERACCENT|UNDERACCENT):end)"
    )
    .unwrap();
}

/// Map math nodes to their lexemes
pub fn lexematize_math(node: RoNode, context: &mut Context) -> String {
Expand All @@ -20,7 +25,8 @@ pub fn lexematize_math(node: RoNode, context: &mut Context) -> String {
let mut annotation_string = anno.get_content();
// offer fix for latexml 0.8.4 serialization flaw in some cases (e.g. "POSTFIX:endID:end"
// instead of "POSTFIX:end ID:end")
annotation_string = annotation_string
annotation_string = LEXEME_END_MARKER
.replace_all(&annotation_string, " $1 ")
.split(":end")
.collect::<Vec<&str>>()
.join(":end ");
Expand Down

0 comments on commit 5106d91

Please sign in to comment.