diff --git a/zspell/src/affix.rs b/zspell/src/affix.rs index 21e2ea2..18efdaa 100644 --- a/zspell/src/affix.rs +++ b/zspell/src/affix.rs @@ -2,7 +2,7 @@ mod node; mod parse; -mod rule; +mod parse_rule; mod types; use std::collections::BTreeMap; diff --git a/zspell/src/affix/parse.rs b/zspell/src/affix/parse.rs index cd8d9d9..d590de3 100644 --- a/zspell/src/affix/parse.rs +++ b/zspell/src/affix/parse.rs @@ -8,10 +8,10 @@ use std::sync::Arc; use lazy_static::lazy_static; pub use node::AffixNode; +pub use parse_rule::{ParsedRule, ParsedRuleGroup}; use regex::Regex; -pub use rule::{ParsedRule, ParsedRuleGroup}; -use super::{node, rule}; +use super::{node, parse_rule}; use crate::affix::{ CompoundPattern, CompoundSyllable, Conversion, Encoding, FlagType, Phonetic, RuleType, }; diff --git a/zspell/src/affix/rule.rs b/zspell/src/affix/parse_rule.rs similarity index 100% rename from zspell/src/affix/rule.rs rename to zspell/src/affix/parse_rule.rs diff --git a/zspell/src/dict/meta.rs b/zspell/src/dict/meta.rs index 88a0f32..4d7ae0b 100644 --- a/zspell/src/dict/meta.rs +++ b/zspell/src/dict/meta.rs @@ -55,7 +55,7 @@ pub enum Source { /// The full rule that created this rule: Arc, /// Index of the relevant pattern within the rule. This could potentially be a reference - /// but that might require a RefCell, and I don't want to risk reference + /// but that might require a RefCell, and I don't want to risk reference cycles. pat_idx: usize, }, /// This meta came from a .dic file, only contains morphinfo diff --git a/zspell/src/dict/rule.rs b/zspell/src/dict/rule.rs index fad6a59..b5d6776 100644 --- a/zspell/src/dict/rule.rs +++ b/zspell/src/dict/rule.rs @@ -92,8 +92,14 @@ impl AfxRule { } /// Do the opposite of [`apply_patterns`], try to strip this pattern from a word - pub fn strip_patterns(&self, word: &str) { - todo!() + pub fn strip_patterns<'a>( + &'a self, + word: &'a str, + ) -> impl Iterator)> + 'a { + self.patterns + .iter() + .enumerate() + .filter_map(|(idx, pat)| pat.strip_pattern(word, self.kind).map(|s| (idx, s))) } pub(crate) fn patterns(&self) -> &[AfxRulePattern] { @@ -190,23 +196,23 @@ impl AfxRulePattern { let Some(base) = dbg!(word.strip_prefix(self.affix.as_ref())) else { return None; }; - match &self.strip { - Some(add_back) => Cow::Owned(format!("{add_back}{base}")), - None => Cow::Borrowed(base), - } + self.strip.as_ref().map_or(Cow::Borrowed(base), |add_back| { + Cow::Owned(format!("{add_back}{base}")) + }) } RuleType::Suffix => { let Some(base) = dbg!(word.strip_suffix(self.affix.as_ref())) else { return None; }; - match &self.strip { - Some(add_back) => Cow::Owned(format!("{base}{add_back}")), - None => Cow::Borrowed(base), - } + self.strip.as_ref().map_or(Cow::Borrowed(base), |add_back| { + Cow::Owned(format!("{base}{add_back}")) + }) } }; if let Some(re) = &self.condition { + // FIXME: we probably want to change this to return `None` if it turns out + // these patterns come up debug_assert!( re.is_match(ret.as_ref()), "created word {ret} does not match {re:?}!" diff --git a/zspell/src/dict/rules_apply.rs b/zspell/src/dict/rules_apply.rs index ed8abb8..cd5217f 100644 --- a/zspell/src/dict/rules_apply.rs +++ b/zspell/src/dict/rules_apply.rs @@ -208,76 +208,3 @@ mod tests { // FIXME: do something with these } } - -// TODO: evaluate this for hyphenation -// mod peek_map { -// use std::iter::Peekable; -// use unicode_segmentation::UnicodeSegmentation; - -// pub struct PeekMap(Peekable, F); - -// pub fn peek_map) -> R>( -// it: Peekable, -// f: F, -// ) -> PeekMap { -// PeekMap(it, f) -// } - -// impl) -> R> Iterator for PeekMap { -// type Item = R; -// fn next(&mut self) -> Option { -// let x = self.0.next()?; -// Some((self.1)(x, self.0.peek())) -// } -// } - -// #[cfg(test)] -// mod tests { -// use super::*; - -// #[test] -// fn test_x() { -// let s = "the quick brown. Fox Jum-ped -- where? 'over' (the) very-lazy dog"; - -// enum HyphenState { -// None, -// AwaitingHyphen(usize), -// AwaitingWord(usize) -// } - -// let mut accum = HyphenState::None; - -// let v: Vec<_> = peek_map(s.split_word_bound_indices().peekable(), -// |(idx, w), next|{ - -// let c1 = w.chars().next().unwrap(); -// if !(c1.is_alphanumeric() || c1 == '-') { -// accum = HyphenState::None; -// return None; -// } - -// if let Some((nidx, nw)) = next { -// // If our next item is a hyphen, start accumulating -// if nw == "-" { -// accum = HyphenState::AwaitingHyphen(idx); -// return None; -// } -// } -// match accum { -// HyphenState::None => { -// // No upcoming hyphen? Just return our value -// Some((idx, w)) -// }, -// HyphenState::AwaitingHyphen(_) => { - -// }, -// HyphenState::AwaitingWord(_) => todo!(), -// } -// } -// ).collect(); - -// dbg!(v); - -// } -// } -// } diff --git a/zspell/src/dict/tests_rule.rs b/zspell/src/dict/tests_rule.rs index de27afa..ec8a81a 100644 --- a/zspell/src/dict/tests_rule.rs +++ b/zspell/src/dict/tests_rule.rs @@ -4,33 +4,33 @@ use crate::affix::RuleType::{self, Prefix, Suffix}; #[test] fn test_check_condition() { let mut kind = RuleType::Suffix; - let mut rule = AfxRulePattern::default(); - rule.set_pattern("[^aeiou]y", kind).unwrap(); + let mut rule_pat = AfxRulePattern::default(); + rule_pat.set_pattern("[^aeiou]y", kind).unwrap(); // General tests, including with pattern in the middle - assert!(rule.check_condition("xxxy")); - assert!(!rule.check_condition("xxxay")); - assert!(!rule.check_condition("xxxyxx")); + assert!(rule_pat.check_condition("xxxy")); + assert!(!rule_pat.check_condition("xxxay")); + assert!(!rule_pat.check_condition("xxxyxx")); // Test with prefix kind = RuleType::Prefix; - rule.set_pattern("y[^aeiou]", kind).unwrap(); - assert!(rule.check_condition("yxxx")); - assert!(!rule.check_condition("yaxxx")); - assert!(!rule.check_condition("xxxyxxx")); + rule_pat.set_pattern("y[^aeiou]", kind).unwrap(); + assert!(rule_pat.check_condition("yxxx")); + assert!(!rule_pat.check_condition("yaxxx")); + assert!(!rule_pat.check_condition("xxxyxxx")); // Test other real rules kind = RuleType::Suffix; - rule.set_pattern("[sxzh]", kind).unwrap(); - assert!(rule.check_condition("access")); - assert!(rule.check_condition("abyss")); - assert!(!rule.check_condition("accomplishment")); - assert!(rule.check_condition("mmms")); - assert!(!rule.check_condition("mmsmm")); + rule_pat.set_pattern("[sxzh]", kind).unwrap(); + assert!(rule_pat.check_condition("access")); + assert!(rule_pat.check_condition("abyss")); + assert!(!rule_pat.check_condition("accomplishment")); + assert!(rule_pat.check_condition("mmms")); + assert!(!rule_pat.check_condition("mmsmm")); // Check with default condition - rule.set_pattern(".", kind).unwrap(); - assert!(rule.check_condition("xxx")); + rule_pat.set_pattern(".", kind).unwrap(); + assert!(rule_pat.check_condition("xxx")); } // affix, strip, condition, kind, input, output