Skip to content

Commit

Permalink
Add 'strip_patterns' function
Browse files Browse the repository at this point in the history
  • Loading branch information
tgross35 committed Nov 25, 2023
1 parent e2d3211 commit 53ea69f
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 96 deletions.
2 changes: 1 addition & 1 deletion zspell/src/affix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

mod node;
mod parse;
mod rule;
mod parse_rule;
mod types;

use std::collections::BTreeMap;
Expand Down
4 changes: 2 additions & 2 deletions zspell/src/affix/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ use std::sync::Arc;

use lazy_static::lazy_static;
pub use node::AffixNode;
pub use parse_rule::{ParsedRule, ParsedRuleGroup};
use regex::Regex;
pub use rule::{ParsedRule, ParsedRuleGroup};

use super::{node, rule};
use super::{node, parse_rule};
use crate::affix::{
CompoundPattern, CompoundSyllable, Conversion, Encoding, FlagType, Phonetic, RuleType,
};
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion zspell/src/dict/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ pub enum Source {
/// The full rule that created this
rule: Arc<AfxRule>,
/// Index of the relevant pattern within the rule. This could potentially be a reference
/// but that might require a RefCell, and I don't want to risk reference
/// but that might require a RefCell, and I don't want to risk reference cycles.
pat_idx: usize,
},
/// This meta came from a .dic file, only contains morphinfo
Expand Down
12 changes: 10 additions & 2 deletions zspell/src/dict/rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,14 @@ impl AfxRule {
}

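/// Do the opposite of [`apply_patterns`]: try to strip each of this rule's patterns from
/// `word`, yielding the pattern index and the stripped result wherever stripping succeeds.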
pub fn strip_patterns(&self, word: &str) {
todo!()
pub fn strip_patterns<'a>(
&'a self,
word: &'a str,
) -> impl Iterator<Item = (usize, Cow<'a, str>)> + 'a {
self.patterns
.iter()
.enumerate()
.filter_map(|(idx, pat)| pat.strip_pattern(word, self.kind).map(|s| (idx, s)))
}

Check warning on line 103 in zspell/src/dict/rule.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/rule.rs#L95-L103

Added lines #L95 - L103 were not covered by tests

pub(crate) fn patterns(&self) -> &[AfxRulePattern] {
Expand Down Expand Up @@ -207,6 +213,8 @@ impl AfxRulePattern {
};

if let Some(re) = &self.condition {
// FIXME: we probably want to change this to return `None` if it turns out
// these patterns come up
debug_assert!(
re.is_match(ret.as_ref()),
"created word {ret} does not match {re:?}!"

Check warning on line 220 in zspell/src/dict/rule.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/rule.rs#L220

Added line #L220 was not covered by tests
Expand Down
73 changes: 0 additions & 73 deletions zspell/src/dict/rules_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,76 +208,3 @@ mod tests {
// FIXME: do something with these
}
}

// TODO: evaluate this for hyphenation
// mod peek_map {
// use std::iter::Peekable;
// use unicode_segmentation::UnicodeSegmentation;

// pub struct PeekMap<I: Iterator, F>(Peekable<I>, F);

// pub fn peek_map<R, I: Iterator, F: FnMut(I::Item, Option<&I::Item>) -> R>(
// it: Peekable<I>,
// f: F,
// ) -> PeekMap<I, F> {
// PeekMap(it, f)
// }

// impl<R, I: Iterator, F: FnMut(I::Item, Option<&I::Item>) -> R> Iterator for PeekMap<I, F> {
// type Item = R;
// fn next(&mut self) -> Option<R> {
// let x = self.0.next()?;
// Some((self.1)(x, self.0.peek()))
// }
// }

// #[cfg(test)]
// mod tests {
// use super::*;

// #[test]
// fn test_x() {
// let s = "the quick brown. Fox Jum-ped -- where? 'over' (the) very-lazy dog";

// enum HyphenState {
// None,
// AwaitingHyphen(usize),
// AwaitingWord(usize)
// }

// let mut accum = HyphenState::None;

// let v: Vec<_> = peek_map(s.split_word_bound_indices().peekable(),
// |(idx, w), next|{

// let c1 = w.chars().next().unwrap();
// if !(c1.is_alphanumeric() || c1 == '-') {
// accum = HyphenState::None;
// return None;
// }

// if let Some((nidx, nw)) = next {
// // If our next item is a hyphen, start accumulating
// if nw == "-" {
// accum = HyphenState::AwaitingHyphen(idx);
// return None;
// }
// }
// match accum {
// HyphenState::None => {
// // No upcoming hyphen? Just return our value
// Some((idx, w))
// },
// HyphenState::AwaitingHyphen(_) => {

// },
// HyphenState::AwaitingWord(_) => todo!(),
// }
// }
// ).collect();

// dbg!(v);

// }
// }
// }
34 changes: 17 additions & 17 deletions zspell/src/dict/tests_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,33 @@ use crate::affix::RuleType::{self, Prefix, Suffix};
#[test]
fn test_check_condition() {
let mut kind = RuleType::Suffix;
let mut rule = AfxRulePattern::default();
rule.set_pattern("[^aeiou]y", kind).unwrap();
let mut rule_pat = AfxRulePattern::default();
rule_pat.set_pattern("[^aeiou]y", kind).unwrap();

// General tests, including with pattern in the middle
assert!(rule.check_condition("xxxy"));
assert!(!rule.check_condition("xxxay"));
assert!(!rule.check_condition("xxxyxx"));
assert!(rule_pat.check_condition("xxxy"));
assert!(!rule_pat.check_condition("xxxay"));
assert!(!rule_pat.check_condition("xxxyxx"));

// Test with prefix
kind = RuleType::Prefix;
rule.set_pattern("y[^aeiou]", kind).unwrap();
assert!(rule.check_condition("yxxx"));
assert!(!rule.check_condition("yaxxx"));
assert!(!rule.check_condition("xxxyxxx"));
rule_pat.set_pattern("y[^aeiou]", kind).unwrap();
assert!(rule_pat.check_condition("yxxx"));
assert!(!rule_pat.check_condition("yaxxx"));
assert!(!rule_pat.check_condition("xxxyxxx"));

// Test other real rules
kind = RuleType::Suffix;
rule.set_pattern("[sxzh]", kind).unwrap();
assert!(rule.check_condition("access"));
assert!(rule.check_condition("abyss"));
assert!(!rule.check_condition("accomplishment"));
assert!(rule.check_condition("mmms"));
assert!(!rule.check_condition("mmsmm"));
rule_pat.set_pattern("[sxzh]", kind).unwrap();
assert!(rule_pat.check_condition("access"));
assert!(rule_pat.check_condition("abyss"));
assert!(!rule_pat.check_condition("accomplishment"));
assert!(rule_pat.check_condition("mmms"));
assert!(!rule_pat.check_condition("mmsmm"));

// Check with default condition
rule.set_pattern(".", kind).unwrap();
assert!(rule.check_condition("xxx"));
rule_pat.set_pattern(".", kind).unwrap();
assert!(rule_pat.check_condition("xxx"));
}

// affix, strip, condition, kind, input, output
Expand Down

0 comments on commit 53ea69f

Please sign in to comment.