Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for lazy word generation #92

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion zspell/src/affix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

mod node;
mod parse;
mod rule;
mod parse_rule;
mod types;

use std::collections::BTreeMap;
Expand Down
4 changes: 2 additions & 2 deletions zspell/src/affix/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ use std::sync::Arc;

use lazy_static::lazy_static;
pub use node::AffixNode;
pub use parse_rule::{ParsedRule, ParsedRuleGroup};
use regex::Regex;
pub use rule::{ParsedRule, ParsedRuleGroup};

use super::{node, rule};
use super::{node, parse_rule};
use crate::affix::{
CompoundPattern, CompoundSyllable, Conversion, Encoding, FlagType, Phonetic, RuleType,
};
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion zspell/src/dict/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ pub enum Source {
/// The full rule that created this
rule: Arc<AfxRule>,
/// Index of the relevant pattern within the rule. This could potentially be a reference
/// but that might require a RefCell, and I don't want to risk reference
/// but that might require a RefCell, and I don't want to risk reference cycles.
pat_idx: usize,
},
/// This meta came from a .dic file, only contains morphinfo
Expand Down
50 changes: 49 additions & 1 deletion zspell/src/dict/rule.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Implementation for a stored rule

use std::borrow::Cow;
use std::hash::Hash;
use std::sync::Arc;

Expand Down Expand Up @@ -90,6 +91,17 @@
.filter_map(|(idx, pat)| pat.apply_pattern(stem, self.kind).map(|s| (idx, s)))
}

/// Reverse of [`Self::apply_patterns`]: try to remove each of this rule's patterns from `word`.
///
/// Yields one `(index, stem)` pair for every pattern whose `strip_pattern` call returns
/// `Some`, where `index` is the pattern's position within this rule's pattern list.
/// Patterns that cannot be stripped from `word` are skipped.
pub fn strip_patterns<'a>(
    &'a self,
    word: &'a str,
) -> impl Iterator<Item = (usize, Cow<'a, str>)> + 'a {
    // Read the rule kind once so the closure only has to carry a copy of it.
    let kind = self.kind;
    self.patterns
        .iter()
        .enumerate()
        .filter_map(move |(pat_idx, pattern)| {
            let stem = pattern.strip_pattern(word, kind)?;
            Some((pat_idx, stem))
        })
}

Check warning on line 103 in zspell/src/dict/rule.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/rule.rs#L95-L103

Added lines #L95 - L103 were not covered by tests

/// Borrow every pattern stored in this rule as a slice.
pub(crate) fn patterns(&self) -> &[AfxRulePattern] {
    &self.patterns[..]
}
Expand All @@ -98,6 +110,7 @@
/// A single affix rule application
#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
pub struct AfxRulePattern {
/// The prefix or suffix to be added
affix: Box<str>,
/// Condition to be met to apply this rule.
condition: Option<ReWrapper>,
Expand Down Expand Up @@ -139,7 +152,7 @@
&self.morph_info
}

// Verify the match condition and apply this rule
/// Verify the match condition and apply this rule
#[allow(clippy::option_if_let_else)]
fn apply_pattern(&self, s: &str, kind: RuleType) -> Option<String> {
// No return if condition doesn't match
Expand Down Expand Up @@ -173,6 +186,41 @@
}
}
}

/// Remove this pattern
// FIXME:PERF: maybe keep a `String` in the parent stack, take a `&mut` reference to return?
fn strip_pattern<'a>(&self, word: &'a str, kind: RuleType) -> Option<Cow<'a, str>> {
dbg!(word, kind);
let ret = match kind {
RuleType::Prefix => {
let Some(base) = dbg!(word.strip_prefix(self.affix.as_ref())) else {
return None;

Check warning on line 197 in zspell/src/dict/rule.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/rule.rs#L197

Added line #L197 was not covered by tests
};
self.strip.as_ref().map_or(Cow::Borrowed(base), |add_back| {
Cow::Owned(format!("{add_back}{base}"))
})
}
RuleType::Suffix => {
let Some(base) = dbg!(word.strip_suffix(self.affix.as_ref())) else {
return None;

Check warning on line 205 in zspell/src/dict/rule.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/rule.rs#L205

Added line #L205 was not covered by tests
};
self.strip.as_ref().map_or(Cow::Borrowed(base), |add_back| {
Cow::Owned(format!("{base}{add_back}"))
})
}
};

if let Some(re) = &self.condition {
// FIXME: we probably want to change this to return `None` if it turns out
// these patterns come up
debug_assert!(
re.is_match(ret.as_ref()),
"created word {ret} does not match {re:?}!"

Check warning on line 218 in zspell/src/dict/rule.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/rule.rs#L218

Added line #L218 was not covered by tests
);
}

Some(ret)
}
}

#[cfg(test)]
Expand Down
73 changes: 0 additions & 73 deletions zspell/src/dict/rules_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,76 +208,3 @@ mod tests {
// FIXME: do something with these
}
}

// TODO: evaluate this for hyphenation
// mod peek_map {
// use std::iter::Peekable;
// use unicode_segmentation::UnicodeSegmentation;

// pub struct PeekMap<I: Iterator, F>(Peekable<I>, F);

// pub fn peek_map<R, I: Iterator, F: FnMut(I::Item, Option<&I::Item>) -> R>(
// it: Peekable<I>,
// f: F,
// ) -> PeekMap<I, F> {
// PeekMap(it, f)
// }

// impl<R, I: Iterator, F: FnMut(I::Item, Option<&I::Item>) -> R> Iterator for PeekMap<I, F> {
// type Item = R;
// fn next(&mut self) -> Option<R> {
// let x = self.0.next()?;
// Some((self.1)(x, self.0.peek()))
// }
// }

// #[cfg(test)]
// mod tests {
// use super::*;

// #[test]
// fn test_x() {
// let s = "the quick brown. Fox Jum-ped -- where? 'over' (the) very-lazy dog";

// enum HyphenState {
// None,
// AwaitingHyphen(usize),
// AwaitingWord(usize)
// }

// let mut accum = HyphenState::None;

// let v: Vec<_> = peek_map(s.split_word_bound_indices().peekable(),
// |(idx, w), next|{

// let c1 = w.chars().next().unwrap();
// if !(c1.is_alphanumeric() || c1 == '-') {
// accum = HyphenState::None;
// return None;
// }

// if let Some((nidx, nw)) = next {
// // If our next item is a hyphen, start accumulating
// if nw == "-" {
// accum = HyphenState::AwaitingHyphen(idx);
// return None;
// }
// }
// match accum {
// HyphenState::None => {
// // No upcoming hyphen? Just return our value
// Some((idx, w))
// },
// HyphenState::AwaitingHyphen(_) => {

// },
// HyphenState::AwaitingWord(_) => todo!(),
// }
// }
// ).collect();

// dbg!(v);

// }
// }
// }
84 changes: 56 additions & 28 deletions zspell/src/dict/tests_rule.rs
Original file line number Diff line number Diff line change
@@ -1,53 +1,81 @@
use super::*;
use crate::affix::RuleType;
use crate::affix::RuleType::{self, Prefix, Suffix};

#[test]
fn test_check_condition() {
let mut kind = RuleType::Suffix;
let mut rule = AfxRulePattern::default();
rule.set_pattern("[^aeiou]y", kind).unwrap();
let mut rule_pat = AfxRulePattern::default();
rule_pat.set_pattern("[^aeiou]y", kind).unwrap();

// General tests, including with pattern in the middle
assert!(rule.check_condition("xxxy"));
assert!(!rule.check_condition("xxxay"));
assert!(!rule.check_condition("xxxyxx"));
assert!(rule_pat.check_condition("xxxy"));
assert!(!rule_pat.check_condition("xxxay"));
assert!(!rule_pat.check_condition("xxxyxx"));

// Test with prefix
kind = RuleType::Prefix;
rule.set_pattern("y[^aeiou]", kind).unwrap();
assert!(rule.check_condition("yxxx"));
assert!(!rule.check_condition("yaxxx"));
assert!(!rule.check_condition("xxxyxxx"));
rule_pat.set_pattern("y[^aeiou]", kind).unwrap();
assert!(rule_pat.check_condition("yxxx"));
assert!(!rule_pat.check_condition("yaxxx"));
assert!(!rule_pat.check_condition("xxxyxxx"));

// Test other real rules
kind = RuleType::Suffix;
rule.set_pattern("[sxzh]", kind).unwrap();
assert!(rule.check_condition("access"));
assert!(rule.check_condition("abyss"));
assert!(!rule.check_condition("accomplishment"));
assert!(rule.check_condition("mmms"));
assert!(!rule.check_condition("mmsmm"));
rule_pat.set_pattern("[sxzh]", kind).unwrap();
assert!(rule_pat.check_condition("access"));
assert!(rule_pat.check_condition("abyss"));
assert!(!rule_pat.check_condition("accomplishment"));
assert!(rule_pat.check_condition("mmms"));
assert!(!rule_pat.check_condition("mmsmm"));

// Check with default condition
rule.set_pattern(".", kind).unwrap();
assert!(rule.check_condition("xxx"));
rule_pat.set_pattern(".", kind).unwrap();
assert!(rule_pat.check_condition("xxx"));
}

/// One table-driven test case, laid out as
/// `(affix, strip, condition, kind, input, output)`.
///
/// `affix` and `strip` are passed to `AfxRulePattern::new`; `condition` and `kind` are
/// passed to `set_pattern`. `input` is the stem and `output` is the affixed word.
type TestRulePattern = (
&'static str,
Option<&'static str>,
&'static str,
RuleType,
&'static str,
&'static str,
);
/// Cases shared by `test_apply_pattern` (expects `input` -> `output`) and
/// `test_strip_pattern` (expects `output` -> `input`).
const RULE_PATTERNS: &[TestRulePattern] = &[
("zzz", Some("y"), "[^aeiou]y", Suffix, "xxxy", "xxxzzz"),
("zzz", Some("y"), "y[^aeiou]", Prefix, "yxxx", "zzzxxx"),
("zzz", None, ".", Suffix, "xxx", "xxxzzz"),
];

#[test]
fn test_apply_pattern() {
let mut kind = RuleType::Suffix;
let mut rule = AfxRulePattern::new("zzz", Some("y"));
for rule_pat in RULE_PATTERNS {
let (afx, strip, cond, kind, input, output) = rule_pat;
let mut rule = AfxRulePattern::new(afx, *strip);
rule.set_pattern(cond, *kind).unwrap();

rule.set_pattern("[^aeiou]y", kind).unwrap();
assert_eq!(rule.apply_pattern("xxxy", kind), Some("xxxzzz".to_owned()));
assert_eq!(
rule.apply_pattern(input, *kind),
Some((*output).into()),
"testing {rule_pat:?}"

Check warning on line 61 in zspell/src/dict/tests_rule.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/tests_rule.rs#L61

Added line #L61 was not covered by tests
);
}
}

kind = RuleType::Prefix;
rule.set_pattern("y[^aeiou]", kind).unwrap();
assert_eq!(rule.apply_pattern("yxxx", kind), Some("zzzxxx".to_owned()));
#[test]
fn test_strip_pattern() {
for rule_pat in RULE_PATTERNS {
let (afx, strip, cond, kind, input, output) = rule_pat;
let mut rule = AfxRulePattern::new(afx, *strip);
rule.set_pattern(cond, *kind).unwrap();

kind = RuleType::Suffix;
rule.set_pattern(".", kind).unwrap();
assert_eq!(rule.apply_pattern("xxx", kind), Some("xxxzzz".to_owned()));
assert_eq!(
rule.strip_pattern(output, *kind),
Some((*input).into()),
"testing {rule_pat:?}"

Check warning on line 76 in zspell/src/dict/tests_rule.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/tests_rule.rs#L76

Added line #L76 was not covered by tests
);
}
}

// #[test]
Expand Down