Skip to content

Commit

Permalink
fix incorrect index calculation in hashmap
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Nov 29, 2023
1 parent 2b8ec1c commit 0f40286
Show file tree
Hide file tree
Showing 10 changed files with 70 additions and 5 deletions.
11 changes: 9 additions & 2 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
Changelog
---------

[0.3.1] - 2023-11-29
^^^^^^^^^^^^^^^^^^^^
Fixed
~~~~~
* fixed incorrect index calculation inside the hashmap lookup function, which led to a
crash in the Damerau-Levenshtein implementation

[0.3.0] - 2023-11-27
^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^
Previous versions only existed for testing purposes years ago. This is a complete
rewrite porting a subset of the features provided in the C++ implementation of
rapidfuzz. The remaining features will be added in later releases.

Added
~~~~~~~
~~~~~
* added implementations of the following string metrics:

* Levenshtein distance
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
description="rapid fuzzy string matching library"
name = "rapidfuzz"
version = "0.3.0"
version = "0.3.1"
authors = ["maxbachmann <[email protected]>"]
edition = "2021"
readme = "Readme.md"
Expand Down
4 changes: 2 additions & 2 deletions src/details/growing_hashmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ where
/// strategy to `CPython` and `Ruby`
fn lookup(&self, key: u64) -> usize {
let hash = key;
let mut i = (hash & self.mask as u64) as usize;
let mut i = hash as usize & self.mask as usize;

let map = self
.map
Expand All @@ -113,7 +113,7 @@ where

let mut perturb = key;
loop {
i = (i * 5 + perturb as usize + 1) % 128;
i = (i * 5 + perturb as usize + 1) & self.mask as usize;

if map[i].value == Default::default() || map[i].key == key {
return i;
Expand Down
8 changes: 8 additions & 0 deletions src/distance/damerau_levenshtein.rs
Original file line number Diff line number Diff line change
Expand Up @@ -628,4 +628,12 @@ mod tests {
0.0001
);
}

/// Expects `_test_distance` to report 5 for two Cyrillic (non-ASCII) words
/// that are fed in as `char` iterators.
#[test]
fn unicode() {
    let (lhs, rhs) = ("Иванко", "Петрунко");
    let observed = _test_distance(lhs.chars(), rhs.chars(), None, None);
    assert_eq!(Some(5), observed);
}
}
8 changes: 8 additions & 0 deletions src/distance/indel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -790,4 +790,12 @@ mod tests {

assert_eq!(Some(231), test_distance_ascii(s1, s2, None, None));
}

/// Expects `test_distance` to report 8 for two Cyrillic (non-ASCII) words
/// that are fed in as `char` iterators.
#[test]
fn unicode() {
    let (lhs, rhs) = ("Иванко", "Петрунко");
    let observed = test_distance(lhs.chars(), rhs.chars(), None, None);
    assert_eq!(Some(8), observed);
}
}
9 changes: 9 additions & 0 deletions src/distance/jaro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1111,4 +1111,13 @@ mod tests {
}
}
}

/// Expects `_test_distance` to report 0.375 (within a 0.0001 tolerance) for
/// two Cyrillic (non-ASCII) words that are fed in as `char` iterators.
#[test]
fn unicode() {
    let (lhs, rhs) = ("Иванко", "Петрунко");
    let observed = _test_distance(lhs.chars(), rhs.chars(), None, None);
    assert_delta!(Some(0.375), observed, 0.0001);
}
}
9 changes: 9 additions & 0 deletions src/distance/jaro_winkler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -715,4 +715,13 @@ mod tests {
}
}
}

/// Expects `_test_distance` to report 0.375 (within a 0.0001 tolerance) for
/// two Cyrillic (non-ASCII) words that are fed in as `char` iterators.
#[test]
fn unicode() {
    let (lhs, rhs) = ("Иванко", "Петрунко");
    let observed = _test_distance(lhs.chars(), rhs.chars(), None, None);
    assert_delta!(Some(0.375), observed, 0.0001);
}
}
8 changes: 8 additions & 0 deletions src/distance/lcs_seq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1208,4 +1208,12 @@ mod tests {
let b = "220";
assert_eq!(Some(1), test_similarity_ascii(a, b, None, None));
}

/// Expects `test_distance` to report 5 for two Cyrillic (non-ASCII) words
/// that are fed in as `char` iterators.
#[test]
fn unicode() {
    let (lhs, rhs) = ("Иванко", "Петрунко");
    let observed = test_distance(lhs.chars(), rhs.chars(), None, None);
    assert_eq!(Some(5), observed);
}
}
8 changes: 8 additions & 0 deletions src/distance/levenshtein.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2115,4 +2115,12 @@ mod tests {
distance(OCR_EXAMPLE1.iter(), OCR_EXAMPLE2.iter(), None, None, 0)
);
}

/// Expects `_test_distance` to report 5 for two Cyrillic (non-ASCII) words
/// that are fed in as `char` iterators, with all three optional arguments
/// left as `None`.
#[test]
fn unicode() {
    let (lhs, rhs) = ("Иванко", "Петрунко");
    let observed = _test_distance(lhs.chars(), rhs.chars(), None, None, None);
    assert_eq!(Some(5), observed);
}
}
8 changes: 8 additions & 0 deletions src/distance/osa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -606,4 +606,12 @@ mod tests {
let s2 = "b".to_string() + &filler + "AC" + &filler + "b";
assert_eq!(Some(3), _test_distance_ascii(&s1, &s2, None, None));
}

/// Expects `_test_distance` to report 5 for two Cyrillic (non-ASCII) words
/// that are fed in as `char` iterators.
#[test]
fn unicode() {
    let (lhs, rhs) = ("Иванко", "Петрунко");
    let observed = _test_distance(lhs.chars(), rhs.chars(), None, None);
    assert_eq!(Some(5), observed);
}
}

0 comments on commit 0f40286

Please sign in to comment.