Skip to content

Commit e7ed39d

Browse files
Narsil and Anantha Kandrapu authored
Fixing NormalizedString append when normalized is empty. (#1717)
Co-authored-by: Anantha Kandrapu <[email protected]>
1 parent 0ff2ab0 commit e7ed39d

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

tokenizers/src/tokenizer/normalizer.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,9 @@ impl NormalizedString {
517517
if let Some((b, prev)) = self.normalized.char_indices().last() {
518518
let transformations = std::iter::once((prev, 0)).chain(s.chars().map(|c| (c, 1)));
519519
self.transform_range(Range::Normalized(b..), transformations, 0);
520+
} else {
521+
let transformations = s.chars().map(|c| (c, 1));
522+
self.transform_range(Range::Normalized(..), transformations, 0);
520523
}
521524
self
522525
}
@@ -2284,4 +2287,24 @@ mod tests {
22842287
s.lowercase();
22852288
assert_eq!(s.get(), "a...");
22862289
}
2290+
2291+
// Regression test for `append` on a `NormalizedString` whose normalized text
// is empty: after `clear()`, `char_indices().last()` yields nothing, so
// `append` has no previous character to anchor the new text on.
#[test]
2292+
fn test_append_after_clear() {
2293+
let mut n = NormalizedString::from("Hello");
2294+
assert_eq!(n.get(), "Hello");
2295+
2296+
// Empty the normalized text; the assertion below confirms it is now "".
n.clear();
2297+
assert_eq!(n.get(), "");
2298+
2299+
// Appending to the emptied string must produce exactly the appended text.
n.append(" World");
2300+
assert_eq!(n.get(), " World");
2301+
2302+
// The original is expected to keep its length (5, from "Hello") while the
// normalized length reflects only the appended " World" (6).
assert_eq!(n.len_original(), 5);
2303+
assert_eq!(n.len(), 6);
2304+
2305+
// The original text is still retrievable in full ...
assert_eq!(n.get_range_original(Range::Original(0..5)), Some("Hello"));
2306+
// ... but the appended normalized range is expected to map to an empty
// slice of the original, since none of it came from "Hello".
assert_eq!(n.get_range_original(Range::Normalized(0..6)), Some(""));
2307+
2308+
// The whole normalized range is the appended text.
assert_eq!(n.get_range(Range::Normalized(0..6)), Some(" World"));
2309+
}
22872310
}

0 commit comments

Comments
 (0)