Skip to content

Commit

Permalink
Skip stemming strings with non-ascii chars
Browse files Browse the repository at this point in the history
  • Loading branch information
wltsmrz committed Nov 16, 2020
1 parent d4ff3ad commit d6e39c0
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 4 deletions.
6 changes: 3 additions & 3 deletions src/stemmer.carp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
(register to-lower (Fn [&String] String) "String_to_lower_")

(defn trim-from [x j]
- (String.slice x 0 (+ (utf8-length x) j)))
+ (String.slice x 0 (+ (String.length x) j)))

(defn replace-from [x j r]
(String.append &(trim-from x j) r))
Expand Down Expand Up @@ -219,7 +219,7 @@
x))

(defn stem [x]
- (cond (< (String.length x) 3)
+ (cond (or (< (String.length x) 3) (not (= (String.length x) (utf8-length x))))
@x
(=> x
(to-lower)
Expand All @@ -234,5 +234,5 @@
)))

(defn stem-cstr [x]
- (cstr &(stem &(String.from-cstr x))))
+ (String.cstr &(stem &(String.from-cstr x))))
)
1 change: 0 additions & 1 deletion src/string_add.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,3 @@ String String_to_lower_(const String* p) {
while (i--) *(r + i) = tolower(*(r + i));
return r;
}

0 comments on commit d6e39c0

Please sign in to comment.