Skip to content

Commit 5139b5d

Browse files
committed
number filtering added
1 parent 7922ea2 commit 5139b5d

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

stemmer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ def lemmatize(input,multiList=False,cascade=True):
413413
for word in lst:
414414
stemmed.append(lemmatizationEngine(word))
415415

416-
stemmed = list(set(stemmed))
416+
#stemmed = list(set(stemmed))
417417
return stemmed
418418

419419

stopwordremover.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,17 @@
2525

2626
def remove_stop_word(input,multiList=False):
    """Remove English stop words and all-digit tokens from ``input``.

    The text is first passed through ``normalizer.normalize``; each word it
    yields is kept only if it is not in ``stopwords['english']`` and
    ``word.isdigit()`` is false.

    Args:
        input: Either a single string, or (when ``multiList`` is true) a
            mutable, index-assignable sequence whose elements are each
            handed to ``normalizer.normalize``. Any other non-string value
            is handed to ``normalizer.normalize`` as a whole.
            NOTE(review): what non-string values ``normalize`` accepts is
            not visible from here -- confirm against the normalizer module.
        multiList: When true, filter every element of ``input`` separately
            and store the result back into ``input`` at the same index.

    Returns:
        A new list of the surviving words, or -- when ``multiList`` is true
        -- the same ``input`` object, mutated in place so that each element
        is its filtered word list.
    """
    english_stop_words = stopwords['english']

    def _filter(text):
        # The ``if`` clause was missing in two of the three original copies
        # of this comprehension, which turned the condition into a call on
        # the result of ``normalize`` instead of a filter.
        return [word for word in normalizer.normalize(text)
                if word not in english_stop_words and not word.isdigit()]

    # Use one string test for both branches: ``basestring`` on Python 2 (so
    # ``unicode`` input is no longer silently dropped), ``str`` on Python 3
    # where ``basestring`` does not exist.
    try:
        text_types = basestring
    except NameError:
        text_types = str

    if isinstance(input, text_types):
        return _filter(input)

    if multiList:
        # enumerate() provides the (index, element) pairs; iterating the
        # sequence directly would try to unpack each element instead.
        for index, lst in enumerate(input):
            input[index] = _filter(lst)
        return input

    return _filter(input)
4040

4141

0 commit comments

Comments
 (0)