Number filtering: use str.isdigit() instead of unicode.isnumeric(); keep duplicate stems in lemmatize()

psych0der · psych0der · commit 5139b5d34794 · 2013-11-28T22:21:43.000+05:30
diff --git a/stemmer.py b/stemmer.py
@@ -413,7 +413,7 @@ def lemmatize(input,multiList=False,cascade=True):
 		for word in lst:
 			stemmed.append(lemmatizationEngine(word))
 
-		stemmed = list(set(stemmed))
+		#stemmed = list(set(stemmed))
 		return stemmed
 
 
diff --git a/stopwordremover.py b/stopwordremover.py
@@ -25,17 +25,17 @@
 
 def remove_stop_word(input,multiList=False):
 	if isinstance(input,str):
-		filtered = [word for word in normalizer.normalize(input) if (word not in stopwords['english'] and not unicode(word).isnumeric())]
+		filtered = [word for word in normalizer.normalize(input) if (word not in stopwords['english'] and not word.isdigit())]
 		return filtered
 
 	if not isinstance(input, basestring):
 		if multiList == True:
 			for index,lst in input:
-				input[index] = [word for word in normalizer.normalize(lst) (word not in stopwords['english'] and not unicode(word).isnumeric())]
+				input[index] = [word for word in normalizer.normalize(lst) (word not in stopwords['english'] and not word.isdigit())]
 			return input
 
 		else :
-			input = [word for word in normalizer.normalize(input) (word not in stopwords['english'] and not unicode(word).isnumeric())]
+			input = [word for word in normalizer.normalize(input) (word not in stopwords['english'] and not word.isdigit())]
 			return input