Skip to content

Commit 2605807

Browse files
committed
Add .csv support & example training data
1 parent cb9baf1 commit 2605807

File tree

6 files changed

+5621
-30
lines changed

6 files changed

+5621
-30
lines changed

Diff for: .gitignore

+2
Original file line number | Diff line number | Diff line change
@@ -39,3 +39,5 @@ nosetests.xml
3939
phone.txt
4040
email.txt
4141
test.txt
42+
test.py
43+
spam_keywords.txt

Diff for: bayes.db

443 KB
Binary file not shown.

Diff for: bayes.py

+1
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ def register_mode(mode_class):
1717
register_mode(Status)
1818

1919
args = sys.argv
20+
print(args)
2021
usage = 'Usage: %s %s <mode specific args>' % (args[0], '|'.join(modes.keys()))
2122

2223
if (len(args) < 2):

Diff for: learn.py

+43-30
Original file line number | Diff line number | Diff line change
@@ -2,41 +2,54 @@
22
from mode import Mode
33
from words import list_to_dict
44
from words import text_to_list
5+
import csv
6+
import codecs
57

68
class Learn(Mode):
7-
def validate(self, args):
8-
valid_args = False
9-
usage = 'Usage: %s learn <doc type> <file> <count>' % args[0]
9+
ext=""
10+
def validate(self, args):
11+
valid_args = False
12+
usage = 'Usage: %s learn <doc type> <file> <count>' % args[0]
1013

11-
if len(args) == 5:
12-
doc_type = args[2]
13-
14-
file_contents = None
15-
try:
16-
file_contents = open(args[3], 'r').read()
17-
except Exception as e:
18-
raise ValueError(usage + '\nUnable to read specified file "%s", the error message was: %s' % (args[3], e))
14+
if len(args) == 5:
15+
doc_type = args[2]
16+
17+
file_contents = None
18+
try:
19+
ext = (args[3])[-3:]
20+
if (ext=="csv"):
21+
f_open = codecs.open('args[3]','r',encoding='utf-8',errors='ignore')
22+
text = list(csv.reader(f_open, delimiter=','))
23+
file_contents = ""
24+
for sen in text:
25+
file_contents = file_contents + (sen[1])
26+
if (ext=="txt"):
27+
file_contents = open(args[3], 'r').read()
28+
except Exception as e:
29+
raise ValueError(usage + '\nUnable to read specified file "%s", the error message was: %s' % (args[3], e))
1930

20-
count = 0
21-
try:
22-
count = int(args[4])
23-
except:
24-
raise ValueError(usage + '\nEnter an integer value for the "count" parameter')
31+
count = 0
32+
try:
33+
count = int(args[4])
34+
except:
35+
raise ValueError(usage + '\nEnter an integer value for the "count" parameter')
2536

26-
self.file_contents = file_contents
27-
self.count = count
28-
self.doc_type = doc_type
37+
self.file_contents = file_contents
38+
self.count = count
39+
self.doc_type = doc_type
40+
if (ext=="csv"):
41+
f_open.close()
2942

30-
else:
31-
raise ValueError(usage)
43+
else:
44+
raise ValueError(usage)
3245

33-
def execute(self):
34-
db = Db()
35-
l = text_to_list(self.file_contents)
36-
d = list_to_dict(l)
37-
db.update_word_counts(d, self.doc_type)
38-
db.update_doctype_count(self.count, self.doc_type)
39-
return self.count
46+
def execute(self):
47+
db = Db()
48+
l = text_to_list(self.file_contents)
49+
d = list_to_dict(l)
50+
db.update_word_counts(d, self.doc_type)
51+
db.update_doctype_count(self.count, self.doc_type)
52+
return self.count
4053

41-
def output(self, _):
42-
print("Processed %s documents of type '%s'" % (self.count, self.doc_type))
54+
def output(self, _):
55+
print("Processed %s documents of type '%s'" % (self.count, self.doc_type))

0 commit comments

Comments
 (0)