Description
I am getting the following error while testing python-crfsuite (pycrfsuite):
crftrainer.append(xseq, yseq)
File "pycrfsuite/_pycrfsuite.pyx", line 312, in pycrfsuite._pycrfsuite.BaseTrainer.append
File "stringsource", line 48, in vector.from_py.__pyx_convert_vector_from_py_std_3a__3a_string
File "pycrfsuite/_pycrfsuite.pyx", line 53, in pycrfsuite._pycrfsuite.to_item
File "stringsource", line 15, in string.from_py.__pyx_convert_string_from_py_std__in_string
TypeError: expected bytes, tuple found
This is the code I am using; it can be used to reproduce the error.
# Example training data: one list of token strings per sentence, including
# the "BOS"/"EOS" boundary markers.
# NOTE(review): reconstructed from the pseudo-literal in the report; the real
# data may hold more sentences or a different element type - confirm.
train_sentlist = [["BOS", "I", "would", "like", "to", "meet", "Jack", "EOS"]]
def features(sentence, index):
    """Build the feature dict for the token at ``index`` in ``sentence``.

    Args:
        sentence: Sequence of token strings.
        index: Position of the token to describe.

    Returns:
        A dict mapping feature names to the token itself, boolean
        position/casing flags, prefixes and suffixes of up to three
        characters, and the neighbouring tokens ('' at a sentence edge).

    Raises:
        IndexError: If ``index`` is outside ``sentence``.
    """
    # Trace output kept from the original reproduction script.
    print(sentence, "\t", index)

    word = sentence[index]
    last = len(sentence) - 1
    # Slices (not [0] / [-1]) so an empty token yields '' instead of raising.
    first_char = word[:1]

    return {
        'word': word,
        'is_first': index == 0,
        'is_last': index == last,
        'is_capitalized': first_char.upper() == first_char,
        'is_all_caps': word.upper() == word,
        'is_all_lower': word.lower() == word,
        'prefix-1': first_char,
        'prefix-2': word[:2],
        'prefix-3': word[:3],
        'suffix-1': word[-1:],
        'suffix-2': word[-2:],
        'suffix-3': word[-3:],
        'prev_word': '' if index == 0 else sentence[index - 1],
        'next_word': '' if index == last else sentence[index + 1],
    }
# NOTE(review): Train_list and t_nerlist are not defined in this snippet -
# presumably they are created earlier in the script; confirm.
for sent in train_sentlist:
    # Strip the sentence boundary markers in place; list.remove raises
    # ValueError if a marker is missing from the sentence.
    sent.remove("EOS")
    sent.remove("BOS")
    Train_list.append(sent)

# One list of per-token feature dicts for every training sentence.
X = [
    [features(tagged, index) for index in range(len(tagged))]
    for tagged in Train_list
]

crftrainer = pycrfsuite.Trainer(verbose=False)
# NOTE(review): "TypeError: expected bytes, tuple found" means a tuple reached
# pycrfsuite where a string was expected. Presumably the elements of yseq (or
# the tokens in Train_list) are (word, tag) tuples rather than plain strings -
# verify the contents of t_nerlist and Train_list.
for xseq, yseq in zip(X, t_nerlist):
    crftrainer.append(xseq, yseq)

crftrainer.set_params({
    'c1': 1.0,  # coefficient for L1 penalty
    'c2': 1e-3,  # coefficient for L2 penalty
    'max_iterations': 200,  # stop earlier
    'feature.possible_transitions': True,
})
# Writes the trained model to this path.
crftrainer.train("model/nermodel.crfsuite")