Skip to content

File "stringsource", line 15, in string.from_py.__pyx_convert_string_from_py_std__in_string TypeError: expected bytes, tuple found #108

Open
@arunespku

Description

@arunespku

I am getting the following error while testing python-crfsuite (pycrfsuite).

   crftrainer.append(xseq,  yseq)
  File "pycrfsuite/_pycrfsuite.pyx", line 312, in pycrfsuite._pycrfsuite.BaseTrainer.append
  File "stringsource", line 48, in vector.from_py.__pyx_convert_vector_from_py_std_3a__3a_string
  File "pycrfsuite/_pycrfsuite.pyx", line 53, in pycrfsuite._pycrfsuite.to_item
  File "stringsource", line 15, in string.from_py.__pyx_convert_string_from_py_std__in_string
TypeError: expected bytes, tuple found

This is the code I am using; it can be used to reproduce the error.
# One training sentence as a list of tokens, wrapped in the BOS/EOS
# sentence-boundary markers that the preprocessing loop removes.
train_sentlist = [["BOS", "I", "would", "like", "to", "meet", "Jack", "EOS"]]

def features(sentence, index):
    """Build the feature dict for the token at ``index`` of ``sentence``.

    Args:
        sentence: Sequence of token strings.
        index: Position in ``sentence`` of the token to describe.

    Returns:
        A dict keyed by feature name. Values are either strings (the word,
        its prefixes/suffixes, the neighbouring words) or booleans
        (position and casing flags). ``prev_word`` / ``next_word`` are the
        empty string at the start / end of the sentence.

    Raises:
        IndexError: If ``index`` is out of range for ``sentence``.
    """
    # Debug trace kept from the original report.
    print(sentence, "\t", index)

    word = sentence[index]
    last_index = len(sentence) - 1
    # Slices instead of word[0] / word[-1] so an empty token yields ''
    # rather than raising IndexError; identical for non-empty tokens.
    first_char = word[:1]

    return {
        'word': word,
        'is_first': index == 0,
        'is_last': index == last_index,
        'is_capitalized': first_char.upper() == first_char,
        'is_all_caps': word.upper() == word,
        'is_all_lower': word.lower() == word,
        'prefix-1': first_char,
        'prefix-2': word[:2],
        'prefix-3': word[:3],
        'suffix-1': word[-1:],
        'suffix-2': word[-2:],
        'suffix-3': word[-3:],
        'prev_word': '' if index == 0 else sentence[index - 1],
        'next_word': '' if index == last_index else sentence[index + 1],
    }

# Strip the sentence-boundary markers (in place) and collect the sentences.
# The snippet never creates Train_list before appending, so start it here.
Train_list = []
for sent in train_sentlist:
    sent.remove("EOS")
    sent.remove("BOS")
    Train_list.append(sent)

# One list of per-token feature dicts for every training sentence.
X = [
    [features(tagged, index) for index in range(len(tagged))]
    for tagged in Train_list
]

crftrainer = pycrfsuite.Trainer(verbose=False)

# NOTE(review): this append() is where the reported
# "TypeError: expected bytes, tuple found" is raised. The traceback passes
# through a vector-of-strings conversion, which suggests some element handed
# to append() is a tuple where a str is expected -- t_nerlist is not shown
# in the report, so verify that each yseq is a flat list of label strings.
for xseq, yseq in zip(X, t_nerlist):
    crftrainer.append(xseq, yseq)

# Configure and train once, after every sequence has been added.
crftrainer.set_params({
    'c1': 1.0,  # coefficient for L1 penalty
    'c2': 1e-3,  # coefficient for L2 penalty
    'max_iterations': 200,  # stop earlier
    'feature.possible_transitions': True,
})

crftrainer.train("model/nermodel.crfsuite")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions