diff --git a/sesame/preprocess.py b/sesame/preprocess.py index a868626..bcbb549 100644 --- a/sesame/preprocess.py +++ b/sesame/preprocess.py @@ -76,7 +76,7 @@ def write_to_conll(outf, fsp, firstex, sentid): token, postag, nltkpostag, nltklemma, lu, frm, role = fsp.info_at_idx(i) outf.write(str(i + 1) + "\t") # ID = 0 - outf.write(str(token.encode('utf-8')) + "\t") # FORM = 1 + outf.write(str(token) + "\t") # FORM = 1 outf.write("_\t" + nltklemma + "\t") # LEMMA PLEMMA = 2,3 outf.write(postag + "\t" + nltkpostag + "\t") # POS PPOS = 4,5 outf.write(str(sentid - 1) + "\t_\t") # FEAT PFEAT = 6,7 ~ replacing FEAT with sentence number