diff --git a/pipelines/textcat_demo/scripts/convert.py b/pipelines/textcat_demo/scripts/convert.py index 79d50eedc..44f004d00 100644 --- a/pipelines/textcat_demo/scripts/convert.py +++ b/pipelines/textcat_demo/scripts/convert.py @@ -10,12 +10,11 @@ def convert(lang: str, input_path: Path, output_path: Path): nlp = spacy.blank(lang) - docs = [] + db = DocBin() for line in srsly.read_jsonl(input_path): doc = nlp.make_doc(line["text"]) doc.cats = line["cats"] - docs.append(doc) - db = DocBin(docs=docs) + db.add(doc) db.to_disk(output_path) diff --git a/pipelines/textcat_multilabel_demo/scripts/convert.py b/pipelines/textcat_multilabel_demo/scripts/convert.py index 79d50eedc..44f004d00 100644 --- a/pipelines/textcat_multilabel_demo/scripts/convert.py +++ b/pipelines/textcat_multilabel_demo/scripts/convert.py @@ -10,12 +10,11 @@ def convert(lang: str, input_path: Path, output_path: Path): nlp = spacy.blank(lang) - docs = [] + db = DocBin() for line in srsly.read_jsonl(input_path): doc = nlp.make_doc(line["text"]) doc.cats = line["cats"] - docs.append(doc) - db = DocBin(docs=docs) + db.add(doc) db.to_disk(output_path)