Skip to content

Commit

Permalink
More efficient DocBin creation in conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
adrianeboyd committed Mar 5, 2021
1 parent 88d1628 commit ef00ec7
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 6 deletions.
5 changes: 2 additions & 3 deletions pipelines/textcat_demo/scripts/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@

 def convert(lang: str, input_path: Path, output_path: Path):
     nlp = spacy.blank(lang)
-    docs = []
+    db = DocBin()
     for line in srsly.read_jsonl(input_path):
         doc = nlp.make_doc(line["text"])
         doc.cats = line["cats"]
-        docs.append(doc)
-    db = DocBin(docs=docs)
+        db.add(doc)
     db.to_disk(output_path)


Expand Down
5 changes: 2 additions & 3 deletions pipelines/textcat_multilabel_demo/scripts/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@

 def convert(lang: str, input_path: Path, output_path: Path):
     nlp = spacy.blank(lang)
-    docs = []
+    db = DocBin()
     for line in srsly.read_jsonl(input_path):
         doc = nlp.make_doc(line["text"])
         doc.cats = line["cats"]
-        docs.append(doc)
-    db = DocBin(docs=docs)
+        db.add(doc)
     db.to_disk(output_path)


Expand Down

0 comments on commit ef00ec7

Please sign in to comment.