From bddb883efd90965f325a8c1f5be0131ad5b503ae Mon Sep 17 00:00:00 2001 From: Thomas Buhrmann Date: Sat, 11 May 2019 14:29:51 +0200 Subject: [PATCH] Allow passing of empty text strings (#10) * For compatibility with native spaCy language classes, allow passing of empty text strings. This will produce 0-length docs, rather than raising an exception. * Increment patch version. --- spacy_stanfordnlp/about.py | 2 +- spacy_stanfordnlp/language.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/spacy_stanfordnlp/about.py b/spacy_stanfordnlp/about.py index e1e6e3c..fb55564 100644 --- a/spacy_stanfordnlp/about.py +++ b/spacy_stanfordnlp/about.py @@ -1,5 +1,5 @@ __title__ = "spacy-stanfordnlp" -__version__ = "0.1.0" +__version__ = "0.1.1" __summary__ = "Use the latest StanfordNLP research models directly in spaCy" __uri__ = "https://explosion.ai" __author__ = "Ines Montani" diff --git a/spacy_stanfordnlp/language.py b/spacy_stanfordnlp/language.py index 735177a..b53eb82 100644 --- a/spacy_stanfordnlp/language.py +++ b/spacy_stanfordnlp/language.py @@ -6,6 +6,7 @@ from stanfordnlp.models.common.vocab import UNK_ID from stanfordnlp.models.common.pretrain import Pretrain +from stanfordnlp.pipeline.doc import Document import numpy import re @@ -130,11 +131,12 @@ def __call__(self, text): text (unicode): The text to process. RETURNS (spacy.tokens.Doc): The spaCy Doc object. """ - snlp_doc = self.snlp(text) + snlp_doc = self.snlp(text) if text else Document("") text = snlp_doc.text tokens, heads = self.get_tokens_with_heads(snlp_doc) if not len(tokens): - raise ValueError("No tokens available.") + return Doc(self.vocab) + words = [] spaces = [] pos = []