diff --git a/goose/extractors/title.py b/goose/extractors/title.py index 8104c52b..f253f5b8 100644 --- a/goose/extractors/title.py +++ b/goose/extractors/title.py @@ -51,6 +51,9 @@ def clean_title(self, title): # my wonderfull article | TechCrunch title_words = title.split() + if not title_words: + return str() + # check if first letter is in TITLE_SPLITTERS # if so remove it if title_words[0] in TITLE_SPLITTERS: diff --git a/tests/data/extractors/title/test_title_opengraph_empty.html b/tests/data/extractors/title/test_title_opengraph_empty.html new file mode 100644 index 00000000..5d270152 --- /dev/null +++ b/tests/data/extractors/title/test_title_opengraph_empty.html @@ -0,0 +1,14 @@ + + + + + Wrong article title - website + + +
+

+ TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

+
+ + diff --git a/tests/data/extractors/title/test_title_opengraph_empty.json b/tests/data/extractors/title/test_title_opengraph_empty.json new file mode 100644 index 00000000..376fc189 --- /dev/null +++ b/tests/data/extractors/title/test_title_opengraph_empty.json @@ -0,0 +1,6 @@ +{ + "url": "http://exemple.com/test_opengraphcontent", + "expected": { + "title": "" + } +} diff --git a/tests/extractors/title.py b/tests/extractors/title.py index 36bee9a2..79d5f9c5 100644 --- a/tests/extractors/title.py +++ b/tests/extractors/title.py @@ -30,3 +30,8 @@ def test_title_opengraph(self): article = self.getArticle() fields = ['title'] self.runArticleAssertions(article=article, fields=fields) + + def test_title_opengraph_empty(self): + article = self.getArticle() + fields = ['title'] + self.runArticleAssertions(article=article, fields=fields)