Skip to content
This repository has been archived by the owner on Oct 3, 2022. It is now read-only.

Commit

Permalink
Added FeedHandler to encapsulate feed tasks and url checking
Browse files Browse the repository at this point in the history
  • Loading branch information
Christian committed Nov 10, 2017
1 parent c4725bd commit b168213
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 0 deletions.
31 changes: 31 additions & 0 deletions tests/test_feedhandler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import unittest
from util.feedhandler import FeedHandler


class TestFeedHandler(unittest.TestCase):
    """Tests for FeedHandler.

    NOTE: test_parse_feed and test_is_parsable pass real URLs to
    FeedHandler, so they presumably need network access to succeed.
    """

    def test_parse_feed(self):
        """parse_feed returns a result for a known feed URL."""
        url = "https://lorem-rss.herokuapp.com/feed"
        feed = FeedHandler.parse_feed(url)
        # Assert on the parsed result; asserting on the `url` literal
        # could never fail and left the call under test unchecked.
        self.assertIsNotNone(feed)

    def test_is_parsable(self):
        """Only an http(s) URL that serves a feed counts as parsable."""
        url = "https://lorem-rss.herokuapp.com/feed"
        self.assertTrue(FeedHandler.is_parsable(url))
        url = "https://google.de"
        self.assertFalse(FeedHandler.is_parsable(url))
        url = "www.google.de"
        self.assertFalse(FeedHandler.is_parsable(url))

    def test_format_url_string(self):
        """A missing scheme is filled in with http://; others are kept."""
        url = "https://lorem-rss.herokuapp.com/feed"
        url = FeedHandler.format_url_string(url)
        self.assertEqual(url, "https://lorem-rss.herokuapp.com/feed")

        url = "www.lorem-rss.herokuapp.com/feed"
        url = FeedHandler.format_url_string(url)
        self.assertEqual(url, "http://www.lorem-rss.herokuapp.com/feed")

        url = "lorem-rss.herokuapp.com/feed"
        url = FeedHandler.format_url_string(url)
        self.assertEqual(url, "http://lorem-rss.herokuapp.com/feed")
50 changes: 50 additions & 0 deletions util/feedhandler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import feedparser
import re


class FeedHandler(object):
    """Static helpers for normalising, validating and parsing feed URLs."""

    # Patterns are raw strings: "\/" inside a plain string literal is an
    # invalid escape sequence and triggers a warning on newer Pythons.
    # Explicit, lower-case http:// or https:// prefix (used for validation).
    _HTTP_URL = re.compile(r"https?://.*")
    # Same prefix in any letter case (used when normalising user input).
    _HTTP_SCHEME = re.compile(r"https?://", re.IGNORECASE)
    # Authority part of a URL: everything before the first "/", "?" or "#".
    _AUTHORITY = re.compile(r"[^/?#]*")

    @staticmethod
    def parse_feed(url):
        """
        Parse the feed at the given url and return its list of entries.
        """

        feed = feedparser.parse(url)
        return feed.entries

    @staticmethod
    def is_parsable(url):
        """
        Check whether the given url provides a news feed.

        Returns False if the url does not start with http:// or https://,
        if the parsed feed has no entries, or if any entry lacks an
        ``updated`` attribute. Returns True otherwise.
        """

        # Reject anything without an explicit scheme before fetching it.
        if not FeedHandler._HTTP_URL.match(url):
            return False

        feed = feedparser.parse(url)

        # An empty result means there is nothing to deliver.
        if not feed.entries:
            return False

        # Every entry must provide the updated attribute.
        return all(hasattr(post, "updated") for post in feed.entries)

    @staticmethod
    def format_url_string(string):
        """
        Format a url string so that it matches http(s)://address.domain.

        A missing http:// or https:// prefix is replaced by http://. The
        scheme and the host are lower-cased; the path, query and fragment
        (and any ``user:password@`` part) are left untouched because they
        can be case-sensitive. Call this before parsing a url to make sure
        it is parsable.
        """

        scheme = FeedHandler._HTTP_SCHEME.match(string)
        if scheme:
            prefix = scheme.group(0).lower()
            remainder = string[scheme.end():]
        else:
            prefix = "http://"
            remainder = string

        # The pattern can match the empty string, so match() never fails.
        authority_end = FeedHandler._AUTHORITY.match(remainder).end()
        authority = remainder[:authority_end]
        tail = remainder[authority_end:]

        # Only the host is case-insensitive; keep credentials as given.
        userinfo, at_sign, host = authority.rpartition("@")
        return prefix + userinfo + at_sign + host.lower() + tail

0 comments on commit b168213

Please sign in to comment.