From c90d08f416a14d822170d01b0c5fe30cb63e2959 Mon Sep 17 00:00:00 2001 From: facundoolano Date: Thu, 4 Jan 2024 13:39:23 -0300 Subject: [PATCH] remove wordpress footer from entry preview --- feedi/parsers/rss.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/feedi/parsers/rss.py b/feedi/parsers/rss.py index 0dd0d27..6e390d1 100644 --- a/feedi/parsers/rss.py +++ b/feedi/parsers/rss.py @@ -198,14 +198,20 @@ def parse_avatar_url(self, entry): return url def parse_content_short(self, entry): + content_url = self.parse_content_url(entry) summary = entry.get('summary') if summary: + # wordpress adds an annoying footer by default ('the post x appeared first on') + # removing it by skipping the last line when it includes a link to the article + footer = summary.split('\n')[-1] + if content_url.split('?')[0] in footer: + summary = summary.strip(footer).strip() + summary = html.unescape(summary) else: - url = self.parse_content_url(entry) - if not url: + if not content_url: return - summary = self.fetch_meta(url, 'og:description', 'description') + summary = self.fetch_meta(content_url, 'og:description', 'description') if not summary: return