diff --git a/ctyparser/bigcty.py b/ctyparser/bigcty.py index 88aa972..5de22eb 100644 --- a/ctyparser/bigcty.py +++ b/ctyparser/bigcty.py @@ -21,6 +21,7 @@ import feedparser from typing import Union +from lxml import html default_feed = "http://www.country-files.com/category/big-cty/feed/" @@ -172,13 +173,17 @@ def update(self) -> bool: with tempfile.TemporaryDirectory() as temp: path = pathlib.PurePath(temp) - dl_url = f'http://www.country-files.com/bigcty/download/{update_date[:4]}/bigcty-{update_date}.zip' # TODO: Issue #10 + page = session.get(update_url) + if page.status_code != 200: + raise Exception(f"Unable to find and download bigcty-{update_date}.zip") + tree = html.fromstring(page.content) + link_urls = tree.xpath("//a[contains(@href,'zip')]/@href") + if len(link_urls) == 0: + raise Exception(f"Unable to find link to bigcty-{update_date}.zip") + dl_url = link_urls[0] rq = session.get(dl_url) - if rq.status_code == 404: - dl_url = f'http://www.country-files.com/bigcty/download/bigcty-{update_date}.zip' - rq = session.get(dl_url) - if rq.status_code != 200: - raise Exception(f"Unable to find and download bigcty-{update_date}.zip") + if rq.status_code != 200: + raise Exception(f"Unable to find and download bigcty-{update_date}.zip") with open(path / 'cty.zip', 'wb+') as file: file.write(rq.content) zipfile.ZipFile(file).extract('cty.dat', path=str(path)) # Force cast as str because mypy diff --git a/devrequirements.txt b/devrequirements.txt index 4df7d58..91c6baf 100644 --- a/devrequirements.txt +++ b/devrequirements.txt @@ -7,3 +7,4 @@ sphinx # Dependencies feedparser requests +lxml