Skip to content

Commit

Permalink
Update web_scraper.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
sondregronas committed Aug 23, 2024
1 parent d848995 commit e1a50ba
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion .github/workflows/web_scraper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,7 +22,12 @@ def get_html(link):
"""Get the html from the given url, and append the new links to the links list."""
print(f"Visiting {url}/{link.strip("/")}")
r = requests.get(f'{url}/{link.strip("/")}', allow_redirects=True)
html = bs(r.text, "html.parser").prettify()

# Only prettify if mimetype is text/html
if "text/html" in r.headers.get("Content-Type"):
html = bs(r.text, "html.parser").prettify()
else:
html = r.text

new_links = get_links(html, path=link.strip("/"))
new_media_links = get_media_links(html, path=link.strip("/"))
Expand Down

0 comments on commit e1a50ba

Please sign in to comment.