diff --git a/feedi/scraping.py b/feedi/scraping.py index 9c7501e..0a3a43c 100644 --- a/feedi/scraping.py +++ b/feedi/scraping.py @@ -135,7 +135,11 @@ def package_epub(url, article): """ output_buffer = io.BytesIO() - with zipfile.ZipFile(output_buffer, 'w', compression=zipfile.ZIP_DEFLATED) as zip: + with zipfile.ZipFile(output_buffer, 'w') as zip: + # mimetype should be the first file in the container and it should be uncompressed + # https://www.w3.org/TR/epub-33/#sec-zip-container-mime + zip.writestr('mimetype', "application/epub+zip", compress_type=zipfile.ZIP_STORED) + soup = BeautifulSoup(article['content'], 'lxml') for img in soup.findAll('img'): img_url = img['src'] @@ -159,16 +163,15 @@ def package_epub(url, article): # else write as is dest_file.write(response.content) - zip.writestr('article.html', str(soup)) + zip.writestr('article.html', str(soup), compress_type=zipfile.ZIP_DEFLATED) # epub boilerplate based on https://github.com/thansen0/sample-epub-minimal - zip.writestr('mimetype', "application/epub+zip") zip.writestr('META-INF/container.xml', """ -""") +""", compress_type=zipfile.ZIP_DEFLATED) author = article['byline'] or article['siteName'] if not author: @@ -193,6 +196,6 @@ def package_epub(url, article): -""") +""", compress_type=zipfile.ZIP_DEFLATED) return output_buffer.getvalue()