From 6a9c3a4ccf83af3ad30e1ec54d45afc169d3e175 Mon Sep 17 00:00:00 2001 From: Facundo Olano Date: Thu, 27 Jun 2024 10:38:01 -0300 Subject: [PATCH] Put uncompressed mimetype first in the epub zip file (#101) * fix epub mimetype compression * try with zip type inverted --- feedi/scraping.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/feedi/scraping.py b/feedi/scraping.py index 9c7501e..0a3a43c 100644 --- a/feedi/scraping.py +++ b/feedi/scraping.py @@ -135,7 +135,11 @@ def package_epub(url, article): """ output_buffer = io.BytesIO() - with zipfile.ZipFile(output_buffer, 'w', compression=zipfile.ZIP_DEFLATED) as zip: + with zipfile.ZipFile(output_buffer, 'w') as zip: + # mimetype should be the first file in the container and it should be uncompressed + # https://www.w3.org/TR/epub-33/#sec-zip-container-mime + zip.writestr('mimetype', "application/epub+zip", compress_type=zipfile.ZIP_STORED) + soup = BeautifulSoup(article['content'], 'lxml') for img in soup.findAll('img'): img_url = img['src'] @@ -159,16 +163,15 @@ def package_epub(url, article): # else write as is dest_file.write(response.content) - zip.writestr('article.html', str(soup)) + zip.writestr('article.html', str(soup), compress_type=zipfile.ZIP_DEFLATED) # epub boilerplate based on https://github.com/thansen0/sample-epub-minimal - zip.writestr('mimetype', "application/epub+zip") zip.writestr('META-INF/container.xml', """ -""") +""", compress_type=zipfile.ZIP_DEFLATED) author = article['byline'] or article['siteName'] if not author: @@ -193,6 +196,6 @@ def package_epub(url, article): -""") +""", compress_type=zipfile.ZIP_DEFLATED) return output_buffer.getvalue()