Skip to content

Commit

Permalink
Put uncompressed mimetype first in the epub zip file (#101)
Browse files Browse the repository at this point in the history
* Fix epub mimetype compression (the mimetype entry must be stored uncompressed)

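* Try with the zip compression types inverted: the archive no longer defaults to deflate, and compression is requested explicitly per entry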
  • Loading branch information
facundoolano authored Jun 27, 2024
1 parent 79c9f9c commit 6a9c3a4
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions feedi/scraping.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,11 @@ def package_epub(url, article):
"""

output_buffer = io.BytesIO()
with zipfile.ZipFile(output_buffer, 'w', compression=zipfile.ZIP_DEFLATED) as zip:
with zipfile.ZipFile(output_buffer, 'w') as zip:
# mimetype should be the first file in the container and it should be uncompressed
# https://www.w3.org/TR/epub-33/#sec-zip-container-mime
zip.writestr('mimetype', "application/epub+zip", compress_type=zipfile.ZIP_STORED)

soup = BeautifulSoup(article['content'], 'lxml')
for img in soup.findAll('img'):
img_url = img['src']
Expand All @@ -159,16 +163,15 @@ def package_epub(url, article):
# else write as is
dest_file.write(response.content)

zip.writestr('article.html', str(soup))
zip.writestr('article.html', str(soup), compress_type=zipfile.ZIP_DEFLATED)

# epub boilerplate based on https://github.com/thansen0/sample-epub-minimal
zip.writestr('mimetype', "application/epub+zip")
zip.writestr('META-INF/container.xml', """<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>""")
</container>""", compress_type=zipfile.ZIP_DEFLATED)

author = article['byline'] or article['siteName']
if not author:
Expand All @@ -193,6 +196,6 @@ def package_epub(url, article):
<spine toc="ncx">
<itemref idref="article" />
</spine>
</package>""")
</package>""", compress_type=zipfile.ZIP_DEFLATED)

return output_buffer.getvalue()

0 comments on commit 6a9c3a4

Please sign in to comment.