Skip to content

Commit

Permalink
3.1.2
Browse files Browse the repository at this point in the history
Add a feature to convert mobi to epub and push it to Amazon.
  • Loading branch information
cdhigh committed Jun 18, 2024
1 parent 7edaea8 commit 4ddf36b
Show file tree
Hide file tree
Showing 23 changed files with 484 additions and 162 deletions.
24 changes: 14 additions & 10 deletions application/lib/build_ebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,28 @@
from calibre.web.feeds.recipes import compile_recipe
from recipe_helper import GenerateRecipeSource
from urlopener import UrlOpener
from application.utils import loc_exc_pos

#从输入格式生成对应的输出格式
#recipes: 编译后的recipe,为一个列表
#input_: 如果是recipe,为编译后的recipe(或列表),或者是一个输入文件名,或一个BytesIO
#input_fmt: 输入格式, recipe, mobi, ...
#user: KeUser对象
#output_fmt: 如果指定,则生成特定格式的书籍,否则使用user.book_cfg('type')
#options: 额外的一些参数,为一个字典
# 如: options={'debug_pipeline': path, 'verbose': 1}
#返回电子书二进制内容
def recipes_to_ebook(recipes: list, user, options=None, output_fmt=''):
if not isinstance(recipes, list):
recipes = [recipes]
def convert_book(input_, input_fmt, user, options=None, output_fmt=''):
output = io.BytesIO()
output_fmt=output_fmt if output_fmt else user.book_cfg('type')
output_fmt = output_fmt if output_fmt else user.book_cfg('type')
options = ke_opts(user, options)
plumber = Plumber(recipes, output, input_fmt='recipe', output_fmt=output_fmt, options=options)
plumber.run()
return output.getvalue()

plumber = Plumber(input_, output, input_fmt=input_fmt, output_fmt=output_fmt, options=options)
try:
plumber.run()
return output.getvalue()
except:
default_log.warning(loc_exc_pos('convert_book failed'))
return b''

#仅通过一个url列表构建一本电子书
#urls: [(title, url),...] or [url,url,...]
#title: 书籍标题
Expand Down Expand Up @@ -84,7 +88,7 @@ def clearPrevDownloads(): #退出时清理临时文件
userCss = user.get_extra_css()
ro.extra_css = f'{ro.extra_css}\n\n{userCss}' if ro.extra_css else userCss #type:ignore

book = recipes_to_ebook([ro], user, options, output_fmt)
book = convert_book(ro, 'recipe', user, options, output_fmt)
clearPrevDownloads()
return book

Expand Down
3 changes: 2 additions & 1 deletion application/lib/calibre/customize/builtins.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ def set_metadata(self, stream, mi, type):
from calibre.ebooks.conversion.plugins.html_input import HTMLInput
from calibre.ebooks.conversion.plugins.epub_input import EPUBInput
from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput
from calibre.ebooks.conversion.plugins.mobi_input import MOBIInput
from calibre.ebooks.conversion.plugins.mobi_output import (MOBIOutput, AZW3Output)
from calibre.ebooks.conversion.plugins.oeb_output import OEBOutput

plugins = [RecipeInput, HTMLInput, EPUBOutput, MOBIOutput, AZW3Output, OEBOutput, EPUBMetadataWriter, MOBIMetadataWriter]
plugins = [RecipeInput, HTMLInput, MOBIInput, EPUBOutput, MOBIOutput, AZW3Output, OEBOutput, EPUBMetadataWriter, MOBIMetadataWriter]

from calibre.customize.profiles import input_profiles, output_profiles
plugins += input_profiles
Expand Down
2 changes: 1 addition & 1 deletion application/lib/calibre/customize/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def get_images(self):
raise NotImplementedError()

#fs: FsDictStub 对象,由它根据情况使用内存缓存或使用磁盘缓存
def convert(self, stream, options, file_ext, log, output_dir, fs):
def convert(self, stream, opts, file_ext, log, output_dir, fs):
'''
This method must be implemented in sub-classes. It must return
the path to the created OPF file or an :class:`OEBBook` instance.
Expand Down
122 changes: 122 additions & 0 deletions application/lib/calibre/ebooks/compression/mobi_uncompress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
import struct

class unpackException(Exception):
    """Error raised when data handed to a decompressor in this module is malformed.

    For example, HuffcdicReader raises it when a HUFF or CDIC record does
    not start with the expected header bytes.
    """

class UncompressedReader:
    """Pass-through reader exposing the same ``unpack`` method as the other readers."""

    def unpack(self, data):
        """Return *data* unchanged (the very same object, not a copy)."""
        return data

class PalmdocReader:
    """Pure-Python decoder for PalmDoc-compressed byte strings."""

    def unpack(self, i):
        """Return the decompressed form of the PalmDoc-compressed bytes *i*.

        The input is a stream of control bytes ``c``:

        * ``1 <= c <= 8``: the next ``c`` input bytes are copied verbatim.
        * ``c == 0`` or ``9 <= c < 128``: ``c`` itself is emitted.
        * ``c >= 192``: a space followed by ``c ^ 128`` is emitted.
        * ``128 <= c < 192``: ``c`` and the following byte form a 16-bit
          value carrying an 11-bit distance and a 3-bit length (stored as
          ``length - 3``); that many bytes are copied from the output
          produced so far, starting ``distance`` bytes from its end.
          If no byte follows, the control byte is ignored.

        The result is always a new ``bytes`` object.
        """
        # A bytearray grows in amortized O(1) per append; repeatedly
        # concatenating immutable bytes would be quadratic in output size.
        out = bytearray()
        pos = 0
        end = len(i)
        while pos < end:
            c = i[pos]
            pos += 1
            if 1 <= c <= 8:
                # Literal run: slicing clamps at the end of the input.
                out += i[pos:pos + c]
                pos += c
            elif c < 128:
                out.append(c)
            elif c >= 192:
                out += b" "
                out.append(c ^ 128)
            elif pos < end:
                c = (c << 8) | i[pos]
                pos += 1
                distance = (c >> 3) & 0x07FF
                length = (c & 7) + 3
                if distance > length:
                    # Source and destination do not overlap: copy in one go.
                    out += out[-distance:length - distance]
                else:
                    # Overlapping copy: bytes written by this very copy may be
                    # read again, so it has to proceed one byte at a time.
                    for _ in range(length):
                        if distance == 1:
                            # out[-1:0] would be empty, hence the open slice.
                            out += out[-distance:]
                        else:
                            out += out[-distance:-distance + 1]
        return bytes(out)


class HuffcdicReader:
    """Decoder for data compressed with a HUFF code table and CDIC phrase records.

    Usage order: ``loadHuff`` once (it also resets the phrase dictionary),
    ``loadCdic`` for every CDIC record, then ``unpack`` for each data record.
    """

    # Pre-bound reader for one big-endian unsigned 64-bit integer.
    q = struct.Struct(b">Q").unpack_from

    def loadHuff(self, huff):
        """Parse the HUFF record *huff* and build the code lookup tables.

        Sets ``self.dict1`` (256 entries indexed by the top byte of a code),
        ``self.mincode`` / ``self.maxcode`` (33 entries indexed by code
        length) and resets ``self.dictionary`` to an empty list.

        Raises:
            unpackException: if the header is wrong or a table entry is
                invalid (zero code length, or a code of at most 8 bits that
                is not flagged as terminal).
        """
        if huff[0:8] != b"HUFF\x00\x00\x00\x18":
            raise unpackException("invalid huff header")
        off1, off2 = struct.unpack_from(b">LL", huff, 8)

        def dict1_unpack(v):
            codelen, term, maxcode = v & 0x1F, v & 0x80, v >> 8
            # Explicit raises instead of assert: assertions disappear under
            # ``python -O`` and corrupt tables would then decode to garbage.
            if codelen == 0:
                raise unpackException("invalid huff table entry: zero code length")
            if codelen <= 8 and not term:
                raise unpackException("invalid huff table entry: short code is not terminal")
            # Left-align the maximum code in a 32-bit window.
            maxcode = ((maxcode + 1) << (32 - codelen)) - 1
            return (codelen, term, maxcode)

        self.dict1 = [dict1_unpack(v) for v in struct.unpack_from(b">256L", huff, off1)]

        # dict2 interleaves (mincode, maxcode) pairs for code lengths 1..32;
        # a dummy entry is prepended so the tuples can be indexed by length.
        dict2 = struct.unpack_from(b">64L", huff, off2)
        self.mincode = tuple(
            mincode << (32 - codelen)
            for codelen, mincode in enumerate((0,) + dict2[0::2])
        )
        self.maxcode = tuple(
            ((maxcode + 1) << (32 - codelen)) - 1
            for codelen, maxcode in enumerate((0,) + dict2[1::2])
        )

        self.dictionary = []

    def loadCdic(self, cdic):
        """Append the phrases of the CDIC record *cdic* to ``self.dictionary``.

        Each dictionary entry is a ``(phrase_bytes, flag)`` tuple; a falsy
        flag means the phrase is itself compressed and is expanded lazily by
        ``unpack``.

        Raises:
            unpackException: if the record does not start with the CDIC header.
        """
        if cdic[0:8] != b"CDIC\x00\x00\x00\x10":
            raise unpackException("invalid cdic header")
        phrases, bits = struct.unpack_from(b">LL", cdic, 8)
        # A record holds at most 2**bits phrases; the last one may hold fewer.
        n = min(1 << bits, phrases - len(self.dictionary))
        h = struct.Struct(b">H").unpack_from

        def getslice(off):
            # Offsets are relative to byte 16; each phrase is prefixed by a
            # 16-bit word: low 15 bits = length, top bit = flag.
            (blen,) = h(cdic, 16 + off)
            phrase = cdic[18 + off:18 + off + (blen & 0x7FFF)]
            return (phrase, blen & 0x8000)

        offsets = struct.unpack_from(">%dH" % n, cdic, 16)
        self.dictionary += [getslice(off) for off in offsets]

    def unpack(self, data):
        """Return the decompressed bytes for the compressed record *data*.

        *data* itself is never modified. Phrases whose flag is unset are
        decompressed recursively on first use and cached back into
        ``self.dictionary``.
        """
        q = HuffcdicReader.q

        bitsleft = len(data) * 8
        # Pad a copy so an 8-byte read is valid at every position reached.
        # (An in-place ``+=`` would grow a caller-owned bytearray.)
        padded = data + b"\x00\x00\x00\x00\x00\x00\x00\x00"
        pos = 0
        (x,) = q(padded, pos)
        n = 32

        parts = []
        while True:
            if n <= 0:
                # The 32-bit window ran off the low end of x: advance 4 bytes.
                pos += 4
                (x,) = q(padded, pos)
                n += 32
            code = (x >> n) & ((1 << 32) - 1)

            codelen, term, maxcode = self.dict1[code >> 24]
            if not term:
                # Code is longer than the quick-lookup table resolves:
                # find its real length through the mincode table.
                while code < self.mincode[codelen]:
                    codelen += 1
                maxcode = self.maxcode[codelen]

            n -= codelen
            bitsleft -= codelen
            if bitsleft < 0:
                break

            r = (maxcode - code) >> (32 - codelen)
            phrase, flag = self.dictionary[r]
            if not flag:
                # Park None in the slot while recursing so that a phrase
                # referring to itself fails instead of recursing forever.
                self.dictionary[r] = None
                phrase = self.unpack(phrase)
                self.dictionary[r] = (phrase, 1)
            parts.append(phrase)
        # Join once at the end instead of quadratic bytes concatenation.
        return b"".join(parts)
7 changes: 4 additions & 3 deletions application/lib/calibre/ebooks/compression/palmdoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from struct import pack

#from calibre_extensions import cPalmdoc
from .mobi_uncompress import PalmdocReader


#def decompress_doc(data):
# return cPalmdoc.decompress(data)
def decompress_doc(data):
    """Decompress PalmDoc-compressed *data* with the pure-Python PalmdocReader.

    Used in place of the cPalmdoc.decompress(data) call from the
    calibre_extensions C module, whose import is commented out in this file.
    """
    reader = PalmdocReader()
    return reader.unpack(data)


def compress_doc(data):
Expand Down
66 changes: 66 additions & 0 deletions application/lib/calibre/ebooks/conversion/plugins/mobi_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <[email protected]>'
__docformat__ = 'restructuredtext en'

import os

from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks import DRMError

class MOBIInput(InputFormatPlugin):
    """Input plugin that unpacks MOBI-family books into HTML plus an OPF."""

    name = 'MOBI Input'
    author = 'Kovid Goyal'
    description = _('Convert MOBI files (.mobi, .prc, .azw) to HTML')
    file_types = {'mobi', 'prc', 'azw', 'azw3', 'pobi'}
    commit_name = 'mobi_input'

    def convert(self, stream, opts, file_ext, log, output_dir, fs):
        """Extract the book in *stream* and write its parts below *output_dir*.

        Args:
            stream: the MOBI input, passed straight to MobiReader
                (presumably a file name or file-like object - TODO confirm
                against MobiReader).
            opts: conversion options; ``user``, ``input_encoding`` and
                ``debug_pipeline`` are read here.
            file_ext: not used by this method.
            log: callable logger.
            output_dir: directory the extracted files are written under.
            fs: the file-system stub all writes go through (the original
                note says it is the FsDictStub instance created by the
                plumber).

        Returns:
            For KF8 books, whatever ``Mobi8Reader(...)(output_dir)`` returns.
            Otherwise *fs* itself when it is truthy, else
            ``mr.created_opf_path``.

        Raises:
            DRMError: re-raised unchanged when the book is DRM protected.
        """
        self.user = opts.user
        self.is_kf8 = False
        self.mobi_is_joint = False

        from calibre.ebooks.mobi.reader.mobi6 import MobiReader
        from lxml import html
        parse_cache = {}
        try:
            mr = MobiReader(stream, log, opts.input_encoding, opts.debug_pipeline, fs=fs)
            if mr.kf8_type is None:
                mr.extract_content(output_dir, parse_cache)
        except DRMError:
            raise
        except Exception:
            # Any other failure: retry once with the extra-data workaround.
            # ``Exception`` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate instead of triggering the retry.
            mr = MobiReader(stream, log, opts.input_encoding,
                            opts.debug_pipeline, try_extra_data_fix=True, fs=fs)
            if mr.kf8_type is None:
                mr.extract_content(output_dir, parse_cache)

        if mr.kf8_type is not None:
            log('Found KF8 MOBI of type %r'%mr.kf8_type)
            if mr.kf8_type == 'joint':
                self.mobi_is_joint = True
            from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
            mr = Mobi8Reader(mr, log, fs=fs)
            opf = mr(output_dir)
            self.encrypted_fonts = mr.encrypted_fonts
            self.is_kf8 = True
            return opf

        # The raw markup is popped first so that the loop below only sees
        # the parsed documents left in parse_cache.
        raw = parse_cache.pop('calibre_raw_mobi_markup', False)
        if raw:
            if isinstance(raw, str):
                raw = raw.encode('utf-8')
            fs.write(os.path.join(output_dir, 'debug-raw.html'), raw, 'wb')
        from calibre.ebooks.oeb.base import close_self_closing_tags
        for f, root in parse_cache.items():
            raw = html.tostring(root, encoding='utf-8', method='xml',
                                include_meta_content_type=False)
            raw = close_self_closing_tags(raw)
            fs.write(os.path.join(output_dir, f), raw, 'wb')
        #accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
        return fs if fs else mr.created_opf_path
12 changes: 7 additions & 5 deletions application/lib/calibre/ebooks/conversion/plumber.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
__copyright__ = '2009, Kovid Goyal <[email protected]>'
__docformat__ = 'restructuredtext en'

from PIL.Image import isImageType
import os, re, sys, shutil, pprint, json, io, css_parser, logging, traceback
from itertools import chain
from functools import partial
Expand All @@ -24,7 +25,7 @@
from polyglot.builtins import string_or_bytes

from filesystem_dict import FsDictStub
from application.utils import get_directory_size
from application.utils import get_directory_size, loc_exc_pos
from application.base_handler import save_delivery_log

DEBUG_README=b'''
Expand Down Expand Up @@ -397,7 +398,7 @@ def run(self):
self.oeb = self.input_plugin(self.input_, self.opts, self.input_fmt, self.log, tdir, fs)
except Exception as e:
if 'All feeds are empty, aborting.' in str(e):
self.log.warning('Failed to execute input plugin: {}'.format(str(e)))
self.log.warning('Plumber: All feeds are empty, aborting.')
else:
self.log.warning('Failed to execute input plugin: {}'.format(traceback.format_exc()))
fs.clear()
Expand All @@ -416,11 +417,12 @@ def run(self):
# return
self.opts_to_mi(self.opts, self.user_metadata)
if not hasattr(self.oeb, 'manifest'): #从一堆文件里面创建OEBBook实例
fs.find_opf_path()
try:
self.oeb = create_oebbook(self.log, self.oeb, self.opts, encoding=self.input_plugin.output_encoding,
self.oeb = create_oebbook(self.log, fs, self.opts, encoding=self.input_plugin.output_encoding,
removed_items=getattr(self.input_plugin, 'removed_items_to_ignore', ()))
except Exception as e:
self.log.warning('Failed to create oebbook for recipes: {}'.format(str(e)))
except:
self.log.warning(loc_exc_pos('Failed to create oebbook'))
fs.clear()
return

Expand Down
Loading

0 comments on commit 4ddf36b

Please sign in to comment.