-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconverters.py
172 lines (135 loc) · 4.89 KB
/
converters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import html
import os

import docx
from bs4 import BeautifulSoup
from ebooklib import epub
from PyPDF2 import PdfReader
def setup_epub_book(identifier, title, language="en"):
    """
    Create a new EPUB book and stamp its basic metadata on it.

    :param identifier: Value handed to ``set_identifier`` on the new book.
    :param title: Value handed to ``set_title`` on the new book.
    :param language: Value handed to ``set_language``; defaults to ``"en"``.
    :return: The newly created ``epub.EpubBook`` instance.
    """
    book = epub.EpubBook()

    # Metadata is applied in a fixed order: identifier, title, language.
    book.set_identifier(identifier)
    book.set_title(title)
    book.set_language(language)

    return book
def add_chapter(ebook, title, filename, content):
    """
    Add a chapter to the EPUB book.

    The chapter body is an ``<h1>`` heading holding the title, followed by
    ``content`` inserted verbatim (so ``content`` must already be markup).

    :param ebook: The EpubBook object the chapter is added to.
    :param title: The plain-text title of the chapter.
    :param filename: The filename for the chapter inside the EPUB.
    :param content: The markup of the chapter body.
    :return: The created ``epub.EpubHtml`` chapter, or ``None`` when
        ``content`` is ``None``, empty, or whitespace only (nothing is
        added to the book in that case).
    """
    # Treat a missing body like an empty one instead of failing on .strip().
    if content is None or not content.strip():
        return None

    chapter = epub.EpubHtml(title=title, file_name=filename)
    # The title is plain text, so characters such as "&" or "<" have to be
    # escaped before they are placed inside the heading markup.
    heading = html.escape(str(title))
    chapter.content = f"<h1>{heading}</h1>{content}"
    ebook.add_item(chapter)
    return chapter
def finalize_epub_book(ebook, spine, toc, output_path):
    """
    Attach navigation, ordering and a stylesheet to the book, then write it.

    :param ebook: The EpubBook object to complete.
    :param spine: Reading order assigned to ``ebook.spine``.
    :param toc: Table of contents assigned to ``ebook.toc``.
    :param output_path: Destination path handed to ``epub.write_epub``.
    """
    ebook.spine = spine
    ebook.toc = toc

    # NCX first, then the navigation document, matching the original order.
    for navigation_item in (epub.EpubNcx(), epub.EpubNav()):
        ebook.add_item(navigation_item)

    stylesheet_text = "BODY { font-family: Arial, sans-serif; }"
    ebook.add_item(
        epub.EpubItem(
            uid="style_nav",
            file_name="style/nav.css",
            media_type="text/css",
            content=stylesheet_text,
        )
    )

    # An empty options dict is passed to the writer.
    epub.write_epub(output_path, ebook, {})
def pdf_to_epub(pdf_path, epub_path):
    """
    Convert a PDF file to an EPUB file.

    Every page becomes a ``<h2>Page N</h2>`` heading followed by a single
    paragraph holding that page's extracted text; all pages are stored in
    one chapter named after the PDF file (without its extension).

    :param pdf_path: The path to the PDF file.
    :param epub_path: The path to the output EPUB file.
    """
    reader = PdfReader(pdf_path)
    book_title = os.path.splitext(os.path.basename(pdf_path))[0]
    ebook = setup_epub_book(book_title, book_title)
    spine = ["nav"]
    toc = []

    sections = []
    for page_num, page in enumerate(reader.pages, start=1):
        # Guard against a page yielding no text object at all, so the
        # literal string "None" never ends up in the book.
        text = page.extract_text() or ""
        # Extracted text is plain text: escape "&", "<" and ">" so it
        # cannot be mistaken for (or break) the surrounding markup.
        escaped = html.escape(text, quote=False)
        sections.append(f"<h2>Page {page_num}</h2><p>{escaped}</p>")
    content = "".join(sections)

    chapter_title = book_title
    filename = "content.xhtml"
    chapter = add_chapter(ebook, chapter_title, filename, content)
    if chapter:
        spine.append(chapter)
        toc.append(epub.Link(filename, chapter_title, filename))
    finalize_epub_book(ebook, spine, toc, epub_path)
def docx_to_epub(docx_path, epub_path):
    """
    Convert a DOCX file to an EPUB file.

    Each non-blank paragraph of the document becomes one ``<p>`` element;
    all paragraphs are stored in a single chapter named after the DOCX
    file (without its extension).

    :param docx_path: The path to the DOCX file.
    :param epub_path: The path to the output EPUB file.
    """
    doc = docx.Document(docx_path)
    book_title = os.path.splitext(os.path.basename(docx_path))[0]
    ebook = setup_epub_book(book_title, book_title)
    spine = ["nav"]
    toc = []

    # Paragraph text is plain text: escape "&", "<" and ">" so it cannot
    # be mistaken for (or break) the surrounding markup.
    content = "".join(
        f"<p>{html.escape(para.text, quote=False)}</p>"
        for para in doc.paragraphs
        if para.text.strip()
    )

    chapter_title = book_title
    filename = "content.xhtml"
    chapter = add_chapter(ebook, chapter_title, filename, content)
    if chapter:
        spine.append(chapter)
        toc.append(epub.Link(filename, chapter_title, filename))
    finalize_epub_book(ebook, spine, toc, epub_path)
def html_to_epub(html_path, epub_path):
    """
    Convert an HTML file to an EPUB file.

    The ``<body>`` element of the document is used as the chapter content.
    When the input has no ``<body>`` (for example an HTML fragment), the
    whole parsed document is used instead.

    :param html_path: The path to the HTML file (read as UTF-8).
    :param epub_path: The path to the output EPUB file.
    """
    with open(html_path, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f, "html.parser")

    # ``soup.body`` is None when the input has no <body> tag; str(None)
    # would otherwise put the literal text "None" into the chapter.
    body = soup.body
    content = str(body if body is not None else soup)

    book_title = os.path.splitext(os.path.basename(html_path))[0]
    ebook = setup_epub_book(book_title, book_title)
    spine = ["nav"]
    toc = []
    chapter_title = book_title
    filename = "content.xhtml"
    chapter = add_chapter(ebook, chapter_title, filename, content)
    if chapter:
        spine.append(chapter)
        toc.append(epub.Link(filename, chapter_title, filename))
    finalize_epub_book(ebook, spine, toc, epub_path)
def txt_to_epub(txt_path, epub_path):
    """
    Convert a TXT file to an EPUB file.

    The whole file becomes one chapter named after the TXT file (without
    its extension); each newline is rendered as a ``<br/>`` line break.

    :param txt_path: The path to the TXT file (read as UTF-8).
    :param epub_path: The path to the output EPUB file.
    """
    with open(txt_path, "r", encoding="utf-8") as f:
        content = f.read()

    book_title = os.path.splitext(os.path.basename(txt_path))[0]
    ebook = setup_epub_book(book_title, book_title)
    spine = ["nav"]
    toc = []
    chapter_title = book_title
    filename = "content.xhtml"

    # Escape first, then insert the line breaks: the file is plain text,
    # so "&", "<" and ">" must not be interpreted as markup, while the
    # "<br/>" tags added afterwards must stay real tags.
    markup = html.escape(content, quote=False).replace("\n", "<br/>")

    chapter = add_chapter(ebook, chapter_title, filename, markup)
    if chapter:
        spine.append(chapter)
        toc.append(epub.Link(filename, chapter_title, filename))
    finalize_epub_book(ebook, spine, toc, epub_path)