Skip to content

Commit

Permalink
feat: Added the ability to translate filename and metadata. resolved #…
Browse files Browse the repository at this point in the history
  • Loading branch information
bookfere committed Mar 11, 2024
1 parent 7e79fcf commit c89e735
Show file tree
Hide file tree
Showing 12 changed files with 349 additions and 183 deletions.
36 changes: 19 additions & 17 deletions advanced.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,7 @@ def show_advanced(self):


class AdvancedTranslation(QDialog):
raw_text = pyqtSignal(str)
original_text = pyqtSignal(str)
translation_text = pyqtSignal((), (str,))
paragraph_sig = pyqtSignal(Paragraph)
progress_bar = pyqtSignal()

preparation_thread = QThread()
Expand Down Expand Up @@ -504,8 +502,7 @@ def terminate_translation():
def terminate_finished():
stop_button.setDisabled(False)
stop_button.setText(_('Stop'))
self.translation_text[str].emit(
self.table.current_paragraph().translation)
self.paragraph_sig.emit(self.table.current_paragraph())
self.trans_worker.finished.connect(terminate_finished)

stack = QStackedWidget()
Expand Down Expand Up @@ -565,11 +562,18 @@ def layout_control(self):
ebook_title.setCursorPosition(0)
output_format = OutputFormat()
output_format.setFixedWidth(150)
save_layout.addWidget(QLabel(_('Title')))
save_layout.addWidget(QLabel(_('Filename')))
save_layout.addWidget(ebook_title, 1)
save_layout.addWidget(QLabel(_('Format')))
save_layout.addWidget(output_format)
save_layout.addWidget(save_ebook)

if self.config.get('to_library'):
ebook_title.setDisabled(True)
ebook_title.setToolTip(_(
"The ebook's filename is automatically managed by Calibre "
'according to metadata since the output path is set to '
'Calibre Library.'))
ebook_title.textChanged.connect(self.ebook.set_title)

layout.addWidget(cache_group)
Expand Down Expand Up @@ -662,10 +666,12 @@ def layout_review(self):
translation_text.cursorPositionChanged.connect(
translation_text.ensureCursorVisible)

self.raw_text.connect(raw_text.setPlainText)
self.original_text.connect(original_text.setPlainText)
self.translation_text.connect(translation_text.clear)
self.translation_text[str].connect(translation_text.setPlainText)
def refresh_translation(paragraph):
    """Show the given paragraph in the three review text boxes.

    Used as the slot for `paragraph_sig`. Some emitters pass
    `self.table.current_paragraph()`, which may be None when there is no
    current paragraph, so that case is handled explicitly instead of
    raising AttributeError inside the slot.

    :param paragraph: the paragraph to display, or None.
    """
    # Always drop the stale translation first, even if nothing replaces it.
    translation_text.clear()
    if paragraph is None:
        return
    raw_text.setPlainText(paragraph.raw)
    original_text.setPlainText(paragraph.original)
    translation_text.setPlainText(paragraph.translation)
self.paragraph_sig.connect(refresh_translation)
self.trans_worker.start.connect(
lambda: translation_text.setReadOnly(False))
self.trans_worker.finished.connect(
Expand Down Expand Up @@ -715,25 +721,21 @@ def change_selected_item():
paragraph = self.table.current_paragraph()
if paragraph is None:
return
self.raw_text.emit(paragraph.raw)
self.original_text.emit(paragraph.original.strip())
self.translation_text[str].emit(paragraph.translation)
self.paragraph_sig.emit(paragraph)
self.table.itemSelectionChanged.connect(change_selected_item)
self.table.setCurrentItem(self.table.item(0, 0))
change_selected_item()

def translation_callback(paragraph):
self.table.row.emit(paragraph.row)
self.raw_text.emit(paragraph.raw)
self.original_text.emit(paragraph.original)
self.translation_text[str].emit(paragraph.translation)
self.paragraph_sig.emit(paragraph)
self.cache.update_paragraph(paragraph)
self.progress_bar.emit()
self.trans_worker.callback.connect(translation_callback)

def streaming_translation(data):
if data == '':
self.translation_text.emit()
self.paragraph_sig.emit(self.table.current_paragraph())
elif isinstance(data, Paragraph):
self.table.setCurrentItem(self.table.item(data.row, 0))
else:
Expand Down
7 changes: 5 additions & 2 deletions lib/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
from .utils import log, sep, uid, open_path
from .cache import get_cache, TranslationCache
from .element import (
get_srt_elements, get_toc_elements, get_page_elements, get_element_handler,
Extraction)
Extraction, get_element_handler, get_srt_elements, get_toc_elements,
get_page_elements, get_metadata_elements)
from .translation import get_translator, get_translation


Expand All @@ -36,6 +36,7 @@ def extract_book(input_path):
plumber = Plumber(input_path, output_path, log=log)

def convert(self, oeb, output_path, input_plugin, opts, log):
elements.extend(get_metadata_elements(oeb.metadata))
elements.extend(get_toc_elements(oeb.toc.nodes, []))
elements.extend(get_page_elements(oeb.manifest.items))
plumber.output_plugin.convert = MethodType(convert, plumber.output_plugin)
Expand Down Expand Up @@ -117,6 +118,8 @@ def convert(self, oeb, output_path, input_plugin, opts, log):
log.info(debug_info)
translation.set_progress(self.report_progress)

elements.extend(get_metadata_elements(oeb.metadata))
# The number of elements may vary with format conversion.
elements.extend(get_toc_elements(oeb.toc.nodes, []))
elements.extend(get_page_elements(oeb.manifest.items))
original_group = element_handler.prepare_original(elements)
Expand Down
53 changes: 48 additions & 5 deletions lib/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,31 @@ def add_translation(
return self.element


class MetadataElement(Element):
    """Element wrapper for an item whose text lives in `content`."""

    def get_raw(self):
        """Return the wrapped item's content."""
        return self.element.content

    def get_text(self):
        """Return the wrapped item's content."""
        return self.element.content

    def get_content(self, placeholder):
        """Return the wrapped item's content; `placeholder` is not used."""
        return self.element.content

    def add_translation(
            self, translation, placeholder, position, translation_lang=None,
            original_color=None, translation_color=None):
        """Write the translation into the item's content and return the item.

        With `translation` set to None the item is returned untouched.
        'only' replaces the content; 'above' and 'left' put the translation
        before the original; any other position puts it after. Both parts
        are joined with a single space. The remaining parameters are not
        used by this method.
        """
        item = self.element
        if translation is None:
            return item
        if position == 'only':
            item.content = translation
            return item
        if position in ('above', 'left'):
            parts = (translation, item.content)
        else:
            parts = (item.content, translation)
        item.content = '%s %s' % parts
        return item


class TocElement(Element):
def get_raw(self):
return self.element.title
Expand All @@ -104,9 +129,9 @@ def add_translation(


class PageElement(Element):
def _get_descendents(self, tags):
def _get_descendents(self, element, tags):
xpath = './/*[%s]' % ' or '.join(['self::x:%s' % tag for tag in tags])
return self._element_copy().xpath(xpath, namespaces=ns)
return element.xpath(xpath, namespaces=ns)

def get_name(self):
return get_name(self.element)
Expand All @@ -125,13 +150,15 @@ def delete(self):
self.element.getparent().remove(self.element)

def get_content(self, placeholder):
for noise in self._get_descendents(('rt', 'rp', 'sup', 'sub')):
element_copy = self._element_copy()
for noise in self._get_descendents(
element_copy, ('rt', 'rp', 'sup', 'sub')):
parent = noise.getparent()
parent.text = (parent.text or '') + (noise.tail or '')
parent.remove(noise)

self.reserve_elements = self._get_descendents(
('img', 'code', 'br', 'hr', 'sub', 'sup', 'kbd'))
element_copy, ('img', 'code', 'br', 'hr', 'sub', 'sup', 'kbd'))
count = 0
for reserve in self.reserve_elements:
replacement = placeholder[0].format(format(count, '05'))
Expand All @@ -146,7 +173,7 @@ def get_content(self, placeholder):
parent.remove(reserve)
count += 1

return trim(''.join(self._element_copy().itertext()))
return trim(''.join(element_copy.itertext()))

def _polish_translation(self, translation):
translation = translation.replace('\n', '<br />')
Expand Down Expand Up @@ -513,6 +540,22 @@ def get_srt_elements(path):
return [SrtElement(section) for section in sections]


def get_metadata_elements(metadata):
    """Collect the translatable metadata items as MetadataElement objects.

    Only the terms listed in `names` are inspected. An item is kept when
    its content contains at least one letter, so values made up purely of
    digits or punctuation are skipped, as are items with no content.

    :param metadata: object providing `iterkeys()` and one attribute per
        key holding an iterable of items with a `content` attribute.
    :return: list of MetadataElement, in iteration order.
    """
    names = (
        'title', 'creator', 'publisher', 'rights', 'subject', 'contributor')
    # Match any Unicode letter (a word character that is neither a digit
    # nor an underscore). A plain [a-z] test would wrongly drop
    # all-uppercase and non-Latin values.
    pattern = re.compile(r'[^\W\d_]', re.UNICODE)
    elements = []
    for key in metadata.iterkeys():
        if key not in names:
            continue
        for item in getattr(metadata, key):
            content = item.content
            # Empty or missing content has nothing to translate and would
            # make pattern.search() raise on None.
            if not content or pattern.search(content) is None:
                continue
            elements.append(MetadataElement(item, 'content.opf'))
    return elements


def get_toc_elements(nodes, elements=[]):
"""Be aware that elements should not overlap with existing data."""
for node in nodes:
Expand Down
1 change: 0 additions & 1 deletion tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ def test_created_paragraph(self):
self.assertFalse(self.paragraph.is_cache)
self.assertIsNone(self.paragraph.error)
self.assertTrue(self.paragraph.aligned)
self.assertIsNone(self.paragraph.background)

def test_get_attributes(self):
self.assertEqual({'class': 'test'}, self.paragraph.get_attributes())
Expand Down
63 changes: 60 additions & 3 deletions tests/test_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@

from lxml import etree

from calibre.ebooks.oeb.base import TOC
from calibre.ebooks.oeb.base import TOC, Metadata

from ..lib.utils import ns
from ..lib.cache import Paragraph
from ..lib.element import (
get_string, get_name, SrtElement, TocElement, PageElement, Extraction,
ElementHandler, ElementHandlerMerge, get_toc_elements)
get_string, get_name, Extraction, ElementHandler, ElementHandlerMerge,
SrtElement, TocElement, PageElement, MetadataElement, get_toc_elements,
get_metadata_elements)
from ..engines import DeeplFreeTranslate
from ..engines.base import Base

Expand Down Expand Up @@ -45,6 +46,22 @@ def test_get_toc_elements(self):
elements = get_toc_elements(toc, [])
self.assertEqual(3, len(elements))

def test_get_metadata_elements(self):
    """Items 'a' and 'b' are collected; the '0' item and 'language' are not."""
    title_item = Mock(Metadata.Item, content='a')
    subject_item = Mock(Metadata.Item, content='b')
    numeric_item = Mock(Metadata.Item, content='0')

    metadata = Mock(Metadata)
    metadata.iterkeys.return_value = ['title', 'subject', 'language']
    metadata.title = [title_item]
    metadata.subject = [subject_item, numeric_item]
    metadata.language = []

    collected = get_metadata_elements(metadata)

    self.assertEqual(2, len(collected))
    self.assertIs(title_item, collected[0].element)
    self.assertIs(subject_item, collected[1].element)


class TestSrtElement(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -84,6 +101,46 @@ def test_add_translation_only(self):
self.assertEqual('A', element[2])


class TestMetadataElement(unittest.TestCase):
    """Tests for MetadataElement built around a mocked item with content 'a'."""

    def setUp(self):
        # Fresh mock per test: add_translation mutates the item's content.
        self.metadata_item = Mock(Metadata.Item, content='a')
        self.element = MetadataElement(self.metadata_item)

    def test_get_raw(self):
        self.assertEqual('a', self.element.get_raw())

    def test_get_text(self):
        self.assertEqual('a', self.element.get_text())

    def test_get_content(self):
        self.assertEqual('a', self.element.get_content(Base.placeholder))

    def test_add_translation_none(self):
        """A None translation returns the wrapped item itself."""
        self.assertIs(
            self.element.element,
            self.element.add_translation(None, Base.placeholder, 'below'))

    def test_add_translation_below(self):
        """'below' appends the translation after the original."""
        element = self.element.add_translation('A', Base.placeholder, 'below')
        self.assertEqual('a A', element.content)

    def test_add_translation_right(self):
        """'right' appends the translation after the original."""
        element = self.element.add_translation('A', Base.placeholder, 'right')
        self.assertEqual('a A', element.content)

    def test_add_translation_above(self):
        """'above' puts the translation before the original."""
        element = self.element.add_translation('A', Base.placeholder, 'above')
        self.assertEqual('A a', element.content)

    def test_add_translation_left(self):
        """'left' puts the translation before the original."""
        element = self.element.add_translation('A', Base.placeholder, 'left')
        self.assertEqual('A a', element.content)

    def test_add_translation_only(self):
        """'only' replaces the original with the translation."""
        element = self.element.add_translation('A', Base.placeholder, 'only')
        self.assertEqual('A', element.content)


class TestTocElement(unittest.TestCase):
def setUp(self):
self.element = TocElement(TOC('a', 'a.html'), 'toc.ncx')
Expand Down
20 changes: 14 additions & 6 deletions translations/es.po
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: Ebook Translator Calibre Plugin\n"
"Report-Msgid-Bugs-To: [email protected]\n"
"POT-Creation-Date: 2024-03-10 12:31+0800\n"
"POT-Creation-Date: 2024-03-11 21:06+0800\n"
"PO-Revision-Date: 2023-04-17 14:17+0800\n"
"Last-Translator: Automatically generated\n"
"Language-Team: none\n"
Expand Down Expand Up @@ -109,8 +109,16 @@ msgstr ""
msgid "Output"
msgstr ""

msgid "Title"
msgstr "Título"
msgid "Filename"
msgstr ""

msgid "Format"
msgstr ""

msgid ""
"The ebook's filename is automatically managed by Calibre according to "
"metadata since the output path is set to Calibre Library."
msgstr ""

msgid "Translated"
msgstr ""
Expand Down Expand Up @@ -148,6 +156,9 @@ msgstr ""
msgid "Output Format"
msgstr "Formato de salida"

msgid "Title"
msgstr "Título"

msgid "Translate"
msgstr "Traducir"

Expand Down Expand Up @@ -206,9 +217,6 @@ msgstr ""
msgid "Merge Length"
msgstr ""

msgid "Filename"
msgstr ""

msgid "Size (MB)"
msgstr ""

Expand Down
Loading

0 comments on commit c89e735

Please sign in to comment.