Skip to content

Commit

Permalink
refactor: Optimized the feature for adding translations to elements.
Browse files Browse the repository at this point in the history
  • Loading branch information
bookfere committed Nov 23, 2023
1 parent 40eabc7 commit 9d45c3d
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 83 deletions.
138 changes: 77 additions & 61 deletions lib/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,12 @@ def add_translation(self, translation, placeholder, position=None,
new_element = etree.XML('<{0} xmlns="{1}">{2}</{0}>'.format(
get_name(self.element), ns['x'], trim(translation)))
# Preserve all attributes from the original element.
for k, v in self.element.items():
if k == 'id' and position != 'only':
for name, value in self.element.items():
if name == 'id' and position != 'only':
continue
if k == 'dir':
v = 'auto'
new_element.set(k, v)
if name == 'dir':
value = 'auto'
new_element.set(name, value)
if color is not None:
new_element.set('style', 'color:%s' % color)
if lang is not None:
Expand Down Expand Up @@ -291,7 +291,9 @@ def __init__(self, placeholder, separator, merge_length=0):
self.lang = None

self.elements = {}
self.original = []
self.originals = []

self.base_originals = []

def get_merge_length(self):
return self.merge_length
Expand All @@ -305,37 +307,42 @@ def set_translation_color(self, color):
def set_translation_lang(self, lang):
self.lang = lang

def get_elements(self):
return self.elements

def remove_unused_elements(self):
if self.position == 'only':
for element in self.elements.values():
element.delete()

def prepare_original(self, elements):
for eid, element in enumerate(elements):
self.elements[eid] = element
count = 0
for oid, element in enumerate(elements):
raw = element.get_raw()
content = element.get_content(self.placeholder)
md5 = uid('%s%s' % (eid, content))
md5 = uid('%s%s' % (oid, content))
attrs = element.get_attributes()
self.original.append(
(eid, md5, raw, content, element.ignored, attrs,
element.page_id))
return self.original
if not element.ignored:
self.elements[count] = element
self.base_originals.append(content)
count += 1
self.originals.append(
(oid, md5, raw, content, element.ignored, attrs,
element.page_id))
return self.originals

def add_translations(self, paragraphs):
count = 0
for paragraph in paragraphs:
element = self.elements.get(paragraph.id)
if paragraph.original not in self.base_originals:
continue
element = self.elements.get(count)
if not element:
continue
translation = paragraph.translation
if translation:
element.add_translation(
translation, self.placeholder, self.position,
self.lang, self.color)
self.elements.pop(paragraph.id)
self.elements.pop(count)
count += 1
for eid, element in self.elements.copy().items():
if element.ignored:
self.elements.pop(eid)
Expand All @@ -345,59 +352,69 @@ def add_translations(self, paragraphs):
class ElementHandlerMerge(ElementHandler):
def prepare_original(self, elements):
raw = ''
content = ''
txt = ''
oid = 0
count = 0
for eid, element in enumerate(elements):
for element in elements:
if element.ignored:
continue
self.elements[eid] = element
separator = self.separator \
or ' %s ' % self.placeholder[0].format(eid)
self.elements[count] = element
code = element.get_raw()
text = element.get_content(self.placeholder) + separator
if len(content + text) < self.merge_length:
raw += code
content += text
content = element.get_content(self.placeholder)
self.base_originals.append(content)
count += 1
content += self.separator
if len(txt + content) < self.merge_length:
raw += code + self.separator
txt += content
continue
elif content:
md5 = uid('%s%s' % (count, content))
self.original.append((count, md5, raw, content, False))
count += 1
elif txt:
md5 = uid('%s%s' % (oid, txt))
self.originals.append((oid, md5, raw, txt, False))
oid += 1
raw = code
content = text
md5 = uid('%s%s' % (count, content))
content and self.original.append((count, md5, raw, content, False))
return self.original
txt = content
md5 = uid('%s%s' % (oid, txt))
txt and self.originals.append((oid, md5, raw, txt, False))
return self.originals

def align_paragraph(self, paragraph):
    """Split a merged paragraph's translation into per-element pieces.

    The paragraph's ``original`` and ``translation`` texts are first
    normalized so that ``self.separator`` delimits the pieces (placeholder
    markers are rewritten to the separator when the original does not end
    with it). Note that this rewrites ``paragraph.original`` and
    ``paragraph.translation`` in place in that case.

    The translation pieces are then padded with ``'-'`` or truncated so
    there is exactly one per original piece, and every piece whose original
    text is not listed in ``self.base_originals`` is dropped.

    :param paragraph: object exposing ``original`` and ``translation``
        string attributes.
    :return: list of translation strings, one per retained original piece.
    """
    # Compatible with using the placeholder as the separator.
    if paragraph.original[-2:] != self.separator:
        pattern = re.compile(
            r'\s*%s\s*' % self.placeholder[1].format(r'(0|[^0]\d*)'))
        paragraph.original = pattern.sub(
            self.separator, paragraph.original)
        paragraph.translation = pattern.sub(
            self.separator, paragraph.translation)
    # Ensure the translation count matches the actual elements count.
    originals = paragraph.original.strip().split(self.separator)
    # NOTE: the separator is interpolated unescaped, so the trailing `+`
    # binds to its last character only (e.g. '\n\n+' means two or more
    # newlines). Kept as-is to preserve existing behavior.
    pattern = re.compile('%s+' % self.separator)
    translations = pattern.sub(self.separator, paragraph.translation)
    translations = translations.strip().split(self.separator)
    offset = len(originals) - len(translations)
    if offset > 0:
        # Too few translated pieces: pad with a visible marker.
        translations += ['-'] * offset
    elif offset < 0:
        # Too many translated pieces: drop the surplus from the end.
        translations = translations[:offset]
    # Filter pairwise instead of popping by `originals.index(...)`: popping
    # shifts later indices (wrong item removed or IndexError on the second
    # removal) and `index` always resolves to the first duplicate.
    known = set(self.base_originals)
    return [
        translation
        for original, translation in zip(originals, translations)
        if not original or original in known]

def add_translations(self, paragraphs):
content = ''
translations = []
for paragraph in paragraphs:
tail = paragraph.original[-2:]
tail = tail if tail == self.separator else ''
if paragraph.translation:
content += paragraph.translation + tail
# Check if the translated content contains at least one separator;
# if none is found, use the placeholder to separate paragraphs.
if self.separator and self.separator in content:
pattern = '%s+' % self.separator
content = re.sub(pattern, self.separator, content)
else:
self.separator = None

translations.extend(self.align_paragraph(paragraph))
count = 0
for eid, element in self.elements.copy().items():
separator = self.separator or self.placeholder[1].format(eid)
matches = re.search(separator, content)
if not matches:
if element.ignored:
continue
pattern = matches.group(0)
end = content.find(pattern)
part = content[:end]
content = content.replace(part + pattern, '', 1)
if not element.ignored:
element.add_translation(
part.strip(), self.placeholder, self.position, self.lang,
self.color)
self.elements.pop(eid)
element.add_translation(
translations[count], self.placeholder, self.position,
self.lang, self.color)
count += 1
self.elements.pop(eid)
self.remove_unused_elements()


Expand Down Expand Up @@ -449,5 +466,4 @@ def get_element_handler(placeholder, separator):
handler.set_translation_position(
config.get('translation_position'))
handler.set_translation_color(config.get('translation_color'))

return handler
98 changes: 76 additions & 22 deletions tests/test_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,9 +474,11 @@ def test_prepare_original(self, mock_uid):
def test_add_translations(self):
self.handler.prepare_original(self.elements)
translations = [
Paragraph(0, 'm1', '<p id="a">a</p>', 'a', False, '{"id": "a"}',
Paragraph(0, 'm1', '<p id="x">x</p>', 'x', False, '{"id": "x"}',
'p1', None, 'ENGINE', 'LANG'),
Paragraph(1, 'm1', '<p id="a">a</p>', 'a', False, '{"id": "a"}',
'p1', 'A', 'ENGINE', 'LANG'),
Paragraph(1, 'm2', '<p id="b">b</p>', 'b', False, '{"id": "b"}',
Paragraph(2, 'm2', '<p id="b">b</p>', 'b', False, '{"id": "b"}',
'p1', 'B', 'ENGINE', 'LANG'),
Paragraph(3, 'm3', '<p id="c">c</p>', 'c', False,
'{"id": "c", "class": "c"}', 'p1', 'C', 'ENGINE',
Expand All @@ -485,6 +487,7 @@ def test_add_translations(self):
self.handler.add_translations(translations)

elements = self.xhtml.findall('./x:body/*', namespaces=ns)

self.assertEqual(8, len(elements))
self.assertEqual('a', elements[0].text)
self.assertEqual('A', elements[1].text)
Expand All @@ -493,6 +496,7 @@ def test_add_translations(self):

self.assertEqual('c', elements[5].text)
self.assertEqual('C', elements[6].text)

self.assertIsNone(elements[6].get('id'))
self.assertEqual('c', elements[6].get('class'))

Expand Down Expand Up @@ -540,24 +544,64 @@ def setUp(self):
in self.xhtml.findall('./x:body/*', namespaces=ns)]
self.elements[-1].set_ignored(True)
self.elements[-3].set_ignored(True)
self.handler = ElementHandlerMerge(Base.placeholder, None, 1000)
self.handler = ElementHandlerMerge(
Base.placeholder, Base.separator, 1000)

@patch('calibre_plugins.ebook_translator.lib.element.uid')
def test_prepare_original_merge_placeholder(self, mock_uid):
mock_uid.return_value = 'm1'
self.assertEqual([(
0, 'm1', '<p id="a">a</p><p id="b">b</p><p id="c" class="c">c</p>',
'a {{id_0}} b {{id_1}} c {{id_3}} ', False)],
self.handler.prepare_original(self.elements))
def test_align_paragraph(self):
self.handler.prepare_original(self.elements)

paragraph = Paragraph(
0, 'm1', '<p id="a">a</p><p id="b">b</p><p id="c">c</p>',
'a {{id_0}} b {{id_1}} c {{id_3}}', False, None, None,
'A {{id_0}} B {{id_1}} C {{id_3}}', 'ENGINE', 'LANG')
self.assertEqual(
['A', 'B', 'C'], self.handler.align_paragraph(paragraph))

paragraph = Paragraph(
0, 'm1', '<p id="a">a</p><p id="b">b</p><p id="c">c</p>',
'a {{id_0}} b {{id_1}} c {{id_3}}', False, None, None,
'A {{id_0}} B {{id_1}} C {{id_4}} D {{id_5}}', 'ENGINE', 'LANG')
self.handler.align_paragraph(paragraph)
self.assertEqual(
['A', 'B', 'C'], self.handler.align_paragraph(paragraph))

paragraph = Paragraph(
0, 'm1', '<p id="a">a</p><p id="b">b</p><p id="c">c</p>',
'a {{id_0}} b {{id_1}} c {{id_3}}', False, None, None,
'A {{id_0}} B {{id_1}}', 'ENGINE', 'LANG')
self.assertEqual(
['A', 'B', '-'], self.handler.align_paragraph(paragraph))

paragraph = Paragraph(
0, 'm1', '<p id="a">a</p><p id="b">b</p><p id="c">c</p>',
'a\n\nb\n\nc\n\n', False, None, None, 'A\n\nB\n\nC\n\n',
'ENGINE', 'LANG')
self.assertEqual(
['A', 'B', 'C'], self.handler.align_paragraph(paragraph))

paragraph = Paragraph(
0, 'm1', '<p id="a">a</p><p id="b">b</p><p id="c">c</p>',
'a\n\nb\n\nc\n\n', False, None, None, 'A\n\nB\n\nC\n\nD\n\nE\n\n',
'ENGINE', 'LANG')
self.assertEqual(
['A', 'B', 'C'], self.handler.align_paragraph(paragraph))

paragraph = Paragraph(
0, 'm1', '<p id="a">a</p><p id="b">b</p><p id="c">c</p>',
'a\n\nb\n\nc\n\n', False, None, None, 'A\n\nB\n\n',
'ENGINE', 'LANG')
self.assertEqual(
['A', 'B', '-'], self.handler.align_paragraph(paragraph))

@patch('calibre_plugins.ebook_translator.lib.element.uid')
def test_prepare_original_merge_separator(self, mock_uid):
mock_uid.return_value = 'm1'
self.handler.separator = Base.separator
self.assertEqual([(
0, 'm1', '<p id="a">a</p><p id="b">b</p><p id="c" class="c">c</p>',
'a\n\nb\n\nc\n\n', False)],
0, 'm1', '<p id="a">a</p>\n\n<p id="b">b</p>\n\n<p id="c" '
'class="c">c</p>\n\n', 'a\n\nb\n\nc\n\n', False)],
self.handler.prepare_original(self.elements))
self.assertEqual(['a', 'b', 'c'], self.handler.base_originals)

@patch('calibre_plugins.ebook_translator.lib.element.uid')
def test_prepare_original_merge_separator_multiple(self, mock_uid):
Expand All @@ -569,6 +613,7 @@ def test_prepare_original_merge_separator_multiple(self, mock_uid):
(1, 'm2', '<p id="b">b</p>', 'b\n\n', False),
(2, 'm3', '<p id="c" class="c">c</p>', 'c\n\n', False)]
self.assertEqual(items, self.handler.prepare_original(self.elements))
self.assertEqual(['a', 'b', 'c'], self.handler.base_originals)

def test_add_translations_merge_placeholder(self):
self.handler.prepare_original(self.elements)
Expand Down Expand Up @@ -617,12 +662,13 @@ def test_add_translations_merge_separator(self):

elements = self.xhtml.findall('./x:body/*', namespaces=ns)

self.assertEqual(7, len(elements))
self.assertEqual(8, len(elements))
self.assertEqual('a', elements[0].text)
self.assertEqual('A B', elements[1].text)
self.assertEqual('b', elements[2].text)
self.assertEqual('C', elements[3].text)
self.assertEqual('c', elements[5].text)
self.assertEqual('-', elements[6].text)

def test_add_translations_merge_separator_multiple(self):
self.handler.merge_length = 2
Expand Down Expand Up @@ -656,13 +702,15 @@ def test_add_translations_merge_placeholder_missing_id(self):
'A B {{id_1}} C {{id_3}}', 'ENGINE', 'LANG')])

elements = self.xhtml.findall('./x:body/*', namespaces=ns)
self.assertEqual(7, len(elements))

self.assertEqual(8, len(elements))
self.assertEqual('a', elements[0].text)
self.assertEqual('b', elements[1].text)
self.assertEqual('A B', elements[2].text)
self.assertEqual('A B', elements[1].text)
self.assertEqual('b', elements[2].text)
self.assertEqual('C', elements[3].text)

self.assertEqual('c', elements[4].text)
self.assertEqual('C', elements[5].text)
self.assertEqual('c', elements[5].text)
self.assertEqual('-', elements[6].text)

def test_add_translations_merge_placeholder_missing_newline(self):
self.handler.separator = Base.separator
Expand All @@ -673,13 +721,15 @@ def test_add_translations_merge_placeholder_missing_newline(self):
'A B\n\nC\n\n', 'ENGINE', 'LANG')])

elements = self.xhtml.findall('./x:body/*', namespaces=ns)
self.assertEqual(7, len(elements))

self.assertEqual(8, len(elements))
self.assertEqual('a', elements[0].text)
self.assertEqual('A B', elements[1].text)
self.assertEqual('b', elements[2].text)
self.assertEqual('C', elements[3].text)

self.assertEqual('c', elements[5].text)
self.assertEqual('-', elements[6].text)

def test_add_translations_merge_palceholder_only(self):
self.handler.position = 'only'
Expand Down Expand Up @@ -725,10 +775,11 @@ def test_add_translations_merge_placeholder_only_missing_id(self):
'A B {{id_1}} C {{id_3}}', 'ENGINE', 'LANG')])

elements = self.xhtml.findall('./x:body/*', namespaces=ns)
self.assertEqual(4, len(elements))
self.assertEqual(5, len(elements))
self.assertEqual('A B', elements[0].text)
self.assertEqual('C', elements[1].text)

self.assertEqual('C', elements[2].text)
self.assertEqual('-', elements[3].text)

def test_add_translations_merge_separator_only_missing_id(self):
self.handler.position = 'only'
Expand All @@ -741,6 +792,9 @@ def test_add_translations_merge_separator_only_missing_id(self):
'A B\n\nC\n\n', 'ENGINE', 'LANG')])

elements = self.xhtml.findall('./x:body/*', namespaces=ns)
self.assertEqual(4, len(elements))

self.assertEqual(5, len(elements))
self.assertEqual('A B', elements[0].text)
self.assertEqual('C', elements[1].text)

self.assertEqual('-', elements[3].text)

0 comments on commit 9d45c3d

Please sign in to comment.