From 3734339fee49d463211f0ca67dfb316d3f30181d Mon Sep 17 00:00:00 2001 From: kaldan007 Date: Tue, 16 Mar 2021 15:15:08 +0530 Subject: [PATCH 1/4] fix: toc level variable changed --- openpecha/serializers/epub.py | 22 ++++++++++++---------- tests/integration/test_tsadra.py | 22 +++++----------------- 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/openpecha/serializers/epub.py b/openpecha/serializers/epub.py index ea925d34..f23b4410 100644 --- a/openpecha/serializers/epub.py +++ b/openpecha/serializers/epub.py @@ -36,6 +36,13 @@ class Tsadra_template: footnote_EP = "" footnote_reference_SP = ' Date: Tue, 16 Mar 2021 15:27:24 +0530 Subject: [PATCH 2/4] fix: toc level variable changed --- openpecha/serializers/epub.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/openpecha/serializers/epub.py b/openpecha/serializers/epub.py index f23b4410..fd992a13 100644 --- a/openpecha/serializers/epub.py +++ b/openpecha/serializers/epub.py @@ -318,9 +318,15 @@ def serialize(self, toc_levels={}, output_path="./output/epub_output"): Path("template.css").write_bytes(template.content) # Running ebook-convert command to convert html file to .epub (From calibre) # XPath expression to detect chapter titles. - level1_toc_Xpath = Tsadra_template.toc_xpaths.get(toc_levels["1"], "") - level2_toc_Xpath = Tsadra_template.toc_xpaths.get(toc_levels["2"], "") - level3_toc_Xpath = Tsadra_template.toc_xpaths.get(toc_levels["3"], "") + level1_toc_Xpath = Tsadra_template.toc_xpaths.get( + toc_levels.get("1", ""), "" + ) + level2_toc_Xpath = Tsadra_template.toc_xpaths.get( + toc_levels.get("2", ""), "" + ) + level3_toc_Xpath = Tsadra_template.toc_xpaths.get( + toc_levels.get("3", ""), "" + ) cover_path = self.opf_path / f"assets/image/{cover_image}" out_epub_fn = output_path / f"{self.meta['id']}.epub" From 044e119ecdc8638b243128e15788d51307636f19 Mon Sep 17 00:00:00 2001 From: kaldan007 Date: Thu, 18 Mar 2021 11:33:16 +0530 Subject: [PATCH 3/4] fix(epub-serializer): removed credit page layer n added credit page img tag after first author --- openpecha/formatters/layers.py | 12 ------------ openpecha/formatters/tsadra.py | 21 +++------------------ openpecha/serializers/epub.py | 13 +++++++++---- tests/integration/test_tsadra.py | 2 +- 4 files changed, 13 insertions(+), 35 deletions(-) diff --git a/openpecha/formatters/layers.py b/openpecha/formatters/layers.py index 5bbf143b..01b50605 100644 --- a/openpecha/formatters/layers.py +++ b/openpecha/formatters/layers.py @@ -26,7 +26,6 @@ "Author", "Archaic", "Span", - "CreditPage", "Footnote", ] @@ -38,7 +37,6 @@ class AnnType: poti_title = "PotiTitle" author = "Author" chapter = "Chapter" - credit_page = "Credit_page" topic = "Text" sub_topic = "SubText" @@ -70,9 +68,6 @@ class _attr_names: START = "start" END = "end" - # Credit page - CREDIT_PAGE_IMG_NAME = "credit_page_img_name" - # Page PAGE_INDEX = "page_index" # Page number based on Volume specified, type: int PAGE_INFO = "page_info" # Page information. type: str @@ -178,13 +173,6 @@ def BookNumber(span): return {_attr_names.SPAN: span} -def CreditPage(credit_page_img_name, span): - return { - _attr_names.CREDIT_PAGE_IMG_NAME: credit_page_img_name, - _attr_names.SPAN: span, - } - - def PotiTitle(span): return {_attr_names.SPAN: span} diff --git a/openpecha/formatters/tsadra.py b/openpecha/formatters/tsadra.py index 11934bd2..c0801a47 100644 --- a/openpecha/formatters/tsadra.py +++ b/openpecha/formatters/tsadra.py @@ -8,7 +8,7 @@ from .formatter import BaseFormatter from .layers import * -from .layers import AnnType, CreditPage +from .layers import AnnType class TsadraTemplate: @@ -19,7 +19,6 @@ class TsadraTemplate: "credits-page_front-page---text-author", "credits-page_front-page---text-author1", ] - credit_page = "credits-page_epub-edition-line" book_titles = [ "credits-page_front-title", "tibetan-book-title", @@ -77,7 +76,6 @@ def __init__(self, output_path="./output", metadata=None): self.walker = 0 # The walker to traverse every character in the pecha self.book_title = [] # list variable to store book title index self.sub_title = [] - self.credit_page = [] self.book_number = [] self.poti_title = [] self.author = [] # list variable to store author annotion index @@ -167,18 +165,6 @@ def build_layers(self, html): self.base_text += book_num + "\n" self.walker += len(book_num) + 1 - elif p["class"][0] == TsadraTemplate.credit_page: - credit_page = self.get_credit_page(p) - if credit_page: - self.credit_page.append( - ( - None, - CreditPage(credit_page, Span(self.walker, self.walker + 1)), - ) - ) - self.base_text += " " - self.walker += 1 - elif ( p["class"][0] in TsadraTemplate.author ): # to get the author annotation index @@ -468,7 +454,6 @@ def get_result(self): AnnType.sub_title: [self.sub_title], AnnType.book_number: [self.book_number], AnnType.poti_title: [self.poti_title], - AnnType.credit_page: [self.credit_page], AnnType.author: [self.author], AnnType.chapter: [self.chapter], AnnType.topic: [self.topic], @@ -537,8 +522,8 @@ def create_opf(self, input_path, id_): # cover image path image_path = input_path / "image" - (self.dirs["opf_path"] / "asset").mkdir(exist_ok=True) - os.system(f"cp -R {image_path} {self.dirs['opf_path']}/asset") + (self.dirs["opf_path"] / "assets").mkdir(exist_ok=True) + os.system(f"cp -R {image_path} {self.dirs['opf_path']}/assets") # parse layers for html in self.get_input(input_path): diff --git a/openpecha/serializers/epub.py b/openpecha/serializers/epub.py index fd992a13..6816fe8a 100644 --- a/openpecha/serializers/epub.py +++ b/openpecha/serializers/epub.py @@ -89,10 +89,6 @@ def apply_annotation(self, vol_id, ann, uuid2localid): elif ann["type"] == AnnType.peydurma: start_payload = "#" only_start_ann = True - elif ann["type"] == AnnType.credit_page: - credit_page_ann = ann["credit_page_img_name"] - start_payload = f'{Tsadra_template.credit_page_SP}

\n' - only_start_ann = True elif ann["type"] == AnnType.error_candidate: start_payload = "[" end_payload = "]" @@ -277,6 +273,14 @@ def get_footnote_references(self, footnotes): footnote_references += f'{p_tag}{Tsadra_template.footnote_reference_SP} id="fr{footnote_id}">{footnote["footnote_ref"]}

' return footnote_references + def add_credit_page(self, result): + author_pat = re.search('

.+

', result) + credit_pg_name = self.meta["source_metadata"].get("credit", "") + if credit_pg_name: + credit_page_pat = f'{author_pat[0]}\n{Tsadra_template.credit_page_SP}

\n' + result = re.sub(author_pat[0], credit_page_pat, result, 1) + return result + def serialize(self, toc_levels={}, output_path="./output/epub_output"): """This module serialize .opf file to other format such as .epub etc. In case of epub, we are using calibre ebook-convert command to do the conversion by passing our custom css template @@ -297,6 +301,7 @@ def serialize(self, toc_levels={}, output_path="./output/epub_output"): results = self.get_result() for vol_id, result in results.items(): + result = self.add_credit_page(result) footnote_ref_tag = "" if "Footnote" in self.layers: footnote_fn = self.opf_path / "layers" / vol_id / "Footnote.yml" diff --git a/tests/integration/test_tsadra.py b/tests/integration/test_tsadra.py index 5215957b..831319b2 100644 --- a/tests/integration/test_tsadra.py +++ b/tests/integration/test_tsadra.py @@ -9,7 +9,7 @@ pecha_name = f"P{pecha_id:06}" ebook_path = f"./tests/data/serialize/tsadra/src/{pecha_name}/OEBPS/" opfs_path = "./tests/data/serialize/tsadra" - opf_path = f"./tests/data/serialize/tsadra/{pecha_name}.opf/" + opf_path = f"./tests/data/serialize/tsadra/{pecha_name}/{pecha_name}.opf/" hfml_path = "./output/demo/output/P000113_hfml/" ebook_output_path = "./tests/data/serialize/tsadra/ebook" From 0342a179862aa663fe38059290bf67332bb3f1e8 Mon Sep 17 00:00:00 2001 From: kaldan007 Date: Thu, 18 Mar 2021 12:06:16 +0530 Subject: [PATCH 4/4] fix(epub-serializer): removed credit page layer n added credit page img tag after first author --- tests/test_formatter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_formatter.py b/tests/test_formatter.py index 8c964dc3..45108f26 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -307,7 +307,6 @@ def test_tsadra_formatter(self): expected_result = { AnnType.book_title: [[(None, {"span": {"start": 0, "end": 84}})]], AnnType.sub_title: [[]], - AnnType.credit_page: [[]], AnnType.book_number: [[]], AnnType.poti_title: [[]], AnnType.author: [