Skip to content

Commit

Permalink
feat: Handle more image cases in repair_image
Browse files Browse the repository at this point in the history
  • Loading branch information
ewuerger committed Jan 16, 2025
1 parent 7d37311 commit 47ae895
Showing 1 changed file with 121 additions and 73 deletions.
194 changes: 121 additions & 73 deletions capella2polarion/converters/element_converter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Copyright DB InfraGO AG and contributors
# SPDX-License-Identifier: Apache-2.0
"""Objects for serialization of capella objects to workitems."""

from __future__ import annotations

import base64
import collections
import hashlib
import logging
Expand All @@ -25,24 +27,27 @@
from capella2polarion.connectors import polarion_repo
from capella2polarion.converters import data_session, polarion_html_helper

RE_DESCR_LINK_PATTERN = re.compile(
r"<a href=\"hlink://([^\"]+)\">([^<]+)<\/a>"
)
RE_DESCR_LINK_PATTERN = re.compile(r"<a href=\"hlink://([^\"]+)\">([^<]+)<\/a>")
RE_CAMEL_CASE_2ND_WORD_PATTERN = re.compile(r"([a-z]+)([A-Z][a-z]+)")

logger = logging.getLogger(__name__)
C2P_IMAGE_PREFIX = "__C2P__"
JINJA_RENDERED_IMG_CLS = "jinja-rendered-image"


class ExternalURLAttachmentFoundError(Exception):
"""Raised when external URL attachments are found.
They are not supported because of security reasons.
"""


def resolve_element_type(type_: str) -> str:
"""Return a valid Type ID for polarion for a given ``obj``."""
return type_[0].lower() + type_[1:]


def _format_texts(
type_texts: dict[str, list[str]]
) -> dict[str, dict[str, str]]:
def _format_texts(type_texts: dict[str, list[str]]) -> dict[str, dict[str, str]]:
def _format(texts: list[str]) -> dict[str, str]:
if len(texts) > 1:
items = "".join(f"<li>{text}</li>" for text in texts)
Expand Down Expand Up @@ -87,9 +92,7 @@ def serialize(self, uuid: str) -> data_model.CapellaWorkItem | None:
"""Return a CapellaWorkItem for the given diagram or element."""
converter_data = self.converter_session[uuid]
work_item_id = None
if old := self.capella_polarion_mapping.get_work_item_by_capella_uuid(
uuid
):
if old := self.capella_polarion_mapping.get_work_item_by_capella_uuid(uuid):
work_item_id = old.id

self.__generic_work_item(converter_data, work_item_id)
Expand All @@ -104,9 +107,7 @@ def serialize(self, uuid: str) -> data_model.CapellaWorkItem | None:
] = getattr(self, f"_{converter}")
serializer(converter_data, **params)
except Exception as error:
converter_data.errors.add(
", ".join([str(a) for a in error.args])
)
converter_data.errors.add(", ".join([str(a) for a in error.args]))
converter_data.work_item = None

if converter_data.errors:
Expand Down Expand Up @@ -182,9 +183,7 @@ def _render_jinja_template(
model=self.model,
work_item=converter_data.work_item,
)
_, text, _ = self._sanitize_text(
converter_data.capella_element, rendered_jinja
)
_, text, _ = self._sanitize_text(converter_data.capella_element, rendered_jinja)
return text

def setup_env(self, env: jinja2.Environment):
Expand Down Expand Up @@ -265,7 +264,9 @@ def _draw_additional_attributes_diagram(
"value": diagram_html,
}

def _sanitize_linked_text(self, obj: m.ModelElement | m.Diagram) -> tuple[
def _sanitize_linked_text(
self, obj: m.ModelElement | m.Diagram
) -> tuple[
list[str],
markupsafe.Markup,
list[data_model.Capella2PolarionAttachment],
Expand All @@ -291,9 +292,7 @@ def _sanitize_text(
]:
referenced_uuids: list[str] = []
replaced_markup = RE_DESCR_LINK_PATTERN.sub(
lambda match: self._replace_markup(
obj.uuid, match, referenced_uuids, 2
),
lambda match: self._replace_markup(obj.uuid, match, referenced_uuids, 2),
text,
)

Expand All @@ -307,56 +306,113 @@ def repair_images(node: etree._Element) -> None:
):
return

file_url = pathlib.PurePosixPath(node.get("src"))
workspace = file_url.parts[0]
file_path = pathlib.PurePosixPath(*file_url.parts[1:])
mime_type, _ = mimetypes.guess_type(file_url)
resources = self.model.resources
filehandler = resources[
["\x00", workspace][workspace in resources]
]
try:
with filehandler.open(file_path, "r") as img:
content = img.read()
file_name = (
hashlib.md5(str(file_path).encode("utf8")).hexdigest()
+ file_path.suffix
)
attachments.append(
data_model.Capella2PolarionAttachment(
"",
"",
file_path.name,
content,
mime_type,
file_name,
src = node.get("src")
assert src is not None
if src.startswith("data:"):
stem = self.handle_base64_image(node, attachments, obj)
elif src.startswith("http://") or src.startswith("https://"):
stem = self.handle_external_url(node, obj)
else:
stem = self.handle_workspace_image(node, attachments, obj)

if self.generate_figure_captions:
caption = node.get("alt", f'Image "{stem or 'no name'}" of {obj.name}')
node.addnext(
html.fromstring(
polarion_html_helper.POLARION_CAPTION.format(
label="Figure", caption=caption
)
)
# We use the filename here as the ID is unknown here
# This needs to be refactored after updating attachments
node.attrib["src"] = f"workitemimg:{file_name}"
if self.generate_figure_captions:
caption = node.get(
"alt", f'Image "{file_url.stem}" of {obj.name}'
)
node.addnext(
html.fromstring(
polarion_html_helper.POLARION_CAPTION.format(
label="Figure", caption=caption
)
)
)

except FileNotFoundError:
self.converter_session[obj.uuid].errors.add(
f"Inline image can't be found from {file_path!r}."
)

repaired_markup = chelpers.process_html_fragments(
replaced_markup, repair_images
)
return referenced_uuids, repaired_markup, attachments

def handle_base64_image(
self,
node: etree._Element,
attachments: list[data_model.Capella2PolarionAttachment],
obj: m.ModelElement | m.Diagram,
):
"""Handle base64-encoded images."""
src = node.get("src")
assert src is not None
mime_type = src.split(";")[0].split(":")[1]
base64_data = src.split(",")[1]
filetype = mimetypes.guess_extension(mime_type)

try:
file_path_name = f"{obj.uuid}_b64_image"
file_name = hashlib.md5(file_path_name.encode("utf8")).hexdigest() + (
filetype or ".jpg"
)
attachments.append(
data_model.Capella2PolarionAttachment(
"",
"",
file_path_name,
base64.b64decode(base64_data),
mime_type,
file_name,
)
)
node.attrib["src"] = f"workitemimg:{file_name}"
except Exception as e:
self.converter_session[obj.uuid].errors.add(
f"Failed to process base64 image: {str(e)}"
)

def handle_external_url(
self, node: etree._Element, obj: m.ModelElement | m.Diagram
):
"""Handle external URLs by raising a custom exception."""
src = node.get("src")
self.converter_session[obj.uuid].errors.add(
f"External URL found in 'src': {src!r}. Raising ExternalURLAttachmentFoundError."
)
raise ExternalURLAttachmentFoundError(f"External URL not allowed: {src}")

def handle_workspace_image(
self,
node: etree._Element,
attachments: list[data_model.Capella2PolarionAttachment],
obj: m.ModelElement | m.Diagram,
):
"""Handle images referenced as file paths in the workspace."""
src = node.get("src")
assert src is not None
file_url = pathlib.PurePosixPath(src)
workspace = file_url.parts[0]
file_path = pathlib.PurePosixPath(*file_url.parts[1:])
mime_type, _ = mimetypes.guess_type(file_url)
resources = self.model.resources
filehandler = resources[["\x00", workspace][workspace in resources]]

try:
with filehandler.open(file_path, "r") as img:
content = img.read()
file_name = (
hashlib.md5(str(file_path).encode("utf8")).hexdigest()
+ file_path.suffix
)
attachments.append(
data_model.Capella2PolarionAttachment(
"",
"",
file_path.name,
content,
mime_type,
file_name,
)
)
node.attrib["src"] = f"workitemimg:{file_name}"
except FileNotFoundError:
self.converter_session[obj.uuid].errors.add(
f"Inline image can't be found from {file_path!r}."
)

def _replace_markup(
self,
origin_uuid: str,
Expand All @@ -376,9 +432,7 @@ def _replace_markup(
self.converter_session[origin_uuid].errors.add(
f"Non-existing model element referenced in description: {uuid}"
)
return polarion_html_helper.strike_through(
match.group(default_group)
)
return polarion_html_helper.strike_through(match.group(default_group))
if pid := self.capella_polarion_mapping.get_work_item_id(uuid):
referenced_uuids.append(uuid)
return polarion_html_helper.POLARION_WORK_ITEM_URL.format(pid=pid)
Expand All @@ -400,9 +454,7 @@ def _get_requirement_types_text(
continue

if not (req.type and req.text):
identifier = (
req.long_name or req.name or req.summary or req.uuid
)
identifier = req.long_name or req.name or req.summary or req.uuid
self.converter_session[obj.uuid].errors.add(
f"Found Requirement without text or type on {identifier!r}"
)
Expand Down Expand Up @@ -498,9 +550,7 @@ def get_condition(cap: m.ModelElement, name: str) -> str:
post_condition = get_condition(obj, "postcondition")

assert converter_data.work_item, "No work item set yet"
converter_data.work_item.preCondition = polarion_api.HtmlContent(
pre_condition
)
converter_data.work_item.preCondition = polarion_api.HtmlContent(pre_condition)
converter_data.work_item.postCondition = polarion_api.HtmlContent(
post_condition
)
Expand Down Expand Up @@ -599,9 +649,7 @@ def _jinja_as_description(
assert (
converter_data.work_item.description
), "Description should already be defined"
converter_data.work_item.description.value = (
self._render_jinja_template(
template_folder, template_path, converter_data
)
converter_data.work_item.description.value = self._render_jinja_template(
template_folder, template_path, converter_data
)
return converter_data.work_item

0 comments on commit 47ae895

Please sign in to comment.