diff --git a/capella2polarion/converters/element_converter.py b/capella2polarion/converters/element_converter.py index 56b9409..7c34f12 100644 --- a/capella2polarion/converters/element_converter.py +++ b/capella2polarion/converters/element_converter.py @@ -1,8 +1,10 @@ # Copyright DB InfraGO AG and contributors # SPDX-License-Identifier: Apache-2.0 """Objects for serialization of capella objects to workitems.""" + from __future__ import annotations +import base64 import collections import hashlib import logging @@ -25,9 +27,7 @@ from capella2polarion.connectors import polarion_repo from capella2polarion.converters import data_session, polarion_html_helper -RE_DESCR_LINK_PATTERN = re.compile( - r"([^<]+)<\/a>" -) +RE_DESCR_LINK_PATTERN = re.compile(r"([^<]+)<\/a>") RE_CAMEL_CASE_2ND_WORD_PATTERN = re.compile(r"([a-z]+)([A-Z][a-z]+)") logger = logging.getLogger(__name__) @@ -35,14 +35,19 @@ JINJA_RENDERED_IMG_CLS = "jinja-rendered-image" +class ExternalURLAttachmentFoundError(Exception): + """Raised when external URL attachments are found. + + They are not supported because of security reasons. + """ + + def resolve_element_type(type_: str) -> str: """Return a valid Type ID for polarion for a given ``obj``.""" return type_[0].lower() + type_[1:] -def _format_texts( - type_texts: dict[str, list[str]] -) -> dict[str, dict[str, str]]: +def _format_texts(type_texts: dict[str, list[str]]) -> dict[str, dict[str, str]]: def _format(texts: list[str]) -> dict[str, str]: if len(texts) > 1: items = "".join(f"
  • {text}
  • " for text in texts) @@ -87,9 +92,7 @@ def serialize(self, uuid: str) -> data_model.CapellaWorkItem | None: """Return a CapellaWorkItem for the given diagram or element.""" converter_data = self.converter_session[uuid] work_item_id = None - if old := self.capella_polarion_mapping.get_work_item_by_capella_uuid( - uuid - ): + if old := self.capella_polarion_mapping.get_work_item_by_capella_uuid(uuid): work_item_id = old.id self.__generic_work_item(converter_data, work_item_id) @@ -104,9 +107,7 @@ def serialize(self, uuid: str) -> data_model.CapellaWorkItem | None: ] = getattr(self, f"_{converter}") serializer(converter_data, **params) except Exception as error: - converter_data.errors.add( - ", ".join([str(a) for a in error.args]) - ) + converter_data.errors.add(", ".join([str(a) for a in error.args])) converter_data.work_item = None if converter_data.errors: @@ -182,9 +183,7 @@ def _render_jinja_template( model=self.model, work_item=converter_data.work_item, ) - _, text, _ = self._sanitize_text( - converter_data.capella_element, rendered_jinja - ) + _, text, _ = self._sanitize_text(converter_data.capella_element, rendered_jinja) return text def setup_env(self, env: jinja2.Environment): @@ -265,7 +264,9 @@ def _draw_additional_attributes_diagram( "value": diagram_html, } - def _sanitize_linked_text(self, obj: m.ModelElement | m.Diagram) -> tuple[ + def _sanitize_linked_text( + self, obj: m.ModelElement | m.Diagram + ) -> tuple[ list[str], markupsafe.Markup, list[data_model.Capella2PolarionAttachment], @@ -291,9 +292,7 @@ def _sanitize_text( ]: referenced_uuids: list[str] = [] replaced_markup = RE_DESCR_LINK_PATTERN.sub( - lambda match: self._replace_markup( - obj.uuid, match, referenced_uuids, 2 - ), + lambda match: self._replace_markup(obj.uuid, match, referenced_uuids, 2), text, ) @@ -307,49 +306,23 @@ def repair_images(node: etree._Element) -> None: ): return - file_url = pathlib.PurePosixPath(node.get("src")) - workspace = file_url.parts[0] - file_path = pathlib.PurePosixPath(*file_url.parts[1:]) - mime_type, _ = mimetypes.guess_type(file_url) - resources = self.model.resources - filehandler = resources[ - ["\x00", workspace][workspace in resources] - ] - try: - with filehandler.open(file_path, "r") as img: - content = img.read() - file_name = ( - hashlib.md5(str(file_path).encode("utf8")).hexdigest() - + file_path.suffix - ) - attachments.append( - data_model.Capella2PolarionAttachment( - "", - "", - file_path.name, - content, - mime_type, - file_name, + src = node.get("src") + assert src is not None + if src.startswith("data:"): + stem = self.handle_base64_image(node, attachments, obj) + elif src.startswith("http://") or src.startswith("https://"): + stem = self.handle_external_url(node, obj) + else: + stem = self.handle_workspace_image(node, attachments, obj) + + if self.generate_figure_captions: + caption = node.get("alt", f'Image "{stem or 'no name'}" of {obj.name}') + node.addnext( + html.fromstring( + polarion_html_helper.POLARION_CAPTION.format( + label="Figure", caption=caption ) ) - # We use the filename here as the ID is unknown here - # This needs to be refactored after updating attachments - node.attrib["src"] = f"workitemimg:{file_name}" - if self.generate_figure_captions: - caption = node.get( - "alt", f'Image "{file_url.stem}" of {obj.name}' - ) - node.addnext( - html.fromstring( - polarion_html_helper.POLARION_CAPTION.format( - label="Figure", caption=caption - ) - ) - ) - - except FileNotFoundError: - self.converter_session[obj.uuid].errors.add( - f"Inline image can't be found from {file_path!r}." ) repaired_markup = chelpers.process_html_fragments( @@ -357,6 +330,89 @@ def repair_images(node: etree._Element) -> None: ) return referenced_uuids, repaired_markup, attachments + def handle_base64_image( + self, + node: etree._Element, + attachments: list[data_model.Capella2PolarionAttachment], + obj: m.ModelElement | m.Diagram, + ): + """Handle base64-encoded images.""" + src = node.get("src") + assert src is not None + mime_type = src.split(";")[0].split(":")[1] + base64_data = src.split(",")[1] + filetype = mimetypes.guess_extension(mime_type) + + try: + file_path_name = f"{obj.uuid}_b64_image" + file_name = hashlib.md5(file_path_name.encode("utf8")).hexdigest() + ( + filetype or ".jpg" + ) + attachments.append( + data_model.Capella2PolarionAttachment( + "", + "", + file_path_name, + base64.b64decode(base64_data), + mime_type, + file_name, + ) + ) + node.attrib["src"] = f"workitemimg:{file_name}" + except Exception as e: + self.converter_session[obj.uuid].errors.add( + f"Failed to process base64 image: {str(e)}" + ) + + def handle_external_url( + self, node: etree._Element, obj: m.ModelElement | m.Diagram + ): + """Handle external URLs by raising a custom exception.""" + src = node.get("src") + self.converter_session[obj.uuid].errors.add( + f"External URL found in 'src': {src!r}. Raising ExternalURLAttachmentFoundError." + ) + raise ExternalURLAttachmentFoundError(f"External URL not allowed: {src}") + + def handle_workspace_image( + self, + node: etree._Element, + attachments: list[data_model.Capella2PolarionAttachment], + obj: m.ModelElement | m.Diagram, + ): + """Handle images referenced as file paths in the workspace.""" + src = node.get("src") + assert src is not None + file_url = pathlib.PurePosixPath(src) + workspace = file_url.parts[0] + file_path = pathlib.PurePosixPath(*file_url.parts[1:]) + mime_type, _ = mimetypes.guess_type(file_url) + resources = self.model.resources + filehandler = resources[["\x00", workspace][workspace in resources]] + + try: + with filehandler.open(file_path, "r") as img: + content = img.read() + file_name = ( + hashlib.md5(str(file_path).encode("utf8")).hexdigest() + + file_path.suffix + ) + attachments.append( + data_model.Capella2PolarionAttachment( + "", + "", + file_path.name, + content, + mime_type, + file_name, + ) + ) + node.attrib["src"] = f"workitemimg:{file_name}" + except FileNotFoundError: + self.converter_session[obj.uuid].errors.add( + f"Inline image can't be found from {file_path!r}." + ) + def _replace_markup( self, origin_uuid: str, @@ -376,9 +432,7 @@ def _replace_markup( self.converter_session[origin_uuid].errors.add( f"Non-existing model element referenced in description: {uuid}" ) - return polarion_html_helper.strike_through( - match.group(default_group) - ) + return polarion_html_helper.strike_through(match.group(default_group)) if pid := self.capella_polarion_mapping.get_work_item_id(uuid): referenced_uuids.append(uuid) return polarion_html_helper.POLARION_WORK_ITEM_URL.format(pid=pid) @@ -400,9 +454,7 @@ def _get_requirement_types_text( continue if not (req.type and req.text): - identifier = ( - req.long_name or req.name or req.summary or req.uuid - ) + identifier = req.long_name or req.name or req.summary or req.uuid self.converter_session[obj.uuid].errors.add( f"Found Requirement without text or type on {identifier!r}" ) @@ -498,9 +550,7 @@ def get_condition(cap: m.ModelElement, name: str) -> str: post_condition = get_condition(obj, "postcondition") assert converter_data.work_item, "No work item set yet" - converter_data.work_item.preCondition = polarion_api.HtmlContent( - pre_condition - ) + converter_data.work_item.preCondition = polarion_api.HtmlContent(pre_condition) converter_data.work_item.postCondition = polarion_api.HtmlContent( post_condition ) @@ -599,9 +649,7 @@ def _jinja_as_description( assert ( converter_data.work_item.description ), "Description should already be defined" - converter_data.work_item.description.value = ( - self._render_jinja_template( - template_folder, template_path, converter_data - ) + converter_data.work_item.description.value = self._render_jinja_template( + template_folder, template_path, converter_data ) return converter_data.work_item