From 75e534c4be96eb423d85f2eb8094009a6be24535 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Tue, 17 Sep 2024 11:36:14 +1000 Subject: [PATCH 1/2] chore: move pom parser to parsers module Signed-off-by: Ben Selwyn-Smith --- src/macaron/parsers/pomparser.py | 34 +++++++++++++++++++++ src/macaron/repo_finder/repo_finder_java.py | 26 ++-------------- 2 files changed, 36 insertions(+), 24 deletions(-) create mode 100644 src/macaron/parsers/pomparser.py diff --git a/src/macaron/parsers/pomparser.py b/src/macaron/parsers/pomparser.py new file mode 100644 index 000000000..857deff9f --- /dev/null +++ b/src/macaron/parsers/pomparser.py @@ -0,0 +1,34 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the parser for POM files.""" +import logging +from xml.etree.ElementTree import Element # nosec + +import defusedxml.ElementTree +from defusedxml.ElementTree import fromstring + +logger: logging.Logger = logging.getLogger(__name__) + + +def parse_pom_string(pom_string: str) -> Element | None: + """ + Parse the passed POM string using defusedxml. + + Parameters + ---------- + pom_string : str + The contents of a POM file as a string. + + Returns + ------- + Element | None + The parsed element representing the POM's XML hierarchy. + """ + try: + # Stored here first to help with type checking. + pom: Element = fromstring(pom_string) + return pom + except defusedxml.ElementTree.ParseError as error: + logger.debug("Failed to parse XML: %s", error) + return None diff --git a/src/macaron/repo_finder/repo_finder_java.py b/src/macaron/repo_finder/repo_finder_java.py index 63ff840d5..9bdec2526 100644 --- a/src/macaron/repo_finder/repo_finder_java.py +++ b/src/macaron/repo_finder/repo_finder_java.py @@ -6,11 +6,10 @@ import re from xml.etree.ElementTree import Element # nosec -import defusedxml.ElementTree -from defusedxml.ElementTree import fromstring from packageurl import PackageURL from macaron.config.defaults import defaults +from macaron.parsers.pomparser import parse_pom_string from macaron.repo_finder.repo_finder_base import BaseRepoFinder from macaron.repo_finder.repo_validator import find_valid_repository_url from macaron.util import send_get_http_raw @@ -168,34 +167,13 @@ def _read_pom(self, pom: str) -> list[str]: return [] # Parse POM using defusedxml - pom_element = self._parse_pom(pom) + pom_element = parse_pom_string(pom) if pom_element is None: return [] # Attempt to extract SCM data and return URL return self._find_scm(pom_element, tags) - def _parse_pom(self, pom: str) -> Element | None: - """ - Parse the passed POM using defusedxml. - - Parameters - ---------- - pom : str - The contents of a POM file as a string. - - Returns - ------- - Element | None : - The parsed element representing the POM's XML hierarchy. - """ - try: - self.pom_element = fromstring(pom) - return self.pom_element - except defusedxml.ElementTree.ParseError as error: - logger.debug("Failed to parse XML: %s", error) - return None - def _find_scm(self, pom: Element, tags: list[str], resolve_properties: bool = True) -> list[str]: """ Parse the passed pom and extract the passed tags. From 9c85a98a0f38a2efdcab2914bea7a99d74f0f430 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Tue, 17 Sep 2024 14:07:21 +1000 Subject: [PATCH 2/2] chore: minor fix Signed-off-by: Ben Selwyn-Smith --- src/macaron/repo_finder/repo_finder_java.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/macaron/repo_finder/repo_finder_java.py b/src/macaron/repo_finder/repo_finder_java.py index 9bdec2526..148c03e1b 100644 --- a/src/macaron/repo_finder/repo_finder_java.py +++ b/src/macaron/repo_finder/repo_finder_java.py @@ -170,6 +170,7 @@ def _read_pom(self, pom: str) -> list[str]: pom_element = parse_pom_string(pom) if pom_element is None: return [] + self.pom_element = pom_element # Attempt to extract SCM data and return URL return self._find_scm(pom_element, tags)