diff --git a/src/macaron/parsers/pomparser.py b/src/macaron/parsers/pomparser.py
new file mode 100644
index 000000000..857deff9f
--- /dev/null
+++ b/src/macaron/parsers/pomparser.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module contains the parser for POM files."""
+import logging
+from xml.etree.ElementTree import Element  # nosec
+
+import defusedxml.ElementTree
+from defusedxml.ElementTree import fromstring
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+def parse_pom_string(pom_string: str) -> Element | None:
+    """
+    Parse the passed POM string using defusedxml.
+
+    Parameters
+    ----------
+    pom_string : str
+        The contents of a POM file as a string.
+
+    Returns
+    -------
+    Element | None
+        The parsed element representing the POM's XML hierarchy, or None if the string is not
+        well-formed XML or is rejected by defusedxml as unsafe.
+    """
+    try:
+        # Stored here first to help with type checking.
+        pom: Element = fromstring(pom_string)
+        return pom
+    except (defusedxml.ElementTree.ParseError, defusedxml.DefusedXmlException) as error:
+        # ParseError covers malformed XML; DefusedXmlException covers input that defusedxml
+        # refuses to process (e.g. entity declarations). Either way there is no usable POM.
+        logger.debug("Failed to parse XML: %s", error)
+        return None
diff --git a/src/macaron/repo_finder/repo_finder_java.py b/src/macaron/repo_finder/repo_finder_java.py
index 63ff840d5..148c03e1b 100644
--- a/src/macaron/repo_finder/repo_finder_java.py
+++ b/src/macaron/repo_finder/repo_finder_java.py
@@ -6,11 +6,10 @@
 import re
 from xml.etree.ElementTree import Element  # nosec
 
-import defusedxml.ElementTree
-from defusedxml.ElementTree import fromstring
 from packageurl import PackageURL
 
 from macaron.config.defaults import defaults
+from macaron.parsers.pomparser import parse_pom_string
 from macaron.repo_finder.repo_finder_base import BaseRepoFinder
 from macaron.repo_finder.repo_validator import find_valid_repository_url
 from macaron.util import send_get_http_raw
@@ -168,34 +167,14 @@ def _read_pom(self, pom: str) -> list[str]:
             return []
 
         # Parse POM using defusedxml
-        pom_element = self._parse_pom(pom)
+        pom_element = parse_pom_string(pom)
         if pom_element is None:
             return []
+        self.pom_element = pom_element
 
         # Attempt to extract SCM data and return URL
         return self._find_scm(pom_element, tags)
 
-    def _parse_pom(self, pom: str) -> Element | None:
-        """
-        Parse the passed POM using defusedxml.
-
-        Parameters
-        ----------
-        pom : str
-            The contents of a POM file as a string.
-
-        Returns
-        -------
-        Element | None :
-            The parsed element representing the POM's XML hierarchy.
-        """
-        try:
-            self.pom_element = fromstring(pom)
-            return self.pom_element
-        except defusedxml.ElementTree.ParseError as error:
-            logger.debug("Failed to parse XML: %s", error)
-            return None
-
     def _find_scm(self, pom: Element, tags: list[str], resolve_properties: bool = True) -> list[str]:
         """
         Parse the passed pom and extract the passed tags.