Skip to content

Commit

Permalink
chore: move pom parser to parsers module
Browse files Browse the repository at this point in the history
Signed-off-by: Ben Selwyn-Smith <[email protected]>
  • Loading branch information
benmss committed Sep 17, 2024
1 parent c9752b3 commit 75e534c
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 24 deletions.
34 changes: 34 additions & 0 deletions src/macaron/parsers/pomparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the parser for POM files."""
import logging
from xml.etree.ElementTree import Element # nosec

import defusedxml.ElementTree
from defusedxml.ElementTree import fromstring

logger: logging.Logger = logging.getLogger(__name__)


def parse_pom_string(pom_string: str) -> Element | None:
    """
    Parse the passed POM string using defusedxml.

    Parameters
    ----------
    pom_string : str
        The contents of a POM file as a string.

    Returns
    -------
    Element | None
        The parsed element representing the POM's XML hierarchy, or None if the string is not
        well-formed XML or is rejected by defusedxml as unsafe.
    """
    try:
        # Stored here first to help with type checking.
        pom: Element = fromstring(pom_string)
    except (defusedxml.ElementTree.ParseError, defusedxml.DefusedXmlException) as error:
        # ParseError covers malformed XML. DefusedXmlException covers documents that defusedxml
        # refuses to process (e.g. entity declarations or external references); it is not a
        # ParseError, so it must be caught separately to honour the "Element | None" contract.
        logger.debug("Failed to parse XML: %s", error)
        return None
    return pom
26 changes: 2 additions & 24 deletions src/macaron/repo_finder/repo_finder_java.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
import re
from xml.etree.ElementTree import Element # nosec

import defusedxml.ElementTree
from defusedxml.ElementTree import fromstring
from packageurl import PackageURL

from macaron.config.defaults import defaults
from macaron.parsers.pomparser import parse_pom_string
from macaron.repo_finder.repo_finder_base import BaseRepoFinder
from macaron.repo_finder.repo_validator import find_valid_repository_url
from macaron.util import send_get_http_raw
Expand Down Expand Up @@ -168,34 +167,13 @@ def _read_pom(self, pom: str) -> list[str]:
return []

# Parse POM using defusedxml
pom_element = self._parse_pom(pom)
pom_element = parse_pom_string(pom)
if pom_element is None:
return []

# Attempt to extract SCM data and return URL
return self._find_scm(pom_element, tags)

def _parse_pom(self, pom: str) -> Element | None:
    """
    Parse the passed POM using defusedxml.

    On success the parsed root element is also stored on ``self.pom_element``.

    Parameters
    ----------
    pom : str
        The contents of a POM file as a string.

    Returns
    -------
    Element | None :
        The parsed element representing the POM's XML hierarchy, or None if the string is not
        well-formed XML or is rejected by defusedxml as unsafe.
    """
    try:
        self.pom_element = fromstring(pom)
    except (defusedxml.ElementTree.ParseError, defusedxml.DefusedXmlException) as error:
        # DefusedXmlException (forbidden entities / external references) is not a ParseError,
        # so it is caught explicitly rather than being allowed to propagate to the caller.
        logger.debug("Failed to parse XML: %s", error)
        return None
    return self.pom_element

def _find_scm(self, pom: Element, tags: list[str], resolve_properties: bool = True) -> list[str]:
"""
Parse the passed pom and extract the passed tags.
Expand Down

0 comments on commit 75e534c

Please sign in to comment.