Skip to content

Commit

Permalink
chore: improve commit finder accuracy (#687)
Browse files Browse the repository at this point in the history
Signed-off-by: Ben Selwyn-Smith <[email protected]>
  • Loading branch information
benmss authored Apr 8, 2024
1 parent ba3fcb0 commit 74041ee
Show file tree
Hide file tree
Showing 3 changed files with 443 additions and 18 deletions.
63 changes: 47 additions & 16 deletions src/macaron/repo_finder/commit_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
# Matches strings made up entirely of ASCII letters (case-insensitive).
alphabetic_only_pattern = re.compile("^[a-z]+$", flags=re.IGNORECASE)
# Matches strings made up entirely of hexadecimal digits (case-insensitive).
hex_only_pattern = re.compile("^[0-9a-f]+$", flags=re.IGNORECASE)
# Matches strings made up entirely of decimal digits.
numeric_only_pattern = re.compile("^[0-9]+$")
# NOTE(review): this binding is immediately replaced by the assignment on the next line;
# it looks like the pre-change side of the diff -- confirm before relying on it.
versioned_string = re.compile("^[a-z]+[0-9]+$", flags=re.IGNORECASE) # e.g. RC1, M5, etc.
# Letters followed by a number, captured as three groups:
#   1: the alphabetic prefix,
#   2: any leading zeros of the number (may be empty),
#   3: the number itself, starting with a non-zero digit.
versioned_string = re.compile("^([a-z]+)(0*)([1-9]+[0-9]*)$", flags=re.IGNORECASE) # e.g. RC1, M5, etc.


class AbstractPurlType(Enum):
Expand Down Expand Up @@ -473,42 +473,73 @@ def _compute_tag_version_similarity(tag_version: str, tag_suffix: str, version_p
"""
count = len(version_parts)
# Reduce count for each direct match between version parts and tag version.
tag_version_text = tag_version
tag_version_text = tag_version.lower()
for part in version_parts:
part = part.lower()
if part in tag_version_text:
tag_version_text = tag_version_text.replace(part, "", 1)
count = count - 1

# Try to reduce the count further based on the tag suffix.
if tag_suffix:
last_part = version_parts[-1]
last_part = version_parts[-1].lower()
# The tag suffix might consist of multiple version parts, e.g. RC1.RELEASE
suffix_split = split_pattern.split(tag_suffix)
# Try to match suffix parts to version.
versioned_string_match = False
if len(suffix_split) > 1:
# Try to match suffix parts to version.
versioned_string_match = False
for suffix_part in suffix_split:
suffix_part = suffix_part.lower()
if alphabetic_only_pattern.match(suffix_part) and suffix_part == last_part:
# If the suffix part only contains alphabetic characters, reduce the count if it
# matches the version.
count = count - 1
continue
if versioned_string.match(suffix_part):
# If the suffix part contains alphabetic characters followed by numeric characters,
# reduce the count if it matches the version (once only), otherwise increase the count.
if not versioned_string_match and suffix_part == last_part:
count = count - 1
versioned_string_match = True
else:
count = count + 1
if tag_suffix != last_part:
count = count + 1

variable_suffix_pattern = _create_suffix_tag_comparison_pattern(suffix_part)
if not variable_suffix_pattern:
continue

if versioned_string_match:
count = count + 1
continue

# If the suffix part contains alphabetic characters followed by numeric characters,
# reduce the count if it closely matches the version (once only), otherwise increase the count.
if re.match(variable_suffix_pattern, last_part):
count = count - 1
versioned_string_match = True
else:
count = count + 1

variable_suffix_pattern = _create_suffix_tag_comparison_pattern(tag_suffix)
if variable_suffix_pattern:
if re.match(variable_suffix_pattern, last_part):
count = count - 1
else:
count = count + 1
else:
count = count - 1
count = count + 1

return count


def _create_suffix_tag_comparison_pattern(tag_part: str) -> str | None:
    """Build a regex source string for comparing a tag part with a version part.

    The tag part is parsed with ``versioned_string`` into an alphabetic prefix,
    optional leading zeros, and a number. If the tag part carries leading zeros,
    they are replaced by ``(0*)`` in the result so that any amount of zero
    padding is accepted; otherwise the prefix and number are joined as-is.

    Parameters
    ----------
    tag_part : str
        The part of the tag to build a pattern from.

    Returns
    -------
    str | None
        The pattern text (without anchors), or None if the tag part does not
        match ``versioned_string``.
    """
    parsed = versioned_string.match(tag_part)
    if parsed is None:
        return None

    prefix, leading_zeros, number = parsed.groups()
    # Only tags that were zero-padded themselves get the flexible zero group.
    zero_padding = "(0*)" if leading_zeros else ""
    return f"{prefix}{zero_padding}{number}"


def _get_tag_commit(tag: TagReference) -> Commit | None:
"""Return the commit of the passed tag.
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/repo_finder/commit_finder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module performs a regression test of the commit finder's tag matching functionality."""
Expand All @@ -18,7 +18,7 @@
# Set logging debug level.
logger.setLevel(logging.DEBUG)

java_tags_file_path = Path(__file__).parent.joinpath("resources", "java_tags.json")
java_tags_file_path = Path(__file__).parent.joinpath("resources", "tags.json")


def test_commit_finder() -> int:
Expand Down
Loading

0 comments on commit 74041ee

Please sign in to comment.