Skip to content

Commit 74041ee

Browse files
authored
chore: improve commit finder accuracy (#687)
Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent ba3fcb0 commit 74041ee

File tree

3 files changed

+443
-18
lines changed

3 files changed

+443
-18
lines changed

src/macaron/repo_finder/commit_finder.py

Lines changed: 47 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@
100100
alphabetic_only_pattern = re.compile("^[a-z]+$", flags=re.IGNORECASE)
101101
hex_only_pattern = re.compile("^[0-9a-f]+$", flags=re.IGNORECASE)
102102
numeric_only_pattern = re.compile("^[0-9]+$")
103-
versioned_string = re.compile("^[a-z]+[0-9]+$", flags=re.IGNORECASE) # e.g. RC1, M5, etc.
103+
versioned_string = re.compile("^([a-z]+)(0*)([1-9]+[0-9]*)$", flags=re.IGNORECASE) # e.g. RC1, M5, etc.
104104

105105

106106
class AbstractPurlType(Enum):
@@ -473,42 +473,73 @@ def _compute_tag_version_similarity(tag_version: str, tag_suffix: str, version_p
473473
"""
474474
count = len(version_parts)
475475
# Reduce count for each direct match between version parts and tag version.
476-
tag_version_text = tag_version
476+
tag_version_text = tag_version.lower()
477477
for part in version_parts:
478+
part = part.lower()
478479
if part in tag_version_text:
479480
tag_version_text = tag_version_text.replace(part, "", 1)
480481
count = count - 1
481482

482483
# Try to reduce the count further based on the tag suffix.
483484
if tag_suffix:
484-
last_part = version_parts[-1]
485+
last_part = version_parts[-1].lower()
485486
# The tag suffix might consist of multiple version parts, e.g. RC1.RELEASE
486487
suffix_split = split_pattern.split(tag_suffix)
488+
# Try to match suffix parts to version.
489+
versioned_string_match = False
487490
if len(suffix_split) > 1:
488-
# Try to match suffix parts to version.
489-
versioned_string_match = False
490491
for suffix_part in suffix_split:
492+
suffix_part = suffix_part.lower()
491493
if alphabetic_only_pattern.match(suffix_part) and suffix_part == last_part:
492494
# If the suffix part only contains alphabetic characters, reduce the count if it
493495
# matches the version.
494496
count = count - 1
495497
continue
496-
if versioned_string.match(suffix_part):
497-
# If the suffix part contains alphabetic characters followed by numeric characters,
498-
# reduce the count if it matches the version (once only), otherwise increase the count.
499-
if not versioned_string_match and suffix_part == last_part:
500-
count = count - 1
501-
versioned_string_match = True
502-
else:
503-
count = count + 1
504-
if tag_suffix != last_part:
505-
count = count + 1
498+
499+
variable_suffix_pattern = _create_suffix_tag_comparison_pattern(suffix_part)
500+
if not variable_suffix_pattern:
501+
continue
502+
503+
if versioned_string_match:
504+
count = count + 1
505+
continue
506+
507+
# If the suffix part contains alphabetic characters followed by numeric characters,
508+
# reduce the count if it closely matches the version (once only), otherwise increase the count.
509+
if re.match(variable_suffix_pattern, last_part):
510+
count = count - 1
511+
versioned_string_match = True
512+
else:
513+
count = count + 1
514+
515+
variable_suffix_pattern = _create_suffix_tag_comparison_pattern(tag_suffix)
516+
if variable_suffix_pattern:
517+
if re.match(variable_suffix_pattern, last_part):
518+
count = count - 1
519+
else:
520+
count = count + 1
506521
else:
507-
count = count - 1
522+
count = count + 1
508523

509524
return count
510525

511526

527+
def _create_suffix_tag_comparison_pattern(tag_part: str) -> str | None:
    """Build a regex source string for comparing part of a tag with part of a version.

    The tag part is first matched against the module-level ``versioned_string``
    pattern, which splits it into three captured groups: the alphabetic prefix,
    any run of leading zeros, and the remaining number.

    Parameters
    ----------
    tag_part : str
        The part of the tag suffix to build a comparison pattern from.

    Returns
    -------
    str | None
        ``None`` when ``tag_part`` does not match ``versioned_string``.
        Otherwise a pattern string made of the prefix followed by the number.
        When the tag part contained leading zeros, a ``(0*)`` group is placed
        between the two so that any number of zeros (including none) is accepted.
    """
    match = versioned_string.match(tag_part)
    if match is None:
        return None

    prefix, leading_zeros, number = match.groups()
    # Only tolerate a variable run of zeros when the tag part itself had some.
    zero_group = "(0*)" if leading_zeros else ""
    return f"{prefix}{zero_group}{number}"
541+
542+
512543
def _get_tag_commit(tag: TagReference) -> Commit | None:
513544
"""Return the commit of the passed tag.
514545

tests/e2e/repo_finder/commit_finder.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module performs a regression test of the commit finder's tag matching functionality."""
@@ -18,7 +18,7 @@
1818
# Set logging debug level.
1919
logger.setLevel(logging.DEBUG)
2020

21-
java_tags_file_path = Path(__file__).parent.joinpath("resources", "java_tags.json")
21+
java_tags_file_path = Path(__file__).parent.joinpath("resources", "tags.json")
2222

2323

2424
def test_commit_finder() -> int:

0 commit comments

Comments
 (0)