From 191e67c17e1f22496d1f2b4b5072cec7b64128ed Mon Sep 17 00:00:00 2001 From: Hao Chang Yu Date: Thu, 25 Jan 2024 10:36:10 -0500 Subject: [PATCH] Addressed an edge-case around advisory-collection name collisions. fixes #3380. Co-authored-by: Grant Gainey (cherry picked from commit 47e61c91d28614eb26e178bc2df5bc992259106d) --- CHANGES/3380.bugfix | 1 + pulp_rpm/app/advisory.py | 107 ++++++++++++++++++++++++---------- pulp_rpm/app/modelresource.py | 1 - 3 files changed, 77 insertions(+), 32 deletions(-) create mode 100644 CHANGES/3380.bugfix diff --git a/CHANGES/3380.bugfix b/CHANGES/3380.bugfix new file mode 100644 index 000000000..2bd70b92e --- /dev/null +++ b/CHANGES/3380.bugfix @@ -0,0 +1 @@ +Addressed some edge-cases involving advisory-collection-naming and imports. diff --git a/pulp_rpm/app/advisory.py b/pulp_rpm/app/advisory.py index c2abd2352..bfafa5a64 100644 --- a/pulp_rpm/app/advisory.py +++ b/pulp_rpm/app/advisory.py @@ -115,8 +115,9 @@ def resolve_advisories(version, previous_version): tmp_advisories = [] for adv in conflict_advisories[1:]: pk_to_add, pk_to_remove, pk_to_exclude = resolve_advisory_conflict(new_advisory, adv) - new_advisory = UpdateRecord.objects.filter(pk=pk_to_add[0]).get() - tmp_advisories.append(new_advisory) + if pk_to_add: + new_advisory = UpdateRecord.objects.filter(pk=pk_to_add[0]).get() + tmp_advisories.append(new_advisory) content_pks_to_exclude.update(pk_to_exclude) content_pks_to_remove.update(pk_to_remove) added_advisories_by_id[adv_id] = [new_advisory] @@ -360,38 +361,84 @@ def merge_advisories(previous_advisory, added_advisory): added_collections = added_advisory.collections.all() references = previous_advisory.references.all() + # First thing to do is ensure collection-name-uniqueness + # in the newly-merged advisory. + + # If we've seen a collection-name already, create a new name + # by appending "_" and rename the collection before + # merging. + # "index", in this context, is a monotonically-increasing INTEGER. In the code below, note + # the lines that insure that, when a new "index" is required, choosing (curr-max+1) is + # guaranteed. + # NOTE: keeping the collections separate is DELIBERATE: + # 1) after the fact, it's clear that an advisory merge happened, and that one set of + # packages comes from one place, one from a different one + # 2) package-grouping can be *relevant* (e.g. base-rpms vs debuginfo-rpms) + # 3) package-grouping can be *required* (e.g., module-collections must be kept separate) + + def get_new_name(collection, collection_group): + """ + Generate a new collection-name based on the current one and a list of "indexes". + + Collections are unique-ified by adding "_INTEGER" to their given names. + Collection-group is keyed by "original" name, with values of "all index-integers seen + so far". + New names are coined as "original name + max(index-integers seen so far for that name". + """ + name_list = collection.name.split("_") + if name_list[-1].isdecimal(): + name_list.pop(-1) + orig_name = "_".join(name_list) + else: + orig_name = collection.name + + if orig_name in collection_group.keys(): + index = max(collection_group[orig_name]) + 1 + else: + index = 0 + collection_group.setdefault(orig_name, []).append(index) + return f"{orig_name}_{index}" + + # Set up the "collection-group" to an initial state - all the current-names, and a list + # of any "indices" they've been given, so we can know what our current-max-index is + # before we start renaming collections. + collection_group = {} + for collection in chain(previous_collections, added_collections): + # It's possible for a collection to have NO NAME (not even an empty string) + if not collection.name: + collection.name = "" + name_list = collection.name.split("_") + if name_list[-1].isdecimal(): + index = name_list.pop(-1) + orig_name = "_".join(name_list) + collection_group.setdefault(orig_name, []).append(int(index)) + + # Now, prepare for, and merge, the two advisories with transaction.atomic(): - # First thing to do is ensure collection-name-uniqueness - # in the newly-merged advisory. - - # If we've seen a collection-name already, create a new name - # by appending "_" and rename the collection before - # merging. - # NOTE: keeping the collections separate is DELIBERATE: - # 1) after the fact, it's clear that an advisory merge happened, and that one set of - # packages comes from one place, one from a different one - # 2) package-grouping can be *relevant* (e.g. base-rpms vs debuginfo-rpms) - # 3) package-grouping can be *required* (e.g., module-collections must be kept separate) - - # dictionary of collection-name:first-unused-suffix pairs - # if a collection has no name, we assign it the name "collection" and uniquify-it from there - names_seen = {"collection": 0} + # Finalize unique-ified collection names, to use in the event of concurrency-collisions + # if a collection has no name, we assign it the name "collection". + names_seen = ["collection"] + collections_to_merge = set([]) for collection in chain(previous_collections, added_collections): + # No packages? ignore + if collection.packages.count() == 0: + continue + # no-name? When merging, ILLEGAL! Give it a name if not collection.name: collection.name = "collection" - if collection.name in names_seen.keys(): - orig_name = collection.name - new_name = f"{orig_name}_{names_seen[orig_name]}" - names_seen[orig_name] += 1 - collection.name = new_name - # if we've not seen it before, store in names-seen as name:0 - else: - names_seen[collection.name] = 0 - merged_advisory_cr = previous_advisory.to_createrepo_c( - collections=chain(previous_collections, added_collections) - ) + # If we've seen a collection with "this" name - give it a unique one, + # and add that name to the "seen" list + if collection.name in names_seen: + collection.name = get_new_name(collection, collection_group) + # Either we've not seen it ever, or we've generated a new "indexed" name + names_seen.append(collection.name) + # add the collection to the "set of collections for the merged advisory" + collections_to_merge.add(collection) + + # Compute digest for the new merged advisory and save it + merged_advisory_cr = previous_advisory.to_createrepo_c(collections=collections_to_merge) merged_digest = hash_update_record(merged_advisory_cr) merged_advisory = previous_advisory # Need to null both pk (content_ptr_id) and pulp_id here to insure django doesn't @@ -408,9 +455,7 @@ def merge_advisories(previous_advisory, added_advisory): else: # For UpdateCollections, make sure we don't re-use the collections for either of the # advisories being merged - _copy_update_collections_for( - merged_advisory, chain(previous_collections, added_collections) - ) + _copy_update_collections_for(merged_advisory, collections_to_merge) for reference in references: # copy reference and add relation for advisory reference.pk = None diff --git a/pulp_rpm/app/modelresource.py b/pulp_rpm/app/modelresource.py index ae10a22b6..4e8901e52 100644 --- a/pulp_rpm/app/modelresource.py +++ b/pulp_rpm/app/modelresource.py @@ -538,7 +538,6 @@ def set_up_queryset(self): update_record__in=UpdateRecord.objects.filter(pk__in=self.repo_version.content) ) ) - .distinct("name", "epoch", "version", "release", "arch") .order_by("name", "epoch", "version", "release", "arch") .select_related("update_collection", "update_collection__update_record") )