Skip to content

Commit

Permalink
fix diff calculation bug around deleted edges (#4532)
Browse files Browse the repository at this point in the history
* use branched_from instead of created_at for branch diff

* fix bug in delete relationship query

* remove UNCHANGED properties, attrs, relationships from calculated diffs

* consolidate same_value checking logic in ConflictsEnricher

* refactor diff query to exclude intra-branch updates, add previous_values flag

* fixes for diff recalculation

* use branched_from instead of created_at for diff calculation

* more fixes and tests for diff calculation

* normalize time comparisons
  • Loading branch information
ajtmccarty authored Oct 4, 2024
1 parent 980c601 commit 01e37a6
Show file tree
Hide file tree
Showing 10 changed files with 649 additions and 131 deletions.
5 changes: 5 additions & 0 deletions backend/infrahub/core/branch.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ def validate_branch_name(cls, value: str) -> str:
def set_branched_from(cls, value: str) -> str:
return Timestamp(value).to_string()

def get_branched_from(self) -> str:
if not self.branched_from:
raise RuntimeError(f"branched_from not set for branch {self.name}")
return self.branched_from

@field_validator("created_at", mode="before")
@classmethod
def set_created_at(cls, value: str) -> str:
Expand Down
8 changes: 4 additions & 4 deletions backend/infrahub/core/diff/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ async def calculate_diff(
previous_node_specifiers: set[NodeFieldSpecifier] | None = None,
) -> CalculatedDiffs:
if diff_branch.name == registry.default_branch:
diff_branch_create_time = from_time
diff_branch_from_time = from_time
else:
diff_branch_create_time = Timestamp(diff_branch.get_created_at())
diff_branch_from_time = Timestamp(diff_branch.get_branched_from())
diff_parser = DiffQueryParser(
base_branch=base_branch,
diff_branch=diff_branch,
Expand All @@ -35,7 +35,7 @@ async def calculate_diff(
db=self.db,
branch=diff_branch,
base_branch=base_branch,
diff_branch_create_time=diff_branch_create_time,
diff_branch_from_time=diff_branch_from_time,
diff_from=from_time,
diff_to=to_time,
)
Expand All @@ -51,7 +51,7 @@ async def calculate_diff(
db=self.db,
branch=base_branch,
base_branch=base_branch,
diff_branch_create_time=diff_branch_create_time,
diff_branch_from_time=diff_branch_from_time,
diff_from=from_time,
diff_to=to_time,
current_node_field_specifiers=[
Expand Down
22 changes: 14 additions & 8 deletions backend/infrahub/core/diff/conflicts_enricher.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,7 @@ def _add_attribute_conflicts(
for property_type in common_property_types:
base_property = base_property_map[property_type]
branch_property = branch_property_map[property_type]
same_value = base_property.new_value == branch_property.new_value or (
base_property.action is DiffAction.UNCHANGED
and base_property.previous_value == branch_property.previous_value
)
same_value = self._have_same_value(base_property=base_property, branch_property=branch_property)
if not same_value:
self._add_property_conflict(
base_property=base_property,
Expand Down Expand Up @@ -147,10 +144,7 @@ def _add_relationship_conflicts_for_one_peer(
for property_type in common_property_types:
base_property = base_properties_by_type[property_type]
branch_property = branch_properties_by_type[property_type]
same_value = base_property.new_value == branch_property.new_value or (
base_property.action is DiffAction.UNCHANGED
and base_property.previous_value == branch_property.previous_value
)
same_value = self._have_same_value(base_property=base_property, branch_property=branch_property)
# special handling for cardinality-one peer ID conflict
if branch_property.property_type is DatabaseEdgeType.IS_RELATED and is_cardinality_one:
if same_value:
Expand Down Expand Up @@ -215,3 +209,15 @@ def _add_property_conflict(
diff_branch_changed_at=branch_property.changed_at,
selected_branch=selected_branch,
)

def _have_same_value(self, base_property: EnrichedDiffProperty, branch_property: EnrichedDiffProperty) -> bool:
if base_property.new_value == branch_property.new_value:
return True
if {base_property.new_value, branch_property.new_value} <= {"NULL", None}:
return True
if (
base_property.action is DiffAction.UNCHANGED
and base_property.previous_value == branch_property.previous_value
):
return True
return False
14 changes: 10 additions & 4 deletions backend/infrahub/core/diff/coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,21 @@ async def recalculate(
async with self.lock_registry.get(name=general_lock_name, namespace=self.lock_namespace):
log.debug(f"Acquired lock to recalculate diff for {base_branch.name} - {diff_branch.name}")
current_branch_diff = await self.diff_repo.get_one(diff_branch_name=diff_branch.name, diff_id=diff_id)
current_base_diff = await self.diff_repo.get_one(
diff_branch_name=base_branch.name, diff_id=current_branch_diff.partner_uuid
)
if current_branch_diff.tracking_id and isinstance(current_branch_diff.tracking_id, BranchTrackingId):
to_time = Timestamp()
else:
to_time = current_branch_diff.to_time
await self.diff_repo.delete_diff_roots(diff_root_uuids=[current_branch_diff.uuid])
await self.diff_repo.delete_diff_roots(diff_root_uuids=[current_branch_diff.uuid, current_base_diff.uuid])
from_time = current_branch_diff.from_time
branched_from_time = Timestamp(diff_branch.get_branched_from())
from_time = max(from_time, branched_from_time)
enriched_diffs = await self._update_diffs(
base_branch=base_branch,
diff_branch=diff_branch,
from_time=current_branch_diff.from_time,
from_time=branched_from_time,
to_time=to_time,
tracking_id=current_branch_diff.tracking_id,
force_branch_refresh=True,
Expand Down Expand Up @@ -283,14 +289,14 @@ async def _get_aggregated_enriched_diffs(
node_field_specifiers = self._get_node_field_specifiers(
enriched_diff=previous_diffs.diff_branch_diff
)
diff_request = EnrichedDiffRequest(
inner_diff_request = EnrichedDiffRequest(
base_branch=diff_request.base_branch,
diff_branch=diff_request.diff_branch,
from_time=current_time,
to_time=end_time,
node_field_specifiers=node_field_specifiers,
)
current_diffs = await self._get_enriched_diff(diff_request=diff_request)
current_diffs = await self._get_enriched_diff(diff_request=inner_diff_request)

if previous_diffs:
current_diffs = await self.diff_combiner.combine(
Expand Down
58 changes: 41 additions & 17 deletions backend/infrahub/core/diff/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,14 @@ def track_database_path(self, database_path: DatabasePath) -> None:
self.timestamp_status_map[database_path.attribute_changed_at] = database_path.attribute_status

def to_diff_attribute(self, from_time: Timestamp) -> DiffAttribute:
properties = [prop.to_diff_property(from_time=from_time) for prop in self.properties_by_type.values()]
properties = []
for prop in self.properties_by_type.values():
diff_prop = prop.to_diff_property(from_time=from_time)
if diff_prop.action is not DiffAction.UNCHANGED:
properties.append(diff_prop)
action, changed_at = self.get_action_and_timestamp(from_time=from_time)
if not properties:
action = DiffAction.UNCHANGED
return DiffAttribute(
uuid=self.uuid, name=self.name, changed_at=changed_at, action=action, properties=properties
)
Expand Down Expand Up @@ -262,11 +268,15 @@ def get_final_single_relationship(self, from_time: Timestamp) -> DiffSingleRelat
chronological_properties=peer_id_properties, from_time=from_time
)
peer_id = peer_final_property.new_value or peer_final_property.previous_value
other_final_properties = [
self._get_single_relationship_final_property(chronological_properties=property_list, from_time=from_time)
for property_type, property_list in self.ordered_properties_by_type.items()
if property_type != DatabaseEdgeType.IS_RELATED
]
other_final_properties = []
for property_type, property_list in self.ordered_properties_by_type.items():
if property_type is DatabaseEdgeType.IS_RELATED:
continue
final_prop = self._get_single_relationship_final_property(
chronological_properties=property_list, from_time=from_time
)
if final_prop.action is not DiffAction.UNCHANGED:
other_final_properties.append(final_prop)
final_properties = [peer_final_property] + other_final_properties
last_changed_property = max(final_properties, key=lambda fp: fp.changed_at)
last_changed_at = last_changed_property.changed_at
Expand Down Expand Up @@ -330,8 +340,8 @@ def to_diff_relationship(self, from_time: Timestamp) -> DiffRelationship:
single_relationships = [
sr.get_final_single_relationship(from_time=from_time) for sr in self._single_relationship_list
]
last_changed_relatonship = max(single_relationships, key=lambda r: r.changed_at)
last_changed_at = last_changed_relatonship.changed_at
last_changed_relationship = max(single_relationships, key=lambda r: r.changed_at)
last_changed_at = last_changed_relationship.changed_at
action = DiffAction.UPDATED
if last_changed_at < from_time:
action = DiffAction.UNCHANGED
Expand All @@ -358,9 +368,19 @@ class DiffNodeIntermediate(TrackedStatusUpdates):
relationships_by_name: dict[str, DiffRelationshipIntermediate] = field(default_factory=dict)

def to_diff_node(self, from_time: Timestamp) -> DiffNode:
attributes = [attr.to_diff_attribute(from_time=from_time) for attr in self.attributes_by_name.values()]
relationships = [rel.to_diff_relationship(from_time=from_time) for rel in self.relationships_by_name.values()]
attributes = []
for attr in self.attributes_by_name.values():
diff_attr = attr.to_diff_attribute(from_time=from_time)
if diff_attr.action is not DiffAction.UNCHANGED:
attributes.append(diff_attr)
relationships = []
for rel in self.relationships_by_name.values():
diff_rel = rel.to_diff_relationship(from_time=from_time)
if diff_rel.action is not DiffAction.UNCHANGED:
relationships.append(diff_rel)
action, changed_at = self.get_action_and_timestamp(from_time=from_time)
if not attributes and not relationships:
action = DiffAction.UNCHANGED
return DiffNode(
uuid=self.uuid,
kind=self.kind,
Expand All @@ -384,8 +404,11 @@ class DiffRootIntermediate:
def to_diff_root(self, from_time: Timestamp, to_time: Timestamp) -> DiffRoot:
nodes = []
for node in self.nodes_by_id.values():
if not node.is_empty:
nodes.append(node.to_diff_node(from_time=from_time))
if node.is_empty:
continue
diff_node = node.to_diff_node(from_time=from_time)
if diff_node.action is not DiffAction.UNCHANGED:
nodes.append(diff_node)
return DiffRoot(uuid=self.uuid, branch=self.branch, nodes=nodes, from_time=from_time, to_time=to_time)


Expand All @@ -403,10 +426,11 @@ def __init__(
self.schema_manager = schema_manager
self.from_time = from_time
self.to_time = to_time or Timestamp()
# if this diff is for the base branch, use from_time b/c create_time would be too much
if diff_branch.name == base_branch.name:
self.diff_branch_create_time = from_time
self.diff_branched_from_time = from_time
else:
self.diff_branch_create_time = Timestamp(diff_branch.get_created_at())
self.diff_branched_from_time = Timestamp(diff_branch.get_branched_from())
self._diff_root_by_branch: dict[str, DiffRootIntermediate] = {}
self._final_diff_root_by_branch: dict[str, DiffRoot] = {}

Expand Down Expand Up @@ -610,19 +634,19 @@ def _remove_empty_base_diff_root(self) -> None:
ordered_diff_values = property_diff.get_ordered_values_asc()
if not ordered_diff_values:
continue
if ordered_diff_values[-1].changed_at >= self.diff_branch_create_time:
if ordered_diff_values[-1].changed_at >= self.diff_branched_from_time:
return
for relationship_diff in node_diff.relationships_by_name.values():
for diff_relationship_property_list in relationship_diff.properties_by_db_id.values():
for diff_relationship_property in diff_relationship_property_list:
if diff_relationship_property.changed_at >= self.diff_branch_create_time:
if diff_relationship_property.changed_at >= self.diff_branched_from_time:
return
del self._diff_root_by_branch[self.base_branch_name]

def _finalize(self) -> None:
for branch, diff_root_intermediate in self._diff_root_by_branch.items():
if branch == self.base_branch_name:
from_time = self.diff_branch_create_time
from_time = self.diff_branched_from_time
else:
from_time = self.from_time
self._final_diff_root_by_branch[branch] = diff_root_intermediate.to_diff_root(
Expand Down
Loading

0 comments on commit 01e37a6

Please sign in to comment.