Skip to content

Commit a29491a

Browse files
authored
Remove recursive call from ancestors_of (#821)
1 parent 5c9fa7e commit a29491a

File tree

3 files changed

+106
-6
lines changed

3 files changed

+106
-6
lines changed

pyiceberg/table/snapshots.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,9 @@ def set_when_positive(properties: Dict[str, str], num: int, property_name: str)
421421

422422
def ancestors_of(current_snapshot: Optional[Snapshot], table_metadata: TableMetadata) -> Iterable[Snapshot]:
423423
"""Get the ancestors of and including the given snapshot."""
424-
if current_snapshot:
425-
yield current_snapshot
426-
if current_snapshot.parent_snapshot_id is not None:
427-
if parent := table_metadata.snapshot_by_id(current_snapshot.parent_snapshot_id):
428-
yield from ancestors_of(parent, table_metadata)
424+
snapshot = current_snapshot
425+
while snapshot is not None:
426+
yield snapshot
427+
if snapshot.parent_snapshot_id is None:
428+
break
429+
snapshot = table_metadata.snapshot_by_id(snapshot.parent_snapshot_id)

tests/conftest.py

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@
2929
import re
3030
import socket
3131
import string
32+
import time
3233
import uuid
3334
from datetime import date, datetime, timezone
3435
from pathlib import Path
35-
from random import choice
36+
from random import choice, randint
3637
from tempfile import TemporaryDirectory
3738
from typing import (
3839
TYPE_CHECKING,
@@ -731,6 +732,77 @@ def example_table_metadata_no_snapshot_v1() -> Dict[str, Any]:
731732
return EXAMPLE_TABLE_METADATA_NO_SNAPSHOT_V1
732733

733734

735+
@pytest.fixture
736+
def example_table_metadata_v2_with_extensive_snapshots() -> Dict[str, Any]:
737+
def generate_snapshot(
738+
snapshot_id: int,
739+
parent_snapshot_id: Optional[int] = None,
740+
timestamp_ms: Optional[int] = None,
741+
sequence_number: int = 0,
742+
) -> Dict[str, Any]:
743+
return {
744+
"snapshot-id": snapshot_id,
745+
"parent-snapshot-id": parent_snapshot_id,
746+
"timestamp-ms": timestamp_ms or int(time.time() * 1000),
747+
"sequence-number": sequence_number,
748+
"summary": {"operation": "append"},
749+
"manifest-list": f"s3://a/b/{snapshot_id}.avro",
750+
}
751+
752+
snapshots = []
753+
snapshot_log = []
754+
initial_snapshot_id = 3051729675574597004
755+
756+
for i in range(2000):
757+
snapshot_id = initial_snapshot_id + i
758+
parent_snapshot_id = snapshot_id - 1 if i > 0 else None
759+
timestamp_ms = int(time.time() * 1000) - randint(0, 1000000)
760+
snapshots.append(generate_snapshot(snapshot_id, parent_snapshot_id, timestamp_ms, i))
761+
snapshot_log.append({"snapshot-id": snapshot_id, "timestamp-ms": timestamp_ms})
762+
763+
return {
764+
"format-version": 2,
765+
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
766+
"location": "s3://bucket/test/location",
767+
"last-sequence-number": 34,
768+
"last-updated-ms": 1602638573590,
769+
"last-column-id": 3,
770+
"current-schema-id": 1,
771+
"schemas": [
772+
{"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]},
773+
{
774+
"type": "struct",
775+
"schema-id": 1,
776+
"identifier-field-ids": [1, 2],
777+
"fields": [
778+
{"id": 1, "name": "x", "required": True, "type": "long"},
779+
{"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"},
780+
{"id": 3, "name": "z", "required": True, "type": "long"},
781+
],
782+
},
783+
],
784+
"default-spec-id": 0,
785+
"partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}],
786+
"last-partition-id": 1000,
787+
"default-sort-order-id": 3,
788+
"sort-orders": [
789+
{
790+
"order-id": 3,
791+
"fields": [
792+
{"transform": "identity", "source-id": 2, "direction": "asc", "null-order": "nulls-first"},
793+
{"transform": "bucket[4]", "source-id": 3, "direction": "desc", "null-order": "nulls-last"},
794+
],
795+
}
796+
],
797+
"properties": {"read.split.target.size": "134217728"},
798+
"current-snapshot-id": initial_snapshot_id + 1999,
799+
"snapshots": snapshots,
800+
"snapshot-log": snapshot_log,
801+
"metadata-log": [{"metadata-file": "s3://bucket/.../v1.json", "timestamp-ms": 1515100}],
802+
"refs": {"test": {"snapshot-id": initial_snapshot_id, "type": "tag", "max-ref-age-ms": 10000000}},
803+
}
804+
805+
734806
EXAMPLE_TABLE_METADATA_V2 = {
735807
"format-version": 2,
736808
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
@@ -1992,6 +2064,18 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table:
19922064
)
19932065

19942066

2067+
@pytest.fixture
2068+
def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots: Dict[str, Any]) -> Table:
2069+
table_metadata = TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots)
2070+
return Table(
2071+
identifier=("database", "table"),
2072+
metadata=table_metadata,
2073+
metadata_location=f"{table_metadata.location}/uuid.metadata.json",
2074+
io=load_file_io(),
2075+
catalog=NoopCatalog("NoopCatalog"),
2076+
)
2077+
2078+
19952079
@pytest.fixture
19962080
def bound_reference_str() -> BoundReference[str]:
19972081
return BoundReference(field=NestedField(1, "field", StringType(), required=False), accessor=Accessor(position=0, inner=None))

tests/table/test_init.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,21 @@ def test_ancestors_of(table_v2: Table) -> None:
241241
]
242242

243243

244+
def test_ancestors_of_recursive_error(table_v2_with_extensive_snapshots: Table) -> None:
245+
# Test RecursionError: maximum recursion depth exceeded
246+
assert (
247+
len(
248+
list(
249+
ancestors_of(
250+
table_v2_with_extensive_snapshots.current_snapshot(),
251+
table_v2_with_extensive_snapshots.metadata,
252+
)
253+
)
254+
)
255+
== 2000
256+
)
257+
258+
244259
def test_snapshot_by_id_does_not_exist(table_v2: Table) -> None:
245260
assert table_v2.snapshot_by_id(-1) is None
246261

0 commit comments

Comments
 (0)