Skip to content

Commit 00c548a

Browse files
authored
Use version-hint.text for StaticTable (#1887)
This change allows making use of the `version-hint.text` file when a static table is instantiated with a `metadata_location` not ending with '.metadata.json'. Users can just point to the table location, and the metadata file path will be read from `version-hint.text`. Closes #763 # Rationale for this change `version-hint.text` is useful in contexts where you do not want or need a full-fledged catalog. Our use case is sharing datasets publicly as Iceberg tables on S3. # Are these changes tested? Not yet. # Are there any user-facing changes? Yes. Users can now point `StaticTable` to the table location rather than a specific version file.
1 parent 825fd5d commit 00c548a

File tree

4 files changed

+53
-0
lines changed

4 files changed

+53
-0
lines changed

mkdocs/docs/api.md

+11
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,17 @@ static_table = StaticTable.from_metadata(
215215

216216
The static-table is considered read-only.
217217

218+
Alternatively, if your table metadata directory contains a `version-hint.text` file, you can just specify
219+
the table root path, and the latest metadata file will be picked automatically.
220+
221+
```python
222+
from pyiceberg.table import StaticTable
223+
224+
static_table = StaticTable.from_metadata(
225+
"s3://warehouse/wh/nyc.db/taxis"
226+
)
227+
```
228+
218229
## Check if a table exists
219230

220231
To check whether the `bids` table exists:

pyiceberg/table/__init__.py

+20
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from __future__ import annotations
1818

1919
import itertools
20+
import os
2021
import uuid
2122
import warnings
2223
from abc import ABC, abstractmethod
@@ -1378,8 +1379,27 @@ def refresh(self) -> Table:
13781379
"""Refresh the current table metadata."""
13791380
raise NotImplementedError("To be implemented")
13801381

1382+
@classmethod
def _metadata_location_from_version_hint(cls, metadata_location: str, properties: Properties = EMPTY_DICT) -> str:
    """Resolve a table root location to a metadata file path via ``version-hint.text``.

    Reads ``<metadata_location>/metadata/version-hint.text`` and interprets its content as:

    * a full metadata file name (ends with ``.metadata.json``), used as-is;
    * a bare version number ``N``, mapped to ``vN.metadata.json``;
    * any other string ``S``, mapped to ``S.metadata.json``.

    Args:
        metadata_location: Root location of the table (the directory that contains ``metadata/``).
        properties: Properties passed to ``load_file_io`` to obtain the FileIO used for reading the hint.

    Returns:
        The location of the metadata file named by the version hint.
    """
    version_hint_location = os.path.join(metadata_location, "metadata", "version-hint.text")
    io = load_file_io(properties=properties, location=version_hint_location)
    file = io.new_input(version_hint_location)

    with file.open() as stream:
        # Strip surrounding whitespace so a trailing newline in the hint file
        # does not defeat the suffix/numeric checks below.
        content = stream.read().decode("utf-8").strip()

    # Build only the file name with an f-string. The previous code used "%s"
    # placeholders with str.format(), which never substituted the value, and it
    # formatted the whole joined path, so braces in the location were misread.
    if content.endswith(".metadata.json"):
        metadata_filename = content
    elif content.isnumeric():
        metadata_filename = f"v{content}.metadata.json"
    else:
        metadata_filename = f"{content}.metadata.json"

    return os.path.join(metadata_location, "metadata", metadata_filename)
1397+
13811398
@classmethod
13821399
def from_metadata(cls, metadata_location: str, properties: Properties = EMPTY_DICT) -> StaticTable:
1400+
if not metadata_location.endswith(".metadata.json"):
1401+
metadata_location = StaticTable._metadata_location_from_version_hint(metadata_location, properties)
1402+
13831403
io = load_file_io(properties=properties, location=metadata_location)
13841404
file = io.new_input(metadata_location)
13851405

tests/conftest.py

+16
Original file line numberDiff line numberDiff line change
@@ -1121,6 +1121,22 @@ def example_table_metadata_v3() -> Dict[str, Any]:
11211121
return EXAMPLE_TABLE_METADATA_V3
11221122

11231123

1124+
@pytest.fixture(scope="session")
def table_location(tmp_path_factory: pytest.TempPathFactory) -> str:
    """Return a table root whose ``metadata/`` directory holds a metadata file and a version hint.

    The example V2 table metadata is written under a random ``<uuid>.metadata.json``
    name, and ``version-hint.text`` is written next to it containing that file name.
    """
    from pyiceberg.io.pyarrow import PyArrowFileIO

    table_root = tmp_path_factory.getbasetemp()
    metadata_dir = table_root / "metadata"
    metadata_filename = f"{uuid.uuid4()}.metadata.json"

    # Write the table metadata file itself.
    metadata_output = PyArrowFileIO().new_output(location=str(metadata_dir / metadata_filename))
    ToOutputFile.table_metadata(
        TableMetadataV2(**EXAMPLE_TABLE_METADATA_V2),
        metadata_output,
        overwrite=True,
    )

    # Write the hint that names the metadata file written above.
    hint_output = PyArrowFileIO().new_output(location=str(metadata_dir / "version-hint.text"))
    with hint_output.create(overwrite=True) as hint_stream:
        hint_stream.write(metadata_filename.encode("utf-8"))

    return str(table_root)
1138+
1139+
11241140
@pytest.fixture(scope="session")
11251141
def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str:
11261142
from pyiceberg.io.pyarrow import PyArrowFileIO

tests/table/test_init.py

+6
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,12 @@ def test_static_table_gz_same_as_table(table_v2: Table, metadata_location_gz: st
383383
assert static_table.metadata == table_v2.metadata
384384

385385

386+
def test_static_table_version_hint_same_as_table(table_v2: Table, table_location: str) -> None:
    """A StaticTable created from the table root location carries the same metadata as ``table_v2``."""
    loaded_table = StaticTable.from_metadata(table_location)

    assert isinstance(loaded_table, Table)
    assert loaded_table.metadata == table_v2.metadata
390+
391+
386392
def test_static_table_io_does_not_exist(metadata_location: str) -> None:
387393
with pytest.raises(ValueError):
388394
StaticTable.from_metadata(metadata_location, {PY_IO_IMPL: "pyiceberg.does.not.exist.FileIO"})

0 commit comments

Comments
 (0)