|
29 | 29 | import re
|
30 | 30 | import socket
|
31 | 31 | import string
|
| 32 | +import time |
32 | 33 | import uuid
|
33 | 34 | from datetime import date, datetime, timezone
|
34 | 35 | from pathlib import Path
|
35 |
| -from random import choice |
| 36 | +from random import choice, randint |
36 | 37 | from tempfile import TemporaryDirectory
|
37 | 38 | from typing import (
|
38 | 39 | TYPE_CHECKING,
|
@@ -731,6 +732,77 @@ def example_table_metadata_no_snapshot_v1() -> Dict[str, Any]:
|
731 | 732 | return EXAMPLE_TABLE_METADATA_NO_SNAPSHOT_V1
|
732 | 733 |
|
733 | 734 |
|
| 735 | +@pytest.fixture |
| 736 | +def example_table_metadata_v2_with_extensive_snapshots() -> Dict[str, Any]: |
| 737 | + def generate_snapshot( |
| 738 | + snapshot_id: int, |
| 739 | + parent_snapshot_id: Optional[int] = None, |
| 740 | + timestamp_ms: Optional[int] = None, |
| 741 | + sequence_number: int = 0, |
| 742 | + ) -> Dict[str, Any]: |
| 743 | + return { |
| 744 | + "snapshot-id": snapshot_id, |
| 745 | + "parent-snapshot-id": parent_snapshot_id, |
| 746 | + "timestamp-ms": timestamp_ms or int(time.time() * 1000), |
| 747 | + "sequence-number": sequence_number, |
| 748 | + "summary": {"operation": "append"}, |
| 749 | + "manifest-list": f"s3://a/b/{snapshot_id}.avro", |
| 750 | + } |
| 751 | + |
| 752 | + snapshots = [] |
| 753 | + snapshot_log = [] |
| 754 | + initial_snapshot_id = 3051729675574597004 |
| 755 | + |
| 756 | + for i in range(2000): |
| 757 | + snapshot_id = initial_snapshot_id + i |
| 758 | + parent_snapshot_id = snapshot_id - 1 if i > 0 else None |
| 759 | + timestamp_ms = int(time.time() * 1000) - randint(0, 1000000) |
| 760 | + snapshots.append(generate_snapshot(snapshot_id, parent_snapshot_id, timestamp_ms, i)) |
| 761 | + snapshot_log.append({"snapshot-id": snapshot_id, "timestamp-ms": timestamp_ms}) |
| 762 | + |
| 763 | + return { |
| 764 | + "format-version": 2, |
| 765 | + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", |
| 766 | + "location": "s3://bucket/test/location", |
| 767 | + "last-sequence-number": 34, |
| 768 | + "last-updated-ms": 1602638573590, |
| 769 | + "last-column-id": 3, |
| 770 | + "current-schema-id": 1, |
| 771 | + "schemas": [ |
| 772 | + {"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]}, |
| 773 | + { |
| 774 | + "type": "struct", |
| 775 | + "schema-id": 1, |
| 776 | + "identifier-field-ids": [1, 2], |
| 777 | + "fields": [ |
| 778 | + {"id": 1, "name": "x", "required": True, "type": "long"}, |
| 779 | + {"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"}, |
| 780 | + {"id": 3, "name": "z", "required": True, "type": "long"}, |
| 781 | + ], |
| 782 | + }, |
| 783 | + ], |
| 784 | + "default-spec-id": 0, |
| 785 | + "partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}], |
| 786 | + "last-partition-id": 1000, |
| 787 | + "default-sort-order-id": 3, |
| 788 | + "sort-orders": [ |
| 789 | + { |
| 790 | + "order-id": 3, |
| 791 | + "fields": [ |
| 792 | + {"transform": "identity", "source-id": 2, "direction": "asc", "null-order": "nulls-first"}, |
| 793 | + {"transform": "bucket[4]", "source-id": 3, "direction": "desc", "null-order": "nulls-last"}, |
| 794 | + ], |
| 795 | + } |
| 796 | + ], |
| 797 | + "properties": {"read.split.target.size": "134217728"}, |
| 798 | + "current-snapshot-id": initial_snapshot_id + 1999, |
| 799 | + "snapshots": snapshots, |
| 800 | + "snapshot-log": snapshot_log, |
| 801 | + "metadata-log": [{"metadata-file": "s3://bucket/.../v1.json", "timestamp-ms": 1515100}], |
| 802 | + "refs": {"test": {"snapshot-id": initial_snapshot_id, "type": "tag", "max-ref-age-ms": 10000000}}, |
| 803 | + } |
| 804 | + |
| 805 | + |
734 | 806 | EXAMPLE_TABLE_METADATA_V2 = {
|
735 | 807 | "format-version": 2,
|
736 | 808 | "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
|
@@ -1992,6 +2064,18 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table:
|
1992 | 2064 | )
|
1993 | 2065 |
|
1994 | 2066 |
|
| 2067 | +@pytest.fixture |
| 2068 | +def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots: Dict[str, Any]) -> Table: |
| 2069 | + table_metadata = TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots) |
| 2070 | + return Table( |
| 2071 | + identifier=("database", "table"), |
| 2072 | + metadata=table_metadata, |
| 2073 | + metadata_location=f"{table_metadata.location}/uuid.metadata.json", |
| 2074 | + io=load_file_io(), |
| 2075 | + catalog=NoopCatalog("NoopCatalog"), |
| 2076 | + ) |
| 2077 | + |
| 2078 | + |
1995 | 2079 | @pytest.fixture
|
1996 | 2080 | def bound_reference_str() -> BoundReference[str]:
|
1997 | 2081 | return BoundReference(field=NestedField(1, "field", StringType(), required=False), accessor=Accessor(position=0, inner=None))
|
|
0 commit comments