diff --git a/pyiceberg/avro/reader.py b/pyiceberg/avro/reader.py index 21f5d8077f..4c028ed711 100644 --- a/pyiceberg/avro/reader.py +++ b/pyiceberg/avro/reader.py @@ -175,6 +175,14 @@ class TimestampReader(IntegerReader): """ +class TimestampNanoReader(IntegerReader): + """Reads a nanosecond granularity timestamp from the stream. + + Long is decoded as python integer which represents + the number of nanoseconds from the unix epoch, 1 January 1970. + """ + + class TimestamptzReader(IntegerReader): """Reads a microsecond granularity timestamptz from the stream. @@ -185,6 +193,16 @@ class TimestamptzReader(IntegerReader): """ +class TimestamptzNanoReader(IntegerReader): + """Reads a microsecond granularity timestamptz from the stream. + + Long is decoded as python integer which represents + the number of nanoseconds from the unix epoch, 1 January 1970. + + Adjusted to UTC. + """ + + class StringReader(Reader): def read(self, decoder: BinaryDecoder) -> str: return decoder.read_utf8() diff --git a/pyiceberg/avro/resolver.py b/pyiceberg/avro/resolver.py index 004af8bd4e..9ed111ff40 100644 --- a/pyiceberg/avro/resolver.py +++ b/pyiceberg/avro/resolver.py @@ -44,7 +44,9 @@ StringReader, StructReader, TimeReader, + TimestampNanoReader, TimestampReader, + TimestamptzNanoReader, TimestamptzReader, UnknownReader, UUIDReader, @@ -64,6 +66,8 @@ OptionWriter, StringWriter, StructWriter, + TimestampNanoWriter, + TimestamptzNanoWriter, TimestamptzWriter, TimestampWriter, TimeWriter, @@ -99,7 +103,9 @@ PrimitiveType, StringType, StructType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UnknownType, @@ -184,9 +190,15 @@ def visit_time(self, time_type: TimeType) -> Writer: def visit_timestamp(self, timestamp_type: TimestampType) -> Writer: return TimestampWriter() + def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType) -> Writer: + return TimestampNanoWriter() + def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> Writer: return TimestamptzWriter() + def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType) -> Writer: + return TimestamptzNanoWriter() + def visit_string(self, string_type: StringType) -> Writer: return StringWriter() @@ -332,9 +344,15 @@ def visit_time(self, time_type: TimeType, partner: Optional[IcebergType]) -> Wri def visit_timestamp(self, timestamp_type: TimestampType, partner: Optional[IcebergType]) -> Writer: return TimestampWriter() + def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType, partner: Optional[IcebergType]) -> Writer: + return TimestampNanoWriter() + def visit_timestamptz(self, timestamptz_type: TimestamptzType, partner: Optional[IcebergType]) -> Writer: return TimestamptzWriter() + def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType, partner: Optional[IcebergType]) -> Writer: + return TimestamptzNanoWriter() + def visit_string(self, string_type: StringType, partner: Optional[IcebergType]) -> Writer: return StringWriter() @@ -465,9 +483,15 @@ def visit_time(self, time_type: TimeType, partner: Optional[IcebergType]) -> Rea def visit_timestamp(self, timestamp_type: TimestampType, partner: Optional[IcebergType]) -> Reader: return TimestampReader() + def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType, partner: Optional[IcebergType]) -> Reader: + return TimestampNanoReader() + def visit_timestamptz(self, timestamptz_type: TimestamptzType, partner: Optional[IcebergType]) -> Reader: return TimestamptzReader() + def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType, partner: Optional[IcebergType]) -> Reader: + return TimestamptzNanoReader() + def visit_string(self, string_type: StringType, partner: Optional[IcebergType]) -> Reader: return StringReader() diff --git a/pyiceberg/avro/writer.py b/pyiceberg/avro/writer.py index 51d2d1bcc6..80e96b04ad 100644 --- a/pyiceberg/avro/writer.py +++ b/pyiceberg/avro/writer.py @@ -95,12 +95,24 @@ def write(self, encoder: BinaryEncoder, val: int) -> None: encoder.write_int(val) +@dataclass(frozen=True) +class TimestampNanoWriter(Writer): + def write(self, encoder: BinaryEncoder, val: int) -> None: + encoder.write_int(val) + + @dataclass(frozen=True) class TimestamptzWriter(Writer): def write(self, encoder: BinaryEncoder, val: int) -> None: encoder.write_int(val) +@dataclass(frozen=True) +class TimestamptzNanoWriter(Writer): + def write(self, encoder: BinaryEncoder, val: int) -> None: + encoder.write_int(val) + + @dataclass(frozen=True) class StringWriter(Writer): def write(self, encoder: BinaryEncoder, val: Any) -> None: diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py index ed5c6f7aac..fe208b4aca 100644 --- a/pyiceberg/conversions.py +++ b/pyiceberg/conversions.py @@ -55,7 +55,9 @@ LongType, PrimitiveType, StringType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UnknownType, @@ -66,6 +68,7 @@ date_str_to_days, date_to_days, datetime_to_micros, + datetime_to_nanos, days_to_date, micros_to_time, micros_to_timestamp, @@ -127,7 +130,9 @@ def _(primitive_type: BooleanType, value_str: str) -> Union[int, float, str, uui @partition_to_py.register(DateType) @partition_to_py.register(TimeType) @partition_to_py.register(TimestampType) +@partition_to_py.register(TimestampNanoType) @partition_to_py.register(TimestamptzType) +@partition_to_py.register(TimestamptzNanoType) @handle_none def _(primitive_type: PrimitiveType, value_str: str) -> int: """Convert a string to an integer value. @@ -213,12 +218,20 @@ def _(_: PrimitiveType, value: int) -> bytes: @to_bytes.register(TimestampType) @to_bytes.register(TimestamptzType) -def _(_: TimestampType, value: Union[datetime, int]) -> bytes: +def _(_: PrimitiveType, value: Union[datetime, int]) -> bytes: if isinstance(value, datetime): value = datetime_to_micros(value) return _LONG_STRUCT.pack(value) +@to_bytes.register(TimestampNanoType) +@to_bytes.register(TimestamptzNanoType) +def _(_: PrimitiveType, value: Union[datetime, int]) -> bytes: + if isinstance(value, datetime): + value = datetime_to_nanos(value) + return _LONG_STRUCT.pack(value) + + @to_bytes.register(DateType) def _(_: DateType, value: Union[date, int]) -> bytes: if isinstance(value, date): @@ -319,6 +332,8 @@ def _(_: PrimitiveType, b: bytes) -> int: @from_bytes.register(TimeType) @from_bytes.register(TimestampType) @from_bytes.register(TimestamptzType) +@from_bytes.register(TimestampNanoType) +@from_bytes.register(TimestamptzNanoType) def _(_: PrimitiveType, b: bytes) -> int: return _LONG_STRUCT.unpack(b)[0] diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index d9f84a42ba..88be6abac7 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -163,7 +163,9 @@ PrimitiveType, StringType, StructType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UnknownType, @@ -662,9 +664,15 @@ def visit_time(self, _: TimeType) -> pa.DataType: def visit_timestamp(self, _: TimestampType) -> pa.DataType: return pa.timestamp(unit="us") + def visit_timestamp_ns(self, _: TimestampNanoType) -> pa.DataType: + return pa.timestamp(unit="ns") + def visit_timestamptz(self, _: TimestamptzType) -> pa.DataType: return pa.timestamp(unit="us", tz="UTC") + def visit_timestamptz_ns(self, _: TimestamptzNanoType) -> pa.DataType: + return pa.timestamp(unit="ns", tz="UTC") + def visit_string(self, _: StringType) -> pa.DataType: return pa.large_string() @@ -1894,9 +1902,15 @@ def visit_time(self, time_type: TimeType) -> str: def visit_timestamp(self, timestamp_type: TimestampType) -> str: return "INT64" + def visit_timestamp_ns(self, timestamp_type: TimestampNanoType) -> str: + return "INT64" + def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> str: return "INT64" + def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType) -> str: + return "INT64" + def visit_string(self, string_type: StringType) -> str: return "BYTE_ARRAY" diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py index 7f6cfe9987..51762c57a7 100644 --- a/pyiceberg/schema.py +++ b/pyiceberg/schema.py @@ -57,7 +57,9 @@ PrimitiveType, StringType, StructType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UnknownType, @@ -362,6 +364,21 @@ def _validate_identifier_field(self, field_id: int) -> None: f"Cannot add field {field.name} as an identifier field: must not be nested in an optional field {parent}" ) + def check_format_version_compatibility(self, format_version: int) -> None: + """Check that the schema is compatible for the given table format version. + + Args: + format_version: The Iceberg table format version. + + Raises: + ValueError: If the schema is not compatible for the format version. + """ + for field in self._lazy_id_to_field.values(): + if format_version < field.field_type.minimum_format_version(): + raise ValueError( + f"{field.field_type} is only supported in {field.field_type.minimum_format_version()} or higher. Current format version is: {format_version}" + ) + class SchemaVisitor(Generic[T], ABC): def before_field(self, field: NestedField) -> None: @@ -522,8 +539,12 @@ def primitive(self, primitive: PrimitiveType, primitive_partner: Optional[P]) -> return self.visit_time(primitive, primitive_partner) elif isinstance(primitive, TimestampType): return self.visit_timestamp(primitive, primitive_partner) + elif isinstance(primitive, TimestampNanoType): + return self.visit_timestamp_ns(primitive, primitive_partner) elif isinstance(primitive, TimestamptzType): return self.visit_timestamptz(primitive, primitive_partner) + elif isinstance(primitive, TimestamptzNanoType): + return self.visit_timestamptz_ns(primitive, primitive_partner) elif isinstance(primitive, StringType): return self.visit_string(primitive, primitive_partner) elif isinstance(primitive, UUIDType): @@ -573,10 +594,18 @@ def visit_time(self, time_type: TimeType, partner: Optional[P]) -> T: def visit_timestamp(self, timestamp_type: TimestampType, partner: Optional[P]) -> T: """Visit a TimestampType.""" + @abstractmethod + def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType, partner: Optional[P]) -> T: + """Visit a TimestampNanoType.""" + @abstractmethod def visit_timestamptz(self, timestamptz_type: TimestamptzType, partner: Optional[P]) -> T: """Visit a TimestamptzType.""" + @abstractmethod + def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType, partner: Optional[P]) -> T: + """Visit a TimestamptzNanoType.""" + @abstractmethod def visit_string(self, string_type: StringType, partner: Optional[P]) -> T: """Visit a StringType.""" @@ -706,8 +735,12 @@ def primitive(self, primitive: PrimitiveType) -> T: return self.visit_time(primitive) elif isinstance(primitive, TimestampType): return self.visit_timestamp(primitive) + elif isinstance(primitive, TimestampNanoType): + return self.visit_timestamp_ns(primitive) elif isinstance(primitive, TimestamptzType): return self.visit_timestamptz(primitive) + elif isinstance(primitive, TimestamptzNanoType): + return self.visit_timestamptz_ns(primitive) elif isinstance(primitive, StringType): return self.visit_string(primitive) elif isinstance(primitive, UUIDType): @@ -759,10 +792,18 @@ def visit_time(self, time_type: TimeType) -> T: def visit_timestamp(self, timestamp_type: TimestampType) -> T: """Visit a TimestampType.""" + @abstractmethod + def visit_timestamp_ns(self, timestamp_type: TimestampNanoType) -> T: + """Visit a TimestampNanoType.""" + @abstractmethod def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> T: """Visit a TimestamptzType.""" + @abstractmethod + def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType) -> T: + """Visit a TimestamptzNanoType.""" + @abstractmethod def visit_string(self, string_type: StringType) -> T: """Visit a StringType.""" diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index d5ce76560c..abe3d8ef32 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -578,6 +578,11 @@ def new_table_metadata( ) -> TableMetadata: from pyiceberg.table import TableProperties + # Remove format-version so it does not get persisted + format_version = int(properties.pop(TableProperties.FORMAT_VERSION, TableProperties.DEFAULT_FORMAT_VERSION)) + + schema.check_format_version_compatibility(format_version) + fresh_schema = assign_fresh_schema_ids(schema) fresh_partition_spec = assign_fresh_partition_spec_ids(partition_spec, schema, fresh_schema) fresh_sort_order = assign_fresh_sort_order_ids(sort_order, schema, fresh_schema) @@ -585,8 +590,6 @@ def new_table_metadata( if table_uuid is None: table_uuid = uuid.uuid4() - # Remove format-version so it does not get persisted - format_version = int(properties.pop(TableProperties.FORMAT_VERSION, TableProperties.DEFAULT_FORMAT_VERSION)) if format_version == 1: return TableMetadataV1( location=location, diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 4a652a2f4a..fd90859203 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -73,7 +73,9 @@ IntegerType, LongType, StringType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UUIDType, @@ -290,6 +292,8 @@ def can_transform(self, source: IcebergType) -> bool: TimeType, TimestampType, TimestamptzType, + TimestampNanoType, + TimestamptzNanoType, DecimalType, StringType, FixedType, @@ -323,6 +327,18 @@ def hash_func(v: Any) -> int: return mmh3.hash(struct.pack(" int: + # In order to bucket TimestampNano the same as Timestamp + # convert to micros before hashing. + if isinstance(v, py_datetime.datetime): + v = datetime.datetime_to_micros(v) + else: + v = datetime.nanos_to_micros(v) + + return mmh3.hash(struct.pack(" int: @@ -457,13 +473,20 @@ def year_func(v: Any) -> int: return datetime.micros_to_years(v) + elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)): + + def year_func(v: Any) -> int: + # python datetime has no nanoseconds support. + # nanosecond datetimes will be expressed as int as a workaround + return datetime.nanos_to_years(v) + else: raise ValueError(f"Cannot apply year transform for type: {source}") return lambda v: year_func(v) if v is not None else None def can_transform(self, source: IcebergType) -> bool: - return isinstance(source, (DateType, TimestampType, TimestamptzType)) + return isinstance(source, (DateType, TimestampType, TimestamptzType, TimestampNanoType, TimestamptzNanoType)) @property def granularity(self) -> TimeResolution: @@ -481,15 +504,19 @@ def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Arr import pyarrow.compute as pc if isinstance(source, DateType): - epoch = datetime.EPOCH_DATE + epoch = pa.scalar(datetime.EPOCH_DATE) elif isinstance(source, TimestampType): - epoch = datetime.EPOCH_TIMESTAMP + epoch = pa.scalar(datetime.EPOCH_TIMESTAMP) elif isinstance(source, TimestamptzType): - epoch = datetime.EPOCH_TIMESTAMPTZ + epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ) + elif isinstance(source, TimestampNanoType): + epoch = pa.scalar(datetime.EPOCH_TIMESTAMP).cast(pa.timestamp("ns")) + elif isinstance(source, TimestamptzNanoType): + epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ).cast(pa.timestamp("ns")) else: raise ValueError(f"Cannot apply year transform for type: {source}") - return lambda v: pc.years_between(pa.scalar(epoch), v) if v is not None else None + return lambda v: pc.years_between(epoch, v) if v is not None else None class MonthTransform(TimeTransform[S]): @@ -520,13 +547,20 @@ def month_func(v: Any) -> int: return datetime.micros_to_months(v) + elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)): + + def month_func(v: Any) -> int: + # python datetime has no nanoseconds support. + # nanosecond datetimes will be expressed as int as a workaround + return datetime.nanos_to_months(v) + else: raise ValueError(f"Cannot apply month transform for type: {source}") return lambda v: month_func(v) if v is not None else None def can_transform(self, source: IcebergType) -> bool: - return isinstance(source, (DateType, TimestampType, TimestamptzType)) + return isinstance(source, (DateType, TimestampType, TimestamptzType, TimestampNanoType, TimestamptzNanoType)) @property def granularity(self) -> TimeResolution: @@ -544,17 +578,21 @@ def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Arr import pyarrow.compute as pc if isinstance(source, DateType): - epoch = datetime.EPOCH_DATE + epoch = pa.scalar(datetime.EPOCH_DATE) elif isinstance(source, TimestampType): - epoch = datetime.EPOCH_TIMESTAMP + epoch = pa.scalar(datetime.EPOCH_TIMESTAMP) elif isinstance(source, TimestamptzType): - epoch = datetime.EPOCH_TIMESTAMPTZ + epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ) + elif isinstance(source, TimestampNanoType): + epoch = pa.scalar(datetime.EPOCH_TIMESTAMP).cast(pa.timestamp("ns")) + elif isinstance(source, TimestamptzNanoType): + epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ).cast(pa.timestamp("ns")) else: raise ValueError(f"Cannot apply month transform for type: {source}") def month_func(v: pa.Array) -> pa.Array: return pc.add( - pc.multiply(pc.years_between(pa.scalar(epoch), v), pa.scalar(12)), + pc.multiply(pc.years_between(epoch, v), pa.scalar(12)), pc.add(pc.month(v), pa.scalar(-1)), ) @@ -589,13 +627,20 @@ def day_func(v: Any) -> int: return datetime.micros_to_days(v) + elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)): + + def day_func(v: Any) -> int: + # python datetime has no nanoseconds support. + # nanosecond datetimes will be expressed as int as a workaround + return datetime.nanos_to_days(v) + else: raise ValueError(f"Cannot apply day transform for type: {source}") return lambda v: day_func(v) if v is not None else None def can_transform(self, source: IcebergType) -> bool: - return isinstance(source, (DateType, TimestampType, TimestamptzType)) + return isinstance(source, (DateType, TimestampType, TimestamptzType, TimestampNanoType, TimestamptzNanoType)) def result_type(self, source: IcebergType) -> IcebergType: """Return the result type of a day transform. @@ -621,15 +666,19 @@ def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Arr import pyarrow.compute as pc if isinstance(source, DateType): - epoch = datetime.EPOCH_DATE + epoch = pa.scalar(datetime.EPOCH_DATE) elif isinstance(source, TimestampType): - epoch = datetime.EPOCH_TIMESTAMP + epoch = pa.scalar(datetime.EPOCH_TIMESTAMP) elif isinstance(source, TimestamptzType): - epoch = datetime.EPOCH_TIMESTAMPTZ + epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ) + elif isinstance(source, TimestampNanoType): + epoch = pa.scalar(datetime.EPOCH_TIMESTAMP).cast(pa.timestamp("ns")) + elif isinstance(source, TimestamptzNanoType): + epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ).cast(pa.timestamp("ns")) else: raise ValueError(f"Cannot apply day transform for type: {source}") - return lambda v: pc.days_between(pa.scalar(epoch), v) if v is not None else None + return lambda v: pc.days_between(epoch, v) if v is not None else None class HourTransform(TimeTransform[S]): @@ -652,13 +701,20 @@ def hour_func(v: Any) -> int: return datetime.micros_to_hours(v) + elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)): + + def day_func(v: Any) -> int: + # python datetime has no nanoseconds support. + # nanosecond datetimes will be expressed as int as a workaround + return datetime.nanos_to_hours(v) + else: raise ValueError(f"Cannot apply hour transform for type: {source}") return lambda v: hour_func(v) if v is not None else None def can_transform(self, source: IcebergType) -> bool: - return isinstance(source, (TimestampType, TimestamptzType)) + return isinstance(source, (TimestampType, TimestamptzType, TimestampNanoType, TimestamptzNanoType)) @property def granularity(self) -> TimeResolution: @@ -676,13 +732,17 @@ def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Arr import pyarrow.compute as pc if isinstance(source, TimestampType): - epoch = datetime.EPOCH_TIMESTAMP + epoch = pa.scalar(datetime.EPOCH_TIMESTAMP) elif isinstance(source, TimestamptzType): - epoch = datetime.EPOCH_TIMESTAMPTZ + epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ) + elif isinstance(source, TimestampNanoType): + epoch = pa.scalar(datetime.EPOCH_TIMESTAMP).cast(pa.timestamp("ns")) + elif isinstance(source, TimestamptzNanoType): + epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ).cast(pa.timestamp("ns")) else: raise ValueError(f"Cannot apply hour transform for type: {source}") - return lambda v: pc.hours_between(pa.scalar(epoch), v) if v is not None else None + return lambda v: pc.hours_between(epoch, v) if v is not None else None def _base64encode(buffer: bytes) -> str: diff --git a/pyiceberg/types.py b/pyiceberg/types.py index 456f9ad884..8e83b011bf 100644 --- a/pyiceberg/types.py +++ b/pyiceberg/types.py @@ -53,7 +53,7 @@ from pydantic_core.core_schema import ValidatorFunctionWrapHandler from pyiceberg.exceptions import ValidationError -from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel, L +from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel, L, TableVersion from pyiceberg.utils.parsing import ParseNumberFromBrackets from pyiceberg.utils.singleton import Singleton @@ -140,6 +140,10 @@ def handle_primitive_type(cls, v: Any, handler: ValidatorFunctionWrapHandler) -> return TimestampType() if v == "timestamptz": return TimestamptzType() + if v == "timestamp_ns": + return TimestampNanoType() + if v == "timestamptz_ns": + return TimestamptzNanoType() if v == "date": return DateType() if v == "time": @@ -177,6 +181,10 @@ def is_primitive(self) -> bool: def is_struct(self) -> bool: return isinstance(self, StructType) + def minimum_format_version(self) -> TableVersion: + """Minimum Iceberg format version after which this type is supported.""" + return 1 + class PrimitiveType(Singleton, IcebergRootModel[str], IcebergType): """Base class for all Iceberg Primitive Types.""" @@ -703,6 +711,44 @@ class TimestamptzType(PrimitiveType): root: Literal["timestamptz"] = Field(default="timestamptz") +class TimestampNanoType(PrimitiveType): + """A TimestampNano data type in Iceberg can be represented using an instance of this class. + + TimestampNanos in Iceberg have nanosecond precision and include a date and a time of day without a timezone. + + Example: + >>> column_foo = TimestampNanoType() + >>> isinstance(column_foo, TimestampNanoType) + True + >>> column_foo + TimestampNanoType() + """ + + root: Literal["timestamp_ns"] = Field(default="timestamp_ns") + + def minimum_format_version(self) -> TableVersion: + return 3 + + +class TimestamptzNanoType(PrimitiveType): + """A TimestamptzNano data type in Iceberg can be represented using an instance of this class. + + TimestamptzNanos in Iceberg are stored as UTC and include a date and a time of day with a timezone. + + Example: + >>> column_foo = TimestamptzNanoType() + >>> isinstance(column_foo, TimestamptzNanoType) + True + >>> column_foo + TimestamptzNanoType() + """ + + root: Literal["timestamptz_ns"] = Field(default="timestamptz_ns") + + def minimum_format_version(self) -> TableVersion: + return 3 + + class StringType(PrimitiveType): """A String data type in Iceberg can be represented using an instance of this class. @@ -765,3 +811,6 @@ class UnknownType(PrimitiveType): """ root: Literal["unknown"] = Field(default="unknown") + + def minimum_format_version(self) -> TableVersion: + return 3 diff --git a/pyiceberg/utils/datetime.py b/pyiceberg/utils/datetime.py index 0cb6926efa..46bbb32dcd 100644 --- a/pyiceberg/utils/datetime.py +++ b/pyiceberg/utils/datetime.py @@ -29,8 +29,10 @@ EPOCH_DATE = date.fromisoformat("1970-01-01") EPOCH_TIMESTAMP = datetime.fromisoformat("1970-01-01T00:00:00.000000") ISO_TIMESTAMP = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(.\d{1,6})?") +ISO_TIMESTAMP_NANO = re.compile(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(.\d{1,6})?(\d{1,3})?") EPOCH_TIMESTAMPTZ = datetime.fromisoformat("1970-01-01T00:00:00.000000+00:00") ISO_TIMESTAMPTZ = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(.\d{1,6})?[-+]\d{2}:\d{2}") +ISO_TIMESTAMPTZ_NANO = re.compile(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(.\d{1,6})?(\d{1,3})?([-+]\d{2}:\d{2})") def micros_to_days(timestamp: int) -> int: @@ -91,6 +93,59 @@ def timestamp_to_micros(timestamp_str: str) -> int: raise ValueError(f"Invalid timestamp without zone: {timestamp_str} (must be ISO-8601)") +def time_str_to_nanos(time_str: str) -> int: + """Convert an ISO-8601 formatted time to nanoseconds from midnight.""" + return time_to_nanos(time.fromisoformat(time_str)) + + +def time_to_nanos(t: time) -> int: + """Convert a datetime.time object to nanoseconds from midnight.""" + # python datetime and time doesn't have nanoseconds support yet + # https://github.com/python/cpython/issues/59648 + return ((((t.hour * 60 + t.minute) * 60) + t.second) * 1_000_000 + t.microsecond) * 1_000 + + +def datetime_to_nanos(dt: datetime) -> int: + """Convert a datetime to nanoseconds from 1970-01-01T00:00:00.000000000.""" + # python datetime and time doesn't have nanoseconds support yet + # https://github.com/python/cpython/issues/59648 + if dt.tzinfo: + delta = dt - EPOCH_TIMESTAMPTZ + else: + delta = dt - EPOCH_TIMESTAMP + return ((delta.days * 86400 + delta.seconds) * 1_000_000 + delta.microseconds) * 1_000 + + +def timestamp_to_nanos(timestamp_str: str) -> int: + """Convert an ISO-9601 formatted timestamp without zone to nanoseconds from 1970-01-01T00:00:00.000000000.""" + if match := ISO_TIMESTAMP_NANO.fullmatch(timestamp_str): + # Python datetime does not have native nanoseconds support + # Hence we need to extract nanoseconds timestamp manually + ns_str = match.group(3) or "0" + ms_str = match.group(2) if match.group(2) else "" + timestamp_str_without_ns_str = match.group(1) + ms_str + return datetime_to_nanos(datetime.fromisoformat(timestamp_str_without_ns_str)) + int(ns_str) + if ISO_TIMESTAMPTZ_NANO.fullmatch(timestamp_str): + # When we can match a timestamp without a zone, we can give a more specific error + raise ValueError(f"Zone offset provided, but not expected: {timestamp_str}") + raise ValueError(f"Invalid timestamp without zone: {timestamp_str} (must be ISO-8601)") + + +def timestamptz_to_nanos(timestamptz_str: str) -> int: + """Convert an ISO-8601 formatted timestamp with zone to nanoseconds from 1970-01-01T00:00:00.000000000+00:00.""" + if match := ISO_TIMESTAMPTZ_NANO.fullmatch(timestamptz_str): + # Python datetime does not have native nanoseconds support + # Hence we need to extract nanoseconds timestamp manually + ns_str = match.group(3) or "0" + ms_str = match.group(2) if match.group(2) else "" + timestamptz_str_without_ns_str = match.group(1) + ms_str + match.group(4) + return datetime_to_nanos(datetime.fromisoformat(timestamptz_str_without_ns_str)) + int(ns_str) + if ISO_TIMESTAMPTZ_NANO.fullmatch(timestamptz_str): + # When we can match a timestamp without a zone, we can give a more specific error + raise ValueError(f"Missing zone offset: {timestamptz_str} (must be ISO-8601)") + raise ValueError(f"Invalid timestamp with zone: {timestamptz_str} (must be ISO-8601)") + + def datetime_to_millis(dt: datetime) -> int: """Convert a datetime to milliseconds from 1970-01-01T00:00:00.000000.""" if dt.tzinfo: @@ -184,3 +239,43 @@ def days_to_years(days: int) -> int: def micros_to_years(micros: int) -> int: return micros_to_timestamp(micros).year - EPOCH_TIMESTAMP.year + + +def nanos_to_timestamp(nanos: int) -> datetime: + """Convert nanoseconds from epoch to a microsecond timestamp.""" + dt = timedelta(microseconds=nanos_to_micros(nanos)) + return EPOCH_TIMESTAMP + dt + + +def nanos_to_years(nanos: int) -> int: + return nanos_to_timestamp(nanos).year - EPOCH_TIMESTAMP.year + + +def nanos_to_months(nanos: int) -> int: + dt = nanos_to_timestamp(nanos) + return (dt.year - EPOCH_TIMESTAMP.year) * 12 + (dt.month - EPOCH_TIMESTAMP.month) + + +def nanos_to_days(nanos: int) -> int: + """Convert a timestamp in nanoseconds to a date in days.""" + return timedelta(microseconds=nanos // 1000).days + + +def nanos_to_time(nanos: int) -> time: + """Convert a timestamp in nanoseconds to a microsecond precision time.""" + micros = nanos_to_micros(nanos) + micros, microseconds = divmod(micros, 1000000) + micros, seconds = divmod(micros, 60) + micros, minutes = divmod(micros, 60) + hours = micros + return time(hour=hours, minute=minutes, second=seconds, microsecond=microseconds) + + +def nanos_to_hours(nanos: int) -> int: + """Convert a timestamp in nanoseconds to hours from 1970-01-01T00:00.""" + return nanos // 3_600_000_000_0000 + + +def nanos_to_micros(nanos: int) -> int: + """Convert a nanoseconds timestamp to microsecond timestamp by dropping precision.""" + return nanos // 1000 diff --git a/pyiceberg/utils/schema_conversion.py b/pyiceberg/utils/schema_conversion.py index 17eb2051ff..6959380d63 100644 --- a/pyiceberg/utils/schema_conversion.py +++ b/pyiceberg/utils/schema_conversion.py @@ -605,13 +605,17 @@ def visit_time(self, time_type: TimeType) -> AvroType: return {"type": "long", "logicalType": "time-micros"} def visit_timestamp(self, timestamp_type: TimestampType) -> AvroType: - # Iceberg only supports micro's return {"type": "long", "logicalType": "timestamp-micros", "adjust-to-utc": False} + def visit_timestamp_ns(self, timestamp_type: TimestampType) -> AvroType: + return {"type": "long", "logicalType": "timestamp-nanos", "adjust-to-utc": False} + def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> AvroType: - # Iceberg only supports micro's return {"type": "long", "logicalType": "timestamp-micros", "adjust-to-utc": True} + def visit_timestamptz_ns(self, timestamptz_type: TimestamptzType) -> AvroType: + return {"type": "long", "logicalType": "timestamp-nanos", "adjust-to-utc": True} + def visit_string(self, string_type: StringType) -> AvroType: return "string" diff --git a/tests/avro/test_reader.py b/tests/avro/test_reader.py index c713201bc0..3fdd3bbda3 100644 --- a/tests/avro/test_reader.py +++ b/tests/avro/test_reader.py @@ -35,7 +35,9 @@ StringReader, StructReader, TimeReader, + TimestampNanoReader, TimestampReader, + TimestamptzNanoReader, TimestamptzReader, UnknownReader, UUIDReader, @@ -58,7 +60,9 @@ NestedField, StringType, StructType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UnknownType, @@ -313,10 +317,18 @@ def test_timestamp_reader() -> None: assert construct_reader(TimestampType()) == TimestampReader() +def test_timestamp_ns_reader() -> None: + assert construct_reader(TimestampNanoType()) == TimestampNanoReader() + + def test_timestamptz_reader() -> None: assert construct_reader(TimestamptzType()) == TimestamptzReader() +def test_timestamptz_ns_reader() -> None: + assert construct_reader(TimestamptzNanoType()) == TimestamptzNanoReader() + + def test_string_reader() -> None: assert construct_reader(StringType()) == StringReader() diff --git a/tests/avro/test_writer.py b/tests/avro/test_writer.py index 951be7e7da..3114b97d2b 100644 --- a/tests/avro/test_writer.py +++ b/tests/avro/test_writer.py @@ -33,6 +33,8 @@ FloatWriter, IntegerWriter, StringWriter, + TimestampNanoWriter, + TimestamptzNanoWriter, TimestamptzWriter, TimestampWriter, TimeWriter, @@ -55,7 +57,9 @@ NestedField, StringType, StructType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UnknownType, @@ -113,10 +117,18 @@ def test_timestamp_writer() -> None: assert construct_writer(TimestampType()) == TimestampWriter() +def test_timestamp_ns_writer() -> None: + assert construct_writer(TimestampNanoType()) == TimestampNanoWriter() + + def test_timestamptz_writer() -> None: assert construct_writer(TimestamptzType()) == TimestamptzWriter() +def test_timestamptz_ns_writer() -> None: + assert construct_writer(TimestamptzNanoType()) == TimestamptzNanoWriter() + + def test_string_writer() -> None: assert construct_writer(StringType()) == StringWriter() diff --git a/tests/conftest.py b/tests/conftest.py index 1cbb3cfa4d..6444b7b273 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -920,10 +920,9 @@ def generate_snapshot( {"id": 1, "name": "x", "required": True, "type": "long"}, {"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"}, {"id": 3, "name": "z", "required": True, "type": "long"}, - # TODO: Add unknown, timestamp(tz)_ns {"id": 4, "name": "u", "required": True, "type": "unknown"}, - # {"id": 5, "name": "ns", "required": True, "type": "timestamp_ns"}, - # {"id": 6, "name": "nstz", "required": True, "type": "timestamptz_ns"}, + {"id": 5, "name": "ns", "required": True, "type": "timestamp_ns"}, + {"id": 6, "name": "nstz", "required": True, "type": "timestamptz_ns"}, ], }, ], diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 1eb3500a17..ee5f8a2574 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -56,7 +56,6 @@ NestedField, StringType, TimestampType, - UnknownType, ) from pyiceberg.utils.concurrent import ExecutorFactory @@ -979,39 +978,3 @@ def test_scan_with_datetime(catalog: Catalog) -> None: df = table.scan(row_filter=LessThan("datetime", yesterday)).to_pandas() assert len(df) == 0 - - -@pytest.mark.integration -@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")]) -def test_read_unknown_type(catalog: Catalog) -> None: - identifier = "default.test_table_read_unknown_type" - arrow_table = pa.Table.from_pydict( - { - "int": [1, 2], - "string": ["a", "b"], - "unknown": [None, None], - }, - schema=pa.schema( - [ - pa.field("int", pa.int32(), nullable=True), - pa.field("string", pa.string(), nullable=True), - pa.field("unknown", pa.null(), nullable=True), - ], - ), - ) - - try: - catalog.drop_table(identifier) - except NoSuchTableError: - pass - - tbl = catalog.create_table( - identifier, - schema=arrow_table.schema, - ) - - tbl.append(arrow_table) - - assert tbl.schema().find_type("unknown") == UnknownType() - result_table = tbl.scan().to_arrow() - assert result_table["unknown"].to_pylist() == [None, None] diff --git a/tests/table/test_metadata.py b/tests/table/test_metadata.py index d2ee5c3130..a8410cff36 100644 --- a/tests/table/test_metadata.py +++ b/tests/table/test_metadata.py @@ -48,8 +48,12 @@ LongType, MapType, NestedField, + PrimitiveType, StringType, StructType, + TimestampNanoType, + TimestamptzNanoType, + UnknownType, ) @@ -765,3 +769,110 @@ def test_make_metadata_fresh() -> None: ) assert actual.model_dump() == expected.model_dump() + + +def test_new_table_metadata_with_v3_schema() -> None: + schema = Schema( + NestedField(field_id=10, name="foo", field_type=StringType(), required=False), + NestedField(field_id=22, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=33, name="baz", field_type=BooleanType(), required=False), + NestedField(field_id=34, name="qux", field_type=TimestampNanoType(), required=False), + NestedField(field_id=35, name="quux", field_type=TimestamptzNanoType(), required=False), + schema_id=10, + identifier_field_ids=[22], + ) + + partition_spec = PartitionSpec( + PartitionField(source_id=22, field_id=1022, transform=IdentityTransform(), name="bar"), spec_id=10 + ) + + sort_order = SortOrder( + SortField(source_id=10, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST), + order_id=10, + ) + + actual = new_table_metadata( + schema=schema, + partition_spec=partition_spec, + sort_order=sort_order, + location="s3://some_v1_location/", + properties={"format-version": "3"}, + ) + + expected_schema = Schema( + NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), + NestedField(field_id=4, name="qux", field_type=TimestampNanoType(), required=False), + NestedField(field_id=5, name="quux", field_type=TimestamptzNanoType(), required=False), + schema_id=0, + identifier_field_ids=[2], + ) + + expected_spec = PartitionSpec(PartitionField(source_id=2, field_id=1000, transform=IdentityTransform(), name="bar")) + + expected_sort_order = SortOrder( + SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST), + order_id=1, + ) + + expected = TableMetadataV3( + location="s3://some_v1_location/", + table_uuid=actual.table_uuid, + last_updated_ms=actual.last_updated_ms, + last_column_id=5, + schemas=[expected_schema], + schema_=expected_schema, + current_schema_id=0, + partition_spec=[field.model_dump() for field in expected_spec.fields], + partition_specs=[expected_spec], + default_spec_id=0, + last_partition_id=1000, + properties={}, + current_snapshot_id=None, + snapshots=[], + snapshot_log=[], + metadata_log=[], + sort_orders=[expected_sort_order], + default_sort_order_id=1, + refs={}, + format_version=3, + ) + + assert actual.model_dump() == expected.model_dump() + assert actual.schemas == [expected_schema] + assert actual.partition_specs == [expected_spec] + assert actual.sort_orders == [expected_sort_order] + + +@pytest.mark.parametrize( + "field_type", + [ + TimestampNanoType(), + TimestamptzNanoType(), + UnknownType(), + ], +) +def test_new_table_metadata_format_v2_with_v3_schema_fails(field_type: PrimitiveType) -> None: + schema = Schema( + NestedField(field_id=34, name="qux", field_type=field_type, required=False), + schema_id=10, + ) + + partition_spec = PartitionSpec( + PartitionField(source_id=34, field_id=1022, transform=IdentityTransform(), name="qux"), spec_id=10 + ) + + sort_order = SortOrder( + SortField(source_id=34, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST), + order_id=34, + ) + + with pytest.raises(ValueError, match=f"{field_type} is only supported in 3 or higher. Current format version is: 2"): + new_table_metadata( + schema=schema, + partition_spec=partition_spec, + sort_order=sort_order, + location="s3://some_v1_location/", + properties={"format-version": "2"}, + ) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 0eafb96602..2ee0ba3dd9 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -43,6 +43,10 @@ - Stored as microseconds from 1970-01-01 00:00:00.000000 in an 8-byte little-endian long - 400000L is 0...110|00011010|10000000 in binary - 10000000 -> 128 (-128), 00011010 -> 26, 00000110 -> 6, ... , 00000000 -> 0 + TimestampNano: + - Stored as nanoseconds from 1970-01-01 00:00:00.000000000 in an 8-byte little-endian long + - 400000000L is 00010111|11010111|10000100|00000000 in binary + - 00000000 -> 0, 10000100 -> 124 (-124), 11010111 -> 41 (-41), 00010111 -> 23, ... , 00000000 -> 0 String: - Stored as UTF-8 bytes (without length) - 'A' -> 65, 'B' -> 66, 'C' -> 67 @@ -99,7 +103,9 @@ LongType, PrimitiveType, StringType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UUIDType, @@ -266,6 +272,8 @@ def test_partition_to_py_raise_on_incorrect_precision_or_scale( (TimestamptzType(), b"\x00\xe8vH\x17\x00\x00\x00", 100000000000), (TimestampType(), b"\x80\x1a\x06\x00\x00\x00\x00\x00", 400000), (TimestampType(), b"\x00\xe8vH\x17\x00\x00\x00", 100000000000), + (TimestampNanoType(), b"\00\x84\xd7\x17\x00\x00\x00\x00", 400000000), + (TimestamptzNanoType(), b"\00\x84\xd7\x17\x00\x00\x00\x00", 400000000), (StringType(), b"ABC", "ABC"), (StringType(), b"foo", "foo"), ( diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 8987f8b189..d22c94cc93 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. # pylint: disable=eval-used,protected-access,redefined-outer-name -from datetime import date +from datetime import date, datetime from decimal import Decimal from typing import Annotated, Any, Callable, Optional, Union from uuid import UUID @@ -24,6 +24,7 @@ import mmh3 as mmh3 import pyarrow as pa import pytest +import pytz from pydantic import ( BeforeValidator, PlainSerializer, @@ -103,7 +104,9 @@ NestedField, PrimitiveType, StringType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UnknownType, @@ -114,7 +117,9 @@ date_to_days, time_str_to_micros, timestamp_to_micros, + timestamp_to_nanos, timestamptz_to_micros, + timestamptz_to_nanos, ) @@ -142,6 +147,26 @@ ("iceberg", StringType(), 1210000089), (UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7"), UUIDType(), 1488055340), (b"\xf7\x9c>\tg|K\xbd\xa4y?4\x9c\xb7\x85\xe7", UUIDType(), 1488055340), + ( + timestamp_to_nanos("2017-11-16T22:31:08.000001"), + TimestampNanoType(), + -1207196810, + ), + ( + timestamp_to_nanos("2017-11-16T22:31:08.000001001"), + TimestampNanoType(), + -1207196810, + ), + ( + timestamptz_to_nanos("2017-11-16T14:31:08.000001-08:00"), + TimestamptzNanoType(), + -1207196810, + ), + ( + timestamptz_to_nanos("2017-11-16T14:31:08.000001001-08:00"), + TimestamptzNanoType(), + -1207196810, + ), ], ) def test_bucket_hash_values(test_input: Any, test_type: PrimitiveType, expected: Any) -> None: @@ -1603,7 +1628,7 @@ def test_ymd_pyarrow_transforms( ] else: with pytest.raises(ValueError): - transform.pyarrow_transform(DateType())(arrow_table_date_timestamps[source_col]) + transform.pyarrow_transform(source_type)(arrow_table_date_timestamps[source_col]) @pytest.mark.parametrize( @@ -1629,6 +1654,38 @@ def test_bucket_pyarrow_transforms( assert expected == transform.pyarrow_transform(source_type)(input_arr) +# pyiceberg_core currently does not support bucket transform on timestamp_ns and timestamptz_ns +# https://github.com/apache/iceberg-rust/issues/1110 +@pytest.mark.parametrize( + "source_type, input_arr, num_buckets", + [ + ( + TimestampNanoType(), + pa.array([datetime(1970, 1, 1, 0, 0, 0), datetime(2025, 2, 26, 1, 2, 3)], type=pa.timestamp(unit="ns")), + 10, + ), + ( + TimestamptzNanoType(), + pa.array( + [datetime(1970, 1, 1, 0, 0, 0), datetime(2025, 2, 26, 1, 2, 3)], + type=pa.timestamp(unit="ns", tz=pytz.timezone("Etc/GMT+10")), + ), + 10, + ), + ], +) +def test_unsupported_bucket_pyarrow_transform( + source_type: PrimitiveType, + input_arr: Union[pa.Array, pa.ChunkedArray], + num_buckets: int, +) -> None: + transform: Transform[Any, Any] = BucketTransform(num_buckets=num_buckets) + with pytest.raises(ValueError) as exc_info: + transform.pyarrow_transform(source_type)(input_arr) + + assert "FeatureUnsupported => Unsupported data type for bucket transform" in str(exc_info.value) + + @pytest.mark.parametrize( "source_type, input_arr, expected, width", [ diff --git a/tests/utils/test_datetime.py b/tests/utils/test_datetime.py index ac7ba54547..6f6f4a9114 100644 --- a/tests/utils/test_datetime.py +++ b/tests/utils/test_datetime.py @@ -14,12 +14,21 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from datetime import datetime, timezone, tzinfo +from datetime import datetime, time, timezone, tzinfo import pytest import pytz -from pyiceberg.utils.datetime import datetime_to_millis, millis_to_datetime +from pyiceberg.utils.datetime import ( + datetime_to_millis, + datetime_to_nanos, + millis_to_datetime, + nanos_to_micros, + time_str_to_nanos, + time_to_nanos, + timestamp_to_nanos, + timestamptz_to_nanos, +) timezones = [ pytz.timezone("Etc/GMT"), @@ -71,3 +80,55 @@ def test_datetime_tz_to_millis(tz: tzinfo) -> None: def test_millis_to_datetime() -> None: assert millis_to_datetime(1690971805918) == datetime(2023, 8, 2, 10, 23, 25, 918000) + + +@pytest.mark.parametrize("time_str, nanos", [("00:00:00+00:00", 0), ("20:21:44.375612-05:00", 73304375612000)]) +def test_time_str_to_nanos(time_str: str, nanos: int) -> None: + assert nanos == time_str_to_nanos(time_str) + + +@pytest.mark.parametrize( + "time_, nanos", [(time(0, 0, 0), 0), (time(20, 21, 44, 375612, tzinfo=pytz.timezone("Etc/GMT-5")), 73304375612000)] +) +def test_time_to_nanos(time_: time, nanos: int) -> None: + assert nanos == time_to_nanos(time_) + + +@pytest.mark.parametrize( + "datetime_, nanos", + [ + (datetime(1970, 1, 1, 0, 0, 0), 0), + (datetime(2025, 2, 23, 20, 21, 44, 375612, tzinfo=pytz.timezone("Etc/GMT-5")), 1740324104375612000), + ], +) +def test_datetime_to_nanos(datetime_: datetime, nanos: int) -> None: + assert nanos == datetime_to_nanos(datetime_) + + +@pytest.mark.parametrize( + "timestamp, nanos", + [ + ("1970-01-01T00:00:00", 0), + ("2025-02-23T20:21:44.375612", 1740342104375612000), + ("2025-02-23T20:21:44.375612001", 1740342104375612001), + ], +) +def test_timestamp_to_nanos(timestamp: str, nanos: int) -> None: + assert nanos == timestamp_to_nanos(timestamp) + + +@pytest.mark.parametrize( + "timestamp, nanos", + [ + ("1970-01-01T00:00:00+00:00", 0), + ("2025-02-23T16:21:44.375612-04:00", 1740342104375612000), + ("2025-02-23T16:21:44.375612001-04:00", 1740342104375612001), + ], +) +def test_timestamptz_to_nanos(timestamp: str, nanos: int) -> None: + assert nanos == timestamptz_to_nanos(timestamp) + + +@pytest.mark.parametrize("nanos, micros", [(1510871468000001001, 1510871468000001), (-1510871468000001001, -1510871468000002)]) +def test_nanos_to_micros(nanos: int, micros: int) -> None: + assert micros == nanos_to_micros(nanos)