-
-
Notifications
You must be signed in to change notification settings - Fork 35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
logicalType: date #109
Comments
Hi @cfcs, thanks for the issue. I think logical types are not applied, but it's doable. If you would like to support me, you could prepare a test example (with this issue as the reference); that would save me time during testing. If you want, you can also include your specific case so that I can test against it. |
@stretch sure, here is a set of encoding/decoding functions: defmodule Enc do
# int logical types:
@doc """
Encodes a calendar date as the number of days since 1970-01-01 (the int
behind the `date` logical type).

Accepts either a `Date` struct or a 10-byte ISO 8601 date string such as
`"2019-10-12"`. A 10-byte string that is not a valid date makes
`Date.from_iso8601!/1` raise; any other input matches no clause.
"""
@spec date(Date.t() | String.t()) :: integer()
def date(%Date{} = day), do: Date.diff(day, ~D[1970-01-01])

def date(<<iso::binary-size(10)>>) do
  iso
  |> Date.from_iso8601!()
  |> date()
end
# long logical types (except time-millis, which the schema stores as an int):
@doc """
Encodes an instant as milliseconds since the Unix epoch (the long behind
the `timestamp-millis` logical type).

Accepts a `DateTime` or an ISO 8601 string carrying an offset. A string that
`DateTime.from_iso8601/1` rejects fails the `{:ok, _, _}` match and raises
a `MatchError`.
"""
@spec timestamp_millis(DateTime.t() | String.t()) :: integer()
def timestamp_millis(%DateTime{} = moment), do: DateTime.to_unix(moment, :millisecond)

def timestamp_millis(iso8601) when is_binary(iso8601) do
  # The parsed value is already a DateTime, so it is converted directly;
  # the offset in seconds returned alongside it is not needed.
  {:ok, moment, _offset_seconds} = DateTime.from_iso8601(iso8601)
  DateTime.to_unix(moment, :millisecond)
end
@doc """
Encodes an instant as microseconds since the Unix epoch (the long behind
the `timestamp-micros` logical type).

Accepts a `DateTime` or an ISO 8601 string carrying an offset. A string that
`DateTime.from_iso8601/1` rejects fails the `{:ok, _, _}` match and raises
a `MatchError`.
"""
@spec timestamp_micros(DateTime.t() | String.t()) :: integer()
def timestamp_micros(%DateTime{} = moment), do: DateTime.to_unix(moment, :microsecond)

def timestamp_micros(iso8601) when is_binary(iso8601) do
  # The parsed value is already a DateTime, so it is converted directly;
  # the offset in seconds returned alongside it is not needed.
  {:ok, moment, _offset_seconds} = DateTime.from_iso8601(iso8601)
  DateTime.to_unix(moment, :microsecond)
end
@doc """
Encodes a time of day as milliseconds after midnight (the `time-millis`
logical type).

Accepts a `Time` struct or an ISO 8601 time string. Digits below one
millisecond are dropped by the floor division. An invalid string makes
`Time.from_iso8601!/1` raise.
"""
@spec time_millis(Time.t() | String.t()) :: integer()
def time_millis(%Time{} = clock) do
  {secs, micros} = Time.to_seconds_after_midnight(clock)
  Integer.floor_div(micros, 1000) + secs * 1000
end

def time_millis(iso) when is_binary(iso) do
  iso
  |> Time.from_iso8601!()
  |> time_millis()
end
@doc """
Encodes a time of day as microseconds after midnight (the `time-micros`
logical type).

Accepts a `Time` struct or an ISO 8601 time string. An invalid string makes
`Time.from_iso8601!/1` raise.
"""
@spec time_micros(Time.t() | String.t()) :: integer()
def time_micros(%Time{} = clock) do
  {secs, micros} = Time.to_seconds_after_midnight(clock)
  micros + secs * 1_000_000
end

def time_micros(iso) when is_binary(iso) do
  iso
  |> Time.from_iso8601!()
  |> time_micros()
end
end
defmodule Dec do
# Day zero of the encoding: decoded values are offsets from this date.
@unix_epoch ~D[1970-01-01]

@doc """
Decodes a day count relative to 1970-01-01 back into a `Date`.
"""
@spec date(integer()) :: Date.t()
def date(days), do: Date.add(@unix_epoch, days)
@doc """
Decodes milliseconds since the Unix epoch into a `DateTime`.

Delegates to `DateTime.from_unix!/2`, which raises when the value cannot be
converted.
"""
@spec timestamp_millis(integer()) :: DateTime.t()
def timestamp_millis(millis), do: DateTime.from_unix!(millis, :millisecond)
@doc """
Decodes microseconds since the Unix epoch into a `DateTime`.

Delegates to `DateTime.from_unix!/2`, which raises when the value cannot be
converted.
"""
@spec timestamp_micros(integer()) :: DateTime.t()
def timestamp_micros(micros), do: DateTime.from_unix!(micros, :microsecond)
@doc """
Decodes milliseconds after midnight into a `Time` with millisecond
precision.
"""
@spec time_millis(integer()) :: Time.t()
def time_millis(millis) do
  whole_seconds = Integer.floor_div(millis, 1000)

  # The leftover milliseconds are scaled to microseconds, which is the unit
  # the {value, precision} tuple expects; precision 3 keeps three digits.
  leftover_micros = Integer.mod(millis, 1000) * 1000

  Time.from_seconds_after_midnight(whole_seconds, {leftover_micros, 3})
end
@doc """
Decodes microseconds after midnight into a `Time` with microsecond
precision.
"""
@spec time_micros(integer()) :: Time.t()
def time_micros(micros) do
  whole_seconds = Integer.floor_div(micros, 1_000_000)
  leftover_micros = Integer.mod(micros, 1_000_000)

  Time.from_seconds_after_midnight(whole_seconds, {leftover_micros, 6})
end
end And corresponding tests: ts_iso = "2019-10-12T17:57:42.123456Z"
# Fixtures. `*_iso` values are ISO 8601 strings and `*_sig` values are the
# matching sigils. The `*_tz` pair carries a +01:30 offset, so its UTC form
# is 16:27:42. (`ts_iso` itself is bound on the line just before this one.)
ts_iso_tz = "2019-10-12T17:57:42.123456+0130" # 90 min before
ts_sig = ~U[2019-10-12 17:57:42.123456Z]
ts_sig_tz = ~U[2019-10-12 16:27:42.123456Z]
date_iso = "2019-10-12"
date_sig = ~D[2019-10-12]
time_sig = ~T[17:57:42.123456]
time_iso = "17:57:42.123456"
# Encoding checks. Each line is a pattern match, so a wrong result raises
# MatchError. The offset variant is 5_400_000 ms (90 min) smaller.
1570903062123 = Enc.timestamp_millis(ts_sig)
1570903062123 = Enc.timestamp_millis(ts_iso)
1570897662123 = Enc.timestamp_millis(ts_iso_tz)
1570903062123456 = Enc.timestamp_micros(ts_sig)
1570903062123456 = Enc.timestamp_micros(ts_iso)
1570897662123456 = Enc.timestamp_micros(ts_iso_tz)
# Dates encode as days since 1970-01-01.
18181 = Enc.date(date_sig)
18181 = Enc.date(date_iso)
# These are "naive" intervals from midnight, not timezoned:
64662123 = Enc.time_millis(time_sig)
64662123 = Enc.time_millis(time_iso)
64662123456 = Enc.time_micros(time_iso)
64662123456 = Enc.time_micros(time_sig)
# Round trips (encode, then decode). The millisecond variants come back
# with only three fractional digits (.123 rather than .123456).
~D[2019-10-12] = date_sig
~D[2019-10-12] = Dec.date(Enc.date(date_iso))
~U[2019-10-12 17:57:42.123456Z] = ts_sig
~U[2019-10-12 17:57:42.123Z] = Dec.timestamp_millis(Enc.timestamp_millis(ts_iso))
~U[2019-10-12 17:57:42.123456Z] = Dec.timestamp_micros(Enc.timestamp_micros(ts_iso))
~U[2019-10-12 16:27:42.123456Z] = ts_sig_tz
~U[2019-10-12 16:27:42.123Z] = Dec.timestamp_millis(Enc.timestamp_millis(ts_iso_tz))
~U[2019-10-12 16:27:42.123456Z] = Dec.timestamp_micros(Enc.timestamp_micros(ts_iso_tz))
~T[17:57:42.123456] = time_sig
~T[17:57:42.123] = Dec.time_millis(Enc.time_millis(time_iso))
~T[17:57:42.123456] = Dec.time_micros(Enc.time_micros(time_iso)) dataclasses-avroschema has some examples. Here is an example schema generated using their tooling: {
"type": "record",
"name": "TimeLogicalTypes",
"fields": [
{
"name": "date",
"type": {
"type": "int",
"logicalType": "date"
},
"default": 18181
},
{
"name": "timestamp_millis",
"type": {
"type": "long",
"logicalType": "timestamp-millis"
},
"default": 1570903062123
},
{
"name": "timestamp_micro",
"type": {
"type": "long",
"logicalType": "timestamp-micros"
},
"default": 1570903062123456
},
{
"name": "daily_time_millis",
"type": {
"type": "int",
"logicalType": "time-millis"
},
"default": 64662123
},
{
"name": "daily_time_micro",
"type": {
"type": "long",
"logicalType": "time-micros"
},
"default": 64662123456
}
],
"doc": "Time logical types"
} The Python code for that: # pip install dataclasses-avroschema
import datetime
import dataclasses
import typing
import json
from dataclasses_avroschema import AvroModel, TimeMicro, DateTimeMicro
# The three assignments below are alternative example inputs; only the last
# one is in effect when the model class is defined.
# UTC / naive:
a_datetime = datetime.datetime(2019, 10, 12, 17, 57, 42, 123456)
# timezoned timestamp (results in 15:57 instead of 17:57 because Avro doesn't preserve TZ info)
# Offsets are written as "+HH:MM": datetime.fromisoformat() before Python 3.11
# rejects the short "+HH" form with ValueError.
a_datetime = datetime.datetime.fromisoformat("2019-10-12T17:57:42.123456+02:00")
# UTC (this works)
a_datetime = datetime.datetime.fromisoformat("2019-10-12T17:57:42.123456+00:00")
# Example model with one field per time-related logical type. The trailing
# comment on each field names the logicalType that appears for it in the
# generated schema. The class docstring is runtime text (it shows up as the
# schema's "doc" value), so it is left exactly as written.
@dataclasses.dataclass
class TimeLogicalTypes(AvroModel):
    "Time logical types"

    date: datetime.date = a_datetime.date()  # date
    timestamp_millis: datetime.datetime = a_datetime  # timestamp-millis
    timestamp_micro: DateTimeMicro = a_datetime  # timestamp-micros
    daily_time_millis: datetime.time = a_datetime.time()  # time-millis
    daily_time_micro: TimeMicro = a_datetime.time()  # time-micros
# Print the derived Avro schema:
# print(json.dumps(json.loads(TimeLogicalTypes.avro_schema()), indent=2))

# Build an instance from the field defaults and serialize it both ways.
t1 = TimeLogicalTypes()
enc_json = t1.serialize(serialization_type="avro-json")
enc = t1.serialize()  # binary serialization

# Round trip: decode the binary form, then re-encode the decoded instance.
dec_t1 = TimeLogicalTypes.deserialize(enc)  # deserialized t1
re_enc_json = dec_t1.serialize(serialization_type="avro-json")

print('---- avro serialized json')
print(enc_json)
print('---- avro serialized binary')
print(enc.hex())
# Label corrected: the date/time standard is ISO 8601 (was misspelled "iso8861").
print('----- iso8601 json:')
t1_json = t1.to_json()
dec_t1_json = dec_t1.to_json()
print(t1_json)
print(dec_t1_json)
assert dec_t1_json == t1_json Sample output from that script (minus the schema above):
|
It's perhaps worth noting that |
For completeness here are the logical types I did not implement and that might also be useful:
Not sure when you would ever want to serialize not-timezoned stuff, but I guess they put it in the spec, and there is a native Elixir sigil ~N so: |
Thanks @cfcs, I appreciate your help. I'm a bit busy right now, but I will put that thing to work soon ™️ |
Hi, I have a schema like:
In order to pass a Date sigil into it, the value has to be converted to "the number of days since the Unix epoch":
I don't have the expertise to add handling of `logicalType` to the library, but I'm leaving this here in case someone else needs it. :-) ping @stretch
The text was updated successfully, but these errors were encountered: