From 7c7710fad34a40244e12cccefc22c2a1a3b9e0da Mon Sep 17 00:00:00 2001 From: jlhitzeman Date: Wed, 3 Jul 2024 12:55:05 -0500 Subject: [PATCH 1/4] add base schema to enforce timestamp, create pksuid from time in log init, test basic functionality --- distill/core/log.py | 4 +- distill/schemas/base.py | 31 ++++++++++++ distill/schemas/userale.py | 7 ++- poetry.lock | 97 +++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 + tests/test_log.py | 4 ++ 6 files changed, 141 insertions(+), 4 deletions(-) create mode 100644 distill/schemas/base.py diff --git a/distill/core/log.py b/distill/core/log.py index 887f7ae..4c1d5cf 100644 --- a/distill/core/log.py +++ b/distill/core/log.py @@ -18,6 +18,7 @@ from pydantic import BaseModel from pydantic.type_adapter import TypeAdapter from typing import Dict, Union +from pksuid import PKSUID from distill.core.types import JsonDict, JSONSerializable from distill.schemas.userale import UserAleSchema @@ -46,7 +47,8 @@ def __init__(self, data: Union[str, JsonDict], schema=UserAleSchema): raise TypeError("ERROR: " + str(type(data)) + " data should be either a string or a JsonDict") self.data = schema(**data) - # TODO: need to create ID field here on object initialization + self.id = PKSUID("log", schema._timestamp(self.data)) + def to_json(self) -> str: return self.data.model_dump_json(by_alias=True) diff --git a/distill/schemas/base.py b/distill/schemas/base.py new file mode 100644 index 0000000..7fca7d1 --- /dev/null +++ b/distill/schemas/base.py @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import abc + +from enum import Enum +from typing import List, Optional + +from pydantic import AliasGenerator, BaseModel, Field +from pydantic.alias_generators import to_camel +from pydantic.config import ConfigDict + + +class BaseSchema(BaseModel, abc.ABC): + @property + @abc.abstractmethod + def _timestamp(self): + pass + \ No newline at end of file diff --git a/distill/schemas/userale.py b/distill/schemas/userale.py index dc02173..6647489 100644 --- a/distill/schemas/userale.py +++ b/distill/schemas/userale.py @@ -14,10 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. from typing import List, Optional +from datetime import datetime from pydantic import AliasGenerator, BaseModel, Field, field_serializer, field_validator from pydantic.alias_generators import to_camel from pydantic.config import ConfigDict + +from .base import BaseSchema from datetime import datetime @@ -40,7 +43,7 @@ class Details(BaseModel): window: bool -class UserAleSchema(BaseModel): +class UserAleSchema(BaseSchema): """ A raw or custom log produced by UserAle """ @@ -80,3 +83,5 @@ def validate_ct(cls, ct: float): def serialize_ct(self, ct: datetime): return int(ct.timestamp() * 1000) + def _timestamp(self): + return self.client_time.timestamp() diff --git a/poetry.lock b/poetry.lock index 634d8c1..b68d3a9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "alabaster" @@ -817,6 +817,21 @@ calendars = ["convertdate", "hijri-converter"] fasttext = ["fasttext"] langdetect = ["langdetect"] +[[package]] +name = "datetime" +version = "5.5" +description = "This package provides a DateTime data type, as known from Zope. Unless you need to communicate with Zope APIs, you're probably better off using Python's built-in datetime module." +optional = false +python-versions = ">=3.7" +files = [ + {file = "DateTime-5.5-py3-none-any.whl", hash = "sha256:0abf6c51cb4ba7cee775ca46ccc727f3afdde463be28dbbe8803631fefd4a120"}, + {file = "DateTime-5.5.tar.gz", hash = "sha256:21ec6331f87a7fcb57bd7c59e8a68bfffe6fcbf5acdbbc7b356d6a9a020191d3"}, +] + +[package.dependencies] +pytz = "*" +"zope.interface" = "*" + [[package]] name = "debugpy" version = "1.8.2" @@ -2448,6 +2463,20 @@ files = [ {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, ] +[[package]] +name = "pksuid" +version = "1.1.2" +description = "Python package for generating prefixed ksuids." +optional = false +python-versions = ">=3.6,<4.0" +files = [ + {file = "pksuid-1.1.2-py3-none-any.whl", hash = "sha256:332296a7104b715169ad0e8067f29eba7f8c17e80e0d1c8342e4b6fc60efaea6"}, + {file = "pksuid-1.1.2.tar.gz", hash = "sha256:dc08ed7924a8affea5f36af4b6af345b126f521c9165b95e91ca1a2acef99e49"}, +] + +[package.dependencies] +pybase62 = "0.4.3" + [[package]] name = "platformdirs" version = "4.2.2" @@ -2605,6 +2634,17 @@ files = [ {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, ] +[[package]] +name = "pybase62" +version = "0.4.3" +description = "Python module for base62 encoding" +optional = false +python-versions = "*" +files = [ + {file = "pybase62-0.4.3-py3-none-any.whl", hash = "sha256:aaf020d0cf0959cb8576f502138b8e3daecf691b9b30626792d34df6103da7c1"}, + {file = "pybase62-0.4.3.tar.gz", hash = "sha256:0fbbe8474fc5fb020cc7f94dc88adfd12ef9bf38640c46612568ea07f046438c"}, +] + [[package]] name = "pycparser" version = "2.22" @@ -4013,7 +4053,60 @@ files = [ doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] +[[package]] +name = "zope-interface" +version = "6.4.post2" +description = "Interfaces for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "zope.interface-6.4.post2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2eccd5bef45883802848f821d940367c1d0ad588de71e5cabe3813175444202c"}, + {file = "zope.interface-6.4.post2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:762e616199f6319bb98e7f4f27d254c84c5fb1c25c908c2a9d0f92b92fb27530"}, + {file = "zope.interface-6.4.post2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ef8356f16b1a83609f7a992a6e33d792bb5eff2370712c9eaae0d02e1924341"}, + {file = "zope.interface-6.4.post2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e4fa5d34d7973e6b0efa46fe4405090f3b406f64b6290facbb19dcbf642ad6b"}, + {file = "zope.interface-6.4.post2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d22fce0b0f5715cdac082e35a9e735a1752dc8585f005d045abb1a7c20e197f9"}, + {file = "zope.interface-6.4.post2-cp310-cp310-win_amd64.whl", hash = "sha256:97e615eab34bd8477c3f34197a17ce08c648d38467489359cb9eb7394f1083f7"}, + {file = "zope.interface-6.4.post2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:599f3b07bde2627e163ce484d5497a54a0a8437779362395c6b25e68c6590ede"}, + {file = "zope.interface-6.4.post2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:136cacdde1a2c5e5bc3d0b2a1beed733f97e2dad8c2ad3c2e17116f6590a3827"}, + {file = "zope.interface-6.4.post2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47937cf2e7ed4e0e37f7851c76edeb8543ec9b0eae149b36ecd26176ff1ca874"}, + {file = "zope.interface-6.4.post2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f0a6be264afb094975b5ef55c911379d6989caa87c4e558814ec4f5125cfa2e"}, + {file = "zope.interface-6.4.post2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47654177e675bafdf4e4738ce58cdc5c6d6ee2157ac0a78a3fa460942b9d64a8"}, + {file = "zope.interface-6.4.post2-cp311-cp311-win_amd64.whl", hash = "sha256:e2fb8e8158306567a3a9a41670c1ff99d0567d7fc96fa93b7abf8b519a46b250"}, + {file = "zope.interface-6.4.post2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b912750b13d76af8aac45ddf4679535def304b2a48a07989ec736508d0bbfbde"}, + {file = "zope.interface-6.4.post2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ac46298e0143d91e4644a27a769d1388d5d89e82ee0cf37bf2b0b001b9712a4"}, + {file = "zope.interface-6.4.post2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86a94af4a88110ed4bb8961f5ac72edf782958e665d5bfceaab6bf388420a78b"}, + {file = "zope.interface-6.4.post2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:73f9752cf3596771c7726f7eea5b9e634ad47c6d863043589a1c3bb31325c7eb"}, + {file = "zope.interface-6.4.post2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b5c3e9744dcdc9e84c24ed6646d5cf0cf66551347b310b3ffd70f056535854"}, + {file = "zope.interface-6.4.post2-cp312-cp312-win_amd64.whl", hash = "sha256:551db2fe892fcbefb38f6f81ffa62de11090c8119fd4e66a60f3adff70751ec7"}, + {file = "zope.interface-6.4.post2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96ac6b3169940a8cd57b4f2b8edcad8f5213b60efcd197d59fbe52f0accd66e"}, + {file = "zope.interface-6.4.post2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cebff2fe5dc82cb22122e4e1225e00a4a506b1a16fafa911142ee124febf2c9e"}, + {file = "zope.interface-6.4.post2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33ee982237cffaf946db365c3a6ebaa37855d8e3ca5800f6f48890209c1cfefc"}, + {file = "zope.interface-6.4.post2-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:fbf649bc77510ef2521cf797700b96167bb77838c40780da7ea3edd8b78044d1"}, + {file = "zope.interface-6.4.post2-cp37-cp37m-win_amd64.whl", hash = "sha256:4c0b208a5d6c81434bdfa0f06d9b667e5de15af84d8cae5723c3a33ba6611b82"}, + {file = "zope.interface-6.4.post2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d3fe667935e9562407c2511570dca14604a654988a13d8725667e95161d92e9b"}, + {file = "zope.interface-6.4.post2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a96e6d4074db29b152222c34d7eec2e2db2f92638d2b2b2c704f9e8db3ae0edc"}, + {file = "zope.interface-6.4.post2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:866a0f583be79f0def667a5d2c60b7b4cc68f0c0a470f227e1122691b443c934"}, + {file = "zope.interface-6.4.post2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5fe919027f29b12f7a2562ba0daf3e045cb388f844e022552a5674fcdf5d21f1"}, + {file = "zope.interface-6.4.post2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e0343a6e06d94f6b6ac52fbc75269b41dd3c57066541a6c76517f69fe67cb43"}, + {file = "zope.interface-6.4.post2-cp38-cp38-win_amd64.whl", hash = "sha256:dabb70a6e3d9c22df50e08dc55b14ca2a99da95a2d941954255ac76fd6982bc5"}, + {file = "zope.interface-6.4.post2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:706efc19f9679a1b425d6fa2b4bc770d976d0984335eaea0869bd32f627591d2"}, + {file = "zope.interface-6.4.post2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d136e5b8821073e1a09dde3eb076ea9988e7010c54ffe4d39701adf0c303438"}, + {file = "zope.interface-6.4.post2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1730c93a38b5a18d24549bc81613223962a19d457cfda9bdc66e542f475a36f4"}, + {file = "zope.interface-6.4.post2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bc2676312cc3468a25aac001ec727168994ea3b69b48914944a44c6a0b251e79"}, + {file = "zope.interface-6.4.post2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a62fd6cd518693568e23e02f41816adedfca637f26716837681c90b36af3671"}, + {file = "zope.interface-6.4.post2-cp39-cp39-win_amd64.whl", hash = "sha256:d3f7e001328bd6466b3414215f66dde3c7c13d8025a9c160a75d7b2687090d15"}, + {file = "zope.interface-6.4.post2.tar.gz", hash = "sha256:1c207e6f6dfd5749a26f5a5fd966602d6b824ec00d2df84a7e9a924e8933654e"}, +] + +[package.dependencies] +setuptools = "*" + +[package.extras] +docs = ["Sphinx", "repoze.sphinx.autointerface", "sphinx-rtd-theme"] +test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] +testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] + [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = "12ed7e5013613a9fff6e8fb74126ea7c7ed61a3a59aef4ac490fd472f7cf4931" +content-hash = "79aaf58e4a2a8d45ef7a6e08399373ae3c520068cce3d0089464a1a02f3d5f85" diff --git a/pyproject.toml b/pyproject.toml index a2e770c..37d31bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,8 @@ matplotlib = "3.7.5" scipy = "1.9.3" pydantic = "^2.7.4" dateparser = "^1.2.0" +pksuid = "^1.1.2" +datetime = "^5.5" [tool.poetry.group.docs] optional = true diff --git a/tests/test_log.py b/tests/test_log.py index d93c1b8..449eb6e 100644 --- a/tests/test_log.py +++ b/tests/test_log.py @@ -44,6 +44,10 @@ def test_log_constructor(): pageUrl = test_log.data.page_url assert pageUrl == "https://github.com/apache/flagon/tree/master/docker" + id = test_log.id + assert id.get_timestamp() == 1719530111079 // 1000 + assert id.prefix == "log" + def test_log_serialize(): data = load_log() From e53ea1fe96f5c1314914e713f0c775286120a445 Mon Sep 17 00:00:00 2001 From: jlhitzeman Date: Wed, 10 Jul 2024 11:54:37 -0500 Subject: [PATCH 2/4] Bake log hash into prefix --- distill/core/log.py | 4 +++- tests/test_log.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/distill/core/log.py b/distill/core/log.py index 4c1d5cf..0bfd667 100644 --- a/distill/core/log.py +++ b/distill/core/log.py @@ -40,14 +40,16 @@ def __init__(self, data: Union[str, JsonDict], schema=UserAleSchema): if isinstance(data, str): schema.model_validate_json(data, strict=True) + hash_sfx = str(hash(data)) data = json.loads(data) elif ta.validate_python(data): + hash_sfx = str(hash(json.dumps(data))) schema.model_validate(data, strict=True) else: raise TypeError("ERROR: " + str(type(data)) + " data should be either a string or a JsonDict") self.data = schema(**data) - self.id = PKSUID("log", schema._timestamp(self.data)) + self.id = PKSUID("log_" + hash_sfx, schema._timestamp(self.data)) def to_json(self) -> str: diff --git a/tests/test_log.py b/tests/test_log.py index 449eb6e..53b2767 100644 --- a/tests/test_log.py +++ b/tests/test_log.py @@ -46,7 +46,7 @@ def test_log_constructor(): id = test_log.id assert id.get_timestamp() == 1719530111079 // 1000 - assert id.prefix == "log" + assert id.prefix.startswith("log_") def test_log_serialize(): From 607bae6f68d9f7a5742caeb76e12424e7944ad6c Mon Sep 17 00:00:00 2001 From: jlhitzeman Date: Wed, 10 Jul 2024 13:03:30 -0500 Subject: [PATCH 3/4] Added docstrings --- distill/schemas/base.py | 9 +++++++++ distill/schemas/userale.py | 3 +++ 2 files changed, 12 insertions(+) diff --git a/distill/schemas/base.py b/distill/schemas/base.py index 7fca7d1..bed1db1 100644 --- a/distill/schemas/base.py +++ b/distill/schemas/base.py @@ -24,8 +24,17 @@ class BaseSchema(BaseModel, abc.ABC): + """ + Abstract base class to serve as model for any and all schemas + """ + @property @abc.abstractmethod def _timestamp(self): + """ + Represents a timestamp associated with the schema + Subclasses must override to provide specific timestamp value + Expected to return datetime object + """ pass \ No newline at end of file diff --git a/distill/schemas/userale.py b/distill/schemas/userale.py index 6647489..ac994cf 100644 --- a/distill/schemas/userale.py +++ b/distill/schemas/userale.py @@ -84,4 +84,7 @@ def serialize_ct(self, ct: datetime): return int(ct.timestamp() * 1000) def _timestamp(self): + """ + Returns timestamp as datetime object from userALE log's client_time field + """ return self.client_time.timestamp() From 6aabd25de8271140e03a4a1dba3a29816a1a50f5 Mon Sep 17 00:00:00 2001 From: jlhitzeman Date: Wed, 10 Jul 2024 13:14:29 -0500 Subject: [PATCH 4/4] Updated docstrings --- distill/schemas/base.py | 3 ++- distill/schemas/userale.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/distill/schemas/base.py b/distill/schemas/base.py index bed1db1..16a35c7 100644 --- a/distill/schemas/base.py +++ b/distill/schemas/base.py @@ -34,7 +34,8 @@ def _timestamp(self): """ Represents a timestamp associated with the schema Subclasses must override to provide specific timestamp value - Expected to return datetime object + Returns: + float: POSIX timestamp corresponding to datetime instance """ pass \ No newline at end of file diff --git a/distill/schemas/userale.py b/distill/schemas/userale.py index ac994cf..133ab0d 100644 --- a/distill/schemas/userale.py +++ b/distill/schemas/userale.py @@ -85,6 +85,7 @@ def serialize_ct(self, ct: datetime): def _timestamp(self): """ - Returns timestamp as datetime object from userALE log's client_time field + Returns: + float: POSIX time from userALE log's client_time field """ return self.client_time.timestamp()