From 4512cc1aeacc172aa03ba52d108482bb379f19fe Mon Sep 17 00:00:00 2001 From: voidZXL Date: Sat, 16 Nov 2024 02:06:26 +0800 Subject: [PATCH] fix Any behaviour in logical operations, fix issues in json schema parser and python generator, add _update_spec to ParamsCollector, optimize time type transform --- docs/zh/README.md | 67 +++++++++++++++++++++++++++++ tests/test_rule.py | 7 +++ tests/test_spec.py | 14 ++++++ tests/test_type.py | 5 +++ utype/parser/rule.py | 17 ++++++++ utype/specs/json_schema/constant.py | 2 + utype/specs/json_schema/parser.py | 6 ++- utype/specs/python/generator.py | 17 ++++---- utype/utils/base.py | 6 +++ utype/utils/transform.py | 6 ++- 10 files changed, 136 insertions(+), 11 deletions(-) create mode 100644 tests/test_spec.py diff --git a/docs/zh/README.md b/docs/zh/README.md index d3ee71f..cd4e88a 100644 --- a/docs/zh/README.md +++ b/docs/zh/README.md @@ -332,3 +332,70 @@ utype 是一个 [UtilMeta](https://utilmeta.com) 项目,你可以加入下面 * [X(Twitter)](https://twitter.com/utilmeta) * [Reddit](https://www.reddit.com/r/utilmeta) * [中文讨论区](https://lnzhou.com/channels/utilmeta/community) + + +## 对比 +### utype | Pydantic +Pydantic 是一个流行的 Python 数据解析验证库,utype 提供的功能与 Pydantic 大体上是相近的,但相比之下,utype 在以下方面有更多的关注 + +* **函数的解析**:utype 能很好的处理各种函数参数与返回值的解析(包括同步函数,异步函数,生成器与异步生成器函数),pydantic 对函数返回值只进行验证,并不尝试进行类型转化,且并不支持生成器函数 +* **约束类型**:对于 utype 来说所有的 **约束** (比如大小,长度,正则等)都会体现在类型中,从而可以直接用来进行类型转化与判断,pydantic 定义的类型往往需要作为字段的注解才能发挥作用 +```python +>>> from pydantic import PositiveInt +>>> PositiveInt(-1) +-1 +>>> from utype.types import PositiveInt +>>> PositiveInt(-1) +utype.utils.exceptions.ConstraintError: Constraint: <gt>: 0 violated +``` +* **类型注册机制**:utype 中所有类型的解析与转化方式都是可以进行注册与覆盖的,也就是说开发者可以方便地自定义基本类型的解析方式,或者注册自定义类型的解析函数;pydantic 支持的解析的内置类型是固定的。由于 utype 的类型解析是注册机制的,所以 utype 也可以兼容解析 **pydantic**, **dataclasses**, **attrs** 等数据类 (参考 [兼容 Pydantic](/zh/guide/type/#pydantic)) +```python +from utype import register_transformer +from collections.abc import Mapping +from pydantic 
import BaseModel + +@register_transformer(BaseModel) +def transform_pydantic(transformer, data, cls): + if not transformer.no_explicit_cast and not isinstance(data, Mapping): + data = transformer(data, dict) + return cls(**data) +``` +* **逻辑类型**:utype 的类型支持任意嵌套组合的逻辑运算,可以兼容基本类型与 typing 用法,以及支持运算出的类型对数据进行处理(pydantic 没有相应用法) +```python +from utype import Rule, exc +from typing import Literal + +class IntWeekDay(int, Rule): + gt = 0 + le = 7 + +weekday = IntWeekDay ^ Literal['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] + +>>> weekday('6') +6 +>>> weekday(b'tue') +'tue' +>>> weekday(8) +Constraint: <le>: 7 violated; +Constraint: <enum>: ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun') violated +``` +* **字段模式**:utype 的字段提供了 模式 (`mode`) 机制,包括 `no_input` 与 `no_output` 等,可以在一个数据类中定义字段的多种用法,对于在 web 场景中定义负责 **增改查** 等多种目的的数据模型更加方便 +* **原生字典模型**:pydantic 的 BaseModel 产出的数据实例虽然有 JSON 序列化方法,但并不能被 `json.dumps` 处理,utype 提供继承原生字典的 `Schema` 类,整合到数据工作流中更方便 +```python +from pydantic import BaseModel +from utype import Schema +import json + +class md(BaseModel): + value: int + +class schema(Schema): + value: int + +>>> json.dumps(md(value=1)) +TypeError: Object of type md is not JSON serializable +>>> json.dumps(schema(value=1)) +'{"value": 1}' +``` + +整体上而言,utype 提供的配置参数更加简洁一些,提供的功能更加灵活一些,可以看作一个更加灵活与轻量级的 Pydantic \ No newline at end of file diff --git a/tests/test_rule.py b/tests/test_rule.py index faee69e..0c48730 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -97,6 +97,13 @@ class IntWeekDay(int, Rule): multi_any = Rule.any_of(dict, list, str, None) assert multi_any('str') == 'str' + # test Any + assert types.PositiveInt & types.PositiveInt == types.PositiveInt + assert types.PositiveInt | types.Any in (Rule, types.Any) + assert types.PositiveInt | None | types.Any in (Rule, types.Any) + assert types.PositiveInt & types.Any == types.PositiveInt + assert types.PositiveInt ^ types.Any in (Rule, types.Any) + def test_length(self): class Length3(Rule): length = 3 diff --git 
a/tests/test_spec.py b/tests/test_spec.py new file mode 100644 index 0000000..ff83739 --- /dev/null +++ b/tests/test_spec.py @@ -0,0 +1,14 @@ +from utype.types import * +from utype.parser.rule import Rule + + +class TestSpec: + def test_json_schema_parser(self): + from utype.specs.json_schema.parser import JsonSchemaParser + from utype.specs.python.generator import PythonCodeGenerator + assert JsonSchemaParser({})() == Any + assert JsonSchemaParser({'anyOf': [{}, {'type': 'null'}]})() in (Rule, Any) + assert JsonSchemaParser({'type': 'object'})() == dict + assert JsonSchemaParser({'type': 'array'})() == list + assert JsonSchemaParser({'type': 'string'})() == str + assert JsonSchemaParser({'type': 'string', 'format': 'date'})() == date diff --git a/tests/test_type.py b/tests/test_type.py index a918790..2a7ff1d 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -332,6 +332,11 @@ def trans_my(trans, d, t): ], time: [ ("11:12:13", time(11, 12, 13), True, True), + ("08:09:10", time(8, 9, 10), True, True), + ("08:09:10", time(8, 9, 10), True, True), + ("8:9:10", time(8, 9, 10), True, True), + ("8:9:1", time(8, 9, 1), True, True), + ("8:30", time(8, 30, 0), True, True), (b"11:12:13", time(11, 12, 13), True, True), (dt, dt.time(), True, False), (dt.date(), time(), True, False), diff --git a/utype/parser/rule.py b/utype/parser/rule.py index c6015b3..c93ca3b 100644 --- a/utype/parser/rule.py +++ b/utype/parser/rule.py @@ -159,6 +159,9 @@ def _parse_arg(mcs, arg): if isinstance(arg, mcs): return arg + if arg in (Any, Self): + return arg + __origin = get_origin(arg) if __origin: # like List[str] Literal["value"] @@ -233,10 +236,24 @@ def combine(mcs, operator: str, *args): arg = mcs._parse_arg(arg) + if arg == Any: + if operator in ('|', '^'): + # if Any in any_of, there will be just Any (or Rule) + return Rule + elif operator == '&': + # if Any in and, just ignore + continue + if arg in __args: # avoid duplicate continue __args.append(arg) + if not __args: + return 
Rule + if operator != '~': + # for operation other than not, if just 1 arg left, use that + if len(__args) == 1: + return __args[0] return mcs( OPERATOR_NAMES.get(operator, operator), diff --git a/utype/specs/json_schema/constant.py b/utype/specs/json_schema/constant.py index 8381506..c0ee5b5 100644 --- a/utype/specs/json_schema/constant.py +++ b/utype/specs/json_schema/constant.py @@ -15,12 +15,14 @@ } TYPE_MAP = { 'null': type(None), + 'string': str, 'boolean': bool, 'bool': bool, 'object': dict, 'array': list, 'integer': int, 'int': int, + 'bigint': int, 'number': float, 'float': float, 'decimal': Decimal, diff --git a/utype/specs/json_schema/parser.py b/utype/specs/json_schema/parser.py index c6859d7..6083476 100644 --- a/utype/specs/json_schema/parser.py +++ b/utype/specs/json_schema/parser.py @@ -18,7 +18,7 @@ class JsonSchemaParser: object_meta_cls = LogicalMeta object_options_cls = Options field_cls = Field - default_type = str + default_type = Any NON_NAME_REG = '[^A-Za-z0-9]+' @@ -217,6 +217,8 @@ def parse_object(self, description: str = None, constraints: dict = None ): + if list(schema) == ['type'] and not constraints: + return dict name = name or 'ObjectSchema' properties = schema.get('properties') or {} required = schema.get('required') or [] @@ -305,6 +307,8 @@ def parse_array(self, description: str = None, constraints: dict = None ): + if list(schema) == ['type'] and not constraints: + return list items = schema.get('items') prefix_items = schema.get('prefixItems') args = [] diff --git a/utype/specs/python/generator.py b/utype/specs/python/generator.py index 1317488..c3a4146 100644 --- a/utype/specs/python/generator.py +++ b/utype/specs/python/generator.py @@ -2,14 +2,12 @@ import keyword import re -import utype from utype.parser.rule import Rule, LogicalType from utype.parser.field import Field from utype.parser.cls import ClassParser from utype.parser.func import FunctionParser from utype import unprovided, Options - -from typing import Type, 
Dict, ForwardRef +from typing import Type, Dict, Any, ForwardRef from utype.utils.functional import represent, valid_attr from collections import deque @@ -98,10 +96,11 @@ def generate_for_function(self, f, force_forward_ref: bool = None) -> str: default = self.generate_for_field(param_default) else: default = represent(param_default) - if len(args) == 1: - args.append(f'={default}') - else: - args.append(f' = {default}') + if default: + if len(args) == 1: + args.append(f'={default}') + else: + args.append(f' = {default}') params.append(''.join(args)) return_annotation = None @@ -122,7 +121,7 @@ def generate_for_type(self, t, with_constraints: bool = True, annotation: bool = return t if isinstance(t, ForwardRef): return repr(t.__forward_arg__) - if not isinstance(t, type): + if not isinstance(t, type) or t in (Any, Rule): return 'Any' if isinstance(t, LogicalType): if t.combinator: @@ -183,7 +182,7 @@ def generate_for_rule(self, t: Type[Rule], with_constraints: bool = True, annota @classmethod def generate_for_field(cls, field: Field, addition: dict = None) -> str: - if not field.__spec_kwargs__ and not addition: + if not field.__spec_kwargs__ and not addition and field.__class__ == Field: return '' name = None if field.__class__ == Field: diff --git a/utype/utils/base.py b/utype/utils/base.py index 940f295..c348c32 100644 --- a/utype/utils/base.py +++ b/utype/utils/base.py @@ -276,6 +276,12 @@ def __copy(cls, data, copy_class: bool = False): return data.__copy__() return data + def _update_spec(self, **kwargs): + # this is a rather ugly patch, we will figure something more elegantly in future + spec = dict(self.__spec_kwargs__) + spec.update(kwargs) + self.__spec_kwargs__ = ImmutableDict(spec) + def __deepcopy__(self, memo): return self.__copy__() diff --git a/utype/utils/transform.py b/utype/utils/transform.py index 0e13124..a4e1cdf 100644 --- a/utype/utils/transform.py +++ b/utype/utils/transform.py @@ -612,7 +612,11 @@ def to_time(self, data, t: Type[time] = 
time) -> time: return t() data = self._from_byte_like(data) if isinstance(data, str): - return t.fromisoformat(data) + if ':' in data: + try: + return t.fromisoformat(data) + except ValueError: + return self.to_datetime(f'1970-01-01 {data}').time() raise TypeError @registry.register(UUID)