From 93c6148072cfed10c89d67c32ec581fd881aa633 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Fri, 12 Apr 2024 16:33:18 -0700 Subject: [PATCH 01/20] WIP --- awscrt/cbor.py | 151 +++++++++++++++++++++++ crt/aws-c-common | 2 +- source/cbor.c | 305 +++++++++++++++++++++++++++++++++++++++++++++++ source/cbor.h | 48 ++++++++ source/module.c | 37 ++++++ source/module.h | 1 + 6 files changed, 543 insertions(+), 1 deletion(-) create mode 100644 awscrt/cbor.py create mode 100644 source/cbor.c create mode 100644 source/cbor.h diff --git a/awscrt/cbor.py b/awscrt/cbor.py new file mode 100644 index 000000000..ba464d577 --- /dev/null +++ b/awscrt/cbor.py @@ -0,0 +1,151 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0. + +import _awscrt + +from awscrt import NativeResource +from enum import IntEnum +from typing import Any + + +class AwsCborElementType(IntEnum): + Int = 0 + Float = 1 + String = 2 + Map = 3 + Array = 4 + NULL = 5 + + +class AwsCborEncoder(NativeResource): + """ Encoder for CBOR """ + + def __init__(self): + super().__init__() + self._binding = _awscrt.cbor_encoder_new() + + def get_encoded_data(self) -> bytes: + return _awscrt.cbor_encoder_get_encoded_data(self._binding) + + def add_int(self, val: int): + """Add int to encode, -2^64 to 2^64 inclusive. Otherwise, overflow will be raised. + + Args: + val (int): _description_ + + Returns: + _type_: _description_ + """ + if val < -2**64 or val > 2**64: + raise OverflowError(f"{val} is overflowed to be encoded into cbor integers") + + if val >= 0: + return _awscrt.cbor_encoder_encode_unsigned_int(self._binding, val) + else: + return _awscrt.cbor_encoder_encode_negative_int(self._binding, -1 - val) + + def add_float(self, val: float): + """Adding a "double" to encode + Rely on `PyFloat_AsDouble()` for error checking. + Args: + val (float): _description_ + """ + return _awscrt.cbor_encoder_encode_float(self._binding, val) + + def add_bytes(self, val: bytes): + return _awscrt.cbor_encoder_encode_bytes(self._binding, val) + + def add_string(self, val: str): + return _awscrt.cbor_encoder_encode_str(self._binding, val) + + def add_array_start(self, number_entries: int): + """Add a start of array element, with the `number_entries` + for the cbor data items to be included in the array. + `number_entries` should 0 to 2^64 inclusive. + Otherwise, overflow will be raised. + + Args: + number_entries (int): _description_ + + Returns: + _type_: _description_ + """ + if number_entries < 0 or number_entries > 2**64: + raise OverflowError() + + return _awscrt.cbor_encoder_encode_array_start(self._binding, number_entries) + + def add_map_start(self, number_entries: int): + """Add a start of map element, with the `number_entries` + for the number of pair of cbor data items to be included in the map. + `number_entries` should 0 to 2^64 inclusive. + Otherwise, overflow will be raised. + + Args: + number_entries (int): _description_ + + Returns: + _type_: _description_ + """ + if number_entries < 0 or number_entries > 2**64: + raise ValueError() + + return _awscrt.cbor_encoder_encode_map_start(self._binding, number_entries) + + def add_tag(self, tag_number: int): + if tag_number < 0 or tag_number > 2**64: + raise ValueError() + + return _awscrt.cbor_encoder_encode_tag(self._binding, tag_number) + + def add_null(self): + return _awscrt.cbor_encoder_encode_simple_types(self._binding, AwsCborElementType.NULL) + + +class AwsCborDecoder(NativeResource): + """ Decoder for CBOR """ + + def __init__(self, src: bytes): + super().__init__() + self._src = src + self._binding = _awscrt.cbor_decoder_new(src) + + def peek_next_type(self) -> AwsCborElementType: + return _awscrt.cbor_decoder_peek_type(self._binding) + + def get_remaining_bytes_len(self) -> int: + return _awscrt.cbor_decoder_get_remaining_bytes_len(self._binding) + + def consume_next_element(self): + return _awscrt.cbor_decoder_consume_next_element(self._binding) + + def consume_next_data_item(self): + return _awscrt.cbor_decoder_consume_next_data_item(self._binding) + + def get_next_unsigned_int(self) -> int: + return _awscrt.cbor_decoder_get_next_unsigned_int(self._binding) + + def get_next_negative_int(self) -> int: + val = _awscrt.cbor_decoder_get_next_negative_int(self._binding) + return -1 - val + + def get_next_double(self) -> float: + return _awscrt.cbor_decoder_get_next_double(self._binding) + + def get_next_bool(self) -> bool: + return _awscrt.cbor_decoder_get_next_bool(self._binding) + + def get_next_bytes(self) -> bytes: + return _awscrt.cbor_decoder_get_next_bytes(self._binding) + + def get_next_str(self) -> str: + return _awscrt.cbor_decoder_get_next_str(self._binding) + + def get_next_array_start(self) -> int: + return _awscrt.cbor_decoder_get_next_array_start(self._binding) + + def get_next_map_start(self) -> int: + return _awscrt.cbor_decoder_get_next_map_start(self._binding) + + def get_next_tag_val(self) -> int: + return _awscrt.cbor_decoder_get_next_tag_val(self._binding) diff --git a/crt/aws-c-common b/crt/aws-c-common index fcadc0dd5..7387d9022 160000 --- a/crt/aws-c-common +++ b/crt/aws-c-common @@ -1 +1 @@ -Subproject commit fcadc0dd5d8a26134c8bbf08c58e30eff50d177b +Subproject commit 7387d9022933a4b74ea6a4e037df2b9b7bcac03d diff --git a/source/cbor.c b/source/cbor.c new file mode 100644 index 000000000..1debc44d7 --- /dev/null +++ b/source/cbor.c @@ -0,0 +1,305 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ +#include "cbor.h" + +#include + +/******************************************************************************* + * ENCODE + ******************************************************************************/ + +static const char *s_capsule_name_cbor_encoder = "aws_cbor_encoder"; + +static struct aws_cbor_encoder *s_cbor_encoder_from_capsule(PyObject *py_capsule) { + return PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_encoder); +} + +/* Runs when GC destroys the capsule */ +static void s_cbor_encoder_capsule_destructor(PyObject *py_capsule) { + struct aws_cbor_encoder *encoder = s_cbor_encoder_from_capsule(py_capsule); + aws_cbor_encoder_release(encoder); +} + +PyObject *aws_py_cbor_encoder_new(PyObject *self, PyObject *args) { + (void)self; + (void)args; + + struct aws_cbor_encoder *encoder = aws_cbor_encoder_new(aws_py_get_allocator(), 128); + AWS_ASSERT(encoder != NULL); + PyObject *py_capsule = PyCapsule_New(encoder, s_capsule_name_cbor_encoder, s_cbor_encoder_capsule_destructor); + if (!py_capsule) { + aws_cbor_encoder_release(encoder); + return NULL; + } + + return py_capsule; +} +#define S_ENCODER_METHOD_START(FMT, ...) \ + (void)self; \ + PyObject *py_capsule; \ + if (!PyArg_ParseTuple(args, "O" FMT, &py_capsule, __VA_ARGS__)) { \ + return NULL; \ + } \ + struct aws_cbor_encoder *encoder = s_cbor_encoder_from_capsule(py_capsule); \ + if (!encoder) { \ + return NULL; \ + } + +PyObject *aws_py_cbor_encoder_get_encoded_data(PyObject *self, PyObject *args) { + (void)self; + PyObject *py_capsule; + if (!PyArg_ParseTuple(args, "O", &py_capsule)) { + return NULL; + } + struct aws_cbor_encoder *encoder = s_cbor_encoder_from_capsule(py_capsule); + if (!encoder) { + return NULL; + } + struct aws_byte_cursor encoded_data = aws_cbor_encoder_get_encoded_data(encoder); + if (encoded_data.len == 0) { + /* TODO: probably better to be empty instead of None?? */ + Py_RETURN_NONE; + } + return PyBytes_FromStringAndSize((const char *)encoded_data.ptr, encoded_data.len); +} + +PyObject *aws_py_cbor_encoder_encode_unsigned_int(PyObject *self, PyObject *args) { + PyObject *pylong; + S_ENCODER_METHOD_START("O", &pylong); + uint64_t data = PyLong_AsUnsignedLongLong(pylong); + /* The python code has already checked the value */ + AWS_ASSERT(!PyErr_Occurred()); + aws_cbor_encode_uint(encoder, data); + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_encoder_encode_negative_int(PyObject *self, PyObject *args) { + PyObject *pylong; + S_ENCODER_METHOD_START("O", &pylong); + uint64_t data = PyLong_AsUnsignedLongLong(pylong); + /* The python code has already checked the value */ + AWS_ASSERT(!PyErr_Occurred()); + aws_cbor_encode_negint(encoder, data); + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_encoder_encode_float(PyObject *self, PyObject *args) { + PyObject *pyfloat; + S_ENCODER_METHOD_START("O", &pyfloat); + double data = PyFloat_AsDouble(pyfloat); + /* Rely on the python convert to check the pyfloat is able to convert to double. */ + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.add_float is not a valid double to encode"); + return NULL; + } + aws_cbor_encode_double(encoder, data); + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_encoder_encode_bytes(PyObject *self, PyObject *args) { + struct aws_byte_cursor bytes_data; + S_ENCODER_METHOD_START("y#", &bytes_data.ptr, &bytes_data.len); + aws_cbor_encode_bytes(encoder, bytes_data); + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_encoder_encode_str(PyObject *self, PyObject *args) { + struct aws_byte_cursor str_data; + S_ENCODER_METHOD_START("s#", &str_data.ptr, &str_data.len); + aws_cbor_encode_string(encoder, str_data); + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_encoder_encode_array_start(PyObject *self, PyObject *args) { + PyObject *pylong; + S_ENCODER_METHOD_START("O", &pylong); + uint64_t data = PyLong_AsUnsignedLongLong(pylong); + /* The python code has already checked the value */ + AWS_ASSERT(!PyErr_Occurred()); + aws_cbor_encode_array_start(encoder, data); + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_encoder_encode_map_start(PyObject *self, PyObject *args) { + PyObject *pylong; + S_ENCODER_METHOD_START("O", &pylong); + uint64_t data = PyLong_AsUnsignedLongLong(pylong); + /* The python code has already checked the value */ + AWS_ASSERT(!PyErr_Occurred()); + aws_cbor_encode_map_start(encoder, data); + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_encoder_encode_tag(PyObject *self, PyObject *args) { + PyObject *pylong; + S_ENCODER_METHOD_START("O", &pylong); + uint64_t data = PyLong_AsUnsignedLongLong(pylong); + /* The python code has already checked the value */ + AWS_ASSERT(!PyErr_Occurred()); + aws_cbor_encode_tag(encoder, data); + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_encoder_encode_simple_types(PyObject *self, PyObject *args) { + Py_ssize_t type_enum; + S_ENCODER_METHOD_START("n", &type_enum); + switch (type_enum) { + case 5: + aws_cbor_encode_null(encoder); + break; + + default: + Py_RETURN_NONE; + break; + } + Py_RETURN_NONE; +} + +/******************************************************************************* + * DECODE + ******************************************************************************/ + +static const char *s_capsule_name_cbor_decoder = "aws_cbor_decoder"; + +static struct aws_cbor_decoder *s_cbor_decoder_from_capsule(PyObject *py_capsule) { + return PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_decoder); +} +/* Runs when GC destroys the capsule */ +static void s_cbor_decoder_capsule_destructor(PyObject *py_capsule) { + struct aws_cbor_decoder *decoder = s_cbor_decoder_from_capsule(py_capsule); + aws_cbor_decoder_release(decoder); +} + +PyObject *aws_py_cbor_decoder_new(PyObject *self, PyObject *args) { + (void)self; + /* The python object will keep the src alive from python. */ + struct aws_byte_cursor src; /* s# */ + + if (!PyArg_ParseTuple(args, "s#", &src.ptr, &src.len)) { + return NULL; + } + + struct aws_cbor_decoder *decoder = aws_cbor_decoder_new(aws_py_get_allocator(), &src); + AWS_ASSERT(decoder != NULL); + PyObject *py_capsule = PyCapsule_New(decoder, s_capsule_name_cbor_decoder, s_cbor_decoder_capsule_destructor); + if (!py_capsule) { + aws_cbor_decoder_release(decoder); + return NULL; + } + + return py_capsule; +} + +#define S_DECODER_METHOD_START(decoder_func, out_val) \ + (void)self; \ + PyObject *py_capsule; \ + if (!PyArg_ParseTuple(args, "O", &py_capsule)) { \ + return NULL; \ + } \ + struct aws_cbor_decoder *decoder = s_cbor_decoder_from_capsule(py_capsule); \ + if (!decoder) { \ + return NULL; \ + } \ + if (decoder_func(decoder, &out_val)) { \ + return PyErr_AwsLastError(); \ + } + +PyObject *aws_py_cbor_decoder_peek_type(PyObject *self, PyObject *args) { + enum aws_cbor_element_type out_type; + S_DECODER_METHOD_START(aws_cbor_decode_peek_type, out_type); + /* TODO: an convert from C type to the Python type */ + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_decoder_get_remaining_bytes_len(PyObject *self, PyObject *args) { + (void)self; + PyObject *py_capsule; + if (!PyArg_ParseTuple(args, "O", &py_capsule)) { + return NULL; + } + struct aws_cbor_decoder *decoder = s_cbor_decoder_from_capsule(py_capsule); + if (!decoder) { + return NULL; + } + size_t remaining_len = aws_cbor_decoder_get_remaining_length(decoder); + return PyLong_FromSize_t(remaining_len); +} + +PyObject *aws_py_cbor_decoder_consume_next_element(PyObject *self, PyObject *args) { + enum aws_cbor_element_type out_type; + S_DECODER_METHOD_START(aws_cbor_decode_consume_next_element, out_type); + /* TODO: an convert from C type to the Python type */ + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *args) { + (void)self; + PyObject *py_capsule; + if (!PyArg_ParseTuple(args, "O", &py_capsule)) { + return NULL; + } + struct aws_cbor_decoder *decoder = s_cbor_decoder_from_capsule(py_capsule); + if (!decoder) { + return NULL; + } + if (aws_cbor_decode_consume_next_data_item(decoder)) { + return PyErr_AwsLastError(); + } + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_decoder_get_next_unsigned_int(PyObject *self, PyObject *args) { + uint64_t out_val; + S_DECODER_METHOD_START(aws_cbor_decode_get_next_unsigned_val, out_val); + return PyLong_FromUnsignedLongLong(out_val); +} + +PyObject *aws_py_cbor_decoder_get_next_negative_int(PyObject *self, PyObject *args) { + uint64_t out_val; + S_DECODER_METHOD_START(aws_cbor_decode_get_next_neg_val, out_val); + return PyLong_FromUnsignedLongLong(out_val); +} + +PyObject *aws_py_cbor_decoder_get_next_double(PyObject *self, PyObject *args) { + double out_val; + S_DECODER_METHOD_START(aws_cbor_decode_get_next_double_val, out_val); + return PyFloat_FromDouble(out_val); +} + +PyObject *aws_py_cbor_decoder_get_next_bool(PyObject *self, PyObject *args) { + bool out_val; + S_DECODER_METHOD_START(aws_cbor_decode_get_next_boolean_val, out_val); + return PyBool_FromLong(out_val); +} + +PyObject *aws_py_cbor_decoder_get_next_bytes(PyObject *self, PyObject *args) { + struct aws_byte_cursor out_val; + S_DECODER_METHOD_START(aws_cbor_decode_get_next_bytes_val, out_val); + return PyBytes_FromAwsByteCursor(&out_val); +} + +PyObject *aws_py_cbor_decoder_get_next_str(PyObject *self, PyObject *args) { + struct aws_byte_cursor out_val; + S_DECODER_METHOD_START(aws_cbor_decode_get_next_str_val, out_val); + return PyUnicode_FromAwsByteCursor(&out_val); +} + +PyObject *aws_py_cbor_decoder_get_next_array_start(PyObject *self, PyObject *args) { + uint64_t out_val; + S_DECODER_METHOD_START(aws_cbor_decode_get_next_array_start, out_val); + return PyLong_FromUnsignedLongLong(out_val); +} + +PyObject *aws_py_cbor_decoder_get_next_map_start(PyObject *self, PyObject *args) { + uint64_t out_val; + S_DECODER_METHOD_START(aws_cbor_decode_get_next_map_start, out_val); + return PyLong_FromUnsignedLongLong(out_val); +} + +PyObject *aws_py_cbor_decoder_get_next_tag_val(PyObject *self, PyObject *args) { + uint64_t out_val; + S_DECODER_METHOD_START(aws_cbor_decode_get_next_tag_val, out_val); + return PyLong_FromUnsignedLongLong(out_val); +} \ No newline at end of file diff --git a/source/cbor.h b/source/cbor.h new file mode 100644 index 000000000..94c09322e --- /dev/null +++ b/source/cbor.h @@ -0,0 +1,48 @@ +#ifndef AWS_CRT_PYTHON_CBOR_H +# define AWS_CRT_PYTHON_CBOR_H +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ +# include "module.h" + +/******************************************************************************* + * ENCODE + ******************************************************************************/ + +PyObject *aws_py_cbor_encoder_new(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_get_encoded_data(PyObject *self, PyObject *args); + +PyObject *aws_py_cbor_encoder_encode_unsigned_int(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_encode_negative_int(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_encode_float(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_encode_bytes(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_encode_str(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_encode_array_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_encode_map_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_encode_tag(PyObject *self, PyObject *args); + +/* Encode the types without value needed. The arg is the type to encode. */ +PyObject *aws_py_cbor_encoder_encode_simple_types(PyObject *self, PyObject *args); + +/******************************************************************************* + * DECODE + ******************************************************************************/ + +PyObject *aws_py_cbor_decoder_new(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_peek_type(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_remaining_bytes_len(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_consume_next_element(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_next_unsigned_int(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_next_negative_int(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_next_double(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_next_bool(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_next_bytes(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_next_str(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_next_array_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_next_map_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_get_next_tag_val(PyObject *self, PyObject *args); + +#endif /* AWS_CRT_PYTHON_CBOR_H */ + // PyObject *aws_py_(PyObject *self, PyObject *args); \ No newline at end of file diff --git a/source/module.c b/source/module.c index 0648957e5..0b208d618 100644 --- a/source/module.c +++ b/source/module.c @@ -5,6 +5,7 @@ #include "module.h" #include "auth.h" +#include "cbor.h" #include "checksums.h" #include "common.h" #include "crypto.h" @@ -116,6 +117,14 @@ PyObject *PyUnicode_FromAwsString(const struct aws_string *aws_str) { return PyUnicode_FromStringAndSize(aws_string_c_str(aws_str), aws_str->len); } +PyObject *PyBytes_FromAwsByteCursor(const struct aws_byte_cursor *cursor) { + if (cursor->len > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, "Cursor exceeds PY_SSIZE_T_MAX"); + return NULL; + } + return PyBytes_FromStringAndSize((const char *)cursor->ptr, (Py_ssize_t)cursor->len); +} + uint32_t PyObject_GetAttrAsUint32(PyObject *o, const char *class_name, const char *attr_name) { uint32_t result = UINT32_MAX; @@ -814,6 +823,34 @@ static PyMethodDef s_module_methods[] = { AWS_PY_METHOD_DEF(websocket_increment_read_window, METH_VARARGS), AWS_PY_METHOD_DEF(websocket_create_handshake_request, METH_VARARGS), + /* CBOR Encode */ + AWS_PY_METHOD_DEF(cbor_encoder_new, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_get_encoded_data, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_encode_unsigned_int, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_encode_negative_int, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_encode_float, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_encode_bytes, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_encode_str, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_encode_array_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_encode_map_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_encode_tag, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_encode_simple_types, METH_VARARGS), + + /* CBOR Decode */ + AWS_PY_METHOD_DEF(cbor_decoder_new, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_peek_type, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_remaining_bytes_len, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_consume_next_element, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_consume_next_data_item, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_next_unsigned_int, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_next_negative_int, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_next_double, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_next_bool, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_next_bytes, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_next_str, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_next_array_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_next_map_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_get_next_tag_val, METH_VARARGS), {NULL, NULL, 0, NULL}, }; diff --git a/source/module.h b/source/module.h index d7254d8c4..49d72346d 100644 --- a/source/module.h +++ b/source/module.h @@ -30,6 +30,7 @@ enum aws_crt_python_errors { /* AWS Specific Helpers */ PyObject *PyUnicode_FromAwsByteCursor(const struct aws_byte_cursor *cursor); PyObject *PyUnicode_FromAwsString(const struct aws_string *aws_str); +PyObject *PyBytes_FromAwsByteCursor(const struct aws_byte_cursor *cursor); /* Return the named attribute, converted to the specified type. * If conversion cannot occur a python exception is set (check PyExc_Occurred()) */ From 4c5f80f65dfbb6aaa856234918bf7350860736f3 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 15 Apr 2024 15:43:43 -0700 Subject: [PATCH 02/20] renaming stuff --- awscrt/cbor.py | 34 +++++++++++++------------- crt/aws-c-common | 2 +- source/cbor.c | 62 ++++++++++++++++++++++++------------------------ source/cbor.h | 25 ++++++++++--------- source/module.c | 18 +++++++------- 5 files changed, 70 insertions(+), 71 deletions(-) diff --git a/awscrt/cbor.py b/awscrt/cbor.py index ba464d577..90e524d12 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -27,7 +27,7 @@ def __init__(self): def get_encoded_data(self) -> bytes: return _awscrt.cbor_encoder_get_encoded_data(self._binding) - def add_int(self, val: int): + def write_int(self, val: int): """Add int to encode, -2^64 to 2^64 inclusive. Otherwise, overflow will be raised. Args: @@ -40,25 +40,25 @@ def add_int(self, val: int): raise OverflowError(f"{val} is overflowed to be encoded into cbor integers") if val >= 0: - return _awscrt.cbor_encoder_encode_unsigned_int(self._binding, val) + return _awscrt.cbor_encoder_write_unsigned_int(self._binding, val) else: - return _awscrt.cbor_encoder_encode_negative_int(self._binding, -1 - val) + return _awscrt.cbor_encoder_write_negative_int(self._binding, -1 - val) - def add_float(self, val: float): + def write_float(self, val: float): """Adding a "double" to encode Rely on `PyFloat_AsDouble()` for error checking. Args: val (float): _description_ """ - return _awscrt.cbor_encoder_encode_float(self._binding, val) + return _awscrt.cbor_encoder_write_float(self._binding, val) - def add_bytes(self, val: bytes): - return _awscrt.cbor_encoder_encode_bytes(self._binding, val) + def write_bytes(self, val: bytes): + return _awscrt.cbor_encoder_write_bytes(self._binding, val) - def add_string(self, val: str): - return _awscrt.cbor_encoder_encode_str(self._binding, val) + def write_string(self, val: str): + return _awscrt.cbor_encoder_write_str(self._binding, val) - def add_array_start(self, number_entries: int): + def write_array_start(self, number_entries: int): """Add a start of array element, with the `number_entries` for the cbor data items to be included in the array. `number_entries` should 0 to 2^64 inclusive. @@ -73,9 +73,9 @@ def add_array_start(self, number_entries: int): if number_entries < 0 or number_entries > 2**64: raise OverflowError() - return _awscrt.cbor_encoder_encode_array_start(self._binding, number_entries) + return _awscrt.cbor_encoder_write_array_start(self._binding, number_entries) - def add_map_start(self, number_entries: int): + def write_map_start(self, number_entries: int): """Add a start of map element, with the `number_entries` for the number of pair of cbor data items to be included in the map. `number_entries` should 0 to 2^64 inclusive. @@ -90,16 +90,16 @@ def add_map_start(self, number_entries: int): if number_entries < 0 or number_entries > 2**64: raise ValueError() - return _awscrt.cbor_encoder_encode_map_start(self._binding, number_entries) + return _awscrt.cbor_encoder_write_map_start(self._binding, number_entries) - def add_tag(self, tag_number: int): + def write_tag(self, tag_number: int): if tag_number < 0 or tag_number > 2**64: raise ValueError() - return _awscrt.cbor_encoder_encode_tag(self._binding, tag_number) + return _awscrt.cbor_encoder_write_tag(self._binding, tag_number) - def add_null(self): - return _awscrt.cbor_encoder_encode_simple_types(self._binding, AwsCborElementType.NULL) + def write_null(self): + return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborElementType.NULL) class AwsCborDecoder(NativeResource): diff --git a/crt/aws-c-common b/crt/aws-c-common index 7387d9022..3859f2737 160000 --- a/crt/aws-c-common +++ b/crt/aws-c-common @@ -1 +1 @@ -Subproject commit 7387d9022933a4b74ea6a4e037df2b9b7bcac03d +Subproject commit 3859f27370996f0f16abb129f4f6b54c89c22147 diff --git a/source/cbor.c b/source/cbor.c index 1debc44d7..0b89ece47 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -26,7 +26,7 @@ PyObject *aws_py_cbor_encoder_new(PyObject *self, PyObject *args) { (void)self; (void)args; - struct aws_cbor_encoder *encoder = aws_cbor_encoder_new(aws_py_get_allocator(), 128); + struct aws_cbor_encoder *encoder = aws_cbor_encoder_new(aws_py_get_allocator(), NULL); AWS_ASSERT(encoder != NULL); PyObject *py_capsule = PyCapsule_New(encoder, s_capsule_name_cbor_encoder, s_cbor_encoder_capsule_destructor); if (!py_capsule) { @@ -65,27 +65,27 @@ PyObject *aws_py_cbor_encoder_get_encoded_data(PyObject *self, PyObject *args) { return PyBytes_FromStringAndSize((const char *)encoded_data.ptr, encoded_data.len); } -PyObject *aws_py_cbor_encoder_encode_unsigned_int(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_encoder_write_unsigned_int(PyObject *self, PyObject *args) { PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); uint64_t data = PyLong_AsUnsignedLongLong(pylong); /* The python code has already checked the value */ AWS_ASSERT(!PyErr_Occurred()); - aws_cbor_encode_uint(encoder, data); + aws_cbor_encoder_write_uint(encoder, data); Py_RETURN_NONE; } -PyObject *aws_py_cbor_encoder_encode_negative_int(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_encoder_write_negative_int(PyObject *self, PyObject *args) { PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); uint64_t data = PyLong_AsUnsignedLongLong(pylong); /* The python code has already checked the value */ AWS_ASSERT(!PyErr_Occurred()); - aws_cbor_encode_negint(encoder, data); + aws_cbor_encoder_write_negint(encoder, data); Py_RETURN_NONE; } -PyObject *aws_py_cbor_encoder_encode_float(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_encoder_write_float(PyObject *self, PyObject *args) { PyObject *pyfloat; S_ENCODER_METHOD_START("O", &pyfloat); double data = PyFloat_AsDouble(pyfloat); @@ -94,60 +94,60 @@ PyObject *aws_py_cbor_encoder_encode_float(PyObject *self, PyObject *args) { PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.add_float is not a valid double to encode"); return NULL; } - aws_cbor_encode_double(encoder, data); + aws_cbor_encoder_write_double(encoder, data); Py_RETURN_NONE; } -PyObject *aws_py_cbor_encoder_encode_bytes(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_encoder_write_bytes(PyObject *self, PyObject *args) { struct aws_byte_cursor bytes_data; S_ENCODER_METHOD_START("y#", &bytes_data.ptr, &bytes_data.len); - aws_cbor_encode_bytes(encoder, bytes_data); + aws_cbor_encoder_write_bytes(encoder, bytes_data); Py_RETURN_NONE; } -PyObject *aws_py_cbor_encoder_encode_str(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_encoder_write_str(PyObject *self, PyObject *args) { struct aws_byte_cursor str_data; S_ENCODER_METHOD_START("s#", &str_data.ptr, &str_data.len); - aws_cbor_encode_string(encoder, str_data); + aws_cbor_encoder_write_string(encoder, str_data); Py_RETURN_NONE; } -PyObject *aws_py_cbor_encoder_encode_array_start(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_encoder_write_array_start(PyObject *self, PyObject *args) { PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); uint64_t data = PyLong_AsUnsignedLongLong(pylong); /* The python code has already checked the value */ AWS_ASSERT(!PyErr_Occurred()); - aws_cbor_encode_array_start(encoder, data); + aws_cbor_encoder_write_array_start(encoder, data); Py_RETURN_NONE; } -PyObject *aws_py_cbor_encoder_encode_map_start(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_encoder_write_map_start(PyObject *self, PyObject *args) { PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); uint64_t data = PyLong_AsUnsignedLongLong(pylong); /* The python code has already checked the value */ AWS_ASSERT(!PyErr_Occurred()); - aws_cbor_encode_map_start(encoder, data); + aws_cbor_encoder_write_map_start(encoder, data); Py_RETURN_NONE; } -PyObject *aws_py_cbor_encoder_encode_tag(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_encoder_write_tag(PyObject *self, PyObject *args) { PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); uint64_t data = PyLong_AsUnsignedLongLong(pylong); /* The python code has already checked the value */ AWS_ASSERT(!PyErr_Occurred()); - aws_cbor_encode_tag(encoder, data); + aws_cbor_encoder_write_tag(encoder, data); Py_RETURN_NONE; } -PyObject *aws_py_cbor_encoder_encode_simple_types(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_encoder_write_simple_types(PyObject *self, PyObject *args) { Py_ssize_t type_enum; S_ENCODER_METHOD_START("n", &type_enum); switch (type_enum) { case 5: - aws_cbor_encode_null(encoder); + aws_cbor_encoder_write_null(encoder); break; default: @@ -208,7 +208,7 @@ PyObject *aws_py_cbor_decoder_new(PyObject *self, PyObject *args) { PyObject *aws_py_cbor_decoder_peek_type(PyObject *self, PyObject *args) { enum aws_cbor_element_type out_type; - S_DECODER_METHOD_START(aws_cbor_decode_peek_type, out_type); + S_DECODER_METHOD_START(aws_cbor_decoder_peek_type, out_type); /* TODO: an convert from C type to the Python type */ Py_RETURN_NONE; } @@ -229,7 +229,7 @@ PyObject *aws_py_cbor_decoder_get_remaining_bytes_len(PyObject *self, PyObject * PyObject *aws_py_cbor_decoder_consume_next_element(PyObject *self, PyObject *args) { enum aws_cbor_element_type out_type; - S_DECODER_METHOD_START(aws_cbor_decode_consume_next_element, out_type); + S_DECODER_METHOD_START(aws_cbor_decoder_consume_next_element, out_type); /* TODO: an convert from C type to the Python type */ Py_RETURN_NONE; } @@ -244,7 +244,7 @@ PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *a if (!decoder) { return NULL; } - if (aws_cbor_decode_consume_next_data_item(decoder)) { + if (aws_cbor_decoder_consume_next_data_item(decoder)) { return PyErr_AwsLastError(); } Py_RETURN_NONE; @@ -252,54 +252,54 @@ PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *a PyObject *aws_py_cbor_decoder_get_next_unsigned_int(PyObject *self, PyObject *args) { uint64_t out_val; - S_DECODER_METHOD_START(aws_cbor_decode_get_next_unsigned_val, out_val); + S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_unsigned_val, out_val); return PyLong_FromUnsignedLongLong(out_val); } PyObject *aws_py_cbor_decoder_get_next_negative_int(PyObject *self, PyObject *args) { uint64_t out_val; - S_DECODER_METHOD_START(aws_cbor_decode_get_next_neg_val, out_val); + S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_neg_val, out_val); return PyLong_FromUnsignedLongLong(out_val); } PyObject *aws_py_cbor_decoder_get_next_double(PyObject *self, PyObject *args) { double out_val; - S_DECODER_METHOD_START(aws_cbor_decode_get_next_double_val, out_val); + S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_double_val, out_val); return PyFloat_FromDouble(out_val); } PyObject *aws_py_cbor_decoder_get_next_bool(PyObject *self, PyObject *args) { bool out_val; - S_DECODER_METHOD_START(aws_cbor_decode_get_next_boolean_val, out_val); + S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_boolean_val, out_val); return PyBool_FromLong(out_val); } PyObject *aws_py_cbor_decoder_get_next_bytes(PyObject *self, PyObject *args) { struct aws_byte_cursor out_val; - S_DECODER_METHOD_START(aws_cbor_decode_get_next_bytes_val, out_val); + S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_bytes_val, out_val); return PyBytes_FromAwsByteCursor(&out_val); } PyObject *aws_py_cbor_decoder_get_next_str(PyObject *self, PyObject *args) { struct aws_byte_cursor out_val; - S_DECODER_METHOD_START(aws_cbor_decode_get_next_str_val, out_val); + S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_str_val, out_val); return PyUnicode_FromAwsByteCursor(&out_val); } PyObject *aws_py_cbor_decoder_get_next_array_start(PyObject *self, PyObject *args) { uint64_t out_val; - S_DECODER_METHOD_START(aws_cbor_decode_get_next_array_start, out_val); + S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_array_start, out_val); return PyLong_FromUnsignedLongLong(out_val); } PyObject *aws_py_cbor_decoder_get_next_map_start(PyObject *self, PyObject *args) { uint64_t out_val; - S_DECODER_METHOD_START(aws_cbor_decode_get_next_map_start, out_val); + S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_map_start, out_val); return PyLong_FromUnsignedLongLong(out_val); } PyObject *aws_py_cbor_decoder_get_next_tag_val(PyObject *self, PyObject *args) { uint64_t out_val; - S_DECODER_METHOD_START(aws_cbor_decode_get_next_tag_val, out_val); + S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_tag_val, out_val); return PyLong_FromUnsignedLongLong(out_val); } \ No newline at end of file diff --git a/source/cbor.h b/source/cbor.h index 94c09322e..ef25a13af 100644 --- a/source/cbor.h +++ b/source/cbor.h @@ -1,10 +1,10 @@ #ifndef AWS_CRT_PYTHON_CBOR_H -# define AWS_CRT_PYTHON_CBOR_H +#define AWS_CRT_PYTHON_CBOR_H /** * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0. */ -# include "module.h" +#include "module.h" /******************************************************************************* * ENCODE @@ -13,17 +13,17 @@ PyObject *aws_py_cbor_encoder_new(PyObject *self, PyObject *args); PyObject *aws_py_cbor_encoder_get_encoded_data(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_encoder_encode_unsigned_int(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_encoder_encode_negative_int(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_encoder_encode_float(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_encoder_encode_bytes(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_encoder_encode_str(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_encoder_encode_array_start(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_encoder_encode_map_start(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_encoder_encode_tag(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_unsigned_int(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_negative_int(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_float(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_bytes(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_str(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_array_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_map_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_tag(PyObject *self, PyObject *args); /* Encode the types without value needed. The arg is the type to encode. */ -PyObject *aws_py_cbor_encoder_encode_simple_types(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_simple_types(PyObject *self, PyObject *args); /******************************************************************************* * DECODE @@ -44,5 +44,4 @@ PyObject *aws_py_cbor_decoder_get_next_array_start(PyObject *self, PyObject *arg PyObject *aws_py_cbor_decoder_get_next_map_start(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_get_next_tag_val(PyObject *self, PyObject *args); -#endif /* AWS_CRT_PYTHON_CBOR_H */ - // PyObject *aws_py_(PyObject *self, PyObject *args); \ No newline at end of file +#endif /* AWS_CRT_PYTHON_CBOR_H */ \ No newline at end of file diff --git a/source/module.c b/source/module.c index 0b208d618..ed2d9838c 100644 --- a/source/module.c +++ b/source/module.c @@ -826,15 +826,15 @@ static PyMethodDef s_module_methods[] = { /* CBOR Encode */ AWS_PY_METHOD_DEF(cbor_encoder_new, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_get_encoded_data, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_encoder_encode_unsigned_int, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_encoder_encode_negative_int, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_encoder_encode_float, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_encoder_encode_bytes, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_encoder_encode_str, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_encoder_encode_array_start, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_encoder_encode_map_start, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_encoder_encode_tag, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_encoder_encode_simple_types, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_unsigned_int, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_negative_int, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_float, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_bytes, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_str, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_array_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_map_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_tag, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_simple_types, METH_VARARGS), /* CBOR Decode */ AWS_PY_METHOD_DEF(cbor_decoder_new, METH_VARARGS), From 36ca2b567e680ff9dcdd386fa8529f2fba423b7f Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 15 Apr 2024 15:47:09 -0700 Subject: [PATCH 03/20] get next -> pop next --- awscrt/cbor.py | 36 ++++++++++++++++++------------------ crt/s2n | 2 +- source/cbor.c | 18 +++++++++--------- source/cbor.h | 20 ++++++++++---------- source/module.c | 18 +++++++++--------- 5 files changed, 47 insertions(+), 47 deletions(-) diff --git a/awscrt/cbor.py b/awscrt/cbor.py index 90e524d12..bc3ce4702 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -122,30 +122,30 @@ def consume_next_element(self): def consume_next_data_item(self): return _awscrt.cbor_decoder_consume_next_data_item(self._binding) - def get_next_unsigned_int(self) -> int: - return _awscrt.cbor_decoder_get_next_unsigned_int(self._binding) + def pop_next_unsigned_int(self) -> int: + return _awscrt.cbor_decoder_pop_next_unsigned_int(self._binding) - def get_next_negative_int(self) -> int: - val = _awscrt.cbor_decoder_get_next_negative_int(self._binding) + def pop_next_negative_int(self) -> int: + val = _awscrt.cbor_decoder_pop_next_negative_int(self._binding) return -1 - val - def get_next_double(self) -> float: - return _awscrt.cbor_decoder_get_next_double(self._binding) + def pop_next_double(self) -> float: + return _awscrt.cbor_decoder_pop_next_double(self._binding) - def get_next_bool(self) -> bool: - return _awscrt.cbor_decoder_get_next_bool(self._binding) + def pop_next_bool(self) -> bool: + return _awscrt.cbor_decoder_pop_next_bool(self._binding) - def get_next_bytes(self) -> bytes: - return _awscrt.cbor_decoder_get_next_bytes(self._binding) + def pop_next_bytes(self) -> bytes: + return _awscrt.cbor_decoder_pop_next_bytes(self._binding) - def get_next_str(self) -> str: - return _awscrt.cbor_decoder_get_next_str(self._binding) + def pop_next_str(self) -> str: + return _awscrt.cbor_decoder_pop_next_str(self._binding) - def get_next_array_start(self) -> int: - return _awscrt.cbor_decoder_get_next_array_start(self._binding) + def pop_next_array_start(self) -> int: + return _awscrt.cbor_decoder_pop_next_array_start(self._binding) - def get_next_map_start(self) -> int: - return _awscrt.cbor_decoder_get_next_map_start(self._binding) + def pop_next_map_start(self) -> int: + return _awscrt.cbor_decoder_pop_next_map_start(self._binding) - def get_next_tag_val(self) -> int: - return _awscrt.cbor_decoder_get_next_tag_val(self._binding) + def pop_next_tag_val(self) -> int: + return _awscrt.cbor_decoder_pop_next_tag_val(self._binding) diff --git a/crt/s2n b/crt/s2n index 171c96a23..ee58f3401 160000 --- a/crt/s2n +++ b/crt/s2n @@ -1 +1 @@ -Subproject commit 171c96a232eb2bf45415340378b55b3bb6dd29cd +Subproject commit ee58f34011e178919d322eb5586d2e4b92c523ed diff --git a/source/cbor.c b/source/cbor.c index 0b89ece47..323cb474d 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -250,55 +250,55 @@ PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *a Py_RETURN_NONE; } -PyObject *aws_py_cbor_decoder_get_next_unsigned_int(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_unsigned_int(PyObject *self, PyObject *args) { uint64_t out_val; S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_unsigned_val, out_val); return PyLong_FromUnsignedLongLong(out_val); } -PyObject *aws_py_cbor_decoder_get_next_negative_int(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_negative_int(PyObject *self, PyObject *args) { uint64_t out_val; S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_neg_val, out_val); return PyLong_FromUnsignedLongLong(out_val); } -PyObject *aws_py_cbor_decoder_get_next_double(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_double(PyObject *self, PyObject *args) { double out_val; S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_double_val, out_val); return PyFloat_FromDouble(out_val); } -PyObject *aws_py_cbor_decoder_get_next_bool(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_bool(PyObject *self, PyObject *args) { bool out_val; S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_boolean_val, out_val); return PyBool_FromLong(out_val); } -PyObject *aws_py_cbor_decoder_get_next_bytes(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_bytes(PyObject *self, PyObject *args) { struct aws_byte_cursor out_val; S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_bytes_val, out_val); return PyBytes_FromAwsByteCursor(&out_val); } -PyObject *aws_py_cbor_decoder_get_next_str(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_str(PyObject *self, PyObject *args) { struct aws_byte_cursor out_val; S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_str_val, out_val); return PyUnicode_FromAwsByteCursor(&out_val); } -PyObject *aws_py_cbor_decoder_get_next_array_start(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_array_start(PyObject *self, PyObject *args) { uint64_t out_val; S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_array_start, out_val); return PyLong_FromUnsignedLongLong(out_val); } -PyObject *aws_py_cbor_decoder_get_next_map_start(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_map_start(PyObject *self, PyObject *args) { uint64_t out_val; S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_map_start, out_val); return PyLong_FromUnsignedLongLong(out_val); } -PyObject *aws_py_cbor_decoder_get_next_tag_val(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_tag_val(PyObject *self, PyObject *args) { uint64_t out_val; S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_tag_val, out_val); return PyLong_FromUnsignedLongLong(out_val); diff --git a/source/cbor.h b/source/cbor.h index ef25a13af..2fed0602e 100644 --- a/source/cbor.h +++ b/source/cbor.h @@ -34,14 +34,14 @@ PyObject *aws_py_cbor_decoder_peek_type(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_get_remaining_bytes_len(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_consume_next_element(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_get_next_unsigned_int(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_get_next_negative_int(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_get_next_double(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_get_next_bool(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_get_next_bytes(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_get_next_str(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_get_next_array_start(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_get_next_map_start(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_get_next_tag_val(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_unsigned_int(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_negative_int(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_double(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_bool(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_bytes(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_str(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_array_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_map_start(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_tag_val(PyObject *self, PyObject *args); -#endif /* AWS_CRT_PYTHON_CBOR_H */ \ No newline at end of file +#endif /* AWS_CRT_PYTHON_CBOR_H */ diff --git a/source/module.c b/source/module.c index ed2d9838c..b45becb37 100644 --- a/source/module.c +++ b/source/module.c @@ -842,15 +842,15 @@ static PyMethodDef s_module_methods[] = { AWS_PY_METHOD_DEF(cbor_decoder_get_remaining_bytes_len, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_consume_next_element, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_consume_next_data_item, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_get_next_unsigned_int, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_get_next_negative_int, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_get_next_double, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_get_next_bool, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_get_next_bytes, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_get_next_str, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_get_next_array_start, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_get_next_map_start, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_get_next_tag_val, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_unsigned_int, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_negative_int, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_double, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_bool, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_bytes, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_str, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_array_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_map_start, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_tag_val, METH_VARARGS), {NULL, NULL, 0, NULL}, }; From 9e32058debedc536e8ab63e547a354235046c2bb Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 15 Apr 2024 16:49:27 -0700 Subject: [PATCH 04/20] new line at the end --- source/cbor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/cbor.c b/source/cbor.c index 323cb474d..96716b83a 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -302,4 +302,4 @@ PyObject *aws_py_cbor_decoder_pop_next_tag_val(PyObject *self, PyObject *args) { uint64_t out_val; S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_tag_val, out_val); return PyLong_FromUnsignedLongLong(out_val); -} \ No newline at end of file +} From 1f19817f92942be5d954814a63665b70702adacc Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Thu, 18 Apr 2024 17:50:35 -0700 Subject: [PATCH 05/20] basic impl --- awscrt/cbor.py | 228 ++++++++++++++++++++++++++++++++++++++++------ source/cbor.c | 40 ++++++-- source/cbor.h | 1 + source/module.c | 1 + test/test_cbor.py | 75 +++++++++++++++ 5 files changed, 306 insertions(+), 39 deletions(-) create mode 100644 test/test_cbor.py diff --git a/awscrt/cbor.py b/awscrt/cbor.py index bc3ce4702..dee01045d 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -5,16 +5,27 @@ from awscrt import NativeResource from enum import IntEnum -from typing import Any +from typing import Union, Any class AwsCborElementType(IntEnum): - Int = 0 - Float = 1 - String = 2 - Map = 3 - Array = 4 - NULL = 5 + # Corresponding to `enum aws_cbor_element_type` in aws/common/cbor.h + UnsignedInt = 0 + NegativeInt = 1 + Float = 2 + Bytes = 3 + String = 4 + ArrayStart = 5 + MapStart = 6 + Tag = 7 + Bool = 8 + Null = 9 + Undefined = 10 + Break = 11 + InfBytes = 12 + InfStr = 13 + InfArray = 14 + InfMap = 15 class AwsCborEncoder(NativeResource): @@ -28,15 +39,13 @@ def get_encoded_data(self) -> bytes: return _awscrt.cbor_encoder_get_encoded_data(self._binding) def write_int(self, val: int): - """Add int to encode, -2^64 to 2^64 inclusive. Otherwise, overflow will be raised. + """Write an int as cbor formatted, -2^64 to 2^64 - 1 inclusive. + Otherwise, overflow will be raised. Args: - val (int): _description_ - - Returns: - _type_: _description_ + val (int): value to be encoded and written to the encoded data. """ - if val < -2**64 or val > 2**64: + if val < -2**64 or val > 2**64 - 1: raise OverflowError(f"{val} is overflowed to be encoded into cbor integers") if val >= 0: @@ -44,31 +53,44 @@ def write_int(self, val: int): else: return _awscrt.cbor_encoder_write_negative_int(self._binding, -1 - val) - def write_float(self, val: float): - """Adding a "double" to encode - Rely on `PyFloat_AsDouble()` for error checking. + def write_float(self, val: Union[int, float]): + """Write a double as cbor formatted + If the val can be convert the int without loss of precision, + it will be converted to int to be written to as cbor formatted. + Args: - val (float): _description_ + val (float): value to be encoded and written to the encoded data. """ - return _awscrt.cbor_encoder_write_float(self._binding, val) + if isinstance(val, int): + self.write_int(val) + elif isinstance(val, float): + return _awscrt.cbor_encoder_write_float(self._binding, val) def write_bytes(self, val: bytes): + """Write bytes as cbor formatted + + Args: + val (bytes): value to be encoded and written to the encoded data. + """ return _awscrt.cbor_encoder_write_bytes(self._binding, val) def write_string(self, val: str): + """Write string as cbor formatted + + Args: + val (str): value to be encoded and written to the encoded data. + """ return _awscrt.cbor_encoder_write_str(self._binding, val) def write_array_start(self, number_entries: int): - """Add a start of array element, with the `number_entries` + """Add a start of array element. + A legistic with the `number_entries` for the cbor data items to be included in the array. `number_entries` should 0 to 2^64 inclusive. Otherwise, overflow will be raised. Args: - number_entries (int): _description_ - - Returns: - _type_: _description_ + number_entries (int): number of entries in the array to be written """ if number_entries < 0 or number_entries > 2**64: raise OverflowError() @@ -82,10 +104,7 @@ def write_map_start(self, number_entries: int): Otherwise, overflow will be raised. Args: - number_entries (int): _description_ - - Returns: - _type_: _description_ + number_entries (int): number of entries in the map to be written """ if number_entries < 0 or number_entries > 2**64: raise ValueError() @@ -99,7 +118,47 @@ def write_tag(self, tag_number: int): return _awscrt.cbor_encoder_write_tag(self._binding, tag_number) def write_null(self): - return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborElementType.NULL) + return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborElementType.Null) + + def write_bool(self, val: bool): + return _awscrt.cbor_encoder_write_bool(self._binding, val) + + def write_data_item(self, data_item: Any): + """Generic API to write any type of an data_item as cbor formatted. + TODO: timestamp? + + Args: + data_item (Any): any type of data_item. If the type is not supported to be converted to cbor format, ValueError will be raised. + """ + if isinstance(data_item, str): + self.write_string(data_item) + elif isinstance(data_item, bytes): + self.write_bytes(data_item) + elif isinstance(data_item, int): + self.write_int(data_item) + elif isinstance(data_item, float): + self.write_float(data_item) + elif isinstance(data_item, dict): + self.write_dict(data_item) + elif isinstance(data_item, list): + self.write_list(data_item) + elif isinstance(data_item, bool): + self.write_bool(data_item) + elif data_item is None: + self.write_null() + else: + raise ValueError(f"not supported type for data_item: {data_item}") + + def write_dict(self, val: dict): + self.write_map_start(len(val)) + for key, value in val.items(): + self.write_data_item(key) + self.write_data_item(value) + + def write_list(self, val: list): + self.write_array_start(len(val)) + for data_item in val: + self.write_data_item(data_item) class AwsCborDecoder(NativeResource): @@ -111,7 +170,7 @@ def __init__(self, src: bytes): self._binding = _awscrt.cbor_decoder_new(src) def peek_next_type(self) -> AwsCborElementType: - return _awscrt.cbor_decoder_peek_type(self._binding) + return AwsCborElementType(_awscrt.cbor_decoder_peek_type(self._binding)) def get_remaining_bytes_len(self) -> int: return _awscrt.cbor_decoder_get_remaining_bytes_len(self._binding) @@ -149,3 +208,114 @@ def pop_next_map_start(self) -> int: def pop_next_tag_val(self) -> int: return _awscrt.cbor_decoder_pop_next_tag_val(self._binding) + + def pop_next_numeric(self) -> Union[int, float]: + type = self.peek_next_type() + if type == AwsCborElementType.UnsignedInt: + return self.pop_next_unsigned_int() + elif type == AwsCborElementType.NegativeInt: + return self.pop_next_negative_int() + elif type == AwsCborElementType.Float: + return self.pop_next_double() + # TODO: support bignum? + raise ValueError("the cbor src is not a numeric type to decode") + + def pop_next_inf_bytes(self) -> bytes: + type = self.peek_next_type() + if type != AwsCborElementType.InfBytes: + raise ValueError("the cbor src is not an indefinite bytes to decode") + result = b"" + # Consume the inf_bytes + self.consume_next_element() + while type != AwsCborElementType.Break: + result += self.pop_next_bytes() + type = self.peek_next_type() + # Consume the break + self.consume_next_element() + return result + + def pop_next_inf_str(self) -> bytes: + type = self.peek_next_type() + if type != AwsCborElementType.InfStr: + raise ValueError("the cbor src is not an indefinite string to decode") + result = "" + # Consume the inf_str + self.consume_next_element() + while type != AwsCborElementType.Break: + result += self.pop_next_str() + type = self.peek_next_type() + # Consume the break + self.consume_next_element() + return result + + def pop_next_list(self) -> list: + type = self.peek_next_type() + return_val = [] + if type == AwsCborElementType.InfArray: + # Consume the inf_array + self.consume_next_element() + while type != AwsCborElementType.Break: + return_val.append(self.pop_next_data_item()) + type = self.peek_next_type() + # Consume the break + self.consume_next_element() + return return_val + elif type == AwsCborElementType.ArrayStart: + number_elements = self.pop_next_array_start() + for i in range(number_elements): + return_val.append(self.pop_next_data_item()) + return return_val + else: + raise ValueError("the cbor src is not a list to decode") + + def pop_next_map(self) -> dict: + type = self.peek_next_type() + return_val = {} + if type == AwsCborElementType.InfMap: + # Consume the inf_map + self.consume_next_element() + while type != AwsCborElementType.Break: + return_val[self.pop_next_data_item()] = self.pop_next_data_item() + type = self.peek_next_type() + # Consume the break + self.consume_next_element() + return return_val + elif type == AwsCborElementType.MapStart: + number_elements = self.pop_next_map_start() + for i in range(number_elements): + key = self.pop_next_data_item() + value = self.pop_next_data_item() + return_val[key] = value + return return_val + else: + raise ValueError("the cbor src is not a map to decode") + + def pop_next_data_item(self) -> Any: + # TODO: tag, timestamp + # TODO: maybe wrote all those if elif in the binding level, so that we can use switch at least??? + type = self.peek_next_type() + if type == AwsCborElementType.UnsignedInt or \ + type == AwsCborElementType.NegativeInt or \ + type == AwsCborElementType.Float: + return self.pop_next_numeric() + elif type == AwsCborElementType.Bytes: + return self.pop_next_bytes() + elif type == AwsCborElementType.String: + return self.pop_next_str() + elif type == AwsCborElementType.Bool: + return self.pop_next_bool() + elif type == AwsCborElementType.Null: + self.consume_next_element() + return None + elif type == AwsCborElementType.ArrayStart or \ + type == AwsCborElementType.InfArray: + return self.pop_next_list() + elif type == AwsCborElementType.MapStart or \ + type == AwsCborElementType.InfMap: + return self.pop_next_map() + elif type == AwsCborElementType.InfBytes: + return self.pop_next_inf_bytes() + elif type == AwsCborElementType.InfStr: + return self.pop_next_inf_str() + else: + raise ValueError(f"unsupported type: {type.name}") diff --git a/source/cbor.c b/source/cbor.c index 96716b83a..189a76871 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -69,8 +69,10 @@ PyObject *aws_py_cbor_encoder_write_unsigned_int(PyObject *self, PyObject *args) PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); uint64_t data = PyLong_AsUnsignedLongLong(pylong); - /* The python code has already checked the value */ - AWS_ASSERT(!PyErr_Occurred()); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_int is not a valid int to encode"); + return NULL; + } aws_cbor_encoder_write_uint(encoder, data); Py_RETURN_NONE; } @@ -79,8 +81,10 @@ PyObject *aws_py_cbor_encoder_write_negative_int(PyObject *self, PyObject *args) PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); uint64_t data = PyLong_AsUnsignedLongLong(pylong); - /* The python code has already checked the value */ - AWS_ASSERT(!PyErr_Occurred()); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_int is not a valid int to encode"); + return NULL; + } aws_cbor_encoder_write_negint(encoder, data); Py_RETURN_NONE; } @@ -91,7 +95,7 @@ PyObject *aws_py_cbor_encoder_write_float(PyObject *self, PyObject *args) { double data = PyFloat_AsDouble(pyfloat); /* Rely on the python convert to check the pyfloat is able to convert to double. */ if (PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.add_float is not a valid double to encode"); + PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_float is not a valid double to encode"); return NULL; } aws_cbor_encoder_write_double(encoder, data); @@ -117,7 +121,10 @@ PyObject *aws_py_cbor_encoder_write_array_start(PyObject *self, PyObject *args) S_ENCODER_METHOD_START("O", &pylong); uint64_t data = PyLong_AsUnsignedLongLong(pylong); /* The python code has already checked the value */ - AWS_ASSERT(!PyErr_Occurred()); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_array_start is not a valid int to encode"); + return NULL; + } aws_cbor_encoder_write_array_start(encoder, data); Py_RETURN_NONE; } @@ -127,7 +134,10 @@ PyObject *aws_py_cbor_encoder_write_map_start(PyObject *self, PyObject *args) { S_ENCODER_METHOD_START("O", &pylong); uint64_t data = PyLong_AsUnsignedLongLong(pylong); /* The python code has already checked the value */ - AWS_ASSERT(!PyErr_Occurred()); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_map_start is not a valid int to encode"); + return NULL; + } aws_cbor_encoder_write_map_start(encoder, data); Py_RETURN_NONE; } @@ -137,16 +147,26 @@ PyObject *aws_py_cbor_encoder_write_tag(PyObject *self, PyObject *args) { S_ENCODER_METHOD_START("O", &pylong); uint64_t data = PyLong_AsUnsignedLongLong(pylong); /* The python code has already checked the value */ - AWS_ASSERT(!PyErr_Occurred()); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_tag is not a valid int to encode"); + return NULL; + } aws_cbor_encoder_write_tag(encoder, data); Py_RETURN_NONE; } +PyObject *aws_py_cbor_encoder_write_bool(PyObject *self, PyObject *args) { + int bool_val; + S_ENCODER_METHOD_START("p", &bool_val); + aws_cbor_encoder_write_bool(encoder, bool_val); + Py_RETURN_NONE; +} + PyObject *aws_py_cbor_encoder_write_simple_types(PyObject *self, PyObject *args) { Py_ssize_t type_enum; S_ENCODER_METHOD_START("n", &type_enum); switch (type_enum) { - case 5: + case AWS_CBOR_TYPE_NULL: aws_cbor_encoder_write_null(encoder); break; @@ -210,7 +230,7 @@ PyObject *aws_py_cbor_decoder_peek_type(PyObject *self, PyObject *args) { enum aws_cbor_element_type out_type; S_DECODER_METHOD_START(aws_cbor_decoder_peek_type, out_type); /* TODO: an convert from C type to the Python type */ - Py_RETURN_NONE; + return PyLong_FromSize_t(out_type); } PyObject *aws_py_cbor_decoder_get_remaining_bytes_len(PyObject *self, PyObject *args) { diff --git a/source/cbor.h b/source/cbor.h index 2fed0602e..90d456165 100644 --- a/source/cbor.h +++ b/source/cbor.h @@ -21,6 +21,7 @@ PyObject *aws_py_cbor_encoder_write_str(PyObject *self, PyObject *args); PyObject *aws_py_cbor_encoder_write_array_start(PyObject *self, PyObject *args); PyObject *aws_py_cbor_encoder_write_map_start(PyObject *self, PyObject *args); PyObject *aws_py_cbor_encoder_write_tag(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_bool(PyObject *self, PyObject *args); /* Encode the types without value needed. The arg is the type to encode. */ PyObject *aws_py_cbor_encoder_write_simple_types(PyObject *self, PyObject *args); diff --git a/source/module.c b/source/module.c index b45becb37..83e0a241f 100644 --- a/source/module.c +++ b/source/module.c @@ -834,6 +834,7 @@ static PyMethodDef s_module_methods[] = { AWS_PY_METHOD_DEF(cbor_encoder_write_array_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_write_map_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_write_tag, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_bool, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_write_simple_types, METH_VARARGS), /* CBOR Decode */ diff --git a/test/test_cbor.py b/test/test_cbor.py new file mode 100644 index 000000000..29690f729 --- /dev/null +++ b/test/test_cbor.py @@ -0,0 +1,75 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0. + +from test import NativeResourceTest +from awscrt.cbor import * + + +class TestCBOR(NativeResourceTest): + def test_cbor_encode_decode_int(self): + encoder = AwsCborEncoder() + try: + # Overflow + encoder.write_int(2**64) + except OverflowError as e: + self.assertIsNotNone(e) + else: + self.assertTrue(False) + try: + # pass float instead of int + encoder.write_int(1.1) + except ValueError as e: + self.assertIsNotNone(e) + else: + self.assertTrue(False) + + val_to_write = [-100, 100, 2**64 - 1, -2**64] + for val in val_to_write: + encoder.write_int(val) + + decoder = AwsCborDecoder(encoder.get_encoded_data()) + try: + val = decoder.pop_next_unsigned_int() + except RuntimeError as e: + self.assertIsNotNone(e) + else: + self.assertTrue(False) + + for val in val_to_write: + t = decoder.pop_next_numeric() + self.assertEqual(t, val) + + self.assertEqual(decoder.get_remaining_bytes_len(), 0) + + def test_cbor_encode_decode_float(self): + encoder = AwsCborEncoder() + val_to_write = [-100.12, -100, 100, 2**64 - 1, -2**64, 18446744073709551616.0] + for val in val_to_write: + encoder.write_float(val) + + decoder = AwsCborDecoder(encoder.get_encoded_data()) + + for val in val_to_write: + t = decoder.pop_next_numeric() + self.assertEqual(t, val) + + self.assertEqual(decoder.get_remaining_bytes_len(), 0) + + def test_cbor_encode_decode_data_item(self): + encoder = AwsCborEncoder() + val_to_write = { + "mytest": b"write_test", + b"test_more": { + "another": 123, + b"more": [1, 2, 3] + }, + 2: { + 2.3: ["a", "b", "c"] + } + } + encoder.write_data_item(val_to_write) + + decoder = AwsCborDecoder(encoder.get_encoded_data()) + + t = decoder.pop_next_data_item() + self.assertEqual(val_to_write, t) From 0e3370dd77a0b9460c9e127ed5619ee83f3e96a5 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Fri, 19 Apr 2024 16:38:26 -0700 Subject: [PATCH 06/20] WIP --- awscrt/cbor.py | 99 +++++++++++++++++++------ benchmark_cbor.py | 91 +++++++++++++++++++++++ source/cbor.c | 185 ++++++++++++++++++++++++++++++++++------------ source/cbor.h | 2 + source/module.c | 1 + test/test_cbor.py | 37 +++------- 6 files changed, 317 insertions(+), 98 deletions(-) create mode 100644 benchmark_cbor.py diff --git a/awscrt/cbor.py b/awscrt/cbor.py index dee01045d..9f9c5c4bd 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -28,6 +28,17 @@ class AwsCborElementType(IntEnum): InfMap = 15 +class AwsCborTags(IntEnum): + # Corresponding to `enum aws_cbor_tags` in aws/common/cbor.h + StandardTime = 0 + EpochTime = 1 + UnsignedBigNum = 2 + NegativeBigNum = 3 + DecimalFraction = 4 + BigFloat = 5 + Unclassified = 6 + + class AwsCborEncoder(NativeResource): """ Encoder for CBOR """ @@ -36,24 +47,47 @@ def __init__(self): self._binding = _awscrt.cbor_encoder_new() def get_encoded_data(self) -> bytes: + """Return the current encoded data as bytes + + Returns: + bytes: The encoded data currently + """ return _awscrt.cbor_encoder_get_encoded_data(self._binding) def write_int(self, val: int): - """Write an int as cbor formatted, -2^64 to 2^64 - 1 inclusive. - Otherwise, overflow will be raised. + """Write an int as cbor formatted, + val less than -2^64 will be encoded as Negative bignum for CBOR + val between -2^64 to -1, inclusive, will be encode as negative integer for CBOR + val between 0 to 2^64 - 1, inclusive, will be encoded as unsigned integer for CBOR + val greater than 2^64 - 1 will be encoded as Unsigned bignum for CBOR Args: val (int): value to be encoded and written to the encoded data. """ - if val < -2**64 or val > 2**64 - 1: - raise OverflowError(f"{val} is overflowed to be encoded into cbor integers") + assert isinstance(val, int) + val_to_encode = val + if val < 0: + # For negative value, the value to encode is -1 - val. + val_to_encode = -1 - val + bit_len = val_to_encode.bit_length() + if bit_len > 64: + # Bignum + bytes_len = bit_len // 8 + if bit_len % 8 > 0: + bytes_len += 1 + bytes_val = val_to_encode.to_bytes(bytes_len, "big") + if val < 0: + self.write_tag(AwsCborTags.NegativeBigNum) # tag for negative bignum + else: + self.write_tag(AwsCborTags.UnsignedBigNum) # tag for unsigned bignum + return self.write_bytes(bytes_val) if val >= 0: - return _awscrt.cbor_encoder_write_unsigned_int(self._binding, val) + return _awscrt.cbor_encoder_write_unsigned_int(self._binding, val_to_encode) else: - return _awscrt.cbor_encoder_write_negative_int(self._binding, -1 - val) + return _awscrt.cbor_encoder_write_negative_int(self._binding, val_to_encode) - def write_float(self, val: Union[int, float]): + def write_float(self, val: float): """Write a double as cbor formatted If the val can be convert the int without loss of precision, it will be converted to int to be written to as cbor formatted. @@ -61,10 +95,9 @@ def write_float(self, val: Union[int, float]): Args: val (float): value to be encoded and written to the encoded data. """ - if isinstance(val, int): - self.write_int(val) - elif isinstance(val, float): - return _awscrt.cbor_encoder_write_float(self._binding, val) + assert isinstance(val, float) + # Floating point numbers are usually implemented using double in C + return _awscrt.cbor_encoder_write_float(self._binding, val) def write_bytes(self, val: bytes): """Write bytes as cbor formatted @@ -125,7 +158,7 @@ def write_bool(self, val: bool): def write_data_item(self, data_item: Any): """Generic API to write any type of an data_item as cbor formatted. - TODO: timestamp? + TODO: timestamp <-> datetime?? Decimal fraction <-> decimal?? Args: data_item (Any): any type of data_item. If the type is not supported to be converted to cbor format, ValueError will be raised. @@ -210,7 +243,7 @@ def pop_next_tag_val(self) -> int: return _awscrt.cbor_decoder_pop_next_tag_val(self._binding) def pop_next_numeric(self) -> Union[int, float]: - type = self.peek_next_type() + type = _awscrt.cbor_decoder_peek_type(self._binding) if type == AwsCborElementType.UnsignedInt: return self.pop_next_unsigned_int() elif type == AwsCborElementType.NegativeInt: @@ -218,10 +251,11 @@ def pop_next_numeric(self) -> Union[int, float]: elif type == AwsCborElementType.Float: return self.pop_next_double() # TODO: support bignum? + # TODO: Instead of ValueError, probably raise the same error from C with the same AWS_ERROR_CBOR_UNEXPECTED_TYPE raise ValueError("the cbor src is not a numeric type to decode") def pop_next_inf_bytes(self) -> bytes: - type = self.peek_next_type() + type = _awscrt.cbor_decoder_peek_type(self._binding) if type != AwsCborElementType.InfBytes: raise ValueError("the cbor src is not an indefinite bytes to decode") result = b"" @@ -229,13 +263,13 @@ def pop_next_inf_bytes(self) -> bytes: self.consume_next_element() while type != AwsCborElementType.Break: result += self.pop_next_bytes() - type = self.peek_next_type() + type = _awscrt.cbor_decoder_peek_type(self._binding) # Consume the break self.consume_next_element() return result def pop_next_inf_str(self) -> bytes: - type = self.peek_next_type() + type = _awscrt.cbor_decoder_peek_type(self._binding) if type != AwsCborElementType.InfStr: raise ValueError("the cbor src is not an indefinite string to decode") result = "" @@ -243,20 +277,20 @@ def pop_next_inf_str(self) -> bytes: self.consume_next_element() while type != AwsCborElementType.Break: result += self.pop_next_str() - type = self.peek_next_type() + type = _awscrt.cbor_decoder_peek_type(self._binding) # Consume the break self.consume_next_element() return result def pop_next_list(self) -> list: - type = self.peek_next_type() + type = _awscrt.cbor_decoder_peek_type(self._binding) return_val = [] if type == AwsCborElementType.InfArray: # Consume the inf_array self.consume_next_element() while type != AwsCborElementType.Break: return_val.append(self.pop_next_data_item()) - type = self.peek_next_type() + type = _awscrt.cbor_decoder_peek_type(self._binding) # Consume the break self.consume_next_element() return return_val @@ -269,14 +303,14 @@ def pop_next_list(self) -> list: raise ValueError("the cbor src is not a list to decode") def pop_next_map(self) -> dict: - type = self.peek_next_type() + type = _awscrt.cbor_decoder_peek_type(self._binding) return_val = {} if type == AwsCborElementType.InfMap: # Consume the inf_map self.consume_next_element() while type != AwsCborElementType.Break: return_val[self.pop_next_data_item()] = self.pop_next_data_item() - type = self.peek_next_type() + type = _awscrt.cbor_decoder_peek_type(self._binding) # Consume the break self.consume_next_element() return return_val @@ -291,9 +325,11 @@ def pop_next_map(self) -> dict: raise ValueError("the cbor src is not a map to decode") def pop_next_data_item(self) -> Any: - # TODO: tag, timestamp + # TODO: timestamp, decimal fraction # TODO: maybe wrote all those if elif in the binding level, so that we can use switch at least??? - type = self.peek_next_type() + # And possible to avoid some call cross language boundary??? + # TODO: If it fails in the middle, with bunch of stuff already popped. Do we want a way to resume?? + type = _awscrt.cbor_decoder_peek_type(self._binding) if type == AwsCborElementType.UnsignedInt or \ type == AwsCborElementType.NegativeInt or \ type == AwsCborElementType.Float: @@ -304,7 +340,9 @@ def pop_next_data_item(self) -> Any: return self.pop_next_str() elif type == AwsCborElementType.Bool: return self.pop_next_bool() - elif type == AwsCborElementType.Null: + elif type == AwsCborElementType.Null or \ + type == AwsCborElementType.Undefined: + # Treat both NULL and Undefined as None. self.consume_next_element() return None elif type == AwsCborElementType.ArrayStart or \ @@ -317,5 +355,18 @@ def pop_next_data_item(self) -> Any: return self.pop_next_inf_bytes() elif type == AwsCborElementType.InfStr: return self.pop_next_inf_str() + elif type == AwsCborElementType.Tag: + tag_val = self.pop_next_tag_val() + if tag_val == AwsCborTags.NegativeBigNum: + bytes_val = self.pop_next_bytes() + return -1 - int.from_bytes(bytes_val, "big") + elif tag_val == AwsCborTags.UnsignedBigNum: + bytes_val = self.pop_next_bytes() + return int.from_bytes(bytes_val, "big") + else: + raise ValueError(f"unsupported tag value: {tag_val}") else: raise ValueError(f"unsupported type: {type.name}") + + def pop_next_data_item_2(self) -> Any: + return _awscrt.cbor_decoder_pop_next_data_item(self._binding) diff --git a/benchmark_cbor.py b/benchmark_cbor.py new file mode 100644 index 000000000..f0d8f5fb3 --- /dev/null +++ b/benchmark_cbor.py @@ -0,0 +1,91 @@ +from awscrt.cbor import * +import random +import time +import cbor2 + + +def ns_to_secs(ns: int) -> float: + return ns / 1_000_000_000.0 + + +def bytes_to_MiB(bytes: int) -> float: + return bytes / float(1024**2) + + +class TestData: + # generate predictable, but variable test values of different types + @staticmethod + def random_value(i=0, seed=0): + r = random.Random(i + seed) # use the index as the seed for predictable results + random_number = TestData.random_number(r, 5) + if random_number == 0: + return f"Some String value {i}" + elif random_number == 1: + return r.random() # a float value + elif random_number == 2: + return TestData.random_number(r, 100000) # a large integer + elif random_number == 3: + return list(range(TestData.random_number(r, 100))) # an array + elif random_number == 4: + return {"a": 1, "b": 2, "c": 3} # a hash + else: + return "generic string" + + # generate a predictable, but variable hash with a range of data types + @staticmethod + def test_hash(n_keys=5, seed=0): + return {f"key{i}": TestData.random_value(i, seed) for i in range(n_keys)} + + @staticmethod + def random_number(r, n): + return int(r.random() * n) + + +t = TestData.test_hash(100000) + + +print("cbor2 -- encode") +run_start_ns = time.perf_counter_ns() +cbor2_encoded = cbor2.dumps(t) +run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) +print(f"encoded MB: {bytes_to_MiB(len(cbor2_encoded))}") +print(f"time passed: {run_secs} secs") + + +print("CRT -- encode") +encoder = AwsCborEncoder() + +run_start_ns = time.perf_counter_ns() +encoder.write_data_item(t) +encoded = encoder.get_encoded_data() +run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) +print(f"encoded MB: {bytes_to_MiB(len(encoded))}") +print(f"time passed: {run_secs} secs") + +print(cbor2_encoded == encoded) + +print("cbor2 -- decode") +run_start_ns = time.perf_counter_ns() +decoded = cbor2.loads(encoded) +run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) +print(f"time passed: {run_secs} secs") + +print("CRT -- decode") +run_start_ns = time.perf_counter_ns() +decoder = AwsCborDecoder(encoded) +crt_decoded = decoder.pop_next_data_item() + +run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) +print(f"time passed: {run_secs} secs") + + +print("CRT -- decode 2") +run_start_ns = time.perf_counter_ns() +decoder_2 = AwsCborDecoder(encoded) +decoder_2.consume_next_data_item() + +run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) +print(f"time passed: {run_secs} secs") + +print(crt_decoded == t) +print(crt_decoded == decoded) diff --git a/source/cbor.c b/source/cbor.c index 189a76871..3e065c8bf 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -212,7 +212,7 @@ PyObject *aws_py_cbor_decoder_new(PyObject *self, PyObject *args) { return py_capsule; } -#define S_DECODER_METHOD_START(decoder_func, out_val) \ +#define S_GET_DECODER() \ (void)self; \ PyObject *py_capsule; \ if (!PyArg_ParseTuple(args, "O", &py_capsule)) { \ @@ -221,28 +221,40 @@ PyObject *aws_py_cbor_decoder_new(PyObject *self, PyObject *args) { struct aws_cbor_decoder *decoder = s_cbor_decoder_from_capsule(py_capsule); \ if (!decoder) { \ return NULL; \ - } \ + } + +#define S_DECODER_METHOD_START(decoder_func, out_val) \ + S_GET_DECODER() \ if (decoder_func(decoder, &out_val)) { \ return PyErr_AwsLastError(); \ } +#define S_POP_NEXT_TO_PYOBJECT(ctype, field, py_conversion) \ + static PyObject *s_cbor_decoder_pop_next_##field##_to_pyobject(struct aws_cbor_decoder *decoder) { \ + ctype out_val; \ + if (aws_cbor_decoder_pop_next_##field(decoder, &out_val)) { \ + return PyErr_AwsLastError(); \ + } \ + return py_conversion(out_val); \ + } + +#define S_POP_NEXT_TO_PYOBJECT_CURSOR(field, py_conversion) \ + static PyObject *s_cbor_decoder_pop_next_##field##_to_pyobject(struct aws_cbor_decoder *decoder) { \ + struct aws_byte_cursor out_val; \ + if (aws_cbor_decoder_pop_next_##field(decoder, &out_val)) { \ + return PyErr_AwsLastError(); \ + } \ + return py_conversion(&out_val); \ + } + PyObject *aws_py_cbor_decoder_peek_type(PyObject *self, PyObject *args) { enum aws_cbor_element_type out_type; S_DECODER_METHOD_START(aws_cbor_decoder_peek_type, out_type); - /* TODO: an convert from C type to the Python type */ return PyLong_FromSize_t(out_type); } PyObject *aws_py_cbor_decoder_get_remaining_bytes_len(PyObject *self, PyObject *args) { - (void)self; - PyObject *py_capsule; - if (!PyArg_ParseTuple(args, "O", &py_capsule)) { - return NULL; - } - struct aws_cbor_decoder *decoder = s_cbor_decoder_from_capsule(py_capsule); - if (!decoder) { - return NULL; - } + S_GET_DECODER(); size_t remaining_len = aws_cbor_decoder_get_remaining_length(decoder); return PyLong_FromSize_t(remaining_len); } @@ -255,71 +267,148 @@ PyObject *aws_py_cbor_decoder_consume_next_element(PyObject *self, PyObject *arg } PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *args) { - (void)self; - PyObject *py_capsule; - if (!PyArg_ParseTuple(args, "O", &py_capsule)) { - return NULL; - } - struct aws_cbor_decoder *decoder = s_cbor_decoder_from_capsule(py_capsule); - if (!decoder) { - return NULL; - } + S_GET_DECODER(); if (aws_cbor_decoder_consume_next_data_item(decoder)) { return PyErr_AwsLastError(); } Py_RETURN_NONE; } +// static PyObject *s_decode(struct aws_cbor_decoder *decoder) {} + +S_POP_NEXT_TO_PYOBJECT(uint64_t, unsigned_val, PyLong_FromUnsignedLongLong) +S_POP_NEXT_TO_PYOBJECT(uint64_t, neg_val, PyLong_FromUnsignedLongLong) +S_POP_NEXT_TO_PYOBJECT(double, double_val, PyFloat_FromDouble) +S_POP_NEXT_TO_PYOBJECT(bool, boolean_val, PyBool_FromLong) +S_POP_NEXT_TO_PYOBJECT_CURSOR(bytes_val, PyBytes_FromAwsByteCursor) +S_POP_NEXT_TO_PYOBJECT_CURSOR(str_val, PyUnicode_FromAwsByteCursor) +S_POP_NEXT_TO_PYOBJECT(uint64_t, array_start, PyLong_FromUnsignedLongLong) +S_POP_NEXT_TO_PYOBJECT(uint64_t, map_start, PyLong_FromUnsignedLongLong) +S_POP_NEXT_TO_PYOBJECT(uint64_t, tag_val, PyLong_FromUnsignedLongLong) + +/** + * Generic helper to convert a cbor encoded data to PyObject + */ +static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_decoder *decoder) { + enum aws_cbor_element_type out_type = 0; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + switch (decoder->cached_context.type) { + case AWS_CBOR_TYPE_TAG: + /* Read the next data item */ + /* TODO: error check for the tag content?? */ + decoder->cached_context.type = AWS_CBOR_TYPE_MAX; + if (aws_cbor_decoder_consume_next_data_item(decoder)) { + return AWS_OP_ERR; + } + break; + case AWS_CBOR_TYPE_MAP_START: { + uint64_t num_map_item = decoder->cached_context.cbor_data.map_start; + /* Reset type */ + decoder->cached_context.type = AWS_CBOR_TYPE_MAX; + for (uint64_t i = 0; i < num_map_item; i++) { + /* Key */ + if (aws_cbor_decoder_consume_next_data_item(decoder)) { + return AWS_OP_ERR; + } + /* Value */ + if (aws_cbor_decoder_consume_next_data_item(decoder)) { + return AWS_OP_ERR; + } + } + break; + } + case AWS_CBOR_TYPE_ARRAY_START: { + uint64_t num_array_item = decoder->cached_context.cbor_data.array_start; + /* Reset type */ + decoder->cached_context.type = AWS_CBOR_TYPE_MAX; + for (uint64_t i = 0; i < num_array_item; i++) { + /* item */ + if (aws_cbor_decoder_consume_next_data_item(decoder)) { + return AWS_OP_ERR; + } + } + break; + } + case AWS_CBOR_TYPE_INF_BYTESTRING_START: + case AWS_CBOR_TYPE_INF_STRING_START: + case AWS_CBOR_TYPE_INF_ARRAY_START: + case AWS_CBOR_TYPE_INF_MAP_START: { + enum aws_cbor_element_type next_type; + /* Reset the cache for the tag val */ + decoder->cached_context.type = AWS_CBOR_TYPE_MAX; + if (aws_cbor_decoder_peek_type(decoder, &next_type)) { + return AWS_OP_ERR; + } + while (next_type != AWS_CBOR_TYPE_BREAK) { + if (aws_cbor_decoder_consume_next_data_item(decoder)) { + return AWS_OP_ERR; + } + if (aws_cbor_decoder_peek_type(decoder, &next_type)) { + return AWS_OP_ERR; + } + } + break; + } + + default: + break; + } + + /* Done, just reset the cache */ + decoder->cached_context.type = AWS_CBOR_TYPE_MAX; + return AWS_OP_SUCCESS; +} + +/*********************************** BINDINGS ***********************************************/ + PyObject *aws_py_cbor_decoder_pop_next_unsigned_int(PyObject *self, PyObject *args) { - uint64_t out_val; - S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_unsigned_val, out_val); - return PyLong_FromUnsignedLongLong(out_val); + S_GET_DECODER(); + return s_cbor_decoder_pop_next_unsigned_val_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_negative_int(PyObject *self, PyObject *args) { - uint64_t out_val; - S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_neg_val, out_val); - return PyLong_FromUnsignedLongLong(out_val); + S_GET_DECODER(); + return s_cbor_decoder_pop_next_neg_val_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_double(PyObject *self, PyObject *args) { - double out_val; - S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_double_val, out_val); - return PyFloat_FromDouble(out_val); + S_GET_DECODER(); + return s_cbor_decoder_pop_next_double_val_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_bool(PyObject *self, PyObject *args) { - bool out_val; - S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_boolean_val, out_val); - return PyBool_FromLong(out_val); + S_GET_DECODER(); + return s_cbor_decoder_pop_next_boolean_val_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_bytes(PyObject *self, PyObject *args) { - struct aws_byte_cursor out_val; - S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_bytes_val, out_val); - return PyBytes_FromAwsByteCursor(&out_val); + S_GET_DECODER(); + return s_cbor_decoder_pop_next_bytes_val_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_str(PyObject *self, PyObject *args) { - struct aws_byte_cursor out_val; - S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_str_val, out_val); - return PyUnicode_FromAwsByteCursor(&out_val); + S_GET_DECODER(); + return s_cbor_decoder_pop_next_str_val_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_array_start(PyObject *self, PyObject *args) { - uint64_t out_val; - S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_array_start, out_val); - return PyLong_FromUnsignedLongLong(out_val); + S_GET_DECODER(); + return s_cbor_decoder_pop_next_array_start_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_map_start(PyObject *self, PyObject *args) { - uint64_t out_val; - S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_map_start, out_val); - return PyLong_FromUnsignedLongLong(out_val); + S_GET_DECODER(); + return s_cbor_decoder_pop_next_map_start_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_tag_val(PyObject *self, PyObject *args) { - uint64_t out_val; - S_DECODER_METHOD_START(aws_cbor_decoder_pop_next_tag_val, out_val); - return PyLong_FromUnsignedLongLong(out_val); + S_GET_DECODER(); + return s_cbor_decoder_pop_next_tag_val_to_pyobject(decoder); +} + +PyObject *aws_py_cbor_decoder_pop_next_data_item(PyObject *self, PyObject *args) { + S_GET_DECODER(); + return s_cbor_decoder_pop_next_unsigned_val_to_pyobject(decoder); } diff --git a/source/cbor.h b/source/cbor.h index 90d456165..0bbe5994b 100644 --- a/source/cbor.h +++ b/source/cbor.h @@ -45,4 +45,6 @@ PyObject *aws_py_cbor_decoder_pop_next_array_start(PyObject *self, PyObject *arg PyObject *aws_py_cbor_decoder_pop_next_map_start(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_tag_val(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_data_item(PyObject *self, PyObject *args); + #endif /* AWS_CRT_PYTHON_CBOR_H */ diff --git a/source/module.c b/source/module.c index 83e0a241f..7c2ef18fb 100644 --- a/source/module.c +++ b/source/module.c @@ -852,6 +852,7 @@ static PyMethodDef s_module_methods[] = { AWS_PY_METHOD_DEF(cbor_decoder_pop_next_array_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_map_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_tag_val, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_data_item, METH_VARARGS), {NULL, NULL, 0, NULL}, }; diff --git a/test/test_cbor.py b/test/test_cbor.py index 29690f729..0bac1e454 100644 --- a/test/test_cbor.py +++ b/test/test_cbor.py @@ -8,17 +8,10 @@ class TestCBOR(NativeResourceTest): def test_cbor_encode_decode_int(self): encoder = AwsCborEncoder() - try: - # Overflow - encoder.write_int(2**64) - except OverflowError as e: - self.assertIsNotNone(e) - else: - self.assertTrue(False) try: # pass float instead of int encoder.write_int(1.1) - except ValueError as e: + except AssertionError as e: self.assertIsNotNone(e) else: self.assertTrue(False) @@ -29,6 +22,7 @@ def test_cbor_encode_decode_int(self): decoder = AwsCborDecoder(encoder.get_encoded_data()) try: + # The first val is a negative val. val = decoder.pop_next_unsigned_int() except RuntimeError as e: self.assertIsNotNone(e) @@ -41,28 +35,18 @@ def test_cbor_encode_decode_int(self): self.assertEqual(decoder.get_remaining_bytes_len(), 0) - def test_cbor_encode_decode_float(self): - encoder = AwsCborEncoder() - val_to_write = [-100.12, -100, 100, 2**64 - 1, -2**64, 18446744073709551616.0] - for val in val_to_write: - encoder.write_float(val) - - decoder = AwsCborDecoder(encoder.get_encoded_data()) - - for val in val_to_write: - t = decoder.pop_next_numeric() - self.assertEqual(t, val) - - self.assertEqual(decoder.get_remaining_bytes_len(), 0) - def test_cbor_encode_decode_data_item(self): encoder = AwsCborEncoder() + numerics = [-100.12, 100.0, -100, 100, 2**64 - 1, -2**64, 18446744073709551616.0] + another_map = { + "bignum": 2**65, + "negative bignum": -2**75, + 2**65: [1, 2, 3], + -2**65: [1, ["2", b"3"], {"most complicated": numerics}, 2**65, -2**75] + } val_to_write = { "mytest": b"write_test", - b"test_more": { - "another": 123, - b"more": [1, 2, 3] - }, + b"test_more": another_map, 2: { 2.3: ["a", "b", "c"] } @@ -71,5 +55,6 @@ def test_cbor_encode_decode_data_item(self): decoder = AwsCborDecoder(encoder.get_encoded_data()) + # Temp val only for easier to debug. t = decoder.pop_next_data_item() self.assertEqual(val_to_write, t) From da13a7f6d79d5073f320de0e6b1a307063f9ce07 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 22 Apr 2024 09:06:31 -0700 Subject: [PATCH 07/20] decode 2 --- awscrt/cbor.py | 78 +++++++-------- benchmark_cbor.py | 2 +- source/cbor.c | 243 ++++++++++++++++++++++++++++++++++++---------- source/cbor.h | 2 + source/module.c | 2 + 5 files changed, 237 insertions(+), 90 deletions(-) diff --git a/awscrt/cbor.py b/awscrt/cbor.py index 9f9c5c4bd..2a7cb60ac 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -283,46 +283,48 @@ def pop_next_inf_str(self) -> bytes: return result def pop_next_list(self) -> list: - type = _awscrt.cbor_decoder_peek_type(self._binding) - return_val = [] - if type == AwsCborElementType.InfArray: - # Consume the inf_array - self.consume_next_element() - while type != AwsCborElementType.Break: - return_val.append(self.pop_next_data_item()) - type = _awscrt.cbor_decoder_peek_type(self._binding) - # Consume the break - self.consume_next_element() - return return_val - elif type == AwsCborElementType.ArrayStart: - number_elements = self.pop_next_array_start() - for i in range(number_elements): - return_val.append(self.pop_next_data_item()) - return return_val - else: - raise ValueError("the cbor src is not a list to decode") + return _awscrt.cbor_decoder_pop_next_py_list(self._binding) + # type = _awscrt.cbor_decoder_peek_type(self._binding) + # return_val = [] + # if type == AwsCborElementType.InfArray: + # # Consume the inf_array + # self.consume_next_element() + # while type != AwsCborElementType.Break: + # return_val.append(self.pop_next_data_item()) + # type = _awscrt.cbor_decoder_peek_type(self._binding) + # # Consume the break + # self.consume_next_element() + # return return_val + # elif type == AwsCborElementType.ArrayStart: + # number_elements = self.pop_next_array_start() + # for i in range(number_elements): + # return_val.append(self.pop_next_data_item()) + # return return_val + # else: + # raise ValueError("the cbor src is not a list to decode") def pop_next_map(self) -> dict: - type = _awscrt.cbor_decoder_peek_type(self._binding) - return_val = {} - if type == AwsCborElementType.InfMap: - # Consume the inf_map - self.consume_next_element() - while type != AwsCborElementType.Break: - return_val[self.pop_next_data_item()] = self.pop_next_data_item() - type = _awscrt.cbor_decoder_peek_type(self._binding) - # Consume the break - self.consume_next_element() - return return_val - elif type == AwsCborElementType.MapStart: - number_elements = self.pop_next_map_start() - for i in range(number_elements): - key = self.pop_next_data_item() - value = self.pop_next_data_item() - return_val[key] = value - return return_val - else: - raise ValueError("the cbor src is not a map to decode") + return _awscrt.cbor_decoder_pop_next_py_dict(self._binding) + # type = _awscrt.cbor_decoder_peek_type(self._binding) + # return_val = {} + # if type == AwsCborElementType.InfMap: + # # Consume the inf_map + # self.consume_next_element() + # while type != AwsCborElementType.Break: + # return_val[self.pop_next_data_item()] = self.pop_next_data_item() + # type = _awscrt.cbor_decoder_peek_type(self._binding) + # # Consume the break + # self.consume_next_element() + # return return_val + # elif type == AwsCborElementType.MapStart: + # number_elements = self.pop_next_map_start() + # for i in range(number_elements): + # key = self.pop_next_data_item() + # value = self.pop_next_data_item() + # return_val[key] = value + # return return_val + # else: + # raise ValueError("the cbor src is not a map to decode") def pop_next_data_item(self) -> Any: # TODO: timestamp, decimal fraction diff --git a/benchmark_cbor.py b/benchmark_cbor.py index f0d8f5fb3..655fa6a32 100644 --- a/benchmark_cbor.py +++ b/benchmark_cbor.py @@ -82,7 +82,7 @@ def random_number(r, n): print("CRT -- decode 2") run_start_ns = time.perf_counter_ns() decoder_2 = AwsCborDecoder(encoded) -decoder_2.consume_next_data_item() +crt_decoded = decoder_2.pop_next_data_item_2() run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) print(f"time passed: {run_secs} secs") diff --git a/source/cbor.c b/source/cbor.c index 3e065c8bf..da389bcac 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -286,79 +286,210 @@ S_POP_NEXT_TO_PYOBJECT(uint64_t, array_start, PyLong_FromUnsignedLongLong) S_POP_NEXT_TO_PYOBJECT(uint64_t, map_start, PyLong_FromUnsignedLongLong) S_POP_NEXT_TO_PYOBJECT(uint64_t, tag_val, PyLong_FromUnsignedLongLong) +static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_decoder *decoder); + /** - * Generic helper to convert a cbor encoded data to PyObject + * Generic helper to convert next data item to py_list */ -static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_decoder *decoder) { +static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct aws_cbor_decoder *decoder) { enum aws_cbor_element_type out_type = 0; if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } - switch (decoder->cached_context.type) { - case AWS_CBOR_TYPE_TAG: - /* Read the next data item */ - /* TODO: error check for the tag content?? */ - decoder->cached_context.type = AWS_CBOR_TYPE_MAX; - if (aws_cbor_decoder_consume_next_data_item(decoder)) { - return AWS_OP_ERR; + PyObject *array = NULL; + PyObject *item = NULL; + switch (out_type) { + case AWS_CBOR_TYPE_ARRAY_START: { + uint64_t num_array_item; + aws_cbor_decoder_pop_next_array_start(decoder, &num_array_item); + if (num_array_item > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, "number of array is too large to fit."); + return NULL; } - break; - case AWS_CBOR_TYPE_MAP_START: { - uint64_t num_map_item = decoder->cached_context.cbor_data.map_start; - /* Reset type */ - decoder->cached_context.type = AWS_CBOR_TYPE_MAX; - for (uint64_t i = 0; i < num_map_item; i++) { - /* Key */ - if (aws_cbor_decoder_consume_next_data_item(decoder)) { - return AWS_OP_ERR; + array = PyList_New((Py_ssize_t)num_array_item); + if (!array) { + return NULL; + } + for (size_t i = 0; i < num_array_item; ++i) { + item = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + if (!item) { + goto error; } - /* Value */ - if (aws_cbor_decoder_consume_next_data_item(decoder)) { - return AWS_OP_ERR; + PyList_SetItem(array, i, item); /* Steals reference to item */ + } + return array; + } + case AWS_CBOR_TYPE_INF_ARRAY_START: { + array = PyList_New(0); + if (!array) { + return NULL; + } + /* Consume the inf array start */ + aws_cbor_decoder_consume_next_element(decoder, NULL /*consumed_type*/); + aws_cbor_decoder_peek_type(decoder, &out_type); + while (out_type != AWS_CBOR_TYPE_BREAK) { + item = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + if (!item) { + goto error; + } + if (PyList_Append(array, item) == -1) { + goto error; + } + /* Append will not steal the reference, deref here. */ + Py_DECREF(item); + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + PyErr_AwsLastError(); + goto error; } } - break; + return array; } - case AWS_CBOR_TYPE_ARRAY_START: { - uint64_t num_array_item = decoder->cached_context.cbor_data.array_start; - /* Reset type */ - decoder->cached_context.type = AWS_CBOR_TYPE_MAX; - for (uint64_t i = 0; i < num_array_item; i++) { - /* item */ - if (aws_cbor_decoder_consume_next_data_item(decoder)) { - return AWS_OP_ERR; + default: + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } +error: + if (array) { + Py_DECREF(array); + } + return NULL; +} + +/** + * Generic helper to convert next data item to py_dict + */ +static PyObject *s_cbor_decoder_pop_next_data_item_to_py_dict(struct aws_cbor_decoder *decoder) { + enum aws_cbor_element_type out_type = 0; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + PyObject *dict = NULL; + PyObject *key = NULL; + PyObject *value = NULL; + switch (out_type) { + case AWS_CBOR_TYPE_MAP_START: { + uint64_t num_item; + aws_cbor_decoder_pop_next_map_start(decoder, &num_item); + if (num_item > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, "number of dict is too large to fit."); + return NULL; + } + dict = PyDict_New(); + if (!dict) { + return NULL; + } + for (size_t i = 0; i < num_item; ++i) { + key = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + value = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + if (!key || !value) { + goto error; + } + if (PyDict_SetItem(dict, key, value) == -1) { + goto error; } + Py_DECREF(key); + Py_DECREF(value); } - break; + return dict; } - case AWS_CBOR_TYPE_INF_BYTESTRING_START: - case AWS_CBOR_TYPE_INF_STRING_START: - case AWS_CBOR_TYPE_INF_ARRAY_START: case AWS_CBOR_TYPE_INF_MAP_START: { - enum aws_cbor_element_type next_type; - /* Reset the cache for the tag val */ - decoder->cached_context.type = AWS_CBOR_TYPE_MAX; - if (aws_cbor_decoder_peek_type(decoder, &next_type)) { - return AWS_OP_ERR; + dict = PyDict_New(); + if (!dict) { + return NULL; } - while (next_type != AWS_CBOR_TYPE_BREAK) { - if (aws_cbor_decoder_consume_next_data_item(decoder)) { - return AWS_OP_ERR; + /* Consume the inf array start */ + aws_cbor_decoder_consume_next_element(decoder, NULL /*consumed_type*/); + aws_cbor_decoder_peek_type(decoder, &out_type); + while (out_type != AWS_CBOR_TYPE_BREAK) { + key = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + value = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + if (!key || !value) { + goto error; } - if (aws_cbor_decoder_peek_type(decoder, &next_type)) { - return AWS_OP_ERR; + if (PyDict_SetItem(dict, key, value) == -1) { + goto error; + } + Py_DECREF(key); + Py_DECREF(value); + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + PyErr_AwsLastError(); + goto error; } } - break; + return dict; } - default: - break; + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } +error: + if (dict) { + Py_DECREF(dict); } + if (key) { + Py_DECREF(key); + } + if (value) { + Py_DECREF(value); + } + return NULL; +} - /* Done, just reset the cache */ - decoder->cached_context.type = AWS_CBOR_TYPE_MAX; - return AWS_OP_SUCCESS; +/** + * Generic helper to convert a cbor encoded data to PyObject + */ +static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_decoder *decoder) { + enum aws_cbor_element_type out_type = 0; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + switch (out_type) { + case AWS_CBOR_TYPE_UINT: + return s_cbor_decoder_pop_next_unsigned_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_NEGINT: { + /* The value from native code is -1 - val. */ + PyObject *minus_one = PyLong_FromLong(-1); + if (!minus_one) { + return NULL; + } + PyObject *val = s_cbor_decoder_pop_next_neg_val_to_pyobject(decoder); + if (!val) { + Py_DECREF(minus_one); + return NULL; + } + /* Get */ + PyObject *ret_val = PyNumber_Subtract(minus_one, val); + Py_DECREF(minus_one); + Py_DECREF(val); + return ret_val; + } + case AWS_CBOR_TYPE_DOUBLE: + return s_cbor_decoder_pop_next_double_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_BYTESTRING: + return s_cbor_decoder_pop_next_bytes_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_STRING: + return s_cbor_decoder_pop_next_str_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_BOOL: + return s_cbor_decoder_pop_next_boolean_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_NULL: + case AWS_CBOR_TYPE_UNDEFINE: + aws_cbor_decoder_consume_next_element(decoder, NULL); + Py_RETURN_NONE; + case AWS_CBOR_TYPE_MAP_START: + case AWS_CBOR_TYPE_INF_MAP_START: + return s_cbor_decoder_pop_next_data_item_to_py_dict(decoder); + case AWS_CBOR_TYPE_ARRAY_START: + case AWS_CBOR_TYPE_INF_ARRAY_START: + return s_cbor_decoder_pop_next_data_item_to_py_list(decoder); + case AWS_CBOR_TYPE_INF_BYTESTRING_START: + case AWS_CBOR_TYPE_INF_STRING_START: + case AWS_CBOR_TYPE_TAG: + /* TODO: handle those case */ + default: + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } + return NULL; } /*********************************** BINDINGS ***********************************************/ @@ -408,7 +539,17 @@ PyObject *aws_py_cbor_decoder_pop_next_tag_val(PyObject *self, PyObject *args) { return s_cbor_decoder_pop_next_tag_val_to_pyobject(decoder); } +PyObject *aws_py_cbor_decoder_pop_next_py_list(PyObject *self, PyObject *args) { + S_GET_DECODER(); + return s_cbor_decoder_pop_next_data_item_to_py_list(decoder); +} + +PyObject *aws_py_cbor_decoder_pop_next_py_dict(PyObject *self, PyObject *args) { + S_GET_DECODER(); + return s_cbor_decoder_pop_next_data_item_to_py_dict(decoder); +} + PyObject *aws_py_cbor_decoder_pop_next_data_item(PyObject *self, PyObject *args) { S_GET_DECODER(); - return s_cbor_decoder_pop_next_unsigned_val_to_pyobject(decoder); + return s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); } diff --git a/source/cbor.h b/source/cbor.h index 0bbe5994b..64b8e9d02 100644 --- a/source/cbor.h +++ b/source/cbor.h @@ -45,6 +45,8 @@ PyObject *aws_py_cbor_decoder_pop_next_array_start(PyObject *self, PyObject *arg PyObject *aws_py_cbor_decoder_pop_next_map_start(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_tag_val(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_py_list(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_py_dict(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_data_item(PyObject *self, PyObject *args); #endif /* AWS_CRT_PYTHON_CBOR_H */ diff --git a/source/module.c b/source/module.c index 7c2ef18fb..8cb645841 100644 --- a/source/module.c +++ b/source/module.c @@ -852,6 +852,8 @@ static PyMethodDef s_module_methods[] = { AWS_PY_METHOD_DEF(cbor_decoder_pop_next_array_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_map_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_tag_val, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_py_list, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_py_dict, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_data_item, METH_VARARGS), {NULL, NULL, 0, NULL}, }; From eefd3f0d7a0b0e9c499b95798da42502673c538b Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 22 Apr 2024 21:12:31 +0000 Subject: [PATCH 08/20] encoding rewrite --- awscrt/cbor.py | 100 ++++++++++-------- benchmark_cbor.py | 24 ++++- source/cbor.c | 257 +++++++++++++++++++++++++++++++++------------- source/cbor.h | 4 + source/module.c | 3 + test/test_cbor.py | 12 ++- 6 files changed, 284 insertions(+), 116 deletions(-) diff --git a/awscrt/cbor.py b/awscrt/cbor.py index 2a7cb60ac..3b10b5c1e 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -44,7 +44,7 @@ class AwsCborEncoder(NativeResource): def __init__(self): super().__init__() - self._binding = _awscrt.cbor_encoder_new() + self._binding = _awscrt.cbor_encoder_new(self) def get_encoded_data(self) -> bytes: """Return the current encoded data as bytes @@ -193,6 +193,24 @@ def write_list(self, val: list): for data_item in val: self.write_data_item(data_item) + def write_data_item_2(self, data_item: Any): + """Generic API to write any type of an data_item as cbor formatted. + TODO: timestamp <-> datetime?? Decimal fraction <-> decimal?? + + Args: + data_item (Any): any type of data_item. If the type is not supported to be converted to cbor format, ValueError will be raised. + """ + return _awscrt.cbor_encoder_write_data_item(self._binding, data_item) + + def print_key(self, key): + print(f"key: {key}") + + + def print_value(self, value): + print(f"value: {value}") + + def print_length(self, value): + print(f"length: {value}") class AwsCborDecoder(NativeResource): """ Decoder for CBOR """ @@ -283,48 +301,48 @@ def pop_next_inf_str(self) -> bytes: return result def pop_next_list(self) -> list: - return _awscrt.cbor_decoder_pop_next_py_list(self._binding) - # type = _awscrt.cbor_decoder_peek_type(self._binding) - # return_val = [] - # if type == AwsCborElementType.InfArray: - # # Consume the inf_array - # self.consume_next_element() - # while type != AwsCborElementType.Break: - # return_val.append(self.pop_next_data_item()) - # type = _awscrt.cbor_decoder_peek_type(self._binding) - # # Consume the break - # self.consume_next_element() - # return return_val - # elif type == AwsCborElementType.ArrayStart: - # number_elements = self.pop_next_array_start() - # for i in range(number_elements): - # return_val.append(self.pop_next_data_item()) - # return return_val - # else: - # raise ValueError("the cbor src is not a list to decode") + # return _awscrt.cbor_decoder_pop_next_py_list(self._binding) + type = _awscrt.cbor_decoder_peek_type(self._binding) + return_val = [] + if type == AwsCborElementType.InfArray: + # Consume the inf_array + self.consume_next_element() + while type != AwsCborElementType.Break: + return_val.append(self.pop_next_data_item()) + type = _awscrt.cbor_decoder_peek_type(self._binding) + # Consume the break + self.consume_next_element() + return return_val + elif type == AwsCborElementType.ArrayStart: + number_elements = self.pop_next_array_start() + for i in range(number_elements): + return_val.append(self.pop_next_data_item()) + return return_val + else: + raise ValueError("the cbor src is not a list to decode") def pop_next_map(self) -> dict: - return _awscrt.cbor_decoder_pop_next_py_dict(self._binding) - # type = _awscrt.cbor_decoder_peek_type(self._binding) - # return_val = {} - # if type == AwsCborElementType.InfMap: - # # Consume the inf_map - # self.consume_next_element() - # while type != AwsCborElementType.Break: - # return_val[self.pop_next_data_item()] = self.pop_next_data_item() - # type = _awscrt.cbor_decoder_peek_type(self._binding) - # # Consume the break - # self.consume_next_element() - # return return_val - # elif type == AwsCborElementType.MapStart: - # number_elements = self.pop_next_map_start() - # for i in range(number_elements): - # key = self.pop_next_data_item() - # value = self.pop_next_data_item() - # return_val[key] = value - # return return_val - # else: - # raise ValueError("the cbor src is not a map to decode") + # return _awscrt.cbor_decoder_pop_next_py_dict(self._binding) + type = _awscrt.cbor_decoder_peek_type(self._binding) + return_val = {} + if type == AwsCborElementType.InfMap: + # Consume the inf_map + self.consume_next_element() + while type != AwsCborElementType.Break: + return_val[self.pop_next_data_item()] = self.pop_next_data_item() + type = _awscrt.cbor_decoder_peek_type(self._binding) + # Consume the break + self.consume_next_element() + return return_val + elif type == AwsCborElementType.MapStart: + number_elements = self.pop_next_map_start() + for i in range(number_elements): + key = self.pop_next_data_item() + value = self.pop_next_data_item() + return_val[key] = value + return return_val + else: + raise ValueError("the cbor src is not a map to decode") def pop_next_data_item(self) -> Any: # TODO: timestamp, decimal fraction diff --git a/benchmark_cbor.py b/benchmark_cbor.py index 655fa6a32..035468a74 100644 --- a/benchmark_cbor.py +++ b/benchmark_cbor.py @@ -44,6 +44,8 @@ def random_number(r, n): t = TestData.test_hash(100000) +# print(t) + print("cbor2 -- encode") run_start_ns = time.perf_counter_ns() cbor2_encoded = cbor2.dumps(t) @@ -52,6 +54,21 @@ def random_number(r, n): print(f"time passed: {run_secs} secs") +print("CRT -- encode 2") +encoder_2 = AwsCborEncoder() + +run_start_ns = time.perf_counter_ns() +try: + encoder_2.write_data_item_2(t) + encoded_2 = encoder_2.get_encoded_data() +except Exception as e: + print(e) + +run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) +print(f"encoded MB: {bytes_to_MiB(len(encoded_2))}") +print(f"time passed: {run_secs} secs") + + print("CRT -- encode") encoder = AwsCborEncoder() @@ -62,7 +79,9 @@ def random_number(r, n): print(f"encoded MB: {bytes_to_MiB(len(encoded))}") print(f"time passed: {run_secs} secs") + print(cbor2_encoded == encoded) +print(cbor2_encoded == encoded_2) print("cbor2 -- decode") run_start_ns = time.perf_counter_ns() @@ -82,10 +101,13 @@ def random_number(r, n): print("CRT -- decode 2") run_start_ns = time.perf_counter_ns() decoder_2 = AwsCborDecoder(encoded) -crt_decoded = decoder_2.pop_next_data_item_2() +crt_decoded_2 = decoder_2.pop_next_data_item_2() run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) print(f"time passed: {run_secs} secs") print(crt_decoded == t) print(crt_decoded == decoded) + +print(crt_decoded_2 == t) +print(crt_decoded_2 == decoded) diff --git a/source/cbor.c b/source/cbor.c index da389bcac..3ad7440da 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -10,32 +10,55 @@ * ENCODE ******************************************************************************/ +struct encoder_binding { + struct aws_cbor_encoder *native; + + /* This reference is solely used for invoking callbacks, + * and is cleared after the final callback is invoked. + * If it were not cleared, circular references between the python object + * and its binding would prevent the GC from ever cleaning things up */ + PyObject *self_py; +}; + static const char *s_capsule_name_cbor_encoder = "aws_cbor_encoder"; static struct aws_cbor_encoder *s_cbor_encoder_from_capsule(PyObject *py_capsule) { - return PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_encoder); + struct encoder_binding *binding = PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_encoder); + if (!binding) { + return NULL; + } + return binding->native; } /* Runs when GC destroys the capsule */ static void s_cbor_encoder_capsule_destructor(PyObject *py_capsule) { - struct aws_cbor_encoder *encoder = s_cbor_encoder_from_capsule(py_capsule); - aws_cbor_encoder_release(encoder); + struct encoder_binding *binding = PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_encoder); + aws_cbor_encoder_release(binding->native); + aws_mem_release(aws_py_get_allocator(), binding); } PyObject *aws_py_cbor_encoder_new(PyObject *self, PyObject *args) { (void)self; - (void)args; + PyObject *py_self; + if (!PyArg_ParseTuple(args, "O", &py_self)) { + return NULL; + } - struct aws_cbor_encoder *encoder = aws_cbor_encoder_new(aws_py_get_allocator(), NULL); + struct encoder_binding *binding = aws_mem_calloc(aws_py_get_allocator(), 1, sizeof(struct encoder_binding)); + binding->native = aws_cbor_encoder_new(aws_py_get_allocator(), NULL); AWS_ASSERT(encoder != NULL); - PyObject *py_capsule = PyCapsule_New(encoder, s_capsule_name_cbor_encoder, s_cbor_encoder_capsule_destructor); + PyObject *py_capsule = PyCapsule_New(binding, s_capsule_name_cbor_encoder, s_cbor_encoder_capsule_destructor); if (!py_capsule) { - aws_cbor_encoder_release(encoder); + aws_cbor_encoder_release(binding->native); + aws_mem_release(aws_py_get_allocator(), binding); return NULL; } + /* The binding and the py_object have the same life time */ + binding->self_py = py_self; return py_capsule; } + #define S_ENCODER_METHOD_START(FMT, ...) \ (void)self; \ PyObject *py_capsule; \ @@ -65,101 +88,79 @@ PyObject *aws_py_cbor_encoder_get_encoded_data(PyObject *self, PyObject *args) { return PyBytes_FromStringAndSize((const char *)encoded_data.ptr, encoded_data.len); } +#define S_ENCODER_WRITE_PYOBJECT(ctype, py_conversion, field) \ + static PyObject *s_cbor_encoder_write_pyobject_as_##field(struct aws_cbor_encoder *encoder, PyObject *py_object) { \ + ctype data = py_conversion(py_object); \ + if (PyErr_Occurred()) { \ + return NULL; \ + } \ + aws_cbor_encoder_write_##field(encoder, data); \ + Py_RETURN_NONE; \ + } + +S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, uint) +S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, negint) +S_ENCODER_WRITE_PYOBJECT(double, PyFloat_AsDouble, double) +S_ENCODER_WRITE_PYOBJECT(struct aws_byte_cursor, aws_byte_cursor_from_pybytes, bytes) +S_ENCODER_WRITE_PYOBJECT(struct aws_byte_cursor, aws_byte_cursor_from_pyunicode, string) +S_ENCODER_WRITE_PYOBJECT(bool, PyObject_IsTrue, bool) + +S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, array_start) +S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, map_start) +S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, tag) + PyObject *aws_py_cbor_encoder_write_unsigned_int(PyObject *self, PyObject *args) { PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); - uint64_t data = PyLong_AsUnsignedLongLong(pylong); - if (PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_int is not a valid int to encode"); - return NULL; - } - aws_cbor_encoder_write_uint(encoder, data); - Py_RETURN_NONE; + return s_cbor_encoder_write_pyobject_as_uint(encoder, pylong); } PyObject *aws_py_cbor_encoder_write_negative_int(PyObject *self, PyObject *args) { PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); - uint64_t data = PyLong_AsUnsignedLongLong(pylong); - if (PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_int is not a valid int to encode"); - return NULL; - } - aws_cbor_encoder_write_negint(encoder, data); - Py_RETURN_NONE; + return s_cbor_encoder_write_pyobject_as_negint(encoder, pylong); } PyObject *aws_py_cbor_encoder_write_float(PyObject *self, PyObject *args) { PyObject *pyfloat; S_ENCODER_METHOD_START("O", &pyfloat); - double data = PyFloat_AsDouble(pyfloat); - /* Rely on the python convert to check the pyfloat is able to convert to double. */ - if (PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_float is not a valid double to encode"); - return NULL; - } - aws_cbor_encoder_write_double(encoder, data); - Py_RETURN_NONE; + return s_cbor_encoder_write_pyobject_as_double(encoder, pyfloat); } PyObject *aws_py_cbor_encoder_write_bytes(PyObject *self, PyObject *args) { - struct aws_byte_cursor bytes_data; - S_ENCODER_METHOD_START("y#", &bytes_data.ptr, &bytes_data.len); - aws_cbor_encoder_write_bytes(encoder, bytes_data); - Py_RETURN_NONE; + PyObject *py_bytes; + S_ENCODER_METHOD_START("O", &py_bytes); + return s_cbor_encoder_write_pyobject_as_bytes(encoder, py_bytes); } PyObject *aws_py_cbor_encoder_write_str(PyObject *self, PyObject *args) { - struct aws_byte_cursor str_data; - S_ENCODER_METHOD_START("s#", &str_data.ptr, &str_data.len); - aws_cbor_encoder_write_string(encoder, str_data); - Py_RETURN_NONE; + PyObject *py_str; + S_ENCODER_METHOD_START("O", &py_str); + return s_cbor_encoder_write_pyobject_as_string(encoder, py_str); } PyObject *aws_py_cbor_encoder_write_array_start(PyObject *self, PyObject *args) { PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); - uint64_t data = PyLong_AsUnsignedLongLong(pylong); - /* The python code has already checked the value */ - if (PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_array_start is not a valid int to encode"); - return NULL; - } - aws_cbor_encoder_write_array_start(encoder, data); - Py_RETURN_NONE; + return s_cbor_encoder_write_pyobject_as_array_start(encoder, pylong); } PyObject *aws_py_cbor_encoder_write_map_start(PyObject *self, PyObject *args) { PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); - uint64_t data = PyLong_AsUnsignedLongLong(pylong); - /* The python code has already checked the value */ - if (PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_map_start is not a valid int to encode"); - return NULL; - } - aws_cbor_encoder_write_map_start(encoder, data); - Py_RETURN_NONE; + return s_cbor_encoder_write_pyobject_as_map_start(encoder, pylong); } PyObject *aws_py_cbor_encoder_write_tag(PyObject *self, PyObject *args) { PyObject *pylong; S_ENCODER_METHOD_START("O", &pylong); - uint64_t data = PyLong_AsUnsignedLongLong(pylong); - /* The python code has already checked the value */ - if (PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "AwsCborEncoder.write_tag is not a valid int to encode"); - return NULL; - } - aws_cbor_encoder_write_tag(encoder, data); - Py_RETURN_NONE; + return s_cbor_encoder_write_pyobject_as_tag(encoder, pylong); } PyObject *aws_py_cbor_encoder_write_bool(PyObject *self, PyObject *args) { - int bool_val; - S_ENCODER_METHOD_START("p", &bool_val); - aws_cbor_encoder_write_bool(encoder, bool_val); - Py_RETURN_NONE; + PyObject *pybool; + S_ENCODER_METHOD_START("O", &pybool); + return s_cbor_encoder_write_pyobject_as_bool(encoder, pybool); } PyObject *aws_py_cbor_encoder_write_simple_types(PyObject *self, PyObject *args) { @@ -172,11 +173,128 @@ PyObject *aws_py_cbor_encoder_write_simple_types(PyObject *self, PyObject *args) default: Py_RETURN_NONE; - break; } Py_RETURN_NONE; } +static PyObject *s_cbor_encoder_write_pyobject(struct encoder_binding *encoder_binding, PyObject *py_object); + +static PyObject *s_cbor_encoder_write_pylong(struct encoder_binding *encoder_binding, PyObject *py_object) { + long val; + int overflow; + + val = PyLong_AsLongAndOverflow(py_object, &overflow); + if (overflow == 0) { + /* No overflow, just call into C */ + if (val >= 0) { + aws_cbor_encoder_write_uint(encoder_binding->native, (uint64_t)val); + } else { + aws_cbor_encoder_write_negint(encoder_binding->native, -1 - val); + } + } else { + /* TODO: handle it in C? */ + return PyObject_CallMethod(encoder_binding->self_py, "write_int", "(O)", py_object); + } + Py_RETURN_NONE; +} + +static PyObject *s_cbor_encoder_write_pylist(struct encoder_binding *encoder_binding, PyObject *py_list) { + Py_ssize_t size = PyList_Size(py_list); + aws_cbor_encoder_write_array_start(encoder_binding->native, (size_t)size); + for (Py_ssize_t i = 0; i < size; i++) { + PyObject *item = PyList_GetItem(py_list, i); + if (!item) { + return NULL; + } + s_cbor_encoder_write_pyobject(encoder_binding, item); + } + Py_RETURN_NONE; +} + +static PyObject *s_cbor_encoder_write_pydict(struct encoder_binding *encoder_binding, PyObject *py_dict) { + Py_ssize_t size = PyDict_Size(py_dict); + aws_cbor_encoder_write_map_start(encoder_binding->native, (size_t)size); + PyObject *key = NULL; + PyObject *value = NULL; + Py_ssize_t pos = 0; + + while (PyDict_Next(py_dict, &pos, &key, &value)) { + s_cbor_encoder_write_pyobject(encoder_binding, key); + s_cbor_encoder_write_pyobject(encoder_binding, value); + } + Py_RETURN_NONE; +} + +static PyObject *s_cbor_encoder_write_pyobject(struct encoder_binding *encoder_binding, PyObject *py_object) { + + if (PyLong_CheckExact(py_object)) { + /* Call to Python to write pylong, as it's too complicate */ + return s_cbor_encoder_write_pylong(encoder_binding, py_object); + } else if (PyFloat_CheckExact(py_object)) { + return s_cbor_encoder_write_pyobject_as_double(encoder_binding->native, py_object); + } else if (PyBool_Check(py_object)) { + return s_cbor_encoder_write_pyobject_as_bool(encoder_binding->native, py_object); + } else if (PyBytes_CheckExact(py_object)) { + return s_cbor_encoder_write_pyobject_as_bytes(encoder_binding->native, py_object); + } else if (PyUnicode_CheckExact(py_object)) { + return s_cbor_encoder_write_pyobject_as_string(encoder_binding->native, py_object); + } else if (PyList_CheckExact(py_object)) { + /* Write py_list */ + return s_cbor_encoder_write_pylist(encoder_binding, py_object); + } else if (PyDict_CheckExact(py_object)) { + /* Write py_dict */ + return s_cbor_encoder_write_pydict(encoder_binding, py_object); + } else if (py_object == Py_None) { + aws_cbor_encoder_write_null(encoder_binding->native); + } else { + PyErr_Format(PyExc_ValueError, "Not supported type %R", (PyObject *)Py_TYPE(py_object)); + } + + Py_RETURN_NONE; +} + +PyObject *aws_py_cbor_encoder_write_py_list(PyObject *self, PyObject *args) { + (void)self; + PyObject *py_object; + PyObject *py_capsule; + if (!PyArg_ParseTuple(args, "OO", &py_capsule, &py_object)) { + return NULL; + } + struct encoder_binding *binding = PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_encoder); + if (!binding) { + return NULL; + } + return s_cbor_encoder_write_pylist(binding, py_object); +} + +PyObject *aws_py_cbor_encoder_write_py_dict(PyObject *self, PyObject *args) { + (void)self; + PyObject *py_object; + PyObject *py_capsule; + if (!PyArg_ParseTuple(args, "OO", &py_capsule, &py_object)) { + return NULL; + } + struct encoder_binding *binding = PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_encoder); + if (!binding) { + return NULL; + } + return s_cbor_encoder_write_pydict(binding, py_object); +} + +PyObject *aws_py_cbor_encoder_write_data_item(PyObject *self, PyObject *args) { + (void)self; + PyObject *py_object; + PyObject *py_capsule; + if (!PyArg_ParseTuple(args, "OO", &py_capsule, &py_object)) { + return NULL; + } + struct encoder_binding *binding = PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_encoder); + if (!binding) { + return NULL; + } + return s_cbor_encoder_write_pyobject(binding, py_object); +} + /******************************************************************************* * DECODE ******************************************************************************/ @@ -262,7 +380,6 @@ PyObject *aws_py_cbor_decoder_get_remaining_bytes_len(PyObject *self, PyObject * PyObject *aws_py_cbor_decoder_consume_next_element(PyObject *self, PyObject *args) { enum aws_cbor_element_type out_type; S_DECODER_METHOD_START(aws_cbor_decoder_consume_next_element, out_type); - /* TODO: an convert from C type to the Python type */ Py_RETURN_NONE; } @@ -274,8 +391,6 @@ PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *a Py_RETURN_NONE; } -// static PyObject *s_decode(struct aws_cbor_decoder *decoder) {} - S_POP_NEXT_TO_PYOBJECT(uint64_t, unsigned_val, PyLong_FromUnsignedLongLong) S_POP_NEXT_TO_PYOBJECT(uint64_t, neg_val, PyLong_FromUnsignedLongLong) S_POP_NEXT_TO_PYOBJECT(double, double_val, PyFloat_FromDouble) @@ -289,7 +404,7 @@ S_POP_NEXT_TO_PYOBJECT(uint64_t, tag_val, PyLong_FromUnsignedLongLong) static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_decoder *decoder); /** - * Generic helper to convert next data item to py_list + * helper to convert next data item to py_list */ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct aws_cbor_decoder *decoder) { enum aws_cbor_element_type out_type = 0; @@ -356,7 +471,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct aws_cbor_de } /** - * Generic helper to convert next data item to py_dict + * helper to convert next data item to py_dict */ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_dict(struct aws_cbor_decoder *decoder) { enum aws_cbor_element_type out_type = 0; diff --git a/source/cbor.h b/source/cbor.h index 64b8e9d02..fbf62aade 100644 --- a/source/cbor.h +++ b/source/cbor.h @@ -26,6 +26,10 @@ PyObject *aws_py_cbor_encoder_write_bool(PyObject *self, PyObject *args); /* Encode the types without value needed. The arg is the type to encode. */ PyObject *aws_py_cbor_encoder_write_simple_types(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_py_list(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_py_dict(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_data_item(PyObject *self, PyObject *args); + /******************************************************************************* * DECODE ******************************************************************************/ diff --git a/source/module.c b/source/module.c index 8cb645841..c7ed96730 100644 --- a/source/module.c +++ b/source/module.c @@ -836,6 +836,9 @@ static PyMethodDef s_module_methods[] = { AWS_PY_METHOD_DEF(cbor_encoder_write_tag, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_write_bool, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_write_simple_types, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_py_list, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_py_dict, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_data_item, METH_VARARGS), /* CBOR Decode */ AWS_PY_METHOD_DEF(cbor_decoder_new, METH_VARARGS), diff --git a/test/test_cbor.py b/test/test_cbor.py index 0bac1e454..1d0dec18a 100644 --- a/test/test_cbor.py +++ b/test/test_cbor.py @@ -49,12 +49,18 @@ def test_cbor_encode_decode_data_item(self): b"test_more": another_map, 2: { 2.3: ["a", "b", "c"] - } + }, + "empty map": {}, + "empty array": [], + "True": True, + "False": False, } - encoder.write_data_item(val_to_write) + encoder.write_data_item_2(val_to_write) decoder = AwsCborDecoder(encoder.get_encoded_data()) # Temp val only for easier to debug. t = decoder.pop_next_data_item() - self.assertEqual(val_to_write, t) + print(t) + print(val_to_write) + self.assertEqual(val_to_write, t) \ No newline at end of file From 9623e51490a8a09761bc0a1ccf88ee24c19e1dc9 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 22 Apr 2024 14:18:56 -0700 Subject: [PATCH 09/20] add empty str and empty bytes --- test/test_cbor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_cbor.py b/test/test_cbor.py index 1d0dec18a..7c0bc4ccb 100644 --- a/test/test_cbor.py +++ b/test/test_cbor.py @@ -54,6 +54,8 @@ def test_cbor_encode_decode_data_item(self): "empty array": [], "True": True, "False": False, + "empty str": "", + "empty bytes": b"", } encoder.write_data_item_2(val_to_write) @@ -61,6 +63,4 @@ def test_cbor_encode_decode_data_item(self): # Temp val only for easier to debug. t = decoder.pop_next_data_item() - print(t) - print(val_to_write) - self.assertEqual(val_to_write, t) \ No newline at end of file + self.assertEqual(val_to_write, t) From d1dd6ad1fa74d904b1033dadf7f7be6edd591a57 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 22 Apr 2024 21:40:16 +0000 Subject: [PATCH 10/20] reordering --- awscrt/cbor.py | 23 ++++------------------- benchmark_cbor.py | 25 ++++++++++++------------- source/cbor.c | 10 +++++----- 3 files changed, 21 insertions(+), 37 deletions(-) diff --git a/awscrt/cbor.py b/awscrt/cbor.py index 3b10b5c1e..4d43cf958 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -182,16 +182,11 @@ def write_data_item(self, data_item: Any): else: raise ValueError(f"not supported type for data_item: {data_item}") - def write_dict(self, val: dict): - self.write_map_start(len(val)) - for key, value in val.items(): - self.write_data_item(key) - self.write_data_item(value) - def write_list(self, val: list): - self.write_array_start(len(val)) - for data_item in val: - self.write_data_item(data_item) + return _awscrt.cbor_encoder_write_py_list(self._binding, val) + + def write_dict(self, val: dict): + return _awscrt.cbor_encoder_write_py_dict(self._binding, val) def write_data_item_2(self, data_item: Any): """Generic API to write any type of an data_item as cbor formatted. @@ -202,16 +197,6 @@ def write_data_item_2(self, data_item: Any): """ return _awscrt.cbor_encoder_write_data_item(self._binding, data_item) - def print_key(self, key): - print(f"key: {key}") - - - def print_value(self, value): - print(f"value: {value}") - - def print_length(self, value): - print(f"length: {value}") - class AwsCborDecoder(NativeResource): """ Decoder for CBOR """ diff --git a/benchmark_cbor.py b/benchmark_cbor.py index 035468a74..0bdb564af 100644 --- a/benchmark_cbor.py +++ b/benchmark_cbor.py @@ -54,32 +54,31 @@ def random_number(r, n): print(f"time passed: {run_secs} secs") + +print("CRT -- encode") +encoder = AwsCborEncoder() + +run_start_ns = time.perf_counter_ns() +encoder.write_data_item(t) +encoded = encoder.get_encoded_data() +run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) +print(f"encoded MB: {bytes_to_MiB(len(encoded))}") +print(f"time passed: {run_secs} secs") + + print("CRT -- encode 2") encoder_2 = AwsCborEncoder() - run_start_ns = time.perf_counter_ns() try: encoder_2.write_data_item_2(t) encoded_2 = encoder_2.get_encoded_data() except Exception as e: print(e) - run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) print(f"encoded MB: {bytes_to_MiB(len(encoded_2))}") print(f"time passed: {run_secs} secs") -print("CRT -- encode") -encoder = AwsCborEncoder() - -run_start_ns = time.perf_counter_ns() -encoder.write_data_item(t) -encoded = encoder.get_encoded_data() -run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) -print(f"encoded MB: {bytes_to_MiB(len(encoded))}") -print(f"time passed: {run_secs} secs") - - print(cbor2_encoded == encoded) print(cbor2_encoded == encoded_2) diff --git a/source/cbor.c b/source/cbor.c index 3ad7440da..974726856 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -13,10 +13,7 @@ struct encoder_binding { struct aws_cbor_encoder *native; - /* This reference is solely used for invoking callbacks, - * and is cleared after the final callback is invoked. - * If it were not cleared, circular references between the python object - * and its binding would prevent the GC from ever cleaning things up */ + /* Encoder has simple lifetime, no async/multi-thread allowed. */ PyObject *self_py; }; @@ -227,6 +224,9 @@ static PyObject *s_cbor_encoder_write_pydict(struct encoder_binding *encoder_bin static PyObject *s_cbor_encoder_write_pyobject(struct encoder_binding *encoder_binding, PyObject *py_object) { + /** + * TODO: timestamp <-> datetime?? Decimal fraction <-> decimal?? + */ if (PyLong_CheckExact(py_object)) { /* Call to Python to write pylong, as it's too complicate */ return s_cbor_encoder_write_pylong(encoder_binding, py_object); @@ -599,7 +599,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_d case AWS_CBOR_TYPE_INF_BYTESTRING_START: case AWS_CBOR_TYPE_INF_STRING_START: case AWS_CBOR_TYPE_TAG: - /* TODO: handle those case */ + /* TODO: handle those case. Give more detail of unhandled tags */ default: aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); return PyErr_AwsLastError(); From f4a5730ea70576290472ac26027ed168871e0842 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 22 Apr 2024 16:42:21 -0700 Subject: [PATCH 11/20] add more supports --- awscrt/cbor.py | 12 ++++- benchmark_cbor.py | 4 -- crt/aws-c-common | 2 +- source/cbor.c | 112 +++++++++++++++++++++++++++++++++++++++++++--- test/test_cbor.py | 8 ++-- 5 files changed, 122 insertions(+), 16 deletions(-) diff --git a/awscrt/cbor.py b/awscrt/cbor.py index 4d43cf958..4f2fdcf97 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -183,10 +183,17 @@ def write_data_item(self, data_item: Any): raise ValueError(f"not supported type for data_item: {data_item}") def write_list(self, val: list): - return _awscrt.cbor_encoder_write_py_list(self._binding, val) + # return _awscrt.cbor_encoder_write_py_list(self._binding, val) + self.write_array_start(len(val)) + for data_item in val: + self.write_data_item(data_item) def write_dict(self, val: dict): - return _awscrt.cbor_encoder_write_py_dict(self._binding, val) + # return _awscrt.cbor_encoder_write_py_dict(self._binding, val) + self.write_map_start(len(val)) + for key, value in val.items(): + self.write_data_item(key) + self.write_data_item(value) def write_data_item_2(self, data_item: Any): """Generic API to write any type of an data_item as cbor formatted. @@ -197,6 +204,7 @@ def write_data_item_2(self, data_item: Any): """ return _awscrt.cbor_encoder_write_data_item(self._binding, data_item) + class AwsCborDecoder(NativeResource): """ Decoder for CBOR """ diff --git a/benchmark_cbor.py b/benchmark_cbor.py index 0bdb564af..1486b375b 100644 --- a/benchmark_cbor.py +++ b/benchmark_cbor.py @@ -54,7 +54,6 @@ def random_number(r, n): print(f"time passed: {run_secs} secs") - print("CRT -- encode") encoder = AwsCborEncoder() @@ -106,7 +105,4 @@ def random_number(r, n): print(f"time passed: {run_secs} secs") print(crt_decoded == t) -print(crt_decoded == decoded) - print(crt_decoded_2 == t) -print(crt_decoded_2 == decoded) diff --git a/crt/aws-c-common b/crt/aws-c-common index 3859f2737..6ee522700 160000 --- a/crt/aws-c-common +++ b/crt/aws-c-common @@ -1 +1 @@ -Subproject commit 3859f27370996f0f16abb129f4f6b54c89c22147 +Subproject commit 6ee522700fa329a0d729aa7bef4af94e2231f4b2 diff --git a/source/cbor.c b/source/cbor.c index 974726856..fc754c367 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -407,7 +407,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_d * helper to convert next data item to py_list */ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct aws_cbor_decoder *decoder) { - enum aws_cbor_element_type out_type = 0; + enum aws_cbor_element_type out_type = AWS_CBOR_TYPE_MAX; if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } @@ -474,7 +474,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct aws_cbor_de * helper to convert next data item to py_dict */ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_dict(struct aws_cbor_decoder *decoder) { - enum aws_cbor_element_type out_type = 0; + enum aws_cbor_element_type out_type = AWS_CBOR_TYPE_MAX; if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } @@ -550,11 +550,109 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_dict(struct aws_cbor_de return NULL; } +/** + * helper to get the next inf byte + */ +static PyObject *s_cbor_decoder_pop_next_inf_bytes_to_py_bytes(struct aws_cbor_decoder *decoder) { + enum aws_cbor_element_type out_type = AWS_CBOR_TYPE_MAX; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + if (out_type != AWS_CBOR_TYPE_INF_BYTESTRING_START) { + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } + /* consume the bytes start element */ + aws_cbor_decoder_consume_next_element(decoder, NULL); + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + /* Empty bytes */ + PyObject *result = PyBytes_FromStringAndSize(NULL, 0); + while (out_type != AWS_CBOR_TYPE_BREAK) { + PyObject *next_part = s_cbor_decoder_pop_next_bytes_val_to_pyobject(decoder); + if (!next_part) { + Py_DECREF(result); + return NULL; + } + /* The reference to the old value of bytes will be stolen and next_part will be del. */ + PyBytes_ConcatAndDel(&result, next_part); + if (!result) { + return NULL; + } + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + } + return result; +} + +/** + * helper to get the next inf string + */ +static PyObject *s_cbor_decoder_pop_next_inf_string_to_py_str(struct aws_cbor_decoder *decoder) { + enum aws_cbor_element_type out_type = AWS_CBOR_TYPE_MAX; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + if (out_type != AWS_CBOR_TYPE_INF_STRING_START) { + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } + /* consume the bytes start element */ + aws_cbor_decoder_consume_next_element(decoder, NULL); + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + /* Empty string */ + PyObject *result = PyUnicode_FromStringAndSize(NULL, 0); + while (out_type != AWS_CBOR_TYPE_BREAK) { + PyObject *next_part = s_cbor_decoder_pop_next_str_val_to_pyobject(decoder); + if (!next_part) { + Py_DECREF(result); + return NULL; + } + /* Returns a new reference and keep the arguments unchanged. */ + PyObject *concat_val = PyUnicode_Concat(result, next_part); + Py_DECREF(result); + Py_DECREF(next_part); + if (!concat_val) { + return NULL; + } + result = concat_val; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + } + return result; +} + +/** + * Generic helper to convert a cbor encoded data to PyObject + */ +static PyObject *s_cbor_decoder_pop_next_tag_to_pyobject(struct aws_cbor_decoder *decoder) { + uint64_t out_tag_val = 0; + if (aws_cbor_decoder_pop_next_tag_val(decoder, &out_tag_val)) { + return PyErr_AwsLastError(); + } + /* TODO: implement those tags */ + switch (out_tag_val) { + case AWS_CBOR_TAG_EPOCH_TIME: + case AWS_CBOR_TAG_UNSIGNED_BIGNUM: + case AWS_CBOR_TAG_NEGATIVE_BIGNUM: + case AWS_CBOR_TAG_DECIMAL_FRACTION: + default: + PyErr_SetString(PyExc_ValueError, "Unsupported tag value: %" PRIu64 ".", out_tag_val); + return NULL; + } + Py_RETURN_NONE; +} + /** * Generic helper to convert a cbor encoded data to PyObject */ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_decoder *decoder) { - enum aws_cbor_element_type out_type = 0; + enum aws_cbor_element_type out_type = AWS_CBOR_TYPE_MAX; if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } @@ -572,7 +670,6 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_d Py_DECREF(minus_one); return NULL; } - /* Get */ PyObject *ret_val = PyNumber_Subtract(minus_one, val); Py_DECREF(minus_one); Py_DECREF(val); @@ -587,19 +684,24 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_d case AWS_CBOR_TYPE_BOOL: return s_cbor_decoder_pop_next_boolean_val_to_pyobject(decoder); case AWS_CBOR_TYPE_NULL: + /* fall through */ case AWS_CBOR_TYPE_UNDEFINE: aws_cbor_decoder_consume_next_element(decoder, NULL); Py_RETURN_NONE; case AWS_CBOR_TYPE_MAP_START: + /* fall through */ case AWS_CBOR_TYPE_INF_MAP_START: return s_cbor_decoder_pop_next_data_item_to_py_dict(decoder); case AWS_CBOR_TYPE_ARRAY_START: + /* fall through */ case AWS_CBOR_TYPE_INF_ARRAY_START: return s_cbor_decoder_pop_next_data_item_to_py_list(decoder); case AWS_CBOR_TYPE_INF_BYTESTRING_START: + return s_cbor_decoder_pop_next_inf_bytes_to_py_bytes(decoder); case AWS_CBOR_TYPE_INF_STRING_START: + return s_cbor_decoder_pop_next_inf_string_to_py_str(decoder); case AWS_CBOR_TYPE_TAG: - /* TODO: handle those case. Give more detail of unhandled tags */ + return s_cbor_decoder_pop_next_tag_to_pyobject(decoder); default: aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); return PyErr_AwsLastError(); diff --git a/test/test_cbor.py b/test/test_cbor.py index 7c0bc4ccb..c3b162333 100644 --- a/test/test_cbor.py +++ b/test/test_cbor.py @@ -39,10 +39,10 @@ def test_cbor_encode_decode_data_item(self): encoder = AwsCborEncoder() numerics = [-100.12, 100.0, -100, 100, 2**64 - 1, -2**64, 18446744073709551616.0] another_map = { - "bignum": 2**65, - "negative bignum": -2**75, - 2**65: [1, 2, 3], - -2**65: [1, ["2", b"3"], {"most complicated": numerics}, 2**65, -2**75] + # "bignum": 2**65, TODO: big number are not supported from C impl yet. + # "negative bignum": -2**75, + 2**6: [1, 2, 3], + -2**6: [1, ["2", b"3"], {"most complicated": numerics}, 2**6, -2**7] } val_to_write = { "mytest": b"write_test", From 7a6709996e93f4bd1fd3301756277695a7a79e94 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 22 Apr 2024 16:48:45 -0700 Subject: [PATCH 12/20] clean up the impl --- awscrt/cbor.py | 166 ++-------------------------------------------- benchmark_cbor.py | 108 ------------------------------ source/cbor.c | 2 +- test/test_cbor.py | 8 ++- 4 files changed, 12 insertions(+), 272 deletions(-) delete mode 100644 benchmark_cbor.py diff --git a/awscrt/cbor.py b/awscrt/cbor.py index 4f2fdcf97..33121d022 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -55,6 +55,7 @@ def get_encoded_data(self) -> bytes: return _awscrt.cbor_encoder_get_encoded_data(self._binding) def write_int(self, val: int): + # TODO: maybe not support bignum for now. Not needed? """Write an int as cbor formatted, val less than -2^64 will be encoded as Negative bignum for CBOR val between -2^64 to -1, inclusive, will be encode as negative integer for CBOR @@ -156,46 +157,13 @@ def write_null(self): def write_bool(self, val: bool): return _awscrt.cbor_encoder_write_bool(self._binding, val) - def write_data_item(self, data_item: Any): - """Generic API to write any type of an data_item as cbor formatted. - TODO: timestamp <-> datetime?? Decimal fraction <-> decimal?? - - Args: - data_item (Any): any type of data_item. If the type is not supported to be converted to cbor format, ValueError will be raised. - """ - if isinstance(data_item, str): - self.write_string(data_item) - elif isinstance(data_item, bytes): - self.write_bytes(data_item) - elif isinstance(data_item, int): - self.write_int(data_item) - elif isinstance(data_item, float): - self.write_float(data_item) - elif isinstance(data_item, dict): - self.write_dict(data_item) - elif isinstance(data_item, list): - self.write_list(data_item) - elif isinstance(data_item, bool): - self.write_bool(data_item) - elif data_item is None: - self.write_null() - else: - raise ValueError(f"not supported type for data_item: {data_item}") - def write_list(self, val: list): - # return _awscrt.cbor_encoder_write_py_list(self._binding, val) - self.write_array_start(len(val)) - for data_item in val: - self.write_data_item(data_item) + return _awscrt.cbor_encoder_write_py_list(self._binding, val) def write_dict(self, val: dict): - # return _awscrt.cbor_encoder_write_py_dict(self._binding, val) - self.write_map_start(len(val)) - for key, value in val.items(): - self.write_data_item(key) - self.write_data_item(value) + return _awscrt.cbor_encoder_write_py_dict(self._binding, val) - def write_data_item_2(self, data_item: Any): + def write_data_item(self, data_item: Any): """Generic API to write any type of an data_item as cbor formatted. TODO: timestamp <-> datetime?? Decimal fraction <-> decimal?? @@ -253,133 +221,11 @@ def pop_next_map_start(self) -> int: def pop_next_tag_val(self) -> int: return _awscrt.cbor_decoder_pop_next_tag_val(self._binding) - def pop_next_numeric(self) -> Union[int, float]: - type = _awscrt.cbor_decoder_peek_type(self._binding) - if type == AwsCborElementType.UnsignedInt: - return self.pop_next_unsigned_int() - elif type == AwsCborElementType.NegativeInt: - return self.pop_next_negative_int() - elif type == AwsCborElementType.Float: - return self.pop_next_double() - # TODO: support bignum? - # TODO: Instead of ValueError, probably raise the same error from C with the same AWS_ERROR_CBOR_UNEXPECTED_TYPE - raise ValueError("the cbor src is not a numeric type to decode") - - def pop_next_inf_bytes(self) -> bytes: - type = _awscrt.cbor_decoder_peek_type(self._binding) - if type != AwsCborElementType.InfBytes: - raise ValueError("the cbor src is not an indefinite bytes to decode") - result = b"" - # Consume the inf_bytes - self.consume_next_element() - while type != AwsCborElementType.Break: - result += self.pop_next_bytes() - type = _awscrt.cbor_decoder_peek_type(self._binding) - # Consume the break - self.consume_next_element() - return result - - def pop_next_inf_str(self) -> bytes: - type = _awscrt.cbor_decoder_peek_type(self._binding) - if type != AwsCborElementType.InfStr: - raise ValueError("the cbor src is not an indefinite string to decode") - result = "" - # Consume the inf_str - self.consume_next_element() - while type != AwsCborElementType.Break: - result += self.pop_next_str() - type = _awscrt.cbor_decoder_peek_type(self._binding) - # Consume the break - self.consume_next_element() - return result - def pop_next_list(self) -> list: - # return _awscrt.cbor_decoder_pop_next_py_list(self._binding) - type = _awscrt.cbor_decoder_peek_type(self._binding) - return_val = [] - if type == AwsCborElementType.InfArray: - # Consume the inf_array - self.consume_next_element() - while type != AwsCborElementType.Break: - return_val.append(self.pop_next_data_item()) - type = _awscrt.cbor_decoder_peek_type(self._binding) - # Consume the break - self.consume_next_element() - return return_val - elif type == AwsCborElementType.ArrayStart: - number_elements = self.pop_next_array_start() - for i in range(number_elements): - return_val.append(self.pop_next_data_item()) - return return_val - else: - raise ValueError("the cbor src is not a list to decode") + return _awscrt.cbor_decoder_pop_next_py_list(self._binding) def pop_next_map(self) -> dict: - # return _awscrt.cbor_decoder_pop_next_py_dict(self._binding) - type = _awscrt.cbor_decoder_peek_type(self._binding) - return_val = {} - if type == AwsCborElementType.InfMap: - # Consume the inf_map - self.consume_next_element() - while type != AwsCborElementType.Break: - return_val[self.pop_next_data_item()] = self.pop_next_data_item() - type = _awscrt.cbor_decoder_peek_type(self._binding) - # Consume the break - self.consume_next_element() - return return_val - elif type == AwsCborElementType.MapStart: - number_elements = self.pop_next_map_start() - for i in range(number_elements): - key = self.pop_next_data_item() - value = self.pop_next_data_item() - return_val[key] = value - return return_val - else: - raise ValueError("the cbor src is not a map to decode") + return _awscrt.cbor_decoder_pop_next_py_dict(self._binding) def pop_next_data_item(self) -> Any: - # TODO: timestamp, decimal fraction - # TODO: maybe wrote all those if elif in the binding level, so that we can use switch at least??? - # And possible to avoid some call cross language boundary??? - # TODO: If it fails in the middle, with bunch of stuff already popped. Do we want a way to resume?? - type = _awscrt.cbor_decoder_peek_type(self._binding) - if type == AwsCborElementType.UnsignedInt or \ - type == AwsCborElementType.NegativeInt or \ - type == AwsCborElementType.Float: - return self.pop_next_numeric() - elif type == AwsCborElementType.Bytes: - return self.pop_next_bytes() - elif type == AwsCborElementType.String: - return self.pop_next_str() - elif type == AwsCborElementType.Bool: - return self.pop_next_bool() - elif type == AwsCborElementType.Null or \ - type == AwsCborElementType.Undefined: - # Treat both NULL and Undefined as None. - self.consume_next_element() - return None - elif type == AwsCborElementType.ArrayStart or \ - type == AwsCborElementType.InfArray: - return self.pop_next_list() - elif type == AwsCborElementType.MapStart or \ - type == AwsCborElementType.InfMap: - return self.pop_next_map() - elif type == AwsCborElementType.InfBytes: - return self.pop_next_inf_bytes() - elif type == AwsCborElementType.InfStr: - return self.pop_next_inf_str() - elif type == AwsCborElementType.Tag: - tag_val = self.pop_next_tag_val() - if tag_val == AwsCborTags.NegativeBigNum: - bytes_val = self.pop_next_bytes() - return -1 - int.from_bytes(bytes_val, "big") - elif tag_val == AwsCborTags.UnsignedBigNum: - bytes_val = self.pop_next_bytes() - return int.from_bytes(bytes_val, "big") - else: - raise ValueError(f"unsupported tag value: {tag_val}") - else: - raise ValueError(f"unsupported type: {type.name}") - - def pop_next_data_item_2(self) -> Any: return _awscrt.cbor_decoder_pop_next_data_item(self._binding) diff --git a/benchmark_cbor.py b/benchmark_cbor.py deleted file mode 100644 index 1486b375b..000000000 --- a/benchmark_cbor.py +++ /dev/null @@ -1,108 +0,0 @@ -from awscrt.cbor import * -import random -import time -import cbor2 - - -def ns_to_secs(ns: int) -> float: - return ns / 1_000_000_000.0 - - -def bytes_to_MiB(bytes: int) -> float: - return bytes / float(1024**2) - - -class TestData: - # generate predictable, but variable test values of different types - @staticmethod - def random_value(i=0, seed=0): - r = random.Random(i + seed) # use the index as the seed for predictable results - random_number = TestData.random_number(r, 5) - if random_number == 0: - return f"Some String value {i}" - elif random_number == 1: - return r.random() # a float value - elif random_number == 2: - return TestData.random_number(r, 100000) # a large integer - elif random_number == 3: - return list(range(TestData.random_number(r, 100))) # an array - elif random_number == 4: - return {"a": 1, "b": 2, "c": 3} # a hash - else: - return "generic string" - - # generate a predictable, but variable hash with a range of data types - @staticmethod - def test_hash(n_keys=5, seed=0): - return {f"key{i}": TestData.random_value(i, seed) for i in range(n_keys)} - - @staticmethod - def random_number(r, n): - return int(r.random() * n) - - -t = TestData.test_hash(100000) - - -# print(t) - -print("cbor2 -- encode") -run_start_ns = time.perf_counter_ns() -cbor2_encoded = cbor2.dumps(t) -run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) -print(f"encoded MB: {bytes_to_MiB(len(cbor2_encoded))}") -print(f"time passed: {run_secs} secs") - - -print("CRT -- encode") -encoder = AwsCborEncoder() - -run_start_ns = time.perf_counter_ns() -encoder.write_data_item(t) -encoded = encoder.get_encoded_data() -run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) -print(f"encoded MB: {bytes_to_MiB(len(encoded))}") -print(f"time passed: {run_secs} secs") - - -print("CRT -- encode 2") -encoder_2 = AwsCborEncoder() -run_start_ns = time.perf_counter_ns() -try: - encoder_2.write_data_item_2(t) - encoded_2 = encoder_2.get_encoded_data() -except Exception as e: - print(e) -run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) -print(f"encoded MB: {bytes_to_MiB(len(encoded_2))}") -print(f"time passed: {run_secs} secs") - - -print(cbor2_encoded == encoded) -print(cbor2_encoded == encoded_2) - -print("cbor2 -- decode") -run_start_ns = time.perf_counter_ns() -decoded = cbor2.loads(encoded) -run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) -print(f"time passed: {run_secs} secs") - -print("CRT -- decode") -run_start_ns = time.perf_counter_ns() -decoder = AwsCborDecoder(encoded) -crt_decoded = decoder.pop_next_data_item() - -run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) -print(f"time passed: {run_secs} secs") - - -print("CRT -- decode 2") -run_start_ns = time.perf_counter_ns() -decoder_2 = AwsCborDecoder(encoded) -crt_decoded_2 = decoder_2.pop_next_data_item_2() - -run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) -print(f"time passed: {run_secs} secs") - -print(crt_decoded == t) -print(crt_decoded_2 == t) diff --git a/source/cbor.c b/source/cbor.c index fc754c367..33ea4e39b 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -642,7 +642,7 @@ static PyObject *s_cbor_decoder_pop_next_tag_to_pyobject(struct aws_cbor_decoder case AWS_CBOR_TAG_NEGATIVE_BIGNUM: case AWS_CBOR_TAG_DECIMAL_FRACTION: default: - PyErr_SetString(PyExc_ValueError, "Unsupported tag value: %" PRIu64 ".", out_tag_val); + PyErr_Format(PyExc_ValueError, "Unsupported tag value: %" PRIu64 ".", out_tag_val); return NULL; } Py_RETURN_NONE; diff --git a/test/test_cbor.py b/test/test_cbor.py index c3b162333..d9e4abc78 100644 --- a/test/test_cbor.py +++ b/test/test_cbor.py @@ -30,7 +30,7 @@ def test_cbor_encode_decode_int(self): self.assertTrue(False) for val in val_to_write: - t = decoder.pop_next_numeric() + t = decoder.pop_next_data_item() self.assertEqual(t, val) self.assertEqual(decoder.get_remaining_bytes_len(), 0) @@ -39,7 +39,7 @@ def test_cbor_encode_decode_data_item(self): encoder = AwsCborEncoder() numerics = [-100.12, 100.0, -100, 100, 2**64 - 1, -2**64, 18446744073709551616.0] another_map = { - # "bignum": 2**65, TODO: big number are not supported from C impl yet. + # "bignum": 2**65, # TODO: big number are not supported from C impl yet. # "negative bignum": -2**75, 2**6: [1, 2, 3], -2**6: [1, ["2", b"3"], {"most complicated": numerics}, 2**6, -2**7] @@ -57,10 +57,12 @@ def test_cbor_encode_decode_data_item(self): "empty str": "", "empty bytes": b"", } - encoder.write_data_item_2(val_to_write) + encoder.write_data_item(val_to_write) decoder = AwsCborDecoder(encoder.get_encoded_data()) # Temp val only for easier to debug. t = decoder.pop_next_data_item() self.assertEqual(val_to_write, t) + +# TODO: More tests: inf str/bytes/array/map From 15cee91d8f8a25c3e28d71856a54321793f4c792 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 24 Apr 2024 15:11:42 -0700 Subject: [PATCH 13/20] adjust based on the change from C --- awscrt/cbor.py | 22 +++++++-------- crt/aws-c-common | 2 +- source/cbor.c | 72 ++++++++++++++++++++++++------------------------ source/cbor.h | 6 ++-- source/module.c | 6 ++-- 5 files changed, 54 insertions(+), 54 deletions(-) diff --git a/awscrt/cbor.py b/awscrt/cbor.py index 33121d022..526aa8a4b 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -14,7 +14,7 @@ class AwsCborElementType(IntEnum): NegativeInt = 1 Float = 2 Bytes = 3 - String = 4 + Text = 4 ArrayStart = 5 MapStart = 6 Tag = 7 @@ -22,10 +22,10 @@ class AwsCborElementType(IntEnum): Null = 9 Undefined = 10 Break = 11 - InfBytes = 12 - InfStr = 13 - InfArray = 14 - InfMap = 15 + IndefBytes = 12 + IndefStr = 13 + IndefArray = 14 + IndefMap = 15 class AwsCborTags(IntEnum): @@ -108,13 +108,13 @@ def write_bytes(self, val: bytes): """ return _awscrt.cbor_encoder_write_bytes(self._binding, val) - def write_string(self, val: str): - """Write string as cbor formatted + def write_text(self, val: str): + """Write text as cbor formatted Args: val (str): value to be encoded and written to the encoded data. """ - return _awscrt.cbor_encoder_write_str(self._binding, val) + return _awscrt.cbor_encoder_write_text(self._binding, val) def write_array_start(self, number_entries: int): """Add a start of array element. @@ -201,7 +201,7 @@ def pop_next_negative_int(self) -> int: return -1 - val def pop_next_double(self) -> float: - return _awscrt.cbor_decoder_pop_next_double(self._binding) + return _awscrt.cbor_decoder_pop_next_float(self._binding) def pop_next_bool(self) -> bool: return _awscrt.cbor_decoder_pop_next_bool(self._binding) @@ -209,8 +209,8 @@ def pop_next_bool(self) -> bool: def pop_next_bytes(self) -> bytes: return _awscrt.cbor_decoder_pop_next_bytes(self._binding) - def pop_next_str(self) -> str: - return _awscrt.cbor_decoder_pop_next_str(self._binding) + def pop_next_text(self) -> str: + return _awscrt.cbor_decoder_pop_next_text(self._binding) def pop_next_array_start(self) -> int: return _awscrt.cbor_decoder_pop_next_array_start(self._binding) diff --git a/crt/aws-c-common b/crt/aws-c-common index 6ee522700..64cf7f355 160000 --- a/crt/aws-c-common +++ b/crt/aws-c-common @@ -1 +1 @@ -Subproject commit 6ee522700fa329a0d729aa7bef4af94e2231f4b2 +Subproject commit 64cf7f355574a4d81687e192f9c6f1f19ba77f4d diff --git a/source/cbor.c b/source/cbor.c index 33ea4e39b..0369595b5 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -30,7 +30,7 @@ static struct aws_cbor_encoder *s_cbor_encoder_from_capsule(PyObject *py_capsule /* Runs when GC destroys the capsule */ static void s_cbor_encoder_capsule_destructor(PyObject *py_capsule) { struct encoder_binding *binding = PyCapsule_GetPointer(py_capsule, s_capsule_name_cbor_encoder); - aws_cbor_encoder_release(binding->native); + aws_cbor_encoder_destroy(binding->native); aws_mem_release(aws_py_get_allocator(), binding); } @@ -42,11 +42,11 @@ PyObject *aws_py_cbor_encoder_new(PyObject *self, PyObject *args) { } struct encoder_binding *binding = aws_mem_calloc(aws_py_get_allocator(), 1, sizeof(struct encoder_binding)); - binding->native = aws_cbor_encoder_new(aws_py_get_allocator(), NULL); + binding->native = aws_cbor_encoder_new(aws_py_get_allocator()); AWS_ASSERT(encoder != NULL); PyObject *py_capsule = PyCapsule_New(binding, s_capsule_name_cbor_encoder, s_cbor_encoder_capsule_destructor); if (!py_capsule) { - aws_cbor_encoder_release(binding->native); + aws_cbor_encoder_destroy(binding->native); aws_mem_release(aws_py_get_allocator(), binding); return NULL; } @@ -97,9 +97,9 @@ PyObject *aws_py_cbor_encoder_get_encoded_data(PyObject *self, PyObject *args) { S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, uint) S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, negint) -S_ENCODER_WRITE_PYOBJECT(double, PyFloat_AsDouble, double) +S_ENCODER_WRITE_PYOBJECT(double, PyFloat_AsDouble, float) S_ENCODER_WRITE_PYOBJECT(struct aws_byte_cursor, aws_byte_cursor_from_pybytes, bytes) -S_ENCODER_WRITE_PYOBJECT(struct aws_byte_cursor, aws_byte_cursor_from_pyunicode, string) +S_ENCODER_WRITE_PYOBJECT(struct aws_byte_cursor, aws_byte_cursor_from_pyunicode, text) S_ENCODER_WRITE_PYOBJECT(bool, PyObject_IsTrue, bool) S_ENCODER_WRITE_PYOBJECT(uint64_t, PyLong_AsUnsignedLongLong, array_start) @@ -121,7 +121,7 @@ PyObject *aws_py_cbor_encoder_write_negative_int(PyObject *self, PyObject *args) PyObject *aws_py_cbor_encoder_write_float(PyObject *self, PyObject *args) { PyObject *pyfloat; S_ENCODER_METHOD_START("O", &pyfloat); - return s_cbor_encoder_write_pyobject_as_double(encoder, pyfloat); + return s_cbor_encoder_write_pyobject_as_float(encoder, pyfloat); } PyObject *aws_py_cbor_encoder_write_bytes(PyObject *self, PyObject *args) { @@ -130,10 +130,10 @@ PyObject *aws_py_cbor_encoder_write_bytes(PyObject *self, PyObject *args) { return s_cbor_encoder_write_pyobject_as_bytes(encoder, py_bytes); } -PyObject *aws_py_cbor_encoder_write_str(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_encoder_write_text(PyObject *self, PyObject *args) { PyObject *py_str; S_ENCODER_METHOD_START("O", &py_str); - return s_cbor_encoder_write_pyobject_as_string(encoder, py_str); + return s_cbor_encoder_write_pyobject_as_text(encoder, py_str); } PyObject *aws_py_cbor_encoder_write_array_start(PyObject *self, PyObject *args) { @@ -231,13 +231,13 @@ static PyObject *s_cbor_encoder_write_pyobject(struct encoder_binding *encoder_b /* Call to Python to write pylong, as it's too complicate */ return s_cbor_encoder_write_pylong(encoder_binding, py_object); } else if (PyFloat_CheckExact(py_object)) { - return s_cbor_encoder_write_pyobject_as_double(encoder_binding->native, py_object); + return s_cbor_encoder_write_pyobject_as_float(encoder_binding->native, py_object); } else if (PyBool_Check(py_object)) { return s_cbor_encoder_write_pyobject_as_bool(encoder_binding->native, py_object); } else if (PyBytes_CheckExact(py_object)) { return s_cbor_encoder_write_pyobject_as_bytes(encoder_binding->native, py_object); } else if (PyUnicode_CheckExact(py_object)) { - return s_cbor_encoder_write_pyobject_as_string(encoder_binding->native, py_object); + return s_cbor_encoder_write_pyobject_as_text(encoder_binding->native, py_object); } else if (PyList_CheckExact(py_object)) { /* Write py_list */ return s_cbor_encoder_write_pylist(encoder_binding, py_object); @@ -391,12 +391,12 @@ PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *a Py_RETURN_NONE; } -S_POP_NEXT_TO_PYOBJECT(uint64_t, unsigned_val, PyLong_FromUnsignedLongLong) -S_POP_NEXT_TO_PYOBJECT(uint64_t, neg_val, PyLong_FromUnsignedLongLong) -S_POP_NEXT_TO_PYOBJECT(double, double_val, PyFloat_FromDouble) +S_POP_NEXT_TO_PYOBJECT(uint64_t, unsigned_int_val, PyLong_FromUnsignedLongLong) +S_POP_NEXT_TO_PYOBJECT(uint64_t, negative_int_val, PyLong_FromUnsignedLongLong) +S_POP_NEXT_TO_PYOBJECT(double, float_val, PyFloat_FromDouble) S_POP_NEXT_TO_PYOBJECT(bool, boolean_val, PyBool_FromLong) S_POP_NEXT_TO_PYOBJECT_CURSOR(bytes_val, PyBytes_FromAwsByteCursor) -S_POP_NEXT_TO_PYOBJECT_CURSOR(str_val, PyUnicode_FromAwsByteCursor) +S_POP_NEXT_TO_PYOBJECT_CURSOR(text_val, PyUnicode_FromAwsByteCursor) S_POP_NEXT_TO_PYOBJECT(uint64_t, array_start, PyLong_FromUnsignedLongLong) S_POP_NEXT_TO_PYOBJECT(uint64_t, map_start, PyLong_FromUnsignedLongLong) S_POP_NEXT_TO_PYOBJECT(uint64_t, tag_val, PyLong_FromUnsignedLongLong) @@ -434,7 +434,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct aws_cbor_de } return array; } - case AWS_CBOR_TYPE_INF_ARRAY_START: { + case AWS_CBOR_TYPE_INDEF_ARRAY_START: { array = PyList_New(0); if (!array) { return NULL; @@ -507,7 +507,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_dict(struct aws_cbor_de } return dict; } - case AWS_CBOR_TYPE_INF_MAP_START: { + case AWS_CBOR_TYPE_INDEF_MAP_START: { dict = PyDict_New(); if (!dict) { return NULL; @@ -558,7 +558,7 @@ static PyObject *s_cbor_decoder_pop_next_inf_bytes_to_py_bytes(struct aws_cbor_d if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } - if (out_type != AWS_CBOR_TYPE_INF_BYTESTRING_START) { + if (out_type != AWS_CBOR_TYPE_INDEF_BYTES_START) { aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); return PyErr_AwsLastError(); } @@ -595,7 +595,7 @@ static PyObject *s_cbor_decoder_pop_next_inf_string_to_py_str(struct aws_cbor_de if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } - if (out_type != AWS_CBOR_TYPE_INF_STRING_START) { + if (out_type != AWS_CBOR_TYPE_INDEF_TEXT_START) { aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); return PyErr_AwsLastError(); } @@ -607,7 +607,7 @@ static PyObject *s_cbor_decoder_pop_next_inf_string_to_py_str(struct aws_cbor_de /* Empty string */ PyObject *result = PyUnicode_FromStringAndSize(NULL, 0); while (out_type != AWS_CBOR_TYPE_BREAK) { - PyObject *next_part = s_cbor_decoder_pop_next_str_val_to_pyobject(decoder); + PyObject *next_part = s_cbor_decoder_pop_next_text_val_to_pyobject(decoder); if (!next_part) { Py_DECREF(result); return NULL; @@ -658,14 +658,14 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_d } switch (out_type) { case AWS_CBOR_TYPE_UINT: - return s_cbor_decoder_pop_next_unsigned_val_to_pyobject(decoder); + return s_cbor_decoder_pop_next_unsigned_int_val_to_pyobject(decoder); case AWS_CBOR_TYPE_NEGINT: { /* The value from native code is -1 - val. */ PyObject *minus_one = PyLong_FromLong(-1); if (!minus_one) { return NULL; } - PyObject *val = s_cbor_decoder_pop_next_neg_val_to_pyobject(decoder); + PyObject *val = s_cbor_decoder_pop_next_negative_int_val_to_pyobject(decoder); if (!val) { Py_DECREF(minus_one); return NULL; @@ -675,12 +675,12 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_d Py_DECREF(val); return ret_val; } - case AWS_CBOR_TYPE_DOUBLE: - return s_cbor_decoder_pop_next_double_val_to_pyobject(decoder); - case AWS_CBOR_TYPE_BYTESTRING: + case AWS_CBOR_TYPE_FLOAT: + return s_cbor_decoder_pop_next_float_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_BYTES: return s_cbor_decoder_pop_next_bytes_val_to_pyobject(decoder); - case AWS_CBOR_TYPE_STRING: - return s_cbor_decoder_pop_next_str_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_TEXT: + return s_cbor_decoder_pop_next_text_val_to_pyobject(decoder); case AWS_CBOR_TYPE_BOOL: return s_cbor_decoder_pop_next_boolean_val_to_pyobject(decoder); case AWS_CBOR_TYPE_NULL: @@ -690,15 +690,15 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_d Py_RETURN_NONE; case AWS_CBOR_TYPE_MAP_START: /* fall through */ - case AWS_CBOR_TYPE_INF_MAP_START: + case AWS_CBOR_TYPE_INDEF_MAP_START: return s_cbor_decoder_pop_next_data_item_to_py_dict(decoder); case AWS_CBOR_TYPE_ARRAY_START: /* fall through */ - case AWS_CBOR_TYPE_INF_ARRAY_START: + case AWS_CBOR_TYPE_INDEF_ARRAY_START: return s_cbor_decoder_pop_next_data_item_to_py_list(decoder); - case AWS_CBOR_TYPE_INF_BYTESTRING_START: + case AWS_CBOR_TYPE_INDEF_BYTES_START: return s_cbor_decoder_pop_next_inf_bytes_to_py_bytes(decoder); - case AWS_CBOR_TYPE_INF_STRING_START: + case AWS_CBOR_TYPE_INDEF_TEXT_START: return s_cbor_decoder_pop_next_inf_string_to_py_str(decoder); case AWS_CBOR_TYPE_TAG: return s_cbor_decoder_pop_next_tag_to_pyobject(decoder); @@ -713,17 +713,17 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_d PyObject *aws_py_cbor_decoder_pop_next_unsigned_int(PyObject *self, PyObject *args) { S_GET_DECODER(); - return s_cbor_decoder_pop_next_unsigned_val_to_pyobject(decoder); + return s_cbor_decoder_pop_next_unsigned_int_val_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_negative_int(PyObject *self, PyObject *args) { S_GET_DECODER(); - return s_cbor_decoder_pop_next_neg_val_to_pyobject(decoder); + return s_cbor_decoder_pop_next_negative_int_val_to_pyobject(decoder); } -PyObject *aws_py_cbor_decoder_pop_next_double(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_float(PyObject *self, PyObject *args) { S_GET_DECODER(); - return s_cbor_decoder_pop_next_double_val_to_pyobject(decoder); + return s_cbor_decoder_pop_next_float_val_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_bool(PyObject *self, PyObject *args) { @@ -736,9 +736,9 @@ PyObject *aws_py_cbor_decoder_pop_next_bytes(PyObject *self, PyObject *args) { return s_cbor_decoder_pop_next_bytes_val_to_pyobject(decoder); } -PyObject *aws_py_cbor_decoder_pop_next_str(PyObject *self, PyObject *args) { +PyObject *aws_py_cbor_decoder_pop_next_text(PyObject *self, PyObject *args) { S_GET_DECODER(); - return s_cbor_decoder_pop_next_str_val_to_pyobject(decoder); + return s_cbor_decoder_pop_next_text_val_to_pyobject(decoder); } PyObject *aws_py_cbor_decoder_pop_next_array_start(PyObject *self, PyObject *args) { diff --git a/source/cbor.h b/source/cbor.h index fbf62aade..094a330e2 100644 --- a/source/cbor.h +++ b/source/cbor.h @@ -17,7 +17,7 @@ PyObject *aws_py_cbor_encoder_write_unsigned_int(PyObject *self, PyObject *args) PyObject *aws_py_cbor_encoder_write_negative_int(PyObject *self, PyObject *args); PyObject *aws_py_cbor_encoder_write_float(PyObject *self, PyObject *args); PyObject *aws_py_cbor_encoder_write_bytes(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_encoder_write_str(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_encoder_write_text(PyObject *self, PyObject *args); PyObject *aws_py_cbor_encoder_write_array_start(PyObject *self, PyObject *args); PyObject *aws_py_cbor_encoder_write_map_start(PyObject *self, PyObject *args); PyObject *aws_py_cbor_encoder_write_tag(PyObject *self, PyObject *args); @@ -41,10 +41,10 @@ PyObject *aws_py_cbor_decoder_consume_next_element(PyObject *self, PyObject *arg PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_unsigned_int(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_negative_int(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_pop_next_double(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_float(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_bool(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_bytes(PyObject *self, PyObject *args); -PyObject *aws_py_cbor_decoder_pop_next_str(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_text(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_array_start(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_map_start(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_tag_val(PyObject *self, PyObject *args); diff --git a/source/module.c b/source/module.c index c7ed96730..cd3de5147 100644 --- a/source/module.c +++ b/source/module.c @@ -830,7 +830,7 @@ static PyMethodDef s_module_methods[] = { AWS_PY_METHOD_DEF(cbor_encoder_write_negative_int, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_write_float, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_write_bytes, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_encoder_write_str, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_encoder_write_text, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_write_array_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_write_map_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_encoder_write_tag, METH_VARARGS), @@ -848,10 +848,10 @@ static PyMethodDef s_module_methods[] = { AWS_PY_METHOD_DEF(cbor_decoder_consume_next_data_item, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_unsigned_int, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_negative_int, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_pop_next_double, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_float, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_bool, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_bytes, METH_VARARGS), - AWS_PY_METHOD_DEF(cbor_decoder_pop_next_str, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_text, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_array_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_map_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_tag_val, METH_VARARGS), From 78379e3939c3b60831b5007b3f142686c619d3e7 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 19 Jun 2024 10:59:32 -0700 Subject: [PATCH 14/20] apply the latest change --- awscrt/cbor.py | 54 ++++++++++++++++++++---------------------------- crt/aws-c-common | 2 +- source/cbor.c | 38 ++++++++++++++++++---------------- 3 files changed, 43 insertions(+), 51 deletions(-) diff --git a/awscrt/cbor.py b/awscrt/cbor.py index 526aa8a4b..00fc9eada 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -8,35 +8,25 @@ from typing import Union, Any -class AwsCborElementType(IntEnum): - # Corresponding to `enum aws_cbor_element_type` in aws/common/cbor.h - UnsignedInt = 0 - NegativeInt = 1 - Float = 2 - Bytes = 3 - Text = 4 - ArrayStart = 5 - MapStart = 6 - Tag = 7 - Bool = 8 - Null = 9 - Undefined = 10 - Break = 11 - IndefBytes = 12 - IndefStr = 13 - IndefArray = 14 - IndefMap = 15 - - -class AwsCborTags(IntEnum): - # Corresponding to `enum aws_cbor_tags` in aws/common/cbor.h - StandardTime = 0 - EpochTime = 1 - UnsignedBigNum = 2 - NegativeBigNum = 3 - DecimalFraction = 4 - BigFloat = 5 - Unclassified = 6 +class AwsCborType(IntEnum): + # Corresponding to `enum aws_cbor_type` in aws/common/cbor.h + Unknown = 0 + UnsignedInt = 1 + NegativeInt = 2 + Float = 3 + Bytes = 4 + Text = 5 + ArrayStart = 6 + MapStart = 7 + Tag = 8 + Bool = 9 + Null = 10 + Undefined = 11 + Break = 12 + IndefBytes = 13 + IndefStr = 14 + IndefArray = 15 + IndefMap = 16 class AwsCborEncoder(NativeResource): @@ -152,7 +142,7 @@ def write_tag(self, tag_number: int): return _awscrt.cbor_encoder_write_tag(self._binding, tag_number) def write_null(self): - return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborElementType.Null) + return _awscrt.cbor_encoder_write_simple_types(self._binding, AwsCborType.Null) def write_bool(self, val: bool): return _awscrt.cbor_encoder_write_bool(self._binding, val) @@ -181,8 +171,8 @@ def __init__(self, src: bytes): self._src = src self._binding = _awscrt.cbor_decoder_new(src) - def peek_next_type(self) -> AwsCborElementType: - return AwsCborElementType(_awscrt.cbor_decoder_peek_type(self._binding)) + def peek_next_type(self) -> AwsCborType: + return AwsCborType(_awscrt.cbor_decoder_peek_type(self._binding)) def get_remaining_bytes_len(self) -> int: return _awscrt.cbor_decoder_get_remaining_bytes_len(self._binding) diff --git a/crt/aws-c-common b/crt/aws-c-common index 64cf7f355..36104790d 160000 --- a/crt/aws-c-common +++ b/crt/aws-c-common @@ -1 +1 @@ -Subproject commit 64cf7f355574a4d81687e192f9c6f1f19ba77f4d +Subproject commit 36104790d241045d153e3e37227cf2ece26b3f8d diff --git a/source/cbor.c b/source/cbor.c index 0369595b5..a916ac364 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -307,7 +307,7 @@ static struct aws_cbor_decoder *s_cbor_decoder_from_capsule(PyObject *py_capsule /* Runs when GC destroys the capsule */ static void s_cbor_decoder_capsule_destructor(PyObject *py_capsule) { struct aws_cbor_decoder *decoder = s_cbor_decoder_from_capsule(py_capsule); - aws_cbor_decoder_release(decoder); + aws_cbor_decoder_destroy(decoder); } PyObject *aws_py_cbor_decoder_new(PyObject *self, PyObject *args) { @@ -319,11 +319,11 @@ PyObject *aws_py_cbor_decoder_new(PyObject *self, PyObject *args) { return NULL; } - struct aws_cbor_decoder *decoder = aws_cbor_decoder_new(aws_py_get_allocator(), &src); + struct aws_cbor_decoder *decoder = aws_cbor_decoder_new(aws_py_get_allocator(), src); AWS_ASSERT(decoder != NULL); PyObject *py_capsule = PyCapsule_New(decoder, s_capsule_name_cbor_decoder, s_cbor_decoder_capsule_destructor); if (!py_capsule) { - aws_cbor_decoder_release(decoder); + aws_cbor_decoder_destroy(decoder); return NULL; } @@ -366,7 +366,7 @@ PyObject *aws_py_cbor_decoder_new(PyObject *self, PyObject *args) { } PyObject *aws_py_cbor_decoder_peek_type(PyObject *self, PyObject *args) { - enum aws_cbor_element_type out_type; + enum aws_cbor_type out_type; S_DECODER_METHOD_START(aws_cbor_decoder_peek_type, out_type); return PyLong_FromSize_t(out_type); } @@ -378,14 +378,16 @@ PyObject *aws_py_cbor_decoder_get_remaining_bytes_len(PyObject *self, PyObject * } PyObject *aws_py_cbor_decoder_consume_next_element(PyObject *self, PyObject *args) { - enum aws_cbor_element_type out_type; - S_DECODER_METHOD_START(aws_cbor_decoder_consume_next_element, out_type); + S_GET_DECODER(); + if (aws_cbor_decoder_consume_next_single_element(decoder)) { + return PyErr_AwsLastError(); + } Py_RETURN_NONE; } PyObject *aws_py_cbor_decoder_consume_next_data_item(PyObject *self, PyObject *args) { S_GET_DECODER(); - if (aws_cbor_decoder_consume_next_data_item(decoder)) { + if (aws_cbor_decoder_consume_next_whole_data_item(decoder)) { return PyErr_AwsLastError(); } Py_RETURN_NONE; @@ -407,7 +409,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_d * helper to convert next data item to py_list */ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct aws_cbor_decoder *decoder) { - enum aws_cbor_element_type out_type = AWS_CBOR_TYPE_MAX; + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } @@ -440,7 +442,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct aws_cbor_de return NULL; } /* Consume the inf array start */ - aws_cbor_decoder_consume_next_element(decoder, NULL /*consumed_type*/); + aws_cbor_decoder_consume_next_single_element(decoder); aws_cbor_decoder_peek_type(decoder, &out_type); while (out_type != AWS_CBOR_TYPE_BREAK) { item = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); @@ -474,7 +476,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct aws_cbor_de * helper to convert next data item to py_dict */ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_dict(struct aws_cbor_decoder *decoder) { - enum aws_cbor_element_type out_type = AWS_CBOR_TYPE_MAX; + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } @@ -513,7 +515,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_dict(struct aws_cbor_de return NULL; } /* Consume the inf array start */ - aws_cbor_decoder_consume_next_element(decoder, NULL /*consumed_type*/); + aws_cbor_decoder_consume_next_single_element(decoder); aws_cbor_decoder_peek_type(decoder, &out_type); while (out_type != AWS_CBOR_TYPE_BREAK) { key = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); @@ -554,7 +556,7 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_py_dict(struct aws_cbor_de * helper to get the next inf byte */ static PyObject *s_cbor_decoder_pop_next_inf_bytes_to_py_bytes(struct aws_cbor_decoder *decoder) { - enum aws_cbor_element_type out_type = AWS_CBOR_TYPE_MAX; + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } @@ -563,7 +565,7 @@ static PyObject *s_cbor_decoder_pop_next_inf_bytes_to_py_bytes(struct aws_cbor_d return PyErr_AwsLastError(); } /* consume the bytes start element */ - aws_cbor_decoder_consume_next_element(decoder, NULL); + aws_cbor_decoder_consume_next_single_element(decoder); if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } @@ -591,7 +593,7 @@ static PyObject *s_cbor_decoder_pop_next_inf_bytes_to_py_bytes(struct aws_cbor_d * helper to get the next inf string */ static PyObject *s_cbor_decoder_pop_next_inf_string_to_py_str(struct aws_cbor_decoder *decoder) { - enum aws_cbor_element_type out_type = AWS_CBOR_TYPE_MAX; + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } @@ -600,7 +602,7 @@ static PyObject *s_cbor_decoder_pop_next_inf_string_to_py_str(struct aws_cbor_de return PyErr_AwsLastError(); } /* consume the bytes start element */ - aws_cbor_decoder_consume_next_element(decoder, NULL); + aws_cbor_decoder_consume_next_single_element(decoder); if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } @@ -652,7 +654,7 @@ static PyObject *s_cbor_decoder_pop_next_tag_to_pyobject(struct aws_cbor_decoder * Generic helper to convert a cbor encoded data to PyObject */ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_decoder *decoder) { - enum aws_cbor_element_type out_type = AWS_CBOR_TYPE_MAX; + enum aws_cbor_type out_type = AWS_CBOR_TYPE_UNKNOWN; if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } @@ -685,8 +687,8 @@ static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_d return s_cbor_decoder_pop_next_boolean_val_to_pyobject(decoder); case AWS_CBOR_TYPE_NULL: /* fall through */ - case AWS_CBOR_TYPE_UNDEFINE: - aws_cbor_decoder_consume_next_element(decoder, NULL); + case AWS_CBOR_TYPE_UNDEFINED: + aws_cbor_decoder_consume_next_single_element(decoder); Py_RETURN_NONE; case AWS_CBOR_TYPE_MAP_START: /* fall through */ From 3cd840b3b68d1903d992d515fb45763942fc3a15 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 19 Jun 2024 20:44:22 +0000 Subject: [PATCH 15/20] pick up the fix from C, and just ignore the whole vscode dir --- .gitignore | 7 +------ crt/aws-c-common | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 85e5eccff..d6360bbc0 100644 --- a/.gitignore +++ b/.gitignore @@ -350,12 +350,7 @@ poetry.toml pyrightconfig.json ### VisualStudioCode ### -.vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json -!.vscode/*.code-snippets +.vscode # Local History for Visual Studio Code .history/ diff --git a/crt/aws-c-common b/crt/aws-c-common index 36104790d..d39af0f44 160000 --- a/crt/aws-c-common +++ b/crt/aws-c-common @@ -1 +1 @@ -Subproject commit 36104790d241045d153e3e37227cf2ece26b3f8d +Subproject commit d39af0f442e4827547a16b2fc145674b8640cc4a From 9366b752ea4117178add4ab3466e08ad0f1498f1 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Fri, 21 Jun 2024 08:48:11 -0700 Subject: [PATCH 16/20] use tag --- crt/aws-c-common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crt/aws-c-common b/crt/aws-c-common index d39af0f44..6d974f92c 160000 --- a/crt/aws-c-common +++ b/crt/aws-c-common @@ -1 +1 @@ -Subproject commit d39af0f442e4827547a16b2fc145674b8640cc4a +Subproject commit 6d974f92c1d86391c1dcb1173239adf757c52b2d From 9e74ae42c2ea0ab5d6067ff20f4cdda5440dbd8b Mon Sep 17 00:00:00 2001 From: Dengke Date: Mon, 13 Jan 2025 10:50:38 -0800 Subject: [PATCH 17/20] amazon Q is great --- test/resources/decode-error-tests.json | 282 ++++ test/resources/decode-success-tests.json | 1528 ++++++++++++++++++++++ test/test_cbor.py | 110 +- 3 files changed, 1919 insertions(+), 1 deletion(-) create mode 100644 test/resources/decode-error-tests.json create mode 100644 test/resources/decode-success-tests.json diff --git a/test/resources/decode-error-tests.json b/test/resources/decode-error-tests.json new file mode 100644 index 000000000..d32f2308d --- /dev/null +++ b/test/resources/decode-error-tests.json @@ -0,0 +1,282 @@ +[ + { + "description": "TestDecode_InvalidArgument - map/2 - arg len 2 greater than remaining buf len", + "input": "b900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - tag/1 - arg len 1 greater than remaining buf len", + "input": "d8", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - major7/float64 - incomplete float64 at end of buf", + "input": "fb00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - negint/4 - arg len 4 greater than remaining buf len", + "input": "3a000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - negint/8 - arg len 8 greater than remaining buf len", + "input": "3b00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - string/4 - arg len 4 greater than remaining buf len", + "input": "7a000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - map/1 - arg len 1 greater than remaining buf len", + "input": "b8", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - map/4 - arg len 4 greater than remaining buf len", + "input": "ba000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - tag/2 - arg len 2 greater than remaining buf len", + "input": "d900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - uint/1 - arg len 1 greater than remaining buf len", + "input": "18", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - string/1 - arg len 1 greater than remaining buf len", + "input": "78", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - string/8 - arg len 8 greater than remaining buf len", + "input": "7b00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - string/2 - arg len 2 greater than remaining buf len", + "input": "7900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - list/2 - arg len 2 greater than remaining buf len", + "input": "9900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - slice/1 - arg len 1 greater than remaining buf len", + "input": "58", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - slice/4 - arg len 4 greater than remaining buf len", + "input": "5a000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - slice/8 - arg len 8 greater than remaining buf len", + "input": "5b00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - negint/? - unexpected minor value 31", + "input": "3f", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - tag/8 - arg len 8 greater than remaining buf len", + "input": "db00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - uint/2 - arg len 2 greater than remaining buf len", + "input": "1900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - uint/8 - arg len 8 greater than remaining buf len", + "input": "1b00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - negint/2 - arg len 2 greater than remaining buf len", + "input": "3900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - negint/1 - arg len 1 greater than remaining buf len", + "input": "38", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - list/8 - arg len 8 greater than remaining buf len", + "input": "9b00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - tag/4 - arg len 4 greater than remaining buf len", + "input": "da000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - major7/float32 - incomplete float32 at end of buf", + "input": "fa000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - uint/4 - arg len 4 greater than remaining buf len", + "input": "1a000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - slice/2 - arg len 2 greater than remaining buf len", + "input": "5900", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - list/4 - arg len 4 greater than remaining buf len", + "input": "9a000000", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - tag/? - unexpected minor value 31", + "input": "df", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - major7/? - unexpected minor value 31", + "input": "ff", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - uint/? - unexpected minor value 31", + "input": "1f", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - list/1 - arg len 1 greater than remaining buf len", + "input": "98", + "error": true + }, + { + "description": "TestDecode_InvalidArgument - map/8 - arg len 8 greater than remaining buf len", + "input": "bb00000000000000", + "error": true + }, + { + "description": "TestDecode_InvalidList - [] / eof after head - unexpected end of payload", + "input": "81", + "error": true + }, + { + "description": "TestDecode_InvalidList - [] / invalid item - arg len 1 greater than remaining buf len", + "input": "8118", + "error": true + }, + { + "description": "TestDecode_InvalidList - [_ ] / no break - expected break marker", + "input": "9f", + "error": true + }, + { + "description": "TestDecode_InvalidList - [_ ] / invalid item - arg len 1 greater than remaining buf len", + "input": "9f18", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {} / invalid key - slice len 1 greater than remaining buf len", + "input": "a17801", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {} / invalid value - arg len 1 greater than remaining buf len", + "input": "a163666f6f18", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {_ } / no break - expected break marker", + "input": "bf", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {_ } / invalid key - slice len 1 greater than remaining buf len", + "input": "bf7801", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {_ } / invalid value - arg len 1 greater than remaining buf len", + "input": "bf63666f6f18", + "error": true + }, + { + "description": "TestDecode_InvalidMap - {} / eof after head - unexpected end of payload", + "input": "a1", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - slice/1, not enough bytes - slice len 1 greater than remaining buf len", + "input": "5801", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - slice/?, nested indefinite - nested indefinite slice", + "input": "5f5f", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - string/?, no break - expected break marker", + "input": "7f", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - string/?, nested indefinite - nested indefinite slice", + "input": "7f7f", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - string/?, invalid nested definite - decode subslice: slice len 1 greater than remaining buf len", + "input": "7f7801", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - slice/?, no break - expected break marker", + "input": "5f", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - slice/?, invalid nested major - unexpected major type 3 in indefinite slice", + "input": "5f60", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - slice/?, invalid nested definite - decode subslice: slice len 1 greater than remaining buf len", + "input": "5f5801", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - string/1, not enough bytes - slice len 1 greater than remaining buf len", + "input": "7801", + "error": true + }, + { + "description": "TestDecode_InvalidSlice - string/?, invalid nested major - unexpected major type 2 in indefinite slice", + "input": "7f40", + "error": true + }, + { + "description": "TestDecode_InvalidTag - invalid value - arg len 1 greater than remaining buf len", + "input": "c118", + "error": true + }, + { + "description": "TestDecode_InvalidTag - eof - unexpected end of payload", + "input": "c1", + "error": true + } +] \ No newline at end of file diff --git a/test/resources/decode-success-tests.json b/test/resources/decode-success-tests.json new file mode 100644 index 000000000..a3d89502a --- /dev/null +++ b/test/resources/decode-success-tests.json @@ -0,0 +1,1528 @@ +[ + { + "description": "atomic - uint/0/max", + "input": "17", + "expect": { + "uint": 23 + } + }, + { + "description": "atomic - uint/2/min", + "input": "190000", + "expect": { + "uint": 0 + } + }, + { + "description": "atomic - uint/8/min", + "input": "1b0000000000000000", + "expect": { + "uint": 0 + } + }, + { + "description": "atomic - negint/1/min", + "input": "3800", + "expect": { + "negint": -1 + } + }, + { + "description": "atomic - negint/2/min", + "input": "390000", + "expect": { + "negint": -1 + } + }, + { + "description": "atomic - false", + "input": "f4", + "expect": { + "bool": false + } + }, + { + "description": "atomic - uint/1/min", + "input": "1800", + "expect": { + "uint": 0 + } + }, + { + "description": "atomic - negint/8/min", + "input": "3b0000000000000000", + "expect": { + "negint": -1 + } + }, + { + "description": "atomic - float64/+Inf", + "input": "fb7ff0000000000000", + "expect": { + "float64": 9218868437227405312 + } + }, + { + "description": "atomic - uint/4/min", + "input": "1a00000000", + "expect": { + "uint": 0 + } + }, + { + "description": "atomic - null", + "input": "f6", + "expect": { + "null": {} + } + }, + { + "description": "atomic - negint/2/max", + "input": "39ffff", + "expect": { + "negint": -65536 + } + }, + { + "description": "atomic - negint/8/max", + "input": "3bfffffffffffffffe", + "expect": { + "negint": -18446744073709551615 + } + }, + { + "description": "atomic - float32/1.625", + "input": "fa3fd00000", + "expect": { + "float32": 1070596096 + } + }, + { + "description": "atomic - uint/0/min", + "input": "00", + "expect": { + "uint": 0 + } + }, + { + "description": "atomic - uint/1/max", + "input": "18ff", + "expect": { + "uint": 255 + } + }, + { + "description": "atomic - uint/8/max", + "input": "1bffffffffffffffff", + "expect": { + "uint": 18446744073709551615 + } + }, + { + "description": "atomic - negint/1/max", + "input": "38ff", + "expect": { + "negint": -256 + } + }, + { + "description": "atomic - negint/4/min", + "input": "3a00000000", + "expect": { + "negint": -1 + } + }, + { + "description": "atomic - float64/1.625", + "input": "fb3ffa000000000000", + "expect": { + "float64": 4609997168567123968 + } + }, + { + "description": "atomic - uint/2/max", + "input": "19ffff", + "expect": { + "uint": 65535 + } + }, + { + "description": "atomic - negint/0/max", + "input": "37", + "expect": { + "negint": -24 + } + }, + { + "description": "atomic - negint/4/max", + "input": "3affffffff", + "expect": { + "negint": -4294967296 + } + }, + { + "description": "atomic - uint/4/max", + "input": "1affffffff", + "expect": { + "uint": 4294967295 + } + }, + { + "description": "atomic - negint/0/min", + "input": "20", + "expect": { + "negint": -1 + } + }, + { + "description": "atomic - true", + "input": "f5", + "expect": { + "bool": true + } + }, + { + "description": "atomic - float32/+Inf", + "input": "fa7f800000", + "expect": { + "float32": 2139095040 + } + }, + { + "description": "definite slice - len = 0", + "input": "40", + "expect": { + "bytestring": [] + } + }, + { + "description": "definite slice - len \u003e 0", + "input": "43666f6f", + "expect": { + "bytestring": [ + 102, + 111, + 111 + ] + } + }, + { + "description": "definite string - len = 0", + "input": "60", + "expect": { + "string": "" + } + }, + { + "description": "definite string - len \u003e 0", + "input": "63666f6f", + "expect": { + "string": "foo" + } + }, + { + "description": "indefinite slice - len = 0", + "input": "5fff", + "expect": { + "bytestring": [] + } + }, + { + "description": "indefinite slice - len = 0, explicit", + "input": "5f40ff", + "expect": { + "bytestring": [] + } + }, + { + "description": "indefinite slice - len = 0, len \u003e 0", + "input": "5f4043666f6fff", + "expect": { + "bytestring": [ + 102, + 111, + 111 + ] + } + }, + { + "description": "indefinite slice - len \u003e 0, len = 0", + "input": "5f43666f6f40ff", + "expect": { + "bytestring": [ + 102, + 111, + 111 + ] + } + }, + { + "description": "indefinite slice - len \u003e 0, len \u003e 0", + "input": "5f43666f6f43666f6fff", + "expect": { + "bytestring": [ + 102, + 111, + 111, + 102, + 111, + 111 + ] + } + }, + { + "description": "indefinite string - len = 0", + "input": "7fff", + "expect": { + "string": "" + } + }, + { + "description": "indefinite string - len = 0, explicit", + "input": "7f60ff", + "expect": { + "string": "" + } + }, + { + "description": "indefinite string - len = 0, len \u003e 0", + "input": "7f6063666f6fff", + "expect": { + "string": "foo" + } + }, + { + "description": "indefinite string - len \u003e 0, len = 0", + "input": "7f63666f6f60ff", + "expect": { + "string": "foo" + } + }, + { + "description": "indefinite string - len \u003e 0, len \u003e 0", + "input": "7f63666f6f63666f6fff", + "expect": { + "string": "foofoo" + } + }, + { + "description": "list - [float64]", + "input": "81fb7ff0000000000000", + "expect": { + "list": [ + { + "float64": 9218868437227405312 + } + ] + } + }, + { + "description": "list - [_ negint/4/min]", + "input": "9f3a00000000ff", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [uint/1/min]", + "input": "811800", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [_ uint/4/min]", + "input": "9f1a00000000ff", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [uint/0/max]", + "input": "8117", + "expect": { + "list": [ + { + "uint": 23 + } + ] + } + }, + { + "description": "list - [uint/1/max]", + "input": "8118ff", + "expect": { + "list": [ + { + "uint": 255 + } + ] + } + }, + { + "description": "list - [negint/2/min]", + "input": "81390000", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [negint/8/min]", + "input": "813b0000000000000000", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [_ uint/2/min]", + "input": "9f190000ff", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [uint/0/min]", + "input": "8100", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [negint/0/min]", + "input": "8120", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [negint/0/max]", + "input": "8137", + "expect": { + "list": [ + { + "negint": -24 + } + ] + } + }, + { + "description": "list - [negint/1/min]", + "input": "813800", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [negint/1/max]", + "input": "8138ff", + "expect": { + "list": [ + { + "negint": -256 + } + ] + } + }, + { + "description": "list - [negint/4/max]", + "input": "813affffffff", + "expect": { + "list": [ + { + "negint": -4294967296 + } + ] + } + }, + { + "description": "list - [_ uint/4/max]", + "input": "9f1affffffffff", + "expect": { + "list": [ + { + "uint": 4294967295 + } + ] + } + }, + { + "description": "list - [_ negint/0/max]", + "input": "9f37ff", + "expect": { + "list": [ + { + "negint": -24 + } + ] + } + }, + { + "description": "list - [uint/2/min]", + "input": "81190000", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [_ false]", + "input": "9ff4ff", + "expect": { + "list": [ + { + "bool": false + } + ] + } + }, + { + "description": "list - [_ float32]", + "input": "9ffa7f800000ff", + "expect": { + "list": [ + { + "float32": 2139095040 + } + ] + } + }, + { + "description": "list - [_ negint/1/max]", + "input": "9f38ffff", + "expect": { + "list": [ + { + "negint": -256 + } + ] + } + }, + { + "description": "list - [uint/8/max]", + "input": "811bffffffffffffffff", + "expect": { + "list": [ + { + "uint": 18446744073709551615 + } + ] + } + }, + { + "description": "list - [negint/4/min]", + "input": "813a00000000", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [negint/8/max]", + "input": "813bfffffffffffffffe", + "expect": { + "list": [ + { + "negint": -18446744073709551615 + } + ] + } + }, + { + "description": "list - [_ negint/2/min]", + "input": "9f390000ff", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [_ negint/4/max]", + "input": "9f3affffffffff", + "expect": { + "list": [ + { + "negint": -4294967296 + } + ] + } + }, + { + "description": "list - [_ true]", + "input": "9ff5ff", + "expect": { + "list": [ + { + "bool": true + } + ] + } + }, + { + "description": "list - [_ null]", + "input": "9ff6ff", + "expect": { + "list": [ + { + "null": {} + } + ] + } + }, + { + "description": "list - [uint/8/min]", + "input": "811b0000000000000000", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [null]", + "input": "81f6", + "expect": { + "list": [ + { + "null": {} + } + ] + } + }, + { + "description": "list - [_ uint/1/min]", + "input": "9f1800ff", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [_ uint/1/max]", + "input": "9f18ffff", + "expect": { + "list": [ + { + "uint": 255 + } + ] + } + }, + { + "description": "list - [_ uint/2/max]", + "input": "9f19ffffff", + "expect": { + "list": [ + { + "uint": 65535 + } + ] + } + }, + { + "description": "list - [_ uint/8/min]", + "input": "9f1b0000000000000000ff", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [_ negint/8/min]", + "input": "9f3b0000000000000000ff", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [_ float64]", + "input": "9ffb7ff0000000000000ff", + "expect": { + "list": [ + { + "float64": 9218868437227405312 + } + ] + } + }, + { + "description": "list - [uint/4/min]", + "input": "811a00000000", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [true]", + "input": "81f5", + "expect": { + "list": [ + { + "bool": true + } + ] + } + }, + { + "description": "list - [float32]", + "input": "81fa7f800000", + "expect": { + "list": [ + { + "float32": 2139095040 + } + ] + } + }, + { + "description": "list - [_ uint/0/min]", + "input": "9f00ff", + "expect": { + "list": [ + { + "uint": 0 + } + ] + } + }, + { + "description": "list - [_ uint/0/max]", + "input": "9f17ff", + "expect": { + "list": [ + { + "uint": 23 + } + ] + } + }, + { + "description": "list - [_ uint/8/max]", + "input": "9f1bffffffffffffffffff", + "expect": { + "list": [ + { + "uint": 18446744073709551615 + } + ] + } + }, + { + "description": "list - [_ negint/1/min]", + "input": "9f3800ff", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [_ negint/2/max]", + "input": "9f39ffffff", + "expect": { + "list": [ + { + "negint": -65536 + } + ] + } + }, + { + "description": "list - [uint/2/max]", + "input": "8119ffff", + "expect": { + "list": [ + { + "uint": 65535 + } + ] + } + }, + { + "description": "list - [negint/2/max]", + "input": "8139ffff", + "expect": { + "list": [ + { + "negint": -65536 + } + ] + } + }, + { + "description": "list - [false]", + "input": "81f4", + "expect": { + "list": [ + { + "bool": false + } + ] + } + }, + { + "description": "list - [_ negint/0/min]", + "input": "9f20ff", + "expect": { + "list": [ + { + "negint": -1 + } + ] + } + }, + { + "description": "list - [_ negint/8/max]", + "input": "9f3bfffffffffffffffeff", + "expect": { + "list": [ + { + "negint": -18446744073709551615 + } + ] + } + }, + { + "description": "list - [uint/4/max]", + "input": "811affffffff", + "expect": { + "list": [ + { + "uint": 4294967295 + } + ] + } + }, + { + "description": "map - {uint/0/min}", + "input": "a163666f6f00", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {uint/4/max}", + "input": "a163666f6f1affffffff", + "expect": { + "map": { + "foo": { + "uint": 4294967295 + } + } + } + }, + { + "description": "map - {negint/0/min}", + "input": "a163666f6f20", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ float32}", + "input": "bf63666f6ffa7f800000ff", + "expect": { + "map": { + "foo": { + "float32": 2139095040 + } + } + } + }, + { + "description": "map - {false}", + "input": "a163666f6ff4", + "expect": { + "map": { + "foo": { + "bool": false + } + } + } + }, + { + "description": "map - {float32}", + "input": "a163666f6ffa7f800000", + "expect": { + "map": { + "foo": { + "float32": 2139095040 + } + } + } + }, + { + "description": "map - {_ uint/0/max}", + "input": "bf63666f6f17ff", + "expect": { + "map": { + "foo": { + "uint": 23 + } + } + } + }, + { + "description": "map - {_ negint/2/min}", + "input": "bf63666f6f390000ff", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ false}", + "input": "bf63666f6ff4ff", + "expect": { + "map": { + "foo": { + "bool": false + } + } + } + }, + { + "description": "map - {uint/8/min}", + "input": "a163666f6f1b0000000000000000", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ negint/0/max}", + "input": "bf63666f6f37ff", + "expect": { + "map": { + "foo": { + "negint": -24 + } + } + } + }, + { + "description": "map - {_ null}", + "input": "bf63666f6ff6ff", + "expect": { + "map": { + "foo": { + "null": {} + } + } + } + }, + { + "description": "map - {uint/1/min}", + "input": "a163666f6f1800", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ uint/1/min}", + "input": "bf63666f6f1800ff", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ uint/8/max}", + "input": "bf63666f6f1bffffffffffffffffff", + "expect": { + "map": { + "foo": { + "uint": 18446744073709551615 + } + } + } + }, + { + "description": "map - {_ negint/0/min}", + "input": "bf63666f6f20ff", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ negint/1/min}", + "input": "bf63666f6f3800ff", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ negint/1/max}", + "input": "bf63666f6f38ffff", + "expect": { + "map": { + "foo": { + "negint": -256 + } + } + } + }, + { + "description": "map - {_ negint/2/max}", + "input": "bf63666f6f39ffffff", + "expect": { + "map": { + "foo": { + "negint": -65536 + } + } + } + }, + { + "description": "map - {_ negint/4/min}", + "input": "bf63666f6f3a00000000ff", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ true}", + "input": "bf63666f6ff5ff", + "expect": { + "map": { + "foo": { + "bool": true + } + } + } + }, + { + "description": "map - {uint/2/max}", + "input": "a163666f6f19ffff", + "expect": { + "map": { + "foo": { + "uint": 65535 + } + } + } + }, + { + "description": "map - {uint/8/max}", + "input": "a163666f6f1bffffffffffffffff", + "expect": { + "map": { + "foo": { + "uint": 18446744073709551615 + } + } + } + }, + { + "description": "map - {negint/0/max}", + "input": "a163666f6f37", + "expect": { + "map": { + "foo": { + "negint": -24 + } + } + } + }, + { + "description": "map - {negint/1/max}", + "input": "a163666f6f38ff", + "expect": { + "map": { + "foo": { + "negint": -256 + } + } + } + }, + { + "description": "map - {negint/2/max}", + "input": "a163666f6f39ffff", + "expect": { + "map": { + "foo": { + "negint": -65536 + } + } + } + }, + { + "description": "map - {negint/4/min}", + "input": "a163666f6f3a00000000", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {negint/8/max}", + "input": "a163666f6f3bfffffffffffffffe", + "expect": { + "map": { + "foo": { + "negint": -18446744073709551615 + } + } + } + }, + { + "description": "map - {float64}", + "input": "a163666f6ffb7ff0000000000000", + "expect": { + "map": { + "foo": { + "float64": 9218868437227405312 + } + } + } + }, + { + "description": "map - {_ uint/0/min}", + "input": "bf63666f6f00ff", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ uint/4/min}", + "input": "bf63666f6f1a00000000ff", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ uint/8/min}", + "input": "bf63666f6f1b0000000000000000ff", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {uint/1/max}", + "input": "a163666f6f18ff", + "expect": { + "map": { + "foo": { + "uint": 255 + } + } + } + }, + { + "description": "map - {negint/2/min}", + "input": "a163666f6f390000", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {negint/8/min}", + "input": "a163666f6f3b0000000000000000", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {true}", + "input": "a163666f6ff5", + "expect": { + "map": { + "foo": { + "bool": true + } + } + } + }, + { + "description": "map - {_ uint/2/min}", + "input": "bf63666f6f190000ff", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {_ negint/8/min}", + "input": "bf63666f6f3b0000000000000000ff", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ negint/8/max}", + "input": "bf63666f6f3bfffffffffffffffeff", + "expect": { + "map": { + "foo": { + "negint": -18446744073709551615 + } + } + } + }, + { + "description": "map - {uint/0/max}", + "input": "a163666f6f17", + "expect": { + "map": { + "foo": { + "uint": 23 + } + } + } + }, + { + "description": "map - {negint/4/max}", + "input": "a163666f6f3affffffff", + "expect": { + "map": { + "foo": { + "negint": -4294967296 + } + } + } + }, + { + "description": "map - {null}", + "input": "a163666f6ff6", + "expect": { + "map": { + "foo": { + "null": {} + } + } + } + }, + { + "description": "map - {_ uint/4/max}", + "input": "bf63666f6f1affffffffff", + "expect": { + "map": { + "foo": { + "uint": 4294967295 + } + } + } + }, + { + "description": "map - {_ float64}", + "input": "bf63666f6ffb7ff0000000000000ff", + "expect": { + "map": { + "foo": { + "float64": 9218868437227405312 + } + } + } + }, + { + "description": "map - {uint/2/min}", + "input": "a163666f6f190000", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {uint/4/min}", + "input": "a163666f6f1a00000000", + "expect": { + "map": { + "foo": { + "uint": 0 + } + } + } + }, + { + "description": "map - {negint/1/min}", + "input": "a163666f6f3800", + "expect": { + "map": { + "foo": { + "negint": -1 + } + } + } + }, + { + "description": "map - {_ uint/1/max}", + "input": "bf63666f6f18ffff", + "expect": { + "map": { + "foo": { + "uint": 255 + } + } + } + }, + { + "description": "map - {_ uint/2/max}", + "input": "bf63666f6f19ffffff", + "expect": { + "map": { + "foo": { + "uint": 65535 + } + } + } + }, + { + "description": "map - {_ negint/4/max}", + "input": "bf63666f6f3affffffffff", + "expect": { + "map": { + "foo": { + "negint": -4294967296 + } + } + } + }, + { + "description": "tag - 0/min", + "input": "c001", + "expect": { + "tag": { + "id": 0, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 1/min", + "input": "d80001", + "expect": { + "tag": { + "id": 0, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 1/max", + "input": "d8ff01", + "expect": { + "tag": { + "id": 255, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 4/min", + "input": "da0000000001", + "expect": { + "tag": { + "id": 0, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 8/min", + "input": "db000000000000000001", + "expect": { + "tag": { + "id": 0, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 0/max", + "input": "d701", + "expect": { + "tag": { + "id": 23, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 2/min", + "input": "d9000001", + "expect": { + "tag": { + "id": 0, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 2/max", + "input": "d9ffff01", + "expect": { + "tag": { + "id": 65535, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 4/max", + "input": "daffffffff01", + "expect": { + "tag": { + "id": 4294967295, + "value": { + "uint": 1 + } + } + } + }, + { + "description": "tag - 8/max", + "input": "dbffffffffffffffff01", + "expect": { + "tag": { + "id": 18446744073709551615, + "value": { + "uint": 1 + } + } + } + } +] \ No newline at end of file diff --git a/test/test_cbor.py b/test/test_cbor.py index d9e4abc78..0af8073ff 100644 --- a/test/test_cbor.py +++ b/test/test_cbor.py @@ -3,6 +3,9 @@ from test import NativeResourceTest from awscrt.cbor import * +import json +import struct +import os class TestCBOR(NativeResourceTest): @@ -65,4 +68,109 @@ def test_cbor_encode_decode_data_item(self): t = decoder.pop_next_data_item() self.assertEqual(val_to_write, t) -# TODO: More tests: inf str/bytes/array/map + def _ieee754_bits_to_float(self, bits): + return struct.unpack('>f', struct.pack('>I', bits))[0] + + def _ieee754_bits_to_double(self, bits): + return struct.unpack('>d', struct.pack('>Q', bits))[0] + + def _convert_expect(self, expect): + if isinstance(expect, dict): + if 'uint' in expect: + return expect['uint'] + elif 'negint' in expect: + return expect['negint'] + elif 'bool' in expect: + return expect['bool'] + elif 'float32' in expect: + return self._ieee754_bits_to_float(expect['float32']) + elif 'float64' in expect: + return self._ieee754_bits_to_double(expect['float64']) + elif 'null' in expect: + return None + elif 'bytestring' in expect: + return bytes(expect['bytestring']) + elif 'string' in expect: + return expect['string'] + elif 'list' in expect: + return [self._convert_expect(item) for item in expect['list']] + elif 'map' in expect: + return {k: self._convert_expect(v) for k, v in expect['map'].items()} + return expect + + def test_cbor_decode_success(self): + """Test CBOR decoding using test cases from JSON file""" + current_dir = os.path.dirname(os.path.abspath(__file__)) + test_file = os.path.join(current_dir, 'resources', 'decode-success-tests.json') + with open(test_file, 'r') as f: + test_cases = json.load(f) + + for case in test_cases: + description = case.get("description", "No description") + input_hex = case.get("input") + expected = self._convert_expect(case.get("expect")) + + with self.subTest(description=description): + # Convert hex input to bytes + try: + bytes_input = bytes.fromhex(input_hex) + except ValueError as e: + self.fail(f"Failed to convert hex input: {e}") + + # Decode the CBOR data + try: + decoder = AwsCborDecoder(bytes_input) + type = decoder.peek_next_type() + if type == AwsCborType.Tag: + # TODO: we don't support parse the tag to python type yet. + # hard code the tag cases to the expected format. + tag_id = decoder.pop_next_tag_val() + tag_data = decoder.pop_next_data_item() + decoded_data = { + "tag": { + "id": tag_id, + "value": { + "uint": tag_data + } + } + } + else: + decoded_data = decoder.pop_next_data_item() + + self.assertEqual( + decoded_data, + expected, + f"Failed case '{description}'\nDecoded: {decoded_data}\nExpected: {expected}" + ) + except Exception as e: + self.fail(f"Failed to decode CBOR data: {e}") + + def test_cbor_decode_errors(self): + """Test CBOR decoding error cases from JSON file""" + current_dir = os.path.dirname(os.path.abspath(__file__)) + test_file = os.path.join(current_dir, 'resources', 'decode-error-tests.json') + + with open(test_file, 'r') as f: + test_cases = json.load(f) + + for case in test_cases: + description = case.get("description", "No description") + input_hex = case.get("input") + + with self.subTest(description=description): + # Convert hex input to bytes + try: + bytes_input = bytes.fromhex(input_hex) + except ValueError as e: + self.fail(f"Failed to convert hex input: {e}") + + # Decode the CBOR data - should raise an exception + decoder = AwsCborDecoder(bytes_input) + + with self.assertRaises((RuntimeError, ValueError, AssertionError)): + type = decoder.peek_next_type() + if type == AwsCborType.Tag: + tag_id = decoder.pop_next_tag_val() + tag_data = decoder.pop_next_data_item() + else: + decoded_data = decoder.pop_next_data_item() From 95b4ac3f6a04d719514b2e91fc268363f1385fdd Mon Sep 17 00:00:00 2001 From: Dengke Date: Mon, 13 Jan 2025 11:03:08 -0800 Subject: [PATCH 18/20] add some doc --- awscrt/cbor.py | 5 +++++ test/test_cbor.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/awscrt/cbor.py b/awscrt/cbor.py index 00fc9eada..90fea5411 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -172,6 +172,8 @@ def __init__(self, src: bytes): self._binding = _awscrt.cbor_decoder_new(src) def peek_next_type(self) -> AwsCborType: + """Return the AwsCborType of the next data item in the cbor formatted data + """ return AwsCborType(_awscrt.cbor_decoder_peek_type(self._binding)) def get_remaining_bytes_len(self) -> int: @@ -218,4 +220,7 @@ def pop_next_map(self) -> dict: return _awscrt.cbor_decoder_pop_next_py_dict(self._binding) def pop_next_data_item(self) -> Any: + """Generic API to decode cbor formatted data to a python object. + TODO: tags are NOT supported yet. + """ return _awscrt.cbor_decoder_pop_next_data_item(self._binding) diff --git a/test/test_cbor.py b/test/test_cbor.py index 0af8073ff..9f27bb7d0 100644 --- a/test/test_cbor.py +++ b/test/test_cbor.py @@ -167,7 +167,7 @@ def test_cbor_decode_errors(self): # Decode the CBOR data - should raise an exception decoder = AwsCborDecoder(bytes_input) - with self.assertRaises((RuntimeError, ValueError, AssertionError)): + with self.assertRaises(RuntimeError): type = decoder.peek_next_type() if type == AwsCborType.Tag: tag_id = decoder.pop_next_tag_val() From b036836e3a38609323e1f1fd695e37f572598959 Mon Sep 17 00:00:00 2001 From: Dengke Date: Mon, 13 Jan 2025 11:22:19 -0800 Subject: [PATCH 19/20] let's move to arm64 codebuild --- .github/workflows/ci.yml | 70 ++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ea94c5bfd..11c375268 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,7 +41,7 @@ jobs: aws s3 cp s3://aws-crt-test-stuff/ci/${{ env.BUILDER_VERSION }}/linux-container-ci.sh ./linux-container-ci.sh && chmod a+x ./linux-container-ci.sh ./linux-container-ci.sh ${{ env.BUILDER_VERSION }} aws-crt-manylinux1-${{ matrix.image }} build -p ${{ env.PACKAGE_NAME }} --python /opt/python/${{ matrix.python }}/bin/python - manylinux2014: + manylinux2014-x86: runs-on: ubuntu-20.04 # latest strategy: fail-fast: false @@ -49,7 +49,6 @@ jobs: image: - x64 - x86 - - aarch64 python: - cp38-cp38 - cp39-cp39 @@ -65,23 +64,43 @@ jobs: with: role-to-assume: ${{ env.CRT_CI_ROLE }} aws-region: ${{ env.AWS_DEFAULT_REGION }} - # Only aarch64 needs this, but it doesn't hurt anything - - name: Install qemu/docker - run: docker run --rm --privileged multiarch/qemu-user-static --reset -p yes - - name: Build ${{ env.PACKAGE_NAME }} run: | aws s3 cp s3://aws-crt-test-stuff/ci/${{ env.BUILDER_VERSION }}/linux-container-ci.sh ./linux-container-ci.sh && chmod a+x ./linux-container-ci.sh ./linux-container-ci.sh ${{ env.BUILDER_VERSION }} aws-crt-manylinux2014-${{ matrix.image }} build -p ${{ env.PACKAGE_NAME }} --python /opt/python/${{ matrix.python }}/bin/python - musllinux-1-1: + manylinux2014-arm64: + runs-on: codebuild-aws-crt-python-arm64-${{ github.run_id }}-${{ github.run_attempt }}-arm-3.0-large + strategy: + fail-fast: false + matrix: + python: + - cp38-cp38 + - cp39-cp39 + - cp310-cp310 + - cp311-cp311 + - cp312-cp312 + - cp313-cp313 + permissions: + id-token: write # This is required for requesting the JWT + steps: + - name: configure AWS credentials (containers) + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ env.CRT_CI_ROLE }} + aws-region: ${{ env.AWS_DEFAULT_REGION }} + - name: Build ${{ env.PACKAGE_NAME }} + run: | + aws s3 cp s3://aws-crt-test-stuff/ci/${{ env.BUILDER_VERSION }}/linux-container-ci.sh ./linux-container-ci.sh && chmod a+x ./linux-container-ci.sh + ./linux-container-ci.sh ${{ env.BUILDER_VERSION }} aws-crt-manylinux2014-aarch64 build -p ${{ env.PACKAGE_NAME }} --python /opt/python/${{ matrix.python }}/bin/python + + musllinux-1-1-x64: runs-on: ubuntu-22.04 # latest strategy: fail-fast: false matrix: image: - x64 - - aarch64 python: - cp38-cp38 - cp39-cp39 @@ -97,18 +116,39 @@ jobs: with: role-to-assume: ${{ env.CRT_CI_ROLE }} aws-region: ${{ env.AWS_DEFAULT_REGION }} + - name: Build ${{ env.PACKAGE_NAME }} + run: | + aws s3 cp s3://aws-crt-test-stuff/ci/${{ env.BUILDER_VERSION }}/linux-container-ci.sh ./linux-container-ci.sh && chmod a+x ./linux-container-ci.sh + ./linux-container-ci.sh ${{ env.BUILDER_VERSION }} aws-crt-musllinux-1-1-${{ matrix.image }} build -p ${{ env.PACKAGE_NAME }} --python /opt/python/${{ matrix.python }}/bin/python - # Only aarch64 needs this, but it doesn't hurt anything - - name: Install qemu/docker - run: docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + musllinux-1-1-arm64: + runs-on: codebuild-aws-crt-python-arm64-${{ github.run_id }}-${{ github.run_attempt }}-arm-3.0-large + strategy: + fail-fast: false + matrix: + python: + - cp38-cp38 + - cp39-cp39 + - cp310-cp310 + - cp311-cp311 + - cp312-cp312 + - cp313-cp313 + permissions: + id-token: write # This is required for requesting the JWT + steps: + - name: configure AWS credentials (containers) + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ env.CRT_CI_ROLE }} + aws-region: ${{ env.AWS_DEFAULT_REGION }} - name: Build ${{ env.PACKAGE_NAME }} run: | aws s3 cp s3://aws-crt-test-stuff/ci/${{ env.BUILDER_VERSION }}/linux-container-ci.sh ./linux-container-ci.sh && chmod a+x ./linux-container-ci.sh - ./linux-container-ci.sh ${{ env.BUILDER_VERSION }} aws-crt-musllinux-1-1-${{ matrix.image }} build -p ${{ env.PACKAGE_NAME }} --python /opt/python/${{ matrix.python }}/bin/python + ./linux-container-ci.sh ${{ env.BUILDER_VERSION }} aws-crt-musllinux-1-1-aarch64 build -p ${{ env.PACKAGE_NAME }} --python /opt/python/${{ matrix.python }}/bin/python raspberry: - runs-on: ubuntu-20.04 # latest + runs-on: codebuild-aws-crt-python-arm64-${{ github.run_id }}-${{ github.run_attempt }}-arm-3.0-large strategy: fail-fast: false matrix: @@ -123,10 +163,6 @@ jobs: role-to-assume: ${{ env.CRT_CI_ROLE }} aws-region: ${{ env.AWS_DEFAULT_REGION }} - # set arm arch - - name: Install qemu/docker - run: docker run --rm --privileged multiarch/qemu-user-static --reset -p yes - - name: Build ${{ env.PACKAGE_NAME }} run: | aws s3 cp s3://aws-crt-test-stuff/ci/${{ env.BUILDER_VERSION }}/linux-container-ci.sh ./linux-container-ci.sh && chmod a+x ./linux-container-ci.sh From 70dd7df74e9ac180ddfca09b1426419472ec6e07 Mon Sep 17 00:00:00 2001 From: Dengke Date: Mon, 13 Jan 2025 11:30:48 -0800 Subject: [PATCH 20/20] actually the image matrix did explain it well enough --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 11c375268..7089967d3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,7 +41,7 @@ jobs: aws s3 cp s3://aws-crt-test-stuff/ci/${{ env.BUILDER_VERSION }}/linux-container-ci.sh ./linux-container-ci.sh && chmod a+x ./linux-container-ci.sh ./linux-container-ci.sh ${{ env.BUILDER_VERSION }} aws-crt-manylinux1-${{ matrix.image }} build -p ${{ env.PACKAGE_NAME }} --python /opt/python/${{ matrix.python }}/bin/python - manylinux2014-x86: + manylinux2014: runs-on: ubuntu-20.04 # latest strategy: fail-fast: false @@ -94,7 +94,7 @@ jobs: aws s3 cp s3://aws-crt-test-stuff/ci/${{ env.BUILDER_VERSION }}/linux-container-ci.sh ./linux-container-ci.sh && chmod a+x ./linux-container-ci.sh ./linux-container-ci.sh ${{ env.BUILDER_VERSION }} aws-crt-manylinux2014-aarch64 build -p ${{ env.PACKAGE_NAME }} --python /opt/python/${{ matrix.python }}/bin/python - musllinux-1-1-x64: + musllinux-1-1: runs-on: ubuntu-22.04 # latest strategy: fail-fast: false