diff --git a/src/bindings/python/src/openvino/runtime/ie_api.py b/src/bindings/python/src/openvino/runtime/ie_api.py index a5df4733d7a19b..428639383263f6 100644 --- a/src/bindings/python/src/openvino/runtime/ie_api.py +++ b/src/bindings/python/src/openvino/runtime/ie_api.py @@ -262,7 +262,7 @@ def create_infer_request(self) -> InferRequest: """ return InferRequest(super().create_infer_request()) - def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None) -> OVDict: + def infer_new_request(self, inputs: Any = None) -> OVDict: """Infers specified input(s) in synchronous mode. Blocks all methods of CompiledModel while request is running. @@ -287,7 +287,7 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] function throws error. :param inputs: Data to be set on input tensors. - :type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional + :type inputs: Any, optional :return: Dictionary of results from output tensors with port/int/str keys. :rtype: OVDict """ @@ -297,7 +297,7 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] def __call__( self, - inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None, + inputs: Any = None, share_inputs: bool = True, share_outputs: bool = False, *, @@ -332,7 +332,7 @@ def __call__( function throws error. :param inputs: Data to be set on input tensors. - :type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional + :type inputs: Any, optional :param share_inputs: Enables `share_inputs` mode. Controls memory usage on inference's inputs. If set to `False` inputs the data dispatcher will safely copy data diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py index f0b7e96fbf0d48..f797406e89556d 100644 --- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py +++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py @@ -10,7 +10,7 @@ from openvino._pyopenvino import ConstOutput, Tensor, Type from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper, OVDict -ContainerTypes = Union[dict, list, tuple] +ContainerTypes = Union[dict, list, tuple, OVDict] ScalarTypes = Union[np.number, int, float] ValidKeys = Union[str, int, ConstOutput] @@ -31,7 +31,7 @@ def get_request_tensor( @singledispatch def value_to_tensor( - value: Union[Tensor, np.ndarray, ScalarTypes], + value: Union[Tensor, np.ndarray, ScalarTypes, str], request: Optional[_InferRequestWrapper] = None, is_shared: bool = False, key: Optional[ValidKeys] = None, @@ -59,6 +59,11 @@ def _( tensor = get_request_tensor(request, key) tensor_type = tensor.get_element_type() tensor_dtype = tensor_type.to_dtype() + # String edge-case, always copy. + # Scalars are also handled by C++. + if tensor_type == Type.string: + return Tensor(value, shared_memory=False) + # Scalars edge-case: if value.ndim == 0: tensor_shape = tuple(tensor.shape) if tensor_dtype == value.dtype and tensor_shape == value.shape: @@ -82,21 +87,34 @@ def _( return Tensor(value, shared_memory=is_shared) +@value_to_tensor.register(list) +def _( + value: list, + request: _InferRequestWrapper, + is_shared: bool = False, + key: Optional[ValidKeys] = None, +) -> Tensor: + return Tensor(value) + + @value_to_tensor.register(np.number) @value_to_tensor.register(int) @value_to_tensor.register(float) +@value_to_tensor.register(str) +@value_to_tensor.register(bytes) def _( - value: ScalarTypes, + value: Union[ScalarTypes, str, bytes], request: _InferRequestWrapper, is_shared: bool = False, key: Optional[ValidKeys] = None, ) -> Tensor: - # np.number/int/float edge-case, copy will occur in both scenarios. + # np.number/int/float/str/bytes edge-case, copy will occur in both scenarios. tensor_type = get_request_tensor(request, key).get_element_type() tensor_dtype = tensor_type.to_dtype() tmp = np.array(value) + # String edge-case -- it converts the data inside of Tensor class. # If types are mismatched, convert. - if tensor_dtype != tmp.dtype: + if tensor_type != Type.string and tensor_dtype != tmp.dtype: return Tensor(tmp.astype(tensor_dtype), shared_memory=False) return Tensor(tmp, shared_memory=False) @@ -204,8 +222,10 @@ def _( @create_shared.register(np.number) @create_shared.register(int) @create_shared.register(float) +@create_shared.register(str) +@create_shared.register(bytes) def _( - inputs: Union[Tensor, ScalarTypes], + inputs: Union[Tensor, ScalarTypes, str, bytes], request: _InferRequestWrapper, ) -> Tensor: return value_to_tensor(inputs, request=request, is_shared=True) @@ -256,7 +276,10 @@ def _( if tuple(tensor.shape) != inputs.shape: tensor.shape = inputs.shape # When copying, type should be up/down-casted automatically. - tensor.data[:] = inputs[:] + if tensor.element_type == Type.string: + tensor.bytes_data = inputs + else: + tensor.data[:] = inputs[:] else: # If shape is "empty", assume this is a scalar value set_request_tensor( @@ -269,8 +292,9 @@ def _( @update_tensor.register(np.number) # type: ignore @update_tensor.register(float) @update_tensor.register(int) +@update_tensor.register(str) def _( - inputs: Union[np.number, float, int], + inputs: Union[ScalarTypes, str], request: _InferRequestWrapper, key: Optional[ValidKeys] = None, ) -> None: @@ -286,6 +310,7 @@ def update_inputs(inputs: dict, request: _InferRequestWrapper) -> dict: It creates copy of Tensors or copy data to already allocated Tensors on device if the item is of type `np.ndarray`, `np.number`, `int`, `float` or has numpy __array__ attribute. + If value is of type `list`, create a Tensor based on it, copy will occur in the Tensor constructor. """ # Create new temporary dictionary. # new_inputs will be used to transfer data to inference calls, @@ -296,8 +321,10 @@ def update_inputs(inputs: dict, request: _InferRequestWrapper) -> dict: raise TypeError(f"Incompatible key type for input: {key}") # Copy numpy arrays to already allocated Tensors. # If value object has __array__ attribute, load it to Tensor using np.array - if isinstance(value, (np.ndarray, np.number, int, float)) or hasattr(value, "__array__"): + if isinstance(value, (np.ndarray, np.number, int, float, str)) or hasattr(value, "__array__"): update_tensor(value, request, key) + elif isinstance(value, list): + new_inputs[key] = Tensor(value) # If value is of Tensor type, put it into temporary dictionary. elif isinstance(value, Tensor): new_inputs[key] = value @@ -309,7 +336,7 @@ def update_inputs(inputs: dict, request: _InferRequestWrapper) -> dict: @singledispatch def create_copied( - inputs: Union[ContainerTypes, OVDict, np.ndarray, ScalarTypes], + inputs: Union[ContainerTypes, np.ndarray, ScalarTypes, str, bytes], request: _InferRequestWrapper, ) -> Union[dict, None]: # Check the special case of the array-interface @@ -325,7 +352,7 @@ def create_copied( @create_copied.register(tuple) @create_copied.register(OVDict) def _( - inputs: Union[ContainerTypes, OVDict], + inputs: ContainerTypes, request: _InferRequestWrapper, ) -> dict: return update_inputs(normalize_arrays(inputs, is_shared=False), request) @@ -344,8 +371,10 @@ def _( @create_copied.register(np.number) @create_copied.register(int) @create_copied.register(float) +@create_copied.register(str) +@create_copied.register(bytes) def _( - inputs: Union[Tensor, ScalarTypes], + inputs: Union[Tensor, ScalarTypes, str, bytes], request: _InferRequestWrapper, ) -> Tensor: return value_to_tensor(inputs, request=request, is_shared=False) @@ -356,7 +385,7 @@ def _( def _data_dispatch( request: _InferRequestWrapper, - inputs: Union[ContainerTypes, OVDict, Tensor, np.ndarray, ScalarTypes] = None, + inputs: Union[ContainerTypes, Tensor, np.ndarray, ScalarTypes, str] = None, is_shared: bool = False, ) -> Union[dict, Tensor]: if inputs is None: diff --git a/src/bindings/python/src/openvino/runtime/utils/types.py b/src/bindings/python/src/openvino/runtime/utils/types.py index aa986d4f873c9c..d493f2ff825d8d 100644 --- a/src/bindings/python/src/openvino/runtime/utils/types.py +++ b/src/bindings/python/src/openvino/runtime/utils/types.py @@ -36,6 +36,10 @@ (Type.u32, np.uint32), (Type.u64, np.uint64), (Type.bf16, np.uint16), + (Type.string, str), + (Type.string, np.str_), + (Type.string, bytes), + (Type.string, np.bytes_), ] openvino_to_numpy_types_str_map = [ @@ -52,6 +56,10 @@ ("u16", np.uint16), ("u32", np.uint32), ("u64", np.uint64), + ("string", str), + ("string", np.str_), + ("string", bytes), + ("string", np.bytes_), ] diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index 603583bb1825cd..0426acc4da1b6a 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -14,6 +14,9 @@ #define C_CONTIGUOUS py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_ namespace Common { + +namespace type_helpers { + const std::map& ov_type_to_dtype() { static const std::map ov_type_to_dtype_mapping = { {ov::element::f16, py::dtype("float16")}, @@ -33,10 +36,15 @@ const std::map& ov_type_to_dtype() { {ov::element::u4, py::dtype("uint8")}, {ov::element::nf4, py::dtype("uint8")}, {ov::element::i4, py::dtype("int8")}, + {ov::element::string, py::dtype("bytes_")}, }; return ov_type_to_dtype_mapping; } +py::dtype get_dtype(const ov::element::Type& ov_type) { + return ov_type_to_dtype().at(ov_type); +} + const std::map& dtype_to_ov_type() { static const std::map dtype_to_ov_type_mapping = { {"float16", ov::element::f16}, @@ -51,10 +59,36 @@ const std::map& dtype_to_ov_type() { {"uint32", ov::element::u32}, {"uint64", ov::element::u64}, {"bool", ov::element::boolean}, + {"bytes_", ov::element::string}, + {"str_", ov::element::string}, + {"bytes", ov::element::string}, + {"str", ov::element::string}, }; return dtype_to_ov_type_mapping; } +ov::element::Type get_ov_type(const py::array& array) { + // More about character codes: + // https://numpy.org/doc/stable/reference/arrays.scalars.html + char ctype = array.dtype().kind(); + if (ctype == 'U' || ctype == 'S') { + return ov::element::string; + } + return dtype_to_ov_type().at(py::str(array.dtype())); +} + +ov::element::Type get_ov_type(py::dtype& dtype) { + // More about character codes: + // https://numpy.org/doc/stable/reference/arrays.scalars.html + char ctype = dtype.kind(); + if (ctype == 'U' || ctype == 'S') { + return ov::element::string; + } + return dtype_to_ov_type().at(py::str(dtype)); +} + +}; // namespace type_helpers + namespace containers { const TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs) { TensorIndexMap result_map; @@ -76,16 +110,110 @@ const TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs) { } }; // namespace containers +namespace string_helpers { + +py::array bytes_array_from_tensor(ov::Tensor&& t) { + if (t.get_element_type() != ov::element::string) { + OPENVINO_THROW("Tensor's type must be a string!"); + } + auto data = t.data(); + auto max_element = std::max_element(data, data + t.get_size(), [](const std::string& x, const std::string& y) { + return x.length() < y.length(); + }); + auto max_stride = max_element->length(); + auto dtype = py::dtype("|S" + std::to_string(max_stride)); + // Adjusting strides to follow the numpy convention: + py::array array; + auto new_strides = t.get_strides(); + if (new_strides.size() == 0) { + array = py::array(dtype, t.get_shape(), {}); + } else { + auto element_stride = new_strides[new_strides.size() - 1]; + for (size_t i = 0; i < new_strides.size(); ++i) { + new_strides[i] = (new_strides[i] / element_stride) * max_stride; + } + array = py::array(dtype, t.get_shape(), new_strides); + } + // Create an empty array and populate it with utf-8 encoded strings: + auto ptr = array.data(); + for (size_t i = 0; i < t.get_size(); ++i) { + auto start = &data[i][0]; + auto length = data[i].length(); + auto end = std::copy(start, start + length, (char*)ptr + i * max_stride); + std::fill_n(end, max_stride - length, 0); + } + return array; +} + +py::array string_array_from_tensor(ov::Tensor&& t) { + if (t.get_element_type() != ov::element::string) { + OPENVINO_THROW("Tensor's type must be a string!"); + } + // Approach that is compact and faster than np.char.decode(tensor.data): + auto data = t.data(); + py::list _list; + for (size_t i = 0; i < t.get_size(); ++i) { + PyObject* _unicode_obj = PyUnicode_DecodeUTF8(&data[i][0], data[i].length(), "strict"); + _list.append(_unicode_obj); + Py_XDECREF(_unicode_obj); + } + // Adjusting shape to follow the numpy convention: + py::array array(_list); + array.resize(t.get_shape()); + return array; +} + +void fill_tensor_from_bytes(ov::Tensor& tensor, py::array& array) { + if (tensor.get_size() != static_cast(array.size())) { + OPENVINO_THROW("Passed array must have the same size (number of elements) as the Tensor!"); + } + py::buffer_info buf = array.request(); + auto data = tensor.data(); + for (size_t i = 0; i < tensor.get_size(); ++i) { + const char* ptr = reinterpret_cast(buf.ptr) + (i * buf.itemsize); + data[i] = std::string(ptr, buf.ndim == 0 ? buf.itemsize : buf.strides[0]); + } +} + +void fill_tensor_from_strings(ov::Tensor& tensor, py::array& array) { + if (tensor.get_size() != static_cast(array.size())) { + OPENVINO_THROW("Passed array must have the same size (number of elements) as the Tensor!"); + } + py::buffer_info buf = array.request(); + auto data = tensor.data(); + for (size_t i = 0; i < tensor.get_size(); ++i) { + char* ptr = reinterpret_cast(buf.ptr) + (i * buf.itemsize); + // TODO: check other unicode kinds? 2BYTE and 1BYTE? + PyObject* _unicode_obj = + PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, reinterpret_cast(ptr), buf.itemsize / 4); + PyObject* _utf8_obj = PyUnicode_AsUTF8String(_unicode_obj); + const char* _tmp_str = PyBytes_AsString(_utf8_obj); + data[i] = std::string(_tmp_str); + Py_XDECREF(_unicode_obj); + Py_XDECREF(_utf8_obj); + } +} + +void fill_string_tensor_data(ov::Tensor& tensor, py::array& array) { + // More about character codes: + // https://numpy.org/doc/stable/reference/arrays.scalars.html + if (array.dtype().kind() == 'S') { + fill_tensor_from_bytes(tensor, array); + } else if (array.dtype().kind() == 'U') { + fill_tensor_from_strings(tensor, array); + } else { + OPENVINO_THROW("Unknown string kind passed to fill the Tensor's data!"); + } +} + +}; // namespace string_helpers + namespace array_helpers { bool is_contiguous(const py::array& array) { return C_CONTIGUOUS == (array.flags() & C_CONTIGUOUS); } -ov::element::Type get_ov_type(const py::array& array) { - return Common::dtype_to_ov_type().at(py::str(array.dtype())); -} - std::vector get_shape(const py::array& array) { return std::vector(array.shape(), array.shape() + array.ndim()); } @@ -134,9 +262,18 @@ py::array as_contiguous(py::array& array, ov::element::Type type) { } py::array array_from_tensor(ov::Tensor&& t, bool is_shared) { + // Special case for string data type. + if (t.get_element_type() == ov::element::string) { + PyErr_WarnEx(PyExc_RuntimeWarning, + "Data of string type will be copied! Please use dedicated properties " + "`str_data` and `bytes_data` to avoid confusion while accessing " + "Tensor's contents.", + 1); + return string_helpers::bytes_array_from_tensor(std::move(t)); + } + // Get actual dtype from OpenVINO type: auto ov_type = t.get_element_type(); - auto dtype = Common::ov_type_to_dtype().at(ov_type); - + auto dtype = Common::type_helpers::get_dtype(ov_type); // Return the array as a view: if (is_shared) { if (ov_type.bitwidth() < Common::values::min_bitwidth) { @@ -157,16 +294,16 @@ template <> ov::op::v0::Constant create_copied(py::array& array) { // Do not copy data from the array, only return empty tensor based on type. if (array.size() == 0) { - return ov::op::v0::Constant(array_helpers::get_ov_type(array), array_helpers::get_shape(array)); + return ov::op::v0::Constant(type_helpers::get_ov_type(array), array_helpers::get_shape(array)); } // Convert to contiguous array if not already in C-style. if (!array_helpers::is_contiguous(array)) { - array = array_helpers::as_contiguous(array, array_helpers::get_ov_type(array)); + array = array_helpers::as_contiguous(array, type_helpers::get_ov_type(array)); } // Create actual Constant and a constructor is copying data. // If ndim is equal to 0, creates scalar Constant. // If size is equal to 0, creates empty Constant. - return ov::op::v0::Constant(array_helpers::get_ov_type(array), + return ov::op::v0::Constant(type_helpers::get_ov_type(array), array_helpers::get_shape(array), array.ndim() == 0 ? array.data() : array.data(0)); } @@ -188,7 +325,7 @@ ov::op::v0::Constant create_shared(py::array& array) { static_cast((array.ndim() == 0 || array.size() == 0) ? array.mutable_data() : array.mutable_data(0)), array.ndim() == 0 ? array.itemsize() : array.nbytes(), array); - return ov::op::v0::Constant(array_helpers::get_ov_type(array), array_helpers::get_shape(array), memory); + return ov::op::v0::Constant(type_helpers::get_ov_type(array), array_helpers::get_shape(array), memory); } // If passed array is not C-style, throw an error. OPENVINO_THROW("SHARED MEMORY MODE FOR THIS CONSTANT IS NOT APPLICABLE! Passed numpy array must be C contiguous."); @@ -202,7 +339,7 @@ ov::op::v0::Constant create_shared(ov::Tensor& tensor) { template <> ov::Tensor create_copied(py::array& array) { // Create actual Tensor. - auto tensor = ov::Tensor(array_helpers::get_ov_type(array), array_helpers::get_shape(array)); + auto tensor = ov::Tensor(type_helpers::get_ov_type(array), array_helpers::get_shape(array)); // If size of an array is equal to 0, the array is empty. // Alternative could be `array.nbytes()`. // Do not copy data from it, only return empty tensor based on type. @@ -211,7 +348,13 @@ ov::Tensor create_copied(py::array& array) { } // Convert to contiguous array if not already in C-style. if (!array_helpers::is_contiguous(array)) { - array = array_helpers::as_contiguous(array, array_helpers::get_ov_type(array)); + array = array_helpers::as_contiguous(array, type_helpers::get_ov_type(array)); + } + // Special case with string data type of kind "S"(bytes_) or "U"(str_). + // pass through check for other string types like bytes and str. + if (type_helpers::get_ov_type(array) == ov::element::string) { + string_helpers::fill_string_tensor_data(tensor, array); + return tensor; } // If ndim of py::array is 0, array is a numpy scalar. That results in size to be equal to 0. std::memcpy(tensor.data(), @@ -222,12 +365,15 @@ ov::Tensor create_copied(py::array& array) { template <> ov::Tensor create_shared(py::array& array) { + if (type_helpers::get_ov_type(array) == ov::element::string) { + OPENVINO_THROW("SHARED MEMORY MODE FOR THIS TENSOR IS NOT APPLICABLE! String types can be only copied."); + } // Check if passed array has C-style contiguous memory layout. // If memory is going to be shared it needs to be contiguous before passing to the constructor. if (array_helpers::is_contiguous(array)) { // If ndim of py::array is 0, array is a numpy scalar. // If size of an array is equal to 0, the array is empty. - return ov::Tensor(array_helpers::get_ov_type(array), + return ov::Tensor(type_helpers::get_ov_type(array), array_helpers::get_shape(array), (array.ndim() == 0 || array.size() == 0) ? array.mutable_data() : array.mutable_data(0)); } @@ -236,7 +382,11 @@ ov::Tensor create_shared(py::array& array) { } ov::Tensor tensor_from_pointer(py::array& array, const ov::Shape& shape, const ov::element::Type& type) { - auto element_type = (type == ov::element::undefined) ? Common::dtype_to_ov_type().at(py::str(array.dtype())) : type; + if (type_helpers::get_ov_type(array) == ov::element::string) { + OPENVINO_THROW("SHARED MEMORY MODE FOR THIS TENSOR IS NOT APPLICABLE! String types can be only copied."); + } + + auto element_type = (type == ov::element::undefined) ? Common::type_helpers::get_ov_type(array) : type; if (array_helpers::is_contiguous(array)) { return ov::Tensor(element_type, shape, const_cast(array.data(0)), {}); @@ -245,7 +395,11 @@ ov::Tensor tensor_from_pointer(py::array& array, const ov::Shape& shape, const o } ov::Tensor tensor_from_pointer(py::array& array, const ov::Output& port) { - auto array_type = array_helpers::get_ov_type(array); + if (type_helpers::get_ov_type(array) == ov::element::string) { + OPENVINO_THROW("SHARED MEMORY MODE FOR THIS TENSOR IS NOT APPLICABLE! String types can be only copied."); + } + + auto array_type = type_helpers::get_ov_type(array); auto array_shape_size = ov::shape_size(array_helpers::get_shape(array)); auto port_element_type = port.get_element_type(); auto port_shape_size = ov::shape_size(port.get_partial_shape().is_dynamic() ? ov::Shape{0} : port.get_shape()); @@ -348,7 +502,15 @@ uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual) { py::dict outputs_to_dict(InferRequestWrapper& request, bool share_outputs) { py::dict res; for (const auto& out : request.m_outputs) { - res[py::cast(out)] = array_helpers::array_from_tensor(request.m_request.get_tensor(out), share_outputs); + auto t = request.m_request.get_tensor(out); + if (t.get_element_type() == ov::element::string) { + if (share_outputs) { + PyErr_WarnEx(PyExc_RuntimeWarning, "Result of a string type will be copied to OVDict!", 1); + } + res[py::cast(out)] = string_helpers::string_array_from_tensor(std::move(t)); + } else { + res[py::cast(out)] = array_helpers::array_from_tensor(std::move(t), share_outputs); + } } return res; } diff --git a/src/bindings/python/src/pyopenvino/core/common.hpp b/src/bindings/python/src/pyopenvino/core/common.hpp index 187f0d87ce5a15..9d504aff5da36d 100644 --- a/src/bindings/python/src/pyopenvino/core/common.hpp +++ b/src/bindings/python/src/pyopenvino/core/common.hpp @@ -40,17 +40,40 @@ constexpr size_t min_bitwidth = sizeof(int8_t) * CHAR_BIT; }; // namespace values +// Helpers for dtypes and OpenVINO types +namespace type_helpers { + const std::map& ov_type_to_dtype(); +py::dtype get_dtype(const ov::element::Type& ov_type); + const std::map& dtype_to_ov_type(); +ov::element::Type get_ov_type(const py::array& array); + +ov::element::Type get_ov_type(py::dtype& dtype); +} + +// Helpers for string types and numpy arrays of strings +namespace string_helpers { + +py::array bytes_array_from_tensor(ov::Tensor&& t); + +py::array string_array_from_tensor(ov::Tensor&& t); + +void fill_tensor_from_bytes(ov::Tensor& tensor, py::array& array); + +void fill_tensor_from_strings(ov::Tensor& tensor, py::array& array); + +void fill_string_tensor_data(ov::Tensor& tensor, py::array& array); + +}; // namespace string_helpers + // Helpers for numpy arrays namespace array_helpers { bool is_contiguous(const py::array& array); -ov::element::Type get_ov_type(const py::array& array); - std::vector get_shape(const py::array& array); std::vector get_strides(const py::array& array); diff --git a/src/bindings/python/src/pyopenvino/core/core.cpp b/src/bindings/python/src/pyopenvino/core/core.cpp index 734d8ea1ef9ad9..68d8775d24440f 100644 --- a/src/bindings/python/src/pyopenvino/core/core.cpp +++ b/src/bindings/python/src/pyopenvino/core/core.cpp @@ -15,11 +15,6 @@ namespace py = pybind11; -inline std::string to_string(py::handle handle) { - auto encodedString = PyUnicode_AsUTF8String(handle.ptr()); - return PyBytes_AsString(encodedString); -} - void regclass_Core(py::module m) { py::class_> cls(m, "Core"); cls.doc() = diff --git a/src/bindings/python/src/pyopenvino/core/tensor.cpp b/src/bindings/python/src/pyopenvino/core/tensor.cpp index a4b48f6ed87e7a..e7b94ffece0335 100644 --- a/src/bindings/python/src/pyopenvino/core/tensor.cpp +++ b/src/bindings/python/src/pyopenvino/core/tensor.cpp @@ -24,7 +24,7 @@ void regclass_Tensor(py::module m) { R"( Tensor's special constructor. - :param array: Array to create tensor from. + :param array: Array to create the tensor from. :type array: numpy.array :param shared_memory: If `True`, this Tensor memory is being shared with a host, that means the responsibility of keeping host memory is @@ -105,30 +105,48 @@ void regclass_Tensor(py::module m) { t = ov.Tensor(arr, [100, 8], ov.Type.u1) )"); + // It may clash in future with overloads like + cls.def(py::init([](py::list& list) { + auto array = py::array(list); + return Common::object_from_data(array, false); + }), + py::arg("list"), + R"( + Tensor's special constructor. + + Creates a Tensor from a given Python list. + Warning: It is always a copy of list's data! + + :param array: List to create the tensor from. + :type array: List[int, float, str] + )"); + cls.def(py::init(), py::arg("type"), py::arg("shape")); cls.def(py::init>(), py::arg("type"), py::arg("shape")); cls.def(py::init([](py::dtype& np_dtype, std::vector& shape) { - return ov::Tensor(Common::dtype_to_ov_type().at(py::str(np_dtype)), shape); + return ov::Tensor(Common::type_helpers::get_ov_type(np_dtype), shape); }), py::arg("type"), py::arg("shape")); cls.def(py::init([](py::object& np_literal, std::vector& shape) { - return ov::Tensor(Common::dtype_to_ov_type().at(py::str(py::dtype::from_args(np_literal))), shape); + auto dtype = py::dtype::from_args(np_literal); + return ov::Tensor(Common::type_helpers::get_ov_type(dtype), shape); }), py::arg("type"), py::arg("shape")); cls.def(py::init([](py::dtype& np_dtype, const ov::Shape& shape) { - return ov::Tensor(Common::dtype_to_ov_type().at(py::str(np_dtype)), shape); + return ov::Tensor(Common::type_helpers::get_ov_type(np_dtype), shape); }), py::arg("type"), py::arg("shape")); cls.def(py::init([](py::object& np_literal, const ov::Shape& shape) { - return ov::Tensor(Common::dtype_to_ov_type().at(py::str(py::dtype::from_args(np_literal))), shape); + auto dtype = py::dtype::from_args(np_literal); + return ov::Tensor(Common::type_helpers::get_ov_type(dtype), shape); }), py::arg("type"), py::arg("shape")); @@ -271,12 +289,75 @@ void regclass_Tensor(py::module m) { Access to Tensor's data. Returns numpy array with corresponding shape and dtype. - For tensors with openvino specific element type, such as u1, u4 or i4 + + For tensors with OpenVINO specific element type, such as u1, u4 or i4 it returns linear array, with uint8 / int8 numpy dtype. + For tensors with string element type, returns a numpy array of bytes + without any decoding. + To change the underlaying data use `str_data`/`bytes_data` properties + or the `copy_from` function. + Warning: Data of string type is always a copy of underlaying memory! + :rtype: numpy.array )"); + cls.def_property( + "bytes_data", + [](ov::Tensor& self) { + return Common::string_helpers::bytes_array_from_tensor(std::forward(self)); + }, + [](ov::Tensor& self, py::object& other) { + if (py::isinstance(other)) { + auto array = other.cast(); + Common::string_helpers::fill_string_tensor_data(self, array); + } else if (py::isinstance(other)) { + auto array = py::array(other.cast()); + Common::string_helpers::fill_string_tensor_data(self, array); + } else { + OPENVINO_THROW("Invalid data to fill String Tensor!"); + } + return; + }, + R"( + Access to Tensor's data with string Type in `np.bytes_` dtype. + + Getter returns a numpy array with corresponding shape and dtype. + Warning: Data of string type is always a copy of underlaying memory! + + Setter fills underlaying Tensor's memory by copying strings from `other`. + `other` must have the same size (number of elements) as the Tensor. + Tensor's shape is not changed by performing this operation! + )"); + + cls.def_property( + "str_data", + [](ov::Tensor& self) { + return Common::string_helpers::string_array_from_tensor(std::forward(self)); + }, + [](ov::Tensor& self, py::object& other) { + if (py::isinstance(other)) { + auto array = other.cast(); + Common::string_helpers::fill_string_tensor_data(self, array); + } else if (py::isinstance(other)) { + auto array = py::array(other.cast()); + Common::string_helpers::fill_string_tensor_data(self, array); + } else { + OPENVINO_THROW("Invalid data to fill String Tensor!"); + } + return; + }, + R"( + Access to Tensor's data with string Type in `np.str_` dtype. + + Getter returns a numpy array with corresponding shape and dtype. + Warning: Data of string type is always a copy of underlaying memory! + + Setter fills underlaying Tensor's memory by copying strings from `other`. + `other` must have the same size (number of elements) as the Tensor. + Tensor's shape is not changed by performing this operation! + )"); + cls.def("get_shape", &ov::Tensor::get_shape, R"( @@ -310,6 +391,47 @@ void regclass_Tensor(py::module m) { Copy tensor's data to a destination tensor. The destination tensor should have the same element type and shape. )"); + cls.def( + "copy_from", + [](ov::Tensor& self, ov::Tensor& source) { + return source.copy_to(self); + }, + py::arg("source"), + R"( + Copy source tensor's data to this tensor. Tensors should have the same element type and shape. + )"); + + cls.def( + "copy_from", + [](ov::Tensor& self, py::array& source) { + auto _source = Common::object_from_data(source, false); + if (self.get_shape() != _source.get_shape()) { + self.set_shape(_source.get_shape()); + } + return _source.copy_to(self); + }, + py::arg("source"), + R"( + Copy the source to this tensor. This tensor and the source should have the same element type. + Shape will be adjusted if there is a mismatch. + )"); + + cls.def( + "copy_from", + [](ov::Tensor& self, py::list& source) { + auto array = py::array(source); + auto _source = Common::object_from_data(array, false); + if (self.get_shape() != _source.get_shape()) { + self.set_shape(_source.get_shape()); + } + return _source.copy_to(self); + }, + py::arg("source"), + R"( + Copy the source to this tensor. This tensor and the source should have the same element type. + Shape will be adjusted if there is a mismatch. + )"); + cls.def("is_continuous", &ov::Tensor::is_continuous, R"( diff --git a/src/bindings/python/src/pyopenvino/graph/ops/constant.cpp b/src/bindings/python/src/pyopenvino/graph/ops/constant.cpp index 69348f1f947547..f0d1dc2d177ce5 100644 --- a/src/bindings/python/src/pyopenvino/graph/ops/constant.cpp +++ b/src/bindings/python/src/pyopenvino/graph/ops/constant.cpp @@ -203,7 +203,7 @@ void regclass_graph_op_Constant(py::module m) { "get_data", [](ov::op::v0::Constant& self) { auto ov_type = self.get_element_type(); - auto dtype = Common::ov_type_to_dtype().at(ov_type); + auto dtype = Common::type_helpers::get_dtype(ov_type); if (ov_type.bitwidth() < Common::values::min_bitwidth) { return py::array(dtype, self.get_byte_size(), self.get_data_ptr()); } @@ -223,7 +223,7 @@ void regclass_graph_op_Constant(py::module m) { "data", [](ov::op::v0::Constant& self) { auto ov_type = self.get_element_type(); - auto dtype = Common::ov_type_to_dtype().at(ov_type); + auto dtype = Common::type_helpers::get_dtype(ov_type); if (ov_type.bitwidth() < Common::values::min_bitwidth) { return py::array(dtype, self.get_byte_size(), self.get_data_ptr(), py::cast(self)); } diff --git a/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp b/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp index 1b5e8fda19eb35..595d06fb073145 100644 --- a/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp +++ b/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp @@ -18,7 +18,8 @@ void regclass_graph_Type(py::module m) { type.doc() = "openvino.runtime.Type wraps ov::element::Type"; type.def(py::init([](py::object& np_literal) { - return Common::dtype_to_ov_type().at(py::str(py::dtype::from_args(np_literal))); + auto dtype = py::dtype::from_args(np_literal); + return Common::type_helpers::get_ov_type(dtype); }), py::arg("dtype"), R"( @@ -49,6 +50,7 @@ void regclass_graph_Type(py::module m) { type.attr("u64") = ov::element::u64; type.attr("bf16") = ov::element::bf16; type.attr("nf4") = ov::element::nf4; + type.attr("string") = ov::element::string; type.def("__hash__", &ov::element::Type::hash); type.def("__repr__", [](const ov::element::Type& self) { @@ -120,7 +122,7 @@ void regclass_graph_Type(py::module m) { type.def( "to_dtype", [](ov::element::Type& self) { - return Common::ov_type_to_dtype().at(self); + return Common::type_helpers::get_dtype(self); }, R"( Convert Type to numpy dtype. diff --git a/src/bindings/python/tests/test_runtime/test_sync_infer_request.py b/src/bindings/python/tests/test_runtime/test_sync_infer_request.py index 804860a4debc14..739d905f4dc94d 100644 --- a/src/bindings/python/tests/test_runtime/test_sync_infer_request.py +++ b/src/bindings/python/tests/test_runtime/test_sync_infer_request.py @@ -612,7 +612,10 @@ def test_inputs_tuple_not_replaced(device, share_inputs): def test_invalid_inputs(device, share_inputs): request, _, _ = create_simple_request_and_inputs(device) - inputs = "some_input" + class InvalidInput(): + pass + + inputs = InvalidInput() with pytest.raises(TypeError) as e: request.infer(inputs, share_inputs=share_inputs) diff --git a/src/bindings/python/tests/test_runtime/test_tensor.py b/src/bindings/python/tests/test_runtime/test_tensor.py index 4b0a4755a1347f..bc8d3beaddf5a1 100644 --- a/src/bindings/python/tests/test_runtime/test_tensor.py +++ b/src/bindings/python/tests/test_runtime/test_tensor.py @@ -560,3 +560,23 @@ def test_init_from_empty_array(shared_flag, init_value): assert tensor.element_type.to_dtype() == init_value.dtype assert tensor.byte_size == init_value.nbytes assert np.array_equal(tensor.data, init_value) + + +@pytest.mark.parametrize( + "init_value", + [ + ([1.0, 2.0, 3.0]), + ([21, 37, 42]), + ([[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]]), + ([[2.2, 6.5], [0.2, 6.7]]), + ], +) +def test_init_from_list(init_value): + tensor = ov.Tensor(init_value) + assert np.array_equal(tensor.data, init_value) + # Convert to numpy to perform all checks. Memory is not shared, + # so it does not matter if data is stored in numpy format. + _init_value = np.array(init_value) + assert tuple(tensor.shape) == _init_value.shape + assert tensor.element_type.to_dtype() == _init_value.dtype + assert tensor.byte_size == _init_value.nbytes diff --git a/src/bindings/python/tests/test_runtime/test_tensor_string.py b/src/bindings/python/tests/test_runtime/test_tensor_string.py new file mode 100644 index 00000000000000..0d3e2b7f7df702 --- /dev/null +++ b/src/bindings/python/tests/test_runtime/test_tensor_string.py @@ -0,0 +1,256 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import openvino as ov + +import pytest + +from enum import Enum + + +class DataGetter(Enum): + BYTES = 1 + STRINGS = 2 + + +def _check_tensor_string(tensor_data, test_data): + assert tensor_data.shape == test_data.shape + assert tensor_data.strides == test_data.strides + assert np.array_equal(tensor_data, test_data) + assert not (np.shares_memory(tensor_data, test_data)) + + +def check_bytes_based(tensor, string_data, to_flat=False): + tensor_data = tensor.bytes_data + encoded_data = string_data if string_data.dtype.kind == "S" else np.char.encode(string_data) + assert tensor_data.dtype.kind == "S" + _check_tensor_string(tensor_data.flatten() if to_flat else tensor_data, encoded_data.flatten() if to_flat else encoded_data) + + +def check_string_based(tensor, string_data, to_flat=False): + tensor_data = tensor.str_data + decoded_data = string_data if string_data.dtype.kind == "U" else np.char.decode(string_data) + assert tensor_data.dtype.kind == "U" + _check_tensor_string(tensor_data.flatten() if to_flat else tensor_data, decoded_data.flatten() if to_flat else decoded_data) + + +def test_string_tensor_shared_memory_fails(): + data = np.array(["You", "shall", "not", "pass!"]) + with pytest.raises(RuntimeError) as e: + _ = ov.Tensor(data, shared_memory=True) + assert "SHARED MEMORY MODE FOR THIS TENSOR IS NOT APPLICABLE! String types can be only copied." in str(e.value) + + +def test_string_tensor_data_warning(): + data = np.array(["You", "shall", "not", "pass!"]) + tensor = ov.Tensor(data, shared_memory=False) + with pytest.warns(RuntimeWarning) as w: + _ = tensor.data + assert "Data of string type will be copied! Please use dedicated properties" in str(w[0].message) + + +@pytest.mark.parametrize( + ("init_type"), + [ + (ov.Type.string), + (str), + (bytes), + (np.str_), + (np.bytes_), + ], +) +def test_empty_string_tensor(init_type): + tensor = ov.Tensor(type=init_type, shape=ov.Shape([2, 2])) + assert tensor.element_type == ov.Type.string + + +@pytest.mark.parametrize( + ("string_data"), + [ + ([bytes("text", encoding="utf-8"), bytes("openvino", encoding="utf-8")]), + ([[b"xyz"], [b"abc"], [b"this is my last"]]), + (["text", "abc", "openvino"]), + (["text", "больше текста", "jeszcze więcej słów", "효과가 있었어"]), + ([["text"], ["abc"], ["openvino"]]), + ([["jeszcze więcej słów", "효과가 있었어"]]), + ], +) +def test_init_with_list(string_data): + tensor = ov.Tensor(string_data) + assert tensor.element_type == ov.Type.string + # Convert to numpy to perform all checks. Memory is not shared, + # so it does not matter if data is stored in numpy format. + _string_data = np.array(string_data) + # Encoded: + check_bytes_based(tensor, _string_data) + # Decoded: + check_string_based(tensor, _string_data) + + +@pytest.mark.parametrize( + ("string_data"), + [ + (np.array(["text", "abc", "openvino"]).astype("S")), # "|S" + (np.array([["xyz"], ["abc"]]).astype(np.bytes_)), # "|S" + (np.array(["text", "abc", "openvino"])), # "