Skip to content

Commit

Permalink
[PyOV] Remove bytes padding in fill_tensor_from_bytes (#28324)
Browse files Browse the repository at this point in the history
### Details:
- Strings are unexpectedly padded with trailing null bytes when creating an
`ov::Tensor` from bytes in the Python API.
For example, with the strings "hi" and "i'm a string":

```
[Python] hi (bytes: 68 69)
[C++] hi (bytes: 68 69 00 00 00 00 00 00 00 00 00 00)

[Python] i'm a string (bytes: 69 27 6d 20 61 20 73 74 72 69 6e 67)
[C++] i'm a string (bytes: 69 27 6d 20 61 20 73 74 72 69 6e 67)
```

 - After the change, bytes are no longer padded


```
[Python] hi (bytes: 68 69)
[C++] hi (bytes: 68 69)

[Python] i'm a string (bytes: 69 27 6d 20 61 20 73 74 72 69 6e 67)
[C++] i'm a string (bytes: 69 27 6d 20 61 20 73 74 72 69 6e 67)
```

### Tickets:
 - CVS-159581

---------

Signed-off-by: p-wysocki <[email protected]>
  • Loading branch information
p-wysocki authored Jan 13, 2025
1 parent 7cf3e8c commit 2c80544
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
11 changes: 10 additions & 1 deletion src/bindings/python/src/pyopenvino/core/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,14 @@ py::array string_array_from_tensor(ov::Tensor&& t) {
return array;
}

/// @brief Locates the end of the meaningful data in a fixed-width byte item.
///
/// Scans the range [ptr, ptr + itemsize) backwards and skips every trailing
/// '\0' byte. Null bytes that are followed by a non-null byte are kept.
///
/// @param ptr      Start of the item.
/// @param itemsize Number of bytes in the item.
/// @return Pointer one past the last non-null byte, so that
///         std::string(ptr, result) holds the item without trailing nulls.
///         Equals @p ptr when the item is empty or consists only of nulls.
static const char* find_first_not_null(const char* ptr, size_t itemsize) {
    const char* trimmed_end = ptr + itemsize;
    // Walk back from the end until the preceding byte is not a null.
    while (trimmed_end != ptr && *(trimmed_end - 1) == '\0') {
        --trimmed_end;
    }
    return trimmed_end;
}

void fill_tensor_from_bytes(ov::Tensor& tensor, py::array& array) {
if (tensor.get_size() != static_cast<size_t>(array.size())) {
OPENVINO_THROW("Passed array must have the same size (number of elements) as the Tensor!");
Expand All @@ -177,7 +185,8 @@ void fill_tensor_from_bytes(ov::Tensor& tensor, py::array& array) {
auto data = tensor.data<std::string>();
for (size_t i = 0; i < tensor.get_size(); ++i) {
const char* ptr = reinterpret_cast<const char*>(buf.ptr) + (i * buf.itemsize);
data[i] = std::string(ptr, buf.ndim == 0 ? buf.itemsize : buf.strides[0]);
auto first_not_null = find_first_not_null(ptr, buf.itemsize);
data[i] = std::string(ptr, first_not_null);
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/bindings/python/tests/test_runtime/test_string_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def test_default_decode_flag(class_defaults, expected_value):
[
([bytes("text", encoding="utf-8"), bytes("openvino", encoding="utf-8")], [-1]),
([[b"xyz"], [b"abc"], [b"this is my last"]], [3, -1]),
([b"text\0with\0null", b"openvino\0"], [-1]),
(["text", "abc", "openvino"], [3]),
(["text", "больше текста", "jeszcze więcej słów", "효과가 있었어"], [-1]),
([["text"], ["abc"], ["openvino"]], [3, 1]),
Expand All @@ -91,6 +92,7 @@ def test_infer_request_infer(string_data, data_shape, decode_strings):
[
([bytes("text", encoding="utf-8"), bytes("openvino", encoding="utf-8")], [-1]),
([[b"xyz"], [b"abc"], [b"this is my last"]], [3, -1]),
([b"text\0with\0null", b"openvino\0"], [-1]),
(["text", "abc", "openvino"], [3]),
(["text", "больше текста", "jeszcze więcej słów", "효과가 있었어"], [-1]),
([["text"], ["abc"], ["openvino"]], [3, 1]),
Expand Down
2 changes: 2 additions & 0 deletions src/bindings/python/tests/test_runtime/test_tensor_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def test_empty_string_tensor(init_type):
[
([bytes("text", encoding="utf-8"), bytes("openvino", encoding="utf-8")]),
([[b"xyz"], [b"abc"], [b"this is my last"]]),
([[b"text\0with\0null"], [b"openvino\0"]]),
(["text", "abc", "openvino"]),
(["text", "больше текста", "jeszcze więcej słów", "효과가 있었어"]),
([["text"], ["abc"], ["openvino"]]),
Expand Down Expand Up @@ -223,6 +224,7 @@ def test_populate_fails_type_check(string_data):
(ov.Shape([3]), np.array(["text", "больше текста", "jeszcze więcej słów"])),
(ov.Shape([3]), [b"xyz", b"abc", b"this is my last"]),
(ov.Shape([3]), ["text", "abc", "openvino"]),
(ov.Shape([2]), [[b"text\0with\0null"], [b"openvino\0"]]),
(ov.Shape([3]), ["text", "больше текста", "jeszcze więcej słów"]),
(ov.Shape([2, 2]), np.array(["text", "abc", "openvino", "different"]).astype(np.bytes_)),
(ov.Shape([2, 2]), np.array(["text", "больше текста", "jeszcze więcej słów", "abcdefg"])),
Expand Down

0 comments on commit 2c80544

Please sign in to comment.