[ManagedDB] REST API-based thin client for ManagedService #11878
3033 tests run, 1652 passed, 1339 skipped, 42 failed.
Annotations
Check failure on line 1 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
github-actions / JUnit Test Report
test_deepmemory.test_deepmemory_train_and_cancel
failed on setup with "NotImplementedError: Accessing the dataset is not implemented for managed Vector Store yet."
Raw output
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7eff70da8ee0>
@property
def dataset(self):
"""Returns the dataset"""
try:
> return self.dataset_handler.dataset
E AttributeError: 'ManagedDH' object has no attribute 'dataset'
deeplake/core/vectorstore/deeplake_vectorstore.py:523: AttributeError
During handling of the above exception, another exception occurred:
request = <SubRequest 'corpus_query_relevances_copy' for <Function test_deepmemory_train_and_cancel>>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjU5MywiZXhwIjoxNzA5NTIyNTkzfQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.7kRFpUz4tRt5LslKwClIGFsYkOcdCWWS9Z-5mh46KHpzTkN8sM0b6xgxCBgYtMWnLGwkNshBn_wBRFDcxYZbUA'
@pytest.fixture
def corpus_query_relevances_copy(request, hub_cloud_dev_token):
if not is_opt_true(request, HUB_CLOUD_OPT):
pytest.skip(f"{HUB_CLOUD_OPT} flag not set")
return
corpus = _get_storage_path(request, HUB_CLOUD)
query_vs = VectorStore(
path=f"hub://{HUB_CLOUD_DEV_USERNAME}/deepmemory_test_queries2",
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
)
> queries = query_vs.dataset.text.data()["value"]
deeplake/tests/path_fixtures.py:487:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7eff70da8ee0>
@property
def dataset(self):
"""Returns the dataset"""
try:
return self.dataset_handler.dataset
except AttributeError:
> raise NotImplementedError(
"Acessing the dataset is not implemented for managed Vector Store yet."
)
E NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
deeplake/core/vectorstore/deeplake_vectorstore.py:525: NotImplementedError
Check failure on line 780 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
github-actions / JUnit Test Report
test_deepmemory.test_deepmemory_evaluate_with_embedding_function_specified_in_constructor_should_not_throw_any_exception
NotImplementedError: Accessing the dataset is not implemented for managed Vector Store yet.
Raw output
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7eff5dffee60>
@property
def dataset(self):
"""Returns the dataset"""
try:
> return self.dataset_handler.dataset
E AttributeError: 'ManagedDH' object has no attribute 'dataset'
deeplake/core/vectorstore/deeplake_vectorstore.py:523: AttributeError
During handling of the above exception, another exception occurred:
corpus_query_pair_path = ('hub://testingacc2/deepmemory_test_corpus_managed_2', 'hub://testingacc2/deepmemory_test_corpus_managed_2_eval_queries')
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjU5MywiZXhwIjoxNzA5NTIyNTkzfQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.7kRFpUz4tRt5LslKwClIGFsYkOcdCWWS9Z-5mh46KHpzTkN8sM0b6xgxCBgYtMWnLGwkNshBn_wBRFDcxYZbUA'
@pytest.mark.slow
@pytest.mark.flaky(reruns=3)
@pytest.mark.skipif(sys.platform == "win32", reason="Does not run on Windows")
def test_deepmemory_evaluate_with_embedding_function_specified_in_constructor_should_not_throw_any_exception(
corpus_query_pair_path,
hub_cloud_dev_token,
):
corpus, queries = corpus_query_pair_path
db = VectorStore(
path=corpus,
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
embedding_function=embedding_fn,
)
queries_vs = VectorStore(
path=queries,
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
embedding_function=embedding_fn,
)
> queries = queries_vs.dataset[:10].text.data()["value"]
deeplake/core/vectorstore/deep_memory/test_deepmemory.py:780:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7eff5dffee60>
@property
def dataset(self):
"""Returns the dataset"""
try:
return self.dataset_handler.dataset
except AttributeError:
> raise NotImplementedError(
"Acessing the dataset is not implemented for managed Vector Store yet."
)
E NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
deeplake/core/vectorstore/deeplake_vectorstore.py:525: NotImplementedError
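Both deep memory failures above share one root cause: with the managed (REST API based) thin client, the dataset handler ('ManagedDH') has no local dataset object, so VectorStore.dataset raises NotImplementedError and any fixture or test that reads query texts through it fails on setup. Below is a minimal sketch of a guard such tests could use until dataset access is implemented; the helper name is hypothetical, but the constructor arguments and the .dataset.text.data()["value"] access mirror the fixtures above.

import pytest
from deeplake.core.vectorstore.deeplake_vectorstore import VectorStore

def load_query_texts(path: str, token: str):
    # Hypothetical helper (not part of the test suite): open the queries store and
    # return its text column, skipping when the store is managed and therefore does
    # not expose the underlying dataset yet.
    vs = VectorStore(path=path, runtime={"tensor_db": True}, token=token)
    try:
        return vs.dataset.text.data()["value"]
    except NotImplementedError:
        pytest.skip("dataset access is not implemented for managed Vector Store yet")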
Check failure on line 505 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_generic
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66027840d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6602784690>])
samples = [1, 1, 1, 1, 'hello'], operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='xyz')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1705: in _update
self._update_non_tiled_sample(
deeplake/core/chunk_engine.py:1341: in _update_non_tiled_sample
chunk.update_sample(local_sample_index, sample)
deeplake/core/chunk/uncompressed_chunk.py:265: in update_sample
serialized_sample, shape = self.serialize_sample(sample, break_into_tiles=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk.uncompressed_chunk.UncompressedChunk object at 0x7f66027872d0>
incoming_sample = 'hello', sample_compression = None, chunk_compression = None
break_into_tiles = False, store_uncompressed_tiles = False
def serialize_sample(
self,
incoming_sample: InputSample,
sample_compression: Optional[str] = None,
chunk_compression: Optional[str] = None,
break_into_tiles: bool = True,
store_uncompressed_tiles: bool = False,
) -> SerializedOutput:
"""Converts the sample into bytes"""
dt, ht, min_chunk_size, tiling_threshold = (
self.dtype,
self.htype,
self.min_chunk_size,
self.tiling_threshold,
)
if tiling_threshold < 0:
break_into_tiles = False
if isinstance(incoming_sample, LinkedSample):
if self.tensor_meta.is_link:
incoming_sample = incoming_sample.path
else:
raise ValueError(
"deeplake.link() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if isinstance(incoming_sample, LinkedTiledSample):
if not self.tensor_meta.is_link:
raise ValueError(
"deeplake.link_tiled() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if self.is_text_like:
if isinstance(incoming_sample, LinkedSample):
incoming_sample = incoming_sample.path
if incoming_sample is None:
htype = "text" if self.tensor_meta.is_link else self.htype
empty_mapping = {"text": "", "list": [], "json": {}, "tag": []}
incoming_sample = empty_mapping[htype]
if isinstance(incoming_sample, Sample):
if incoming_sample.is_text_like:
incoming_sample, shape = serialize_text_sample_object( # type: ignore
incoming_sample, sample_compression
)
else:
htype = "Linked" if self.tensor_meta.is_link else self.htype
raise TypeError(
f"Cannot append to {htype} tensor with Sample object"
)
elif isinstance(incoming_sample, LinkedTiledSample):
incoming_sample, shape = serialize_linked_tiled_sample(incoming_sample)
else:
incoming_sample, shape = serialize_text(
incoming_sample, sample_compression, dt, ht # type: ignore
)
elif incoming_sample is None:
shape = (0,) * self.num_dims if self.num_dims else None
incoming_sample = b""
elif isinstance(incoming_sample, Sample):
incoming_sample, shape = serialize_sample_object( # type: ignore
incoming_sample,
sample_compression,
chunk_compression,
dt,
ht,
tiling_threshold,
break_into_tiles,
store_uncompressed_tiles,
)
elif isinstance(incoming_sample, PartialSample):
incoming_sample, shape = serialize_partial_sample_object(
incoming_sample,
sample_compression,
chunk_compression,
dt,
ht,
min_chunk_size,
)
elif isinstance(incoming_sample, deeplake.core.tensor.Tensor):
incoming_sample, shape = serialize_tensor(
incoming_sample,
sample_compression,
chunk_compression,
dt,
ht,
tiling_threshold,
break_into_tiles,
store_uncompressed_tiles,
)
elif isinstance(
incoming_sample,
(np.ndarray, list, int, float, bool, np.integer, np.floating, np.bool_),
):
incoming_sample, shape = serialize_numpy_and_base_types(
incoming_sample,
sample_compression,
chunk_compression,
dt,
ht,
tiling_threshold,
break_into_tiles,
store_uncompressed_tiles,
)
elif isinstance(incoming_sample, SampleTiles):
shape = incoming_sample.sample_shape
elif isinstance(incoming_sample, Polygons):
incoming_sample, shape = serialize_polygons(
incoming_sample, sample_compression, dt
)
else:
msg = f"Cannot serialize sample of type {type(incoming_sample)}."
if isinstance(msg, str):
method = "link" if self.tensor_meta.is_link else "read"
msg += f"If you are appending data from a file, please pass deeplake.{method}(filename) to the append operation, instead of the filename string."
> raise TypeError(msg)
E TypeError: Cannot serialize sample of type <class 'str'>.If you are appending data from a file, please pass deeplake.read(filename) to the append operation, instead of the filename string.
deeplake/core/chunk/base_chunk.py:409: TypeError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_generic', index=Index([slice(5, None, None)]), tensors=['abc', 'xyz'])
sample = {'abc': [1, 1, 1, 1, 1], 'xyz': [1, 1, 1, 1, 'hello']}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66027840d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6602784690>])
samples = [1, 1, 1, 1, 'hello'], operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='xyz')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor xyz.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_generic', tensors=['abc', 'xyz'])
def test_ds_update_generic(local_ds):
with local_ds as ds:
ds.create_tensor("abc")
ds.create_tensor("xyz")
ds.abc.extend(list(range(10)))
ds.xyz.extend(list(range(10)))
ds[0].update({"abc": 1, "xyz": 1})
ds[2:5].update({"abc": [1] * 3, "xyz": [1] * 3})
np.testing.assert_array_equal(ds.abc[:5].numpy().flatten(), [1] * 5)
np.testing.assert_array_equal(ds.xyz[:5].numpy().flatten(), [1] * 5)
with pytest.raises(SampleUpdateError):
> ds[5:].update({"abc": [1] * 5, "xyz": [1] * 4 + ["hello"]})
deeplake/api/tests/test_update_samples.py:505:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_generic', index=Index([slice(5, None, None)]), tensors=['abc', 'xyz'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_generic', index=Index([slice(5, None, None)]), tensors=['abc', 'xyz'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
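This failure, and every test_ds_update_* failure below, follows the same pattern: the update on a sliced view raises the expected SampleUpdateError inside chunk_engine.update, but Dataset.update then tries to roll back with self.reset(verbose=False) on that same view, and the view guard in invalid_view_op.py raises InvalidOperationError, so pytest.raises(SampleUpdateError) sees the wrong exception and the test fails. A self-contained sketch of the failing path for test_ds_update_generic, assuming a local dataset (the path and prints are illustrative only):

import deeplake
from deeplake.util.exceptions import SampleUpdateError

ds = deeplake.empty("./repro_ds_update_generic", overwrite=True)  # illustrative path
ds.create_tensor("abc")
ds.create_tensor("xyz")
ds.abc.extend(list(range(10)))
ds.xyz.extend(list(range(10)))

try:
    # 'hello' cannot be serialized into the numeric xyz tensor, so the update fails
    # and Dataset.update attempts to roll back by calling reset() on the ds[5:] view.
    ds[5:].update({"abc": [1] * 5, "xyz": [1] * 4 + ["hello"]})
except SampleUpdateError:
    print("rollback worked; this is the exception the test expects")
except Exception as e:
    print(f"rollback rejected; CI observes {type(e).__name__} instead")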
Check failure on line 555 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_text_like[lz4-None]
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f6602480610>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6602481dd0>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1705: in _update
self._update_non_tiled_sample(
deeplake/core/chunk_engine.py:1341: in _update_non_tiled_sample
chunk.update_sample(local_sample_index, sample)
deeplake/core/chunk/sample_compressed_chunk.py:163: in update_sample
serialized_sample, shape = self.serialize_sample(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk.sample_compressed_chunk.SampleCompressedChunk object at 0x7f6602481f50>
incoming_sample = Sample(is_lazy=True, path=bad_sample)
sample_compression = 'lz4', chunk_compression = None, break_into_tiles = False
store_uncompressed_tiles = False
def serialize_sample(
self,
incoming_sample: InputSample,
sample_compression: Optional[str] = None,
chunk_compression: Optional[str] = None,
break_into_tiles: bool = True,
store_uncompressed_tiles: bool = False,
) -> SerializedOutput:
"""Converts the sample into bytes"""
dt, ht, min_chunk_size, tiling_threshold = (
self.dtype,
self.htype,
self.min_chunk_size,
self.tiling_threshold,
)
if tiling_threshold < 0:
break_into_tiles = False
if isinstance(incoming_sample, LinkedSample):
if self.tensor_meta.is_link:
incoming_sample = incoming_sample.path
else:
raise ValueError(
"deeplake.link() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if isinstance(incoming_sample, LinkedTiledSample):
if not self.tensor_meta.is_link:
raise ValueError(
"deeplake.link_tiled() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if self.is_text_like:
if isinstance(incoming_sample, LinkedSample):
incoming_sample = incoming_sample.path
if incoming_sample is None:
htype = "text" if self.tensor_meta.is_link else self.htype
empty_mapping = {"text": "", "list": [], "json": {}, "tag": []}
incoming_sample = empty_mapping[htype]
if isinstance(incoming_sample, Sample):
if incoming_sample.is_text_like:
incoming_sample, shape = serialize_text_sample_object( # type: ignore
incoming_sample, sample_compression
)
else:
htype = "Linked" if self.tensor_meta.is_link else self.htype
> raise TypeError(
f"Cannot append to {htype} tensor with Sample object"
E TypeError: Cannot append to json tensor with Sample object
deeplake/core/chunk/base_chunk.py:341: TypeError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-lz4-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
sample = {'json': [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)], 'list': [[1, 2, 3], [1, 2, 3], [1, 2, 3]], 'text': ['hello', 'hello', 'hello']}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f6602480610>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6602481dd0>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor json.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-lz4-None-', tensors=['json', 'list', 'text'])
sc = 'lz4', cc = None
@pytest.mark.parametrize(("sc", "cc"), [("lz4", None), (None, "lz4"), (None, None)])
def test_ds_update_text_like(local_ds, sc, cc):
with local_ds as ds:
ds.create_tensor(
"text", htype="text", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"list", htype="list", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"json", htype="json", sample_compression=sc, chunk_compression=cc
)
text_samples = (["hello"] + ["world"] * 2) * 2
t = "hello"
ds.text.extend(text_samples)
list_samples = ([[1, 2, 3]] + [[4, 5, 6]] * 2) * 2
l = [1, 2, 3]
ds.list.extend(list_samples)
json_samples = ([{"a": 1}] + [{"b": 2, "c": 3}] * 2) * 2
j = {"a": 1}
ds.json.extend(json_samples)
ds[1].update({"text": t, "list": l, "json": j})
assert ds[1].text.data()["value"] == t
assert ds[1].list.data()["value"] == l
assert ds[1].json.data()["value"] == j
ds[:3].update({"text": [t] * 3, "list": [l] * 3, "json": [j] * 3})
assert ds[:3].text.data()["value"] == [t] * 3
assert ds[:3].list.data()["value"] == [l] * 3
assert ds[:3].json.data()["value"] == [j] * 3
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"text": [t] * 3,
"list": [l] * 3,
"json": [j] * 2 + [deeplake.read("bad_sample")],
}
)
deeplake/api/tests/test_update_samples.py:555:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-lz4-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-lz4-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
Check failure on line 555 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_text_like[None-lz4]
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f660280c8d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f660280c790>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1705: in _update
self._update_non_tiled_sample(
deeplake/core/chunk_engine.py:1341: in _update_non_tiled_sample
chunk.update_sample(local_sample_index, sample)
deeplake/core/chunk/chunk_compressed_chunk.py:449: in update_sample
self.update_sample_byte_compression(local_index, new_sample)
deeplake/core/chunk/chunk_compressed_chunk.py:454: in update_sample_byte_compression
serialized_sample, shape = self.serialize_sample(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk.chunk_compressed_chunk.ChunkCompressedChunk object at 0x7f660280d490>
incoming_sample = Sample(is_lazy=True, path=bad_sample)
sample_compression = None, chunk_compression = 'lz4', break_into_tiles = False
store_uncompressed_tiles = False
def serialize_sample(
self,
incoming_sample: InputSample,
sample_compression: Optional[str] = None,
chunk_compression: Optional[str] = None,
break_into_tiles: bool = True,
store_uncompressed_tiles: bool = False,
) -> SerializedOutput:
"""Converts the sample into bytes"""
dt, ht, min_chunk_size, tiling_threshold = (
self.dtype,
self.htype,
self.min_chunk_size,
self.tiling_threshold,
)
if tiling_threshold < 0:
break_into_tiles = False
if isinstance(incoming_sample, LinkedSample):
if self.tensor_meta.is_link:
incoming_sample = incoming_sample.path
else:
raise ValueError(
"deeplake.link() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if isinstance(incoming_sample, LinkedTiledSample):
if not self.tensor_meta.is_link:
raise ValueError(
"deeplake.link_tiled() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if self.is_text_like:
if isinstance(incoming_sample, LinkedSample):
incoming_sample = incoming_sample.path
if incoming_sample is None:
htype = "text" if self.tensor_meta.is_link else self.htype
empty_mapping = {"text": "", "list": [], "json": {}, "tag": []}
incoming_sample = empty_mapping[htype]
if isinstance(incoming_sample, Sample):
if incoming_sample.is_text_like:
incoming_sample, shape = serialize_text_sample_object( # type: ignore
incoming_sample, sample_compression
)
else:
htype = "Linked" if self.tensor_meta.is_link else self.htype
> raise TypeError(
f"Cannot append to {htype} tensor with Sample object"
E TypeError: Cannot append to json tensor with Sample object
deeplake/core/chunk/base_chunk.py:341: TypeError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-lz4-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
sample = {'json': [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)], 'list': [[1, 2, 3], [1, 2, 3], [1, 2, 3]], 'text': ['hello', 'hello', 'hello']}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f660280c8d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f660280c790>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor json.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-lz4-', tensors=['json', 'list', 'text'])
sc = None, cc = 'lz4'
@pytest.mark.parametrize(("sc", "cc"), [("lz4", None), (None, "lz4"), (None, None)])
def test_ds_update_text_like(local_ds, sc, cc):
with local_ds as ds:
ds.create_tensor(
"text", htype="text", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"list", htype="list", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"json", htype="json", sample_compression=sc, chunk_compression=cc
)
text_samples = (["hello"] + ["world"] * 2) * 2
t = "hello"
ds.text.extend(text_samples)
list_samples = ([[1, 2, 3]] + [[4, 5, 6]] * 2) * 2
l = [1, 2, 3]
ds.list.extend(list_samples)
json_samples = ([{"a": 1}] + [{"b": 2, "c": 3}] * 2) * 2
j = {"a": 1}
ds.json.extend(json_samples)
ds[1].update({"text": t, "list": l, "json": j})
assert ds[1].text.data()["value"] == t
assert ds[1].list.data()["value"] == l
assert ds[1].json.data()["value"] == j
ds[:3].update({"text": [t] * 3, "list": [l] * 3, "json": [j] * 3})
assert ds[:3].text.data()["value"] == [t] * 3
assert ds[:3].list.data()["value"] == [l] * 3
assert ds[:3].json.data()["value"] == [j] * 3
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"text": [t] * 3,
"list": [l] * 3,
"json": [j] * 2 + [deeplake.read("bad_sample")],
}
)
deeplake/api/tests/test_update_samples.py:555:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-lz4-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-lz4-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
Check failure on line 555 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_text_like[None-None]
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66022c73d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f66022c7110>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1705: in _update
self._update_non_tiled_sample(
deeplake/core/chunk_engine.py:1341: in _update_non_tiled_sample
chunk.update_sample(local_sample_index, sample)
deeplake/core/chunk/uncompressed_chunk.py:265: in update_sample
serialized_sample, shape = self.serialize_sample(sample, break_into_tiles=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk.uncompressed_chunk.UncompressedChunk object at 0x7f66022c4c10>
incoming_sample = Sample(is_lazy=True, path=bad_sample)
sample_compression = None, chunk_compression = None, break_into_tiles = False
store_uncompressed_tiles = False
def serialize_sample(
self,
incoming_sample: InputSample,
sample_compression: Optional[str] = None,
chunk_compression: Optional[str] = None,
break_into_tiles: bool = True,
store_uncompressed_tiles: bool = False,
) -> SerializedOutput:
"""Converts the sample into bytes"""
dt, ht, min_chunk_size, tiling_threshold = (
self.dtype,
self.htype,
self.min_chunk_size,
self.tiling_threshold,
)
if tiling_threshold < 0:
break_into_tiles = False
if isinstance(incoming_sample, LinkedSample):
if self.tensor_meta.is_link:
incoming_sample = incoming_sample.path
else:
raise ValueError(
"deeplake.link() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if isinstance(incoming_sample, LinkedTiledSample):
if not self.tensor_meta.is_link:
raise ValueError(
"deeplake.link_tiled() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if self.is_text_like:
if isinstance(incoming_sample, LinkedSample):
incoming_sample = incoming_sample.path
if incoming_sample is None:
htype = "text" if self.tensor_meta.is_link else self.htype
empty_mapping = {"text": "", "list": [], "json": {}, "tag": []}
incoming_sample = empty_mapping[htype]
if isinstance(incoming_sample, Sample):
if incoming_sample.is_text_like:
incoming_sample, shape = serialize_text_sample_object( # type: ignore
incoming_sample, sample_compression
)
else:
htype = "Linked" if self.tensor_meta.is_link else self.htype
> raise TypeError(
f"Cannot append to {htype} tensor with Sample object"
E TypeError: Cannot append to json tensor with Sample object
deeplake/core/chunk/base_chunk.py:341: TypeError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
sample = {'json': [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)], 'list': [[1, 2, 3], [1, 2, 3], [1, 2, 3]], 'text': ['hello', 'hello', 'hello']}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66022c73d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f66022c7110>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor json.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-None-', tensors=['json', 'list', 'text'])
sc = None, cc = None
@pytest.mark.parametrize(("sc", "cc"), [("lz4", None), (None, "lz4"), (None, None)])
def test_ds_update_text_like(local_ds, sc, cc):
with local_ds as ds:
ds.create_tensor(
"text", htype="text", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"list", htype="list", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"json", htype="json", sample_compression=sc, chunk_compression=cc
)
text_samples = (["hello"] + ["world"] * 2) * 2
t = "hello"
ds.text.extend(text_samples)
list_samples = ([[1, 2, 3]] + [[4, 5, 6]] * 2) * 2
l = [1, 2, 3]
ds.list.extend(list_samples)
json_samples = ([{"a": 1}] + [{"b": 2, "c": 3}] * 2) * 2
j = {"a": 1}
ds.json.extend(json_samples)
ds[1].update({"text": t, "list": l, "json": j})
assert ds[1].text.data()["value"] == t
assert ds[1].list.data()["value"] == l
assert ds[1].json.data()["value"] == j
ds[:3].update({"text": [t] * 3, "list": [l] * 3, "json": [j] * 3})
assert ds[:3].text.data()["value"] == [t] * 3
assert ds[:3].list.data()["value"] == [l] * 3
assert ds[:3].json.data()["value"] == [j] * 3
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"text": [t] * 3,
"list": [l] * 3,
"json": [j] * 2 + [deeplake.read("bad_sample")],
}
)
deeplake/api/tests/test_update_samples.py:555:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
Check failure on line 632 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_link
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = Sample(is_lazy=True, path=bad_sample)
def _read_from_path(self) -> bytes: # type: ignore
if self._buffer is None:
path_type = get_path_type(self.path)
try:
if path_type == "local":
> self._buffer = self._read_from_local()
deeplake/core/sample.py:440:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Sample(is_lazy=True, path=bad_sample)
def _read_from_local(self) -> bytes:
> with open(self.path, "rb") as f: # type: ignore
E FileNotFoundError: [Errno 2] No such file or directory: 'bad_sample'
deeplake/core/sample.py:456: FileNotFoundError
The above exception was the direct cause of the following exception:
sample_path = 'bad_sample', sample_creds_key = None
link_creds = <deeplake.core.link_creds.LinkCreds object at 0x7f6601f88050>
verify = True
def read_linked_sample(
sample_path: str, sample_creds_key: Optional[str], link_creds, verify: bool
):
provider_type = get_path_type(sample_path)
try:
if provider_type == "local":
> return deeplake.read(sample_path, verify=verify)
deeplake/core/linked_sample.py:27:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/api/read.py:61: in read
return Sample(
deeplake/core/sample.py:101: in __init__
compressed_bytes = self._read_from_path()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Sample(is_lazy=True, path=bad_sample)
def _read_from_path(self) -> bytes: # type: ignore
if self._buffer is None:
path_type = get_path_type(self.path)
try:
if path_type == "local":
self._buffer = self._read_from_local()
elif path_type == "gcs":
self._buffer = self._read_from_gcs()
elif path_type == "s3":
self._buffer = self._read_from_s3()
elif path_type == "azure":
self._buffer = self._read_from_azure()
elif path_type == "gdrive":
self._buffer = self._read_from_gdrive()
elif path_type == "http":
self._buffer = self._read_from_http()
except Exception as e:
> raise SampleReadError(self.path) from e # type: ignore
E deeplake.util.exceptions.SampleReadError: Unable to read sample from bad_sample
deeplake/core/sample.py:452: SampleReadError
The above exception was the direct cause of the following exception:
self = <deeplake.core.linked_chunk_engine.LinkedChunkEngine object at 0x7f6602390210>
samples = [<deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6602497750>]
verify = True, ignore_errors = False
def check_each_sample(self, samples, verify=True, ignore_errors=False):
link_creds = self.link_creds
verified_samples = []
skipped = []
for i, sample in enumerate(samples):
try:
if isinstance(sample, deeplake.core.tensor.Tensor) and sample.is_link:
sample = sample._linked_sample()
samples[i] = sample
elif (
not isinstance(sample, (LinkedSample, LinkedTiledSample))
and sample is not None
):
raise TypeError(
f"Expected LinkedSample or LinkedTiledSample, got {type(sample)} instead. Use deeplake.link() to link samples or deeplake.link_tiled() to link multiple images as tiles."
)
path, creds_key = get_path_creds_key(sample)
# verifies existence of creds_key
if verify:
link_creds.get_encoding(creds_key, path)
if sample is None or sample.path == "":
verified_samples.append(sample)
elif isinstance(sample, LinkedTiledSample):
verify_samples = self.verify and verify
sample.set_check_tile_shape(self.link_creds, verify_samples)
sample.set_sample_shape()
verified_samples.append(sample)
else:
try:
_verify = verify and self.verify
verified_samples.append(
> read_linked_sample(
sample.path,
sample.creds_key,
self.link_creds,
verify=_verify,
)
)
deeplake/core/linked_chunk_engine.py:280:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sample_path = 'bad_sample', sample_creds_key = None
link_creds = <deeplake.core.link_creds.LinkCreds object at 0x7f6601f88050>
verify = True
def read_linked_sample(
sample_path: str, sample_creds_key: Optional[str], link_creds, verify: bool
):
provider_type = get_path_type(sample_path)
try:
if provider_type == "local":
return deeplake.read(sample_path, verify=verify)
elif provider_type == "http":
return _read_http_linked_sample(
link_creds, sample_creds_key, sample_path, verify
)
else:
return _read_cloud_linked_sample(
link_creds, sample_creds_key, sample_path, provider_type, verify
)
except Exception as e:
> raise GetDataFromLinkError(sample_path) from e
E deeplake.util.exceptions.GetDataFromLinkError: Unable to get data from link bad_sample.
deeplake/core/linked_sample.py:37: GetDataFromLinkError
The above exception was the direct cause of the following exception:
self = <deeplake.core.linked_chunk_engine.LinkedChunkEngine object at 0x7f6602390210>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f66023903d0>])
samples = [<deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6602497750>]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images2')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1689: in _update
verified_samples = self.check_each_sample(samples)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.linked_chunk_engine.LinkedChunkEngine object at 0x7f6602390210>
samples = [<deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6602497750>]
verify = True, ignore_errors = False
def check_each_sample(self, samples, verify=True, ignore_errors=False):
link_creds = self.link_creds
verified_samples = []
skipped = []
for i, sample in enumerate(samples):
try:
if isinstance(sample, deeplake.core.tensor.Tensor) and sample.is_link:
sample = sample._linked_sample()
samples[i] = sample
elif (
not isinstance(sample, (LinkedSample, LinkedTiledSample))
and sample is not None
):
raise TypeError(
f"Expected LinkedSample or LinkedTiledSample, got {type(sample)} instead. Use deeplake.link() to link samples or deeplake.link_tiled() to link multiple images as tiles."
)
path, creds_key = get_path_creds_key(sample)
# verifies existence of creds_key
if verify:
link_creds.get_encoding(creds_key, path)
if sample is None or sample.path == "":
verified_samples.append(sample)
elif isinstance(sample, LinkedTiledSample):
verify_samples = self.verify and verify
sample.set_check_tile_shape(self.link_creds, verify_samples)
sample.set_sample_shape()
verified_samples.append(sample)
else:
try:
_verify = verify and self.verify
verified_samples.append(
read_linked_sample(
sample.path,
sample.creds_key,
self.link_creds,
verify=_verify,
)
)
except Exception as e:
> raise BadLinkError(sample.path, sample.creds_key) from e
E deeplake.util.exceptions.BadLinkError: Verification of link failed. Make sure that the link you are trying to append is correct.
E
E Failed link: bad_sample
E creds_key used: None
E
E No credentials have been provided to access the link. If the link is not publicly accessible, add access credentials to your dataset and use the appropriate creds_key.
deeplake/core/linked_chunk_engine.py:288: BadLinkError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_link', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
sample = {'images1': [<deeplake.core.linked_sample.LinkedSample object at 0x7f6601f8a750>, <deeplake.core.linked_sample.LinkedS...ed_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6602497750>]}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.linked_chunk_engine.LinkedChunkEngine object at 0x7f6602390210>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f66023903d0>])
samples = [<deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6602497750>]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images2')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor images2.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_link', tensors=['images1', 'images2'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'
dog_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg'
def test_ds_update_link(local_ds, cat_path, dog_path):
with local_ds as ds:
ds.create_tensor("images1", htype="link[image]", sample_compression="png")
ds.create_tensor("images2", htype="link[image]", sample_compression="png")
dog = deeplake.link(dog_path)
cat = deeplake.link(cat_path)
ds.images1.extend([cat] * 6)
ds.images2.extend([dog] * 6)
ds[0].update({"images1": dog, "images2": cat})
assert ds[0].images1.shape == (323, 480, 3)
assert ds[0].images2.shape == (900, 900, 3)
ds[:3].update({"images1": [dog] * 3, "images2": [cat] * 3})
assert ds[:3].images1.shape == (3, 323, 480, 3)
assert ds[:3].images2.shape == (3, 900, 900, 3)
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"images1": [dog] * 3,
"images2": [cat] * 2 + [deeplake.link("bad_sample")],
}
)
deeplake/api/tests/test_update_samples.py:632:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_link', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_link', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
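test_ds_update_link reaches the same rollback problem through the linked-tensor path: deeplake.link("bad_sample") fails verification during the update (FileNotFoundError -> SampleReadError -> GetDataFromLinkError -> BadLinkError -> SampleUpdateError), and the subsequent reset() on the ds[3:] view is again rejected. A sketch under the same assumptions, with placeholder paths standing in for any readable local images:

import deeplake
from deeplake.util.exceptions import SampleUpdateError

ds = deeplake.empty("./repro_ds_update_link", overwrite=True)  # illustrative path
ds.create_tensor("images1", htype="link[image]", sample_compression="png")
ds.create_tensor("images2", htype="link[image]", sample_compression="png")
cat = deeplake.link("/path/to/cat.jpeg")  # placeholder for a readable image
dog = deeplake.link("/path/to/dog.jpg")   # placeholder for a readable image
ds.images1.extend([cat] * 6)
ds.images2.extend([dog] * 6)

try:
    # The bad link fails verification mid-update; the rollback calls reset() on a
    # view, so InvalidOperationError masks the SampleUpdateError the test expects.
    ds[3:].update(
        {"images1": [dog] * 3, "images2": [cat] * 2 + [deeplake.link("bad_sample")]}
    )
except SampleUpdateError:
    print("expected exception")
except Exception as e:
    print(f"observed {type(e).__name__} instead")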
Check failure on line 668 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_polygon
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66022f6d90>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6601f683d0>])
samples = [array([[[1., 1.],
[1., 1.],
[1., 1.]],
[[1., 1.],
[1., 1.],
[1., 1.]],
...., 1.]],
[[1., 1.],
[1., 1.],
[1., 1.]]]), array([[1., 1.],
[1., 1.],
[1., 1.]])]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='xyz')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1693: in _update
samples = [Polygons(sample, self.tensor_meta.dtype) for sample in samples] # type: ignore
deeplake/core/chunk_engine.py:1693: in <listcomp>
samples = [Polygons(sample, self.tensor_meta.dtype) for sample in samples] # type: ignore
deeplake/core/polygon.py:59: in __init__
self._validate()
deeplake/core/polygon.py:66: in _validate
ndim = self[0].ndim
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.polygon.Polygon object at 0x7f6601f68250>
@property
def ndim(self):
"""Dimension of the polygon."""
> return len(self.coords[0])
E TypeError: object of type 'numpy.float64' has no len()
deeplake/core/polygon.py:22: TypeError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_polygon', index=Index([slice(3, None, None)]), tensors=['abc', 'xyz'])
sample = {'abc': [array([[[1., 1.],
[1., 1.]],
[[1., 1.],
[1., 1.]]]), array([[[1., 1.],
[1., 1..., 1.]],
[[1., 1.],
[1., 1.],
[1., 1.]]]), array([[1., 1.],
[1., 1.],
[1., 1.]])]}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66022f6d90>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6601f683d0>])
samples = [array([[[1., 1.],
[1., 1.],
[1., 1.]],
[[1., 1.],
[1., 1.],
[1., 1.]],
...., 1.]],
[[1., 1.],
[1., 1.],
[1., 1.]]]), array([[1., 1.],
[1., 1.],
[1., 1.]])]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='xyz')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor xyz.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_polygon', tensors=['abc', 'xyz'])
def test_ds_update_polygon(local_ds):
with local_ds as ds:
ds.create_tensor("abc", htype="polygon", chunk_compression="lz4")
ds.create_tensor("xyz", htype="polygon", chunk_compression="lz4")
abc_samples = np.ones((6, 3, 3, 2))
xyz_samples = np.ones((6, 2, 2, 2))
ds.abc.extend(abc_samples)
ds.xyz.extend(xyz_samples)
ds[0].update({"abc": np.ones((2, 2, 2)), "xyz": np.ones((3, 3, 2))})
assert ds[0].abc.shape == (2, 2, 2)
assert ds[0].xyz.shape == (3, 3, 2)
ds[:3].update({"abc": [np.ones((2, 2, 2))] * 3, "xyz": [np.ones((3, 3, 2))] * 3})
assert ds[:3].abc.shape == (3, 2, 2, 2)
assert ds[:3].xyz.shape == (3, 3, 3, 2)
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"abc": [np.ones((2, 2, 2))] * 3,
"xyz": [np.ones((3, 3, 2))] * 2 + [np.ones((3, 2))],
}
)
deeplake/api/tests/test_update_samples.py:668:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_polygon', index=Index([slice(3, None, None)]), tensors=['abc', 'xyz'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_polygon', index=Index([slice(3, None, None)]), tensors=['abc', 'xyz'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
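Before the rollback problem described above, the inner TypeError here is a shape issue: a polygon sample is expected to be a stack of polygons (three dimensions, with the last axis holding the (x, y) coordinates), while the deliberately bad sample np.ones((3, 2)) is a single polygon, so Polygon.ndim ends up calling len() on a bare float. A small numpy illustration of the shapes involved, based only on the traceback above:

```python
import numpy as np

good = np.ones((3, 3, 2))  # 3 polygons x 3 points x (x, y); iterating yields 2-D point arrays
bad = np.ones((3, 2))      # a single polygon; iterating yields 1-D rows, and row[0] is a
                           # plain numpy.float64, which is what len() blows up on

assert good[0].ndim == 2                    # len(good[0][0]) == 2 works
assert isinstance(bad[0][0], np.floating)   # len(bad[0][0]) raises the TypeError seen above
```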
Check failure on line 209 in deeplake/core/tests/test_vdb_indexes.py
github-actions / JUnit Test Report
test_vdb_indexes.test_index_maintenance_delete
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embeddings'), indexes = [4999], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embeddings'), operation_kind = 2, row_ids = [4999]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
> indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1568: RuntimeError
During handling of the above exception, another exception occurred:
local_auth_ds_generator = <function local_auth_ds_generator.<locals>.generate_local_auth_ds at 0x7f65d813e340>
@requires_libdeeplake
def test_index_maintenance_delete(local_auth_ds_generator):
ds = local_auth_ds_generator()
with ds:
ds.create_tensor(
"embeddings",
dtype=np.float32,
htype="embedding",
sample_compression=None,
)
ds.embeddings.unload_vdb_index_cache()
arr = np.random.uniform(-1, 1, (5000, 48)).astype("float32")
ds.embeddings.extend(arr)
ds.embeddings.create_vdb_index("hnsw_1", distance="cosine_similarity")
index = ds.embeddings.load_vdb_index("hnsw_1")
count = 0
for i in range(len(ds)):
ret = index.search_knn(ds.embeddings[i].numpy(), 1)
if i == ret.indices[0]:
count += 1
recall = count / len(ds)
sample = ds.embeddings[4999].numpy()
> ds.pop(4999)
deeplake/core/tests/test_vdb_indexes.py:209:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/dataset/dataset.py:4711: in pop
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embeddings'), indexes = [4999], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
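The three VDB-index failures in this run (this one, test_vdb_index_incr_maint_append_pop and test_vdb_index_incr_maint_update below) bottom out in the same place: the libdeeplake call (api.vdb.remove_samples_from_index / update_samples_in_index) fails with RuntimeError: request_failed, and _incr_maintenance_vdb_indexes re-wraps it in a bare Exception, discarding the original type. A hedged sketch of how the wrapper could keep the cause attached; this condenses the guards and is an illustration of standard exception chaining, not the shipped code:

```python
class VdbIndexMaintenanceError(Exception):
    # Hypothetical exception type for illustration; the current code raises a bare Exception.
    pass

def _incr_maintenance_vdb_indexes_sketch(tensor, indexes, index_operation):
    try:
        for vdb_index in getattr(tensor.meta, "vdb_indexes", None) or []:
            tensor.update_vdb_index(operation_kind=index_operation, row_ids=indexes)
    except Exception as e:
        # `raise ... from e` keeps RuntimeError("request_failed") as __cause__, so callers
        # can still distinguish backend failures from local index-maintenance bugs.
        raise VdbIndexMaintenanceError(
            f"An error occurred while regenerating VDB indexes: {e}"
        ) from e
```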
Check failure on line 1754 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_vdb_index_incr_maint_append_pop
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding'), indexes = [2], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding'), operation_kind = 2, row_ids = [2]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
> indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1568: RuntimeError
During handling of the above exception, another exception occurred:
local_path = './hub_pytest/test_deeplake_vectorstore/test_vdb_index_incr_maint_append_pop'
capsys = <_pytest.capture.CaptureFixture object at 0x7f6601ee9450>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjE5NiwiZXhwIjoxNzA5NTIyMTk2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.76VsQkoBfnHsLLmUaMe8Lul1Hct0vUczdA9OhHlKoXv67yNdTNKtWWjzcwYKvny3_wc01yURAF-6JyFG1JhSiA'
@requires_libdeeplake
def test_vdb_index_incr_maint_append_pop(local_path, capsys, hub_cloud_dev_token):
number_of_data = 103
texts, embeddings, ids, metadatas, _ = utils.create_data(
number_of_data=number_of_data, embedding_dim=EMBEDDING_DIM
)
txt1 = texts[99]
md1 = metadatas[99]
ids1 = ids[99]
emb1 = embeddings[99]
txt2 = texts[100]
md2 = metadatas[100]
ids2 = ids[100]
emb2 = embeddings[100]
txt3 = texts[101]
md3 = metadatas[101]
ids3 = ids[101]
emb3 = embeddings[101]
txt4 = texts[102]
md4 = metadatas[102]
ids4 = ids[102]
emb4 = embeddings[102]
# initialize vector store object with vdb index threshold as 200.
vector_store = DeepLakeVectorStore(
path=local_path,
overwrite=True,
verbose=True,
exec_option="compute_engine",
index_params={"threshold": 2, "distance_metric": "L2"},
token=hub_cloud_dev_token,
)
ds = vector_store.dataset_handler.dataset
ds.append({"embedding": emb1, "text": txt1, "id": ids1, "metadata": md1})
ds.append({"embedding": emb2, "text": txt2, "id": ids2, "metadata": md2})
ds.append({"embedding": emb3, "text": txt3, "id": ids3, "metadata": md3})
ds.append({"embedding": emb4, "text": txt4, "id": ids4, "metadata": md4})
# assert len(vector_store) == number_of_data
assert set(vector_store.dataset_handler.dataset.tensors) == set(
[
"embedding",
"id",
"metadata",
"text",
]
)
assert set(vector_store.tensors()) == set(
[
"embedding",
"id",
"metadata",
"text",
]
)
# Check if the index is recreated properly.
# ds = vector_store.dataset
es = ds.embedding.get_vdb_indexes()
assert len(es) == 1
assert es[0]["id"] == "hnsw_1"
assert es[0]["distance"] == "l2_norm"
assert es[0]["type"] == "hnsw"
# search the embeddings.
query1 = ds.embedding[1].numpy()
query2 = ds.embedding[2].numpy()
query3 = ds.embedding[3].numpy()
s1 = ",".join(str(c) for c in query1)
view1 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s1}]) DESC limit 1"
)
res1 = list(view1.sample_indices)
assert res1[0] == 1
s2 = ",".join(str(c) for c in query2)
view2 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s2}]) DESC limit 1"
)
res2 = list(view2.sample_indices)
assert res2[0] == 2
s3 = ",".join(str(c) for c in query3)
view3 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s3}]) DESC limit 1"
)
res3 = list(view3.sample_indices)
assert res3[0] == 3
with pytest.raises(EmbeddingTensorPopError):
vector_store.dataset.embedding.pop(2)
vector_store.dataset.id.pop(2)
vector_store.dataset.metadata.pop(2)
vector_store.dataset.text.pop(2)
with pytest.raises(EmbeddingTensorPopError):
> vector_store.dataset.pop(2)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:1754:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/dataset/dataset.py:4711: in pop
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding'), indexes = [2], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 1856 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_vdb_index_incr_maint_update
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([3])), indexes = [3]
index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([3])), operation_kind = 3
row_ids = [3]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
local_path = './hub_pytest/test_deeplake_vectorstore/test_vdb_index_incr_maint_update'
capsys = <_pytest.capture.CaptureFixture object at 0x7f6601e34f50>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjE5NiwiZXhwIjoxNzA5NTIyMTk2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.76VsQkoBfnHsLLmUaMe8Lul1Hct0vUczdA9OhHlKoXv67yNdTNKtWWjzcwYKvny3_wc01yURAF-6JyFG1JhSiA'
@requires_libdeeplake
def test_vdb_index_incr_maint_update(local_path, capsys, hub_cloud_dev_token):
number_of_data = 105
texts, embeddings, ids, metadatas, _ = utils.create_data(
number_of_data=number_of_data, embedding_dim=EMBEDDING_DIM
)
txt1 = texts[:100]
md1 = metadatas[:100]
ids1 = ids[:100]
emb1 = embeddings[:100]
txt2 = texts[100]
md2 = metadatas[100]
ids2 = ids[100]
emb2 = embeddings[100]
txt3 = texts[101]
md3 = metadatas[101]
ids3 = ids[101]
emb3 = embeddings[101]
txt4 = texts[102]
md4 = metadatas[102]
ids4 = ids[102]
emb4 = embeddings[102]
emb5 = embeddings[103]
emb6 = embeddings[104]
# initialize vector store object with vdb index threshold as 200.
vector_store = DeepLakeVectorStore(
path=local_path,
overwrite=True,
verbose=True,
exec_option="compute_engine",
index_params={"threshold": 2, "distance_metric": "L2"},
token=hub_cloud_dev_token,
)
vector_store.add(embedding=emb1, text=txt1, id=ids1, metadata=md1)
ds = vector_store.dataset_handler.dataset
ds.append({"embedding": emb2, "text": txt2, "id": ids2, "metadata": md2})
ds.append({"embedding": emb3, "text": txt3, "id": ids3, "metadata": md3})
ds.append({"embedding": emb4, "text": txt4, "id": ids4, "metadata": md4})
# assert len(vector_store) == number_of_data
assert set(vector_store.dataset_handler.dataset.tensors) == set(
[
"embedding",
"id",
"metadata",
"text",
]
)
assert set(vector_store.tensors()) == set(
[
"embedding",
"id",
"metadata",
"text",
]
)
# Check if the index is recreated properly.
# ds = vector_store.dataset
es = ds.embedding.get_vdb_indexes()
assert len(es) == 1
assert es[0]["id"] == "hnsw_1"
assert es[0]["distance"] == "l2_norm"
assert es[0]["type"] == "hnsw"
# search the embeddings.
query1 = ds.embedding[1].numpy()
query2 = ds.embedding[2].numpy()
query3 = ds.embedding[3].numpy()
s1 = ",".join(str(c) for c in query1)
view1 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s1}]) DESC limit 1"
)
res1 = list(view1.sample_indices)
assert res1[0] == 1
s2 = ",".join(str(c) for c in query2)
view2 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s2}]) DESC limit 1"
)
res2 = list(view2.sample_indices)
assert res2[0] == 2
s3 = ",".join(str(c) for c in query3)
view3 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s3}]) DESC limit 1"
)
res3 = list(view3.sample_indices)
assert res3[0] == 3
> ds[3].update({"embedding": emb5})
deeplake/core/vectorstore/test_deeplake_vectorstore.py:1856:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([3])), indexes = [3]
index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 107 in deeplake/core/vectorstore/dataset_handlers/test_managed_dh.py
github-actions / JUnit Test Report
test_managed_dh.test_managed_vectorstore_should_not_accept_embedding_function_during_search
Failed: Timeout >60.0s
Raw output
hub_cloud_path = 'hub://testingacc2/tmpdace_test_managed_dh_test_managed_vectorstore_should_not_accept_embedding_function_during_search'
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjE5NiwiZXhwIjoxNzA5NTIyMTk2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.76VsQkoBfnHsLLmUaMe8Lul1Hct0vUczdA9OhHlKoXv67yNdTNKtWWjzcwYKvny3_wc01yURAF-6JyFG1JhSiA'
def test_managed_vectorstore_should_not_accept_embedding_function_during_search(
hub_cloud_path, hub_cloud_dev_token
):
> db = utils.create_and_populate_vs(
path=hub_cloud_path,
token=hub_cloud_dev_token,
runtime={"tensor_db": True},
embedding_dim=100,
)
deeplake/core/vectorstore/dataset_handlers/test_managed_dh.py:107:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/vector_search/utils.py:713: in create_and_populate_vs
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
deeplake/core/vectorstore/deeplake_vectorstore.py:222: in add
return self.dataset_handler.add(
deeplake/core/vectorstore/dataset_handlers/managed_dataset_handler.py:200: in add
response = self.client.vectorstore_add(
deeplake/client/managed/managed_client.py:164: in vectorstore_add
response = self.request(
deeplake/client/client.py:151: in request
response = requests.request(
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/requests/api.py:59: in request
return session.request(method=method, url=url, **kwargs)
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/requests/sessions.py:589: in request
resp = self.send(prep, **send_kwargs)
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/requests/sessions.py:703: in send
r = adapter.send(request, **kwargs)
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/requests/adapters.py:486: in send
resp = conn.urlopen(
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/urllib3/connectionpool.py:791: in urlopen
response = self._make_request(
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/urllib3/connectionpool.py:537: in _make_request
response = conn.getresponse()
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/urllib3/connection.py:461: in getresponse
httplib_response = super().getresponse()
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/http/client.py:1386: in getresponse
response.begin()
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/http/client.py:325: in begin
version, status, reason = self._read_status()
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/http/client.py:286: in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/socket.py:706: in readinto
return self._sock.recv_into(b)
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/ssl.py:1315: in recv_into
return self.read(nbytes, buffer)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ssl.SSLSocket [closed] fd=-1, family=2, type=1, proto=6>, len = 8192
buffer = <memory at 0x7f66009d3640>
def read(self, len=1024, buffer=None):
"""Read up to LEN bytes and return them.
Return zero-length string on EOF."""
self._checkClosed()
if self._sslobj is None:
raise ValueError("Read on closed or unwrapped SSL socket.")
try:
if buffer is not None:
> return self._sslobj.read(len, buffer)
E Failed: Timeout >60.0s
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/ssl.py:1167: Failed
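Unlike the other failures, this one is a hang rather than an assertion error: vectorstore_add goes through deeplake/client/client.py:request into requests.request, and the 60 s test timeout fires while the client is still blocked in an SSL read waiting on the managed service. A small sketch of bounding the HTTP call so a stalled response surfaces as a requests exception well before the harness timeout; the URL, header format and timeout values are placeholders, not the real client configuration:

```python
import requests

def request_with_timeout(method, url, json_payload=None, token=None):
    # Sketch: a (connect, read) timeout makes a stalled backend raise
    # requests.exceptions.ConnectTimeout / ReadTimeout instead of hanging
    # until the test harness kills the call at 60 s.
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    return requests.request(
        method,
        url,
        json=json_payload,
        headers=headers,
        timeout=(10, 30),
    )
```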
Check failure on line 803 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
github-actions / JUnit Test Report
test_deepmemory.test_db_deepmemory_status_should_show_best_model_with_deepmemory_v2_metadata_logic
NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
Raw output
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f65d1342c50>
@property
def dataset(self):
"""Returns the dataset"""
try:
> return self.dataset_handler.dataset
E AttributeError: 'ManagedDH' object has no attribute 'dataset'
deeplake/core/vectorstore/deeplake_vectorstore.py:523: AttributeError
During handling of the above exception, another exception occurred:
capsys = <_pytest.capture.CaptureFixture object at 0x7f6602568810>
corpus_query_pair_path = ('hub://testingacc2/deepmemory_test_corpus_managed_2', 'hub://testingacc2/deepmemory_test_corpus_managed_2_eval_queries')
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjE5NiwiZXhwIjoxNzA5NTIyMTk2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.76VsQkoBfnHsLLmUaMe8Lul1Hct0vUczdA9OhHlKoXv67yNdTNKtWWjzcwYKvny3_wc01yURAF-6JyFG1JhSiA'
def test_db_deepmemory_status_should_show_best_model_with_deepmemory_v2_metadata_logic(
capsys,
corpus_query_pair_path,
hub_cloud_dev_token,
):
corpus, queries = corpus_query_pair_path
db = VectorStore(
path=corpus,
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
embedding_function=embedding_fn,
)
> db.dataset.embedding.info = {
"deepmemory": {
"6581e3056a1162b64061a9a4_0.npy": {
"base_recall@10": 0.25,
"deep_memory_version": "0.2",
"delta": 0.25,
"job_id": "6581e3056a1162b64061a9a4_0",
"model_type": "npy",
"recall@10": 0.5,
},
"model.npy": {
"base_recall@10": 0.25,
"deep_memory_version": "0.2",
"delta": 0.25,
"job_id": "6581e3056a1162b64061a9a4_0",
"model_type": "npy",
"recall@10": 0.5,
},
}
}
deeplake/core/vectorstore/deep_memory/test_deepmemory.py:803:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f65d1342c50>
@property
def dataset(self):
"""Returns the dataset"""
try:
return self.dataset_handler.dataset
except AttributeError:
> raise NotImplementedError(
"Acessing the dataset is not implemented for managed Vector Store yet."
)
E NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
deeplake/core/vectorstore/deeplake_vectorstore.py:525: NotImplementedError
Check failure on line 846 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
github-actions / JUnit Test Report
test_deepmemory.test_db_deepmemory_status_should_show_best_model_with_deepmemory_v1_metadata_logic
NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
Raw output
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f66ceae6f50>
@property
def dataset(self):
"""Returns the dataset"""
try:
> return self.dataset_handler.dataset
E AttributeError: 'ManagedDH' object has no attribute 'dataset'
deeplake/core/vectorstore/deeplake_vectorstore.py:523: AttributeError
During handling of the above exception, another exception occurred:
capsys = <_pytest.capture.CaptureFixture object at 0x7f66db379c90>
corpus_query_pair_path = ('hub://testingacc2/deepmemory_test_corpus_managed_2', 'hub://testingacc2/deepmemory_test_corpus_managed_2_eval_queries')
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjE5NiwiZXhwIjoxNzA5NTIyMTk2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.76VsQkoBfnHsLLmUaMe8Lul1Hct0vUczdA9OhHlKoXv67yNdTNKtWWjzcwYKvny3_wc01yURAF-6JyFG1JhSiA'
def test_db_deepmemory_status_should_show_best_model_with_deepmemory_v1_metadata_logic(
capsys,
corpus_query_pair_path,
hub_cloud_dev_token,
):
corpus, queries = corpus_query_pair_path
db = VectorStore(
path=corpus,
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
embedding_function=embedding_fn,
)
> db.dataset.embedding.info = {
"deepmemory": {
"6581e3056a1162b64061a9a4_0.npy": {
"base_recall@10": 0.25,
"deep_memory_version": "0.2",
"delta": 0.25,
"job_id": "6581e3056a1162b64061a9a4_0",
"model_type": "npy",
"recall@10": 0.5,
},
},
"deepmemory/model.npy": {
"base_recall@10": 0.25,
"deep_memory_version": "0.2",
"delta": 0.25,
"job_id": "6581e3056a1162b64061a9a4_0",
"model_type": "npy",
"recall@10": 0.5,
},
}
deeplake/core/vectorstore/deep_memory/test_deepmemory.py:846:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f66ceae6f50>
@property
def dataset(self):
"""Returns the dataset"""
try:
return self.dataset_handler.dataset
except AttributeError:
> raise NotImplementedError(
"Acessing the dataset is not implemented for managed Vector Store yet."
)
E NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
deeplake/core/vectorstore/deeplake_vectorstore.py:525: NotImplementedError
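The two deepmemory status tests above, the fixture failures at the top of the report, and test_deepmemory_delete below all fail for the same reason: they reach into VectorStore.dataset, and the managed (REST-backed) handler ManagedDH does not expose a dataset object yet. Until that is implemented, a guard like the following could skip such tests explicitly instead of letting them die during setup; the helper name is hypothetical, and the hasattr check mirrors the dataset property shown above:

```python
import pytest

def require_local_dataset(vector_store):
    # Hypothetical test helper: managed handlers ("ManagedDH") have no
    # `dataset` attribute, so direct dataset access cannot work yet.
    if not hasattr(vector_store.dataset_handler, "dataset"):
        pytest.skip("direct dataset access is not implemented for the managed Vector Store")
    return vector_store.dataset_handler.dataset
```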
Check failure on line 448 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_image
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
file = 'bad_sample', compression = None
def read_meta_from_compressed_file(
file, compression: Optional[str] = None
) -> Tuple[str, Tuple[int], str]:
"""Reads shape, dtype and format without decompressing or verifying the sample."""
path = None
if isinstance(file, (str, Path)):
path = str(file)
try:
> f = open(file, "rb")
E FileNotFoundError: [Errno 2] No such file or directory: 'bad_sample'
deeplake/core/compression.py:637: FileNotFoundError
The above exception was the direct cause of the following exception:
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f1957fdeb90>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f196a844110>])
samples = [Sample(is_lazy=True, path=bad_sample)], operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1705: in _update
self._update_non_tiled_sample(
deeplake/core/chunk_engine.py:1341: in _update_non_tiled_sample
chunk.update_sample(local_sample_index, sample)
deeplake/core/chunk/uncompressed_chunk.py:265: in update_sample
serialized_sample, shape = self.serialize_sample(sample, break_into_tiles=False)
deeplake/core/chunk/base_chunk.py:354: in serialize_sample
incoming_sample, shape = serialize_sample_object( # type: ignore
deeplake/core/serialize.py:610: in serialize_sample_object
shape = incoming_sample.shape
deeplake/core/sample.py:161: in shape
self._read_meta()
deeplake/core/sample.py:203: in _read_meta
self._compression, self._shape, self._typestr = read_meta_from_compressed_file(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
file = 'bad_sample', compression = None
def read_meta_from_compressed_file(
file, compression: Optional[str] = None
) -> Tuple[str, Tuple[int], str]:
"""Reads shape, dtype and format without decompressing or verifying the sample."""
path = None
if isinstance(file, (str, Path)):
path = str(file)
try:
f = open(file, "rb")
except FileNotFoundError as e:
> raise SampleReadError(path) from e
E deeplake.util.exceptions.SampleReadError: Unable to read sample from bad_sample
deeplake/core/compression.py:639: SampleReadError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_image', index=Index([2]), tensors=['images', 'images_cc', 'images_sc'])
sample = {'images': Sample(is_lazy=True, path=bad_sample), 'images_cc': Sample(is_lazy=False, shape=(900, 900, 3), compression=... compression='jpeg', dtype='uint8' path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg)}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f1957fdeb90>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f196a844110>])
samples = [Sample(is_lazy=True, path=bad_sample)], operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor images.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_image', tensors=['images', 'images_cc', 'images_sc'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'
dog_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg'
@pytest.mark.slow
def test_ds_update_image(local_ds, cat_path, dog_path):
with local_ds as ds:
ds.create_tensor("images_sc", htype="image", sample_compression="png")
ds.create_tensor("images_cc", htype="image", chunk_compression="png")
ds.create_tensor("images", htype="image", sample_compression=None)
cat = deeplake.read(cat_path)
dog = deeplake.read(dog_path)
samples = ([cat] + [dog] * 2) * 2
with ds:
ds.images_sc.extend(samples)
ds.images_cc.extend(samples)
ds.images.extend(samples)
ds[1].update({"images_sc": cat, "images_cc": cat, "images": cat})
with pytest.raises(SampleUpdateError):
> ds[2].update(
{"images_sc": cat, "images_cc": cat, "images": deeplake.read("bad_sample")}
)
deeplake/api/tests/test_update_samples.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_image', index=Index([2]), tensors=['images', 'images_cc', 'images_sc'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_image', index=Index([2]), tensors=['images', 'images_cc', 'images_sc'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
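Beyond the rollback-on-view issue already described for test_ds_update_link, the trigger here is that deeplake.read returns a lazy sample (Sample(is_lazy=True, path=bad_sample)), so the missing file is only noticed when the update serializes the sample and asks for its shape. A tiny illustration, using only behaviour visible in the traceback above:

```python
import deeplake
from deeplake.util.exceptions import SampleReadError

sample = deeplake.read("bad_sample")   # no error yet: the sample is lazy
try:
    _ = sample.shape                   # forces _read_meta(), which opens the file
except SampleReadError as e:
    print(e)                           # "Unable to read sample from bad_sample"
```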
Check failure on line 596 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_sequence
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f196ae97bd0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f196ae77cd0>])
samples = [[Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_...cat.jpeg)], [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg)]]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='seq_image')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:2894: in _sequence_update
flat_verified_samples: List = self._update(
deeplake/core/chunk_engine.py:1688: in _update
samples = make_sequence(samples, index_length)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
samples = [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_l...es/cat.jpeg), Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg)]
index_length = 6
def make_sequence(
samples: Union[np.ndarray, Sequence[InputSample], InputSample], index_length: int
) -> Sequence[InputSample]:
"""Make `samples` a sequence of `InputSample`s.
Args:
samples (Union[np.ndarray, Sequence[InputSample]]): Incoming samples to be made into a sequence.
index_length (int): Number of expected samples in the sequence.
Raises:
ValueError: If `index_length` is incompatible with the true length of `samples`.
Returns:
Sequence[InputSample]: Sequence of `InputSample`s with the same length as `index_length`.
"""
if index_length == 1:
if hasattr(samples, "__len__"):
if len(samples) != 1: # type: ignore
samples = [samples]
elif hasattr(samples, "shape"):
if len(samples.shape) > 0 and samples.shape[0] != 1: # type: ignore
samples = [samples]
else:
samples = [samples]
if not hasattr(samples, "__len__"):
samples = [samples]
if index_length != len(samples): # type: ignore
> raise ValueError(
f"Index length ({index_length}) and length of samples ({len(samples)}) must be equal for updating a tensor." # type: ignore
)
E ValueError: Index length (6) and length of samples (5) must be equal for updating a tensor.
deeplake/util/chunk_engine.py:67: ValueError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_sequence', index=Index([slice(3, None, None)]), tensors=['seq', 'seq_image'])
sample = {'seq': [[1, 2, 3], [1, 2, 3], [1, 2, 3]], 'seq_image': [[Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplak...at.jpeg)], [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg)]]}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f196ae97bd0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f196ae77cd0>])
samples = [[Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_...cat.jpeg)], [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg)]]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='seq_image')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor seq_image.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_sequence', tensors=['seq', 'seq_image'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'
dog_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg'
@pytest.mark.slow
def test_ds_update_sequence(local_ds, cat_path, dog_path):
with local_ds as ds:
ds.create_tensor("seq", htype="sequence")
ds.create_tensor("seq_image", htype="sequence[image]", sample_compression="png")
seq_samples = [[1, 2, 3], [4, 5, 6], [4, 5, 6]] * 2
ds.seq.extend(seq_samples)
dog = deeplake.read(dog_path)
cat = deeplake.read(cat_path)
seq_image_samples = [[cat, cat], [dog, dog], [dog, dog]] * 2
ds.seq_image.extend(seq_image_samples)
ds[1].update({"seq": [1, 2, 3], "seq_image": [cat, cat]})
np.testing.assert_array_equal(ds[1].seq.numpy(), [[1], [2], [3]])
assert ds[1].seq_image.shape == (2, 900, 900, 3)
ds[:3].update({"seq": [[1, 2, 3]] * 3, "seq_image": [[cat, cat]] * 3})
np.testing.assert_array_equal(ds[:3].seq.numpy(), [[[1], [2], [3]]] * 3)
assert ds[:3].seq_image.shape == (3, 2, 900, 900, 3)
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{"seq": [[1, 2, 3]] * 3, "seq_image": [[cat, cat], [cat, cat], [dog]]}
)
deeplake/api/tests/test_update_samples.py:596:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_sequence', index=Index([slice(3, None, None)]), tensors=['seq', 'seq_image'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_sequence', index=Index([slice(3, None, None)]), tensors=['seq', 'seq_image'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
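Here the underlying error, before the same rollback problem, is a length mismatch: updating ds[3:] touches 3 sequence samples that each hold 2 images, so make_sequence expects 6 flat items, but [[cat, cat], [cat, cat], [dog]] flattens to only 5. A short check of that count, mirroring the numbers in the ValueError above:

```python
# 3 sequence samples x 2 items each = 6 expected flat samples
expected = 3 * 2
provided = sum(len(seq) for seq in [["cat", "cat"], ["cat", "cat"], ["dog"]])
assert (expected, provided) == (6, 5)  # hence "Index length (6) and length of samples (5)"
```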
Check failure on line 708 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_tiles
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
file = 'bad_sample', compression = None
def read_meta_from_compressed_file(
file, compression: Optional[str] = None
) -> Tuple[str, Tuple[int], str]:
"""Reads shape, dtype and format without decompressing or verifying the sample."""
path = None
if isinstance(file, (str, Path)):
path = str(file)
try:
> f = open(file, "rb")
E FileNotFoundError: [Errno 2] No such file or directory: 'bad_sample'
deeplake/core/compression.py:637: FileNotFoundError
The above exception was the direct cause of the following exception:
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f188f628090>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f188f6281d0>])
samples = [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_l...=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images2')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1701: in _update
self._update_tiled_sample(
deeplake/core/chunk_engine.py:1294: in _update_tiled_sample
self._replace_tiled_sample(global_sample_index, sample)
deeplake/core/chunk_engine.py:1279: in _replace_tiled_sample
new_chunk_ids, tiles = self._samples_to_chunks(
deeplake/core/chunk_engine.py:877: in _samples_to_chunks
num_samples_added = current_chunk.extend_if_has_space(
deeplake/core/chunk/sample_compressed_chunk.py:26: in extend_if_has_space
serialized_sample, shape = self.serialize_sample(incoming_sample, compr)
deeplake/core/chunk/base_chunk.py:354: in serialize_sample
incoming_sample, shape = serialize_sample_object( # type: ignore
deeplake/core/serialize.py:610: in serialize_sample_object
shape = incoming_sample.shape
deeplake/core/sample.py:161: in shape
self._read_meta()
deeplake/core/sample.py:203: in _read_meta
self._compression, self._shape, self._typestr = read_meta_from_compressed_file(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
file = 'bad_sample', compression = None
def read_meta_from_compressed_file(
file, compression: Optional[str] = None
) -> Tuple[str, Tuple[int], str]:
"""Reads shape, dtype and format without decompressing or verifying the sample."""
path = None
if isinstance(file, (str, Path)):
path = str(file)
try:
f = open(file, "rb")
except FileNotFoundError as e:
> raise SampleReadError(path) from e
E deeplake.util.exceptions.SampleReadError: Unable to read sample from bad_sample
deeplake/core/compression.py:639: SampleReadError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_tiles', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
sample = {'images1': [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg),.../home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_lazy=True, path=bad_sample)]}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f188f628090>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f188f6281d0>])
samples = [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_l...=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images2')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor images2.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_tiles', tensors=['images1', 'images2'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'
dog_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg'
@pytest.mark.slow
def test_ds_update_tiles(local_ds, cat_path, dog_path):
with local_ds as ds:
ds.create_tensor(
"images1", htype="image", sample_compression="jpg", tiling_threshold=1 * KB
)
ds.create_tensor(
"images2", htype="image", sample_compression="jpg", tiling_threshold=1 * KB
)
cat = deeplake.read(cat_path)
dog = deeplake.read(dog_path)
ds.images1.extend([cat] * 6)
ds.images2.extend([dog] * 6)
ds[0].update({"images1": dog, "images2": cat})
assert ds[0].images1.shape == (323, 480, 3)
assert ds[0].images2.shape == (900, 900, 3)
ds[:3].update({"images1": [dog] * 3, "images2": [cat] * 3})
assert ds[:3].images1.shape == (3, 323, 480, 3)
assert ds[:3].images2.shape == (3, 900, 900, 3)
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"images1": [dog] * 3,
"images2": [cat] * 2 + [deeplake.read("bad_sample")],
}
)
deeplake/api/tests/test_update_samples.py:708:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_tiles', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_tiles', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
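Triage note: the SampleUpdateError above is the failure the test expects (it deliberately passes deeplake.read("bad_sample")), but the rollback inside Dataset.update then calls self.reset(verbose=False) on the sliced view ds[3:], and the invalid_view_op guard rejects reset() on views, so the test surfaces InvalidOperationError instead. A minimal, untested sketch of a rollback helper that temporarily sets the flag the guard checks (the names _allow_view_updates and invalid_view_op.inner are taken from the traceback above, not a confirmed API):

from contextlib import contextmanager

@contextmanager
def allow_view_updates(ds):
    # Temporarily set the flag that invalid_view_op.inner reads via
    # ds.__dict__.get("_allow_view_updates"), so reset() can run on a view,
    # then restore the guard even if the rollback itself fails.
    ds.__dict__["_allow_view_updates"] = True
    try:
        yield ds
    finally:
        ds.__dict__.pop("_allow_view_updates", None)

# Hypothetical use in Dataset.update's exception handler (sketch only):
# with allow_view_updates(self):
#     self.reset(verbose=False)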
Check failure on line 1 in deeplake/client/test_client.py
github-actions / JUnit Test Report
test_client.test_deepmemory_delete
failed on setup with "NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet."
Raw output
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f1870fd7c90>
@property
def dataset(self):
"""Returns the dataset"""
try:
> return self.dataset_handler.dataset
E AttributeError: 'ManagedDH' object has no attribute 'dataset'
deeplake/core/vectorstore/deeplake_vectorstore.py:523: AttributeError
During handling of the above exception, another exception occurred:
request = <SubRequest 'corpus_query_relevances_copy' for <Function test_deepmemory_delete>>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjkyNiwiZXhwIjoxNzA5NTIyOTI2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.2XGeUNIpCWWJJziZICzU4BvM4U5qi1guCi6GPuVDyGTtQxCjPbk4Vnme0X8JLk8E0QmlD70XKhrQN1zfuQC9-Q'
@pytest.fixture
def corpus_query_relevances_copy(request, hub_cloud_dev_token):
if not is_opt_true(request, HUB_CLOUD_OPT):
pytest.skip(f"{HUB_CLOUD_OPT} flag not set")
return
corpus = _get_storage_path(request, HUB_CLOUD)
query_vs = VectorStore(
path=f"hub://{HUB_CLOUD_DEV_USERNAME}/deepmemory_test_queries2",
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
)
> queries = query_vs.dataset.text.data()["value"]
deeplake/tests/path_fixtures.py:487:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f1870fd7c90>
@property
def dataset(self):
"""Returns the dataset"""
try:
return self.dataset_handler.dataset
except AttributeError:
> raise NotImplementedError(
"Acessing the dataset is not implemented for managed Vector Store yet."
)
E NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
deeplake/core/vectorstore/deeplake_vectorstore.py:525: NotImplementedError
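Triage note: this failure (like test_deepmemory_train_and_cancel earlier) happens during fixture setup, not in the test body. corpus_query_relevances_copy opens the queries store with runtime={"tensor_db": True}, which routes to the managed (REST thin client) handler, and ManagedDH exposes no .dataset, so query_vs.dataset raises NotImplementedError. A small, untested sketch of a guard the fixture could use instead of reaching straight into the dataset (the helper name below is hypothetical):

import pytest

def _queries_or_skip(query_vs):
    # The managed handler has no local dataset object, so query_vs.dataset
    # raises NotImplementedError; skip the fixture rather than error on setup.
    if not hasattr(query_vs.dataset_handler, "dataset"):
        pytest.skip("dataset access is not implemented for the managed Vector Store")
    return query_vs.dataset.text.data()["value"]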
Check failure on line 611 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_index_basic
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding'), indexes = [10], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding'), operation_kind = 2, row_ids = [10]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
> indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1568: RuntimeError
During handling of the above exception, another exception occurred:
local_path = './hub_pytest/test_deeplake_vectorstore/test_index_basic'
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjkyNiwiZXhwIjoxNzA5NTIyOTI2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.2XGeUNIpCWWJJziZICzU4BvM4U5qi1guCi6GPuVDyGTtQxCjPbk4Vnme0X8JLk8E0QmlD70XKhrQN1zfuQC9-Q'
@pytest.mark.slow
@requires_libdeeplake
def test_index_basic(local_path, hub_cloud_dev_token):
# Start by testing behavior without an index
vector_store = VectorStore(
path=local_path,
overwrite=True,
token=hub_cloud_dev_token,
)
assert vector_store.dataset_handler.distance_metric_index is None
# Then test behavior when index is added
vector_store = VectorStore(
path=local_path, token=hub_cloud_dev_token, index_params={"threshold": 1}
)
vector_store.add(embedding=embeddings, text=texts, metadata=metadatas)
es = vector_store.dataset_handler.dataset.embedding.get_vdb_indexes()
assert (
es[0]["distance"] == METRIC_TO_INDEX_METRIC[DEFAULT_VECTORSTORE_DISTANCE_METRIC]
)
# Then test behavior when index is added previously and the dataset is reloaded
vector_store = VectorStore(path=local_path, token=hub_cloud_dev_token)
es = vector_store.dataset_handler.dataset.embedding.get_vdb_indexes()
assert (
es[0]["distance"] == METRIC_TO_INDEX_METRIC[DEFAULT_VECTORSTORE_DISTANCE_METRIC]
)
# Test index with sample updates
pre_update_index = vector_store.dataset_handler.dataset.embedding.get_vdb_indexes()[
0
]
vector_store.add(
embedding=[embeddings[0]], text=[texts[0]], metadata=[metadatas[0]]
)
post_update_index = (
vector_store.dataset_handler.dataset.embedding.get_vdb_indexes()[0]
)
assert pre_update_index == post_update_index
# Test index with sample deletion
pre_delete_index = vector_store.dataset_handler.dataset.embedding.get_vdb_indexes()[
0
]
> vector_store.delete(row_ids=[len(vector_store) - 1])
deeplake/core/vectorstore/test_deeplake_vectorstore.py:611:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:375: in delete
return self.dataset_handler.delete(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:310: in delete
self.dataset.pop(row_ids)
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/dataset/dataset.py:4711: in pop
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding'), indexes = [10], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
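Triage note: test_index_basic fails on the delete path. dataset.pop() runs incremental VDB index maintenance, the REMOVE branch of update_vdb_index calls api.vdb.remove_samples_from_index, and the backend returns RuntimeError: request_failed. The wrapper in _incr_maintenance_vdb_indexes then re-raises a generic Exception without explicit chaining, so the backend error only appears as incidental context. An untested sketch of that one change, chaining the cause explicitly; the body elided with ... is unchanged from the implementation shown above:

def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
    try:
        ...  # unchanged body from the implementation shown above
    except Exception as e:
        # Chain the cause so RuntimeError("request_failed") is reported as the
        # direct cause of the wrapper Exception in CI output.
        raise Exception(
            f"An error occurred while regenerating VDB indexes: {e}"
        ) from e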
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-local_auth_ds-vector_store_hash_ids-None-None-None-None-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
operation_kind = 3, row_ids = [0, 1, 2, 3, 4]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-embedding_fn3-local_auth_ds-vector_store_hash_ids-None-None-None-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = ['0', '1', '2', '3', '4'], vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = None
init_embedding_function = <function embedding_fn3 at 0x7f194287e700>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQwMSwiZXhwIjoxNzA5NTI0NDAxfQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.XdQ6_DC_JPn-ellR43c4Fm1XysTy01hd8zlLAsUShzvo91lcCK_uyt24k-yruEbuxDdz0f3zdU9EiC1OANiv2g'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
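Triage note: the remaining test_update_embedding parametrizations below fail identically. update_embedding updates rows through the dataset handler, Dataset.update triggers index maintenance, and the UPDATE branch of update_vdb_index fails inside api.vdb.update_samples_in_index with RuntimeError: request_failed, which looks like a transient backend/API error rather than a defect in the tests themselves. If that assumption holds, a retry around the indra calls could deflake these runs; the helper below is a hypothetical, untested sketch and _with_retries is not an existing name in the codebase:

import time

def _with_retries(fn, attempts=3, base_delay=1.0):
    # Retry transient "request_failed" RuntimeErrors with exponential backoff;
    # re-raise anything else, or the final failure, unchanged.
    for attempt in range(attempts):
        try:
            return fn()
        except RuntimeError as e:
            if "request_failed" not in str(e) or attempt == attempts - 1:
                raise
            time.sleep(base_delay * (2 ** attempt))

# Example use inside the UPDATE branch (sketch only):
# indexes = _with_retries(
#     lambda: api.vdb.update_samples_in_index(ts, update_indices=row_ids)
# )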
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-local_auth_ds-None-vector_store_row_ids-None-None-None-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
operation_kind = 3, row_ids = [0, 1, 2, 3, 4]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-embedding_fn3-local_auth_ds-None-vector_store_row_ids-None-None-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = [0, 1, 2, 3, 4]
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = None
init_embedding_function = <function embedding_fn3 at 0x7f194287e700>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQwNiwiZXhwIjoxNzA5NTI0NDA2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.RR3A5aVK4PwIrKvzAl4eLnlZZqHcE-R6u9UqVdbU5X_5TUyHU5ved5Ofum6mwgzyVT4nQ6dh7VqKF1g42PXG3g'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-local_auth_ds-None-None-None-vector_store_filter_udf-None-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
operation_kind = 3, row_ids = [1, 2, 3, 4, 5]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-embedding_fn3-local_auth_ds-None-None-None-vector_store_filter_udf-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = <function vector_store_filter_udf.<locals>.filter_udf at 0x7f18708d79c0>
vector_store_filter_udf = <function vector_store_filter_udf.<locals>.filter_udf at 0x7f18708d79c0>
vector_store_query = None
init_embedding_function = <function embedding_fn3 at 0x7f194287e700>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQxMCwiZXhwIjoxNzA5NTI0NDEwfQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.R6QpGBQ2r6eqGQjDs40KfhojgHV6CPmtd_rZDMIfcTgabV6B4HqL9i4WSnwVH40LHFLk0Czm8ZPpsW4MCW8Jig'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-local_auth_ds-None-None-vector_store_filters-None-None-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
operation_kind = 3, row_ids = [1, 2, 3, 4, 5]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-embedding_fn3-local_auth_ds-None-None-vector_store_filters-None-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = {'metadata': {'a': 1}}, vector_store_filter_udf = None
vector_store_query = None
init_embedding_function = <function embedding_fn3 at 0x7f194287e700>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQxNSwiZXhwIjoxNzA5NTI0NDE1fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.mzLgXK4KftVTIDlcJRMZ0sMBncfo5BiEj_LdNG3VaIfa2ecEncU5qrQUI1jrcarHkytgI69d926H7_WAi6naAw'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
operation_kind = 3, row_ids = [1, 2, 3, 4, 5]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='hub://testingacc2/tmp089d_test_deeplake_vectorstore_test_update_embedding-embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = "select * where metadata['a']==1"
init_embedding_function = <function embedding_fn3 at 0x7f194287e700>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQyMCwiZXhwIjoxNzA5NTI0NDIwfQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.oOkNaNz9us5JhMymDW1wzX2fX8zGciDISCFsQA-DQLZTAwtdA-2bNP67EpqxY2jXeqoDZKc_6AEWTKqCpVdYUA'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[None-local_auth_ds-vector_store_hash_ids-None-None-None-None-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
operation_kind = 3, row_ids = [0, 1, 2, 3, 4]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-None-local_auth_ds-vector_store_hash_ids-None-None-None-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = ['0', '1', '2', '3', '4'], vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = None, init_embedding_function = None
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQ1NSwiZXhwIjoxNzA5NTI0NDU1fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.Y5H0zpYR3dJtvmsbnjqYXSVq7dgNnAlhDAcjPhnbhV0o8V-_FkfN3jo577oW9NTKhywTLaUVaU-RNowC9qb_ng'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception