[ManagedDB] REST API-based thin client for ManagedService #11878
3033 tests run, 1652 passed, 1339 skipped, 42 failed.
Annotations
Check failure on line 1 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
github-actions / JUnit Test Report
test_deepmemory.test_deepmemory_train_and_cancel
failed on setup with "NotImplementedError: Accessing the dataset is not implemented for managed Vector Store yet."
Raw output
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7eff70da8ee0>
@property
def dataset(self):
"""Returns the dataset"""
try:
> return self.dataset_handler.dataset
E AttributeError: 'ManagedDH' object has no attribute 'dataset'
deeplake/core/vectorstore/deeplake_vectorstore.py:523: AttributeError
During handling of the above exception, another exception occurred:
request = <SubRequest 'corpus_query_relevances_copy' for <Function test_deepmemory_train_and_cancel>>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjU5MywiZXhwIjoxNzA5NTIyNTkzfQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.7kRFpUz4tRt5LslKwClIGFsYkOcdCWWS9Z-5mh46KHpzTkN8sM0b6xgxCBgYtMWnLGwkNshBn_wBRFDcxYZbUA'
@pytest.fixture
def corpus_query_relevances_copy(request, hub_cloud_dev_token):
if not is_opt_true(request, HUB_CLOUD_OPT):
pytest.skip(f"{HUB_CLOUD_OPT} flag not set")
return
corpus = _get_storage_path(request, HUB_CLOUD)
query_vs = VectorStore(
path=f"hub://{HUB_CLOUD_DEV_USERNAME}/deepmemory_test_queries2",
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
)
> queries = query_vs.dataset.text.data()["value"]
deeplake/tests/path_fixtures.py:487:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7eff70da8ee0>
@property
def dataset(self):
"""Returns the dataset"""
try:
return self.dataset_handler.dataset
except AttributeError:
> raise NotImplementedError(
"Acessing the dataset is not implemented for managed Vector Store yet."
)
E NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
deeplake/core/vectorstore/deeplake_vectorstore.py:525: NotImplementedError
Check failure on line 780 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
github-actions / JUnit Test Report
test_deepmemory.test_deepmemory_evaluate_with_embedding_function_specified_in_constructor_should_not_throw_any_exception
NotImplementedError: Accessing the dataset is not implemented for managed Vector Store yet.
Raw output
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7eff5dffee60>
@property
def dataset(self):
"""Returns the dataset"""
try:
> return self.dataset_handler.dataset
E AttributeError: 'ManagedDH' object has no attribute 'dataset'
deeplake/core/vectorstore/deeplake_vectorstore.py:523: AttributeError
During handling of the above exception, another exception occurred:
corpus_query_pair_path = ('hub://testingacc2/deepmemory_test_corpus_managed_2', 'hub://testingacc2/deepmemory_test_corpus_managed_2_eval_queries')
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjU5MywiZXhwIjoxNzA5NTIyNTkzfQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.7kRFpUz4tRt5LslKwClIGFsYkOcdCWWS9Z-5mh46KHpzTkN8sM0b6xgxCBgYtMWnLGwkNshBn_wBRFDcxYZbUA'
@pytest.mark.slow
@pytest.mark.flaky(reruns=3)
@pytest.mark.skipif(sys.platform == "win32", reason="Does not run on Windows")
def test_deepmemory_evaluate_with_embedding_function_specified_in_constructor_should_not_throw_any_exception(
corpus_query_pair_path,
hub_cloud_dev_token,
):
corpus, queries = corpus_query_pair_path
db = VectorStore(
path=corpus,
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
embedding_function=embedding_fn,
)
queries_vs = VectorStore(
path=queries,
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
embedding_function=embedding_fn,
)
> queries = queries_vs.dataset[:10].text.data()["value"]
deeplake/core/vectorstore/deep_memory/test_deepmemory.py:780:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7eff5dffee60>
@property
def dataset(self):
"""Returns the dataset"""
try:
return self.dataset_handler.dataset
except AttributeError:
> raise NotImplementedError(
"Acessing the dataset is not implemented for managed Vector Store yet."
)
E NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
deeplake/core/vectorstore/deeplake_vectorstore.py:525: NotImplementedError
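Both deep memory failures above share one root cause: with the managed (REST API based) thin client, the dataset handler ('ManagedDH') has no local dataset object, so VectorStore.dataset raises NotImplementedError and any fixture or test that reads query texts through it fails on setup. Below is a minimal sketch of a guard such tests could use until dataset access is implemented; the helper name is hypothetical, but the constructor arguments and the .dataset.text.data()["value"] access mirror the fixtures above.

import pytest
from deeplake.core.vectorstore.deeplake_vectorstore import VectorStore

def load_query_texts(path: str, token: str):
    # Hypothetical helper (not part of the test suite): open the queries store and
    # return its text column, skipping when the store is managed and therefore does
    # not expose the underlying dataset yet.
    vs = VectorStore(path=path, runtime={"tensor_db": True}, token=token)
    try:
        return vs.dataset.text.data()["value"]
    except NotImplementedError:
        pytest.skip("dataset access is not implemented for managed Vector Store yet")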
Check failure on line 505 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_generic
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66027840d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6602784690>])
samples = [1, 1, 1, 1, 'hello'], operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='xyz')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1705: in _update
self._update_non_tiled_sample(
deeplake/core/chunk_engine.py:1341: in _update_non_tiled_sample
chunk.update_sample(local_sample_index, sample)
deeplake/core/chunk/uncompressed_chunk.py:265: in update_sample
serialized_sample, shape = self.serialize_sample(sample, break_into_tiles=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk.uncompressed_chunk.UncompressedChunk object at 0x7f66027872d0>
incoming_sample = 'hello', sample_compression = None, chunk_compression = None
break_into_tiles = False, store_uncompressed_tiles = False
def serialize_sample(
self,
incoming_sample: InputSample,
sample_compression: Optional[str] = None,
chunk_compression: Optional[str] = None,
break_into_tiles: bool = True,
store_uncompressed_tiles: bool = False,
) -> SerializedOutput:
"""Converts the sample into bytes"""
dt, ht, min_chunk_size, tiling_threshold = (
self.dtype,
self.htype,
self.min_chunk_size,
self.tiling_threshold,
)
if tiling_threshold < 0:
break_into_tiles = False
if isinstance(incoming_sample, LinkedSample):
if self.tensor_meta.is_link:
incoming_sample = incoming_sample.path
else:
raise ValueError(
"deeplake.link() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if isinstance(incoming_sample, LinkedTiledSample):
if not self.tensor_meta.is_link:
raise ValueError(
"deeplake.link_tiled() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if self.is_text_like:
if isinstance(incoming_sample, LinkedSample):
incoming_sample = incoming_sample.path
if incoming_sample is None:
htype = "text" if self.tensor_meta.is_link else self.htype
empty_mapping = {"text": "", "list": [], "json": {}, "tag": []}
incoming_sample = empty_mapping[htype]
if isinstance(incoming_sample, Sample):
if incoming_sample.is_text_like:
incoming_sample, shape = serialize_text_sample_object( # type: ignore
incoming_sample, sample_compression
)
else:
htype = "Linked" if self.tensor_meta.is_link else self.htype
raise TypeError(
f"Cannot append to {htype} tensor with Sample object"
)
elif isinstance(incoming_sample, LinkedTiledSample):
incoming_sample, shape = serialize_linked_tiled_sample(incoming_sample)
else:
incoming_sample, shape = serialize_text(
incoming_sample, sample_compression, dt, ht # type: ignore
)
elif incoming_sample is None:
shape = (0,) * self.num_dims if self.num_dims else None
incoming_sample = b""
elif isinstance(incoming_sample, Sample):
incoming_sample, shape = serialize_sample_object( # type: ignore
incoming_sample,
sample_compression,
chunk_compression,
dt,
ht,
tiling_threshold,
break_into_tiles,
store_uncompressed_tiles,
)
elif isinstance(incoming_sample, PartialSample):
incoming_sample, shape = serialize_partial_sample_object(
incoming_sample,
sample_compression,
chunk_compression,
dt,
ht,
min_chunk_size,
)
elif isinstance(incoming_sample, deeplake.core.tensor.Tensor):
incoming_sample, shape = serialize_tensor(
incoming_sample,
sample_compression,
chunk_compression,
dt,
ht,
tiling_threshold,
break_into_tiles,
store_uncompressed_tiles,
)
elif isinstance(
incoming_sample,
(np.ndarray, list, int, float, bool, np.integer, np.floating, np.bool_),
):
incoming_sample, shape = serialize_numpy_and_base_types(
incoming_sample,
sample_compression,
chunk_compression,
dt,
ht,
tiling_threshold,
break_into_tiles,
store_uncompressed_tiles,
)
elif isinstance(incoming_sample, SampleTiles):
shape = incoming_sample.sample_shape
elif isinstance(incoming_sample, Polygons):
incoming_sample, shape = serialize_polygons(
incoming_sample, sample_compression, dt
)
else:
msg = f"Cannot serialize sample of type {type(incoming_sample)}."
if isinstance(msg, str):
method = "link" if self.tensor_meta.is_link else "read"
msg += f"If you are appending data from a file, please pass deeplake.{method}(filename) to the append operation, instead of the filename string."
> raise TypeError(msg)
E TypeError: Cannot serialize sample of type <class 'str'>.If you are appending data from a file, please pass deeplake.read(filename) to the append operation, instead of the filename string.
deeplake/core/chunk/base_chunk.py:409: TypeError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_generic', index=Index([slice(5, None, None)]), tensors=['abc', 'xyz'])
sample = {'abc': [1, 1, 1, 1, 1], 'xyz': [1, 1, 1, 1, 'hello']}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66027840d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6602784690>])
samples = [1, 1, 1, 1, 'hello'], operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='xyz')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor xyz.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_generic', tensors=['abc', 'xyz'])
def test_ds_update_generic(local_ds):
with local_ds as ds:
ds.create_tensor("abc")
ds.create_tensor("xyz")
ds.abc.extend(list(range(10)))
ds.xyz.extend(list(range(10)))
ds[0].update({"abc": 1, "xyz": 1})
ds[2:5].update({"abc": [1] * 3, "xyz": [1] * 3})
np.testing.assert_array_equal(ds.abc[:5].numpy().flatten(), [1] * 5)
np.testing.assert_array_equal(ds.xyz[:5].numpy().flatten(), [1] * 5)
with pytest.raises(SampleUpdateError):
> ds[5:].update({"abc": [1] * 5, "xyz": [1] * 4 + ["hello"]})
deeplake/api/tests/test_update_samples.py:505:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_generic', index=Index([slice(5, None, None)]), tensors=['abc', 'xyz'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_generic', index=Index([slice(5, None, None)]), tensors=['abc', 'xyz'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
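This failure, and every test_ds_update_* failure below, follows the same pattern: the update on a sliced view raises the expected SampleUpdateError inside chunk_engine.update, but Dataset.update then tries to roll back with self.reset(verbose=False) on that same view, and the view guard in invalid_view_op.py raises InvalidOperationError, so pytest.raises(SampleUpdateError) sees the wrong exception and the test fails. A self-contained sketch of the failing path for test_ds_update_generic, assuming a local dataset (the path and prints are illustrative only):

import deeplake
from deeplake.util.exceptions import SampleUpdateError

ds = deeplake.empty("./repro_ds_update_generic", overwrite=True)  # illustrative path
ds.create_tensor("abc")
ds.create_tensor("xyz")
ds.abc.extend(list(range(10)))
ds.xyz.extend(list(range(10)))

try:
    # 'hello' cannot be serialized into the numeric xyz tensor, so the update fails
    # and Dataset.update attempts to roll back by calling reset() on the ds[5:] view.
    ds[5:].update({"abc": [1] * 5, "xyz": [1] * 4 + ["hello"]})
except SampleUpdateError:
    print("rollback worked; this is the exception the test expects")
except Exception as e:
    print(f"rollback rejected; CI observes {type(e).__name__} instead")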
Check failure on line 555 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_text_like[lz4-None]
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f6602480610>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6602481dd0>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1705: in _update
self._update_non_tiled_sample(
deeplake/core/chunk_engine.py:1341: in _update_non_tiled_sample
chunk.update_sample(local_sample_index, sample)
deeplake/core/chunk/sample_compressed_chunk.py:163: in update_sample
serialized_sample, shape = self.serialize_sample(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk.sample_compressed_chunk.SampleCompressedChunk object at 0x7f6602481f50>
incoming_sample = Sample(is_lazy=True, path=bad_sample)
sample_compression = 'lz4', chunk_compression = None, break_into_tiles = False
store_uncompressed_tiles = False
def serialize_sample(
self,
incoming_sample: InputSample,
sample_compression: Optional[str] = None,
chunk_compression: Optional[str] = None,
break_into_tiles: bool = True,
store_uncompressed_tiles: bool = False,
) -> SerializedOutput:
"""Converts the sample into bytes"""
dt, ht, min_chunk_size, tiling_threshold = (
self.dtype,
self.htype,
self.min_chunk_size,
self.tiling_threshold,
)
if tiling_threshold < 0:
break_into_tiles = False
if isinstance(incoming_sample, LinkedSample):
if self.tensor_meta.is_link:
incoming_sample = incoming_sample.path
else:
raise ValueError(
"deeplake.link() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if isinstance(incoming_sample, LinkedTiledSample):
if not self.tensor_meta.is_link:
raise ValueError(
"deeplake.link_tiled() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if self.is_text_like:
if isinstance(incoming_sample, LinkedSample):
incoming_sample = incoming_sample.path
if incoming_sample is None:
htype = "text" if self.tensor_meta.is_link else self.htype
empty_mapping = {"text": "", "list": [], "json": {}, "tag": []}
incoming_sample = empty_mapping[htype]
if isinstance(incoming_sample, Sample):
if incoming_sample.is_text_like:
incoming_sample, shape = serialize_text_sample_object( # type: ignore
incoming_sample, sample_compression
)
else:
htype = "Linked" if self.tensor_meta.is_link else self.htype
> raise TypeError(
f"Cannot append to {htype} tensor with Sample object"
E TypeError: Cannot append to json tensor with Sample object
deeplake/core/chunk/base_chunk.py:341: TypeError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-lz4-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
sample = {'json': [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)], 'list': [[1, 2, 3], [1, 2, 3], [1, 2, 3]], 'text': ['hello', 'hello', 'hello']}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f6602480610>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6602481dd0>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor json.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-lz4-None-', tensors=['json', 'list', 'text'])
sc = 'lz4', cc = None
@pytest.mark.parametrize(("sc", "cc"), [("lz4", None), (None, "lz4"), (None, None)])
def test_ds_update_text_like(local_ds, sc, cc):
with local_ds as ds:
ds.create_tensor(
"text", htype="text", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"list", htype="list", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"json", htype="json", sample_compression=sc, chunk_compression=cc
)
text_samples = (["hello"] + ["world"] * 2) * 2
t = "hello"
ds.text.extend(text_samples)
list_samples = ([[1, 2, 3]] + [[4, 5, 6]] * 2) * 2
l = [1, 2, 3]
ds.list.extend(list_samples)
json_samples = ([{"a": 1}] + [{"b": 2, "c": 3}] * 2) * 2
j = {"a": 1}
ds.json.extend(json_samples)
ds[1].update({"text": t, "list": l, "json": j})
assert ds[1].text.data()["value"] == t
assert ds[1].list.data()["value"] == l
assert ds[1].json.data()["value"] == j
ds[:3].update({"text": [t] * 3, "list": [l] * 3, "json": [j] * 3})
assert ds[:3].text.data()["value"] == [t] * 3
assert ds[:3].list.data()["value"] == [l] * 3
assert ds[:3].json.data()["value"] == [j] * 3
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"text": [t] * 3,
"list": [l] * 3,
"json": [j] * 2 + [deeplake.read("bad_sample")],
}
)
deeplake/api/tests/test_update_samples.py:555:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-lz4-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-lz4-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
Check failure on line 555 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_text_like[None-lz4]
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f660280c8d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f660280c790>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1705: in _update
self._update_non_tiled_sample(
deeplake/core/chunk_engine.py:1341: in _update_non_tiled_sample
chunk.update_sample(local_sample_index, sample)
deeplake/core/chunk/chunk_compressed_chunk.py:449: in update_sample
self.update_sample_byte_compression(local_index, new_sample)
deeplake/core/chunk/chunk_compressed_chunk.py:454: in update_sample_byte_compression
serialized_sample, shape = self.serialize_sample(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk.chunk_compressed_chunk.ChunkCompressedChunk object at 0x7f660280d490>
incoming_sample = Sample(is_lazy=True, path=bad_sample)
sample_compression = None, chunk_compression = 'lz4', break_into_tiles = False
store_uncompressed_tiles = False
def serialize_sample(
self,
incoming_sample: InputSample,
sample_compression: Optional[str] = None,
chunk_compression: Optional[str] = None,
break_into_tiles: bool = True,
store_uncompressed_tiles: bool = False,
) -> SerializedOutput:
"""Converts the sample into bytes"""
dt, ht, min_chunk_size, tiling_threshold = (
self.dtype,
self.htype,
self.min_chunk_size,
self.tiling_threshold,
)
if tiling_threshold < 0:
break_into_tiles = False
if isinstance(incoming_sample, LinkedSample):
if self.tensor_meta.is_link:
incoming_sample = incoming_sample.path
else:
raise ValueError(
"deeplake.link() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if isinstance(incoming_sample, LinkedTiledSample):
if not self.tensor_meta.is_link:
raise ValueError(
"deeplake.link_tiled() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if self.is_text_like:
if isinstance(incoming_sample, LinkedSample):
incoming_sample = incoming_sample.path
if incoming_sample is None:
htype = "text" if self.tensor_meta.is_link else self.htype
empty_mapping = {"text": "", "list": [], "json": {}, "tag": []}
incoming_sample = empty_mapping[htype]
if isinstance(incoming_sample, Sample):
if incoming_sample.is_text_like:
incoming_sample, shape = serialize_text_sample_object( # type: ignore
incoming_sample, sample_compression
)
else:
htype = "Linked" if self.tensor_meta.is_link else self.htype
> raise TypeError(
f"Cannot append to {htype} tensor with Sample object"
E TypeError: Cannot append to json tensor with Sample object
deeplake/core/chunk/base_chunk.py:341: TypeError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-lz4-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
sample = {'json': [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)], 'list': [[1, 2, 3], [1, 2, 3], [1, 2, 3]], 'text': ['hello', 'hello', 'hello']}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f660280c8d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f660280c790>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor json.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-lz4-', tensors=['json', 'list', 'text'])
sc = None, cc = 'lz4'
@pytest.mark.parametrize(("sc", "cc"), [("lz4", None), (None, "lz4"), (None, None)])
def test_ds_update_text_like(local_ds, sc, cc):
with local_ds as ds:
ds.create_tensor(
"text", htype="text", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"list", htype="list", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"json", htype="json", sample_compression=sc, chunk_compression=cc
)
text_samples = (["hello"] + ["world"] * 2) * 2
t = "hello"
ds.text.extend(text_samples)
list_samples = ([[1, 2, 3]] + [[4, 5, 6]] * 2) * 2
l = [1, 2, 3]
ds.list.extend(list_samples)
json_samples = ([{"a": 1}] + [{"b": 2, "c": 3}] * 2) * 2
j = {"a": 1}
ds.json.extend(json_samples)
ds[1].update({"text": t, "list": l, "json": j})
assert ds[1].text.data()["value"] == t
assert ds[1].list.data()["value"] == l
assert ds[1].json.data()["value"] == j
ds[:3].update({"text": [t] * 3, "list": [l] * 3, "json": [j] * 3})
assert ds[:3].text.data()["value"] == [t] * 3
assert ds[:3].list.data()["value"] == [l] * 3
assert ds[:3].json.data()["value"] == [j] * 3
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"text": [t] * 3,
"list": [l] * 3,
"json": [j] * 2 + [deeplake.read("bad_sample")],
}
)
deeplake/api/tests/test_update_samples.py:555:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-lz4-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-lz4-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
Check failure on line 555 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_text_like[None-None]
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66022c73d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f66022c7110>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1705: in _update
self._update_non_tiled_sample(
deeplake/core/chunk_engine.py:1341: in _update_non_tiled_sample
chunk.update_sample(local_sample_index, sample)
deeplake/core/chunk/uncompressed_chunk.py:265: in update_sample
serialized_sample, shape = self.serialize_sample(sample, break_into_tiles=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk.uncompressed_chunk.UncompressedChunk object at 0x7f66022c4c10>
incoming_sample = Sample(is_lazy=True, path=bad_sample)
sample_compression = None, chunk_compression = None, break_into_tiles = False
store_uncompressed_tiles = False
def serialize_sample(
self,
incoming_sample: InputSample,
sample_compression: Optional[str] = None,
chunk_compression: Optional[str] = None,
break_into_tiles: bool = True,
store_uncompressed_tiles: bool = False,
) -> SerializedOutput:
"""Converts the sample into bytes"""
dt, ht, min_chunk_size, tiling_threshold = (
self.dtype,
self.htype,
self.min_chunk_size,
self.tiling_threshold,
)
if tiling_threshold < 0:
break_into_tiles = False
if isinstance(incoming_sample, LinkedSample):
if self.tensor_meta.is_link:
incoming_sample = incoming_sample.path
else:
raise ValueError(
"deeplake.link() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if isinstance(incoming_sample, LinkedTiledSample):
if not self.tensor_meta.is_link:
raise ValueError(
"deeplake.link_tiled() samples can only be appended to linked tensors. To create linked tensors, include link in htype during create_tensor, for example 'link[image]'."
)
if self.is_text_like:
if isinstance(incoming_sample, LinkedSample):
incoming_sample = incoming_sample.path
if incoming_sample is None:
htype = "text" if self.tensor_meta.is_link else self.htype
empty_mapping = {"text": "", "list": [], "json": {}, "tag": []}
incoming_sample = empty_mapping[htype]
if isinstance(incoming_sample, Sample):
if incoming_sample.is_text_like:
incoming_sample, shape = serialize_text_sample_object( # type: ignore
incoming_sample, sample_compression
)
else:
htype = "Linked" if self.tensor_meta.is_link else self.htype
> raise TypeError(
f"Cannot append to {htype} tensor with Sample object"
E TypeError: Cannot append to json tensor with Sample object
deeplake/core/chunk/base_chunk.py:341: TypeError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
sample = {'json': [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)], 'list': [[1, 2, 3], [1, 2, 3], [1, 2, 3]], 'text': ['hello', 'hello', 'hello']}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66022c73d0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f66022c7110>])
samples = [{'a': 1}, {'a': 1}, Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='json')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor json.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-None-', tensors=['json', 'list', 'text'])
sc = None, cc = None
@pytest.mark.parametrize(("sc", "cc"), [("lz4", None), (None, "lz4"), (None, None)])
def test_ds_update_text_like(local_ds, sc, cc):
with local_ds as ds:
ds.create_tensor(
"text", htype="text", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"list", htype="list", sample_compression=sc, chunk_compression=cc
)
ds.create_tensor(
"json", htype="json", sample_compression=sc, chunk_compression=cc
)
text_samples = (["hello"] + ["world"] * 2) * 2
t = "hello"
ds.text.extend(text_samples)
list_samples = ([[1, 2, 3]] + [[4, 5, 6]] * 2) * 2
l = [1, 2, 3]
ds.list.extend(list_samples)
json_samples = ([{"a": 1}] + [{"b": 2, "c": 3}] * 2) * 2
j = {"a": 1}
ds.json.extend(json_samples)
ds[1].update({"text": t, "list": l, "json": j})
assert ds[1].text.data()["value"] == t
assert ds[1].list.data()["value"] == l
assert ds[1].json.data()["value"] == j
ds[:3].update({"text": [t] * 3, "list": [l] * 3, "json": [j] * 3})
assert ds[:3].text.data()["value"] == [t] * 3
assert ds[:3].list.data()["value"] == [l] * 3
assert ds[:3].json.data()["value"] == [j] * 3
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"text": [t] * 3,
"list": [l] * 3,
"json": [j] * 2 + [deeplake.read("bad_sample")],
}
)
deeplake/api/tests/test_update_samples.py:555:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_text_like-None-None-', index=Index([slice(3, None, None)]), tensors=['json', 'list', 'text'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
Check failure on line 632 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_link
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = Sample(is_lazy=True, path=bad_sample)
def _read_from_path(self) -> bytes: # type: ignore
if self._buffer is None:
path_type = get_path_type(self.path)
try:
if path_type == "local":
> self._buffer = self._read_from_local()
deeplake/core/sample.py:440:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Sample(is_lazy=True, path=bad_sample)
def _read_from_local(self) -> bytes:
> with open(self.path, "rb") as f: # type: ignore
E FileNotFoundError: [Errno 2] No such file or directory: 'bad_sample'
deeplake/core/sample.py:456: FileNotFoundError
The above exception was the direct cause of the following exception:
sample_path = 'bad_sample', sample_creds_key = None
link_creds = <deeplake.core.link_creds.LinkCreds object at 0x7f6601f88050>
verify = True
def read_linked_sample(
sample_path: str, sample_creds_key: Optional[str], link_creds, verify: bool
):
provider_type = get_path_type(sample_path)
try:
if provider_type == "local":
> return deeplake.read(sample_path, verify=verify)
deeplake/core/linked_sample.py:27:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/api/read.py:61: in read
return Sample(
deeplake/core/sample.py:101: in __init__
compressed_bytes = self._read_from_path()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Sample(is_lazy=True, path=bad_sample)
def _read_from_path(self) -> bytes: # type: ignore
if self._buffer is None:
path_type = get_path_type(self.path)
try:
if path_type == "local":
self._buffer = self._read_from_local()
elif path_type == "gcs":
self._buffer = self._read_from_gcs()
elif path_type == "s3":
self._buffer = self._read_from_s3()
elif path_type == "azure":
self._buffer = self._read_from_azure()
elif path_type == "gdrive":
self._buffer = self._read_from_gdrive()
elif path_type == "http":
self._buffer = self._read_from_http()
except Exception as e:
> raise SampleReadError(self.path) from e # type: ignore
E deeplake.util.exceptions.SampleReadError: Unable to read sample from bad_sample
deeplake/core/sample.py:452: SampleReadError
The above exception was the direct cause of the following exception:
self = <deeplake.core.linked_chunk_engine.LinkedChunkEngine object at 0x7f6602390210>
samples = [<deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6602497750>]
verify = True, ignore_errors = False
def check_each_sample(self, samples, verify=True, ignore_errors=False):
link_creds = self.link_creds
verified_samples = []
skipped = []
for i, sample in enumerate(samples):
try:
if isinstance(sample, deeplake.core.tensor.Tensor) and sample.is_link:
sample = sample._linked_sample()
samples[i] = sample
elif (
not isinstance(sample, (LinkedSample, LinkedTiledSample))
and sample is not None
):
raise TypeError(
f"Expected LinkedSample or LinkedTiledSample, got {type(sample)} instead. Use deeplake.link() to link samples or deeplake.link_tiled() to link multiple images as tiles."
)
path, creds_key = get_path_creds_key(sample)
# verifies existence of creds_key
if verify:
link_creds.get_encoding(creds_key, path)
if sample is None or sample.path == "":
verified_samples.append(sample)
elif isinstance(sample, LinkedTiledSample):
verify_samples = self.verify and verify
sample.set_check_tile_shape(self.link_creds, verify_samples)
sample.set_sample_shape()
verified_samples.append(sample)
else:
try:
_verify = verify and self.verify
verified_samples.append(
> read_linked_sample(
sample.path,
sample.creds_key,
self.link_creds,
verify=_verify,
)
)
deeplake/core/linked_chunk_engine.py:280:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sample_path = 'bad_sample', sample_creds_key = None
link_creds = <deeplake.core.link_creds.LinkCreds object at 0x7f6601f88050>
verify = True
def read_linked_sample(
sample_path: str, sample_creds_key: Optional[str], link_creds, verify: bool
):
provider_type = get_path_type(sample_path)
try:
if provider_type == "local":
return deeplake.read(sample_path, verify=verify)
elif provider_type == "http":
return _read_http_linked_sample(
link_creds, sample_creds_key, sample_path, verify
)
else:
return _read_cloud_linked_sample(
link_creds, sample_creds_key, sample_path, provider_type, verify
)
except Exception as e:
> raise GetDataFromLinkError(sample_path) from e
E deeplake.util.exceptions.GetDataFromLinkError: Unable to get data from link bad_sample.
deeplake/core/linked_sample.py:37: GetDataFromLinkError
The above exception was the direct cause of the following exception:
self = <deeplake.core.linked_chunk_engine.LinkedChunkEngine object at 0x7f6602390210>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f66023903d0>])
samples = [<deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6602497750>]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images2')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1689: in _update
verified_samples = self.check_each_sample(samples)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.linked_chunk_engine.LinkedChunkEngine object at 0x7f6602390210>
samples = [<deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6602497750>]
verify = True, ignore_errors = False
def check_each_sample(self, samples, verify=True, ignore_errors=False):
link_creds = self.link_creds
verified_samples = []
skipped = []
for i, sample in enumerate(samples):
try:
if isinstance(sample, deeplake.core.tensor.Tensor) and sample.is_link:
sample = sample._linked_sample()
samples[i] = sample
elif (
not isinstance(sample, (LinkedSample, LinkedTiledSample))
and sample is not None
):
raise TypeError(
f"Expected LinkedSample or LinkedTiledSample, got {type(sample)} instead. Use deeplake.link() to link samples or deeplake.link_tiled() to link multiple images as tiles."
)
path, creds_key = get_path_creds_key(sample)
# verifies existence of creds_key
if verify:
link_creds.get_encoding(creds_key, path)
if sample is None or sample.path == "":
verified_samples.append(sample)
elif isinstance(sample, LinkedTiledSample):
verify_samples = self.verify and verify
sample.set_check_tile_shape(self.link_creds, verify_samples)
sample.set_sample_shape()
verified_samples.append(sample)
else:
try:
_verify = verify and self.verify
verified_samples.append(
read_linked_sample(
sample.path,
sample.creds_key,
self.link_creds,
verify=_verify,
)
)
except Exception as e:
> raise BadLinkError(sample.path, sample.creds_key) from e
E deeplake.util.exceptions.BadLinkError: Verification of link failed. Make sure that the link you are trying to append is correct.
E
E Failed link: bad_sample
E creds_key used: None
E
E No credentials have been provided to access the link. If the link is not publicly accessible, add access credentials to your dataset and use the appropriate creds_key.
deeplake/core/linked_chunk_engine.py:288: BadLinkError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_link', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
sample = {'images1': [<deeplake.core.linked_sample.LinkedSample object at 0x7f6601f8a750>, <deeplake.core.linked_sample.LinkedS...ed_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6602497750>]}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.linked_chunk_engine.LinkedChunkEngine object at 0x7f6602390210>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f66023903d0>])
samples = [<deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6601f89450>, <deeplake.core.linked_sample.LinkedSample object at 0x7f6602497750>]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images2')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor images2.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_link', tensors=['images1', 'images2'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'
dog_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg'
def test_ds_update_link(local_ds, cat_path, dog_path):
with local_ds as ds:
ds.create_tensor("images1", htype="link[image]", sample_compression="png")
ds.create_tensor("images2", htype="link[image]", sample_compression="png")
dog = deeplake.link(dog_path)
cat = deeplake.link(cat_path)
ds.images1.extend([cat] * 6)
ds.images2.extend([dog] * 6)
ds[0].update({"images1": dog, "images2": cat})
assert ds[0].images1.shape == (323, 480, 3)
assert ds[0].images2.shape == (900, 900, 3)
ds[:3].update({"images1": [dog] * 3, "images2": [cat] * 3})
assert ds[:3].images1.shape == (3, 323, 480, 3)
assert ds[:3].images2.shape == (3, 900, 900, 3)
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"images1": [dog] * 3,
"images2": [cat] * 2 + [deeplake.link("bad_sample")],
}
)
deeplake/api/tests/test_update_samples.py:632:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_link', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_link', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
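test_ds_update_link reaches the same rollback problem through the linked-tensor path: deeplake.link("bad_sample") fails verification during the update (FileNotFoundError -> SampleReadError -> GetDataFromLinkError -> BadLinkError -> SampleUpdateError), and the subsequent reset() on the ds[3:] view is again rejected. A sketch under the same assumptions, with placeholder paths standing in for any readable local images:

import deeplake
from deeplake.util.exceptions import SampleUpdateError

ds = deeplake.empty("./repro_ds_update_link", overwrite=True)  # illustrative path
ds.create_tensor("images1", htype="link[image]", sample_compression="png")
ds.create_tensor("images2", htype="link[image]", sample_compression="png")
cat = deeplake.link("/path/to/cat.jpeg")  # placeholder for a readable image
dog = deeplake.link("/path/to/dog.jpg")   # placeholder for a readable image
ds.images1.extend([cat] * 6)
ds.images2.extend([dog] * 6)

try:
    # The bad link fails verification mid-update; the rollback calls reset() on a
    # view, so InvalidOperationError masks the SampleUpdateError the test expects.
    ds[3:].update(
        {"images1": [dog] * 3, "images2": [cat] * 2 + [deeplake.link("bad_sample")]}
    )
except SampleUpdateError:
    print("expected exception")
except Exception as e:
    print(f"observed {type(e).__name__} instead")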
Check failure on line 668 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_polygon
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66022f6d90>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6601f683d0>])
samples = [array([[[1., 1.],
[1., 1.],
[1., 1.]],
[[1., 1.],
[1., 1.],
[1., 1.]],
...., 1.]],
[[1., 1.],
[1., 1.],
[1., 1.]]]), array([[1., 1.],
[1., 1.],
[1., 1.]])]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='xyz')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1693: in _update
samples = [Polygons(sample, self.tensor_meta.dtype) for sample in samples] # type: ignore
deeplake/core/chunk_engine.py:1693: in <listcomp>
samples = [Polygons(sample, self.tensor_meta.dtype) for sample in samples] # type: ignore
deeplake/core/polygon.py:59: in __init__
self._validate()
deeplake/core/polygon.py:66: in _validate
ndim = self[0].ndim
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.polygon.Polygon object at 0x7f6601f68250>
@property
def ndim(self):
"""Dimension of the polygon."""
> return len(self.coords[0])
E TypeError: object of type 'numpy.float64' has no len()
deeplake/core/polygon.py:22: TypeError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_polygon', index=Index([slice(3, None, None)]), tensors=['abc', 'xyz'])
sample = {'abc': [array([[[1., 1.],
[1., 1.]],
[[1., 1.],
[1., 1.]]]), array([[[1., 1.],
[1., 1..., 1.]],
[[1., 1.],
[1., 1.],
[1., 1.]]]), array([[1., 1.],
[1., 1.],
[1., 1.]])]}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f66022f6d90>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f6601f683d0>])
samples = [array([[[1., 1.],
[1., 1.],
[1., 1.]],
[[1., 1.],
[1., 1.],
[1., 1.]],
...., 1.]],
[[1., 1.],
[1., 1.],
[1., 1.]]]), array([[1., 1.],
[1., 1.],
[1., 1.]])]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='xyz')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor xyz.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_polygon', tensors=['abc', 'xyz'])
def test_ds_update_polygon(local_ds):
with local_ds as ds:
ds.create_tensor("abc", htype="polygon", chunk_compression="lz4")
ds.create_tensor("xyz", htype="polygon", chunk_compression="lz4")
abc_samples = np.ones((6, 3, 3, 2))
xyz_samples = np.ones((6, 2, 2, 2))
ds.abc.extend(abc_samples)
ds.xyz.extend(xyz_samples)
ds[0].update({"abc": np.ones((2, 2, 2)), "xyz": np.ones((3, 3, 2))})
assert ds[0].abc.shape == (2, 2, 2)
assert ds[0].xyz.shape == (3, 3, 2)
ds[:3].update({"abc": [np.ones((2, 2, 2))] * 3, "xyz": [np.ones((3, 3, 2))] * 3})
assert ds[:3].abc.shape == (3, 2, 2, 2)
assert ds[:3].xyz.shape == (3, 3, 3, 2)
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"abc": [np.ones((2, 2, 2))] * 3,
"xyz": [np.ones((3, 3, 2))] * 2 + [np.ones((3, 2))],
}
)
deeplake/api/tests/test_update_samples.py:668:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_polygon', index=Index([slice(3, None, None)]), tensors=['abc', 'xyz'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_polygon', index=Index([slice(3, None, None)]), tensors=['abc', 'xyz'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
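Before the rollback problem described above, the inner TypeError here is a shape issue: a polygon sample is expected to be a stack of polygons (three dimensions, with the last axis holding the (x, y) coordinates), while the deliberately bad sample np.ones((3, 2)) is a single polygon, so Polygon.ndim ends up calling len() on a bare float. A small numpy illustration of the shapes involved, based only on the traceback above:

```python
import numpy as np

good = np.ones((3, 3, 2))  # 3 polygons x 3 points x (x, y); iterating yields 2-D point arrays
bad = np.ones((3, 2))      # a single polygon; iterating yields 1-D rows, and row[0] is a
                           # plain numpy.float64, which is what len() blows up on

assert good[0].ndim == 2                    # len(good[0][0]) == 2 works
assert isinstance(bad[0][0], np.floating)   # len(bad[0][0]) raises the TypeError seen above
```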
Check failure on line 209 in deeplake/core/tests/test_vdb_indexes.py
github-actions / JUnit Test Report
test_vdb_indexes.test_index_maintenance_delete
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embeddings'), indexes = [4999], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embeddings'), operation_kind = 2, row_ids = [4999]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
> indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1568: RuntimeError
During handling of the above exception, another exception occurred:
local_auth_ds_generator = <function local_auth_ds_generator.<locals>.generate_local_auth_ds at 0x7f65d813e340>
@requires_libdeeplake
def test_index_maintenance_delete(local_auth_ds_generator):
ds = local_auth_ds_generator()
with ds:
ds.create_tensor(
"embeddings",
dtype=np.float32,
htype="embedding",
sample_compression=None,
)
ds.embeddings.unload_vdb_index_cache()
arr = np.random.uniform(-1, 1, (5000, 48)).astype("float32")
ds.embeddings.extend(arr)
ds.embeddings.create_vdb_index("hnsw_1", distance="cosine_similarity")
index = ds.embeddings.load_vdb_index("hnsw_1")
count = 0
for i in range(len(ds)):
ret = index.search_knn(ds.embeddings[i].numpy(), 1)
if i == ret.indices[0]:
count += 1
recall = count / len(ds)
sample = ds.embeddings[4999].numpy()
> ds.pop(4999)
deeplake/core/tests/test_vdb_indexes.py:209:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/dataset/dataset.py:4711: in pop
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embeddings'), indexes = [4999], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
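The three VDB-index failures in this run (this one, test_vdb_index_incr_maint_append_pop and test_vdb_index_incr_maint_update below) bottom out in the same place: the libdeeplake call (api.vdb.remove_samples_from_index / update_samples_in_index) fails with RuntimeError: request_failed, and _incr_maintenance_vdb_indexes re-wraps it in a bare Exception, discarding the original type. A hedged sketch of how the wrapper could keep the cause attached; this condenses the guards and is an illustration of standard exception chaining, not the shipped code:

```python
class VdbIndexMaintenanceError(Exception):
    # Hypothetical exception type for illustration; the current code raises a bare Exception.
    pass

def _incr_maintenance_vdb_indexes_sketch(tensor, indexes, index_operation):
    try:
        for vdb_index in getattr(tensor.meta, "vdb_indexes", None) or []:
            tensor.update_vdb_index(operation_kind=index_operation, row_ids=indexes)
    except Exception as e:
        # `raise ... from e` keeps RuntimeError("request_failed") as __cause__, so callers
        # can still distinguish backend failures from local index-maintenance bugs.
        raise VdbIndexMaintenanceError(
            f"An error occurred while regenerating VDB indexes: {e}"
        ) from e
```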
Check failure on line 1754 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_vdb_index_incr_maint_append_pop
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding'), indexes = [2], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding'), operation_kind = 2, row_ids = [2]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
> indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1568: RuntimeError
During handling of the above exception, another exception occurred:
local_path = './hub_pytest/test_deeplake_vectorstore/test_vdb_index_incr_maint_append_pop'
capsys = <_pytest.capture.CaptureFixture object at 0x7f6601ee9450>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjE5NiwiZXhwIjoxNzA5NTIyMTk2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.76VsQkoBfnHsLLmUaMe8Lul1Hct0vUczdA9OhHlKoXv67yNdTNKtWWjzcwYKvny3_wc01yURAF-6JyFG1JhSiA'
@requires_libdeeplake
def test_vdb_index_incr_maint_append_pop(local_path, capsys, hub_cloud_dev_token):
number_of_data = 103
texts, embeddings, ids, metadatas, _ = utils.create_data(
number_of_data=number_of_data, embedding_dim=EMBEDDING_DIM
)
txt1 = texts[99]
md1 = metadatas[99]
ids1 = ids[99]
emb1 = embeddings[99]
txt2 = texts[100]
md2 = metadatas[100]
ids2 = ids[100]
emb2 = embeddings[100]
txt3 = texts[101]
md3 = metadatas[101]
ids3 = ids[101]
emb3 = embeddings[101]
txt4 = texts[102]
md4 = metadatas[102]
ids4 = ids[102]
emb4 = embeddings[102]
# initialize vector store object with vdb index threshold as 200.
vector_store = DeepLakeVectorStore(
path=local_path,
overwrite=True,
verbose=True,
exec_option="compute_engine",
index_params={"threshold": 2, "distance_metric": "L2"},
token=hub_cloud_dev_token,
)
ds = vector_store.dataset_handler.dataset
ds.append({"embedding": emb1, "text": txt1, "id": ids1, "metadata": md1})
ds.append({"embedding": emb2, "text": txt2, "id": ids2, "metadata": md2})
ds.append({"embedding": emb3, "text": txt3, "id": ids3, "metadata": md3})
ds.append({"embedding": emb4, "text": txt4, "id": ids4, "metadata": md4})
# assert len(vector_store) == number_of_data
assert set(vector_store.dataset_handler.dataset.tensors) == set(
[
"embedding",
"id",
"metadata",
"text",
]
)
assert set(vector_store.tensors()) == set(
[
"embedding",
"id",
"metadata",
"text",
]
)
# Check if the index is recreated properly.
# ds = vector_store.dataset
es = ds.embedding.get_vdb_indexes()
assert len(es) == 1
assert es[0]["id"] == "hnsw_1"
assert es[0]["distance"] == "l2_norm"
assert es[0]["type"] == "hnsw"
# search the embeddings.
query1 = ds.embedding[1].numpy()
query2 = ds.embedding[2].numpy()
query3 = ds.embedding[3].numpy()
s1 = ",".join(str(c) for c in query1)
view1 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s1}]) DESC limit 1"
)
res1 = list(view1.sample_indices)
assert res1[0] == 1
s2 = ",".join(str(c) for c in query2)
view2 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s2}]) DESC limit 1"
)
res2 = list(view2.sample_indices)
assert res2[0] == 2
s3 = ",".join(str(c) for c in query3)
view3 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s3}]) DESC limit 1"
)
res3 = list(view3.sample_indices)
assert res3[0] == 3
with pytest.raises(EmbeddingTensorPopError):
vector_store.dataset.embedding.pop(2)
vector_store.dataset.id.pop(2)
vector_store.dataset.metadata.pop(2)
vector_store.dataset.text.pop(2)
with pytest.raises(EmbeddingTensorPopError):
> vector_store.dataset.pop(2)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:1754:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/dataset/dataset.py:4711: in pop
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding'), indexes = [2], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 1856 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_vdb_index_incr_maint_update
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([3])), indexes = [3]
index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([3])), operation_kind = 3
row_ids = [3]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
local_path = './hub_pytest/test_deeplake_vectorstore/test_vdb_index_incr_maint_update'
capsys = <_pytest.capture.CaptureFixture object at 0x7f6601e34f50>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjE5NiwiZXhwIjoxNzA5NTIyMTk2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.76VsQkoBfnHsLLmUaMe8Lul1Hct0vUczdA9OhHlKoXv67yNdTNKtWWjzcwYKvny3_wc01yURAF-6JyFG1JhSiA'
@requires_libdeeplake
def test_vdb_index_incr_maint_update(local_path, capsys, hub_cloud_dev_token):
number_of_data = 105
texts, embeddings, ids, metadatas, _ = utils.create_data(
number_of_data=number_of_data, embedding_dim=EMBEDDING_DIM
)
txt1 = texts[:100]
md1 = metadatas[:100]
ids1 = ids[:100]
emb1 = embeddings[:100]
txt2 = texts[100]
md2 = metadatas[100]
ids2 = ids[100]
emb2 = embeddings[100]
txt3 = texts[101]
md3 = metadatas[101]
ids3 = ids[101]
emb3 = embeddings[101]
txt4 = texts[102]
md4 = metadatas[102]
ids4 = ids[102]
emb4 = embeddings[102]
emb5 = embeddings[103]
emb6 = embeddings[104]
# initialize vector store object with vdb index threshold as 200.
vector_store = DeepLakeVectorStore(
path=local_path,
overwrite=True,
verbose=True,
exec_option="compute_engine",
index_params={"threshold": 2, "distance_metric": "L2"},
token=hub_cloud_dev_token,
)
vector_store.add(embedding=emb1, text=txt1, id=ids1, metadata=md1)
ds = vector_store.dataset_handler.dataset
ds.append({"embedding": emb2, "text": txt2, "id": ids2, "metadata": md2})
ds.append({"embedding": emb3, "text": txt3, "id": ids3, "metadata": md3})
ds.append({"embedding": emb4, "text": txt4, "id": ids4, "metadata": md4})
# assert len(vector_store) == number_of_data
assert set(vector_store.dataset_handler.dataset.tensors) == set(
[
"embedding",
"id",
"metadata",
"text",
]
)
assert set(vector_store.tensors()) == set(
[
"embedding",
"id",
"metadata",
"text",
]
)
# Check if the index is recreated properly.
# ds = vector_store.dataset
es = ds.embedding.get_vdb_indexes()
assert len(es) == 1
assert es[0]["id"] == "hnsw_1"
assert es[0]["distance"] == "l2_norm"
assert es[0]["type"] == "hnsw"
# search the embeddings.
query1 = ds.embedding[1].numpy()
query2 = ds.embedding[2].numpy()
query3 = ds.embedding[3].numpy()
s1 = ",".join(str(c) for c in query1)
view1 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s1}]) DESC limit 1"
)
res1 = list(view1.sample_indices)
assert res1[0] == 1
s2 = ",".join(str(c) for c in query2)
view2 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s2}]) DESC limit 1"
)
res2 = list(view2.sample_indices)
assert res2[0] == 2
s3 = ",".join(str(c) for c in query3)
view3 = ds.query(
f"select * order by cosine_similarity(embedding ,array[{s3}]) DESC limit 1"
)
res3 = list(view3.sample_indices)
assert res3[0] == 3
> ds[3].update({"embedding": emb5})
deeplake/core/vectorstore/test_deeplake_vectorstore.py:1856:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([3])), indexes = [3]
index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 107 in deeplake/core/vectorstore/dataset_handlers/test_managed_dh.py
github-actions / JUnit Test Report
test_managed_dh.test_managed_vectorstore_should_not_accept_embedding_function_during_search
Failed: Timeout >60.0s
Raw output
hub_cloud_path = 'hub://testingacc2/tmpdace_test_managed_dh_test_managed_vectorstore_should_not_accept_embedding_function_during_search'
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjE5NiwiZXhwIjoxNzA5NTIyMTk2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.76VsQkoBfnHsLLmUaMe8Lul1Hct0vUczdA9OhHlKoXv67yNdTNKtWWjzcwYKvny3_wc01yURAF-6JyFG1JhSiA'
def test_managed_vectorstore_should_not_accept_embedding_function_during_search(
hub_cloud_path, hub_cloud_dev_token
):
> db = utils.create_and_populate_vs(
path=hub_cloud_path,
token=hub_cloud_dev_token,
runtime={"tensor_db": True},
embedding_dim=100,
)
deeplake/core/vectorstore/dataset_handlers/test_managed_dh.py:107:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/vector_search/utils.py:713: in create_and_populate_vs
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
deeplake/core/vectorstore/deeplake_vectorstore.py:222: in add
return self.dataset_handler.add(
deeplake/core/vectorstore/dataset_handlers/managed_dataset_handler.py:200: in add
response = self.client.vectorstore_add(
deeplake/client/managed/managed_client.py:164: in vectorstore_add
response = self.request(
deeplake/client/client.py:151: in request
response = requests.request(
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/requests/api.py:59: in request
return session.request(method=method, url=url, **kwargs)
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/requests/sessions.py:589: in request
resp = self.send(prep, **send_kwargs)
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/requests/sessions.py:703: in send
r = adapter.send(request, **kwargs)
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/requests/adapters.py:486: in send
resp = conn.urlopen(
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/urllib3/connectionpool.py:791: in urlopen
response = self._make_request(
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/urllib3/connectionpool.py:537: in _make_request
response = conn.getresponse()
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages/urllib3/connection.py:461: in getresponse
httplib_response = super().getresponse()
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/http/client.py:1386: in getresponse
response.begin()
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/http/client.py:325: in begin
version, status, reason = self._read_status()
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/http/client.py:286: in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/socket.py:706: in readinto
return self._sock.recv_into(b)
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/ssl.py:1315: in recv_into
return self.read(nbytes, buffer)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ssl.SSLSocket [closed] fd=-1, family=2, type=1, proto=6>, len = 8192
buffer = <memory at 0x7f66009d3640>
def read(self, len=1024, buffer=None):
"""Read up to LEN bytes and return them.
Return zero-length string on EOF."""
self._checkClosed()
if self._sslobj is None:
raise ValueError("Read on closed or unwrapped SSL socket.")
try:
if buffer is not None:
> return self._sslobj.read(len, buffer)
E Failed: Timeout >60.0s
/opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/ssl.py:1167: Failed
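Unlike the other failures, this one is a hang rather than an assertion error: vectorstore_add goes through deeplake/client/client.py:request into requests.request, and the 60 s test timeout fires while the client is still blocked in an SSL read waiting on the managed service. A small sketch of bounding the HTTP call so a stalled response surfaces as a requests exception well before the harness timeout; the URL, header format and timeout values are placeholders, not the real client configuration:

```python
import requests

def request_with_timeout(method, url, json_payload=None, token=None):
    # Sketch: a (connect, read) timeout makes a stalled backend raise
    # requests.exceptions.ConnectTimeout / ReadTimeout instead of hanging
    # until the test harness kills the call at 60 s.
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    return requests.request(
        method,
        url,
        json=json_payload,
        headers=headers,
        timeout=(10, 30),
    )
```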
Check failure on line 803 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
github-actions / JUnit Test Report
test_deepmemory.test_db_deepmemory_status_should_show_best_model_with_deepmemory_v2_metadata_logic
NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
Raw output
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f65d1342c50>
@property
def dataset(self):
"""Returns the dataset"""
try:
> return self.dataset_handler.dataset
E AttributeError: 'ManagedDH' object has no attribute 'dataset'
deeplake/core/vectorstore/deeplake_vectorstore.py:523: AttributeError
During handling of the above exception, another exception occurred:
capsys = <_pytest.capture.CaptureFixture object at 0x7f6602568810>
corpus_query_pair_path = ('hub://testingacc2/deepmemory_test_corpus_managed_2', 'hub://testingacc2/deepmemory_test_corpus_managed_2_eval_queries')
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjE5NiwiZXhwIjoxNzA5NTIyMTk2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.76VsQkoBfnHsLLmUaMe8Lul1Hct0vUczdA9OhHlKoXv67yNdTNKtWWjzcwYKvny3_wc01yURAF-6JyFG1JhSiA'
def test_db_deepmemory_status_should_show_best_model_with_deepmemory_v2_metadata_logic(
capsys,
corpus_query_pair_path,
hub_cloud_dev_token,
):
corpus, queries = corpus_query_pair_path
db = VectorStore(
path=corpus,
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
embedding_function=embedding_fn,
)
> db.dataset.embedding.info = {
"deepmemory": {
"6581e3056a1162b64061a9a4_0.npy": {
"base_recall@10": 0.25,
"deep_memory_version": "0.2",
"delta": 0.25,
"job_id": "6581e3056a1162b64061a9a4_0",
"model_type": "npy",
"recall@10": 0.5,
},
"model.npy": {
"base_recall@10": 0.25,
"deep_memory_version": "0.2",
"delta": 0.25,
"job_id": "6581e3056a1162b64061a9a4_0",
"model_type": "npy",
"recall@10": 0.5,
},
}
}
deeplake/core/vectorstore/deep_memory/test_deepmemory.py:803:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f65d1342c50>
@property
def dataset(self):
"""Returns the dataset"""
try:
return self.dataset_handler.dataset
except AttributeError:
> raise NotImplementedError(
"Acessing the dataset is not implemented for managed Vector Store yet."
)
E NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
deeplake/core/vectorstore/deeplake_vectorstore.py:525: NotImplementedError
Check failure on line 846 in deeplake/core/vectorstore/deep_memory/test_deepmemory.py
github-actions / JUnit Test Report
test_deepmemory.test_db_deepmemory_status_should_show_best_model_with_deepmemory_v1_metadata_logic
NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
Raw output
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f66ceae6f50>
@property
def dataset(self):
"""Returns the dataset"""
try:
> return self.dataset_handler.dataset
E AttributeError: 'ManagedDH' object has no attribute 'dataset'
deeplake/core/vectorstore/deeplake_vectorstore.py:523: AttributeError
During handling of the above exception, another exception occurred:
capsys = <_pytest.capture.CaptureFixture object at 0x7f66db379c90>
corpus_query_pair_path = ('hub://testingacc2/deepmemory_test_corpus_managed_2', 'hub://testingacc2/deepmemory_test_corpus_managed_2_eval_queries')
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjE5NiwiZXhwIjoxNzA5NTIyMTk2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.76VsQkoBfnHsLLmUaMe8Lul1Hct0vUczdA9OhHlKoXv67yNdTNKtWWjzcwYKvny3_wc01yURAF-6JyFG1JhSiA'
def test_db_deepmemory_status_should_show_best_model_with_deepmemory_v1_metadata_logic(
capsys,
corpus_query_pair_path,
hub_cloud_dev_token,
):
corpus, queries = corpus_query_pair_path
db = VectorStore(
path=corpus,
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
embedding_function=embedding_fn,
)
> db.dataset.embedding.info = {
"deepmemory": {
"6581e3056a1162b64061a9a4_0.npy": {
"base_recall@10": 0.25,
"deep_memory_version": "0.2",
"delta": 0.25,
"job_id": "6581e3056a1162b64061a9a4_0",
"model_type": "npy",
"recall@10": 0.5,
},
},
"deepmemory/model.npy": {
"base_recall@10": 0.25,
"deep_memory_version": "0.2",
"delta": 0.25,
"job_id": "6581e3056a1162b64061a9a4_0",
"model_type": "npy",
"recall@10": 0.5,
},
}
deeplake/core/vectorstore/deep_memory/test_deepmemory.py:846:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f66ceae6f50>
@property
def dataset(self):
"""Returns the dataset"""
try:
return self.dataset_handler.dataset
except AttributeError:
> raise NotImplementedError(
"Acessing the dataset is not implemented for managed Vector Store yet."
)
E NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
deeplake/core/vectorstore/deeplake_vectorstore.py:525: NotImplementedError
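The two deepmemory status tests above, the fixture failures at the top of the report, and test_deepmemory_delete below all fail for the same reason: they reach into VectorStore.dataset, and the managed (REST-backed) handler ManagedDH does not expose a dataset object yet. Until that is implemented, a guard like the following could skip such tests explicitly instead of letting them die during setup; the helper name is hypothetical, and the hasattr check mirrors the dataset property shown above:

```python
import pytest

def require_local_dataset(vector_store):
    # Hypothetical test helper: managed handlers ("ManagedDH") have no
    # `dataset` attribute, so direct dataset access cannot work yet.
    if not hasattr(vector_store.dataset_handler, "dataset"):
        pytest.skip("direct dataset access is not implemented for the managed Vector Store")
    return vector_store.dataset_handler.dataset
```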
Check failure on line 448 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_image
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
file = 'bad_sample', compression = None
def read_meta_from_compressed_file(
file, compression: Optional[str] = None
) -> Tuple[str, Tuple[int], str]:
"""Reads shape, dtype and format without decompressing or verifying the sample."""
path = None
if isinstance(file, (str, Path)):
path = str(file)
try:
> f = open(file, "rb")
E FileNotFoundError: [Errno 2] No such file or directory: 'bad_sample'
deeplake/core/compression.py:637: FileNotFoundError
The above exception was the direct cause of the following exception:
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f1957fdeb90>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f196a844110>])
samples = [Sample(is_lazy=True, path=bad_sample)], operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1705: in _update
self._update_non_tiled_sample(
deeplake/core/chunk_engine.py:1341: in _update_non_tiled_sample
chunk.update_sample(local_sample_index, sample)
deeplake/core/chunk/uncompressed_chunk.py:265: in update_sample
serialized_sample, shape = self.serialize_sample(sample, break_into_tiles=False)
deeplake/core/chunk/base_chunk.py:354: in serialize_sample
incoming_sample, shape = serialize_sample_object( # type: ignore
deeplake/core/serialize.py:610: in serialize_sample_object
shape = incoming_sample.shape
deeplake/core/sample.py:161: in shape
self._read_meta()
deeplake/core/sample.py:203: in _read_meta
self._compression, self._shape, self._typestr = read_meta_from_compressed_file(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
file = 'bad_sample', compression = None
def read_meta_from_compressed_file(
file, compression: Optional[str] = None
) -> Tuple[str, Tuple[int], str]:
"""Reads shape, dtype and format without decompressing or verifying the sample."""
path = None
if isinstance(file, (str, Path)):
path = str(file)
try:
f = open(file, "rb")
except FileNotFoundError as e:
> raise SampleReadError(path) from e
E deeplake.util.exceptions.SampleReadError: Unable to read sample from bad_sample
deeplake/core/compression.py:639: SampleReadError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_image', index=Index([2]), tensors=['images', 'images_cc', 'images_sc'])
sample = {'images': Sample(is_lazy=True, path=bad_sample), 'images_cc': Sample(is_lazy=False, shape=(900, 900, 3), compression=... compression='jpeg', dtype='uint8' path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg)}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f1957fdeb90>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f196a844110>])
samples = [Sample(is_lazy=True, path=bad_sample)], operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor images.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_image', tensors=['images', 'images_cc', 'images_sc'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'
dog_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg'
@pytest.mark.slow
def test_ds_update_image(local_ds, cat_path, dog_path):
with local_ds as ds:
ds.create_tensor("images_sc", htype="image", sample_compression="png")
ds.create_tensor("images_cc", htype="image", chunk_compression="png")
ds.create_tensor("images", htype="image", sample_compression=None)
cat = deeplake.read(cat_path)
dog = deeplake.read(dog_path)
samples = ([cat] + [dog] * 2) * 2
with ds:
ds.images_sc.extend(samples)
ds.images_cc.extend(samples)
ds.images.extend(samples)
ds[1].update({"images_sc": cat, "images_cc": cat, "images": cat})
with pytest.raises(SampleUpdateError):
> ds[2].update(
{"images_sc": cat, "images_cc": cat, "images": deeplake.read("bad_sample")}
)
deeplake/api/tests/test_update_samples.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_image', index=Index([2]), tensors=['images', 'images_cc', 'images_sc'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_image', index=Index([2]), tensors=['images', 'images_cc', 'images_sc'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
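Beyond the rollback-on-view issue already described for test_ds_update_link, the trigger here is that deeplake.read returns a lazy sample (Sample(is_lazy=True, path=bad_sample)), so the missing file is only noticed when the update serializes the sample and asks for its shape. A tiny illustration, using only behaviour visible in the traceback above:

```python
import deeplake
from deeplake.util.exceptions import SampleReadError

sample = deeplake.read("bad_sample")   # no error yet: the sample is lazy
try:
    _ = sample.shape                   # forces _read_meta(), which opens the file
except SampleReadError as e:
    print(e)                           # "Unable to read sample from bad_sample"
```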
Check failure on line 596 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_sequence
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f196ae97bd0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f196ae77cd0>])
samples = [[Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_...cat.jpeg)], [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg)]]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='seq_image')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:2894: in _sequence_update
flat_verified_samples: List = self._update(
deeplake/core/chunk_engine.py:1688: in _update
samples = make_sequence(samples, index_length)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
samples = [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_l...es/cat.jpeg), Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg)]
index_length = 6
def make_sequence(
samples: Union[np.ndarray, Sequence[InputSample], InputSample], index_length: int
) -> Sequence[InputSample]:
"""Make `samples` a sequence of `InputSample`s.
Args:
samples (Union[np.ndarray, Sequence[InputSample]]): Incoming samples to be made into a sequence.
index_length (int): Number of expected samples in the sequence.
Raises:
ValueError: If `index_length` is incompatible with the true length of `samples`.
Returns:
Sequence[InputSample]: Sequence of `InputSample`s with the same length as `index_length`.
"""
if index_length == 1:
if hasattr(samples, "__len__"):
if len(samples) != 1: # type: ignore
samples = [samples]
elif hasattr(samples, "shape"):
if len(samples.shape) > 0 and samples.shape[0] != 1: # type: ignore
samples = [samples]
else:
samples = [samples]
if not hasattr(samples, "__len__"):
samples = [samples]
if index_length != len(samples): # type: ignore
> raise ValueError(
f"Index length ({index_length}) and length of samples ({len(samples)}) must be equal for updating a tensor." # type: ignore
)
E ValueError: Index length (6) and length of samples (5) must be equal for updating a tensor.
deeplake/util/chunk_engine.py:67: ValueError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_sequence', index=Index([slice(3, None, None)]), tensors=['seq', 'seq_image'])
sample = {'seq': [[1, 2, 3], [1, 2, 3], [1, 2, 3]], 'seq_image': [[Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplak...at.jpeg)], [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg)]]}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f196ae97bd0>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f196ae77cd0>])
samples = [[Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_...cat.jpeg)], [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg)]]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='seq_image')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor seq_image.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_sequence', tensors=['seq', 'seq_image'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'
dog_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg'
@pytest.mark.slow
def test_ds_update_sequence(local_ds, cat_path, dog_path):
with local_ds as ds:
ds.create_tensor("seq", htype="sequence")
ds.create_tensor("seq_image", htype="sequence[image]", sample_compression="png")
seq_samples = [[1, 2, 3], [4, 5, 6], [4, 5, 6]] * 2
ds.seq.extend(seq_samples)
dog = deeplake.read(dog_path)
cat = deeplake.read(cat_path)
seq_image_samples = [[cat, cat], [dog, dog], [dog, dog]] * 2
ds.seq_image.extend(seq_image_samples)
ds[1].update({"seq": [1, 2, 3], "seq_image": [cat, cat]})
np.testing.assert_array_equal(ds[1].seq.numpy(), [[1], [2], [3]])
assert ds[1].seq_image.shape == (2, 900, 900, 3)
ds[:3].update({"seq": [[1, 2, 3]] * 3, "seq_image": [[cat, cat]] * 3})
np.testing.assert_array_equal(ds[:3].seq.numpy(), [[[1], [2], [3]]] * 3)
assert ds[:3].seq_image.shape == (3, 2, 900, 900, 3)
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{"seq": [[1, 2, 3]] * 3, "seq_image": [[cat, cat], [cat, cat], [dog]]}
)
deeplake/api/tests/test_update_samples.py:596:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_sequence', index=Index([slice(3, None, None)]), tensors=['seq', 'seq_image'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_sequence', index=Index([slice(3, None, None)]), tensors=['seq', 'seq_image'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
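Here the underlying error, before the same rollback problem, is a length mismatch: updating ds[3:] touches 3 sequence samples that each hold 2 images, so make_sequence expects 6 flat items, but [[cat, cat], [cat, cat], [dog]] flattens to only 5. A short check of that count, mirroring the numbers in the ValueError above:

```python
# 3 sequence samples x 2 items each = 6 expected flat samples
expected = 3 * 2
provided = sum(len(seq) for seq in [["cat", "cat"], ["cat", "cat"], ["dog"]])
assert (expected, provided) == (6, 5)  # hence "Index length (6) and length of samples (5)"
```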
Check failure on line 708 in deeplake/api/tests/test_update_samples.py
github-actions / JUnit Test Report
test_update_samples.test_ds_update_tiles
deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
Raw output
file = 'bad_sample', compression = None
def read_meta_from_compressed_file(
file, compression: Optional[str] = None
) -> Tuple[str, Tuple[int], str]:
"""Reads shape, dtype and format without decompressing or verifying the sample."""
path = None
if isinstance(file, (str, Path)):
path = str(file)
try:
> f = open(file, "rb")
E FileNotFoundError: [Errno 2] No such file or directory: 'bad_sample'
deeplake/core/compression.py:637: FileNotFoundError
The above exception was the direct cause of the following exception:
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f188f628090>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f188f6281d0>])
samples = [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_l...=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images2')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
> (self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
deeplake/core/chunk_engine.py:1423:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1701: in _update
self._update_tiled_sample(
deeplake/core/chunk_engine.py:1294: in _update_tiled_sample
self._replace_tiled_sample(global_sample_index, sample)
deeplake/core/chunk_engine.py:1279: in _replace_tiled_sample
new_chunk_ids, tiles = self._samples_to_chunks(
deeplake/core/chunk_engine.py:877: in _samples_to_chunks
num_samples_added = current_chunk.extend_if_has_space(
deeplake/core/chunk/sample_compressed_chunk.py:26: in extend_if_has_space
serialized_sample, shape = self.serialize_sample(incoming_sample, compr)
deeplake/core/chunk/base_chunk.py:354: in serialize_sample
incoming_sample, shape = serialize_sample_object( # type: ignore
deeplake/core/serialize.py:610: in serialize_sample_object
shape = incoming_sample.shape
deeplake/core/sample.py:161: in shape
self._read_meta()
deeplake/core/sample.py:203: in _read_meta
self._compression, self._shape, self._typestr = read_meta_from_compressed_file(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
file = 'bad_sample', compression = None
def read_meta_from_compressed_file(
file, compression: Optional[str] = None
) -> Tuple[str, Tuple[int], str]:
"""Reads shape, dtype and format without decompressing or verifying the sample."""
path = None
if isinstance(file, (str, Path)):
path = str(file)
try:
f = open(file, "rb")
except FileNotFoundError as e:
> raise SampleReadError(path) from e
E deeplake.util.exceptions.SampleReadError: Unable to read sample from bad_sample
deeplake/core/compression.py:639: SampleReadError
The above exception was the direct cause of the following exception:
self = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_tiles', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
sample = {'images1': [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg),.../home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_lazy=True, path=bad_sample)]}
def update(self, sample: Dict[str, Any]):
"""Update existing samples in the dataset with new values.
Examples:
>>> ds[0].update({"images": deeplake.read("new_image.png"), "labels": 1})
>>> new_images = [deeplake.read(f"new_image_{i}.png") for i in range(3)]
>>> ds[:3].update({"images": new_images, "labels": [1, 2, 3]})
Args:
sample (dict): Dictionary with tensor names as keys and samples as values.
Raises:
ValueError: If partial update of a sample is attempted.
Exception: Error while attempting to rollback updates.
"""
if len(self.index) > 1:
raise ValueError(
"Cannot make partial updates to samples using `ds.update`. Use `ds.tensor[index] = value` instead."
)
# remove update hooks from view base so that the view is not invalidated
if self._view_base:
saved_update_hooks = self._view_base._update_hooks
self._view_base._update_hooks = {}
with self:
try:
self._commit("Backup before update", None, False)
for k, v in sample.items():
if deeplake.shutdown_event.is_set():
sys.exit()
> self[k] = v
deeplake/core/dataset/dataset.py:3379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:633: in __setitem__
tensor._update(self.index, value)
deeplake/core/tensor.py:788: in _update
self.chunk_engine.update(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7f188f628090>
index = Index(values=[<deeplake.core.index.index.IndexEntry object at 0x7f188f6281d0>])
samples = [Sample(is_lazy=True, path=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_l...=/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg), Sample(is_lazy=True, path=bad_sample)]
operator = None
link_callback = <bound method Tensor._update_links of Tensor(key='images2')>
def update(
self,
index: Index,
samples: Union[np.ndarray, Sequence[InputSample], InputSample],
operator: Optional[str] = None,
link_callback: Optional[Callable] = None,
):
"""Update data at `index` with `samples`."""
cmap = self.commit_chunk_map
if cmap is not None:
cmap = CommitChunkMap.frombuffer(cmap.tobytes())
try:
self.check_link_ready()
(self._sequence_update if self.is_sequence else self._update)( # type: ignore
index,
samples,
operator,
link_callback=link_callback,
)
except Exception as e:
if cmap is not None:
key = get_tensor_commit_chunk_map_key(self.key, self.commit_id)
self.meta_cache[key] = cmap
self._commit_chunk_map = cmap
self.meta_cache.register_deeplake_object(key, cmap)
> raise SampleUpdateError(self.name) from e
E deeplake.util.exceptions.SampleUpdateError: Unable to update sample in tensor images2.
deeplake/core/chunk_engine.py:1435: SampleUpdateError
During handling of the above exception, another exception occurred:
local_ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_tiles', tensors=['images1', 'images2'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'
dog_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/dog2.jpg'
@pytest.mark.slow
def test_ds_update_tiles(local_ds, cat_path, dog_path):
with local_ds as ds:
ds.create_tensor(
"images1", htype="image", sample_compression="jpg", tiling_threshold=1 * KB
)
ds.create_tensor(
"images2", htype="image", sample_compression="jpg", tiling_threshold=1 * KB
)
cat = deeplake.read(cat_path)
dog = deeplake.read(dog_path)
ds.images1.extend([cat] * 6)
ds.images2.extend([dog] * 6)
ds[0].update({"images1": dog, "images2": cat})
assert ds[0].images1.shape == (323, 480, 3)
assert ds[0].images2.shape == (900, 900, 3)
ds[:3].update({"images1": [dog] * 3, "images2": [cat] * 3})
assert ds[:3].images1.shape == (3, 323, 480, 3)
assert ds[:3].images2.shape == (3, 900, 900, 3)
with pytest.raises(SampleUpdateError):
> ds[3:].update(
{
"images1": [dog] * 3,
"images2": [cat] * 2 + [deeplake.read("bad_sample")],
}
)
deeplake/api/tests/test_update_samples.py:708:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/dataset/dataset.py:3382: in update
self.reset(verbose=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_tiles', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
args = (), kwargs = {'verbose': False}
ds = Dataset(path='./hub_pytest/test_update_samples/test_ds_update_tiles', index=Index([slice(3, None, None)]), tensors=['images1', 'images2'])
is_del = False, managed_view = False, has_vds = False, is_view = True
@wraps(callable)
def inner(x, *args, **kwargs):
ds = x if isinstance(x, deeplake.Dataset) else x.dataset
if not ds.__dict__.get("_allow_view_updates"):
is_del = callable.__name__ == "delete"
managed_view = "_view_entry" in ds.__dict__
has_vds = "_vds" in ds.__dict__
is_view = not x.index.is_trivial() or has_vds or managed_view
if is_view and not (is_del and (has_vds or managed_view)):
> raise InvalidOperationError(
callable.__name__,
type(x).__name__,
)
E deeplake.util.exceptions.InvalidOperationError: reset method cannot be called on a Dataset view.
deeplake/util/invalid_view_op.py:18: InvalidOperationError
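Triage note: the SampleUpdateError above is the failure the test expects (it deliberately passes deeplake.read("bad_sample")), but the rollback inside Dataset.update then calls self.reset(verbose=False) on the sliced view ds[3:], and the invalid_view_op guard rejects reset() on views, so the test surfaces InvalidOperationError instead. A minimal, untested sketch of a rollback helper that temporarily sets the flag the guard checks (the names _allow_view_updates and invalid_view_op.inner are taken from the traceback above, not a confirmed API):

from contextlib import contextmanager

@contextmanager
def allow_view_updates(ds):
    # Temporarily set the flag that invalid_view_op.inner reads via
    # ds.__dict__.get("_allow_view_updates"), so reset() can run on a view,
    # then restore the guard even if the rollback itself fails.
    ds.__dict__["_allow_view_updates"] = True
    try:
        yield ds
    finally:
        ds.__dict__.pop("_allow_view_updates", None)

# Hypothetical use in Dataset.update's exception handler (sketch only):
# with allow_view_updates(self):
#     self.reset(verbose=False)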
Check failure on line 1 in deeplake/client/test_client.py
github-actions / JUnit Test Report
test_client.test_deepmemory_delete
failed on setup with "NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet."
Raw output
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f1870fd7c90>
@property
def dataset(self):
"""Returns the dataset"""
try:
> return self.dataset_handler.dataset
E AttributeError: 'ManagedDH' object has no attribute 'dataset'
deeplake/core/vectorstore/deeplake_vectorstore.py:523: AttributeError
During handling of the above exception, another exception occurred:
request = <SubRequest 'corpus_query_relevances_copy' for <Function test_deepmemory_delete>>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjkyNiwiZXhwIjoxNzA5NTIyOTI2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.2XGeUNIpCWWJJziZICzU4BvM4U5qi1guCi6GPuVDyGTtQxCjPbk4Vnme0X8JLk8E0QmlD70XKhrQN1zfuQC9-Q'
@pytest.fixture
def corpus_query_relevances_copy(request, hub_cloud_dev_token):
if not is_opt_true(request, HUB_CLOUD_OPT):
pytest.skip(f"{HUB_CLOUD_OPT} flag not set")
return
corpus = _get_storage_path(request, HUB_CLOUD)
query_vs = VectorStore(
path=f"hub://{HUB_CLOUD_DEV_USERNAME}/deepmemory_test_queries2",
runtime={"tensor_db": True},
token=hub_cloud_dev_token,
)
> queries = query_vs.dataset.text.data()["value"]
deeplake/tests/path_fixtures.py:487:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.vectorstore.deeplake_vectorstore.VectorStore object at 0x7f1870fd7c90>
@property
def dataset(self):
"""Returns the dataset"""
try:
return self.dataset_handler.dataset
except AttributeError:
> raise NotImplementedError(
"Acessing the dataset is not implemented for managed Vector Store yet."
)
E NotImplementedError: Acessing the dataset is not implemented for managed Vector Store yet.
deeplake/core/vectorstore/deeplake_vectorstore.py:525: NotImplementedError
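Triage note: this failure (like test_deepmemory_train_and_cancel earlier) happens during fixture setup, not in the test body. corpus_query_relevances_copy opens the queries store with runtime={"tensor_db": True}, which routes to the managed (REST thin client) handler, and ManagedDH exposes no .dataset, so query_vs.dataset raises NotImplementedError. A small, untested sketch of a guard the fixture could use instead of reaching straight into the dataset (the helper name below is hypothetical):

import pytest

def _queries_or_skip(query_vs):
    # The managed handler has no local dataset object, so query_vs.dataset
    # raises NotImplementedError; skip the fixture rather than error on setup.
    if not hasattr(query_vs.dataset_handler, "dataset"):
        pytest.skip("dataset access is not implemented for the managed Vector Store")
    return query_vs.dataset.text.data()["value"]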
Check failure on line 611 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_index_basic
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding'), indexes = [10], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding'), operation_kind = 2, row_ids = [10]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
> indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1568: RuntimeError
During handling of the above exception, another exception occurred:
local_path = './hub_pytest/test_deeplake_vectorstore/test_index_basic'
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyMjkyNiwiZXhwIjoxNzA5NTIyOTI2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.2XGeUNIpCWWJJziZICzU4BvM4U5qi1guCi6GPuVDyGTtQxCjPbk4Vnme0X8JLk8E0QmlD70XKhrQN1zfuQC9-Q'
@pytest.mark.slow
@requires_libdeeplake
def test_index_basic(local_path, hub_cloud_dev_token):
# Start by testing behavior without an index
vector_store = VectorStore(
path=local_path,
overwrite=True,
token=hub_cloud_dev_token,
)
assert vector_store.dataset_handler.distance_metric_index is None
# Then test behavior when index is added
vector_store = VectorStore(
path=local_path, token=hub_cloud_dev_token, index_params={"threshold": 1}
)
vector_store.add(embedding=embeddings, text=texts, metadata=metadatas)
es = vector_store.dataset_handler.dataset.embedding.get_vdb_indexes()
assert (
es[0]["distance"] == METRIC_TO_INDEX_METRIC[DEFAULT_VECTORSTORE_DISTANCE_METRIC]
)
# Then test behavior when index is added previously and the dataset is reloaded
vector_store = VectorStore(path=local_path, token=hub_cloud_dev_token)
es = vector_store.dataset_handler.dataset.embedding.get_vdb_indexes()
assert (
es[0]["distance"] == METRIC_TO_INDEX_METRIC[DEFAULT_VECTORSTORE_DISTANCE_METRIC]
)
# Test index with sample updates
pre_update_index = vector_store.dataset_handler.dataset.embedding.get_vdb_indexes()[
0
]
vector_store.add(
embedding=[embeddings[0]], text=[texts[0]], metadata=[metadatas[0]]
)
post_update_index = (
vector_store.dataset_handler.dataset.embedding.get_vdb_indexes()[0]
)
assert pre_update_index == post_update_index
# Test index with sample deletion
pre_delete_index = vector_store.dataset_handler.dataset.embedding.get_vdb_indexes()[
0
]
> vector_store.delete(row_ids=[len(vector_store) - 1])
deeplake/core/vectorstore/test_deeplake_vectorstore.py:611:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:375: in delete
return self.dataset_handler.delete(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:310: in delete
self.dataset.pop(row_ids)
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/dataset/dataset.py:4711: in pop
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding'), indexes = [10], index_operation = 2
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
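Triage note: test_index_basic fails on the delete path. dataset.pop() runs incremental VDB index maintenance, the REMOVE branch of update_vdb_index calls api.vdb.remove_samples_from_index, and the backend returns RuntimeError: request_failed. The wrapper in _incr_maintenance_vdb_indexes then re-raises a generic Exception without explicit chaining, so the backend error only appears as incidental context. An untested sketch of that one change, chaining the cause explicitly; the body elided with ... is unchanged from the implementation shown above:

def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
    try:
        ...  # unchanged body from the implementation shown above
    except Exception as e:
        # Chain the cause so RuntimeError("request_failed") is reported as the
        # direct cause of the wrapper Exception in CI output.
        raise Exception(
            f"An error occurred while regenerating VDB indexes: {e}"
        ) from e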
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-local_auth_ds-vector_store_hash_ids-None-None-None-None-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
operation_kind = 3, row_ids = [0, 1, 2, 3, 4]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-embedding_fn3-local_auth_ds-vector_store_hash_ids-None-None-None-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = ['0', '1', '2', '3', '4'], vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = None
init_embedding_function = <function embedding_fn3 at 0x7f194287e700>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQwMSwiZXhwIjoxNzA5NTI0NDAxfQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.XdQ6_DC_JPn-ellR43c4Fm1XysTy01hd8zlLAsUShzvo91lcCK_uyt24k-yruEbuxDdz0f3zdU9EiC1OANiv2g'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
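Triage note: the remaining test_update_embedding parametrizations below fail identically. update_embedding updates rows through the dataset handler, Dataset.update triggers index maintenance, and the UPDATE branch of update_vdb_index fails inside api.vdb.update_samples_in_index with RuntimeError: request_failed, which looks like a transient backend/API error rather than a defect in the tests themselves. If that assumption holds, a retry around the indra calls could deflake these runs; the helper below is a hypothetical, untested sketch and _with_retries is not an existing name in the codebase:

import time

def _with_retries(fn, attempts=3, base_delay=1.0):
    # Retry transient "request_failed" RuntimeErrors with exponential backoff;
    # re-raise anything else, or the final failure, unchanged.
    for attempt in range(attempts):
        try:
            return fn()
        except RuntimeError as e:
            if "request_failed" not in str(e) or attempt == attempts - 1:
                raise
            time.sleep(base_delay * (2 ** attempt))

# Example use inside the UPDATE branch (sketch only):
# indexes = _with_retries(
#     lambda: api.vdb.update_samples_in_index(ts, update_indices=row_ids)
# )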
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-local_auth_ds-None-vector_store_row_ids-None-None-None-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
operation_kind = 3, row_ids = [0, 1, 2, 3, 4]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-embedding_fn3-local_auth_ds-None-vector_store_row_ids-None-None-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = [0, 1, 2, 3, 4]
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = None
init_embedding_function = <function embedding_fn3 at 0x7f194287e700>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQwNiwiZXhwIjoxNzA5NTI0NDA2fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.RR3A5aVK4PwIrKvzAl4eLnlZZqHcE-R6u9UqVdbU5X_5TUyHU5ved5Ofum6mwgzyVT4nQ6dh7VqKF1g42PXG3g'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-local_auth_ds-None-None-None-vector_store_filter_udf-None-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
operation_kind = 3, row_ids = [1, 2, 3, 4, 5]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-embedding_fn3-local_auth_ds-None-None-None-vector_store_filter_udf-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = <function vector_store_filter_udf.<locals>.filter_udf at 0x7f18708d79c0>
vector_store_filter_udf = <function vector_store_filter_udf.<locals>.filter_udf at 0x7f18708d79c0>
vector_store_query = None
init_embedding_function = <function embedding_fn3 at 0x7f194287e700>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQxMCwiZXhwIjoxNzA5NTI0NDEwfQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.R6QpGBQ2r6eqGQjDs40KfhojgHV6CPmtd_rZDMIfcTgabV6B4HqL9i4WSnwVH40LHFLk0Czm8ZPpsW4MCW8Jig'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-local_auth_ds-None-None-vector_store_filters-None-None-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
operation_kind = 3, row_ids = [1, 2, 3, 4, 5]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-embedding_fn3-local_auth_ds-None-None-vector_store_filters-None-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = {'metadata': {'a': 1}}, vector_store_filter_udf = None
vector_store_query = None
init_embedding_function = <function embedding_fn3 at 0x7f194287e700>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQxNSwiZXhwIjoxNzA5NTI0NDE1fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.mzLgXK4KftVTIDlcJRMZ0sMBncfo5BiEj_LdNG3VaIfa2ecEncU5qrQUI1jrcarHkytgI69d926H7_WAi6naAw'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
operation_kind = 3, row_ids = [1, 2, 3, 4, 5]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='hub://testingacc2/tmp089d_test_deeplake_vectorstore_test_update_embedding-embedding_fn3-hub_cloud_ds-None-None-None-None-vector_store_query-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = "select * where metadata['a']==1"
init_embedding_function = <function embedding_fn3 at 0x7f194287e700>
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQyMCwiZXhwIjoxNzA5NTI0NDIwfQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.oOkNaNz9us5JhMymDW1wzX2fX8zGciDISCFsQA-DQLZTAwtdA-2bNP67EpqxY2jXeqoDZKc_6AEWTKqCpVdYUA'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(1, 2, 3, 4, 5)]))
indexes = [1, 2, 3, 4, 5], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception
Check failure on line 943 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_update_embedding[None-local_auth_ds-vector_store_hash_ids-None-None-None-None-hub_cloud_dev_token]
Exception: An error occurred while regenerating VDB indexes: request_failed
Raw output
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
> tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
deeplake/core/index_maintenance.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
operation_kind = 3, row_ids = [0, 1, 2, 3, 4]
def update_vdb_index(
self,
operation_kind: int,
row_ids: List[int] = [],
):
self.storage.check_readonly()
if self.meta.htype != "embedding":
raise Exception(f"Only supported for embedding tensors.")
self.invalidate_libdeeplake_dataset()
self.dataset.flush()
from deeplake.enterprise.convert_to_libdeeplake import (
dataset_to_libdeeplake,
)
ds = dataset_to_libdeeplake(self.dataset)
ts = getattr(ds, self.meta.name)
from deeplake.enterprise.convert_to_libdeeplake import (
import_indra_api,
)
api = import_indra_api()
commit_id = self.version_state["commit_id"]
if operation_kind == _INDEX_OPERATION_MAPPING["ADD"]:
try:
indexes = api.vdb.add_samples_to_index(
ts,
add_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["REMOVE"]:
try:
indexes = api.vdb.remove_samples_from_index(
ts,
remove_indices=row_ids,
)
for id, index in indexes:
b = index.serialize()
commit_id = self.version_state["commit_id"]
self.storage[get_tensor_vdb_index_key(self.key, commit_id, id)] = b
self.storage.flush()
except:
raise
elif operation_kind == _INDEX_OPERATION_MAPPING["UPDATE"]:
try:
> indexes = api.vdb.update_samples_in_index(
ts,
update_indices=row_ids,
)
E RuntimeError: request_failed
deeplake/core/tensor.py:1581: RuntimeError
During handling of the above exception, another exception occurred:
ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-None-local_auth_ds-vector_store_hash_ids-None-None-None-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = ['0', '1', '2', '3', '4'], vector_store_row_ids = None
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = None, init_embedding_function = None
hub_cloud_dev_token = 'eyJhbGciOiJIUzUxMiIsImlhdCI6MTcwNTkyNDQ1NSwiZXhwIjoxNzA5NTI0NDU1fQ.eyJpZCI6InRlc3RpbmdhY2MyIn0.Y5H0zpYR3dJtvmsbnjqYXSVq7dgNnAlhDAcjPhnbhV0o8V-_FkfN3jo577oW9NTKhywTLaUVaU-RNowC9qb_ng'
@requires_libdeeplake
@pytest.mark.parametrize(
"ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
[
(
"local_auth_ds",
"vector_store_hash_ids",
None,
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
"vector_store_row_ids",
None,
None,
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
None,
"vector_store_filter_udf",
None,
"hub_cloud_dev_token",
),
(
"local_auth_ds",
None,
None,
"vector_store_filters",
None,
None,
"hub_cloud_dev_token",
),
(
"hub_cloud_ds",
None,
None,
None,
None,
"vector_store_query",
"hub_cloud_dev_token",
),
],
indirect=True,
)
@pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
@pytest.mark.slow
@requires_libdeeplake
def test_update_embedding(
ds,
vector_store_hash_ids,
vector_store_row_ids,
vector_store_filters,
vector_store_filter_udf,
vector_store_query,
init_embedding_function,
hub_cloud_dev_token,
):
vector_store_filters = vector_store_filters or vector_store_filter_udf
exec_option = "compute_engine"
if vector_store_filter_udf:
exec_option = "python"
embedding_tensor = "embedding"
embedding_source_tensor = "text"
# dataset has a single embedding_tensor:
path = ds.path
vector_store = DeepLakeVectorStore(
path=path,
overwrite=True,
verbose=False,
exec_option=exec_option,
embedding_function=init_embedding_function,
index_params={"threshold": 10},
token=hub_cloud_dev_token,
)
# add data to the dataset:
metadatas[1:6] = [{"a": 1} for _ in range(5)]
vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
# case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
new_embedding_value = 100
embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
> vector_store.update_embedding(
ids=vector_store_hash_ids,
row_ids=vector_store_row_ids,
filter=vector_store_filters,
query=vector_store_query,
embedding_function=embedding_fn,
embedding_source_tensor=embedding_source_tensor,
embedding_tensor=embedding_tensor,
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/vectorstore/deeplake_vectorstore.py:442: in update_embedding
self.dataset_handler.update_embedding(
deeplake/core/vectorstore/dataset_handlers/embedded_dataset_handler.py:390: in update_embedding
self.dataset[row_ids].update(embedding_tensor_data)
deeplake/core/dataset/dataset.py:3387: in update
index_maintenance.index_operation_dataset(
deeplake/core/index_maintenance.py:275: in index_operation_dataset
_incr_maintenance_vdb_indexes(emb_tensor, rowids, dml_type)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensor = Tensor(key='embedding', index=Index([(0, 1, 2, 3, 4)]))
indexes = [0, 1, 2, 3, 4], index_operation = 3
def _incr_maintenance_vdb_indexes(tensor, indexes, index_operation):
try:
is_embedding = tensor.htype == "embedding"
has_vdb_indexes = hasattr(tensor.meta, "vdb_indexes")
try:
vdb_index_ids_present = len(tensor.meta.vdb_indexes) > 0
except AttributeError:
vdb_index_ids_present = False
if is_embedding and has_vdb_indexes and vdb_index_ids_present:
for vdb_index in tensor.meta.vdb_indexes:
tensor.update_vdb_index(
operation_kind=index_operation,
row_ids=indexes,
)
except Exception as e:
> raise Exception(f"An error occurred while regenerating VDB indexes: {e}")
E Exception: An error occurred while regenerating VDB indexes: request_failed
deeplake/core/index_maintenance.py:199: Exception