Skip to content

Commit b7e6dcf

Browse files
Merge pull request pinecone-io#142 from pinecone-io/test_dataframe
Added UT for upsert_dataframe()
2 parents 93546d8 + f704693 commit b7e6dcf

File tree

1 file changed

+87
-17
lines changed

1 file changed

+87
-17
lines changed

tests/unit/test_grpc_index.py

+87-17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from copy import deepcopy
22

33
import numpy as np
4+
import pandas as pd
45
import pytest
56

67
import pinecone
@@ -32,12 +33,17 @@ def setup_method(self):
3233
self.expected_vec2 = Vector(id='vec2', values=self.vals2, metadata={})
3334
self.expected_vec_md1 = Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1))
3435
self.expected_vec_md2 = Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))
36+
self.expected_vec_md_sparse1 = Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1),
37+
sparse_values=SparseValues(indices=self.sparse_indices_1, values=self.sparse_values_1))
38+
self.expected_vec_md_sparse2 = Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2),
39+
sparse_values=SparseValues(indices=self.sparse_indices_2, values=self.sparse_values_2))
3540

36-
# region: upsert tests
3741

38-
def _assert_called_once(self, vectors):
42+
# region: upsert tests
43+
44+
def _assert_called_once(self, vectors, async_call=False):
3945
self.index._wrap_grpc_call.assert_called_once_with(
40-
self.index.stub.Upsert,
46+
self.index.stub.Upsert.future if async_call else self.index.stub.Upsert,
4147
UpsertRequest(
4248
vectors=vectors,
4349
namespace='ns'),
@@ -80,12 +86,7 @@ def test_upsert_vectors_upsertInputVectorsSparse(self, mocker):
8086
Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2),
8187
sparse_values=SparseValues(indices=self.sparse_indices_2, values=self.sparse_values_2))],
8288
namespace='ns')
83-
self._assert_called_once([
84-
Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1),
85-
sparse_values=SparseValues(indices=self.sparse_indices_1, values=self.sparse_values_1)),
86-
Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2),
87-
sparse_values=SparseValues(indices=self.sparse_indices_2, values=self.sparse_values_2))],
88-
)
89+
self._assert_called_once([self.expected_vec_md_sparse1, self.expected_vec_md_sparse2])
8990

9091
def test_upsert_dict(self, mocker):
9192
mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True)
@@ -207,20 +208,82 @@ def test_upsert_dict_negative_types_sparse(self, mocker, key, new_val):
207208
assert 'sparse' in str(e.value)
208209
assert key in str(e.value)
209210

211+
def test_updsert_dataframe(self, mocker):
    """Upsert a two-row DataFrame with default options and verify the gRPC call.

    ``_wrap_grpc_call`` is patched so that every call returns a
    ``MockUpsertDelegate`` wrapping an ``UpsertResponse`` whose
    ``upserted_count`` equals the number of vectors in the request.
    """
    def fake_grpc_call(stub, upsert_request, timeout):
        # Report back exactly as many upserted vectors as were sent.
        response = UpsertResponse(upserted_count=len(upsert_request.vectors))
        return MockUpsertDelegate(response)

    mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True,
                        side_effect=fake_grpc_call)

    first_row = {
        'id': 'vec1',
        'values': self.vals1,
        'sparse_values': {'indices': self.sparse_indices_1, 'values': self.sparse_values_1},
        'metadata': self.md1,
    }
    second_row = {
        'id': 'vec2',
        'values': self.vals2,
        'sparse_values': {'indices': self.sparse_indices_2, 'values': self.sparse_values_2},
        'metadata': self.md2,
    }
    df = pd.DataFrame([first_row, second_row])

    self.index.upsert_from_dataframe(df, namespace='ns')

    # With default options the request is expected to go through the async stub.
    expected_vectors = [self.expected_vec_md_sparse1, self.expected_vec_md_sparse2]
    self._assert_called_once(expected_vectors, async_call=True)
227+
228+
229+
def test_updsert_dataframe_sync(self, mocker):
    """Upsert a two-row DataFrame with ``use_async_requests=False`` and verify the gRPC call.

    ``_wrap_grpc_call`` is patched to return an ``UpsertResponse`` directly,
    with ``upserted_count`` equal to the number of vectors in the request.
    """
    def fake_grpc_call(stub, upsert_request, timeout):
        # Report back exactly as many upserted vectors as were sent.
        return UpsertResponse(upserted_count=len(upsert_request.vectors))

    mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True,
                        side_effect=fake_grpc_call)

    first_row = {
        'id': 'vec1',
        'values': self.vals1,
        'sparse_values': {'indices': self.sparse_indices_1, 'values': self.sparse_values_1},
        'metadata': self.md1,
    }
    second_row = {
        'id': 'vec2',
        'values': self.vals2,
        'sparse_values': {'indices': self.sparse_indices_2, 'values': self.sparse_values_2},
        'metadata': self.md2,
    }
    df = pd.DataFrame([first_row, second_row])

    self.index.upsert_from_dataframe(df, namespace='ns', use_async_requests=False)

    # The synchronous path is expected to use the plain (non-future) stub.
    expected_vectors = [self.expected_vec_md_sparse1, self.expected_vec_md_sparse2]
    self._assert_called_once(expected_vectors, async_call=False)
245+
246+
def test_upsert_dataframe_negative(self, mocker):
247+
mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True)
248+
full_dict1 = {'id': 'vec1', 'values': self.vals1,
249+
'sparse_values': {'indices': self.sparse_indices_1, 'values': self.sparse_values_1},
250+
'metadata': self.md1}
251+
full_df = pd.DataFrame([full_dict1])
252+
253+
# Not a DF
254+
with pytest.raises(ValueError):
255+
self.index.upsert_from_dataframe([full_dict1])
256+
with pytest.raises(ValueError):
257+
self.index.upsert_from_dataframe(full_dict1)
258+
259+
# Missing Cols
260+
df = full_df.copy()
261+
df.drop(columns=['id'], inplace=True)
262+
with pytest.raises(ValueError):
263+
self.index.upsert_from_dataframe(df)
264+
265+
# Excess cols
266+
df = full_df.copy()
267+
df['animals'] = ['dog']
268+
with pytest.raises(ValueError):
269+
self.index.upsert_from_dataframe(df)
270+
271+
df = full_df.copy()
272+
df['metadat'] = df['metadata']
273+
with pytest.raises(ValueError):
274+
self.index.upsert_from_dataframe(df)
275+
210276
def test_upsert_async_upsertInputVectorsAsync(self, mocker):
211277
mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True)
212278
self.index.upsert([self.expected_vec_md1,
213279
self.expected_vec_md2],
214280
namespace='ns',
215281
async_req=True)
216-
self.index._wrap_grpc_call.assert_called_once_with(
217-
self.index.stub.Upsert.future,
218-
UpsertRequest(
219-
vectors=[
220-
self.expected_vec_md1,
221-
self.expected_vec_md2],
222-
namespace='ns'),
223-
timeout=None)
282+
self._assert_called_once([
283+
self.expected_vec_md1,
284+
self.expected_vec_md2],
285+
async_call=True
286+
)
224287

225288
def test_upsert_vectorListIsMultiplyOfBatchSize_vectorsUpsertedInBatches(self, mocker):
226289
mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True,
@@ -516,3 +579,10 @@ def test_describeIndexStats_callWithFilter_CalledWithFilter(self, mocker):
516579
)
517580

518581
# endregion
582+
583+
class MockUpsertDelegate:
    """Test double for the future-like object returned by a patched gRPC call.

    It holds a pre-built response and hands it back from ``result()``.
    """

    def __init__(self, upsert_response: "UpsertResponse"):
        # The response is stored as-is; no copying or validation is done.
        self.response = upsert_response

    def result(self, timeout=None):
        """Return the stored response immediately.

        ``timeout`` is accepted for signature compatibility and ignored. It
        defaults to ``None`` so callers may invoke ``result()`` with no
        arguments, as they can on a real ``grpc.Future``.
        """
        return self.response

0 commit comments

Comments
 (0)