1
1
from copy import deepcopy
2
2
3
3
import numpy as np
4
+ import pandas as pd
4
5
import pytest
5
6
6
7
import pinecone
@@ -32,12 +33,17 @@ def setup_method(self):
32
33
self .expected_vec2 = Vector (id = 'vec2' , values = self .vals2 , metadata = {})
33
34
self .expected_vec_md1 = Vector (id = 'vec1' , values = self .vals1 , metadata = dict_to_proto_struct (self .md1 ))
34
35
self .expected_vec_md2 = Vector (id = 'vec2' , values = self .vals2 , metadata = dict_to_proto_struct (self .md2 ))
36
+ self .expected_vec_md_sparse1 = Vector (id = 'vec1' , values = self .vals1 , metadata = dict_to_proto_struct (self .md1 ),
37
+ sparse_values = SparseValues (indices = self .sparse_indices_1 , values = self .sparse_values_1 ))
38
+ self .expected_vec_md_sparse2 = Vector (id = 'vec2' , values = self .vals2 , metadata = dict_to_proto_struct (self .md2 ),
39
+ sparse_values = SparseValues (indices = self .sparse_indices_2 , values = self .sparse_values_2 ))
35
40
36
- # region: upsert tests
37
41
38
- def _assert_called_once (self , vectors ):
42
+ # region: upsert tests
43
+
44
+ def _assert_called_once (self , vectors , async_call = False ):
39
45
self .index ._wrap_grpc_call .assert_called_once_with (
40
- self .index .stub .Upsert ,
46
+ self .index .stub .Upsert . future if async_call else self . index . stub . Upsert ,
41
47
UpsertRequest (
42
48
vectors = vectors ,
43
49
namespace = 'ns' ),
@@ -80,12 +86,7 @@ def test_upsert_vectors_upsertInputVectorsSparse(self, mocker):
80
86
Vector (id = 'vec2' , values = self .vals2 , metadata = dict_to_proto_struct (self .md2 ),
81
87
sparse_values = SparseValues (indices = self .sparse_indices_2 , values = self .sparse_values_2 ))],
82
88
namespace = 'ns' )
83
- self ._assert_called_once ([
84
- Vector (id = 'vec1' , values = self .vals1 , metadata = dict_to_proto_struct (self .md1 ),
85
- sparse_values = SparseValues (indices = self .sparse_indices_1 , values = self .sparse_values_1 )),
86
- Vector (id = 'vec2' , values = self .vals2 , metadata = dict_to_proto_struct (self .md2 ),
87
- sparse_values = SparseValues (indices = self .sparse_indices_2 , values = self .sparse_values_2 ))],
88
- )
89
+ self ._assert_called_once ([self .expected_vec_md_sparse1 , self .expected_vec_md_sparse2 ])
89
90
90
91
def test_upsert_dict (self , mocker ):
91
92
mocker .patch .object (self .index , '_wrap_grpc_call' , autospec = True )
@@ -207,20 +208,82 @@ def test_upsert_dict_negative_types_sparse(self, mocker, key, new_val):
207
208
assert 'sparse' in str (e .value )
208
209
assert key in str (e .value )
209
210
211
def test_updsert_dataframe(self, mocker):
    """Upserting a DataFrame sends both rows in one future-based Upsert call.

    ``_wrap_grpc_call`` is patched to return a ``MockUpsertDelegate`` so the
    code under test can resolve the "future" it gets back; the final
    assertion checks the call was made against ``stub.Upsert.future`` with
    the two expected sparse vectors in namespace ``'ns'``.
    """

    def fake_grpc_call(stub, upsert_request, timeout):
        # Report back as many upserted vectors as the request carried.
        response = UpsertResponse(upserted_count=len(upsert_request.vectors))
        return MockUpsertDelegate(response)

    mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True,
                        side_effect=fake_grpc_call)

    rows = [
        {
            'id': 'vec1',
            'values': self.vals1,
            'sparse_values': {'indices': self.sparse_indices_1, 'values': self.sparse_values_1},
            'metadata': self.md1,
        },
        {
            'id': 'vec2',
            'values': self.vals2,
            'sparse_values': {'indices': self.sparse_indices_2, 'values': self.sparse_values_2},
            'metadata': self.md2,
        },
    ]
    frame = pd.DataFrame(rows)

    self.index.upsert_from_dataframe(frame, namespace='ns')

    expected_vectors = [self.expected_vec_md_sparse1, self.expected_vec_md_sparse2]
    self._assert_called_once(expected_vectors, async_call=True)
227
+
228
+
229
def test_updsert_dataframe_sync(self, mocker):
    """Upserting a DataFrame with ``use_async_requests=False`` uses the plain Upsert stub.

    ``_wrap_grpc_call`` is patched to return an ``UpsertResponse`` directly
    (no future wrapper); the final assertion checks the call was made
    against ``stub.Upsert`` with the two expected sparse vectors.
    """

    def fake_grpc_call(stub, upsert_request, timeout):
        # Report back as many upserted vectors as the request carried.
        return UpsertResponse(upserted_count=len(upsert_request.vectors))

    mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True,
                        side_effect=fake_grpc_call)

    rows = [
        {
            'id': 'vec1',
            'values': self.vals1,
            'sparse_values': {'indices': self.sparse_indices_1, 'values': self.sparse_values_1},
            'metadata': self.md1,
        },
        {
            'id': 'vec2',
            'values': self.vals2,
            'sparse_values': {'indices': self.sparse_indices_2, 'values': self.sparse_values_2},
            'metadata': self.md2,
        },
    ]
    frame = pd.DataFrame(rows)

    self.index.upsert_from_dataframe(frame, namespace='ns', use_async_requests=False)

    expected_vectors = [self.expected_vec_md_sparse1, self.expected_vec_md_sparse2]
    self._assert_called_once(expected_vectors, async_call=False)
245
+
246
def test_upsert_dataframe_negative(self, mocker):
    """``upsert_from_dataframe`` raises ``ValueError`` on malformed input.

    Covers three families of bad input: objects that are not a DataFrame,
    a frame missing the ``id`` column, and frames carrying a column that
    is not part of the expected set.
    """
    mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True)
    record = {
        'id': 'vec1',
        'values': self.vals1,
        'sparse_values': {'indices': self.sparse_indices_1, 'values': self.sparse_values_1},
        'metadata': self.md1,
    }
    valid_df = pd.DataFrame([record])

    # Not a DF
    for not_a_frame in ([record], record):
        with pytest.raises(ValueError):
            self.index.upsert_from_dataframe(not_a_frame)

    # Missing Cols
    without_id = valid_df.drop(columns=['id'])
    with pytest.raises(ValueError):
        self.index.upsert_from_dataframe(without_id)

    # Excess cols
    with_unrelated_col = valid_df.copy()
    with_unrelated_col['animals'] = ['dog']
    with pytest.raises(ValueError):
        self.index.upsert_from_dataframe(with_unrelated_col)

    # Excess column whose name is a near-miss of a valid one.
    with_misspelled_col = valid_df.copy()
    with_misspelled_col['metadat'] = with_misspelled_col['metadata']
    with pytest.raises(ValueError):
        self.index.upsert_from_dataframe(with_misspelled_col)
275
+
210
276
def test_upsert_async_upsertInputVectorsAsync(self, mocker):
    """``upsert(..., async_req=True)`` goes through the future-based Upsert stub.

    The shared ``_assert_called_once`` helper is used with
    ``async_call=True`` so the expected callable is ``stub.Upsert.future``.
    """
    mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True)

    self.index.upsert(
        [self.expected_vec_md1, self.expected_vec_md2],
        namespace='ns',
        async_req=True,
    )

    # Built separately from the list passed to upsert() so the assertion
    # does not share an object with the call under test.
    expected_vectors = [self.expected_vec_md1, self.expected_vec_md2]
    self._assert_called_once(expected_vectors, async_call=True)
224
287
225
288
def test_upsert_vectorListIsMultiplyOfBatchSize_vectorsUpsertedInBatches (self , mocker ):
226
289
mocker .patch .object (self .index , '_wrap_grpc_call' , autospec = True ,
@@ -516,3 +579,10 @@ def test_describeIndexStats_callWithFilter_CalledWithFilter(self, mocker):
516
579
)
517
580
518
581
# endregion
582
+
583
class MockUpsertDelegate:
    """Minimal future-like stand-in returned by the patched gRPC call in the async upsert tests.

    It holds a pre-built response and hands it back from ``result``, so
    code that waits on the returned object gets the response immediately.
    """

    # The annotation is quoted so it is not evaluated when the class is
    # defined; the helper only stores and returns the object it is given.
    def __init__(self, upsert_response: "UpsertResponse"):
        """Store the response that ``result`` will return."""
        self.response = upsert_response

    def result(self, timeout=None):
        """Return the stored response.

        ``timeout`` is accepted for call compatibility and ignored. It
        defaults to ``None`` so a bare ``result()`` call also works, as it
        does on a real gRPC future.
        """
        return self.response
0 commit comments