5
5
from abc import ABC , abstractmethod
6
6
from functools import wraps
7
7
from typing import NamedTuple , Optional , Dict , Iterable , Union , List , Tuple , Any
8
+ from collections .abc import Mapping
8
9
9
10
import certifi
10
11
import grpc
@@ -263,7 +264,7 @@ def stub_class(self):
263
264
return VectorServiceStub
264
265
265
266
def upsert (self ,
266
- vectors : Union [List [GRPCVector ], List [Tuple ]],
267
+ vectors : Union [List [GRPCVector ], List [tuple ], List [ dict ]],
267
268
async_req : bool = False ,
268
269
namespace : Optional [str ] = None ,
269
270
batch_size : Optional [int ] = None ,
@@ -274,18 +275,25 @@ def upsert(self,
274
275
If a new value is upserted for an existing vector id, it will overwrite the previous value.
275
276
276
277
Examples:
277
- >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])],
278
- >>> namespace='ns1', async_req=True)
278
+ >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}),
279
+ ('id2', [1.0, 2.0, 3.0])
280
+ ],
281
+ namespace='ns1', async_req=True)
282
+ >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}},
283
+ {'id': 'id2',
284
+ 'values': [1.0, 2.0, 3.0],
285
+ 'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}},
286
+ ])
279
287
>>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}),
280
- >>> GRPCVector(id='id2', values=[1.0, 2.0, 3.0]),
281
- >>> GRPCVector(id='id3',
282
- >>> values=[1.0, 2.0, 3.0],
283
- >>> sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))])
288
+ GRPCVector(id='id2', values=[1.0, 2.0, 3.0]),
289
+ GRPCVector(id='id3',
290
+ values=[1.0, 2.0, 3.0],
291
+ sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))])
284
292
285
293
Args:
286
294
vectors (Union[List[GRPCVector], List[tuple], List[dict]]): A list of vectors to upsert.
287
295
288
- A vector can be represented by a 1) GRPCVector object or a 2) tuple.
296
+ A vector can be represented by 1) a tuple, 2) a GRPCVector object, or 3) a dictionary.
289
297
1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values).
290
298
where id is a string, values is a list of floats, and metadata is a dict.
291
299
Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])
@@ -299,6 +307,10 @@ def upsert(self,
299
307
values=[1.0, 2.0, 3.0],
300
308
sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))
301
309
310
+ 3) if a dictionary is used, it must be in the form
311
+ {'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]},
312
+ 'metadata': dict}
313
+
302
314
Note: the dimension of each vector must match the dimension of the index.
303
315
async_req (bool): If True, the upsert operation will be performed asynchronously.
304
316
Cannot be used with batch_size.
@@ -320,9 +332,20 @@ def upsert(self,
320
332
def _vector_transform(item):
    """Normalize one user-supplied vector into a ``GRPCVector``.

    Accepted representations:
      * ``GRPCVector`` -- returned unchanged.
      * ``tuple`` -- ``(id, values)`` or ``(id, values, metadata)``.
      * mapping -- ``{'id': ..., 'values': ..., 'sparse_values': {'indices': ..., 'values': ...},
        'metadata': ...}`` where ``sparse_values`` and ``metadata`` are optional.

    Args:
        item: The vector in any of the representations above.

    Returns:
        The equivalent ``GRPCVector``.

    Raises:
        ValueError: If a tuple has more than three elements, a mapping lacks
            ``'id'`` or ``'values'``, ``'sparse_values'`` is present but is not
            a mapping, or ``item`` is of an unsupported type.
    """
    if isinstance(item, GRPCVector):
        return item

    if isinstance(item, tuple):
        # Tuples carry no slot for sparse values, so anything longer than
        # (id, values, metadata) is rejected rather than silently truncated.
        if len(item) > 3:
            raise ValueError(f"Found a tuple of length {len(item)} which is not supported. "
                             f"Vectors can be represented as tuples either the form (id, values, metadata) or (id, values). "
                             f"To pass sparse values please use either dicts or a GRPCVector objects as inputs.")
        # NOTE(review): fix_tuple_length presumably pads (id, values) to three
        # elements so the unpacking below always succeeds -- confirm in helper.
        vector_id, values, metadata = fix_tuple_length(item, 3)
        return GRPCVector(id=vector_id, values=values, metadata=dict_to_proto_struct(metadata) or {})

    if isinstance(item, Mapping):
        # Report missing required keys with the same exception type used for
        # every other malformed input instead of leaking a bare KeyError.
        missing = [key for key in ('id', 'values') if key not in item]
        if missing:
            raise ValueError(f"Vector dictionary is missing required fields: {missing}")

        sparse_values = None
        sparse = item.get('sparse_values')
        # An explicit None is treated the same as an absent key.
        if sparse is not None:
            if not isinstance(sparse, Mapping):
                raise ValueError(f"Invalid sparse_values passed: expected a dict with 'indices' and 'values', "
                                 f"found {type(sparse)}")
            sparse_values = GRPCSparseValues(indices=sparse.get('indices'), values=sparse.get('values'))

        return GRPCVector(id=item['id'],
                          values=item['values'],
                          sparse_values=sparse_values,
                          metadata=dict_to_proto_struct(item.get('metadata')))

    raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}")
327
350
328
351
timeout = kwargs .pop ('timeout' , None )
0 commit comments