2
2
# Copyright (c) 2020-2021 Pinecone Systems Inc. All rights reserved.
3
3
#
4
4
import logging
5
+ import numbers
6
+ import warnings
5
7
from abc import ABC , abstractmethod
6
8
from functools import wraps
7
9
from typing import NamedTuple , Optional , Dict , Iterable , Union , List , Tuple , Any
25
27
from pinecone .core .grpc .protos .vector_service_pb2_grpc import VectorServiceStub
26
28
from pinecone .core .grpc .retry import RetryOnRpcErrorClientInterceptor , RetryConfig
27
29
from pinecone .core .utils import _generate_request_id , dict_to_proto_struct , fix_tuple_length
28
- from pinecone .core .utils .constants import MAX_MSG_SIZE , REQUEST_ID , CLIENT_VERSION
30
+ from pinecone .core .utils .constants import MAX_MSG_SIZE , REQUEST_ID , CLIENT_VERSION , REQUIRED_VECTOR_FIELDS , \
31
+ OPTIONAL_VECTOR_FIELDS
29
32
from pinecone .exceptions import PineconeException
30
33
31
34
__all__ = ["GRPCIndex" , "GRPCVector" , "GRPCQueryVector" , "GRPCSparseValues" ]
@@ -329,6 +332,44 @@ def upsert(self,
329
332
'To upsert in parallel, please follow: '
330
333
'https://docs.pinecone.io/docs/performance-tuning' )
331
334
335
def _dict_to_grpc_vector(item):
    """Build a ``GRPCVector`` from a dictionary describing a single vector.

    The dictionary must contain every key in ``REQUIRED_VECTOR_FIELDS`` and
    may contain keys from ``OPTIONAL_VECTOR_FIELDS``. Any other key only
    triggers a warning; it is not forwarded to ``GRPCVector``.

    Args:
        item: Mapping holding the vector's fields. ``item['id']`` and
            ``item['values']`` are read unconditionally; ``sparse_values``
            and ``metadata`` are read when present.

    Returns:
        The ``GRPCVector`` built from ``item``.

    Raises:
        ValueError: If a required field is missing, if ``sparse_values`` is
            not a mapping, or if ``GRPCSparseValues`` rejects its
            ``indices``/``values`` with a ``TypeError``.
        TypeError: If ``metadata`` is neither ``None`` nor a mapping, or if
            ``GRPCVector`` rejects the supplied data.
    """
    item_keys = set(item.keys())
    if not item_keys.issuperset(REQUIRED_VECTOR_FIELDS):
        raise ValueError(
            f"Vector dictionary is missing required fields: {list(REQUIRED_VECTOR_FIELDS - item_keys)}")

    allowed_keys = REQUIRED_VECTOR_FIELDS | OPTIONAL_VECTOR_FIELDS
    excessive_keys = item_keys - allowed_keys
    if excessive_keys:
        warnings.warn(f"Found excessive keys in the vector dictionary: {list(excessive_keys)}. "
                      f"These keys will be ignored. The allowed keys are: {list(allowed_keys)}")

    sparse_values = None
    if 'sparse_values' in item:
        raw_sparse = item['sparse_values']
        if not isinstance(raw_sparse, Mapping):
            raise ValueError(f"Column `sparse_values` is expected to be a dictionary, found {type(raw_sparse)}")
        indices = raw_sparse.get('indices', None)
        values = raw_sparse.get('values', None)
        try:
            sparse_values = GRPCSparseValues(indices=indices, values=values)
        except TypeError as e:
            raise ValueError("Found unexpected data in column `sparse_values`. "
                             "Expected format is `'sparse_values': {'indices': List[int], 'values': List[float]}`."
                             ) from e

    metadata = item.get('metadata', None)
    if metadata is not None and not isinstance(metadata, Mapping):
        raise TypeError(f"Column `metadata` is expected to be a dictionary, found {type(metadata)}")

    try:
        return GRPCVector(id=item['id'], values=item['values'], sparse_values=sparse_values,
                          metadata=dict_to_proto_struct(metadata))
    except TypeError as e:
        # No need to raise a dedicated error for `id` - protobuf's error message is clear enough
        dense_values = item['values']
        # Inspect the values by iteration rather than by indexing: indexing
        # would itself fail on an empty sequence or on a non-subscriptable
        # iterable (set, generator) and mask the error being diagnosed here.
        try:
            values_are_real = all(isinstance(v, numbers.Real) for v in dense_values)
        except TypeError:
            # `dense_values` is not iterable at all.
            values_are_real = False
        if not values_are_real:
            raise TypeError("Column `values` is expected to be a list of floats") from e
        # `values` looks fine, so the failure came from another field.
        raise
372
+
332
373
def _vector_transform (item ):
333
374
if isinstance (item , GRPCVector ):
334
375
return item
@@ -340,12 +381,7 @@ def _vector_transform(item):
340
381
id , values , metadata = fix_tuple_length (item , 3 )
341
382
return GRPCVector (id = id , values = values , metadata = dict_to_proto_struct (metadata ) or {})
342
383
elif isinstance (item , Mapping ):
343
- sparse_values = None
344
- if 'sparse_values' in item :
345
- indices = item ['sparse_values' ].get ('indices' , None )
346
- values = item ['sparse_values' ].get ('values' , None )
347
- sparse_values = GRPCSparseValues (indices = indices , values = values )
348
- return GRPCVector (id = item ['id' ], values = item ['values' ], sparse_values = sparse_values , metadata = dict_to_proto_struct (item .get ('metadata' , None )))
384
+ return _dict_to_grpc_vector (item )
349
385
raise ValueError (f"Invalid vector value passed: cannot interpret type { type (item )} " )
350
386
351
387
timeout = kwargs .pop ('timeout' , None )
0 commit comments