@@ -450,28 +450,45 @@ def _upsert_batch(self,
450
450
request = UpsertRequest (vectors = vectors , ** args_dict )
451
451
return self ._wrap_grpc_call (self .stub .Upsert , request , timeout = timeout , ** kwargs )
452
452
453
def upsert_from_dataframe(self,
                          df,
                          namespace: str = None,
                          batch_size: int = 500,
                          use_async_requests: bool = True,
                          show_progress: bool = True) -> "UpsertResponse":
    """Upserts a dataframe into the index.

    The dataframe is split into batches by `self._iter_dataframe`, each batch is
    sent with `self.upsert`, and the per-batch `upserted_count` values are summed
    into a single response.

    Args:
        df: A pandas dataframe with the following columns: id, vector, and metadata.
        namespace: The namespace to upsert into.
        batch_size: The number of rows to upsert in a single batch.
        use_async_requests: Whether to upsert multiple requests at the same time using asynchronous request mechanism.
            Set to `False` to call `upsert` with `async_req=False` and use each call's return value
            directly as the batch response (presumably a blocking, one-at-a-time request;
            confirm against `upsert`).
        show_progress: Whether to show a progress bar.

    Returns:
        An `UpsertResponse` whose `upserted_count` is the sum of the `upserted_count`
        of every batch response.

    Raises:
        RuntimeError: If the `pandas` package cannot be imported.
        ValueError: If `df` is not a `pandas.DataFrame`.
    """
    # pandas is an optional dependency, so it is imported lazily and a missing
    # install is reported with an actionable message.
    try:
        import pandas as pd
    except ImportError as err:
        raise RuntimeError("The `pandas` package is not installed. Please install pandas to use `upsert_from_dataframe()`") from err

    if not isinstance(df, pd.DataFrame):
        raise ValueError(f"Only pandas dataframes are supported. Found: {type(df)}")

    hide_progress = not show_progress

    results = []
    # The context manager guarantees the bar is closed even if an upsert raises.
    with tqdm(total=len(df), disable=hide_progress, desc="sending upsert requests") as pbar:
        for chunk in self._iter_dataframe(df, batch_size=batch_size):
            res = self.upsert(vectors=chunk, namespace=namespace, async_req=use_async_requests)
            # Progress is tracked in rows (total=len(df)), not in batches.
            pbar.update(len(chunk))
            results.append(res)

    if use_async_requests:
        # In async mode `upsert` returned future-like handles; resolve them all.
        # This bar honours `show_progress` just like the first one.
        results = [
            async_result.result()
            for async_result in tqdm(results, disable=hide_progress, desc="collecting async responses")
        ]

    upserted_count = sum(res.upserted_count for res in results)
    return UpsertResponse(upserted_count=upserted_count)
475
492
476
493
@staticmethod
477
494
def _iter_dataframe (df , batch_size ):
0 commit comments