Skip to content

Commit f4f4376

Browse files
DF input verification
Verify that the input DF is indeed a pandas dataframe
1 parent f67fe8d commit f4f4376

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

pinecone/core/grpc/index_grpc.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -417,22 +417,30 @@ def _upsert_batch(self,
417417

418418
def upsert_dataframe(self,
419419
df,
420-
namespase: str = None,
420+
namespace: str = None,
421421
batch_size: int = 500,
422+
use_async_requests: bool = True,
422423
show_progress: bool = True) -> None:
423424
"""Upserts a dataframe into the index.
424425
425426
Args:
426427
df: A pandas dataframe with the following columns: id, vector, and metadata.
427428
namespace: The namespace to upsert into.
428429
batch_size: The number of rows to upsert in a single batch.
430+
use_async_requests: Whether to send multiple upsert requests at the same time using the asynchronous request mechanism.
431+
Set to `False` to send the requests one at a time instead.
429432
show_progress: Whether to show a progress bar.
430433
"""
431-
if find_spec("pandas") is None:
432-
raise ImportError("pandas not found. Please install pandas to use this method.")
434+
try:
435+
import pandas as pd
436+
except ImportError:
437+
raise RuntimeError("The `pandas` package is not installed. Please install pandas to use `upsert_from_dataframe()`")
438+
439+
if not isinstance(df, pd.DataFrame):
440+
raise ValueError(f"Only pandas dataframes are supported. Found: {type(df)}")
433441

434442
async_results = [
435-
self.upsert(vectors=chunk, namespace=namespase, async_req=True)
443+
self.upsert(vectors=chunk, namespace=namespace, async_req=True)
436444
for chunk in tqdm(self._iter_dataframe(df, batch_size=batch_size),
437445
total=len(df) // batch_size, disable=not show_progress)
438446
]

pinecone/index.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def _vector_transform(item: Union[Vector, Tuple]):
195195

196196
def upsert_dataframe(self,
197197
df,
198-
namespase: str = None,
198+
namespace: str = None,
199199
batch_size: int = 500,
200200
show_progress: bool = True) -> None:
201201
"""Upserts a dataframe into the index.
@@ -206,13 +206,18 @@ def upsert_dataframe(self,
206206
batch_size: The number of rows to upsert in a single batch.
207207
show_progress: Whether to show a progress bar.
208208
"""
209-
if find_spec("pandas") is None:
210-
raise ImportError("pandas not found. Please install pandas to use this method.")
209+
try:
210+
import pandas as pd
211+
except ImportError:
212+
raise RuntimeError("The `pandas` package is not installed. Please install pandas to use `upsert_from_dataframe()`")
213+
214+
if not isinstance(df, pd.DataFrame):
215+
raise ValueError(f"Only pandas dataframes are supported. Found: {type(df)}")
211216

212217
pbar = tqdm(total=len(df), disable=not show_progress)
213218
for i in range(0, len(df), batch_size):
214219
batch = df.iloc[i:i + batch_size].to_dict(orient="records")
215-
self.upsert(batch, namespace=namespase)
220+
self.upsert(batch, namespace=namespace)
216221
pbar.update(len(batch))
217222

218223
@validate_and_convert_errors

0 commit comments

Comments
 (0)