From aaa7104e00e07f89891d8bfa1d3dd5d40be20c09 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Fri, 11 Apr 2025 12:34:30 -0400 Subject: [PATCH 01/13] Refactor PluginAware to do lazy loading --- pinecone/control/pinecone.py | 7 +- pinecone/data/features/inference/inference.py | 4 +- pinecone/data/index.py | 6 +- pinecone/utils/plugin_aware.py | 92 +++++++++++++++++-- 4 files changed, 91 insertions(+), 18 deletions(-) diff --git a/pinecone/control/pinecone.py b/pinecone/control/pinecone.py index f3c8f404..17b3d953 100644 --- a/pinecone/control/pinecone.py +++ b/pinecone/control/pinecone.py @@ -43,7 +43,7 @@ """ @private """ -class Pinecone(PineconeDBControlInterface, PluginAware): +class Pinecone(PluginAware, PineconeDBControlInterface): """ A client for interacting with Pinecone's vector database. @@ -107,9 +107,8 @@ def __init__( self.index_host_store = IndexHostStore() """ @private """ - self.load_plugins( - config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads - ) + # Initialize PluginAware first, which will then call PineconeDBControlInterface.__init__ + super().__init__() @property def inference(self): diff --git a/pinecone/data/features/inference/inference.py b/pinecone/data/features/inference/inference.py index 71ada564..9ab34e33 100644 --- a/pinecone/data/features/inference/inference.py +++ b/pinecone/data/features/inference/inference.py @@ -63,9 +63,7 @@ def __init__(self, config, openapi_config, **kwargs) -> None: api_version=API_VERSION, ) - self.load_plugins( - config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads - ) + super().__init__() # Initialize PluginAware def embed( self, diff --git a/pinecone/data/index.py b/pinecone/data/index.py index ebd5cecd..a228bfbe 100644 --- a/pinecone/data/index.py +++ b/pinecone/data/index.py @@ -55,7 +55,7 @@ def parse_query_response(response: QueryResponse): return response -class Index(IndexInterface, ImportFeatureMixin, PluginAware): +class Index(PluginAware, IndexInterface, ImportFeatureMixin): """ A client for interacting with a Pinecone index via REST API. For improved performance, use the Pinecone GRPC index client. @@ -101,10 +101,6 @@ def __init__( # Pass the same api_client to the ImportFeatureMixin super().__init__(api_client=self._api_client) - self.load_plugins( - config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads - ) - def _openapi_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, Any]: return filter_dict(kwargs, OPENAPI_ENDPOINT_PARAMS) diff --git a/pinecone/utils/plugin_aware.py b/pinecone/utils/plugin_aware.py index ce1e4b87..a99223e2 100644 --- a/pinecone/utils/plugin_aware.py +++ b/pinecone/utils/plugin_aware.py @@ -1,8 +1,8 @@ +from typing import Any from .setup_openapi_client import build_plugin_setup_client from pinecone.config import Config from pinecone.openapi_support.configuration import Configuration as OpenApiConfig - from pinecone_plugin_interface import load_and_install as install_plugins import logging @@ -11,17 +11,97 @@ class PluginAware: + """ + Base class for classes that support plugin loading. + + This class provides functionality to lazily load plugins when they are first accessed. + Subclasses must set the following attributes before calling super().__init__(): + - config: Config + - openapi_config: OpenApiConfig + - pool_threads: int + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + """ + Initialize the PluginAware class. + + Args: + *args: Variable length argument list. 
+ **kwargs: Arbitrary keyword arguments. + + Raises: + AttributeError: If required attributes are not set in the subclass. + """ + logger.debug("PluginAware __init__ called for %s", self.__class__.__name__) + + # Check for required attributes after super().__init__ has been called + missing_attrs = [] + if not hasattr(self, "config"): + missing_attrs.append("config") + if not hasattr(self, "openapi_config"): + missing_attrs.append("openapi_config") + if not hasattr(self, "pool_threads"): + missing_attrs.append("pool_threads") + + if missing_attrs: + raise AttributeError( + f"PluginAware class requires the following attributes: {', '.join(missing_attrs)}. " + f"These must be set in the {self.__class__.__name__} class's __init__ method " + f"before calling super().__init__()." + ) + + self._plugins_loaded = False + """ @private """ + + def __getattr__(self, name: str) -> Any: + """ + Called when an attribute is not found through the normal lookup process. + This allows for lazy loading of plugins when they are first accessed. + + Args: + name: The name of the attribute being accessed. + + Returns: + The requested attribute. + + Raises: + AttributeError: If the attribute cannot be found after loading plugins. + """ + if not self._plugins_loaded: + logger.debug("Loading plugins for %s", self.__class__.__name__) + self.load_plugins( + config=self.config, + openapi_config=self.openapi_config, + pool_threads=self.pool_threads, + ) + self._plugins_loaded = True + try: + return object.__getattribute__(self, name) + except AttributeError: + pass + + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") + def load_plugins( self, config: Config, openapi_config: OpenApiConfig, pool_threads: int ) -> None: - """@private""" + """ + Load plugins for the parent class. + + Args: + config: The Pinecone configuration. + openapi_config: The OpenAPI configuration. + pool_threads: The number of threads in the pool. + """ try: - # I don't expect this to ever throw, but wrapping this in a - # try block just in case to make sure a bad plugin doesn't - # halt client initialization. 
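+            # Wrapping plugin setup in try/except ensures a bad plugin can
+            # never halt client initialization; failures are logged below
+            # instead of being raised.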
+ # Build the OpenAPI client for plugin setup openapi_client_builder = build_plugin_setup_client( config=config, openapi_config=openapi_config, pool_threads=pool_threads ) + # Install plugins install_plugins(self, openapi_client_builder) + logger.debug("Plugins loaded successfully for %s", self.__class__.__name__) + except ImportError as e: + logger.warning("Failed to import plugin module: %s", e) except Exception as e: - logger.error(f"Error loading plugins: {e}") + logger.error("Error loading plugins: %s", e, exc_info=True) From b3bc5a47e48876a1e9a478f3d71be230dd61dda4 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Fri, 11 Apr 2025 12:55:16 -0400 Subject: [PATCH 02/13] Fix unit test --- tests/unit/test_control.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_control.py b/tests/unit/test_control.py index c0b909dd..ad3b2872 100644 --- a/tests/unit/test_control.py +++ b/tests/unit/test_control.py @@ -77,9 +77,12 @@ def index_list_response(): class TestControl: - def test_plugins_are_installed(self): + def test_plugins_are_lazily_loaded(self): with patch.object(PluginAware, "load_plugins") as mock_install_plugins: - Pinecone(api_key="asdf") + pc = Pinecone(api_key="asdf") + mock_install_plugins.assert_not_called() + with pytest.raises(AttributeError): + pc.foo() # Accessing a non-existent attribute should raise an AttributeError after PluginAware installs any applicable plugins mock_install_plugins.assert_called_once() def test_default_host(self): From 7b9b383bc9c029ce3be1d292e32eadff4a2efb25 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Mon, 14 Apr 2025 11:13:13 -0400 Subject: [PATCH 03/13] Add unit tests for PluginAware --- pinecone/utils/plugin_aware.py | 35 ++++++++++++++++------- tests/unit/test_plugin_aware.py | 49 +++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 10 deletions(-) create mode 100644 tests/unit/test_plugin_aware.py diff --git a/pinecone/utils/plugin_aware.py b/pinecone/utils/plugin_aware.py index a99223e2..8410397a 100644 --- a/pinecone/utils/plugin_aware.py +++ b/pinecone/utils/plugin_aware.py @@ -34,6 +34,9 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: """ logger.debug("PluginAware __init__ called for %s", self.__class__.__name__) + self._plugins_loaded = False + """ @private """ + # Check for required attributes after super().__init__ has been called missing_attrs = [] if not hasattr(self, "config"): @@ -50,9 +53,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: f"before calling super().__init__()." ) - self._plugins_loaded = False - """ @private """ - def __getattr__(self, name: str) -> Any: """ Called when an attribute is not found through the normal lookup process. @@ -67,17 +67,32 @@ def __getattr__(self, name: str) -> Any: Raises: AttributeError: If the attribute cannot be found after loading plugins. """ + # Check if this is one of the required attributes that should be set by subclasses + required_attrs = ["config", "openapi_config", "pool_threads"] + if name in required_attrs: + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{name}'. " + f"This attribute must be set in the subclass's __init__ method " + f"before calling super().__init__()." 
+ ) + if not self._plugins_loaded: logger.debug("Loading plugins for %s", self.__class__.__name__) - self.load_plugins( - config=self.config, - openapi_config=self.openapi_config, - pool_threads=self.pool_threads, - ) - self._plugins_loaded = True + # Use object.__getattribute__ to avoid triggering __getattr__ again try: - return object.__getattribute__(self, name) + config = object.__getattribute__(self, "config") + openapi_config = object.__getattribute__(self, "openapi_config") + pool_threads = object.__getattribute__(self, "pool_threads") + self.load_plugins( + config=config, openapi_config=openapi_config, pool_threads=pool_threads + ) + self._plugins_loaded = True + try: + return object.__getattribute__(self, name) + except AttributeError: + pass except AttributeError: + # If we can't get the required attributes, we can't load plugins pass raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") diff --git a/tests/unit/test_plugin_aware.py b/tests/unit/test_plugin_aware.py new file mode 100644 index 00000000..7f4329d1 --- /dev/null +++ b/tests/unit/test_plugin_aware.py @@ -0,0 +1,49 @@ +import pytest +from pinecone.utils.plugin_aware import PluginAware +from pinecone.config import Config +from pinecone.openapi_support.configuration import Configuration as OpenApiConfig + + +class TestPluginAware: + def test_errors_when_required_attributes_are_missing(self): + class Foo(PluginAware): + def __init__(self): + # does not set config, openapi_config, or pool_threads + super().__init__() + + with pytest.raises(AttributeError) as e: + Foo() + + assert "config" in str(e.value) + assert "openapi_config" in str(e.value) + assert "pool_threads" in str(e.value) + + def test_correctly_raise_attribute_errors(self): + class Foo(PluginAware): + def __init__(self): + self.config = Config() + self.openapi_config = OpenApiConfig() + self.pool_threads = 1 + + super().__init__() + + foo = Foo() + + with pytest.raises(AttributeError) as e: + foo.bar() + + assert "bar" in str(e.value) + + def test_plugins_are_lazily_loaded(self): + class Pinecone(PluginAware): + def __init__(self): + self.config = Config() + self.openapi_config = OpenApiConfig() + self.pool_threads = 10 + + super().__init__() + + pc = Pinecone() + assert "assistant" not in dir(pc) + + assert pc.assistant is not None From 79c73a851a8a8691602977017d294faddfa753e3 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Mon, 14 Apr 2025 11:32:28 -0400 Subject: [PATCH 04/13] Add assistant plugin to dev deps --- poetry.lock | 25 ++++++++++++++++++++----- pyproject.toml | 1 + 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 427dc1e2..fb037257 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1001,13 +1001,13 @@ files = [ [[package]] name = "packaging" -version = "23.2" +version = "24.2" description = "Core utilities for Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, - {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -1130,6 +1130,21 @@ pygments = ">=2.12.0" [package.extras] dev = 
["hypothesis", "mypy", "pdoc-pyo3-sample-library (==1.0.11)", "pygments (>=2.14.0)", "pytest", "pytest-cov", "pytest-timeout", "ruff", "tox", "types-pygments"] +[[package]] +name = "pinecone-plugin-assistant" +version = "1.6.0" +description = "Assistant plugin for Pinecone SDK" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "pinecone_plugin_assistant-1.6.0-py3-none-any.whl", hash = "sha256:d742273d136fba66d020f1af01af2c6bfbc802f7ff9ddf46c590b7ea26932175"}, + {file = "pinecone_plugin_assistant-1.6.0.tar.gz", hash = "sha256:b7c531743f87269ba567dd6084b1464b62636a011564d414bc53147571b2f2c1"}, +] + +[package.dependencies] +packaging = ">=24.2,<25.0" +requests = ">=2.32.3,<3.0.0" + [[package]] name = "pinecone-plugin-interface" version = "0.0.7" @@ -1899,4 +1914,4 @@ grpc = ["googleapis-common-protos", "grpcio", "grpcio", "grpcio", "lz4", "protob [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "8a10046c5826a9773836e6b3ee50271bb0077d0faf32d709f1e65c4bb1fc53ea" +content-hash = "6e2107c224f622bcd0492b87d8a92f36318d9487af485e766b0e944e378e083a" diff --git a/pyproject.toml b/pyproject.toml index 0525d08d..ff491308 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ urllib3_mock = "0.3.3" responses = ">=0.8.1" ruff = "^0.9.3" beautifulsoup4 = "^4.13.3" +pinecone-plugin-assistant = "^1.6.0" [tool.poetry.extras] From 7933e801fc7f0d5f2b926fd71cdf0d5af8e5fbd0 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Tue, 8 Apr 2025 10:23:05 -0400 Subject: [PATCH 05/13] Refactoring --- pinecone/control/db_control.py | 54 ++++ pinecone/control/db_control_asyncio.py | 55 ++++ ...erface.py => legacy_pinecone_interface.py} | 86 +++--- pinecone/control/pinecone.py | 250 +++++++----------- pinecone/control/pinecone_asyncio.py | 216 ++++++--------- .../control/pinecone_interface_asyncio.py | 89 ++++--- pinecone/control/resources/__init__.py | 2 + pinecone/control/resources/collection.py | 27 ++ pinecone/control/resources/index.py | 185 +++++++++++++ .../control/resources_asyncio/collection.py | 32 +++ pinecone/control/resources_asyncio/index.py | 173 ++++++++++++ poetry.lock | 50 +++- pyproject.toml | 2 + 13 files changed, 837 insertions(+), 384 deletions(-) create mode 100644 pinecone/control/db_control.py create mode 100644 pinecone/control/db_control_asyncio.py rename pinecone/control/{pinecone_interface.py => legacy_pinecone_interface.py} (95%) create mode 100644 pinecone/control/resources/__init__.py create mode 100644 pinecone/control/resources/collection.py create mode 100644 pinecone/control/resources/index.py create mode 100644 pinecone/control/resources_asyncio/collection.py create mode 100644 pinecone/control/resources_asyncio/index.py diff --git a/pinecone/control/db_control.py b/pinecone/control/db_control.py new file mode 100644 index 00000000..ca9f54d5 --- /dev/null +++ b/pinecone/control/db_control.py @@ -0,0 +1,54 @@ +import logging +from typing import Optional, TYPE_CHECKING + +from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi +from pinecone.openapi_support.api_client import ApiClient + +from pinecone.utils import setup_openapi_client +from pinecone.core.openapi.db_control import API_VERSION + + +logger = logging.getLogger(__name__) +""" @private """ + +if TYPE_CHECKING: + from .resources.index import IndexResource + from .resources.collection import CollectionResource + + +class DBControl: + def __init__(self, config, openapi_config, pool_threads): + self.config = config + """ @private """ + + self.index_api 
= setup_openapi_client( + api_client_klass=ApiClient, + api_klass=ManageIndexesApi, + config=self.config, + openapi_config=self.openapi_config, + pool_threads=pool_threads, + api_version=API_VERSION, + ) + """ @private """ + + self._index_resource: Optional["IndexResource"] = None + """ @private """ + + self._collection_resource: Optional["CollectionResource"] = None + """ @private """ + + @property + def index(self) -> "IndexResource": + if self._index_resource is None: + from .resources.index import IndexResource + + self._index_resource = IndexResource(index_api=self.index_api, config=self.config) + return self._index_resource + + @property + def collection(self) -> "CollectionResource": + if self._collection_resource is None: + from .resources.collection import CollectionResource + + self._collection_resource = CollectionResource(self.index_api) + return self._collection_resource diff --git a/pinecone/control/db_control_asyncio.py b/pinecone/control/db_control_asyncio.py new file mode 100644 index 00000000..3f10df6b --- /dev/null +++ b/pinecone/control/db_control_asyncio.py @@ -0,0 +1,55 @@ +import logging +from typing import Optional, TYPE_CHECKING + +from pinecone.core.openapi.db_control.api.manage_indexes_api import AsyncioManageIndexesApi +from pinecone.openapi_support import AsyncioApiClient + +from pinecone.utils import setup_async_openapi_client +from pinecone.core.openapi.db_control import API_VERSION + +logger = logging.getLogger(__name__) +""" @private """ + + +if TYPE_CHECKING: + from .resources_asyncio.index import IndexResourceAsyncio + from .resources_asyncio.collection import CollectionResourceAsyncio + + +class DBControlAsyncio: + def __init__(self, config, openapi_config, pool_threads): + self.config = config + """ @private """ + + self.index_api = setup_async_openapi_client( + api_client_klass=AsyncioApiClient, + api_klass=AsyncioManageIndexesApi, + config=self.config, + openapi_config=self.openapi_config, + api_version=API_VERSION, + ) + """ @private """ + + self._index_resource: Optional["IndexResourceAsyncio"] = None + """ @private """ + + self._collection_resource: Optional["CollectionResourceAsyncio"] = None + """ @private """ + + @property + def index(self) -> "IndexResourceAsyncio": + if self._index_resource is None: + from .resources_asyncio.index import IndexResourceAsyncio + + self._index_resource = IndexResourceAsyncio( + index_api=self.index_api, config=self.config + ) + return self._index_resource + + @property + def collection(self) -> "CollectionResourceAsyncio": + if self._collection_resource is None: + from .resources_asyncio.collection import CollectionResourceAsyncio + + self._collection_resource = CollectionResourceAsyncio(self.index_api) + return self._collection_resource diff --git a/pinecone/control/pinecone_interface.py b/pinecone/control/legacy_pinecone_interface.py similarity index 95% rename from pinecone/control/pinecone_interface.py rename to pinecone/control/legacy_pinecone_interface.py index c183e611..a6a7779a 100644 --- a/pinecone/control/pinecone_interface.py +++ b/pinecone/control/legacy_pinecone_interface.py @@ -1,30 +1,30 @@ from abc import ABC, abstractmethod -from typing import Optional, Dict, Union - - -from pinecone.models import ( - ServerlessSpec, - PodSpec, - IndexList, - CollectionList, - IndexModel, - IndexEmbed, -) -from pinecone.enums import ( - Metric, - VectorType, - DeletionProtection, - PodType, - CloudProvider, - AwsRegion, - GcpRegion, - AzureRegion, -) -from .types import CreateIndexForModelEmbedTypedDict - - 
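+# The imports above are gated behind TYPE_CHECKING: type checkers still see
+# them, but at runtime they are skipped so `import pinecone` stays fast. The
+# annotations below therefore reference these names as quoted strings.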
-class PineconeDBControlInterface(ABC): +from typing import Optional, Dict, Union, TYPE_CHECKING + +if TYPE_CHECKING: + from pinecone.models import ( + ServerlessSpec, + PodSpec, + IndexList, + CollectionList, + IndexModel, + IndexEmbed, + ) + from pinecone.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, + ) + from .types import CreateIndexForModelEmbedTypedDict + + +class LegacyPineconeDBControlInterface(ABC): @abstractmethod def __init__( self, @@ -190,14 +190,16 @@ def __init__( def create_index( self, name: str, - spec: Union[Dict, ServerlessSpec, PodSpec], + spec: Union[Dict, "ServerlessSpec", "PodSpec"], dimension: Optional[int], - metric: Optional[Union[Metric, str]] = Metric.COSINE, + metric: Optional[Union["Metric", str]] = "Metric.COSINE", timeout: Optional[int] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", + vector_type: Optional[Union["VectorType", str]] = "VectorType.DENSE", tags: Optional[Dict[str, str]] = None, - ) -> IndexModel: + ) -> "IndexModel": """Creates a Pinecone index. :param name: The name of the index to create. Must be unique within your project and @@ -299,13 +301,15 @@ def create_index( def create_index_for_model( self, name: str, - cloud: Union[CloudProvider, str], - region: Union[AwsRegion, GcpRegion, AzureRegion, str], - embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + cloud: Union["CloudProvider", str], + region: Union["AwsRegion", "GcpRegion", "AzureRegion", str], + embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"], tags: Optional[Dict[str, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", timeout: Optional[int] = None, - ) -> IndexModel: + ) -> "IndexModel": """ :param name: The name of the index to create. Must be unique within your project and cannot be changed once created. Allowed characters are lowercase letters, numbers, @@ -414,7 +418,7 @@ def delete_index(self, name: str, timeout: Optional[int] = None): pass @abstractmethod - def list_indexes(self) -> IndexList: + def list_indexes(self) -> "IndexList": """ :return: Returns an `IndexList` object, which is iterable and contains a list of `IndexModel` objects. The `IndexList` also has a convenience method `names()` @@ -447,7 +451,7 @@ def list_indexes(self) -> IndexList: pass @abstractmethod - def describe_index(self, name: str) -> IndexModel: + def describe_index(self, name: str) -> "IndexModel": """ :param name: the name of the index to describe. 
:return: Returns an `IndexModel` object @@ -534,8 +538,8 @@ def configure_index( self, name: str, replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, ): """ @@ -622,7 +626,7 @@ def configure_index( pass @abstractmethod - def create_collection(self, name: str, source: str): + def create_collection(self, name: str, source: str) -> None: """Create a collection from a pod-based index :param name: Name of the collection @@ -631,7 +635,7 @@ def create_collection(self, name: str, source: str): pass @abstractmethod - def list_collections(self) -> CollectionList: + def list_collections(self) -> "CollectionList": """List all collections ```python diff --git a/pinecone/control/pinecone.py b/pinecone/control/pinecone.py index 17b3d953..50937555 100644 --- a/pinecone/control/pinecone.py +++ b/pinecone/control/pinecone.py @@ -1,49 +1,47 @@ -import time import logging -from typing import Optional, Dict, Union +from typing import Optional, Dict, Union, TYPE_CHECKING from multiprocessing import cpu_count -from .index_host_store import IndexHostStore -from .pinecone_interface import PineconeDBControlInterface +from .legacy_pinecone_interface import LegacyPineconeDBControlInterface from pinecone.config import PineconeConfig, ConfigBuilder -from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi -from pinecone.openapi_support.api_client import ApiClient - - -from pinecone.utils import normalize_host, setup_openapi_client, PluginAware -from pinecone.core.openapi.db_control import API_VERSION -from pinecone.models import ( - ServerlessSpec, - PodSpec, - IndexModel, - IndexList, - CollectionList, - IndexEmbed, -) +from pinecone.utils import normalize_host, PluginAware from .langchain_import_warnings import _build_langchain_attribute_error_message from pinecone.utils import docslinks -from pinecone.data import _Index, _Inference, _IndexAsyncio - -from pinecone.enums import ( - Metric, - VectorType, - DeletionProtection, - PodType, - CloudProvider, - AwsRegion, - GcpRegion, - AzureRegion, -) -from .types import CreateIndexForModelEmbedTypedDict -from .request_factory import PineconeDBControlRequestFactory logger = logging.getLogger(__name__) """ @private """ - -class Pinecone(PluginAware, PineconeDBControlInterface): +if TYPE_CHECKING: + from .db_control import DBControl + from pinecone.data import ( + _Index as Index, + _Inference as Inference, + _IndexAsyncio as IndexAsyncio, + ) + from pinecone.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, + ) + from pinecone.models import ( + ServerlessSpec, + PodSpec, + IndexModel, + IndexList, + CollectionList, + IndexEmbed, + ) + from .types import CreateIndexForModelEmbedTypedDict + + +class Pinecone(PluginAware, LegacyPineconeDBControlInterface): """ A client for interacting with Pinecone's vector database. 
@@ -91,196 +89,126 @@ def __init__( self.pool_threads = pool_threads """ @private """ - self._inference = None # Lazy initialization - """ @private """ - - self.index_api = setup_openapi_client( - api_client_klass=ApiClient, - api_klass=ManageIndexesApi, - config=self.config, - openapi_config=self.openapi_config, - pool_threads=pool_threads, - api_version=API_VERSION, - ) + self._inference: Optional["Inference"] = None # Lazy initialization """ @private """ - self.index_host_store = IndexHostStore() + self._db_control: Optional["DBControl"] = None # Lazy initialization """ @private """ # Initialize PluginAware first, which will then call PineconeDBControlInterface.__init__ super().__init__() @property - def inference(self): + def inference(self) -> "Inference": """ Inference is a namespace where an instance of the `pinecone.data.features.inference.inference.Inference` class is lazily created and cached. """ if self._inference is None: + from pinecone.data import _Inference + self._inference = _Inference(config=self.config, openapi_config=self.openapi_config) return self._inference + @property + def db(self) -> "DBControl": + """ + DBControl is a namespace where an instance of the `pinecone.control.db_control.DBControl` class is lazily created and cached. + """ + if self._db_control is None: + from .db_control import DBControl + + self._db_control = DBControl( + config=self.config, + openapi_config=self.openapi_config, + pool_threads=self.pool_threads, + ) + return self._db_control + def create_index( self, name: str, - spec: Union[Dict, ServerlessSpec, PodSpec], + spec: Union[Dict, "ServerlessSpec", "PodSpec"], dimension: Optional[int] = None, - metric: Optional[Union[Metric, str]] = Metric.COSINE, + metric: Optional[Union["Metric", str]] = "Metric.COSINE", timeout: Optional[int] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", + vector_type: Optional[Union["VectorType", str]] = "VectorType.DENSE", tags: Optional[Dict[str, str]] = None, - ) -> IndexModel: - req = PineconeDBControlRequestFactory.create_index_request( + ) -> "IndexModel": + return self.db.index.create( name=name, spec=spec, dimension=dimension, metric=metric, + timeout=timeout, deletion_protection=deletion_protection, vector_type=vector_type, tags=tags, ) - resp = self.index_api.create_index(create_index_request=req) - - if timeout == -1: - return IndexModel(resp) - return self.__poll_describe_index_until_ready(name, timeout) def create_index_for_model( self, name: str, - cloud: Union[CloudProvider, str], - region: Union[AwsRegion, GcpRegion, AzureRegion, str], - embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + cloud: Union["CloudProvider", str], + region: Union["AwsRegion", "GcpRegion", "AzureRegion", str], + embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"], tags: Optional[Dict[str, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", timeout: Optional[int] = None, - ) -> IndexModel: - req = PineconeDBControlRequestFactory.create_index_for_model_request( + ) -> "IndexModel": + return self.db.index.create_for_model( name=name, cloud=cloud, region=region, embed=embed, tags=tags, 
deletion_protection=deletion_protection, + timeout=timeout, ) - resp = self.index_api.create_index_for_model(req) - - if timeout == -1: - return IndexModel(resp) - return self.__poll_describe_index_until_ready(name, timeout) - - def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): - description = None - - def is_ready() -> bool: - nonlocal description - description = self.describe_index(name=name) - return description.status.ready - - total_wait_time = 0 - if timeout is None: - # Wait indefinitely - while not is_ready(): - logger.debug( - f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." - ) - total_wait_time += 5 - time.sleep(5) - - else: - # Wait for a maximum of timeout seconds - while not is_ready(): - if timeout < 0: - logger.error(f"Index {name} is not ready. Timeout reached.") - link = docslinks["API_DESCRIBE_INDEX"] - timeout_msg = ( - f"Please call describe_index() to confirm index status. See docs at {link}" - ) - raise TimeoutError(timeout_msg) - - logger.debug( - f"Waiting for index {name} to be ready. Total wait time: {total_wait_time}" - ) - total_wait_time += 5 - time.sleep(5) - timeout -= 5 - - return description def delete_index(self, name: str, timeout: Optional[int] = None): - self.index_api.delete_index(name) - self.index_host_store.delete_host(self.config, name) + return self.db.index.delete(name=name, timeout=timeout) - if timeout == -1: - return - - if timeout is None: - while self.has_index(name): - time.sleep(5) - else: - while self.has_index(name) and timeout >= 0: - time.sleep(5) - timeout -= 5 - if timeout and timeout < 0: - raise ( - TimeoutError( - "Please call the list_indexes API ({}) to confirm if index is deleted".format( - "https://www.pinecone.io/docs/api/operation/list_indexes/" - ) - ) - ) + def list_indexes(self) -> "IndexList": + return self.db.index.list() - def list_indexes(self) -> IndexList: - response = self.index_api.list_indexes() - return IndexList(response) - - def describe_index(self, name: str) -> IndexModel: - api_instance = self.index_api - description = api_instance.describe_index(name) - host = description.host - self.index_host_store.set_host(self.config, name, host) - - return IndexModel(description) + def describe_index(self, name: str) -> "IndexModel": + return self.db.index.describe(name=name) def has_index(self, name: str) -> bool: - if name in self.list_indexes().names(): - return True - else: - return False + return self.db.index.has(name=name) def configure_index( self, name: str, replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, ): - api_instance = self.index_api - description = self.describe_index(name=name) - - req = PineconeDBControlRequestFactory.configure_index_request( - description=description, + return self.db.index.configure( + name=name, replicas=replicas, pod_type=pod_type, deletion_protection=deletion_protection, tags=tags, ) - api_instance.configure_index(name, configure_index_request=req) def create_collection(self, name: str, source: str) -> None: - req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) - self.index_api.create_collection(create_collection_request=req) + return self.db.collection.create(name=name, source=source) - def 
list_collections(self) -> CollectionList: - response = self.index_api.list_collections() - return CollectionList(response) + def list_collections(self) -> "CollectionList": + return self.db.collection.list() def delete_collection(self, name: str) -> None: - self.index_api.delete_collection(name) + return self.db.collection.delete(name=name) def describe_collection(self, name: str): - return self.index_api.describe_collection(name).to_dict() + return self.db.collection.describe(name=name) @staticmethod def from_texts(*args, **kwargs): @@ -292,7 +220,9 @@ def from_documents(*args, **kwargs): """@private""" raise AttributeError(_build_langchain_attribute_error_message("from_documents")) - def Index(self, name: str = "", host: str = "", **kwargs): + def Index(self, name: str = "", host: str = "", **kwargs) -> "Index": + from pinecone.data import _Index + if name == "" and host == "": raise ValueError("Either name or host must be specified") @@ -318,7 +248,9 @@ def Index(self, name: str = "", host: str = "", **kwargs): **kwargs, ) - def IndexAsyncio(self, host: str, **kwargs): + def IndexAsyncio(self, host: str, **kwargs) -> "IndexAsyncio": + from pinecone.data import _IndexAsyncio + api_key = self.config.api_key openapi_config = self.openapi_config diff --git a/pinecone/control/pinecone_asyncio.py b/pinecone/control/pinecone_asyncio.py index 1373c8e4..efd5ca5e 100644 --- a/pinecone/control/pinecone_asyncio.py +++ b/pinecone/control/pinecone_asyncio.py @@ -1,40 +1,37 @@ import logging -import asyncio -from typing import Optional, Dict, Union +from typing import Optional, Dict, Union, TYPE_CHECKING from pinecone.config import PineconeConfig, ConfigBuilder -from pinecone.core.openapi.db_control.api.manage_indexes_api import AsyncioManageIndexesApi -from pinecone.openapi_support import AsyncioApiClient - -from pinecone.utils import normalize_host, setup_async_openapi_client -from pinecone.core.openapi.db_control import API_VERSION -from pinecone.models import ( - ServerlessSpec, - PodSpec, - IndexModel, - IndexList, - CollectionList, - IndexEmbed, -) +from pinecone.utils import normalize_host from pinecone.utils import docslinks -from pinecone.data import _IndexAsyncio, _AsyncioInference -from pinecone.enums import ( - Metric, - VectorType, - DeletionProtection, - PodType, - CloudProvider, - AwsRegion, - GcpRegion, - AzureRegion, -) -from .types import CreateIndexForModelEmbedTypedDict from .request_factory import PineconeDBControlRequestFactory from .pinecone_interface_asyncio import PineconeAsyncioDBControlInterface from .pinecone import check_realistic_host +if TYPE_CHECKING: + from .types import CreateIndexForModelEmbedTypedDict + from pinecone.data import _IndexAsyncio + from pinecone.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, + ) + from pinecone.models import ( + ServerlessSpec, + PodSpec, + IndexModel, + IndexList, + CollectionList, + IndexEmbed, + ) + logger = logging.getLogger(__name__) """ @private """ @@ -102,13 +99,7 @@ def __init__( self._inference = None # Lazy initialization """ @private """ - self.index_api = setup_async_openapi_client( - api_client_klass=AsyncioApiClient, - api_klass=AsyncioManageIndexesApi, - config=self.config, - openapi_config=self.openapi_config, - api_version=API_VERSION, - ) + self._db_control = None # Lazy initialization """ @private """ async def __aenter__(self): @@ -158,22 +149,38 @@ async def main(): @property def inference(self): - """Dynamically create and cache the 
Inference instance.""" + """Dynamically create and cache the AsyncioInference instance.""" if self._inference is None: + from pinecone.data import _AsyncioInference + self._inference = _AsyncioInference(api_client=self.index_api.api_client) return self._inference + @property + def db(self): + if self._db_control is None: + from .db_control_asyncio import DBControlAsyncio + + self._db_control = DBControlAsyncio( + config=self.config, + openapi_config=self.openapi_config, + pool_threads=self.pool_threads, + ) + return self._db_control + async def create_index( self, name: str, - spec: Union[Dict, ServerlessSpec, PodSpec], + spec: Union[Dict, "ServerlessSpec", "PodSpec"], dimension: Optional[int] = None, - metric: Optional[Union[Metric, str]] = Metric.COSINE, + metric: Optional[Union["Metric", str]] = "Metric.COSINE", timeout: Optional[int] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", + vector_type: Optional[Union["VectorType", str]] = "VectorType.DENSE", tags: Optional[Dict[str, str]] = None, - ) -> IndexModel: + ) -> "IndexModel": req = PineconeDBControlRequestFactory.create_index_request( name=name, spec=spec, @@ -183,145 +190,74 @@ async def create_index( vector_type=vector_type, tags=tags, ) - resp = await self.index_api.create_index(create_index_request=req) - - if timeout == -1: - return IndexModel(resp) - return await self.__poll_describe_index_until_ready(name, timeout) + resp = await self.db.index.create(create_index_request=req) + return resp async def create_index_for_model( self, name: str, - cloud: Union[CloudProvider, str], - region: Union[AwsRegion, GcpRegion, AzureRegion, str], - embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + cloud: Union["CloudProvider", str], + region: Union["AwsRegion", "GcpRegion", "AzureRegion", str], + embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"], tags: Optional[Dict[str, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", timeout: Optional[int] = None, - ) -> IndexModel: - req = PineconeDBControlRequestFactory.create_index_for_model_request( + ) -> "IndexModel": + return await self.db.index.create_for_model( name=name, cloud=cloud, region=region, embed=embed, tags=tags, deletion_protection=deletion_protection, + timeout=timeout, ) - resp = await self.index_api.create_index_for_model(req) - - if timeout == -1: - return IndexModel(resp) - return await self.__poll_describe_index_until_ready(name, timeout) - - async def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): - description = None - - async def is_ready() -> bool: - nonlocal description - description = await self.describe_index(name=name) - return description.status.ready - - total_wait_time = 0 - if timeout is None: - # Wait indefinitely - while not await is_ready(): - logger.debug( - f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." - ) - total_wait_time += 5 - await asyncio.sleep(5) - - else: - # Wait for a maximum of timeout seconds - while not await is_ready(): - if timeout < 0: - logger.error(f"Index {name} is not ready. 
Timeout reached.") - link = docslinks["API_DESCRIBE_INDEX"] - timeout_msg = ( - f"Please call describe_index() to confirm index status. See docs at {link}" - ) - raise TimeoutError(timeout_msg) - - logger.debug( - f"Waiting for index {name} to be ready. Total wait time: {total_wait_time}" - ) - total_wait_time += 5 - await asyncio.sleep(5) - timeout -= 5 - - return description async def delete_index(self, name: str, timeout: Optional[int] = None): - await self.index_api.delete_index(name) - - if timeout == -1: - return - - if timeout is None: - while await self.has_index(name): - await asyncio.sleep(5) - else: - while await self.has_index(name) and timeout >= 0: - await asyncio.sleep(5) - timeout -= 5 - if timeout and timeout < 0: - raise ( - TimeoutError( - "Please call the list_indexes API ({}) to confirm if index is deleted".format( - "https://www.pinecone.io/docs/api/operation/list_indexes/" - ) - ) - ) + return await self.db.index.delete(name=name, timeout=timeout) - async def list_indexes(self) -> IndexList: - response = await self.index_api.list_indexes() - return IndexList(response) + async def list_indexes(self) -> "IndexList": + return await self.db.index.list() - async def describe_index(self, name: str) -> IndexModel: - description = await self.index_api.describe_index(name) - return IndexModel(description) + async def describe_index(self, name: str) -> "IndexModel": + return await self.db.index.describe(name=name) async def has_index(self, name: str) -> bool: - available_indexes = await self.list_indexes() - if name in available_indexes.names(): - return True - else: - return False + return await self.db.index.has(name=name) async def configure_index( self, name: str, replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, ): - description = await self.describe_index(name=name) - - req = PineconeDBControlRequestFactory.configure_index_request( - description=description, + return await self.db.index.configure( + name=name, replicas=replicas, pod_type=pod_type, deletion_protection=deletion_protection, tags=tags, ) - await self.index_api.configure_index(name, configure_index_request=req) async def create_collection(self, name: str, source: str): - req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) - await self.index_api.create_collection(create_collection_request=req) + return await self.db.collection.create(name=name, source=source) - async def list_collections(self) -> CollectionList: - response = await self.index_api.list_collections() - return CollectionList(response) + async def list_collections(self) -> "CollectionList": + return await self.db.collection.list() async def delete_collection(self, name: str): - await self.index_api.delete_collection(name) + return await self.db.collection.delete(name=name) async def describe_collection(self, name: str): - return await self.index_api.describe_collection(name).to_dict() + return await self.db.collection.describe(name=name) + + def IndexAsyncio(self, host: str, **kwargs) -> "_IndexAsyncio": + from pinecone.data import _IndexAsyncio - def IndexAsyncio(self, host: str, **kwargs) -> _IndexAsyncio: api_key = self.config.api_key openapi_config = self.openapi_config diff --git a/pinecone/control/pinecone_interface_asyncio.py 
b/pinecone/control/pinecone_interface_asyncio.py index a732bce9..139af7a5 100644 --- a/pinecone/control/pinecone_interface_asyncio.py +++ b/pinecone/control/pinecone_interface_asyncio.py @@ -1,32 +1,31 @@ from abc import ABC, abstractmethod -from typing import Optional, Dict, Union - - -from pinecone.config import Config - -from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi - - -from pinecone.models import ( - ServerlessSpec, - PodSpec, - IndexList, - CollectionList, - IndexModel, - IndexEmbed, -) -from pinecone.enums import ( - Metric, - VectorType, - DeletionProtection, - PodType, - CloudProvider, - AwsRegion, - GcpRegion, - AzureRegion, -) -from .types import CreateIndexForModelEmbedTypedDict +from typing import Optional, Dict, Union, TYPE_CHECKING + +if TYPE_CHECKING: + from pinecone.config import Config + + from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi + + from pinecone.models import ( + ServerlessSpec, + PodSpec, + IndexList, + CollectionList, + IndexModel, + IndexEmbed, + ) + from pinecone.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, + ) + from .types import CreateIndexForModelEmbedTypedDict class PineconeAsyncioDBControlInterface(ABC): @@ -39,10 +38,10 @@ def __init__( proxy_headers: Optional[Dict[str, str]] = None, ssl_ca_certs: Optional[str] = None, ssl_verify: Optional[bool] = None, - config: Optional[Config] = None, + config: Optional["Config"] = None, additional_headers: Optional[Dict[str, str]] = {}, pool_threads: Optional[int] = 1, - index_api: Optional[ManageIndexesApi] = None, + index_api: Optional["ManageIndexesApi"] = None, **kwargs, ): """ @@ -291,12 +290,14 @@ async def main(): async def create_index( self, name: str, - spec: Union[Dict, ServerlessSpec, PodSpec], + spec: Union[Dict, "ServerlessSpec", "PodSpec"], dimension: Optional[int], - metric: Optional[Union[Metric, str]] = Metric.COSINE, + metric: Optional[Union["Metric", str]] = "Metric.COSINE", timeout: Optional[int] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", + vector_type: Optional[Union["VectorType", str]] = "VectorType.DENSE", tags: Optional[Dict[str, str]] = None, ): """Creates a Pinecone index. @@ -408,13 +409,15 @@ async def main(): async def create_index_for_model( self, name: str, - cloud: Union[CloudProvider, str], - region: Union[AwsRegion, GcpRegion, AzureRegion, str], - embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + cloud: Union["CloudProvider", str], + region: Union["AwsRegion", "GcpRegion", "AzureRegion", str], + embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"], tags: Optional[Dict[str, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", timeout: Optional[int] = None, - ) -> IndexModel: + ) -> "IndexModel": """ :param name: The name of the index to create. Must be unique within your project and cannot be changed once created. 
Allowed characters are lowercase letters, numbers, @@ -533,7 +536,7 @@ async def main(): pass @abstractmethod - async def list_indexes(self) -> IndexList: + async def list_indexes(self) -> "IndexList": """ :return: Returns an `IndexList` object, which is iterable and contains a list of `IndexModel` objects. The `IndexList` also has a convenience method `names()` @@ -574,7 +577,7 @@ async def main(): pass @abstractmethod - async def describe_index(self, name: str) -> IndexModel: + async def describe_index(self, name: str) -> "IndexModel": """ :param name: the name of the index to describe. :return: Returns an `IndexModel` object @@ -669,8 +672,8 @@ async def configure_index( self, name: str, replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, ): """ @@ -779,7 +782,7 @@ async def create_collection(self, name: str, source: str): pass @abstractmethod - async def list_collections(self) -> CollectionList: + async def list_collections(self) -> "CollectionList": """List all collections ```python diff --git a/pinecone/control/resources/__init__.py b/pinecone/control/resources/__init__.py new file mode 100644 index 00000000..cc904d53 --- /dev/null +++ b/pinecone/control/resources/__init__.py @@ -0,0 +1,2 @@ +from .index import IndexResource +from .collection import CollectionResource diff --git a/pinecone/control/resources/collection.py b/pinecone/control/resources/collection.py new file mode 100644 index 00000000..dba438b1 --- /dev/null +++ b/pinecone/control/resources/collection.py @@ -0,0 +1,27 @@ +import logging + +from pinecone.models import CollectionList +from ..request_factory import PineconeDBControlRequestFactory + +logger = logging.getLogger(__name__) +""" @private """ + + +class CollectionResource: + def __init__(self, index_api): + self.index_api = index_api + """ @private """ + + def create(self, name: str, source: str) -> None: + req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) + self.index_api.create_collection(create_collection_request=req) + + def list(self) -> CollectionList: + response = self.index_api.list_collections() + return CollectionList(response) + + def delete(self, name: str) -> None: + self.index_api.delete_collection(name) + + def describe(self, name: str): + return self.index_api.describe_collection(name).to_dict() diff --git a/pinecone/control/resources/index.py b/pinecone/control/resources/index.py new file mode 100644 index 00000000..130b8e4f --- /dev/null +++ b/pinecone/control/resources/index.py @@ -0,0 +1,185 @@ +import time +import logging +from typing import Optional, Dict, Union + +from ..index_host_store import IndexHostStore + +from pinecone.models import ServerlessSpec, PodSpec, IndexModel, IndexList, IndexEmbed +from pinecone.utils import docslinks + +from pinecone.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, +) +from ..types import CreateIndexForModelEmbedTypedDict +from ..request_factory import PineconeDBControlRequestFactory + +logger = logging.getLogger(__name__) +""" @private """ + + +class IndexResource: + def __init__(self, index_api, config): + self.index_api = index_api + """ @private """ + + self.config = config + """ @private """ + + self.index_host_store = 
IndexHostStore() + """ @private """ + + def create( + self, + name: str, + spec: Union[Dict, ServerlessSpec, PodSpec], + dimension: Optional[int] = None, + metric: Optional[Union[Metric, str]] = Metric.COSINE, + timeout: Optional[int] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + tags: Optional[Dict[str, str]] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_request( + name=name, + spec=spec, + dimension=dimension, + metric=metric, + deletion_protection=deletion_protection, + vector_type=vector_type, + tags=tags, + ) + resp = self.index_api.create_index(create_index_request=req) + + if timeout == -1: + return IndexModel(resp) + return self.__poll_describe_index_until_ready(name, timeout) + + def create_for_model( + self, + name: str, + cloud: Union[CloudProvider, str], + region: Union[AwsRegion, GcpRegion, AzureRegion, str], + embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + tags: Optional[Dict[str, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + timeout: Optional[int] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_for_model_request( + name=name, + cloud=cloud, + region=region, + embed=embed, + tags=tags, + deletion_protection=deletion_protection, + ) + resp = self.index_api.create_index_for_model(req) + + if timeout == -1: + return IndexModel(resp) + return self.__poll_describe_index_until_ready(name, timeout) + + def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): + description = None + + def is_ready() -> bool: + nonlocal description + description = self.describe(name=name) + return description.status.ready + + total_wait_time = 0 + if timeout is None: + # Wait indefinitely + while not is_ready(): + logger.debug( + f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." + ) + total_wait_time += 5 + time.sleep(5) + + else: + # Wait for a maximum of timeout seconds + while not is_ready(): + if timeout < 0: + logger.error(f"Index {name} is not ready. Timeout reached.") + link = docslinks["API_DESCRIBE_INDEX"] + timeout_msg = ( + f"Please call describe_index() to confirm index status. See docs at {link}" + ) + raise TimeoutError(timeout_msg) + + logger.debug( + f"Waiting for index {name} to be ready. 
Total wait time: {total_wait_time}" + ) + total_wait_time += 5 + time.sleep(5) + timeout -= 5 + + return description + + def delete(self, name: str, timeout: Optional[int] = None): + self.index_api.delete_index(name) + self.index_host_store.delete_host(self.config, name) + + if timeout == -1: + return + + if timeout is None: + while self.has_index(name): + time.sleep(5) + else: + while self.has_index(name) and timeout >= 0: + time.sleep(5) + timeout -= 5 + if timeout and timeout < 0: + raise ( + TimeoutError( + "Please call the list_indexes API ({}) to confirm if index is deleted".format( + "https://www.pinecone.io/docs/api/operation/list_indexes/" + ) + ) + ) + + def list(self) -> IndexList: + response = self.index_api.list_indexes() + return IndexList(response) + + def describe(self, name: str) -> IndexModel: + api_instance = self.index_api + description = api_instance.describe_index(name) + host = description.host + self.index_host_store.set_host(self.config, name, host) + + return IndexModel(description) + + def has(self, name: str) -> bool: + if name in self.list().names(): + return True + else: + return False + + def configure( + self, + name: str, + replicas: Optional[int] = None, + pod_type: Optional[Union[PodType, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = None, + tags: Optional[Dict[str, str]] = None, + ): + api_instance = self.index_api + description = self.describe(name=name) + + req = PineconeDBControlRequestFactory.configure_index_request( + description=description, + replicas=replicas, + pod_type=pod_type, + deletion_protection=deletion_protection, + tags=tags, + ) + api_instance.configure_index(name, configure_index_request=req) diff --git a/pinecone/control/resources_asyncio/collection.py b/pinecone/control/resources_asyncio/collection.py new file mode 100644 index 00000000..42d430eb --- /dev/null +++ b/pinecone/control/resources_asyncio/collection.py @@ -0,0 +1,32 @@ +import logging +from typing import TYPE_CHECKING + + +from pinecone.models import CollectionList + +from ..request_factory import PineconeDBControlRequestFactory + +logger = logging.getLogger(__name__) +""" @private """ + +if TYPE_CHECKING: + pass + + +class CollectionResourceAsyncio: + def __init__(self, index_api): + self.index_api = index_api + + async def create(self, name: str, source: str): + req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) + await self.index_api.create_collection(create_collection_request=req) + + async def list(self) -> CollectionList: + response = await self.index_api.list_collections() + return CollectionList(response) + + async def delete(self, name: str): + await self.index_api.delete_collection(name) + + async def describe(self, name: str): + return await self.index_api.describe_collection(name).to_dict() diff --git a/pinecone/control/resources_asyncio/index.py b/pinecone/control/resources_asyncio/index.py new file mode 100644 index 00000000..17faffc5 --- /dev/null +++ b/pinecone/control/resources_asyncio/index.py @@ -0,0 +1,173 @@ +import logging +import asyncio +from typing import Optional, Dict, Union + + +from pinecone.models import ServerlessSpec, PodSpec, IndexModel, IndexList, IndexEmbed +from pinecone.utils import docslinks + +from pinecone.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, +) +from .types import CreateIndexForModelEmbedTypedDict +from .request_factory import PineconeDBControlRequestFactory + +logger 
= logging.getLogger(__name__) +""" @private """ + + +class IndexResourceAsyncio: + def __init__(self, index_api, config): + self.index_api = index_api + self.config = config + + async def create( + self, + name: str, + spec: Union[Dict, ServerlessSpec, PodSpec], + dimension: Optional[int] = None, + metric: Optional[Union[Metric, str]] = Metric.COSINE, + timeout: Optional[int] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + tags: Optional[Dict[str, str]] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_request( + name=name, + spec=spec, + dimension=dimension, + metric=metric, + deletion_protection=deletion_protection, + vector_type=vector_type, + tags=tags, + ) + resp = await self.index_api.create_index(create_index_request=req) + + if timeout == -1: + return IndexModel(resp) + return await self.__poll_describe_index_until_ready(name, timeout) + + async def create_for_model( + self, + name: str, + cloud: Union[CloudProvider, str], + region: Union[AwsRegion, GcpRegion, AzureRegion, str], + embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + tags: Optional[Dict[str, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + timeout: Optional[int] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_for_model_request( + name=name, + cloud=cloud, + region=region, + embed=embed, + tags=tags, + deletion_protection=deletion_protection, + ) + resp = await self.index_api.create_index_for_model(req) + + if timeout == -1: + return IndexModel(resp) + return await self.__poll_describe_index_until_ready(name, timeout) + + async def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): + description = None + + async def is_ready() -> bool: + nonlocal description + description = await self.describe(name=name) + return description.status.ready + + total_wait_time = 0 + if timeout is None: + # Wait indefinitely + while not await is_ready(): + logger.debug( + f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." + ) + total_wait_time += 5 + await asyncio.sleep(5) + + else: + # Wait for a maximum of timeout seconds + while not await is_ready(): + if timeout < 0: + logger.error(f"Index {name} is not ready. Timeout reached.") + link = docslinks["API_DESCRIBE_INDEX"] + timeout_msg = ( + f"Please call describe_index() to confirm index status. See docs at {link}" + ) + raise TimeoutError(timeout_msg) + + logger.debug( + f"Waiting for index {name} to be ready. 
Total wait time: {total_wait_time}" + ) + total_wait_time += 5 + await asyncio.sleep(5) + timeout -= 5 + + return description + + async def delete(self, name: str, timeout: Optional[int] = None): + await self.index_api.delete_index(name) + + if timeout == -1: + return + + if timeout is None: + while await self.has(name): + await asyncio.sleep(5) + else: + while await self.has(name) and timeout >= 0: + await asyncio.sleep(5) + timeout -= 5 + if timeout and timeout < 0: + raise ( + TimeoutError( + "Please call the list_indexes API ({}) to confirm if index is deleted".format( + "https://www.pinecone.io/docs/api/operation/list_indexes/" + ) + ) + ) + + async def list(self) -> IndexList: + response = await self.index_api.list_indexes() + return IndexList(response) + + async def describe(self, name: str) -> IndexModel: + description = await self.index_api.describe_index(name) + return IndexModel(description) + + async def has(self, name: str) -> bool: + available_indexes = await self.list() + if name in available_indexes.names(): + return True + else: + return False + + async def configure( + self, + name: str, + replicas: Optional[int] = None, + pod_type: Optional[Union[PodType, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = None, + tags: Optional[Dict[str, str]] = None, + ): + description = await self.describe(name=name) + + req = PineconeDBControlRequestFactory.configure_index_request( + description=description, + replicas=replicas, + pod_type=pod_type, + deletion_protection=deletion_protection, + tags=tags, + ) + await self.index_api.configure_index(name, configure_index_request=req) diff --git a/poetry.lock b/poetry.lock index fb037257..048d84c6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1347,6 +1347,29 @@ files = [ googleapis-common-protos = "*" protobuf = ">=4.21.0" +[[package]] +name = "psutil" +version = "7.0.0" +description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, + {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993"}, + {file = "psutil-7.0.0-cp36-cp36m-win32.whl", hash = "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17"}, + {file = "psutil-7.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e"}, + {file = "psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99"}, + {file = "psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553"}, + {file = "psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456"}, +] + +[package.extras] +dev = ["abi3audit", "black (==24.10.0)", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest", "pytest-cov", "pytest-xdist", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] +test = ["pytest", "pytest-xdist", "setuptools"] + [[package]] name = "py-cpuinfo" version = "9.0.0" @@ -1683,6 +1706,17 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tuna" +version = "0.5.11" +description = "Visualize Python performance profiles" +optional = false +python-versions = ">=3.6" +files = [ + {file = "tuna-0.5.11-py3-none-any.whl", hash = "sha256:ab352a6d836014ace585ecd882148f1f7c68be9ea4bf9e9298b7127594dab2ef"}, + {file = "tuna-0.5.11.tar.gz", hash = "sha256:d47f3e39e80af961c8df016ac97d1643c3c60b5eb451299da0ab5fe411d8866c"}, +] + [[package]] name = "types-protobuf" version = "4.24.0.4" @@ -1811,6 +1845,20 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +[[package]] +name = "vprof" +version = "0.38" +description = "Visual profiler for Python" +optional = false +python-versions = "*" +files = [ + {file = "vprof-0.38-py3-none-any.whl", hash = "sha256:91b91d8868176c29e0fe3426c9239d11cd192c7144c7baf26a211e48923a5ee8"}, + {file = "vprof-0.38.tar.gz", hash = "sha256:7f1000912eeb7a450c7c94d3cc96739af45ad0ff01d5abcc0b09a175d40ffadb"}, +] + +[package.dependencies] +psutil = ">=3" + 
[[package]] name = "yarl" version = "1.17.2" @@ -1914,4 +1962,4 @@ grpc = ["googleapis-common-protos", "grpcio", "grpcio", "grpcio", "lz4", "protob [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "6e2107c224f622bcd0492b87d8a92f36318d9487af485e766b0e944e378e083a" +content-hash = "96c8c770a4626bc9606a7b8e16537e217f238e20c217baa1206f4ef9debe5e82" diff --git a/pyproject.toml b/pyproject.toml index ff491308..5b8a11ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,6 +95,8 @@ responses = ">=0.8.1" ruff = "^0.9.3" beautifulsoup4 = "^4.13.3" pinecone-plugin-assistant = "^1.6.0" +vprof = "^0.38" +tuna = "^0.5.11" [tool.poetry.extras] From 2c6e1ce52b8d94376be62ba7b4c5bc1ecb99e979 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Tue, 8 Apr 2025 10:40:29 -0400 Subject: [PATCH 06/13] Refactoring --- pinecone/control/pinecone.py | 2 +- pinecone/control/resources/index.py | 10 ++++++++-- pinecone/control/resources_asyncio/index.py | 4 ++-- pinecone/grpc/pinecone.py | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pinecone/control/pinecone.py b/pinecone/control/pinecone.py index 50937555..87ae5b9d 100644 --- a/pinecone/control/pinecone.py +++ b/pinecone/control/pinecone.py @@ -237,7 +237,7 @@ def Index(self, name: str = "", host: str = "", **kwargs) -> "Index": index_host = normalize_host(host) else: # Otherwise, get host url from describe_index using the index name - index_host = self.index_host_store.get_host(self.index_api, self.config, name) + index_host = self.db.index._get_host(name) return _Index( host=index_host, diff --git a/pinecone/control/resources/index.py b/pinecone/control/resources/index.py index 130b8e4f..df6fd203 100644 --- a/pinecone/control/resources/index.py +++ b/pinecone/control/resources/index.py @@ -131,10 +131,10 @@ def delete(self, name: str, timeout: Optional[int] = None): return if timeout is None: - while self.has_index(name): + while self.has(name): time.sleep(5) else: - while self.has_index(name) and timeout >= 0: + while self.has(name) and timeout >= 0: time.sleep(5) timeout -= 5 if timeout and timeout < 0: @@ -183,3 +183,9 @@ def configure( tags=tags, ) api_instance.configure_index(name, configure_index_request=req) + + def _get_host(self, name: str) -> str: + """@private""" + return self.index_host_store.get_host( + api=self.index_api, config=self.config, index_name=name + ) diff --git a/pinecone/control/resources_asyncio/index.py b/pinecone/control/resources_asyncio/index.py index 17faffc5..d1bc037f 100644 --- a/pinecone/control/resources_asyncio/index.py +++ b/pinecone/control/resources_asyncio/index.py @@ -16,8 +16,8 @@ GcpRegion, AzureRegion, ) -from .types import CreateIndexForModelEmbedTypedDict -from .request_factory import PineconeDBControlRequestFactory +from pinecone.control.types import CreateIndexForModelEmbedTypedDict +from pinecone.control.request_factory import PineconeDBControlRequestFactory logger = logging.getLogger(__name__) """ @private """ diff --git a/pinecone/grpc/pinecone.py b/pinecone/grpc/pinecone.py index c78481ff..b03b139a 100644 --- a/pinecone/grpc/pinecone.py +++ b/pinecone/grpc/pinecone.py @@ -122,7 +122,7 @@ def Index(self, name: str = "", host: str = "", **kwargs): raise ValueError("Either name or host must be specified") # Use host if it is provided, otherwise get host from describe_index - index_host = host or self.index_host_store.get_host(self.index_api, self.config, name) + index_host = host or self.db.index._get_host(name) pt = kwargs.pop("pool_threads", None) or self.pool_threads 
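
A note on the timeout handling shared by create() and delete() in the resource
classes above: timeout=-1 returns immediately without waiting, timeout=None
polls every 5 seconds indefinitely, and any other value polls in 5-second
steps before raising TimeoutError. A minimal sketch of the same contract
(wait_until_ready and its describe parameter are illustrative names, not part
of the SDK):

    import time
    from typing import Optional

    def wait_until_ready(describe, name: str, timeout: Optional[int] = None) -> None:
        # describe(name) is assumed to return an object whose status.ready
        # flag flips to True once the index is available, mirroring the
        # describe() methods in the resource classes above.
        if timeout == -1:
            return  # caller opted out of waiting
        waited = 0
        while not describe(name).status.ready:
            if timeout is not None:
                if timeout < 0:
                    raise TimeoutError(
                        f"Index {name} not ready after {waited}s; "
                        "call describe_index() to confirm index status."
                    )
                timeout -= 5
            waited += 5
            time.sleep(5)

The same 5-second interval is hard-coded in the sync and asyncio resource
classes; only the sleep primitive differs (time.sleep vs. asyncio.sleep).
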
From a08ae7348ea40ee9c6b660d6070eb574785e2aae Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 9 Apr 2025 13:07:12 -0400 Subject: [PATCH 07/13] WIP --- pinecone/__init__.py | 10 +- pinecone/config/__init__.py | 1 + pinecone/config/config.py | 14 +- .../{openapi.py => openapi_config_factory.py} | 13 +- pinecone/config/openapi_configuration.py | 441 +++++++++++++++++ pinecone/control/__init__.py | 6 - pinecone/core/openapi/db_control/__init__.py | 2 +- pinecone/core/openapi/db_data/__init__.py | 2 +- pinecone/core/openapi/inference/__init__.py | 2 +- pinecone/data/__init__.py | 34 -- pinecone/db_control/__init__.py | 7 + .../{control => db_control}/db_control.py | 14 +- .../db_control_asyncio.py | 8 +- pinecone/{ => db_control}/enums/__init__.py | 0 pinecone/{ => db_control}/enums/clouds.py | 0 .../enums/deletion_protection.py | 0 pinecone/{ => db_control}/enums/metric.py | 0 .../enums/pod_index_environment.py | 0 pinecone/{ => db_control}/enums/pod_type.py | 0 .../{ => db_control}/enums/vector_type.py | 0 .../index_host_store.py | 0 pinecone/{ => db_control}/models/__init__.py | 2 +- .../models/collection_description.py | 0 .../models/collection_list.py | 0 .../models/index_description.py | 0 .../{ => db_control}/models/index_list.py | 0 .../{ => db_control}/models/index_model.py | 2 +- .../{ => db_control}/models/list_response.py | 0 pinecone/{ => db_control}/models/pod_spec.py | 0 .../models/serverless_spec.py | 0 .../{control => db_control}/repr_overrides.py | 4 +- .../request_factory.py | 4 +- .../resources}/__init__.py | 0 .../db_control/resources/asyncio/__init__.py | 2 + .../resources/asyncio}/collection.py | 4 +- .../resources/asyncio}/index.py | 12 +- .../resources/sync}/__init__.py | 0 .../resources/sync}/collection.py | 4 +- .../resources/sync}/index.py | 13 +- .../{control => db_control}/types/__init__.py | 0 .../types/create_index_for_model_embed.py | 4 +- pinecone/db_data/__init__.py | 53 +++ .../{data => db_data}/dataclasses/__init__.py | 0 .../dataclasses/fetch_response.py | 0 .../dataclasses/search_query.py | 0 .../dataclasses/search_query_vector.py | 0 .../dataclasses/search_rerank.py | 0 .../dataclasses/sparse_values.py | 0 .../{data => db_data}/dataclasses/utils.py | 0 .../{data => db_data}/dataclasses/vector.py | 0 pinecone/{data => db_data}/errors.py | 0 pinecone/db_data/features/__init__.py | 13 + .../features/bulk_import/__init__.py | 0 .../features/bulk_import/bulk_import.py | 0 .../bulk_import/bulk_import_asyncio.py | 0 .../bulk_import_request_factory.py | 0 .../db_data/features/inference/__init__.py | 11 + pinecone/{data => db_data}/import_error.py | 0 pinecone/{data => db_data}/index.py | 0 pinecone/{data => db_data}/index_asyncio.py | 0 .../index_asyncio_interface.py | 0 pinecone/{data => db_data}/interfaces.py | 0 .../query_results_aggregator.py | 0 pinecone/{data => db_data}/request_factory.py | 0 .../sparse_values_factory.py | 0 pinecone/{data => db_data}/types/__init__.py | 0 .../{data => db_data}/types/query_filter.py | 0 .../types/search_query_typed_dict.py | 0 .../types/search_query_vector_typed_dict.py | 0 .../types/search_rerank_typed_dict.py | 0 .../types/sparse_vector_typed_dict.py | 0 .../types/vector_metadata_dict.py | 0 .../{data => db_data}/types/vector_tuple.py | 0 .../types/vector_typed_dict.py | 0 pinecone/{data => db_data}/vector_factory.py | 0 pinecone/exceptions/__init__.py | 6 +- pinecone/exceptions/exceptions.py | 141 +++++- pinecone/grpc/__init__.py | 2 +- pinecone/grpc/index_grpc.py | 6 +- pinecone/grpc/pinecone.py | 4 +- 
pinecone/grpc/sparse_values_factory.py | 4 +- pinecone/grpc/utils.py | 2 +- pinecone/grpc/vector_factory_grpc.py | 4 +- .../{data/features => }/inference/__init__.py | 0 .../features => }/inference/inference.py | 0 .../inference/inference_asyncio.py | 0 .../inference/inference_request_builder.py | 0 .../inference/models/__init__.py | 0 .../inference/models/embedding_list.py | 0 .../{ => inference}/models/index_embed.py | 4 +- .../inference/models/rerank_result.py | 0 .../features => }/inference/repl_overrides.py | 0 .../langchain_import_warnings.py | 0 .../legacy_pinecone_interface.py | 6 +- pinecone/openapi_support/__init__.py | 2 +- pinecone/openapi_support/api_client.py | 21 +- .../openapi_support/asyncio_api_client.py | 2 +- pinecone/openapi_support/configuration.py | 442 +----------------- .../openapi_support/configuration_lazy.py | 7 + pinecone/openapi_support/endpoint_utils.py | 2 +- pinecone/openapi_support/exceptions.py | 141 +----- pinecone/openapi_support/model_utils.py | 3 +- pinecone/openapi_support/rest_aiohttp.py | 2 +- pinecone/openapi_support/rest_urllib3.py | 2 +- pinecone/{control => }/pinecone.py | 24 +- pinecone/{control => }/pinecone_asyncio.py | 31 +- .../pinecone_interface_asyncio.py | 6 +- pinecone/utils/docslinks.py | 7 +- pinecone/utils/plugin_aware.py | 2 +- .../data/test_query_namespaces_sparse.py | 2 +- .../data/test_search_and_upsert_records.py | 2 +- .../data/test_upsert_from_dataframe.py | 2 +- tests/integration/data_asyncio/conftest.py | 2 +- tests/integration/helpers/helpers.py | 2 +- tests/perf/test_query_results_aggregator.py | 2 +- tests/unit/data/test_bulk_import.py | 2 +- tests/unit/data/test_request_factory.py | 2 +- tests/unit/data/test_vector_factory.py | 2 +- tests/unit/test_config.py | 2 +- tests/unit/test_config_builder.py | 2 +- tests/unit/test_index.py | 2 +- tests/unit/test_query_results_aggregator.py | 2 +- tests/upgrade/test_v6_upgrade.py | 174 +++++++ 123 files changed, 1000 insertions(+), 755 deletions(-) rename pinecone/config/{openapi.py => openapi_config_factory.py} (92%) create mode 100644 pinecone/config/openapi_configuration.py delete mode 100644 pinecone/control/__init__.py delete mode 100644 pinecone/data/__init__.py create mode 100644 pinecone/db_control/__init__.py rename pinecone/{control => db_control}/db_control.py (79%) rename pinecone/{control => db_control}/db_control_asyncio.py (86%) rename pinecone/{ => db_control}/enums/__init__.py (100%) rename pinecone/{ => db_control}/enums/clouds.py (100%) rename pinecone/{ => db_control}/enums/deletion_protection.py (100%) rename pinecone/{ => db_control}/enums/metric.py (100%) rename pinecone/{ => db_control}/enums/pod_index_environment.py (100%) rename pinecone/{ => db_control}/enums/pod_type.py (100%) rename pinecone/{ => db_control}/enums/vector_type.py (100%) rename pinecone/{control => db_control}/index_host_store.py (100%) rename pinecone/{ => db_control}/models/__init__.py (90%) rename pinecone/{ => db_control}/models/collection_description.py (100%) rename pinecone/{ => db_control}/models/collection_list.py (100%) rename pinecone/{ => db_control}/models/index_description.py (100%) rename pinecone/{ => db_control}/models/index_list.py (100%) rename pinecone/{ => db_control}/models/index_model.py (81%) rename pinecone/{ => db_control}/models/list_response.py (100%) rename pinecone/{ => db_control}/models/pod_spec.py (100%) rename pinecone/{ => db_control}/models/serverless_spec.py (100%) rename pinecone/{control => db_control}/repr_overrides.py (79%) rename 
pinecone/{control => db_control}/request_factory.py (98%) rename pinecone/{data/features => db_control/resources}/__init__.py (100%) create mode 100644 pinecone/db_control/resources/asyncio/__init__.py rename pinecone/{control/resources_asyncio => db_control/resources/asyncio}/collection.py (85%) rename pinecone/{control/resources_asyncio => db_control/resources/asyncio}/index.py (93%) rename pinecone/{control/resources => db_control/resources/sync}/__init__.py (100%) rename pinecone/{control/resources => db_control/resources/sync}/collection.py (84%) rename pinecone/{control/resources => db_control/resources/sync}/index.py (92%) rename pinecone/{control => db_control}/types/__init__.py (100%) rename pinecone/{control => db_control}/types/create_index_for_model_embed.py (72%) create mode 100644 pinecone/db_data/__init__.py rename pinecone/{data => db_data}/dataclasses/__init__.py (100%) rename pinecone/{data => db_data}/dataclasses/fetch_response.py (100%) rename pinecone/{data => db_data}/dataclasses/search_query.py (100%) rename pinecone/{data => db_data}/dataclasses/search_query_vector.py (100%) rename pinecone/{data => db_data}/dataclasses/search_rerank.py (100%) rename pinecone/{data => db_data}/dataclasses/sparse_values.py (100%) rename pinecone/{data => db_data}/dataclasses/utils.py (100%) rename pinecone/{data => db_data}/dataclasses/vector.py (100%) rename pinecone/{data => db_data}/errors.py (100%) create mode 100644 pinecone/db_data/features/__init__.py rename pinecone/{data => db_data}/features/bulk_import/__init__.py (100%) rename pinecone/{data => db_data}/features/bulk_import/bulk_import.py (100%) rename pinecone/{data => db_data}/features/bulk_import/bulk_import_asyncio.py (100%) rename pinecone/{data => db_data}/features/bulk_import/bulk_import_request_factory.py (100%) create mode 100644 pinecone/db_data/features/inference/__init__.py rename pinecone/{data => db_data}/import_error.py (100%) rename pinecone/{data => db_data}/index.py (100%) rename pinecone/{data => db_data}/index_asyncio.py (100%) rename pinecone/{data => db_data}/index_asyncio_interface.py (100%) rename pinecone/{data => db_data}/interfaces.py (100%) rename pinecone/{data => db_data}/query_results_aggregator.py (100%) rename pinecone/{data => db_data}/request_factory.py (100%) rename pinecone/{data => db_data}/sparse_values_factory.py (100%) rename pinecone/{data => db_data}/types/__init__.py (100%) rename pinecone/{data => db_data}/types/query_filter.py (100%) rename pinecone/{data => db_data}/types/search_query_typed_dict.py (100%) rename pinecone/{data => db_data}/types/search_query_vector_typed_dict.py (100%) rename pinecone/{data => db_data}/types/search_rerank_typed_dict.py (100%) rename pinecone/{data => db_data}/types/sparse_vector_typed_dict.py (100%) rename pinecone/{data => db_data}/types/vector_metadata_dict.py (100%) rename pinecone/{data => db_data}/types/vector_tuple.py (100%) rename pinecone/{data => db_data}/types/vector_typed_dict.py (100%) rename pinecone/{data => db_data}/vector_factory.py (100%) rename pinecone/{data/features => }/inference/__init__.py (100%) rename pinecone/{data/features => }/inference/inference.py (100%) rename pinecone/{data/features => }/inference/inference_asyncio.py (100%) rename pinecone/{data/features => }/inference/inference_request_builder.py (100%) rename pinecone/{data/features => }/inference/models/__init__.py (100%) rename pinecone/{data/features => }/inference/models/embedding_list.py (100%) rename pinecone/{ => inference}/models/index_embed.py (94%) 
rename pinecone/{data/features => }/inference/models/rerank_result.py (100%) rename pinecone/{data/features => }/inference/repl_overrides.py (100%) rename pinecone/{control => }/langchain_import_warnings.py (100%) rename pinecone/{control => }/legacy_pinecone_interface.py (99%) create mode 100644 pinecone/openapi_support/configuration_lazy.py rename pinecone/{control => }/pinecone.py (93%) rename pinecone/{control => }/pinecone_asyncio.py (91%) rename pinecone/{control => }/pinecone_interface_asyncio.py (99%) create mode 100644 tests/upgrade/test_v6_upgrade.py diff --git a/pinecone/__init__.py b/pinecone/__init__.py index 13a65bd1..4af444d7 100644 --- a/pinecone/__init__.py +++ b/pinecone/__init__.py @@ -4,12 +4,12 @@ from .deprecated_plugins import check_for_deprecated_plugins from .deprecation_warnings import * -from .config import * +from .pinecone import Pinecone +from .pinecone_asyncio import PineconeAsyncio from .exceptions import * -from .control import * -from .data import * -from .models import * -from .enums import * +# from .config import * +# from .db_control import * +# from .db_data import * from .utils import __version__ diff --git a/pinecone/config/__init__.py b/pinecone/config/__init__.py index 7abb7278..f292622f 100644 --- a/pinecone/config/__init__.py +++ b/pinecone/config/__init__.py @@ -2,6 +2,7 @@ import os from .config import ConfigBuilder, Config +from .openapi_configuration import Configuration as OpenApiConfiguration from .pinecone_config import PineconeConfig if os.getenv("PINECONE_DEBUG") is not None: diff --git a/pinecone/config/config.py b/pinecone/config/config.py index 01a703e0..86c03649 100644 --- a/pinecone/config/config.py +++ b/pinecone/config/config.py @@ -1,9 +1,11 @@ -from typing import NamedTuple, Optional, Dict +from typing import NamedTuple, Optional, Dict, TYPE_CHECKING import os -from pinecone.exceptions.exceptions import PineconeConfigurationError -from pinecone.config.openapi import OpenApiConfigFactory -from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration +from pinecone.exceptions import PineconeConfigurationError +from pinecone.config.openapi_config_factory import OpenApiConfigFactory + +if TYPE_CHECKING: + from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration # Duplicated this util to help resolve circular imports @@ -81,8 +83,8 @@ def build( @staticmethod def build_openapi_config( - config: Config, openapi_config: Optional[OpenApiConfiguration] = None, **kwargs - ) -> OpenApiConfiguration: + config: Config, openapi_config: Optional["OpenApiConfiguration"] = None, **kwargs + ) -> "OpenApiConfiguration": if openapi_config: openapi_config = OpenApiConfigFactory.copy( openapi_config=openapi_config, api_key=config.api_key, host=config.host diff --git a/pinecone/config/openapi.py b/pinecone/config/openapi_config_factory.py similarity index 92% rename from pinecone/config/openapi.py rename to pinecone/config/openapi_config_factory.py index d6bdf702..d7730458 100644 --- a/pinecone/config/openapi.py +++ b/pinecone/config/openapi_config_factory.py @@ -1,13 +1,11 @@ import sys -from typing import List, Optional +from typing import List, Optional, Tuple import certifi import socket import copy -from urllib3.connection import HTTPConnection - -from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration +from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration TCP_KEEPINTVL = 60 # Sec TCP_KEEPIDLE = 300 # Sec @@ -58,7 +56,7 
@@ def _get_socket_options(
         keep_alive_idle_sec: int = TCP_KEEPIDLE,
         keep_alive_interval_sec: int = TCP_KEEPINTVL,
         keep_alive_tries: int = TCP_KEEPCNT,
-    ) -> List[tuple]:
+    ) -> List[Tuple[int, int, int]]:
         """
         Returns the socket options to pass to OpenAPI's Rest client
         Args:
@@ -72,7 +70,10 @@ def _get_socket_options(
         """
 
         # Source: https://www.finbourne.com/blog/the-mysterious-hanging-client-tcp-keep-alives
-        socket_params = HTTPConnection.default_socket_options
+        # urllib3.connection.HTTPConnection.default_socket_options
+        socket_params = [
+            (socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+        ]
 
         if not do_keep_alive:
             return socket_params
diff --git a/pinecone/config/openapi_configuration.py b/pinecone/config/openapi_configuration.py
new file mode 100644
index 00000000..9be701be
--- /dev/null
+++ b/pinecone/config/openapi_configuration.py
@@ -0,0 +1,441 @@
+import copy
+import logging
+import multiprocessing
+
+from http import client as http_client
+from pinecone.exceptions import PineconeApiValueError
+from typing import TypedDict
+
+
+class HostSetting(TypedDict):
+    url: str
+    description: str
+
+
+JSON_SCHEMA_VALIDATION_KEYWORDS = {
+    "multipleOf",
+    "maximum",
+    "exclusiveMaximum",
+    "minimum",
+    "exclusiveMinimum",
+    "maxLength",
+    "minLength",
+    "pattern",
+    "maxItems",
+    "minItems",
+}
+
+
+class Configuration:
+    """Class to hold the configuration of the API client.
+
+    :param host: Base url
+    :param api_key: Dict to store API key(s).
+      Each entry in the dict specifies an API key.
+      The dict key is the name of the security scheme in the OAS specification.
+      The dict value is the API key secret.
+    :param api_key_prefix: Dict to store API prefix (e.g. Bearer)
+      The dict key is the name of the security scheme in the OAS specification.
+      The dict value is an API key prefix when generating the auth data.
+    :param discard_unknown_keys: Boolean value indicating whether to discard
+      unknown properties. A server may send a response that includes additional
+      properties that are not known by the client in the following scenarios:
+      1. The OpenAPI document is incomplete, i.e. it does not match the server
+         implementation.
+      2. The client was generated using an older version of the OpenAPI document
+         and the server has been upgraded since then.
+      If a schema in the OpenAPI document defines the additionalProperties attribute,
+      then all undeclared properties received by the server are injected into the
+      additional properties map. In that case, there are undeclared properties, and
+      nothing to discard.
+    :param disabled_client_side_validations (string): Comma-separated list of
+      JSON schema validation keywords to disable JSON schema structural validation
+      rules. The following keywords may be specified: multipleOf, maximum,
+      exclusiveMaximum, minimum, exclusiveMinimum, maxLength, minLength, pattern,
+      maxItems, minItems.
+      By default, the validation is performed for data generated locally by the client
+      and data received from the server, independent of any validation performed by
+      the server side. If the input data does not satisfy the JSON schema validation
+      rules specified in the OpenAPI document, an exception is raised.
+      If disabled_client_side_validations is set, structural validation is
+      disabled. This can be useful to troubleshoot data validation problems, such as
+      when the OpenAPI document validation rules do not match the actual API data
+      received from the server.
+    :param server_operation_index: Mapping from operation ID to an index to server
+      configuration.
+ :param server_operation_variables: Mapping from operation ID to a mapping with + string values to replace variables in templated server configuration. + The validation of enums is performed for variables with defined enum values before. + :param ssl_ca_cert: str - the path to a file of concatenated CA certificates + in PEM format + + :Example: + + API Key Authentication Example. + Given the following security scheme in the OpenAPI specification: + components: + securitySchemes: + cookieAuth: # name for the security scheme + type: apiKey + in: cookie + name: JSESSIONID # cookie name + + You can programmatically set the cookie: + + conf = pinecone.openapi_support.Configuration( + api_key={'cookieAuth': 'abc123'} + api_key_prefix={'cookieAuth': 'JSESSIONID'} + ) + + The following cookie will be added to the HTTP request: + Cookie: JSESSIONID abc123 + """ + + _default = None + + def __init__( + self, + host=None, + api_key=None, + api_key_prefix=None, + discard_unknown_keys=False, + disabled_client_side_validations="", + server_index=None, + server_variables=None, + server_operation_index=None, + server_operation_variables=None, + ssl_ca_cert=None, + ): + """Constructor""" + self._base_path = "https://api.pinecone.io" if host is None else host + """Default Base url + """ + self.server_index = 0 if server_index is None and host is None else server_index + self.server_operation_index = server_operation_index or {} + """Default server index + """ + self.server_variables = server_variables or {} + self.server_operation_variables = server_operation_variables or {} + """Default server variables + """ + self.temp_folder_path = None + """Temp file folder for downloading files + """ + # Authentication Settings + self.api_key = {} + if api_key: + self.api_key = api_key + """dict to store API key(s) + """ + self.api_key_prefix = {} + if api_key_prefix: + self.api_key_prefix = api_key_prefix + """dict to store API prefix (e.g. Bearer) + """ + self.refresh_api_key_hook = None + """function hook to refresh API key if expired + """ + self.discard_unknown_keys = discard_unknown_keys + self.disabled_client_side_validations = disabled_client_side_validations + self.logger = {} + """Logging Settings + """ + self.logger["package_logger"] = logging.getLogger("pinecone.openapi_support") + self.logger["urllib3_logger"] = logging.getLogger("urllib3") + self.logger_format = "%(asctime)s %(levelname)s %(message)s" + """Log format + """ + self.logger_stream_handler = None + """Log stream handler + """ + self.logger_file_handler = None + """Log file handler + """ + self.logger_file = None + """Debug file location + """ + self.debug = False + """Debug switch + """ + + self.verify_ssl = True + """SSL/TLS verification + Set this to false to skip verifying SSL certificate when calling API + from https server. + """ + self.ssl_ca_cert = ssl_ca_cert + """Set this to customize the certificate file to verify the peer. + """ + self.cert_file = None + """client certificate file + """ + self.key_file = None + """client key file + """ + self.assert_hostname = None + """Set this to True/False to enable/disable SSL hostname verification. + """ + + self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 + """urllib3 connection pool's maximum number of connections saved + per pool. urllib3 uses 1 connection as default value, but this is + not the best value when you are making a lot of possibly parallel + requests to the same host, which is often the case here. + cpu_count * 5 is used as default value to increase performance. 
+        """
+
+        self.proxy = None
+        """Proxy URL
+        """
+        self.proxy_headers = None
+        """Proxy headers
+        """
+        self.safe_chars_for_path_param = ""
+        """Safe chars for path_param
+        """
+        self.retries = None
+        """Adding retries to override urllib3 default value 3
+        """
+        # Enable client side validation
+        self.client_side_validation = True
+
+        # Options to pass down to the underlying urllib3 socket
+        self.socket_options = None
+
+    def __deepcopy__(self, memo):
+        cls = self.__class__
+        result = cls.__new__(cls)
+        memo[id(self)] = result
+        for k, v in self.__dict__.items():
+            if k not in ("logger", "logger_file_handler"):
+                setattr(result, k, copy.deepcopy(v, memo))
+        # shallow copy of loggers
+        result.logger = copy.copy(self.logger)
+        # use setters to configure loggers
+        result.logger_file = self.logger_file
+        result.debug = self.debug
+        return result
+
+    def __setattr__(self, name, value):
+        object.__setattr__(self, name, value)
+        if name == "disabled_client_side_validations":
+            s = set(filter(None, value.split(",")))
+            for v in s:
+                if v not in JSON_SCHEMA_VALIDATION_KEYWORDS:
+                    raise PineconeApiValueError("Invalid keyword: '{0}'".format(v))
+            self._disabled_client_side_validations = s
+
+    @classmethod
+    def set_default(cls, default):
+        """Set default instance of configuration.
+
+        It stores default configuration, which can be
+        returned by get_default_copy method.
+
+        :param default: object of Configuration
+        """
+        cls._default = copy.deepcopy(default)
+
+    @classmethod
+    def get_default_copy(cls):
+        """Return new instance of configuration.
+
+        This method returns a newly created object of the Configuration
+        class (using the default constructor), or a copy of the default
+        configuration passed by the set_default method.
+
+        :return: The configuration object.
+        """
+        if cls._default is not None:
+            return copy.deepcopy(cls._default)
+        return Configuration()
+
+    @property
+    def logger_file(self):
+        """The logger file.
+
+        If the logger_file is None, then add stream handler and remove file
+        handler. Otherwise, add file handler and remove stream handler.
+
+        :param value: The logger_file path.
+        :type: str
+        """
+        return self.__logger_file
+
+    @logger_file.setter
+    def logger_file(self, value):
+        """The logger file.
+
+        If the logger_file is None, then add stream handler and remove file
+        handler. Otherwise, add file handler and remove stream handler.
+
+        :param value: The logger_file path.
+        :type: str
+        """
+        self.__logger_file = value
+        if self.__logger_file:
+            # If set logging file,
+            # then add file handler and remove stream handler.
+            self.logger_file_handler = logging.FileHandler(self.__logger_file)
+            self.logger_file_handler.setFormatter(self.logger_formatter)
+            for _, logger in self.logger.items():
+                logger.addHandler(self.logger_file_handler)
+
+    @property
+    def debug(self):
+        """Debug status
+
+        :param value: The debug status, True or False.
+ :type: bool + """ + self.__debug = value + if self.__debug: + # if debug status is True, turn on debug logging + for _, logger in self.logger.items(): + logger.setLevel(logging.DEBUG) + # turn on http_client debug + http_client.HTTPConnection.debuglevel = 1 + else: + # if debug status is False, turn off debug logging, + # setting log level to default `logging.WARNING` + for _, logger in self.logger.items(): + logger.setLevel(logging.WARNING) + # turn off http_client debug + http_client.HTTPConnection.debuglevel = 0 + + @property + def logger_format(self): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + return self.__logger_format + + @logger_format.setter + def logger_format(self, value): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + self.__logger_format = value + self.logger_formatter = logging.Formatter(self.__logger_format) + + def get_api_key_with_prefix(self, identifier, alias=None): + """Gets API key (with prefix if set). + + :param identifier: The identifier of apiKey. + :param alias: The alternative identifier of apiKey. + :return: The token for api key authentication. + """ + if self.refresh_api_key_hook is not None: + self.refresh_api_key_hook(self) + key = self.api_key.get(identifier, self.api_key.get(alias) if alias is not None else None) + if key: + prefix = self.api_key_prefix.get(identifier) + if prefix: + return "%s %s" % (prefix, key) + else: + return key + + def auth_settings(self): + """Gets Auth Settings dict for api client. + + :return: The Auth Settings information dict. + """ + auth = {} + if "ApiKeyAuth" in self.api_key: + auth["ApiKeyAuth"] = { + "type": "api_key", + "in": "header", + "key": "Api-Key", + "value": self.get_api_key_with_prefix("ApiKeyAuth"), + } + return auth + + def get_host_settings(self): + """Gets an array of host settings + + :return: An array of host settings + """ + return [{"url": "https://api.pinecone.io", "description": "Production API endpoints"}] + + def get_host_from_settings(self, index, variables=None, servers=None): + """Gets host URL based on the index and variables + :param index: array index of the host settings + :param variables: hash of variable and the corresponding value + :param servers: an array of host settings or None + :return: URL based on host settings + """ + if index is None: + return self._base_path + + variables = {} if variables is None else variables + servers = self.get_host_settings() if servers is None else servers + + try: + server = servers[index] + except IndexError: + raise ValueError( + "Invalid index {0} when selecting the host settings. Must be less than {1}".format( + index, len(servers) + ) + ) + + url = server["url"] + + # go through variables and replace placeholders + for variable_name, variable in server.get("variables", {}).items(): + used_value = variables.get(variable_name, variable["default_value"]) + + if "enum_values" in variable and used_value not in variable["enum_values"]: + raise ValueError( + "The variable `{0}` in the host URL has invalid value {1}. 
Must be {2}.".format( + variable_name, variables[variable_name], variable["enum_values"] + ) + ) + + url = url.replace("{" + variable_name + "}", used_value) + + return url + + @property + def host(self): + """Return generated host.""" + return self.get_host_from_settings(self.server_index, variables=self.server_variables) + + @host.setter + def host(self, value): + """Fix base path.""" + self._base_path = value + self.server_index = None + + def __repr__(self): + attrs = [ + f"host={self.host}", + "api_key=***", + f"api_key_prefix={self.api_key_prefix}", + f"connection_pool_maxsize={self.connection_pool_maxsize}", + f"discard_unknown_keys={self.discard_unknown_keys}", + f"disabled_client_side_validations={self.disabled_client_side_validations}", + f"server_index={self.server_index}", + f"server_variables={self.server_variables}", + f"server_operation_index={self.server_operation_index}", + f"server_operation_variables={self.server_operation_variables}", + f"ssl_ca_cert={self.ssl_ca_cert}", + ] + return f"Configuration({', '.join(attrs)})" diff --git a/pinecone/control/__init__.py b/pinecone/control/__init__.py deleted file mode 100644 index a26e352a..00000000 --- a/pinecone/control/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .pinecone import Pinecone -from .pinecone_asyncio import PineconeAsyncio - -from .repr_overrides import install_repr_overrides - -install_repr_overrides() diff --git a/pinecone/core/openapi/db_control/__init__.py b/pinecone/core/openapi/db_control/__init__.py index 1a6949bb..31408552 100644 --- a/pinecone/core/openapi/db_control/__init__.py +++ b/pinecone/core/openapi/db_control/__init__.py @@ -17,7 +17,7 @@ from pinecone.openapi_support.api_client import ApiClient # import Configuration -from pinecone.openapi_support.configuration import Configuration +from pinecone.config.openapi_configuration import Configuration # import exceptions from pinecone.openapi_support.exceptions import PineconeException diff --git a/pinecone/core/openapi/db_data/__init__.py b/pinecone/core/openapi/db_data/__init__.py index e8cbbfe1..76701561 100644 --- a/pinecone/core/openapi/db_data/__init__.py +++ b/pinecone/core/openapi/db_data/__init__.py @@ -17,7 +17,7 @@ from pinecone.openapi_support.api_client import ApiClient # import Configuration -from pinecone.openapi_support.configuration import Configuration +from pinecone.config.openapi_configuration import Configuration # import exceptions from pinecone.openapi_support.exceptions import PineconeException diff --git a/pinecone/core/openapi/inference/__init__.py b/pinecone/core/openapi/inference/__init__.py index d878080c..9bf0fcdb 100644 --- a/pinecone/core/openapi/inference/__init__.py +++ b/pinecone/core/openapi/inference/__init__.py @@ -17,7 +17,7 @@ from pinecone.openapi_support.api_client import ApiClient # import Configuration -from pinecone.openapi_support.configuration import Configuration +from pinecone.config.openapi_configuration import Configuration # import exceptions from pinecone.openapi_support.exceptions import PineconeException diff --git a/pinecone/data/__init__.py b/pinecone/data/__init__.py deleted file mode 100644 index 8e040056..00000000 --- a/pinecone/data/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -from .index import ( - Index as _Index, - FetchResponse, - QueryResponse, - DescribeIndexStatsResponse, - UpsertResponse, - SparseValues, - Vector, -) -from .dataclasses import * -from .import_error import ( - Index, - IndexClientInstantiationError, - Inference, - InferenceInstantiationError, -) -from .index_asyncio 
import * -from .errors import ( - VectorDictionaryMissingKeysError, - VectorDictionaryExcessKeysError, - VectorTupleLengthError, - SparseValuesTypeError, - SparseValuesMissingKeysError, - SparseValuesDictionaryExpectedError, - MetadataDictionaryExpectedError, -) - -from .features.bulk_import import ImportErrorMode -from .features.inference import ( - Inference as _Inference, - AsyncioInference as _AsyncioInference, - RerankModel, - EmbedModel, -) diff --git a/pinecone/db_control/__init__.py b/pinecone/db_control/__init__.py new file mode 100644 index 00000000..73d82468 --- /dev/null +++ b/pinecone/db_control/__init__.py @@ -0,0 +1,7 @@ +from .enums import * +from .models import * +from .db_control import DBControl +from .db_control_asyncio import DBControlAsyncio +from .repr_overrides import install_repr_overrides + +install_repr_overrides() diff --git a/pinecone/control/db_control.py b/pinecone/db_control/db_control.py similarity index 79% rename from pinecone/control/db_control.py rename to pinecone/db_control/db_control.py index ca9f54d5..f03f349d 100644 --- a/pinecone/control/db_control.py +++ b/pinecone/db_control/db_control.py @@ -12,8 +12,8 @@ """ @private """ if TYPE_CHECKING: - from .resources.index import IndexResource - from .resources.collection import CollectionResource + from .resources.sync.index import IndexResource + from .resources.sync.collection import CollectionResource class DBControl: @@ -21,6 +21,12 @@ def __init__(self, config, openapi_config, pool_threads): self.config = config """ @private """ + self.openapi_config = openapi_config + """ @private """ + + self.pool_threads = pool_threads + """ @private """ + self.index_api = setup_openapi_client( api_client_klass=ApiClient, api_klass=ManageIndexesApi, @@ -40,7 +46,7 @@ def __init__(self, config, openapi_config, pool_threads): @property def index(self) -> "IndexResource": if self._index_resource is None: - from .resources.index import IndexResource + from .resources.sync.index import IndexResource self._index_resource = IndexResource(index_api=self.index_api, config=self.config) return self._index_resource @@ -48,7 +54,7 @@ def index(self) -> "IndexResource": @property def collection(self) -> "CollectionResource": if self._collection_resource is None: - from .resources.collection import CollectionResource + from .resources.sync.collection import CollectionResource self._collection_resource = CollectionResource(self.index_api) return self._collection_resource diff --git a/pinecone/control/db_control_asyncio.py b/pinecone/db_control/db_control_asyncio.py similarity index 86% rename from pinecone/control/db_control_asyncio.py rename to pinecone/db_control/db_control_asyncio.py index 3f10df6b..2fce306e 100644 --- a/pinecone/control/db_control_asyncio.py +++ b/pinecone/db_control/db_control_asyncio.py @@ -12,8 +12,8 @@ if TYPE_CHECKING: - from .resources_asyncio.index import IndexResourceAsyncio - from .resources_asyncio.collection import CollectionResourceAsyncio + from .resources.asyncio.index import IndexResourceAsyncio + from .resources.asyncio.collection import CollectionResourceAsyncio class DBControlAsyncio: @@ -39,7 +39,7 @@ def __init__(self, config, openapi_config, pool_threads): @property def index(self) -> "IndexResourceAsyncio": if self._index_resource is None: - from .resources_asyncio.index import IndexResourceAsyncio + from .resources.asyncio.index import IndexResourceAsyncio self._index_resource = IndexResourceAsyncio( index_api=self.index_api, config=self.config @@ -49,7 +49,7 @@ def index(self) -> 
"IndexResourceAsyncio": @property def collection(self) -> "CollectionResourceAsyncio": if self._collection_resource is None: - from .resources_asyncio.collection import CollectionResourceAsyncio + from .resources.asyncio.collection import CollectionResourceAsyncio self._collection_resource = CollectionResourceAsyncio(self.index_api) return self._collection_resource diff --git a/pinecone/enums/__init__.py b/pinecone/db_control/enums/__init__.py similarity index 100% rename from pinecone/enums/__init__.py rename to pinecone/db_control/enums/__init__.py diff --git a/pinecone/enums/clouds.py b/pinecone/db_control/enums/clouds.py similarity index 100% rename from pinecone/enums/clouds.py rename to pinecone/db_control/enums/clouds.py diff --git a/pinecone/enums/deletion_protection.py b/pinecone/db_control/enums/deletion_protection.py similarity index 100% rename from pinecone/enums/deletion_protection.py rename to pinecone/db_control/enums/deletion_protection.py diff --git a/pinecone/enums/metric.py b/pinecone/db_control/enums/metric.py similarity index 100% rename from pinecone/enums/metric.py rename to pinecone/db_control/enums/metric.py diff --git a/pinecone/enums/pod_index_environment.py b/pinecone/db_control/enums/pod_index_environment.py similarity index 100% rename from pinecone/enums/pod_index_environment.py rename to pinecone/db_control/enums/pod_index_environment.py diff --git a/pinecone/enums/pod_type.py b/pinecone/db_control/enums/pod_type.py similarity index 100% rename from pinecone/enums/pod_type.py rename to pinecone/db_control/enums/pod_type.py diff --git a/pinecone/enums/vector_type.py b/pinecone/db_control/enums/vector_type.py similarity index 100% rename from pinecone/enums/vector_type.py rename to pinecone/db_control/enums/vector_type.py diff --git a/pinecone/control/index_host_store.py b/pinecone/db_control/index_host_store.py similarity index 100% rename from pinecone/control/index_host_store.py rename to pinecone/db_control/index_host_store.py diff --git a/pinecone/models/__init__.py b/pinecone/db_control/models/__init__.py similarity index 90% rename from pinecone/models/__init__.py rename to pinecone/db_control/models/__init__.py index 86306c1e..34003bfe 100644 --- a/pinecone/models/__init__.py +++ b/pinecone/db_control/models/__init__.py @@ -5,7 +5,7 @@ from .index_list import IndexList from .collection_list import CollectionList from .index_model import IndexModel -from .index_embed import IndexEmbed +from ...inference.models.index_embed import IndexEmbed __all__ = [ "CollectionDescription", diff --git a/pinecone/models/collection_description.py b/pinecone/db_control/models/collection_description.py similarity index 100% rename from pinecone/models/collection_description.py rename to pinecone/db_control/models/collection_description.py diff --git a/pinecone/models/collection_list.py b/pinecone/db_control/models/collection_list.py similarity index 100% rename from pinecone/models/collection_list.py rename to pinecone/db_control/models/collection_list.py diff --git a/pinecone/models/index_description.py b/pinecone/db_control/models/index_description.py similarity index 100% rename from pinecone/models/index_description.py rename to pinecone/db_control/models/index_description.py diff --git a/pinecone/models/index_list.py b/pinecone/db_control/models/index_list.py similarity index 100% rename from pinecone/models/index_list.py rename to pinecone/db_control/models/index_list.py diff --git a/pinecone/models/index_model.py b/pinecone/db_control/models/index_model.py 
similarity index 81% rename from pinecone/models/index_model.py rename to pinecone/db_control/models/index_model.py index 7deb2d7d..75ba1f30 100644 --- a/pinecone/models/index_model.py +++ b/pinecone/db_control/models/index_model.py @@ -1,4 +1,4 @@ -from pinecone.core.openapi.db_control.models import IndexModel as OpenAPIIndexModel +from pinecone.core.openapi.db_control.model.index_model import IndexModel as OpenAPIIndexModel class IndexModel: diff --git a/pinecone/models/list_response.py b/pinecone/db_control/models/list_response.py similarity index 100% rename from pinecone/models/list_response.py rename to pinecone/db_control/models/list_response.py diff --git a/pinecone/models/pod_spec.py b/pinecone/db_control/models/pod_spec.py similarity index 100% rename from pinecone/models/pod_spec.py rename to pinecone/db_control/models/pod_spec.py diff --git a/pinecone/models/serverless_spec.py b/pinecone/db_control/models/serverless_spec.py similarity index 100% rename from pinecone/models/serverless_spec.py rename to pinecone/db_control/models/serverless_spec.py diff --git a/pinecone/control/repr_overrides.py b/pinecone/db_control/repr_overrides.py similarity index 79% rename from pinecone/control/repr_overrides.py rename to pinecone/db_control/repr_overrides.py index 98e4b4d4..714b8dfb 100644 --- a/pinecone/control/repr_overrides.py +++ b/pinecone/db_control/repr_overrides.py @@ -1,6 +1,6 @@ from pinecone.utils import install_json_repr_override -from pinecone.models.index_model import IndexModel -from pinecone.core.openapi.db_control.models import CollectionModel +from pinecone.db_control.models.index_model import IndexModel +from pinecone.core.openapi.db_control.model.collection_model import CollectionModel def install_repr_overrides(): diff --git a/pinecone/control/request_factory.py b/pinecone/db_control/request_factory.py similarity index 98% rename from pinecone/control/request_factory.py rename to pinecone/db_control/request_factory.py index d4d0ce63..2e796745 100644 --- a/pinecone/control/request_factory.py +++ b/pinecone/db_control/request_factory.py @@ -19,10 +19,10 @@ PodSpec as PodSpecModel, PodSpecMetadataConfig, ) -from pinecone.models import ServerlessSpec, PodSpec, IndexModel, IndexEmbed +from pinecone.db_control.models import ServerlessSpec, PodSpec, IndexModel, IndexEmbed from pinecone.utils import parse_non_empty_args -from pinecone.enums import ( +from pinecone.db_control.enums import ( Metric, VectorType, DeletionProtection, diff --git a/pinecone/data/features/__init__.py b/pinecone/db_control/resources/__init__.py similarity index 100% rename from pinecone/data/features/__init__.py rename to pinecone/db_control/resources/__init__.py diff --git a/pinecone/db_control/resources/asyncio/__init__.py b/pinecone/db_control/resources/asyncio/__init__.py new file mode 100644 index 00000000..9a4841d3 --- /dev/null +++ b/pinecone/db_control/resources/asyncio/__init__.py @@ -0,0 +1,2 @@ +from .index import IndexResourceAsyncio +from .collection import CollectionResourceAsyncio diff --git a/pinecone/control/resources_asyncio/collection.py b/pinecone/db_control/resources/asyncio/collection.py similarity index 85% rename from pinecone/control/resources_asyncio/collection.py rename to pinecone/db_control/resources/asyncio/collection.py index 42d430eb..33c1f3d0 100644 --- a/pinecone/control/resources_asyncio/collection.py +++ b/pinecone/db_control/resources/asyncio/collection.py @@ -2,9 +2,9 @@ from typing import TYPE_CHECKING -from pinecone.models import CollectionList +from 
pinecone.db_control.models import CollectionList -from ..request_factory import PineconeDBControlRequestFactory +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory logger = logging.getLogger(__name__) """ @private """ diff --git a/pinecone/control/resources_asyncio/index.py b/pinecone/db_control/resources/asyncio/index.py similarity index 93% rename from pinecone/control/resources_asyncio/index.py rename to pinecone/db_control/resources/asyncio/index.py index d1bc037f..2d93ae01 100644 --- a/pinecone/control/resources_asyncio/index.py +++ b/pinecone/db_control/resources/asyncio/index.py @@ -3,10 +3,10 @@ from typing import Optional, Dict, Union -from pinecone.models import ServerlessSpec, PodSpec, IndexModel, IndexList, IndexEmbed +from pinecone.db_control.models import ServerlessSpec, PodSpec, IndexModel, IndexList, IndexEmbed from pinecone.utils import docslinks -from pinecone.enums import ( +from pinecone.db_control.enums import ( Metric, VectorType, DeletionProtection, @@ -16,8 +16,10 @@ GcpRegion, AzureRegion, ) -from pinecone.control.types import CreateIndexForModelEmbedTypedDict -from pinecone.control.request_factory import PineconeDBControlRequestFactory +from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory +from pinecone.core.openapi.db_control import API_VERSION + logger = logging.getLogger(__name__) """ @private """ @@ -101,7 +103,7 @@ async def is_ready() -> bool: while not await is_ready(): if timeout < 0: logger.error(f"Index {name} is not ready. Timeout reached.") - link = docslinks["API_DESCRIBE_INDEX"] + link = docslinks["API_DESCRIBE_INDEX"](API_VERSION) timeout_msg = ( f"Please call describe_index() to confirm index status. 
See docs at {link}" ) diff --git a/pinecone/control/resources/__init__.py b/pinecone/db_control/resources/sync/__init__.py similarity index 100% rename from pinecone/control/resources/__init__.py rename to pinecone/db_control/resources/sync/__init__.py diff --git a/pinecone/control/resources/collection.py b/pinecone/db_control/resources/sync/collection.py similarity index 84% rename from pinecone/control/resources/collection.py rename to pinecone/db_control/resources/sync/collection.py index dba438b1..1d8d11d8 100644 --- a/pinecone/control/resources/collection.py +++ b/pinecone/db_control/resources/sync/collection.py @@ -1,7 +1,7 @@ import logging -from pinecone.models import CollectionList -from ..request_factory import PineconeDBControlRequestFactory +from pinecone.db_control.models import CollectionList +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory logger = logging.getLogger(__name__) """ @private """ diff --git a/pinecone/control/resources/index.py b/pinecone/db_control/resources/sync/index.py similarity index 92% rename from pinecone/control/resources/index.py rename to pinecone/db_control/resources/sync/index.py index df6fd203..85876d6c 100644 --- a/pinecone/control/resources/index.py +++ b/pinecone/db_control/resources/sync/index.py @@ -2,12 +2,12 @@ import logging from typing import Optional, Dict, Union -from ..index_host_store import IndexHostStore +from pinecone.db_control.index_host_store import IndexHostStore -from pinecone.models import ServerlessSpec, PodSpec, IndexModel, IndexList, IndexEmbed +from pinecone.db_control.models import ServerlessSpec, PodSpec, IndexModel, IndexList, IndexEmbed from pinecone.utils import docslinks -from pinecone.enums import ( +from pinecone.db_control.enums import ( Metric, VectorType, DeletionProtection, @@ -17,8 +17,9 @@ GcpRegion, AzureRegion, ) -from ..types import CreateIndexForModelEmbedTypedDict -from ..request_factory import PineconeDBControlRequestFactory +from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory +from pinecone.core.openapi.db_control import API_VERSION logger = logging.getLogger(__name__) """ @private """ @@ -108,7 +109,7 @@ def is_ready() -> bool: while not is_ready(): if timeout < 0: logger.error(f"Index {name} is not ready. Timeout reached.") - link = docslinks["API_DESCRIBE_INDEX"] + link = docslinks["API_DESCRIBE_INDEX"](API_VERSION) timeout_msg = ( f"Please call describe_index() to confirm index status. 
See docs at {link}" ) diff --git a/pinecone/control/types/__init__.py b/pinecone/db_control/types/__init__.py similarity index 100% rename from pinecone/control/types/__init__.py rename to pinecone/db_control/types/__init__.py diff --git a/pinecone/control/types/create_index_for_model_embed.py b/pinecone/db_control/types/create_index_for_model_embed.py similarity index 72% rename from pinecone/control/types/create_index_for_model_embed.py rename to pinecone/db_control/types/create_index_for_model_embed.py index 123474a0..ab7e43ac 100644 --- a/pinecone/control/types/create_index_for_model_embed.py +++ b/pinecone/db_control/types/create_index_for_model_embed.py @@ -1,6 +1,6 @@ from typing import TypedDict, Dict, Union -from ...enums import Metric -from ...data.features.inference import EmbedModel +from pinecone.db_control.enums import Metric +from pinecone.inference import EmbedModel class CreateIndexForModelEmbedTypedDict(TypedDict): diff --git a/pinecone/db_data/__init__.py b/pinecone/db_data/__init__.py new file mode 100644 index 00000000..7c76f04f --- /dev/null +++ b/pinecone/db_data/__init__.py @@ -0,0 +1,53 @@ +from .index import ( + Index as _Index, + FetchResponse, + QueryResponse, + DescribeIndexStatsResponse, + UpsertResponse, + SparseValues, + Vector, +) +from .dataclasses import * +from .import_error import ( + Index, + IndexClientInstantiationError, + Inference, + InferenceInstantiationError, +) +from .index_asyncio import * +from .errors import ( + VectorDictionaryMissingKeysError, + VectorDictionaryExcessKeysError, + VectorTupleLengthError, + SparseValuesTypeError, + SparseValuesMissingKeysError, + SparseValuesDictionaryExpectedError, + MetadataDictionaryExpectedError, +) + +from .features.bulk_import import ImportErrorMode + + +import warnings + +def _get_deprecated_import(name, from_module, to_module): + warnings.warn( + f"The import of `{name}` from `{from_module}` has moved to `{to_module}`. " + f"Please update your imports from `from {from_module} import {name}` " + f"to `from {to_module} import {name}`. 
" + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, + ) + # Import from the new location + from pinecone.inference import Inference as _Inference, AsyncioInference as _AsyncioInference, RerankModel, EmbedModel + return locals()[name] + +moved = ["_Inference", "_AsyncioInference", "RerankModel", "EmbedModel"] + +def __getattr__(name): + if name in locals(): + return locals()[name] + elif name in moved: + return _get_deprecated_import(name, "pinecone.data", "pinecone.inference") + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + diff --git a/pinecone/data/dataclasses/__init__.py b/pinecone/db_data/dataclasses/__init__.py similarity index 100% rename from pinecone/data/dataclasses/__init__.py rename to pinecone/db_data/dataclasses/__init__.py diff --git a/pinecone/data/dataclasses/fetch_response.py b/pinecone/db_data/dataclasses/fetch_response.py similarity index 100% rename from pinecone/data/dataclasses/fetch_response.py rename to pinecone/db_data/dataclasses/fetch_response.py diff --git a/pinecone/data/dataclasses/search_query.py b/pinecone/db_data/dataclasses/search_query.py similarity index 100% rename from pinecone/data/dataclasses/search_query.py rename to pinecone/db_data/dataclasses/search_query.py diff --git a/pinecone/data/dataclasses/search_query_vector.py b/pinecone/db_data/dataclasses/search_query_vector.py similarity index 100% rename from pinecone/data/dataclasses/search_query_vector.py rename to pinecone/db_data/dataclasses/search_query_vector.py diff --git a/pinecone/data/dataclasses/search_rerank.py b/pinecone/db_data/dataclasses/search_rerank.py similarity index 100% rename from pinecone/data/dataclasses/search_rerank.py rename to pinecone/db_data/dataclasses/search_rerank.py diff --git a/pinecone/data/dataclasses/sparse_values.py b/pinecone/db_data/dataclasses/sparse_values.py similarity index 100% rename from pinecone/data/dataclasses/sparse_values.py rename to pinecone/db_data/dataclasses/sparse_values.py diff --git a/pinecone/data/dataclasses/utils.py b/pinecone/db_data/dataclasses/utils.py similarity index 100% rename from pinecone/data/dataclasses/utils.py rename to pinecone/db_data/dataclasses/utils.py diff --git a/pinecone/data/dataclasses/vector.py b/pinecone/db_data/dataclasses/vector.py similarity index 100% rename from pinecone/data/dataclasses/vector.py rename to pinecone/db_data/dataclasses/vector.py diff --git a/pinecone/data/errors.py b/pinecone/db_data/errors.py similarity index 100% rename from pinecone/data/errors.py rename to pinecone/db_data/errors.py diff --git a/pinecone/db_data/features/__init__.py b/pinecone/db_data/features/__init__.py new file mode 100644 index 00000000..b8f2fddb --- /dev/null +++ b/pinecone/db_data/features/__init__.py @@ -0,0 +1,13 @@ +import warnings + +# Display a warning for old imports +warnings.warn( + "The module at `pinecone.data.features.inference` has moved to `pinecone.inference`. " + "Please update your imports from `from pinecone.data.features.inference import Inference, AsyncioInference, RerankModel, EmbedModel` " + "to `from pinecone.inference import Inference, AsyncioInference, RerankModel, EmbedModel`. 
" + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +# Import from the new location to maintain backward compatibility +from pinecone.inference import Inference, AsyncioInference, RerankModel, EmbedModel diff --git a/pinecone/data/features/bulk_import/__init__.py b/pinecone/db_data/features/bulk_import/__init__.py similarity index 100% rename from pinecone/data/features/bulk_import/__init__.py rename to pinecone/db_data/features/bulk_import/__init__.py diff --git a/pinecone/data/features/bulk_import/bulk_import.py b/pinecone/db_data/features/bulk_import/bulk_import.py similarity index 100% rename from pinecone/data/features/bulk_import/bulk_import.py rename to pinecone/db_data/features/bulk_import/bulk_import.py diff --git a/pinecone/data/features/bulk_import/bulk_import_asyncio.py b/pinecone/db_data/features/bulk_import/bulk_import_asyncio.py similarity index 100% rename from pinecone/data/features/bulk_import/bulk_import_asyncio.py rename to pinecone/db_data/features/bulk_import/bulk_import_asyncio.py diff --git a/pinecone/data/features/bulk_import/bulk_import_request_factory.py b/pinecone/db_data/features/bulk_import/bulk_import_request_factory.py similarity index 100% rename from pinecone/data/features/bulk_import/bulk_import_request_factory.py rename to pinecone/db_data/features/bulk_import/bulk_import_request_factory.py diff --git a/pinecone/db_data/features/inference/__init__.py b/pinecone/db_data/features/inference/__init__.py new file mode 100644 index 00000000..897b4f4f --- /dev/null +++ b/pinecone/db_data/features/inference/__init__.py @@ -0,0 +1,11 @@ +import warnings + +warnings.warn( + "The module at `pinecone.data.features.inference` has moved to `pinecone.inference`. " + "Please update your imports from `from pinecone.data.features.inference import Inference, AsyncioInference, RerankModel, EmbedModel` " + "to `from pinecone.inference import Inference, AsyncioInference, RerankModel, EmbedModel`. 
" + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +from pinecone.inference import * diff --git a/pinecone/data/import_error.py b/pinecone/db_data/import_error.py similarity index 100% rename from pinecone/data/import_error.py rename to pinecone/db_data/import_error.py diff --git a/pinecone/data/index.py b/pinecone/db_data/index.py similarity index 100% rename from pinecone/data/index.py rename to pinecone/db_data/index.py diff --git a/pinecone/data/index_asyncio.py b/pinecone/db_data/index_asyncio.py similarity index 100% rename from pinecone/data/index_asyncio.py rename to pinecone/db_data/index_asyncio.py diff --git a/pinecone/data/index_asyncio_interface.py b/pinecone/db_data/index_asyncio_interface.py similarity index 100% rename from pinecone/data/index_asyncio_interface.py rename to pinecone/db_data/index_asyncio_interface.py diff --git a/pinecone/data/interfaces.py b/pinecone/db_data/interfaces.py similarity index 100% rename from pinecone/data/interfaces.py rename to pinecone/db_data/interfaces.py diff --git a/pinecone/data/query_results_aggregator.py b/pinecone/db_data/query_results_aggregator.py similarity index 100% rename from pinecone/data/query_results_aggregator.py rename to pinecone/db_data/query_results_aggregator.py diff --git a/pinecone/data/request_factory.py b/pinecone/db_data/request_factory.py similarity index 100% rename from pinecone/data/request_factory.py rename to pinecone/db_data/request_factory.py diff --git a/pinecone/data/sparse_values_factory.py b/pinecone/db_data/sparse_values_factory.py similarity index 100% rename from pinecone/data/sparse_values_factory.py rename to pinecone/db_data/sparse_values_factory.py diff --git a/pinecone/data/types/__init__.py b/pinecone/db_data/types/__init__.py similarity index 100% rename from pinecone/data/types/__init__.py rename to pinecone/db_data/types/__init__.py diff --git a/pinecone/data/types/query_filter.py b/pinecone/db_data/types/query_filter.py similarity index 100% rename from pinecone/data/types/query_filter.py rename to pinecone/db_data/types/query_filter.py diff --git a/pinecone/data/types/search_query_typed_dict.py b/pinecone/db_data/types/search_query_typed_dict.py similarity index 100% rename from pinecone/data/types/search_query_typed_dict.py rename to pinecone/db_data/types/search_query_typed_dict.py diff --git a/pinecone/data/types/search_query_vector_typed_dict.py b/pinecone/db_data/types/search_query_vector_typed_dict.py similarity index 100% rename from pinecone/data/types/search_query_vector_typed_dict.py rename to pinecone/db_data/types/search_query_vector_typed_dict.py diff --git a/pinecone/data/types/search_rerank_typed_dict.py b/pinecone/db_data/types/search_rerank_typed_dict.py similarity index 100% rename from pinecone/data/types/search_rerank_typed_dict.py rename to pinecone/db_data/types/search_rerank_typed_dict.py diff --git a/pinecone/data/types/sparse_vector_typed_dict.py b/pinecone/db_data/types/sparse_vector_typed_dict.py similarity index 100% rename from pinecone/data/types/sparse_vector_typed_dict.py rename to pinecone/db_data/types/sparse_vector_typed_dict.py diff --git a/pinecone/data/types/vector_metadata_dict.py b/pinecone/db_data/types/vector_metadata_dict.py similarity index 100% rename from pinecone/data/types/vector_metadata_dict.py rename to pinecone/db_data/types/vector_metadata_dict.py diff --git a/pinecone/data/types/vector_tuple.py b/pinecone/db_data/types/vector_tuple.py similarity index 100% rename from 
pinecone/data/types/vector_tuple.py rename to pinecone/db_data/types/vector_tuple.py diff --git a/pinecone/data/types/vector_typed_dict.py b/pinecone/db_data/types/vector_typed_dict.py similarity index 100% rename from pinecone/data/types/vector_typed_dict.py rename to pinecone/db_data/types/vector_typed_dict.py diff --git a/pinecone/data/vector_factory.py b/pinecone/db_data/vector_factory.py similarity index 100% rename from pinecone/data/vector_factory.py rename to pinecone/db_data/vector_factory.py diff --git a/pinecone/exceptions/__init__.py b/pinecone/exceptions/__init__.py index 92b05fd7..f437e90b 100644 --- a/pinecone/exceptions/__init__.py +++ b/pinecone/exceptions/__init__.py @@ -1,4 +1,7 @@ -from pinecone.openapi_support.exceptions import ( +from .exceptions import ( + PineconeConfigurationError, + PineconeProtocolError, + ListConversionException, PineconeException, PineconeApiAttributeError, PineconeApiTypeError, @@ -10,7 +13,6 @@ ForbiddenException, ServiceException, ) -from .exceptions import PineconeConfigurationError, PineconeProtocolError, ListConversionException __all__ = [ "PineconeConfigurationError", diff --git a/pinecone/exceptions/exceptions.py b/pinecone/exceptions/exceptions.py index 3860dc8b..32eed99f 100644 --- a/pinecone/exceptions/exceptions.py +++ b/pinecone/exceptions/exceptions.py @@ -1,4 +1,143 @@ -from pinecone.openapi_support.exceptions import PineconeException +class PineconeException(Exception): + """The base exception class for all exceptions in the Pinecone Python SDK""" + + +class PineconeApiTypeError(PineconeException, TypeError): + def __init__(self, msg, path_to_item=None, valid_classes=None, key_type=None) -> None: + """Raises an exception for TypeErrors + + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (list): a list of keys an indices to get to the + current_item + None if unset + valid_classes (tuple): the primitive classes that current item + should be an instance of + None if unset + key_type (bool): False if our value is a value in a dict + True if it is a key in a dict + False if our item is an item in a list + None if unset + """ + self.path_to_item = path_to_item + self.valid_classes = valid_classes + self.key_type = key_type + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiTypeError, self).__init__(full_msg) + + +class PineconeApiValueError(PineconeException, ValueError): + def __init__(self, msg, path_to_item=None) -> None: + """ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (list) the path to the exception in the + received_data dict. None if unset + """ + + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiValueError, self).__init__(full_msg) + + +class PineconeApiAttributeError(PineconeException, AttributeError): + def __init__(self, msg, path_to_item=None) -> None: + """ + Raised when an attribute reference or assignment fails. 
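Because the consolidated exception classes in this hunk inherit from both `PineconeException` and the matching built-in (for example `PineconeApiTypeError(PineconeException, TypeError)`), callers can catch whichever base suits them. A small illustration, assuming this patch is applied so the names are exported from `pinecone.exceptions`:

from pinecone.exceptions import PineconeApiTypeError, PineconeException

try:
    raise PineconeApiTypeError("expected str", path_to_item=["metadata", 0])
except TypeError as err:  # caught as the built-in TypeError...
    assert isinstance(err, PineconeException)  # ...and as a Pinecone error
    print(err)  # -> expected str at ['metadata'][0]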
+ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (None/list) the path to the exception in the + received_data dict + """ + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiAttributeError, self).__init__(full_msg) + + +class PineconeApiKeyError(PineconeException, KeyError): + def __init__(self, msg, path_to_item=None) -> None: + """ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (None/list) the path to the exception in the + received_data dict + """ + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiKeyError, self).__init__(full_msg) + + +class PineconeApiException(PineconeException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + if http_resp: + self.status = http_resp.status + self.reason = http_resp.reason + self.body = http_resp.data + self.headers = http_resp.getheaders() + else: + self.status = status + self.reason = reason + self.body = None + self.headers = None + + def __str__(self): + """Custom error messages for exception""" + error_message = "({0})\nReason: {1}\n".format(self.status, self.reason) + if self.headers: + error_message += "HTTP response headers: {0}\n".format(self.headers) + + if self.body: + error_message += "HTTP response body: {0}\n".format(self.body) + + return error_message + + +class NotFoundException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(NotFoundException, self).__init__(status, reason, http_resp) + + +class UnauthorizedException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(UnauthorizedException, self).__init__(status, reason, http_resp) + + +class ForbiddenException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(ForbiddenException, self).__init__(status, reason, http_resp) + + +class ServiceException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(ServiceException, self).__init__(status, reason, http_resp) + + +def render_path(path_to_item): + """Returns a string representation of a path""" + result = "" + for pth in path_to_item: + if isinstance(pth, int): + result += "[{0}]".format(pth) + else: + result += "['{0}']".format(pth) + return result class PineconeProtocolError(PineconeException): diff --git a/pinecone/grpc/__init__.py b/pinecone/grpc/__init__.py index 350047ca..66adb916 100644 --- a/pinecone/grpc/__init__.py +++ b/pinecone/grpc/__init__.py @@ -49,7 +49,7 @@ from .config import GRPCClientConfig from .future import PineconeGrpcFuture -from pinecone.data.dataclasses import Vector, SparseValues +from pinecone.db_data.dataclasses import Vector, SparseValues from pinecone.core.grpc.protos.db_data_2025_01_pb2 import ( Vector as GRPCVector, diff --git a/pinecone/grpc/index_grpc.py b/pinecone/grpc/index_grpc.py index 9b68c0b6..bfaf8fff 100644 --- a/pinecone/grpc/index_grpc.py +++ b/pinecone/grpc/index_grpc.py @@ -21,7 +21,7 @@ QueryResponse, IndexDescription as DescribeIndexStatsResponse, ) -from pinecone.models.list_response import ListResponse as SimpleListResponse, Pagination +from pinecone.db_control.models.list_response import ListResponse as SimpleListResponse, Pagination from pinecone.core.grpc.protos.db_data_2025_01_pb2 import ( 
Vector as GRPCVector, QueryVector as GRPCQueryVector, @@ -38,11 +38,11 @@ SparseValues as GRPCSparseValues, ) from pinecone import Vector, SparseValues -from pinecone.data.query_results_aggregator import QueryNamespacesResults, QueryResultsAggregator +from pinecone.db_data.query_results_aggregator import QueryNamespacesResults, QueryResultsAggregator from pinecone.core.grpc.protos.db_data_2025_01_pb2_grpc import VectorServiceStub from .base import GRPCIndexBase from .future import PineconeGrpcFuture -from ..data.types import ( +from ..db_data.types import ( SparseVectorTypedDict, VectorTypedDict, VectorTuple, diff --git a/pinecone/grpc/pinecone.py b/pinecone/grpc/pinecone.py index b03b139a..28a13622 100644 --- a/pinecone/grpc/pinecone.py +++ b/pinecone/grpc/pinecone.py @@ -1,5 +1,5 @@ -from ..control.pinecone import Pinecone -from ..config.config import ConfigBuilder +from pinecone import Pinecone +from pinecone.config import ConfigBuilder from .index_grpc import GRPCIndex diff --git a/pinecone/grpc/sparse_values_factory.py b/pinecone/grpc/sparse_values_factory.py index 240cd8e1..5bb14685 100644 --- a/pinecone/grpc/sparse_values_factory.py +++ b/pinecone/grpc/sparse_values_factory.py @@ -3,8 +3,8 @@ from ..utils import convert_to_list -from ..data import SparseValuesTypeError, SparseValuesMissingKeysError -from ..data.types import SparseVectorTypedDict +from ..db_data import SparseValuesTypeError, SparseValuesMissingKeysError +from ..db_data.types import SparseVectorTypedDict from pinecone.core.grpc.protos.db_data_2025_01_pb2 import SparseValues as GRPCSparseValues from pinecone.core.openapi.db_data.models import SparseValues as OpenApiSparseValues diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py index dcd19710..c2869e73 100644 --- a/pinecone/grpc/utils.py +++ b/pinecone/grpc/utils.py @@ -13,7 +13,7 @@ IndexDescription as DescribeIndexStatsResponse, NamespaceSummary, ) -from pinecone.data.dataclasses import FetchResponse +from pinecone.db_data.dataclasses import FetchResponse from google.protobuf.struct_pb2 import Struct diff --git a/pinecone/grpc/vector_factory_grpc.py b/pinecone/grpc/vector_factory_grpc.py index 1fe9572b..22efd269 100644 --- a/pinecone/grpc/vector_factory_grpc.py +++ b/pinecone/grpc/vector_factory_grpc.py @@ -8,13 +8,13 @@ from .utils import dict_to_proto_struct from ..utils import fix_tuple_length, convert_to_list from ..utils.constants import REQUIRED_VECTOR_FIELDS, OPTIONAL_VECTOR_FIELDS -from ..data import ( +from ..db_data import ( VectorDictionaryMissingKeysError, VectorDictionaryExcessKeysError, VectorTupleLengthError, MetadataDictionaryExpectedError, ) -from ..data.types import VectorTuple, VectorTypedDict +from ..db_data.types import VectorTuple, VectorTypedDict from .sparse_values_factory import SparseValuesFactory from pinecone.core.grpc.protos.db_data_2025_01_pb2 import ( diff --git a/pinecone/data/features/inference/__init__.py b/pinecone/inference/__init__.py similarity index 100% rename from pinecone/data/features/inference/__init__.py rename to pinecone/inference/__init__.py diff --git a/pinecone/data/features/inference/inference.py b/pinecone/inference/inference.py similarity index 100% rename from pinecone/data/features/inference/inference.py rename to pinecone/inference/inference.py diff --git a/pinecone/data/features/inference/inference_asyncio.py b/pinecone/inference/inference_asyncio.py similarity index 100% rename from pinecone/data/features/inference/inference_asyncio.py rename to pinecone/inference/inference_asyncio.py diff --git 
a/pinecone/data/features/inference/inference_request_builder.py b/pinecone/inference/inference_request_builder.py similarity index 100% rename from pinecone/data/features/inference/inference_request_builder.py rename to pinecone/inference/inference_request_builder.py diff --git a/pinecone/data/features/inference/models/__init__.py b/pinecone/inference/models/__init__.py similarity index 100% rename from pinecone/data/features/inference/models/__init__.py rename to pinecone/inference/models/__init__.py diff --git a/pinecone/data/features/inference/models/embedding_list.py b/pinecone/inference/models/embedding_list.py similarity index 100% rename from pinecone/data/features/inference/models/embedding_list.py rename to pinecone/inference/models/embedding_list.py diff --git a/pinecone/models/index_embed.py b/pinecone/inference/models/index_embed.py similarity index 94% rename from pinecone/models/index_embed.py rename to pinecone/inference/models/index_embed.py index 4d1ccfe3..4c3306d0 100644 --- a/pinecone/models/index_embed.py +++ b/pinecone/inference/models/index_embed.py @@ -1,8 +1,8 @@ from dataclasses import dataclass from typing import Optional, Dict, Any, Union -from ..enums import Metric -from ..data.features.inference import EmbedModel +from pinecone.db_control.enums import Metric +from pinecone.inference.inference_request_builder import EmbedModel @dataclass(frozen=True) diff --git a/pinecone/data/features/inference/models/rerank_result.py b/pinecone/inference/models/rerank_result.py similarity index 100% rename from pinecone/data/features/inference/models/rerank_result.py rename to pinecone/inference/models/rerank_result.py diff --git a/pinecone/data/features/inference/repl_overrides.py b/pinecone/inference/repl_overrides.py similarity index 100% rename from pinecone/data/features/inference/repl_overrides.py rename to pinecone/inference/repl_overrides.py diff --git a/pinecone/control/langchain_import_warnings.py b/pinecone/langchain_import_warnings.py similarity index 100% rename from pinecone/control/langchain_import_warnings.py rename to pinecone/langchain_import_warnings.py diff --git a/pinecone/control/legacy_pinecone_interface.py b/pinecone/legacy_pinecone_interface.py similarity index 99% rename from pinecone/control/legacy_pinecone_interface.py rename to pinecone/legacy_pinecone_interface.py index a6a7779a..0b097261 100644 --- a/pinecone/control/legacy_pinecone_interface.py +++ b/pinecone/legacy_pinecone_interface.py @@ -3,7 +3,7 @@ from typing import Optional, Dict, Union, TYPE_CHECKING if TYPE_CHECKING: - from pinecone.models import ( + from pinecone.db_control.models import ( ServerlessSpec, PodSpec, IndexList, @@ -11,7 +11,7 @@ IndexModel, IndexEmbed, ) - from pinecone.enums import ( + from pinecone.db_control.enums import ( Metric, VectorType, DeletionProtection, @@ -21,7 +21,7 @@ GcpRegion, AzureRegion, ) - from .types import CreateIndexForModelEmbedTypedDict + from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict class LegacyPineconeDBControlInterface(ABC): diff --git a/pinecone/openapi_support/__init__.py b/pinecone/openapi_support/__init__.py index 63e3fb0a..890c3007 100644 --- a/pinecone/openapi_support/__init__.py +++ b/pinecone/openapi_support/__init__.py @@ -8,7 +8,7 @@ from .endpoint_utils import ExtraOpenApiKwargsTypedDict, KwargsWithOpenApiKwargDefaultsTypedDict from .asyncio_api_client import AsyncioApiClient from .asyncio_endpoint import AsyncioEndpoint -from .configuration import Configuration +from .configuration_lazy import 
Configuration from .exceptions import ( PineconeException, PineconeApiAttributeError, diff --git a/pinecone/openapi_support/api_client.py b/pinecone/openapi_support/api_client.py index 421d56cc..9e1f1f7e 100644 --- a/pinecone/openapi_support/api_client.py +++ b/pinecone/openapi_support/api_client.py @@ -1,14 +1,14 @@ import atexit -from multiprocessing.pool import ThreadPool -from concurrent.futures import ThreadPoolExecutor import io -from typing import Optional, List, Tuple, Dict, Any, Union -from .deserializer import Deserializer +from typing import Optional, List, Tuple, Dict, Any, Union, TYPE_CHECKING +if TYPE_CHECKING: + from multiprocessing.pool import ThreadPool + from concurrent.futures import ThreadPoolExecutor from .rest_urllib3 import Urllib3RestClient -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .exceptions import PineconeApiValueError, PineconeApiException from .api_client_utils import ( parameters_to_tuples, @@ -30,8 +30,8 @@ class ApiClient(object): to the API. More threads means more concurrent API requests. """ - _pool: Optional[ThreadPool] = None - _threadpool_executor: Optional[ThreadPoolExecutor] = None + _pool: Optional["ThreadPool"] = None + _threadpool_executor: Optional["ThreadPoolExecutor"] = None def __init__( self, configuration: Optional[Configuration] = None, pool_threads: Optional[int] = 1 @@ -64,18 +64,20 @@ def close(self): atexit.unregister(self.close) @property - def pool(self): + def pool(self) -> "ThreadPool": """Create thread pool on first request avoids instantiating unused threadpool for blocking clients. """ if self._pool is None: + from multiprocessing.pool import ThreadPool atexit.register(self.close) self._pool = ThreadPool(self.pool_threads) return self._pool @property - def threadpool_executor(self): + def threadpool_executor(self) -> "ThreadPoolExecutor": if self._threadpool_executor is None: + from concurrent.futures import ThreadPoolExecutor self._threadpool_executor = ThreadPoolExecutor(max_workers=self.pool_threads) return self._threadpool_executor @@ -186,6 +188,7 @@ def __call_api( # deserialize response data if response_type: + from .deserializer import Deserializer Deserializer.decode_response(response_type=response_type, response=response_data) return_data = Deserializer.deserialize( response=response_data, diff --git a/pinecone/openapi_support/asyncio_api_client.py b/pinecone/openapi_support/asyncio_api_client.py index 51f2e0ce..43c8e17b 100644 --- a/pinecone/openapi_support/asyncio_api_client.py +++ b/pinecone/openapi_support/asyncio_api_client.py @@ -7,7 +7,7 @@ from .rest_aiohttp import AiohttpRestClient -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .exceptions import PineconeApiValueError, PineconeApiException from .api_client_utils import ( parameters_to_tuples, diff --git a/pinecone/openapi_support/configuration.py b/pinecone/openapi_support/configuration.py index fb6d7d19..cd31b0b9 100644 --- a/pinecone/openapi_support/configuration.py +++ b/pinecone/openapi_support/configuration.py @@ -1,441 +1 @@ -import copy -import logging -import multiprocessing - -from http import client as http_client -from .exceptions import PineconeApiValueError -from typing import TypedDict - - -class HostSetting(TypedDict): - url: str - description: str - - -JSON_SCHEMA_VALIDATION_KEYWORDS = { - "multipleOf", - "maximum", - "exclusiveMaximum", - "minimum", - "exclusiveMinimum", - "maxLength", - "minLength", - "pattern", - 
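The `api_client.py` changes above are a deferred-construction refactor: `ThreadPool`, `ThreadPoolExecutor`, and `Deserializer` are now imported under `TYPE_CHECKING` or inside the method that first needs them, so importing the module no longer pays those costs. A self-contained sketch of the pattern; the class name is an illustrative stand-in, not the SDK's:

from typing import Optional, TYPE_CHECKING

if TYPE_CHECKING:
    # Type-only import: never executed at runtime.
    from multiprocessing.pool import ThreadPool

class LazyPoolHolder:  # illustrative stand-in for ApiClient
    _pool: Optional["ThreadPool"] = None

    def __init__(self, pool_threads: int = 1) -> None:
        self.pool_threads = pool_threads

    @property
    def pool(self) -> "ThreadPool":
        # The real import and pool construction happen on first use only.
        if self._pool is None:
            from multiprocessing.pool import ThreadPool
            self._pool = ThreadPool(self.pool_threads)
        return self._pool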
"maxItems", - "minItems", -} - - -class Configuration: - """Class to hold the configuration of the API client. - - :param host: Base url - :param api_key: Dict to store API key(s). - Each entry in the dict specifies an API key. - The dict key is the name of the security scheme in the OAS specification. - The dict value is the API key secret. - :param api_key_prefix: Dict to store API prefix (e.g. Bearer) - The dict key is the name of the security scheme in the OAS specification. - The dict value is an API key prefix when generating the auth data. - :param discard_unknown_keys: Boolean value indicating whether to discard - unknown properties. A server may send a response that includes additional - properties that are not known by the client in the following scenarios: - 1. The OpenAPI document is incomplete, i.e. it does not match the server - implementation. - 2. The client was generated using an older version of the OpenAPI document - and the server has been upgraded since then. - If a schema in the OpenAPI document defines the additionalProperties attribute, - then all undeclared properties received by the server are injected into the - additional properties map. In that case, there are undeclared properties, and - nothing to discard. - :param disabled_client_side_validations (string): Comma-separated list of - JSON schema validation keywords to disable JSON schema structural validation - rules. The following keywords may be specified: multipleOf, maximum, - exclusiveMaximum, minimum, exclusiveMinimum, maxLength, minLength, pattern, - maxItems, minItems. - By default, the validation is performed for data generated locally by the client - and data received from the server, independent of any validation performed by - the server side. If the input data does not satisfy the JSON schema validation - rules specified in the OpenAPI document, an exception is raised. - If disabled_client_side_validations is set, structural validation is - disabled. This can be useful to troubleshoot data validation problem, such as - when the OpenAPI document validation rules do not match the actual API data - received by the server. - :param server_operation_index: Mapping from operation ID to an index to server - configuration. - :param server_operation_variables: Mapping from operation ID to a mapping with - string values to replace variables in templated server configuration. - The validation of enums is performed for variables with defined enum values before. - :param ssl_ca_cert: str - the path to a file of concatenated CA certificates - in PEM format - - :Example: - - API Key Authentication Example. 
- Given the following security scheme in the OpenAPI specification: - components: - securitySchemes: - cookieAuth: # name for the security scheme - type: apiKey - in: cookie - name: JSESSIONID # cookie name - - You can programmatically set the cookie: - - conf = pinecone.openapi_support.Configuration( - api_key={'cookieAuth': 'abc123'} - api_key_prefix={'cookieAuth': 'JSESSIONID'} - ) - - The following cookie will be added to the HTTP request: - Cookie: JSESSIONID abc123 - """ - - _default = None - - def __init__( - self, - host=None, - api_key=None, - api_key_prefix=None, - discard_unknown_keys=False, - disabled_client_side_validations="", - server_index=None, - server_variables=None, - server_operation_index=None, - server_operation_variables=None, - ssl_ca_cert=None, - ): - """Constructor""" - self._base_path = "https://api.pinecone.io" if host is None else host - """Default Base url - """ - self.server_index = 0 if server_index is None and host is None else server_index - self.server_operation_index = server_operation_index or {} - """Default server index - """ - self.server_variables = server_variables or {} - self.server_operation_variables = server_operation_variables or {} - """Default server variables - """ - self.temp_folder_path = None - """Temp file folder for downloading files - """ - # Authentication Settings - self.api_key = {} - if api_key: - self.api_key = api_key - """dict to store API key(s) - """ - self.api_key_prefix = {} - if api_key_prefix: - self.api_key_prefix = api_key_prefix - """dict to store API prefix (e.g. Bearer) - """ - self.refresh_api_key_hook = None - """function hook to refresh API key if expired - """ - self.discard_unknown_keys = discard_unknown_keys - self.disabled_client_side_validations = disabled_client_side_validations - self.logger = {} - """Logging Settings - """ - self.logger["package_logger"] = logging.getLogger("pinecone.openapi_support") - self.logger["urllib3_logger"] = logging.getLogger("urllib3") - self.logger_format = "%(asctime)s %(levelname)s %(message)s" - """Log format - """ - self.logger_stream_handler = None - """Log stream handler - """ - self.logger_file_handler = None - """Log file handler - """ - self.logger_file = None - """Debug file location - """ - self.debug = False - """Debug switch - """ - - self.verify_ssl = True - """SSL/TLS verification - Set this to false to skip verifying SSL certificate when calling API - from https server. - """ - self.ssl_ca_cert = ssl_ca_cert - """Set this to customize the certificate file to verify the peer. - """ - self.cert_file = None - """client certificate file - """ - self.key_file = None - """client key file - """ - self.assert_hostname = None - """Set this to True/False to enable/disable SSL hostname verification. - """ - - self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 - """urllib3 connection pool's maximum number of connections saved - per pool. urllib3 uses 1 connection as default value, but this is - not the best value when you are making a lot of possibly parallel - requests to the same host, which is often the case here. - cpu_count * 5 is used as default value to increase performance. 
- """ - - self.proxy = None - """Proxy URL - """ - self.proxy_headers = None - """Proxy headers - """ - self.safe_chars_for_path_param = "" - """Safe chars for path_param - """ - self.retries = None - """Adding retries to override urllib3 default value 3 - """ - # Enable client side validation - self.client_side_validation = True - - # Options to pass down to the underlying urllib3 socket - self.socket_options = None - - def __deepcopy__(self, memo): - cls = self.__class__ - result = cls.__new__(cls) - memo[id(self)] = result - for k, v in self.__dict__.items(): - if k not in ("logger", "logger_file_handler"): - setattr(result, k, copy.deepcopy(v, memo)) - # shallow copy of loggers - result.logger = copy.copy(self.logger) - # use setters to configure loggers - result.logger_file = self.logger_file - result.debug = self.debug - return result - - def __setattr__(self, name, value): - object.__setattr__(self, name, value) - if name == "disabled_client_side_validations": - s = set(filter(None, value.split(","))) - for v in s: - if v not in JSON_SCHEMA_VALIDATION_KEYWORDS: - raise PineconeApiValueError("Invalid keyword: '{0}''".format(v)) - self._disabled_client_side_validations = s - - @classmethod - def set_default(cls, default): - """Set default instance of configuration. - - It stores default configuration, which can be - returned by get_default_copy method. - - :param default: object of Configuration - """ - cls._default = copy.deepcopy(default) - - @classmethod - def get_default_copy(cls): - """Return new instance of configuration. - - This method returns newly created, based on default constructor, - object of Configuration class or returns a copy of default - configuration passed by the set_default method. - - :return: The configuration object. - """ - if cls._default is not None: - return copy.deepcopy(cls._default) - return Configuration() - - @property - def logger_file(self): - """The logger file. - - If the logger_file is None, then add stream handler and remove file - handler. Otherwise, add file handler and remove stream handler. - - :param value: The logger_file path. - :type: str - """ - return self.__logger_file - - @logger_file.setter - def logger_file(self, value): - """The logger file. - - If the logger_file is None, then add stream handler and remove file - handler. Otherwise, add file handler and remove stream handler. - - :param value: The logger_file path. - :type: str - """ - self.__logger_file = value - if self.__logger_file: - # If set logging file, - # then add file handler and remove stream handler. - self.logger_file_handler = logging.FileHandler(self.__logger_file) - self.logger_file_handler.setFormatter(self.logger_formatter) - for _, logger in self.logger.items(): - logger.addHandler(self.logger_file_handler) - - @property - def debug(self): - """Debug status - - :param value: The debug status, True or False. - :type: bool - """ - return self.__debug - - @debug.setter - def debug(self, value): - """Debug status - - :param value: The debug status, True or False. 
- :type: bool - """ - self.__debug = value - if self.__debug: - # if debug status is True, turn on debug logging - for _, logger in self.logger.items(): - logger.setLevel(logging.DEBUG) - # turn on http_client debug - http_client.HTTPConnection.debuglevel = 1 - else: - # if debug status is False, turn off debug logging, - # setting log level to default `logging.WARNING` - for _, logger in self.logger.items(): - logger.setLevel(logging.WARNING) - # turn off http_client debug - http_client.HTTPConnection.debuglevel = 0 - - @property - def logger_format(self): - """The logger format. - - The logger_formatter will be updated when sets logger_format. - - :param value: The format string. - :type: str - """ - return self.__logger_format - - @logger_format.setter - def logger_format(self, value): - """The logger format. - - The logger_formatter will be updated when sets logger_format. - - :param value: The format string. - :type: str - """ - self.__logger_format = value - self.logger_formatter = logging.Formatter(self.__logger_format) - - def get_api_key_with_prefix(self, identifier, alias=None): - """Gets API key (with prefix if set). - - :param identifier: The identifier of apiKey. - :param alias: The alternative identifier of apiKey. - :return: The token for api key authentication. - """ - if self.refresh_api_key_hook is not None: - self.refresh_api_key_hook(self) - key = self.api_key.get(identifier, self.api_key.get(alias) if alias is not None else None) - if key: - prefix = self.api_key_prefix.get(identifier) - if prefix: - return "%s %s" % (prefix, key) - else: - return key - - def auth_settings(self): - """Gets Auth Settings dict for api client. - - :return: The Auth Settings information dict. - """ - auth = {} - if "ApiKeyAuth" in self.api_key: - auth["ApiKeyAuth"] = { - "type": "api_key", - "in": "header", - "key": "Api-Key", - "value": self.get_api_key_with_prefix("ApiKeyAuth"), - } - return auth - - def get_host_settings(self): - """Gets an array of host settings - - :return: An array of host settings - """ - return [{"url": "https://api.pinecone.io", "description": "Production API endpoints"}] - - def get_host_from_settings(self, index, variables=None, servers=None): - """Gets host URL based on the index and variables - :param index: array index of the host settings - :param variables: hash of variable and the corresponding value - :param servers: an array of host settings or None - :return: URL based on host settings - """ - if index is None: - return self._base_path - - variables = {} if variables is None else variables - servers = self.get_host_settings() if servers is None else servers - - try: - server = servers[index] - except IndexError: - raise ValueError( - "Invalid index {0} when selecting the host settings. Must be less than {1}".format( - index, len(servers) - ) - ) - - url = server["url"] - - # go through variables and replace placeholders - for variable_name, variable in server.get("variables", {}).items(): - used_value = variables.get(variable_name, variable["default_value"]) - - if "enum_values" in variable and used_value not in variable["enum_values"]: - raise ValueError( - "The variable `{0}` in the host URL has invalid value {1}. 
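This long deletion removes the `Configuration` class body from `openapi_support/configuration.py`; the hunk ends by reducing the file to a one-line re-export, and the new `configuration_lazy.py` points at the relocated class in `pinecone/config/openapi_configuration.py`. If the `pinecone.config` package re-exports the same class object (an assumption; that `__init__.py` is not shown in this hunk), old import paths keep resolving to the identical class:

# Old and new import paths for the OpenAPI Configuration class; the assert
# is expected to hold if pinecone.config re-exports the relocated class.
from pinecone.openapi_support.configuration import Configuration  # legacy path
from pinecone.config.openapi_configuration import Configuration as RelocatedConfiguration

assert Configuration is RelocatedConfiguration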
Must be {2}.".format( - variable_name, variables[variable_name], variable["enum_values"] - ) - ) - - url = url.replace("{" + variable_name + "}", used_value) - - return url - - @property - def host(self): - """Return generated host.""" - return self.get_host_from_settings(self.server_index, variables=self.server_variables) - - @host.setter - def host(self, value): - """Fix base path.""" - self._base_path = value - self.server_index = None - - def __repr__(self): - attrs = [ - f"host={self.host}", - "api_key=***", - f"api_key_prefix={self.api_key_prefix}", - f"connection_pool_maxsize={self.connection_pool_maxsize}", - f"discard_unknown_keys={self.discard_unknown_keys}", - f"disabled_client_side_validations={self.disabled_client_side_validations}", - f"server_index={self.server_index}", - f"server_variables={self.server_variables}", - f"server_operation_index={self.server_operation_index}", - f"server_operation_variables={self.server_operation_variables}", - f"ssl_ca_cert={self.ssl_ca_cert}", - ] - return f"Configuration({', '.join(attrs)})" +from pinecone.config import OpenApiConfiguration as Configuration \ No newline at end of file diff --git a/pinecone/openapi_support/configuration_lazy.py b/pinecone/openapi_support/configuration_lazy.py new file mode 100644 index 00000000..7d278f9f --- /dev/null +++ b/pinecone/openapi_support/configuration_lazy.py @@ -0,0 +1,7 @@ +""" +Lazy import for the Configuration class to avoid loading the entire openapi_support package. +""" + +from ..config.openapi_configuration import Configuration + +__all__ = ["Configuration"] \ No newline at end of file diff --git a/pinecone/openapi_support/endpoint_utils.py b/pinecone/openapi_support/endpoint_utils.py index 13522e85..867232b6 100644 --- a/pinecone/openapi_support/endpoint_utils.py +++ b/pinecone/openapi_support/endpoint_utils.py @@ -2,7 +2,7 @@ from .exceptions import PineconeApiTypeError, PineconeApiValueError from typing import Optional, Dict, Tuple, TypedDict, List, Literal, Any from .types import PropertyValidationTypedDict -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .model_utils import validate_and_convert_types, check_allowed_values, check_validations diff --git a/pinecone/openapi_support/exceptions.py b/pinecone/openapi_support/exceptions.py index fcc37da3..987403f0 100644 --- a/pinecone/openapi_support/exceptions.py +++ b/pinecone/openapi_support/exceptions.py @@ -1,140 +1 @@ -class PineconeException(Exception): - """The base exception class for all exceptions in the Pinecone Python SDK""" - - -class PineconeApiTypeError(PineconeException, TypeError): - def __init__(self, msg, path_to_item=None, valid_classes=None, key_type=None) -> None: - """Raises an exception for TypeErrors - - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (list): a list of keys an indices to get to the - current_item - None if unset - valid_classes (tuple): the primitive classes that current item - should be an instance of - None if unset - key_type (bool): False if our value is a value in a dict - True if it is a key in a dict - False if our item is an item in a list - None if unset - """ - self.path_to_item = path_to_item - self.valid_classes = valid_classes - self.key_type = key_type - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiTypeError, self).__init__(full_msg) - - -class PineconeApiValueError(PineconeException, ValueError): - def __init__(self, msg, 
path_to_item=None) -> None: - """ - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (list) the path to the exception in the - received_data dict. None if unset - """ - - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiValueError, self).__init__(full_msg) - - -class PineconeApiAttributeError(PineconeException, AttributeError): - def __init__(self, msg, path_to_item=None) -> None: - """ - Raised when an attribute reference or assignment fails. - - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (None/list) the path to the exception in the - received_data dict - """ - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiAttributeError, self).__init__(full_msg) - - -class PineconeApiKeyError(PineconeException, KeyError): - def __init__(self, msg, path_to_item=None) -> None: - """ - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (None/list) the path to the exception in the - received_data dict - """ - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiKeyError, self).__init__(full_msg) - - -class PineconeApiException(PineconeException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - if http_resp: - self.status = http_resp.status - self.reason = http_resp.reason - self.body = http_resp.data - self.headers = http_resp.getheaders() - else: - self.status = status - self.reason = reason - self.body = None - self.headers = None - - def __str__(self): - """Custom error messages for exception""" - error_message = "({0})\nReason: {1}\n".format(self.status, self.reason) - if self.headers: - error_message += "HTTP response headers: {0}\n".format(self.headers) - - if self.body: - error_message += "HTTP response body: {0}\n".format(self.body) - - return error_message - - -class NotFoundException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(NotFoundException, self).__init__(status, reason, http_resp) - - -class UnauthorizedException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(UnauthorizedException, self).__init__(status, reason, http_resp) - - -class ForbiddenException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(ForbiddenException, self).__init__(status, reason, http_resp) - - -class ServiceException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(ServiceException, self).__init__(status, reason, http_resp) - - -def render_path(path_to_item): - """Returns a string representation of a path""" - result = "" - for pth in path_to_item: - if isinstance(pth, int): - result += "[{0}]".format(pth) - else: - result += "['{0}']".format(pth) - return result +from pinecone.exceptions import * \ No newline at end of file diff --git a/pinecone/openapi_support/model_utils.py b/pinecone/openapi_support/model_utils.py index 4fc4cf0f..163f94b4 100644 --- a/pinecone/openapi_support/model_utils.py +++ b/pinecone/openapi_support/model_utils.py @@ -1,5 +1,4 @@ from datetime import date, datetime # noqa: F401 -from dateutil.parser import parse import inspect import io @@ -1149,6 +1148,8 @@ def 
deserialize_primitive(data, klass, path_to_item): additional_message = "" try: if klass in {datetime, date}: + from dateutil.parser import parse + additional_message = ( "If you need your parameter to have a fallback " "string value, please set its type as `type: {}` in your " diff --git a/pinecone/openapi_support/rest_aiohttp.py b/pinecone/openapi_support/rest_aiohttp.py index c7121a11..3cab099a 100644 --- a/pinecone/openapi_support/rest_aiohttp.py +++ b/pinecone/openapi_support/rest_aiohttp.py @@ -2,7 +2,7 @@ import certifi import json from .rest_utils import RestClientInterface, RESTResponse, raise_exceptions_or_return -from .configuration import Configuration +from ..config.openapi_configuration import Configuration class AiohttpRestClient(RestClientInterface): diff --git a/pinecone/openapi_support/rest_urllib3.py b/pinecone/openapi_support/rest_urllib3.py index 85d008da..0c1a1c5a 100644 --- a/pinecone/openapi_support/rest_urllib3.py +++ b/pinecone/openapi_support/rest_urllib3.py @@ -4,7 +4,7 @@ import os from typing import Optional from urllib.parse import urlencode, quote -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .rest_utils import raise_exceptions_or_return, RESTResponse, RestClientInterface import urllib3 diff --git a/pinecone/control/pinecone.py b/pinecone/pinecone.py similarity index 93% rename from pinecone/control/pinecone.py rename to pinecone/pinecone.py index 87ae5b9d..a694dcb0 100644 --- a/pinecone/control/pinecone.py +++ b/pinecone/pinecone.py @@ -6,21 +6,21 @@ from pinecone.config import PineconeConfig, ConfigBuilder -from pinecone.utils import normalize_host, PluginAware +from pinecone.utils import normalize_host, PluginAware, docslinks from .langchain_import_warnings import _build_langchain_attribute_error_message -from pinecone.utils import docslinks logger = logging.getLogger(__name__) """ @private """ if TYPE_CHECKING: - from .db_control import DBControl - from pinecone.data import ( + from pinecone.db_data import ( _Index as Index, _Inference as Inference, _IndexAsyncio as IndexAsyncio, ) - from pinecone.enums import ( + from pinecone.db_control import DBControl + from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict + from pinecone.db_control.enums import ( Metric, VectorType, DeletionProtection, @@ -30,7 +30,7 @@ GcpRegion, AzureRegion, ) - from pinecone.models import ( + from pinecone.db_control.models import ( ServerlessSpec, PodSpec, IndexModel, @@ -38,7 +38,6 @@ CollectionList, IndexEmbed, ) - from .types import CreateIndexForModelEmbedTypedDict class Pinecone(PluginAware, LegacyPineconeDBControlInterface): @@ -95,8 +94,7 @@ def __init__( self._db_control: Optional["DBControl"] = None # Lazy initialization """ @private """ - # Initialize PluginAware first, which will then call PineconeDBControlInterface.__init__ - super().__init__() + super().__init__() # Initialize PluginAware @property def inference(self) -> "Inference": @@ -104,7 +102,7 @@ def inference(self) -> "Inference": Inference is a namespace where an instance of the `pinecone.data.features.inference.inference.Inference` class is lazily created and cached. 
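The `inference` property here follows the lazy, cached namespace pattern this patch applies throughout the `Pinecone` class: the submodule import is deferred into the property body and the instance is cached, so constructing the client never pays for features that are not used. A runnable sketch with a dummy stand-in class:

from typing import Optional

class _Inference:  # dummy stand-in for pinecone.db_data._Inference
    pass

class ClientSketch:  # illustrative stand-in for Pinecone
    def __init__(self) -> None:
        self._inference: Optional[_Inference] = None  # lazily created and cached

    @property
    def inference(self) -> _Inference:
        # In the SDK, `from pinecone.db_data import _Inference` happens here,
        # on first access, keeping client construction cheap.
        if self._inference is None:
            self._inference = _Inference()
        return self._inference

client = ClientSketch()
assert client.inference is client.inference  # created once, then reused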
""" if self._inference is None: - from pinecone.data import _Inference + from pinecone.db_data import _Inference self._inference = _Inference(config=self.config, openapi_config=self.openapi_config) return self._inference @@ -115,7 +113,7 @@ def db(self) -> "DBControl": DBControl is a namespace where an instance of the `pinecone.control.db_control.DBControl` class is lazily created and cached. """ if self._db_control is None: - from .db_control import DBControl + from pinecone.db_control import DBControl self._db_control = DBControl( config=self.config, @@ -221,7 +219,7 @@ def from_documents(*args, **kwargs): raise AttributeError(_build_langchain_attribute_error_message("from_documents")) def Index(self, name: str = "", host: str = "", **kwargs) -> "Index": - from pinecone.data import _Index + from pinecone.db_data import _Index if name == "" and host == "": raise ValueError("Either name or host must be specified") @@ -249,7 +247,7 @@ def Index(self, name: str = "", host: str = "", **kwargs) -> "Index": ) def IndexAsyncio(self, host: str, **kwargs) -> "IndexAsyncio": - from pinecone.data import _IndexAsyncio + from pinecone.db_data import _IndexAsyncio api_key = self.config.api_key openapi_config = self.openapi_config diff --git a/pinecone/control/pinecone_asyncio.py b/pinecone/pinecone_asyncio.py similarity index 91% rename from pinecone/control/pinecone_asyncio.py rename to pinecone/pinecone_asyncio.py index efd5ca5e..c0b552a4 100644 --- a/pinecone/control/pinecone_asyncio.py +++ b/pinecone/pinecone_asyncio.py @@ -6,14 +6,13 @@ from pinecone.utils import normalize_host from pinecone.utils import docslinks -from .request_factory import PineconeDBControlRequestFactory from .pinecone_interface_asyncio import PineconeAsyncioDBControlInterface from .pinecone import check_realistic_host if TYPE_CHECKING: - from .types import CreateIndexForModelEmbedTypedDict - from pinecone.data import _IndexAsyncio - from pinecone.enums import ( + from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict + from pinecone.db_data import _IndexAsyncio + from pinecone.db_control.enums import ( Metric, VectorType, DeletionProtection, @@ -23,7 +22,7 @@ GcpRegion, AzureRegion, ) - from pinecone.models import ( + from pinecone.db_control.models import ( ServerlessSpec, PodSpec, IndexModel, @@ -151,7 +150,7 @@ async def main(): def inference(self): """Dynamically create and cache the AsyncioInference instance.""" if self._inference is None: - from pinecone.data import _AsyncioInference + from pinecone.db_data import _AsyncioInference self._inference = _AsyncioInference(api_client=self.index_api.api_client) return self._inference @@ -159,7 +158,7 @@ def inference(self): @property def db(self): if self._db_control is None: - from .db_control_asyncio import DBControlAsyncio + from .db_control.db_control_asyncio import DBControlAsyncio self._db_control = DBControlAsyncio( config=self.config, @@ -181,16 +180,16 @@ async def create_index( vector_type: Optional[Union["VectorType", str]] = "VectorType.DENSE", tags: Optional[Dict[str, str]] = None, ) -> "IndexModel": - req = PineconeDBControlRequestFactory.create_index_request( - name=name, - spec=spec, - dimension=dimension, - metric=metric, - deletion_protection=deletion_protection, - vector_type=vector_type, + resp = await self.db.index.create( + name=name, + spec=spec, + dimension=dimension, + metric=metric, + deletion_protection=deletion_protection, + vector_type=vector_type, tags=tags, + timeout=timeout ) - resp = await 
self.db.index.create(create_index_request=req) return resp async def create_index_for_model( @@ -256,7 +255,7 @@ async def describe_collection(self, name: str): return await self.db.collection.describe(name=name) def IndexAsyncio(self, host: str, **kwargs) -> "_IndexAsyncio": - from pinecone.data import _IndexAsyncio + from pinecone.db_data import _IndexAsyncio api_key = self.config.api_key openapi_config = self.openapi_config diff --git a/pinecone/control/pinecone_interface_asyncio.py b/pinecone/pinecone_interface_asyncio.py similarity index 99% rename from pinecone/control/pinecone_interface_asyncio.py rename to pinecone/pinecone_interface_asyncio.py index 139af7a5..31d1feba 100644 --- a/pinecone/control/pinecone_interface_asyncio.py +++ b/pinecone/pinecone_interface_asyncio.py @@ -7,7 +7,7 @@ from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi - from pinecone.models import ( + from pinecone.db_control.models import ( ServerlessSpec, PodSpec, IndexList, @@ -15,7 +15,7 @@ IndexModel, IndexEmbed, ) - from pinecone.enums import ( + from pinecone.db_control.enums import ( Metric, VectorType, DeletionProtection, @@ -25,7 +25,7 @@ GcpRegion, AzureRegion, ) - from .types import CreateIndexForModelEmbedTypedDict + from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict class PineconeAsyncioDBControlInterface(ABC): diff --git a/pinecone/utils/docslinks.py b/pinecone/utils/docslinks.py index a86dd1da..bab6a151 100644 --- a/pinecone/utils/docslinks.py +++ b/pinecone/utils/docslinks.py @@ -1,10 +1,11 @@ -from pinecone.core.openapi.db_control import API_VERSION +def versioned_url(template: str): + return lambda version: template.format(version) docslinks = { "README": "https://github.com/pinecone-io/pinecone-python-client/blob/main/README.md", "GITHUB_REPO": "https://github.com/pinecone-io/pinecone-python-client", "LANGCHAIN_IMPORT_KB_ARTICLE": "https://docs.pinecone.io/troubleshooting/pinecone-attribute-errors-with-langchain", - "API_DESCRIBE_INDEX": "https://docs.pinecone.io/reference/api/{}/control-plane/describe_index".format( - API_VERSION + "API_DESCRIBE_INDEX": versioned_url( + "https://docs.pinecone.io/reference/api/{}/control-plane/describe_index", ), } diff --git a/pinecone/utils/plugin_aware.py b/pinecone/utils/plugin_aware.py index 8410397a..92093fcb 100644 --- a/pinecone/utils/plugin_aware.py +++ b/pinecone/utils/plugin_aware.py @@ -1,7 +1,7 @@ from typing import Any from .setup_openapi_client import build_plugin_setup_client from pinecone.config import Config -from pinecone.openapi_support.configuration import Configuration as OpenApiConfig +from pinecone.config.openapi_configuration import Configuration as OpenApiConfig from pinecone_plugin_interface import load_and_install as install_plugins import logging diff --git a/tests/integration/data/test_query_namespaces_sparse.py b/tests/integration/data/test_query_namespaces_sparse.py index 607798ea..958368b5 100644 --- a/tests/integration/data/test_query_namespaces_sparse.py +++ b/tests/integration/data/test_query_namespaces_sparse.py @@ -1,6 +1,6 @@ import pytest from ..helpers import random_string, poll_stats_for_namespace -from pinecone.data.query_results_aggregator import QueryResultsAggregatorInvalidTopKError +from pinecone.db_data.query_results_aggregator import QueryResultsAggregatorInvalidTopKError from pinecone import Vector, SparseValues diff --git a/tests/integration/data/test_search_and_upsert_records.py b/tests/integration/data/test_search_and_upsert_records.py index 
e83a5cd8..0a269a49 100644 --- a/tests/integration/data/test_search_and_upsert_records.py +++ b/tests/integration/data/test_search_and_upsert_records.py @@ -6,7 +6,7 @@ import os from pinecone import RerankModel, PineconeApiException -from pinecone.data import _Index +from pinecone.db_data import _Index logger = logging.getLogger(__name__) diff --git a/tests/integration/data/test_upsert_from_dataframe.py b/tests/integration/data/test_upsert_from_dataframe.py index 49bc9abc..4534bc4f 100644 --- a/tests/integration/data/test_upsert_from_dataframe.py +++ b/tests/integration/data/test_upsert_from_dataframe.py @@ -1,5 +1,5 @@ import pandas as pd -from pinecone.data import _Index +from pinecone.db_data import _Index from ..helpers import embedding_values, random_string diff --git a/tests/integration/data_asyncio/conftest.py b/tests/integration/data_asyncio/conftest.py index 6401e073..9769a5e9 100644 --- a/tests/integration/data_asyncio/conftest.py +++ b/tests/integration/data_asyncio/conftest.py @@ -2,7 +2,7 @@ import json import asyncio from ..helpers import get_environment_var, generate_index_name -from pinecone.data import _IndexAsyncio +from pinecone.db_data import _IndexAsyncio import logging from typing import Callable, Optional, Awaitable, Union diff --git a/tests/integration/helpers/helpers.py b/tests/integration/helpers/helpers.py index 480585e5..6688f288 100644 --- a/tests/integration/helpers/helpers.py +++ b/tests/integration/helpers/helpers.py @@ -7,7 +7,7 @@ from typing import Any from datetime import datetime import json -from pinecone.data import _Index +from pinecone.db_data import _Index from typing import List logger = logging.getLogger(__name__) diff --git a/tests/perf/test_query_results_aggregator.py b/tests/perf/test_query_results_aggregator.py index 29ac4c35..9f33c149 100644 --- a/tests/perf/test_query_results_aggregator.py +++ b/tests/perf/test_query_results_aggregator.py @@ -1,5 +1,5 @@ import random -from pinecone.data.query_results_aggregator import QueryResultsAggregator +from pinecone.db_data.query_results_aggregator import QueryResultsAggregator def fake_results(i): diff --git a/tests/unit/data/test_bulk_import.py b/tests/unit/data/test_bulk_import.py index b1bcd4cc..c7ad5a14 100644 --- a/tests/unit/data/test_bulk_import.py +++ b/tests/unit/data/test_bulk_import.py @@ -6,7 +6,7 @@ ImportErrorMode as ImportErrorModeGeneratedClass, ) -from pinecone.data.features.bulk_import import ImportFeatureMixin, ImportErrorMode +from pinecone.db_data.features.bulk_import import ImportFeatureMixin, ImportErrorMode def build_client_w_faked_response(mocker, body: str, status: int = 200): diff --git a/tests/unit/data/test_request_factory.py b/tests/unit/data/test_request_factory.py index 087436c9..ea04acdf 100644 --- a/tests/unit/data/test_request_factory.py +++ b/tests/unit/data/test_request_factory.py @@ -1,5 +1,5 @@ import pytest -from pinecone.data.request_factory import ( +from pinecone.db_data.request_factory import ( IndexRequestFactory, SearchQuery, SearchQueryVector, diff --git a/tests/unit/data/test_vector_factory.py b/tests/unit/data/test_vector_factory.py index 52fd1eac..adeeaf9c 100644 --- a/tests/unit/data/test_vector_factory.py +++ b/tests/unit/data/test_vector_factory.py @@ -2,7 +2,7 @@ import pandas as pd import pytest -from pinecone.data.vector_factory import VectorFactory +from pinecone.db_data.vector_factory import VectorFactory from pinecone import Vector, SparseValues, ListConversionException from pinecone.core.openapi.db_data.models import ( Vector as 
OpenApiVector, diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index f33519b6..f1a00508 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,7 +1,7 @@ from pinecone import Pinecone from pinecone.exceptions.exceptions import PineconeConfigurationError from pinecone.config import PineconeConfig -from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration +from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration import pytest import os diff --git a/tests/unit/test_config_builder.py b/tests/unit/test_config_builder.py index 3122c080..7307f153 100644 --- a/tests/unit/test_config_builder.py +++ b/tests/unit/test_config_builder.py @@ -1,6 +1,6 @@ import pytest -from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration +from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration from pinecone.config import ConfigBuilder from pinecone import PineconeConfigurationError diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 72ed7422..6e880016 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,7 +1,7 @@ import pandas as pd import pytest -from pinecone.data import _Index +from pinecone.db_data import _Index import pinecone.core.openapi.db_data.models as oai from pinecone import QueryResponse, UpsertResponse, Vector diff --git a/tests/unit/test_query_results_aggregator.py b/tests/unit/test_query_results_aggregator.py index b40a11d2..d3c97f87 100644 --- a/tests/unit/test_query_results_aggregator.py +++ b/tests/unit/test_query_results_aggregator.py @@ -1,4 +1,4 @@ -from pinecone.data.query_results_aggregator import ( +from pinecone.db_data.query_results_aggregator import ( QueryResultsAggregator, QueryResultsAggregatorInvalidTopKError, ) diff --git a/tests/upgrade/test_v6_upgrade.py b/tests/upgrade/test_v6_upgrade.py new file mode 100644 index 00000000..3806576c --- /dev/null +++ b/tests/upgrade/test_v6_upgrade.py @@ -0,0 +1,174 @@ +import pinecone + +class TestV6Upgrade: + def test_v6_upgrade_root_imports(self): + v6_dir_items = [ + 'CollectionDescription', + 'CollectionList', + 'Config', + 'ConfigBuilder', + 'DeleteRequest', + 'DescribeIndexStatsRequest', + 'DescribeIndexStatsResponse', + 'FetchResponse', + 'ForbiddenException', + 'ImportErrorMode', + 'Index', + 'IndexList', + 'IndexModel', + 'ListConversionException', + 'MetadataDictionaryExpectedError', + 'NotFoundException', + 'Pinecone', + 'PineconeApiAttributeError', + 'PineconeApiException', + 'PineconeApiKeyError', + 'PineconeApiTypeError', + 'PineconeApiValueError', + 'PineconeConfig', + 'PineconeConfigurationError', + 'PineconeException', + 'PineconeProtocolError', + 'PodSpec', + 'PodSpecDefinition', + 'QueryRequest', + 'QueryResponse', + 'RpcStatus', + 'ScoredVector', + 'ServerlessSpec', + 'ServerlessSpecDefinition', + 'ServiceException', + 'SingleQueryResults', + 'SparseValues', + 'SparseValuesDictionaryExpectedError', + 'SparseValuesMissingKeysError', + 'SparseValuesTypeError', + 'TqdmExperimentalWarning', + 'UnauthorizedException', + 'UpdateRequest', + 'UpsertRequest', + 'UpsertResponse', + 'Vector', + 'VectorDictionaryExcessKeysError', + 'VectorDictionaryMissingKeysError', + 'VectorTupleLengthError', + '__builtins__', + '__cached__', + '__doc__', + '__file__', + '__loader__', + '__name__', + '__package__', + '__path__', + '__spec__', + '__version__', + 'config', + 'configure_index', + 'control', + 'core', + 'core_ea', + 'create_collection', 
+ 'create_index', + 'data', + 'delete_collection', + 'delete_index', + 'deprecation_warnings', + 'describe_collection', + 'describe_index', + 'errors', + 'exceptions', + 'features', + 'index', + 'index_host_store', + 'init', + 'install_repr_overrides', + 'langchain_import_warnings', + 'list_collections', + 'list_indexes', + 'logging', + 'models', + 'openapi', + 'os', + 'pinecone', + 'pinecone_config', + 'repr_overrides', + 'scale_index', + 'sparse_vector_factory', + 'utils', + 'vector_factory', + 'warnings' + ] + + missing_items = [] + for item in v6_dir_items: + if item not in dir(pinecone): + missing_items.append(item) + + assert len(missing_items) == 0, f"Missing items: {missing_items}" + + def test_v6_upgrade_data_imports(self): + v6_data_dir_items = [ + 'DescribeIndexStatsResponse', + 'EmbedModel', + 'FetchResponse', + 'ImportErrorMode', + 'Index', + 'IndexClientInstantiationError', + 'Inference', + 'InferenceInstantiationError', + 'MetadataDictionaryExpectedError', + 'QueryResponse', + 'RerankModel', + 'SearchQuery', + 'SearchQueryVector', + 'SearchRerank', + 'SparseValues', + 'SparseValuesDictionaryExpectedError', + 'SparseValuesMissingKeysError', + 'SparseValuesTypeError', + 'UpsertResponse', + 'Vector', + 'VectorDictionaryExcessKeysError', + 'VectorDictionaryMissingKeysError', + 'VectorTupleLengthError', + '_AsyncioInference', + '_Index', + '_IndexAsyncio', + '_Inference', + '__builtins__', + '__cached__', + '__doc__', + '__file__', + '__loader__', + '__name__', + '__package__', + '__path__', + '__spec__', + 'dataclasses', + 'errors', + 'features', + 'fetch_response', + 'import_error', + 'index', + 'index_asyncio', + 'index_asyncio_interface', + 'interfaces', + 'query_results_aggregator', + 'request_factory', + 'search_query', + 'search_query_vector', + 'search_rerank', + 'sparse_values', + 'sparse_values_factory', + 'types', + 'utils', + 'vector', + 'vector_factory' + ] + + missing_items = [] + for item in v6_data_dir_items: + if item not in dir(pinecone.db_data): + missing_items.append(item) + + assert len(missing_items) == 0, f"Missing items: {missing_items}" From 67323cb7866e22082cd680603ec826a35f4e652f Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 9 Apr 2025 13:07:25 -0400 Subject: [PATCH 08/13] WIP --- pinecone/config/openapi_config_factory.py | 4 +- pinecone/db_control/enums/clouds.py | 24 +- pinecone/db_data/__init__.py | 12 +- pinecone/openapi_support/api_client.py | 3 + pinecone/openapi_support/configuration.py | 1 - .../openapi_support/configuration_lazy.py | 2 +- pinecone/openapi_support/exceptions.py | 2 +- pinecone/pinecone_asyncio.py | 14 +- pinecone/utils/docslinks.py | 3 +- tests/upgrade/test_v6_upgrade.py | 301 +++++++++--------- 10 files changed, 188 insertions(+), 178 deletions(-) diff --git a/pinecone/config/openapi_config_factory.py b/pinecone/config/openapi_config_factory.py index d7730458..56a1de64 100644 --- a/pinecone/config/openapi_config_factory.py +++ b/pinecone/config/openapi_config_factory.py @@ -71,9 +71,7 @@ def _get_socket_options( # Source: https://www.finbourne.com/blog/the-mysterious-hanging-client-tcp-keep-alives # urllib3.connection.HTTPConnection.default_socket_options - socket_params = [ - (socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - ] + socket_params = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] if not do_keep_alive: return socket_params diff --git a/pinecone/db_control/enums/clouds.py b/pinecone/db_control/enums/clouds.py index 192b3da5..8903f40a 100644 --- a/pinecone/db_control/enums/clouds.py +++ 
b/pinecone/db_control/enums/clouds.py @@ -3,10 +3,10 @@ class CloudProvider(Enum): """Cloud providers available for use with Pinecone serverless indexes - + This list could expand or change over time as more cloud providers are supported. - Check the Pinecone documentation for the most up-to-date list of supported cloud - providers. If you want to use a cloud provider that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported cloud + providers. If you want to use a cloud provider that is not listed here, you can pass a string value directly without using this enum. """ @@ -17,10 +17,10 @@ class CloudProvider(Enum): class AwsRegion(Enum): """AWS (Amazon Web Services) regions available for use with Pinecone serverless indexes - + This list could expand or change over time as more regions are supported. - Check the Pinecone documentation for the most up-to-date list of supported - regions. If you want to use a region that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported + regions. If you want to use a region that is not listed here, you can pass a string value directly without using this enum. """ @@ -31,10 +31,10 @@ class AwsRegion(Enum): class GcpRegion(Enum): """GCP (Google Cloud Platform) regions available for use with Pinecone serverless indexes - + This list could expand or change over time as more regions are supported. - Check the Pinecone documentation for the most up-to-date list of supported - regions. If you want to use a region that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported + regions. If you want to use a region that is not listed here, you can pass a string value directly without using this enum. """ @@ -44,10 +44,10 @@ class GcpRegion(Enum): class AzureRegion(Enum): """Azure regions available for use with Pinecone serverless indexes - + This list could expand or change over time as more regions are supported. - Check the Pinecone documentation for the most up-to-date list of supported - regions. If you want to use a region that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported + regions. If you want to use a region that is not listed here, you can pass a string value directly without using this enum. """ diff --git a/pinecone/db_data/__init__.py b/pinecone/db_data/__init__.py index 7c76f04f..f2db9a63 100644 --- a/pinecone/db_data/__init__.py +++ b/pinecone/db_data/__init__.py @@ -30,6 +30,7 @@ import warnings + def _get_deprecated_import(name, from_module, to_module): warnings.warn( f"The import of `{name}` from `{from_module}` has moved to `{to_module}`. 
" @@ -39,15 +40,22 @@ def _get_deprecated_import(name, from_module, to_module): DeprecationWarning, ) # Import from the new location - from pinecone.inference import Inference as _Inference, AsyncioInference as _AsyncioInference, RerankModel, EmbedModel + from pinecone.inference import ( + Inference as _Inference, + AsyncioInference as _AsyncioInference, + RerankModel, + EmbedModel, + ) + return locals()[name] + moved = ["_Inference", "_AsyncioInference", "RerankModel", "EmbedModel"] + def __getattr__(name): if name in locals(): return locals()[name] elif name in moved: return _get_deprecated_import(name, "pinecone.data", "pinecone.inference") raise AttributeError(f"module '{__name__}' has no attribute '{name}'") - diff --git a/pinecone/openapi_support/api_client.py b/pinecone/openapi_support/api_client.py index 9e1f1f7e..ee1e4649 100644 --- a/pinecone/openapi_support/api_client.py +++ b/pinecone/openapi_support/api_client.py @@ -70,6 +70,7 @@ def pool(self) -> "ThreadPool": """ if self._pool is None: from multiprocessing.pool import ThreadPool + atexit.register(self.close) self._pool = ThreadPool(self.pool_threads) return self._pool @@ -78,6 +79,7 @@ def pool(self) -> "ThreadPool": def threadpool_executor(self) -> "ThreadPoolExecutor": if self._threadpool_executor is None: from concurrent.futures import ThreadPoolExecutor + self._threadpool_executor = ThreadPoolExecutor(max_workers=self.pool_threads) return self._threadpool_executor @@ -189,6 +191,7 @@ def __call_api( # deserialize response data if response_type: from .deserializer import Deserializer + Deserializer.decode_response(response_type=response_type, response=response_data) return_data = Deserializer.deserialize( response=response_data, diff --git a/pinecone/openapi_support/configuration.py b/pinecone/openapi_support/configuration.py index cd31b0b9..e69de29b 100644 --- a/pinecone/openapi_support/configuration.py +++ b/pinecone/openapi_support/configuration.py @@ -1 +0,0 @@ -from pinecone.config import OpenApiConfiguration as Configuration \ No newline at end of file diff --git a/pinecone/openapi_support/configuration_lazy.py b/pinecone/openapi_support/configuration_lazy.py index 7d278f9f..27e90a34 100644 --- a/pinecone/openapi_support/configuration_lazy.py +++ b/pinecone/openapi_support/configuration_lazy.py @@ -4,4 +4,4 @@ from ..config.openapi_configuration import Configuration -__all__ = ["Configuration"] \ No newline at end of file +__all__ = ["Configuration"] diff --git a/pinecone/openapi_support/exceptions.py b/pinecone/openapi_support/exceptions.py index 987403f0..5729b13e 100644 --- a/pinecone/openapi_support/exceptions.py +++ b/pinecone/openapi_support/exceptions.py @@ -1 +1 @@ -from pinecone.exceptions import * \ No newline at end of file +from pinecone.exceptions import * diff --git a/pinecone/pinecone_asyncio.py b/pinecone/pinecone_asyncio.py index c0b552a4..3da739f7 100644 --- a/pinecone/pinecone_asyncio.py +++ b/pinecone/pinecone_asyncio.py @@ -181,14 +181,14 @@ async def create_index( tags: Optional[Dict[str, str]] = None, ) -> "IndexModel": resp = await self.db.index.create( - name=name, - spec=spec, - dimension=dimension, - metric=metric, - deletion_protection=deletion_protection, - vector_type=vector_type, + name=name, + spec=spec, + dimension=dimension, + metric=metric, + deletion_protection=deletion_protection, + vector_type=vector_type, tags=tags, - timeout=timeout + timeout=timeout, ) return resp diff --git a/pinecone/utils/docslinks.py b/pinecone/utils/docslinks.py index bab6a151..cdfe66cd 100644 --- 
a/pinecone/utils/docslinks.py +++ b/pinecone/utils/docslinks.py @@ -1,11 +1,12 @@ def versioned_url(template: str): return lambda version: template.format(version) + docslinks = { "README": "https://github.com/pinecone-io/pinecone-python-client/blob/main/README.md", "GITHUB_REPO": "https://github.com/pinecone-io/pinecone-python-client", "LANGCHAIN_IMPORT_KB_ARTICLE": "https://docs.pinecone.io/troubleshooting/pinecone-attribute-errors-with-langchain", "API_DESCRIBE_INDEX": versioned_url( - "https://docs.pinecone.io/reference/api/{}/control-plane/describe_index", + "https://docs.pinecone.io/reference/api/{}/control-plane/describe_index" ), } diff --git a/tests/upgrade/test_v6_upgrade.py b/tests/upgrade/test_v6_upgrade.py index 3806576c..358ce0e5 100644 --- a/tests/upgrade/test_v6_upgrade.py +++ b/tests/upgrade/test_v6_upgrade.py @@ -1,102 +1,103 @@ import pinecone + class TestV6Upgrade: def test_v6_upgrade_root_imports(self): v6_dir_items = [ - 'CollectionDescription', - 'CollectionList', - 'Config', - 'ConfigBuilder', - 'DeleteRequest', - 'DescribeIndexStatsRequest', - 'DescribeIndexStatsResponse', - 'FetchResponse', - 'ForbiddenException', - 'ImportErrorMode', - 'Index', - 'IndexList', - 'IndexModel', - 'ListConversionException', - 'MetadataDictionaryExpectedError', - 'NotFoundException', - 'Pinecone', - 'PineconeApiAttributeError', - 'PineconeApiException', - 'PineconeApiKeyError', - 'PineconeApiTypeError', - 'PineconeApiValueError', - 'PineconeConfig', - 'PineconeConfigurationError', - 'PineconeException', - 'PineconeProtocolError', - 'PodSpec', - 'PodSpecDefinition', - 'QueryRequest', - 'QueryResponse', - 'RpcStatus', - 'ScoredVector', - 'ServerlessSpec', - 'ServerlessSpecDefinition', - 'ServiceException', - 'SingleQueryResults', - 'SparseValues', - 'SparseValuesDictionaryExpectedError', - 'SparseValuesMissingKeysError', - 'SparseValuesTypeError', - 'TqdmExperimentalWarning', - 'UnauthorizedException', - 'UpdateRequest', - 'UpsertRequest', - 'UpsertResponse', - 'Vector', - 'VectorDictionaryExcessKeysError', - 'VectorDictionaryMissingKeysError', - 'VectorTupleLengthError', - '__builtins__', - '__cached__', - '__doc__', - '__file__', - '__loader__', - '__name__', - '__package__', - '__path__', - '__spec__', - '__version__', - 'config', - 'configure_index', - 'control', - 'core', - 'core_ea', - 'create_collection', - 'create_index', - 'data', - 'delete_collection', - 'delete_index', - 'deprecation_warnings', - 'describe_collection', - 'describe_index', - 'errors', - 'exceptions', - 'features', - 'index', - 'index_host_store', - 'init', - 'install_repr_overrides', - 'langchain_import_warnings', - 'list_collections', - 'list_indexes', - 'logging', - 'models', - 'openapi', - 'os', - 'pinecone', - 'pinecone_config', - 'repr_overrides', - 'scale_index', - 'sparse_vector_factory', - 'utils', - 'vector_factory', - 'warnings' + "CollectionDescription", + "CollectionList", + "Config", + "ConfigBuilder", + "DeleteRequest", + "DescribeIndexStatsRequest", + "DescribeIndexStatsResponse", + "FetchResponse", + "ForbiddenException", + "ImportErrorMode", + "Index", + "IndexList", + "IndexModel", + "ListConversionException", + "MetadataDictionaryExpectedError", + "NotFoundException", + "Pinecone", + "PineconeApiAttributeError", + "PineconeApiException", + "PineconeApiKeyError", + "PineconeApiTypeError", + "PineconeApiValueError", + "PineconeConfig", + "PineconeConfigurationError", + "PineconeException", + "PineconeProtocolError", + "PodSpec", + "PodSpecDefinition", + "QueryRequest", + "QueryResponse", + 
"RpcStatus", + "ScoredVector", + "ServerlessSpec", + "ServerlessSpecDefinition", + "ServiceException", + "SingleQueryResults", + "SparseValues", + "SparseValuesDictionaryExpectedError", + "SparseValuesMissingKeysError", + "SparseValuesTypeError", + "TqdmExperimentalWarning", + "UnauthorizedException", + "UpdateRequest", + "UpsertRequest", + "UpsertResponse", + "Vector", + "VectorDictionaryExcessKeysError", + "VectorDictionaryMissingKeysError", + "VectorTupleLengthError", + "__builtins__", + "__cached__", + "__doc__", + "__file__", + "__loader__", + "__name__", + "__package__", + "__path__", + "__spec__", + "__version__", + "config", + "configure_index", + "control", + "core", + "core_ea", + "create_collection", + "create_index", + "data", + "delete_collection", + "delete_index", + "deprecation_warnings", + "describe_collection", + "describe_index", + "errors", + "exceptions", + "features", + "index", + "index_host_store", + "init", + "install_repr_overrides", + "langchain_import_warnings", + "list_collections", + "list_indexes", + "logging", + "models", + "openapi", + "os", + "pinecone", + "pinecone_config", + "repr_overrides", + "scale_index", + "sparse_vector_factory", + "utils", + "vector_factory", + "warnings", ] missing_items = [] @@ -108,62 +109,62 @@ def test_v6_upgrade_root_imports(self): def test_v6_upgrade_data_imports(self): v6_data_dir_items = [ - 'DescribeIndexStatsResponse', - 'EmbedModel', - 'FetchResponse', - 'ImportErrorMode', - 'Index', - 'IndexClientInstantiationError', - 'Inference', - 'InferenceInstantiationError', - 'MetadataDictionaryExpectedError', - 'QueryResponse', - 'RerankModel', - 'SearchQuery', - 'SearchQueryVector', - 'SearchRerank', - 'SparseValues', - 'SparseValuesDictionaryExpectedError', - 'SparseValuesMissingKeysError', - 'SparseValuesTypeError', - 'UpsertResponse', - 'Vector', - 'VectorDictionaryExcessKeysError', - 'VectorDictionaryMissingKeysError', - 'VectorTupleLengthError', - '_AsyncioInference', - '_Index', - '_IndexAsyncio', - '_Inference', - '__builtins__', - '__cached__', - '__doc__', - '__file__', - '__loader__', - '__name__', - '__package__', - '__path__', - '__spec__', - 'dataclasses', - 'errors', - 'features', - 'fetch_response', - 'import_error', - 'index', - 'index_asyncio', - 'index_asyncio_interface', - 'interfaces', - 'query_results_aggregator', - 'request_factory', - 'search_query', - 'search_query_vector', - 'search_rerank', - 'sparse_values', - 'sparse_values_factory', - 'types', - 'utils', - 'vector', - 'vector_factory' + "DescribeIndexStatsResponse", + "EmbedModel", + "FetchResponse", + "ImportErrorMode", + "Index", + "IndexClientInstantiationError", + "Inference", + "InferenceInstantiationError", + "MetadataDictionaryExpectedError", + "QueryResponse", + "RerankModel", + "SearchQuery", + "SearchQueryVector", + "SearchRerank", + "SparseValues", + "SparseValuesDictionaryExpectedError", + "SparseValuesMissingKeysError", + "SparseValuesTypeError", + "UpsertResponse", + "Vector", + "VectorDictionaryExcessKeysError", + "VectorDictionaryMissingKeysError", + "VectorTupleLengthError", + "_AsyncioInference", + "_Index", + "_IndexAsyncio", + "_Inference", + "__builtins__", + "__cached__", + "__doc__", + "__file__", + "__loader__", + "__name__", + "__package__", + "__path__", + "__spec__", + "dataclasses", + "errors", + "features", + "fetch_response", + "import_error", + "index", + "index_asyncio", + "index_asyncio_interface", + "interfaces", + "query_results_aggregator", + "request_factory", + "search_query", + "search_query_vector", + 
"search_rerank", + "sparse_values", + "sparse_values_factory", + "types", + "utils", + "vector", + "vector_factory", ] missing_items = [] From b7bdd4f471b25f249382c87326996cf2e09fe881 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 17 Apr 2025 15:45:44 -0400 Subject: [PATCH 09/13] WIP --- pinecone/__init__.py | 148 +++++++++++- pinecone/config/config.py | 2 +- pinecone/config/openapi_configuration.py | 2 +- pinecone/control/__init__.py | 9 + pinecone/data/__init__.py | 10 + pinecone/data/features/__init__.py | 10 + .../data/features/bulk_imports/__init__.py | 10 + .../features/inference/__init__.py | 3 +- pinecone/db_control/models/collection_list.py | 4 +- pinecone/db_control/models/index_list.py | 2 +- pinecone/db_control/request_factory.py | 31 ++- pinecone/db_data/dataclasses/search_rerank.py | 2 +- pinecone/db_data/features/__init__.py | 13 - pinecone/db_data/models/__init__.py | 1 + .../db_data/types/search_rerank_typed_dict.py | 2 +- pinecone/models/__init__.py | 9 + pinecone/pinecone.py | 13 +- pinecone/utils/find_legacy_imports.py | 143 +++++++++++ pinecone/utils/lazy_imports.py | 76 ++++++ pinecone/utils/legacy_imports.py | 112 +++++++++ tests/unit/test_control.py | 51 ++-- tests/unit/test_plugin_aware.py | 7 +- tests/upgrade/test_all.py | 28 +++ tests/upgrade/test_reorganization.py | 19 ++ tests/upgrade/test_v6_upgrade.py | 222 ++++++++++++------ 25 files changed, 789 insertions(+), 140 deletions(-) create mode 100644 pinecone/control/__init__.py create mode 100644 pinecone/data/__init__.py create mode 100644 pinecone/data/features/__init__.py create mode 100644 pinecone/data/features/bulk_imports/__init__.py rename pinecone/{db_data => data}/features/inference/__init__.py (53%) delete mode 100644 pinecone/db_data/features/__init__.py create mode 100644 pinecone/db_data/models/__init__.py create mode 100644 pinecone/models/__init__.py create mode 100755 pinecone/utils/find_legacy_imports.py create mode 100644 pinecone/utils/lazy_imports.py create mode 100644 pinecone/utils/legacy_imports.py create mode 100644 tests/upgrade/test_all.py create mode 100644 tests/upgrade/test_reorganization.py diff --git a/pinecone/__init__.py b/pinecone/__init__.py index 4af444d7..f7d8fce9 100644 --- a/pinecone/__init__.py +++ b/pinecone/__init__.py @@ -2,22 +2,160 @@ .. 
include:: ../pdoc/README.md """ -from .deprecated_plugins import check_for_deprecated_plugins +from .deprecated_plugins import check_for_deprecated_plugins as _check_for_deprecated_plugins from .deprecation_warnings import * from .pinecone import Pinecone from .pinecone_asyncio import PineconeAsyncio from .exceptions import * -# from .config import * -# from .db_control import * -# from .db_data import * from .utils import __version__ import logging +# Set up lazy import handling +from .utils.lazy_imports import setup_lazy_imports as _setup_lazy_imports + +_inference_lazy_imports = { + "RerankModel": ("pinecone.inference", "RerankModel"), + "EmbedModel": ("pinecone.inference", "EmbedModel"), +} + +_db_data_lazy_imports = { + "Vector": ("pinecone.db_data.models", "Vector"), + "FetchResponse": ("pinecone.db_data.models", "FetchResponse"), + "DeleteRequest": ("pinecone.db_data.models", "DeleteRequest"), + "DescribeIndexStatsRequest": ("pinecone.db_data.models", "DescribeIndexStatsRequest"), + "DescribeIndexStatsResponse": ("pinecone.db_data.models", "IndexDescription"), + "RpcStatus": ("pinecone.db_data.models", "RpcStatus"), + "ScoredVector": ("pinecone.db_data.models", "ScoredVector"), + "SingleQueryResults": ("pinecone.db_data.models", "SingleQueryResults"), + "QueryRequest": ("pinecone.db_data.models", "QueryRequest"), + "QueryResponse": ("pinecone.db_data.models", "QueryResponse"), + "SearchQuery": ("pinecone.db_data.dataclasses", "SearchQuery"), + "SearchQueryVector": ("pinecone.db_data.dataclasses", "SearchQueryVector"), + "SearchRerank": ("pinecone.db_data.dataclasses", "SearchRerank"), + "UpsertResponse": ("pinecone.db_data.models", "UpsertResponse"), + "UpdateRequest": ("pinecone.db_data.models", "UpdateRequest"), + "SparseValues": ("pinecone.db_data.models", "SparseValues"), +} + +_db_control_lazy_imports = { + "CloudProvider": ("pinecone.db_control.enums", "CloudProvider"), + "AwsRegion": ("pinecone.db_control.enums", "AwsRegion"), + "GcpRegion": ("pinecone.db_control.enums", "GcpRegion"), + "AzureRegion": ("pinecone.db_control.enums", "AzureRegion"), + "PodIndexEnvironment": ("pinecone.db_control.enums", "PodIndexEnvironment"), + "Metric": ("pinecone.db_control.enums", "Metric"), + "VectorType": ("pinecone.db_control.enums", "VectorType"), + "DeletionProtection": ("pinecone.db_control.enums", "DeletionProtection"), + "CollectionDescription": ("pinecone.db_control.models", "CollectionDescription"), + "CollectionList": ("pinecone.db_control.models", "CollectionList"), + "IndexList": ("pinecone.db_control.models", "IndexList"), + "IndexModel": ("pinecone.db_control.models", "IndexModel"), + "IndexEmbed": ("pinecone.db_control.models", "IndexEmbed"), + "ServerlessSpec": ("pinecone.db_control.models", "ServerlessSpec"), + "ServerlessSpecDefinition": ("pinecone.db_control.models", "ServerlessSpecDefinition"), + "PodSpec": ("pinecone.db_control.models", "PodSpec"), + "PodSpecDefinition": ("pinecone.db_control.models", "PodSpecDefinition"), + "PodType": ("pinecone.db_control.enums", "PodType"), +} + +_config_lazy_imports = { + "Config": ("pinecone.config", "Config"), + "ConfigBuilder": ("pinecone.config", "ConfigBuilder"), + "PineconeConfig": ("pinecone.config", "PineconeConfig"), +} + +# Define imports to be lazily loaded +_LAZY_IMPORTS = { + **_inference_lazy_imports, + **_db_data_lazy_imports, + **_db_control_lazy_imports, + **_config_lazy_imports, +} + +# Set up the lazy import handler +_setup_lazy_imports(_LAZY_IMPORTS) + # Raise an exception if the user is attempting to use the SDK 
with # deprecated plugins installed in their project. -check_for_deprecated_plugins() +_check_for_deprecated_plugins() # Silence annoying log messages from the plugin interface logging.getLogger("pinecone_plugin_interface").setLevel(logging.CRITICAL) + +__all__ = [ + "__version__", + # Deprecated top-level functions + "init", + "create_index", + "delete_index", + "list_indexes", + "describe_index", + "configure_index", + "scale_index", + "create_collection", + "delete_collection", + "describe_collection", + "list_collections", + # Primary client classes + "Pinecone", + "PineconeAsyncio", + # Config classes + "Config", + "ConfigBuilder", + "PineconeConfig", + # DB control classes + "CloudProvider", + "AwsRegion", + "GcpRegion", + "AzureRegion", + "PodIndexEnvironment", + "Metric", + "VectorType", + "DeletionProtection", + "CollectionDescription", + "CollectionList", + "IndexList", + "IndexModel", + "IndexEmbed", + "ServerlessSpec", + "ServerlessSpecDefinition", + "PodSpec", + "PodSpecDefinition", + "PodType", + "Vector", + "FetchResponse", + "DeleteRequest", + "DescribeIndexStatsRequest", + "DescribeIndexStatsResponse", + "RpcStatus", + "ScoredVector", + "SingleQueryResults", + "QueryRequest", + "QueryResponse", + "SearchQuery", + "SearchQueryVector", + "SearchRerank", + "UpsertResponse", + "UpdateRequest", + "SparseValues", + # Inference classes + "RerankModel", + "EmbedModel", + # Exception classes + "PineconeException", + "PineconeApiException", + "PineconeConfigurationError", + "PineconeProtocolError", + "PineconeApiAttributeError", + "PineconeApiTypeError", + "PineconeApiValueError", + "PineconeApiKeyError", + "PineconeApiException", + "NotFoundException", + "UnauthorizedException", + "ForbiddenException", + "ServiceException", + "ListConversionException", +] diff --git a/pinecone/config/config.py b/pinecone/config/config.py index 86c03649..9029c45a 100644 --- a/pinecone/config/config.py +++ b/pinecone/config/config.py @@ -5,7 +5,7 @@ from pinecone.config.openapi_config_factory import OpenApiConfigFactory if TYPE_CHECKING: - from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration + from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration # Duplicated this util to help resolve circular imports diff --git a/pinecone/config/openapi_configuration.py b/pinecone/config/openapi_configuration.py index 9be701be..fce6defc 100644 --- a/pinecone/config/openapi_configuration.py +++ b/pinecone/config/openapi_configuration.py @@ -82,7 +82,7 @@ class Configuration: You can programmatically set the cookie: - conf = pinecone.openapi_support.Configuration( + conf = pinecone.config.openapi_configuration.Configuration( api_key={'cookieAuth': 'abc123'} api_key_prefix={'cookieAuth': 'JSESSIONID'} ) diff --git a/pinecone/control/__init__.py b/pinecone/control/__init__.py new file mode 100644 index 00000000..4f04e477 --- /dev/null +++ b/pinecone/control/__init__.py @@ -0,0 +1,9 @@ +import warnings + +warnings.warn( + "The module at `pinecone.control` has moved to `pinecone.db_control`. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +from pinecone.db_control import * diff --git a/pinecone/data/__init__.py b/pinecone/data/__init__.py new file mode 100644 index 00000000..0268ac16 --- /dev/null +++ b/pinecone/data/__init__.py @@ -0,0 +1,10 @@ +import warnings + +warnings.warn( + "The module at `pinecone.data` has moved to `pinecone.db_data`. " + "Please update your imports. 
" + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +from pinecone.db_data import * diff --git a/pinecone/data/features/__init__.py b/pinecone/data/features/__init__.py new file mode 100644 index 00000000..fd64a554 --- /dev/null +++ b/pinecone/data/features/__init__.py @@ -0,0 +1,10 @@ +import warnings + +warnings.warn( + "The module at `pinecone.data.features` has moved to `pinecone.db_data.features`. " + "Please update your imports. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +from pinecone.db_data.features import * diff --git a/pinecone/data/features/bulk_imports/__init__.py b/pinecone/data/features/bulk_imports/__init__.py new file mode 100644 index 00000000..740d503b --- /dev/null +++ b/pinecone/data/features/bulk_imports/__init__.py @@ -0,0 +1,10 @@ +import warnings + +warnings.warn( + "The module at `pinecone.data.features.bulk_import` has moved to `pinecone.db_data.features.bulk_import`. " + "Please update your imports. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +from pinecone.db_data.features.bulk_import import * diff --git a/pinecone/db_data/features/inference/__init__.py b/pinecone/data/features/inference/__init__.py similarity index 53% rename from pinecone/db_data/features/inference/__init__.py rename to pinecone/data/features/inference/__init__.py index 897b4f4f..b0918dd5 100644 --- a/pinecone/db_data/features/inference/__init__.py +++ b/pinecone/data/features/inference/__init__.py @@ -2,8 +2,7 @@ warnings.warn( "The module at `pinecone.data.features.inference` has moved to `pinecone.inference`. " - "Please update your imports from `from pinecone.data.features.inference import Inference, AsyncioInference, RerankModel, EmbedModel` " - "to `from pinecone.inference import Inference, AsyncioInference, RerankModel, EmbedModel`. " + "Please update your imports. 
" "This warning will become an error in a future version of the Pinecone Python SDK.", DeprecationWarning, ) diff --git a/pinecone/db_control/models/collection_list.py b/pinecone/db_control/models/collection_list.py index 508ec685..f36a9708 100644 --- a/pinecone/db_control/models/collection_list.py +++ b/pinecone/db_control/models/collection_list.py @@ -1,5 +1,7 @@ import json -from pinecone.core.openapi.db_control.models import CollectionList as OpenAPICollectionList +from pinecone.core.openapi.db_control.model.collection_list import ( + CollectionList as OpenAPICollectionList, +) class CollectionList: diff --git a/pinecone/db_control/models/index_list.py b/pinecone/db_control/models/index_list.py index 71242e24..e918b4f5 100644 --- a/pinecone/db_control/models/index_list.py +++ b/pinecone/db_control/models/index_list.py @@ -1,5 +1,5 @@ import json -from pinecone.core.openapi.db_control.models import IndexList as OpenAPIIndexList +from pinecone.core.openapi.db_control.model.index_list import IndexList as OpenAPIIndexList from .index_model import IndexModel from typing import List diff --git a/pinecone/db_control/request_factory.py b/pinecone/db_control/request_factory.py index 2e796745..c2ecc905 100644 --- a/pinecone/db_control/request_factory.py +++ b/pinecone/db_control/request_factory.py @@ -2,25 +2,35 @@ from typing import Optional, Dict, Any, Union from enum import Enum +from pinecone.utils import parse_non_empty_args, convert_enum_to_string -from pinecone.utils import convert_enum_to_string -from pinecone.core.openapi.db_control.models import ( - CreateCollectionRequest, +from pinecone.core.openapi.db_control.model.create_collection_request import CreateCollectionRequest +from pinecone.core.openapi.db_control.model.create_index_for_model_request import ( CreateIndexForModelRequest, +) +from pinecone.core.openapi.db_control.model.create_index_for_model_request_embed import ( CreateIndexForModelRequestEmbed, - CreateIndexRequest, - ConfigureIndexRequest, +) +from pinecone.core.openapi.db_control.model.create_index_request import CreateIndexRequest +from pinecone.core.openapi.db_control.model.configure_index_request import ConfigureIndexRequest +from pinecone.core.openapi.db_control.model.configure_index_request_spec import ( ConfigureIndexRequestSpec, +) +from pinecone.core.openapi.db_control.model.configure_index_request_spec_pod import ( ConfigureIndexRequestSpecPod, +) +from pinecone.core.openapi.db_control.model.deletion_protection import ( DeletionProtection as DeletionProtectionModel, - IndexSpec, - IndexTags, +) +from pinecone.core.openapi.db_control.model.index_spec import IndexSpec +from pinecone.core.openapi.db_control.model.index_tags import IndexTags +from pinecone.core.openapi.db_control.model.serverless_spec import ( ServerlessSpec as ServerlessSpecModel, - PodSpec as PodSpecModel, - PodSpecMetadataConfig, ) +from pinecone.core.openapi.db_control.model.pod_spec import PodSpec as PodSpecModel +from pinecone.core.openapi.db_control.model.pod_spec_metadata_config import PodSpecMetadataConfig + from pinecone.db_control.models import ServerlessSpec, PodSpec, IndexModel, IndexEmbed -from pinecone.utils import parse_non_empty_args from pinecone.db_control.enums import ( Metric, @@ -58,6 +68,7 @@ def __parse_deletion_protection( deletion_protection: Union[DeletionProtection, str], ) -> DeletionProtectionModel: deletion_protection = convert_enum_to_string(deletion_protection) + print(deletion_protection) if deletion_protection in ["enabled", "disabled"]: return 
DeletionProtectionModel(deletion_protection) else: diff --git a/pinecone/db_data/dataclasses/search_rerank.py b/pinecone/db_data/dataclasses/search_rerank.py index 1b9534ba..0ac4ca4e 100644 --- a/pinecone/db_data/dataclasses/search_rerank.py +++ b/pinecone/db_data/dataclasses/search_rerank.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from typing import Optional, Dict, Any, List -from ..features.inference import RerankModel +from pinecone.inference import RerankModel @dataclass diff --git a/pinecone/db_data/features/__init__.py b/pinecone/db_data/features/__init__.py deleted file mode 100644 index b8f2fddb..00000000 --- a/pinecone/db_data/features/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -import warnings - -# Display a warning for old imports -warnings.warn( - "The module at `pinecone.data.features.inference` has moved to `pinecone.inference`. " - "Please update your imports from `from pinecone.data.features.inference import Inference, AsyncioInference, RerankModel, EmbedModel` " - "to `from pinecone.inference import Inference, AsyncioInference, RerankModel, EmbedModel`. " - "This warning will become an error in a future version of the Pinecone Python SDK.", - DeprecationWarning, -) - -# Import from the new location to maintain backward compatibility -from pinecone.inference import Inference, AsyncioInference, RerankModel, EmbedModel diff --git a/pinecone/db_data/models/__init__.py b/pinecone/db_data/models/__init__.py new file mode 100644 index 00000000..a14d3600 --- /dev/null +++ b/pinecone/db_data/models/__init__.py @@ -0,0 +1 @@ +from pinecone.core.openapi.db_data.models import * diff --git a/pinecone/db_data/types/search_rerank_typed_dict.py b/pinecone/db_data/types/search_rerank_typed_dict.py index 89c4f8d8..2d04fe82 100644 --- a/pinecone/db_data/types/search_rerank_typed_dict.py +++ b/pinecone/db_data/types/search_rerank_typed_dict.py @@ -1,5 +1,5 @@ from typing import TypedDict, Optional, Union, Dict, Any -from ..features.inference import RerankModel +from pinecone.inference import RerankModel class SearchRerankTypedDict(TypedDict): diff --git a/pinecone/models/__init__.py b/pinecone/models/__init__.py new file mode 100644 index 00000000..74a1658c --- /dev/null +++ b/pinecone/models/__init__.py @@ -0,0 +1,9 @@ +import warnings + +warnings.warn( + "The module at `pinecone.models` has moved to `pinecone.db_control.models`. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +from pinecone.db_control.models import * diff --git a/pinecone/pinecone.py b/pinecone/pinecone.py index a694dcb0..38462390 100644 --- a/pinecone/pinecone.py +++ b/pinecone/pinecone.py @@ -42,10 +42,7 @@ class Pinecone(PluginAware, LegacyPineconeDBControlInterface): """ - A client for interacting with Pinecone's vector database. - - This class implements methods for managing and interacting with Pinecone resources - such as collections and indexes. + A client for interacting with Pinecone APIs. 
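+
+    A minimal usage sketch (the api_key value is a placeholder, and the
+    index name, dimension, cloud, and region shown are illustrative):
+
+        from pinecone import Pinecone, ServerlessSpec
+
+        pc = Pinecone(api_key="YOUR_API_KEY")
+        pc.create_index(
+            name="my-index",
+            dimension=1536,
+            metric="cosine",
+            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
+        )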
""" def __init__( @@ -127,12 +124,10 @@ def create_index( name: str, spec: Union[Dict, "ServerlessSpec", "PodSpec"], dimension: Optional[int] = None, - metric: Optional[Union["Metric", str]] = "Metric.COSINE", + metric: Optional[Union["Metric", str]] = "cosine", timeout: Optional[int] = None, - deletion_protection: Optional[ - Union["DeletionProtection", str] - ] = "DeletionProtection.DISABLED", - vector_type: Optional[Union["VectorType", str]] = "VectorType.DENSE", + deletion_protection: Optional[Union["DeletionProtection", str]] = "disabled", + vector_type: Optional[Union["VectorType", str]] = "dense", tags: Optional[Dict[str, str]] = None, ) -> "IndexModel": return self.db.index.create( diff --git a/pinecone/utils/find_legacy_imports.py b/pinecone/utils/find_legacy_imports.py new file mode 100755 index 00000000..5421de28 --- /dev/null +++ b/pinecone/utils/find_legacy_imports.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +""" +Script to identify legacy imports that were previously available via star imports. + +This script analyzes the codebase to find all imports that were previously available +via star imports but are no longer imported at the top level. +""" + +import ast +import os +from typing import Set + + +def find_star_imports(file_path: str) -> Set[str]: + """ + Find all star imports in a file. + + Args: + file_path: Path to the file to analyze. + + Returns: + Set of module names that are imported with star imports. + """ + with open(file_path, "r") as f: + content = f.read() + + try: + tree = ast.parse(content) + except SyntaxError: + print(f"Warning: Could not parse {file_path}") + return set() + + star_imports = set() + + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom) and node.names[0].name == "*": + module_name = node.module + if module_name: + star_imports.add(module_name) + + return star_imports + + +def find_imported_names(file_path: str) -> Set[str]: + """ + Find all names that are imported in a file. + + Args: + file_path: Path to the file to analyze. + + Returns: + Set of imported names. + """ + with open(file_path, "r") as f: + content = f.read() + + try: + tree = ast.parse(content) + except SyntaxError: + print(f"Warning: Could not parse {file_path}") + return set() + + imported_names = set() + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for name in node.names: + imported_names.add(name.name) + elif isinstance(node, ast.ImportFrom): + for name in node.names: + if name.name != "*": + imported_names.add(name.name) + + return imported_names + + +def find_module_exports(module_path: str) -> Set[str]: + """ + Find all names that are exported by a module. + + Args: + module_path: Path to the module to analyze. + + Returns: + Set of exported names. + """ + try: + module = __import__(module_path, fromlist=["*"]) + return set(dir(module)) + except ImportError: + print(f"Warning: Could not import {module_path}") + return set() + + +def main(): + """ + Main function to find legacy imports. 
+ """ + # Get the package root directory + package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # Find the __init__.py file + init_file = os.path.join(package_root, "__init__.py") + + # Find star imports in the __init__.py file + star_imports = find_star_imports(init_file) + + # Find all imported names in the __init__.py file + imported_names = find_imported_names(init_file) + + # Find all module exports + module_exports = {} + for module_name in star_imports: + module_exports[module_name] = find_module_exports(module_name) + + # Find all files in the package + package_files = [] + for root, _, files in os.walk(package_root): + for file in files: + if file.endswith(".py") and not file.startswith("__"): + package_files.append(os.path.join(root, file)) + + # Find all imports in the package + package_imports = set() + for file in package_files: + package_imports.update(find_imported_names(file)) + + # Find legacy imports + legacy_imports = {} + for module_name, exports in module_exports.items(): + for export in exports: + if export in package_imports and export not in imported_names: + legacy_imports[f"pinecone.{export}"] = (module_name, export) + + # Print the legacy imports + print("LEGACY_IMPORTS = {") + for legacy_name, (module_path, actual_name) in sorted(legacy_imports.items()): + print(f" '{legacy_name}': ('{module_path}', '{actual_name}'),") + print("}") + + +if __name__ == "__main__": + main() diff --git a/pinecone/utils/lazy_imports.py b/pinecone/utils/lazy_imports.py new file mode 100644 index 00000000..0a55c8f4 --- /dev/null +++ b/pinecone/utils/lazy_imports.py @@ -0,0 +1,76 @@ +""" +Lazy import handler for Pinecone. + +This module provides a way to lazily load imports that were previously +available via star imports but are no longer imported at the top level. +""" + +import importlib +import sys +from types import ModuleType +from typing import Dict, Optional, Tuple, cast + +# Dictionary mapping import names to their actual module paths +# Format: 'name': ('module_path', 'actual_name') +LAZY_IMPORTS: Dict[str, Tuple[str, str]] = { + # Example: 'Vector': ('pinecone.db_data.models', 'Vector') + # Add all your lazy imports here +} + + +class LazyModule: + def __init__(self, original_module, lazy_imports): + self._original_module = original_module + self._lazy_imports = lazy_imports + self._loaded_attrs = {} + + def __dir__(self): + # Get the base directory listing from the original module + base_dir = dir(self._original_module) + + # Add lazy-loaded items + lazy_dir = list(self._lazy_imports.keys()) + + # Return combined list + return sorted(set(base_dir + lazy_dir)) + + def __getattr__(self, name): + # First try the original module + try: + return getattr(self._original_module, name) + except AttributeError: + pass + + # Then try lazy imports + if name in self._lazy_imports: + if name not in self._loaded_attrs: + module_path, item_name = self._lazy_imports[name] + module = importlib.import_module(module_path) + self._loaded_attrs[name] = getattr(module, item_name) + return self._loaded_attrs[name] + + raise AttributeError(f"module '{self._original_module.__name__}' has no attribute '{name}'") + + +def setup_lazy_imports(lazy_imports: Optional[Dict[str, Tuple[str, str]]] = None) -> None: + """ + Set up the lazy import handler. + + Args: + lazy_imports: Optional dictionary of imports to handle lazily. + If None, uses the default LAZY_IMPORTS dictionary. 
+ """ + if lazy_imports is None: + lazy_imports = LAZY_IMPORTS + + # Only proceed if the pinecone module is already loaded + if "pinecone" not in sys.modules: + return + + # Create a proxy for the pinecone module + original_module = sys.modules["pinecone"] + proxy = LazyModule(original_module, lazy_imports) + + # Replace the pinecone module with our proxy + # Use a type cast to satisfy the type checker + sys.modules["pinecone"] = cast(ModuleType, proxy) diff --git a/pinecone/utils/legacy_imports.py b/pinecone/utils/legacy_imports.py new file mode 100644 index 00000000..9013acdd --- /dev/null +++ b/pinecone/utils/legacy_imports.py @@ -0,0 +1,112 @@ +""" +Legacy import handler for Pinecone. + +This module provides a simple way to handle legacy imports that were previously +available via star imports but are no longer imported at the top level. +""" + +import importlib +import sys +from types import ModuleType +from typing import Dict, Optional, Set, Any, Tuple, cast + +# Dictionary mapping legacy import names to their actual module paths +# Format: 'name': ('module_path', 'actual_name') +LEGACY_IMPORTS: Dict[str, Tuple[str, str]] = { + # Example: 'Vector': ('pinecone.db_data.models', 'Vector') + # Add all your legacy imports here +} + + +class LegacyImportProxy: + """ + A proxy module that handles legacy imports with warnings. + + This class is used to replace the pinecone module in sys.modules + to handle legacy imports that were previously available via star imports. + """ + + def __init__(self, original_module: Any, legacy_imports: Dict[str, Tuple[str, str]]): + """ + Initialize the proxy module. + + Args: + original_module: The original module to proxy. + legacy_imports: Dictionary of legacy imports to handle. + """ + self._original_module = original_module + self._legacy_imports = legacy_imports + self._warned_imports: Set[str] = set() + self._loaded_modules: Dict[str, Any] = {} + + def __getattr__(self, name: str) -> Any: + """ + Handle attribute access for legacy imports. + + Args: + name: The name of the attribute being accessed. + + Returns: + The requested attribute. + + Raises: + AttributeError: If the attribute cannot be found. + """ + # First, try to get the attribute from the original module + try: + return getattr(self._original_module, name) + except AttributeError: + pass + + # Check if this is a legacy import + if name in self._legacy_imports: + module_path, actual_name = self._legacy_imports[name] + + # Only warn once per import + # if name not in self._warned_imports: + # warnings.warn( + # f"Importing '{name}' directly from 'pinecone' is deprecated. " + # f"Please import it from '{module_path}' instead. " + # f"This import will be removed in a future version.", + # DeprecationWarning, + # stacklevel=2 + # ) + # self._warned_imports.add(name) + + # Load the module if not already loaded + if module_path not in self._loaded_modules: + try: + self._loaded_modules[module_path] = importlib.import_module(module_path) + except ImportError: + raise AttributeError(f"module 'pinecone' has no attribute '{name}'") + + # Get the actual object + module = self._loaded_modules[module_path] + if hasattr(module, actual_name): + return getattr(module, actual_name) + + raise AttributeError(f"module 'pinecone' has no attribute '{name}'") + + +def setup_legacy_imports(legacy_imports: Optional[Dict[str, Tuple[str, str]]] = None) -> None: + """ + Set up the legacy import handler. + + Args: + legacy_imports: Optional dictionary of legacy imports to handle. 
+ If None, uses the default LEGACY_IMPORTS dictionary. + """ + if legacy_imports is None: + legacy_imports = LEGACY_IMPORTS + + # Only proceed if the pinecone module is already loaded + if "pinecone" not in sys.modules: + return + + # Create a proxy for the pinecone module + original_module = sys.modules["pinecone"] + proxy = LegacyImportProxy(original_module, legacy_imports) + + # Replace the pinecone module with our proxy + # Use a type cast to satisfy the type checker + sys.modules["pinecone"] = cast(ModuleType, proxy) diff --git a/tests/unit/test_control.py b/tests/unit/test_control.py index ad3b2872..da252063 100644 --- a/tests/unit/test_control.py +++ b/tests/unit/test_control.py @@ -87,37 +87,38 @@ def test_plugins_are_lazily_loaded(self): def test_default_host(self): p = Pinecone(api_key="123-456-789") - assert p.index_api.api_client.configuration.host == "https://api.pinecone.io" + assert p.db.index_api.api_client.configuration.host == "https://api.pinecone.io" def test_passing_host(self): p = Pinecone(api_key="123-456-789", host="my-host.pinecone.io") - assert p.index_api.api_client.configuration.host == "https://my-host.pinecone.io" + assert p.db.index_api.api_client.configuration.host == "https://my-host.pinecone.io" def test_passing_additional_headers(self): extras = {"header1": "my-value", "header2": "my-value2"} p = Pinecone(api_key="123-456-789", additional_headers=extras) for key, value in extras.items(): - assert p.index_api.api_client.default_headers[key] == value - assert "User-Agent" in p.index_api.api_client.default_headers - assert "X-Pinecone-API-Version" in p.index_api.api_client.default_headers - assert "header1" in p.index_api.api_client.default_headers - assert "header2" in p.index_api.api_client.default_headers - assert len(p.index_api.api_client.default_headers) == 4 + assert p.db.index_api.api_client.default_headers[key] == value + assert "User-Agent" in p.db.index_api.api_client.default_headers + assert "X-Pinecone-API-Version" in p.db.index_api.api_client.default_headers + assert "header1" in p.db.index_api.api_client.default_headers + assert "header2" in p.db.index_api.api_client.default_headers + assert len(p.db.index_api.api_client.default_headers) == 4 def test_overwrite_useragent(self): # This doesn't seem like a common use case, but we may want to allow this # when embedding the client in other pinecone tools such as canopy. 
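        # Overriding User-Agent replaces the SDK default, while the
        # X-Pinecone-API-Version header is still injected automatically.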
extras = {"User-Agent": "test-user-agent"} p = Pinecone(api_key="123-456-789", additional_headers=extras) - assert "X-Pinecone-API-Version" in p.index_api.api_client.default_headers - assert p.index_api.api_client.default_headers["User-Agent"] == "test-user-agent" - assert len(p.index_api.api_client.default_headers) == 2 + assert "X-Pinecone-API-Version" in p.db.index_api.api_client.default_headers + assert p.db.index_api.api_client.default_headers["User-Agent"] == "test-user-agent" + assert len(p.db.index_api.api_client.default_headers) == 2 def test_set_source_tag_in_useragent(self): p = Pinecone(api_key="123-456-789", source_tag="test_source_tag") assert ( - re.search(r"source_tag=test_source_tag", p.index_api.api_client.user_agent) is not None + re.search(r"source_tag=test_source_tag", p.db.index_api.api_client.user_agent) + is not None ) @pytest.mark.parametrize( @@ -149,8 +150,8 @@ def test_create_index_with_timeout( expected_sleep_calls, ): p = Pinecone(api_key="123-456-789") - mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_responses) - mocker.patch.object(p.index_api, "create_index") + mocker.patch.object(p.db.index_api, "describe_index", side_effect=describe_index_responses) + mocker.patch.object(p.db.index_api, "create_index") mocker.patch("time.sleep") p.create_index( @@ -160,8 +161,8 @@ def test_create_index_with_timeout( timeout=timeout_value, ) - assert p.index_api.create_index.call_count == 1 - assert p.index_api.describe_index.call_count == expected_describe_index_calls + assert p.db.index_api.create_index.call_count == 1 + assert p.db.index_api.describe_index.call_count == expected_describe_index_calls assert time.sleep.call_count == expected_sleep_calls @pytest.mark.parametrize( @@ -210,7 +211,7 @@ def test_create_index_with_spec_dictionary(self, mocker, index_spec): p = Pinecone(api_key="123-456-789") mock_api = MagicMock() - mocker.patch.object(p, "index_api", mock_api) + mocker.patch.object(p.db, "index_api", mock_api) p.create_index(name="my-index", dimension=10, spec=index_spec) @@ -245,8 +246,8 @@ def test_create_index_from_source_collection( expected_sleep_calls, ): p = Pinecone(api_key="123-456-789") - mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_responses) - mocker.patch.object(p.index_api, "create_index") + mocker.patch.object(p.db.index_api, "describe_index", side_effect=describe_index_responses) + mocker.patch.object(p.db.index_api, "create_index") mocker.patch("time.sleep") p.create_index( @@ -256,17 +257,19 @@ def test_create_index_from_source_collection( timeout=timeout_value, ) - assert p.index_api.create_index.call_count == 1 - assert p.index_api.describe_index.call_count == expected_describe_index_calls + assert p.db.index_api.create_index.call_count == 1 + assert p.db.index_api.describe_index.call_count == expected_describe_index_calls assert time.sleep.call_count == expected_sleep_calls def test_create_index_when_timeout_exceeded(self, mocker): with pytest.raises(TimeoutError): p = Pinecone(api_key="123-456-789") - mocker.patch.object(p.index_api, "create_index") + mocker.patch.object(p.db.index_api, "create_index") describe_index_response = [description_with_status(False)] * 5 - mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_response) + mocker.patch.object( + p.db.index_api, "describe_index", side_effect=describe_index_response + ) mocker.patch("time.sleep") p.create_index( @@ -276,7 +279,7 @@ def test_create_index_when_timeout_exceeded(self, mocker): def 
test_list_indexes_returns_iterable(self, mocker, index_list_response): p = Pinecone(api_key="123-456-789") - mocker.patch.object(p.index_api, "list_indexes", side_effect=[index_list_response]) + mocker.patch.object(p.db.index_api, "list_indexes", side_effect=[index_list_response]) response = p.list_indexes() assert [i.name for i in response] == ["index1", "index2", "index3"] diff --git a/tests/unit/test_plugin_aware.py b/tests/unit/test_plugin_aware.py index 7f4329d1..315bd225 100644 --- a/tests/unit/test_plugin_aware.py +++ b/tests/unit/test_plugin_aware.py @@ -1,7 +1,6 @@ import pytest from pinecone.utils.plugin_aware import PluginAware -from pinecone.config import Config -from pinecone.openapi_support.configuration import Configuration as OpenApiConfig +from pinecone.config import Config, OpenApiConfiguration class TestPluginAware: @@ -22,7 +21,7 @@ def test_correctly_raise_attribute_errors(self): class Foo(PluginAware): def __init__(self): self.config = Config() - self.openapi_config = OpenApiConfig() + self.openapi_config = OpenApiConfiguration() self.pool_threads = 1 super().__init__() @@ -38,7 +37,7 @@ def test_plugins_are_lazily_loaded(self): class Pinecone(PluginAware): def __init__(self): self.config = Config() - self.openapi_config = OpenApiConfig() + self.openapi_config = OpenApiConfiguration() self.pool_threads = 10 super().__init__() diff --git a/tests/upgrade/test_all.py b/tests/upgrade/test_all.py new file mode 100644 index 00000000..acabf620 --- /dev/null +++ b/tests/upgrade/test_all.py @@ -0,0 +1,28 @@ +class TestAll: + def test_all_is_complete(self): + """Test that __all__ is complete and accurate.""" + # Import the module + import pinecone + + # Get all public names (those that don't start with _) + public_names = {name for name in dir(pinecone) if not name.startswith("_")} + + # Get __all__ if it exists, otherwise empty set + all_names = set(getattr(pinecone, "__all__", [])) + + # Check that __all__ exists + assert hasattr(pinecone, "__all__"), "Module should have __all__ defined" + + # Check that all names in __all__ are actually importable + for name in all_names: + assert getattr(pinecone, name) is not None, f"Name {name} in __all__ is not importable" + + # Check that all public names are in __all__ + missing_from_all = public_names - all_names + for name in missing_from_all: + print(f"Public name {name} is not in __all__") + assert not missing_from_all, f"Public names not in __all__: {missing_from_all}" + + # Check that __all__ doesn't contain any private names + private_in_all = {name for name in all_names if name.startswith("_")} + assert not private_in_all, f"Private names in __all__: {private_in_all}" diff --git a/tests/upgrade/test_reorganization.py b/tests/upgrade/test_reorganization.py new file mode 100644 index 00000000..331681b7 --- /dev/null +++ b/tests/upgrade/test_reorganization.py @@ -0,0 +1,19 @@ +import pytest + + +class TestReorganization: + def test_data(self): + with pytest.warns(DeprecationWarning) as warning_info: + from pinecone.data import Index + + assert Index is not None + assert len(warning_info) > 0 + assert "has moved to" in str(warning_info[0].message) + + def test_config(self): + with pytest.warns(DeprecationWarning) as warning_info: + from pinecone.config import PineconeConfig + + assert PineconeConfig is not None + assert len(warning_info) > 0 + assert "has moved to" in str(warning_info[0].message) diff --git a/tests/upgrade/test_v6_upgrade.py b/tests/upgrade/test_v6_upgrade.py index 358ce0e5..6532f65f 100644 --- 
a/tests/upgrade/test_v6_upgrade.py +++ b/tests/upgrade/test_v6_upgrade.py @@ -1,7 +1,79 @@ import pinecone +import logging +logger = logging.getLogger(__name__) + + +class TestExpectedImports_UpgradeFromV6: + def test_mapped_data_imports(self): + data_imports = [ + "Vector", + "QueryRequest", + "FetchResponse", + "DeleteRequest", + "DescribeIndexStatsRequest", + "DescribeIndexStatsResponse", + "RpcStatus", + "ScoredVector", + "ServiceException", + "SingleQueryResults", + "QueryResponse", + "RerankModel", + "SearchQuery", + "SearchQueryVector", + "SearchRerank", + "UpsertResponse", + "UpdateRequest", + ] + + control_imports = [ + "CollectionDescription", + "CollectionList", + "ServerlessSpec", + "ServerlessSpecDefinition", + "PodSpec", + "PodSpecDefinition", + # 'ForbiddenException', + # 'ImportErrorMode', + # 'Index', + "IndexList", + "IndexModel", + # 'ListConversionException', + # 'MetadataDictionaryExpectedError', + # 'NotFoundException', + ] + + config_imports = [ + "Config", + "ConfigBuilder", + "PineconeConfig", + "PineconeConfigurationError", + "PineconeException", + "PineconeProtocolError", + "PineconeApiAttributeError", + "PineconeApiException", + ] + + exception_imports = [ + "PineconeConfigurationError", + "PineconeProtocolError", + "PineconeException", + "PineconeApiAttributeError", + "PineconeApiTypeError", + "PineconeApiValueError", + "PineconeApiKeyError", + "PineconeApiException", + "NotFoundException", + "UnauthorizedException", + "ForbiddenException", + "ServiceException", + "ListConversionException", + ] + mapped_imports = data_imports + control_imports + config_imports + exception_imports + + for import_name in mapped_imports: + assert hasattr(pinecone, import_name), f"Import {import_name} not found in pinecone" -class TestV6Upgrade: def test_v6_upgrade_root_imports(self): v6_dir_items = [ "CollectionDescription", @@ -100,76 +172,92 @@ def test_v6_upgrade_root_imports(self): "warnings", ] + intentionally_removed_items = ["os"] + + expected_items = [item for item in v6_dir_items if item not in intentionally_removed_items] + missing_items = [] - for item in v6_dir_items: - if item not in dir(pinecone): + for item in expected_items: + if not hasattr(pinecone, item): missing_items.append(item) + logger.debug(f"Exported: ❌ {item}") + else: + logger.debug(f"Exported: ✅ {item}") + + extra_items = [] + for item in intentionally_removed_items: + if hasattr(pinecone, item): + extra_items.append(item) + logger.debug(f"Removed: ❌ {item}") + else: + logger.debug(f"Removed: ✅ {item}") assert len(missing_items) == 0, f"Missing items: {missing_items}" + assert len(extra_items) == 0, f"Extra items: {extra_items}" - def test_v6_upgrade_data_imports(self): - v6_data_dir_items = [ - "DescribeIndexStatsResponse", - "EmbedModel", - "FetchResponse", - "ImportErrorMode", - "Index", - "IndexClientInstantiationError", - "Inference", - "InferenceInstantiationError", - "MetadataDictionaryExpectedError", - "QueryResponse", - "RerankModel", - "SearchQuery", - "SearchQueryVector", - "SearchRerank", - "SparseValues", - "SparseValuesDictionaryExpectedError", - "SparseValuesMissingKeysError", - "SparseValuesTypeError", - "UpsertResponse", - "Vector", - "VectorDictionaryExcessKeysError", - "VectorDictionaryMissingKeysError", - "VectorTupleLengthError", - "_AsyncioInference", - "_Index", - "_IndexAsyncio", - "_Inference", - "__builtins__", - "__cached__", - "__doc__", - "__file__", - "__loader__", - "__name__", - "__package__", - "__path__", - "__spec__", - "dataclasses", - "errors", - "features", - 
"fetch_response", - "import_error", - "index", - "index_asyncio", - "index_asyncio_interface", - "interfaces", - "query_results_aggregator", - "request_factory", - "search_query", - "search_query_vector", - "search_rerank", - "sparse_values", - "sparse_values_factory", - "types", - "utils", - "vector", - "vector_factory", - ] + # def test_v6_upgrade_data_imports(self): + # v6_data_dir_items = [ + # "DescribeIndexStatsResponse", + # "EmbedModel", + # "FetchResponse", + # "ImportErrorMode", + # "Index", + # "IndexClientInstantiationError", + # "Inference", + # "InferenceInstantiationError", + # "MetadataDictionaryExpectedError", + # "QueryResponse", + # "RerankModel", + # "SearchQuery", + # "SearchQueryVector", + # "SearchRerank", + # "SparseValues", + # "SparseValuesDictionaryExpectedError", + # "SparseValuesMissingKeysError", + # "SparseValuesTypeError", + # "UpsertResponse", + # "Vector", + # "VectorDictionaryExcessKeysError", + # "VectorDictionaryMissingKeysError", + # "VectorTupleLengthError", + # "_AsyncioInference", + # "_Index", + # "_IndexAsyncio", + # "_Inference", + # "__builtins__", + # "__cached__", + # "__doc__", + # "__file__", + # "__loader__", + # "__name__", + # "__package__", + # "__path__", + # "__spec__", + # "dataclasses", + # "errors", + # "features", + # "fetch_response", + # "import_error", + # "index", + # "index_asyncio", + # "index_asyncio_interface", + # "interfaces", + # "query_results_aggregator", + # "request_factory", + # "search_query", + # "search_query_vector", + # "search_rerank", + # "sparse_values", + # "sparse_values_factory", + # "types", + # "utils", + # "vector", + # "vector_factory", + # ] - missing_items = [] - for item in v6_data_dir_items: - if item not in dir(pinecone.db_data): - missing_items.append(item) + # missing_items = [] + # for item in v6_data_dir_items: + # if item not in dir(pinecone.db_data): + # missing_items.append(item) - assert len(missing_items) == 0, f"Missing items: {missing_items}" + # assert len(missing_items) == 0, f"Missing items: {missing_items}" From 0584c6320f4aca69ad9bf73ca01a0961c8b5e61f Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 23 Apr 2025 15:32:55 -0400 Subject: [PATCH 10/13] Add missing exports --- pinecone/__init__.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/pinecone/__init__.py b/pinecone/__init__.py index f7d8fce9..2714bb6c 100644 --- a/pinecone/__init__.py +++ b/pinecone/__init__.py @@ -37,6 +37,22 @@ "UpsertResponse": ("pinecone.db_data.models", "UpsertResponse"), "UpdateRequest": ("pinecone.db_data.models", "UpdateRequest"), "SparseValues": ("pinecone.db_data.models", "SparseValues"), + "ImportErrorMode": ("pinecone.core.openapi.db_data.model", "ImportErrorMode"), + "VectorDictionaryMissingKeysError": ( + "pinecone.db_data.errors", + "VectorDictionaryMissingKeysError", + ), + "VectorDictionaryExcessKeysError": ( + "pinecone.db_data.errors", + "VectorDictionaryExcessKeysError", + ), + "VectorTupleLengthError": ("pinecone.db_data.errors", "VectorTupleLengthError"), + "SparseValuesTypeError": ("pinecone.db_data.errors", "SparseValuesTypeError"), + "SparseValuesMissingKeysError": ("pinecone.db_data.errors", "SparseValuesMissingKeysError"), + "SparseValuesDictionaryExpectedError": ( + "pinecone.db_data.errors", + "SparseValuesDictionaryExpectedError", + ), } _db_control_lazy_imports = { @@ -105,7 +121,7 @@ "Config", "ConfigBuilder", "PineconeConfig", - # DB control classes + # OpenAPI classes "CloudProvider", "AwsRegion", "GcpRegion", @@ -119,6 +135,7 
@@ "IndexList", "IndexModel", "IndexEmbed", + "ImportErrorMode", "ServerlessSpec", "ServerlessSpecDefinition", "PodSpec", @@ -158,4 +175,10 @@ "ForbiddenException", "ServiceException", "ListConversionException", + "VectorDictionaryMissingKeysError", + "VectorDictionaryExcessKeysError", + "VectorTupleLengthError", + "SparseValuesTypeError", + "SparseValuesMissingKeysError", + "SparseValuesDictionaryExpectedError", ] From cd15bf90dc29995daaca72698ceb881fbd7deb3b Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Fri, 25 Apr 2025 14:42:27 -0400 Subject: [PATCH 11/13] Fix unit tests --- pinecone/__init__.py | 10 +++++----- pyproject.toml | 2 +- tests/unit/test_config.py | 14 +++++++++++--- tests/unit/test_index_initialization.py | 3 ++- tests/unit/utils/test_docs_links.py | 10 ++++++++-- 5 files changed, 27 insertions(+), 12 deletions(-) diff --git a/pinecone/__init__.py b/pinecone/__init__.py index 2714bb6c..f228eddf 100644 --- a/pinecone/__init__.py +++ b/pinecone/__init__.py @@ -21,7 +21,11 @@ } _db_data_lazy_imports = { - "Vector": ("pinecone.db_data.models", "Vector"), + "Vector": ("pinecone.db_data.dataclasses", "Vector"), + "SparseValues": ("pinecone.db_data.dataclasses", "SparseValues"), + "SearchQuery": ("pinecone.db_data.dataclasses", "SearchQuery"), + "SearchQueryVector": ("pinecone.db_data.dataclasses", "SearchQueryVector"), + "SearchRerank": ("pinecone.db_data.dataclasses", "SearchRerank"), "FetchResponse": ("pinecone.db_data.models", "FetchResponse"), "DeleteRequest": ("pinecone.db_data.models", "DeleteRequest"), "DescribeIndexStatsRequest": ("pinecone.db_data.models", "DescribeIndexStatsRequest"), @@ -31,12 +35,8 @@ "SingleQueryResults": ("pinecone.db_data.models", "SingleQueryResults"), "QueryRequest": ("pinecone.db_data.models", "QueryRequest"), "QueryResponse": ("pinecone.db_data.models", "QueryResponse"), - "SearchQuery": ("pinecone.db_data.dataclasses", "SearchQuery"), - "SearchQueryVector": ("pinecone.db_data.dataclasses", "SearchQueryVector"), - "SearchRerank": ("pinecone.db_data.dataclasses", "SearchRerank"), "UpsertResponse": ("pinecone.db_data.models", "UpsertResponse"), "UpdateRequest": ("pinecone.db_data.models", "UpdateRequest"), - "SparseValues": ("pinecone.db_data.models", "SparseValues"), "ImportErrorMode": ("pinecone.core.openapi.db_data.model", "ImportErrorMode"), "VectorDictionaryMissingKeysError": ( "pinecone.db_data.errors", diff --git a/pyproject.toml b/pyproject.toml index 5b8a11ac..7a1b9a0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -155,7 +155,7 @@ docstring-code-line-length = "dynamic" [tool.ruff.lint.per-file-ignores] # F403 Allow star imports # F401 allow imported but unused -"__init__.py" = ["F401", "F403"] +"__init__.py" = ["F401", "F403", "F405"] # E402 Allow module level import not at top of file so # tqdm warnings can be disabled ahead of loading any code diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index f1a00508..1da981ad 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -103,7 +103,11 @@ def test_config_pool_threads(self): pc = Pinecone( api_key="test-api-key", host="test-controller-host.pinecone.io", pool_threads=10 ) - assert pc.index_api.api_client.pool_threads == 10 + # DBControl object is created lazily, so we need to access this property + # to trigger the setup so we can inspect the config + assert pc.db is not None + + assert pc.db.index_api.api_client.pool_threads == 10 idx = pc.Index(host="my-index-host.pinecone.io", name="my-index-name") assert 
idx._vector_api.api_client.pool_threads == 10 @@ -146,5 +150,9 @@ def test_proxy_config(self): assert pc.openapi_config.proxy == "http://localhost:8080" assert pc.openapi_config.ssl_ca_cert == "path/to/cert-bundle.pem" - assert pc.index_api.api_client.configuration.proxy == "http://localhost:8080" - assert pc.index_api.api_client.configuration.ssl_ca_cert == "path/to/cert-bundle.pem" + # DBControl object is created lazily, so we need to access this property + # to trigger the setup so we can inspect the config + assert pc.db is not None + + assert pc.db.index_api.api_client.configuration.proxy == "http://localhost:8080" + assert pc.db.index_api.api_client.configuration.ssl_ca_cert == "path/to/cert-bundle.pem" diff --git a/tests/unit/test_index_initialization.py b/tests/unit/test_index_initialization.py index 3d10d636..29928fbc 100644 --- a/tests/unit/test_index_initialization.py +++ b/tests/unit/test_index_initialization.py @@ -51,5 +51,6 @@ def test_overwrite_useragent(self): def test_set_source_tag(self): pc = Pinecone(api_key="123-456-789", source_tag="test_source_tag") assert ( - re.search(r"source_tag=test_source_tag", pc.index_api.api_client.user_agent) is not None + re.search(r"source_tag=test_source_tag", pc.db.index_api.api_client.user_agent) + is not None ) diff --git a/tests/unit/utils/test_docs_links.py b/tests/unit/utils/test_docs_links.py index 478ba3b2..c1d01b21 100644 --- a/tests/unit/utils/test_docs_links.py +++ b/tests/unit/utils/test_docs_links.py @@ -1,11 +1,17 @@ import pytest import requests from pinecone.utils import docslinks +from pinecone import __version__ urls = list(docslinks.values()) @pytest.mark.parametrize("url", urls) def test_valid_links(url): - response = requests.get(url) - assert response.status_code == 200, f"Docs link is invalid: {url}" + if isinstance(url, str): + response = requests.get(url) + assert response.status_code == 200, f"Docs link is invalid: {url}" + else: + versioned_url = url(__version__) + response = requests.get(versioned_url) + assert response.status_code == 200, f"Docs link is invalid: {versioned_url}" From 7fed334b9b9ab912f0e6238aa40c88b7c27867ea Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Fri, 25 Apr 2025 14:52:27 -0400 Subject: [PATCH 12/13] Update lockfile --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 048d84c6..e923876f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1962,4 +1962,4 @@ grpc = ["googleapis-common-protos", "grpcio", "grpcio", "grpcio", "lz4", "protob [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "96c8c770a4626bc9606a7b8e16537e217f238e20c217baa1206f4ef9debe5e82" +content-hash = "33aa755910ac34e4443a3e03a180ac1ece72735367f9c53d76908ca95ea2fd48" From 85d48422fb2fd11c8f6d2349c2c2f0ab81d07871 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Fri, 2 May 2025 16:04:34 -0400 Subject: [PATCH 13/13] Add integration tests for reorg methods --- .github/workflows/testing-integration.yaml | 25 ++ pinecone/db_control/request_factory.py | 1 - poetry.lock | 16 +- pyproject.toml | 1 + tests/__init__.py | 6 +- .../control/collections/__init__.py | 0 .../control/collections/conftest.py | 136 ++++++++ .../control/collections/helpers.py | 57 +++ .../control/collections/test_dense_index.py | 172 +++++++++ tests/integration/control/index/__init__.py | 0 tests/integration/control/index/conftest.py | 178 ++++++++++ .../control/index/test_configure.py | 43 +++ .../integration/control/index/test_create.py | 328 ++++++++++++++++++ 
.../integration/control/index/test_delete.py | 0 .../control/index/test_describe.py | 46 +++ tests/integration/control/index/test_has.py | 18 + tests/integration/control/index/test_list.py | 27 ++ 17 files changed, 1049 insertions(+), 5 deletions(-) create mode 100644 tests/integration/control/collections/__init__.py create mode 100644 tests/integration/control/collections/conftest.py create mode 100644 tests/integration/control/collections/helpers.py create mode 100644 tests/integration/control/collections/test_dense_index.py create mode 100644 tests/integration/control/index/__init__.py create mode 100644 tests/integration/control/index/conftest.py create mode 100644 tests/integration/control/index/test_configure.py create mode 100644 tests/integration/control/index/test_create.py create mode 100644 tests/integration/control/index/test_delete.py create mode 100644 tests/integration/control/index/test_describe.py create mode 100644 tests/integration/control/index/test_has.py create mode 100644 tests/integration/control/index/test_list.py diff --git a/.github/workflows/testing-integration.yaml b/.github/workflows/testing-integration.yaml index 8275a5f1..53023bde 100644 --- a/.github/workflows/testing-integration.yaml +++ b/.github/workflows/testing-integration.yaml @@ -3,6 +3,31 @@ name: "Integration Tests" workflow_call: {} jobs: + reorg: + name: Reorg tests + runs-on: ubuntu-latest + env: + PINECONE_DEBUG_CURL: 'true' + PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' + PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}' + strategy: + matrix: + python_version: [3.9, 3.12] + steps: + - uses: actions/checkout@v4 + - name: 'Set up Python ${{ matrix.python_version }}' + uses: actions/setup-python@v5 + with: + python-version: '${{ matrix.python_version }}' + - name: Setup Poetry + uses: ./.github/actions/setup-poetry + with: + include_asyncio: true + - name: 'Run index tests' + run: poetry run pytest tests/integration/control/index --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + - name: 'Run collection tests' + run: poetry run pytest tests/integration/control/collections --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + inference: name: Inference tests diff --git a/pinecone/db_control/request_factory.py b/pinecone/db_control/request_factory.py index c2ecc905..719f71a1 100644 --- a/pinecone/db_control/request_factory.py +++ b/pinecone/db_control/request_factory.py @@ -68,7 +68,6 @@ def __parse_deletion_protection( deletion_protection: Union[DeletionProtection, str], ) -> DeletionProtectionModel: deletion_protection = convert_enum_to_string(deletion_protection) - print(deletion_protection) if deletion_protection in ["enabled", "disabled"]: return DeletionProtectionModel(deletion_protection) else: diff --git a/poetry.lock b/poetry.lock index e923876f..2e4de34b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1535,6 +1535,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.1.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.9" +files = [ + {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"}, + {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2023.3.post1" @@ -1962,4 +1976,4 @@ 
grpc = ["googleapis-common-protos", "grpcio", "grpcio", "grpcio", "lz4", "protob [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "33aa755910ac34e4443a3e03a180ac1ece72735367f9c53d76908ca95ea2fd48" +content-hash = "0145fb2ae02a1cdd6fe06b191a6761dcee4f4c67fe057b48d6b501d7b0b504da" diff --git a/pyproject.toml b/pyproject.toml index 7a1b9a0a..788b2870 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,6 +97,7 @@ beautifulsoup4 = "^4.13.3" pinecone-plugin-assistant = "^1.6.0" vprof = "^0.38" tuna = "^0.5.11" +python-dotenv = "^1.1.0" [tool.poetry.extras] diff --git a/tests/__init__.py b/tests/__init__.py index f2dab92a..84ca0481 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,5 +1,5 @@ import logging -logging.basicConfig( - format="%(levelname)s [%(asctime)s] %(name)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S" -) +# logging.basicConfig( +# format="%(levelname)s [%(asctime)s] %(name)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S" +# ) diff --git a/tests/integration/control/collections/__init__.py b/tests/integration/control/collections/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/control/collections/conftest.py b/tests/integration/control/collections/conftest.py new file mode 100644 index 00000000..bb592cee --- /dev/null +++ b/tests/integration/control/collections/conftest.py @@ -0,0 +1,136 @@ +import pytest +import uuid +import time +import logging +import dotenv +import os +from datetime import datetime +from pinecone import Pinecone, NotFoundException, PineconeApiException +from ...helpers import get_environment_var + +dotenv.load_dotenv() + +logger = logging.getLogger(__name__) +""" @private """ + +# Generate a unique ID for the entire test run +RUN_ID = str(uuid.uuid4()) + + +@pytest.fixture() +def index_tags(request): + test_name = request.node.name + if test_name is None: + test_name = "" + else: + test_name = test_name.replace(":", "_").replace("[", "_").replace("]", "_") + + tags = { + "test-suite": "pinecone-python-client", + "test-run": RUN_ID, + "test": test_name, + "created-at": datetime.now().strftime("%Y-%m-%d"), + } + + if os.getenv("USER"): + tags["user"] = os.getenv("USER") + return tags + + +@pytest.fixture() +def pc(): + api_key = get_environment_var("PINECONE_API_KEY") + return Pinecone( + api_key=api_key, additional_headers={"sdk-test-suite": "pinecone-python-client"} + ) + + +@pytest.fixture() +def pod_environment(): + return get_environment_var("PINECONE_ENVIRONMENT", "us-east1-gcp") + + +def delete_with_retry(pc, index_name, retries=0, sleep_interval=5): + logger.debug( + "Deleting index " + + index_name + + ", retry " + + str(retries) + + ", next sleep interval " + + str(sleep_interval) + ) + try: + pc.db.index.delete(name=index_name, timeout=-1) + except NotFoundException: + pass + except PineconeApiException as e: + if e.error.code == "PRECONDITON_FAILED": + if retries > 5: + raise Exception("Unable to delete index " + index_name) + time.sleep(sleep_interval) + delete_with_retry(pc, index_name, retries + 1, sleep_interval * 2) + else: + logger.error(e.__class__) + logger.error(e) + raise Exception("Unable to delete index " + index_name) + except Exception as e: + logger.error(e.__class__) + logger.error(e) + raise Exception("Unable to delete index " + index_name) + + +def pytest_sessionfinish(session, exitstatus): + """ + Hook that runs after all tests have completed. + This is a good place to clean up any resources that were created during the test session. 
+ """ + logger.info("Running final cleanup after all collection tests...") + + try: + pc = Pinecone() + indexes = pc.db.index.list() + test_indexes = [ + idx for idx in indexes if idx.tags is not None and idx.tags.get("test-run") == RUN_ID + ] + + logger.info(f"Indexes to delete: {[idx.name for idx in test_indexes]}") + + for idx in test_indexes: + if idx.deletion_protection == "enabled": + logger.info(f"Disabling deletion protection for index: {idx.name}") + pc.db.index.configure(name=idx.name, deletion_protection="disabled") + # Wait for index to be updated with status ready + logger.info(f"Waiting for index {idx.name} to be ready...") + timeout = 60 + while True and timeout > 0: + is_ready = pc.db.index.describe(name=idx.name).ready + if is_ready: + break + time.sleep(1) + timeout -= 1 + if timeout <= 0: + logger.warning(f"Index {idx.name} did not become ready in time") + else: + logger.info(f"Deletion protection is already disabled for index: {idx.name}") + + for idx in test_indexes: + try: + logger.info(f"Deleting index: {idx.name}") + pc.db.index.delete(name=idx.name, timeout=-1) + except Exception as e: + logger.warning(f"Failed to delete index {idx.name}: {str(e)}") + + collections = pc.db.collection.list() + logger.info(f"Collections to delete: {[col.name for col in collections]}") + + for col in collections: + try: + logger.info(f"Deleting collection: {col.name}") + pc.db.collection.delete(name=col.name) + except Exception as e: + logger.warning(f"Failed to delete collection {col.name}: {str(e)}") + + except Exception as e: + logger.error(f"Error during final cleanup: {str(e)}") + + logger.info("Final cleanup of collections tests completed") diff --git a/tests/integration/control/collections/helpers.py b/tests/integration/control/collections/helpers.py new file mode 100644 index 00000000..58633a69 --- /dev/null +++ b/tests/integration/control/collections/helpers.py @@ -0,0 +1,57 @@ +import time +import random +import logging + +logger = logging.getLogger(__name__) + + +def random_vector(dimension): + return [random.uniform(0, 1) for _ in range(dimension)] + + +def attempt_cleanup_collection(pc, collection_name): + max_wait = 120 + time_waited = 0 + deleted = False + + while time_waited < max_wait: + try: + pc.db.collection.delete(name=collection_name) + deleted = True + break + except Exception as e: + # Failures here usually happen because the backend thinks there is still some + # operation pending on the resource. + # These orphaned resources will get cleaned up by the cleanup job later. + logger.debug(f"Error while cleaning up collection: {e}") + logger.debug( + f"Waiting for collection {collection_name} to be deleted. Waited {time_waited} seconds..." + ) + time.sleep(10) + time_waited += 10 + if not deleted: + logger.warning(f"Collection {collection_name} was not deleted after {max_wait} seconds") + + +def attempt_cleanup_index(pc, index_name): + max_wait = 120 + time_waited = 0 + deleted = False + + while time_waited < max_wait: + try: + pc.db.index.delete(name=index_name) + deleted = True + break + except Exception as e: + # Failures here usually happen because the backend thinks there is still some + # operation pending on the resource. + # These orphaned resources will get cleaned up by the cleanup job later. + logger.debug(f"Error while cleaning up index: {e}") + logger.debug( + f"Waiting for index {index_name} to be deleted. Waited {time_waited} seconds..." 
+ ) + time.sleep(10) + time_waited += 10 + if not deleted: + logger.warning(f"Index {index_name} was not deleted after {max_wait} seconds") diff --git a/tests/integration/control/collections/test_dense_index.py b/tests/integration/control/collections/test_dense_index.py new file mode 100644 index 00000000..58ad0832 --- /dev/null +++ b/tests/integration/control/collections/test_dense_index.py @@ -0,0 +1,172 @@ +import time +from pinecone import PodSpec +from ...helpers import generate_index_name, generate_collection_name +import logging +from .helpers import attempt_cleanup_collection, attempt_cleanup_index, random_vector + +logger = logging.getLogger(__name__) + + +class TestCollectionsHappyPath: + def test_dense_index_to_collection_to_index(self, pc, pod_environment, index_tags): + # Create a pod index + index_name = generate_index_name("pod-index") + dimension = 10 + metric = "cosine" + pod_index = pc.db.index.create( + name=index_name, + dimension=dimension, + metric=metric, + spec=PodSpec(environment=pod_environment), + tags=index_tags, + ) + + # Insert some vectors into the pod index + idx = pc.Index(host=pod_index.host) + num_vectors = 10 + namespaces = ["", "test-ns1", "test-ns2"] + for namespace in namespaces: + vectors = [(str(i), random_vector(dimension)) for i in range(num_vectors)] + idx.upsert(vectors=vectors, namespace=namespace) + + # Wait for the vectors to be available + all_vectors_available = False + max_wait = 180 + time_waited = 0 + while not all_vectors_available and time_waited < max_wait: + all_vectors_available = True + desc = idx.describe_index_stats() + for namespace in namespaces: + if ( + desc.namespaces.get(namespace, None) is None + or desc.namespaces[namespace]["vector_count"] != num_vectors + ): + logger.debug(f"Waiting for vectors to be available in namespace {namespace}...") + all_vectors_available = False + break + for namespace in namespaces: + for i in range(num_vectors): + try: + idx.fetch(ids=[str(i)], namespace=namespace) + except Exception: + logger.debug( + f"Waiting for vector {i} to be available in namespace {namespace}..." + ) + all_vectors_available = False + break + if not all_vectors_available: + time.sleep(5) + time_waited += 5 + if not all_vectors_available: + raise Exception(f"Vectors were not available after {max_wait} seconds") + + # Create a collection from the pod index + collection_name = generate_collection_name("coll1") + pc.db.collection.create(name=collection_name, source=index_name) + collection_desc = pc.db.collection.describe(name=collection_name) + logger.debug(f"Collection desc: {collection_desc}") + assert collection_desc["name"] == collection_name + assert collection_desc["environment"] == pod_environment + assert collection_desc["status"] is not None + + # Wait for the collection to be ready + time_waited = 0 + max_wait = 120 + collection_ready = collection_desc["status"] + while collection_ready.lower() != "ready" and time_waited < max_wait: + logger.debug( + f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds..." 
+            )
+            desc = pc.db.collection.describe(name=collection_name)
+            logger.debug(f"Collection desc: {desc}")
+            collection_ready = desc["status"]
+            if collection_ready.lower() != "ready":
+                time.sleep(10)
+                time_waited += 10
+        if collection_ready.lower() != "ready":
+            raise Exception(f"Collection {collection_name} is not ready after {max_wait} seconds")
+
+        # Verify the collection was created
+        assert collection_name in pc.db.collection.list().names()
+
+        # Verify the collection has the correct info
+        collection_desc = pc.db.collection.describe(name=collection_name)
+        logger.debug(f"Collection desc: {collection_desc}")
+        assert collection_desc["name"] == collection_name
+        assert collection_desc["environment"] == pod_environment
+        assert collection_desc["status"] == "Ready"
+        assert collection_desc["dimension"] == dimension
+        assert collection_desc["vector_count"] == len(namespaces) * num_vectors
+        assert collection_desc["size"] is not None
+        assert collection_desc["size"] > 0
+
+        # Create new index from collection
+        index_name2 = generate_index_name("index-from-collection-" + collection_name)
+        logger.debug(f"Creating index {index_name2} from collection {collection_name}...")
+        new_index = pc.db.index.create(
+            name=index_name2,
+            dimension=dimension,
+            metric=metric,
+            spec=PodSpec(environment=pod_environment, source_collection=collection_name),
+            tags=index_tags,
+        )
+        logger.debug(f"Created index {index_name2} from collection {collection_name}: {new_index}")
+
+        # Wait for the new index to be ready
+        max_wait = 120
+        time_waited = 0
+        index_ready = False
+        while not index_ready and time_waited < max_wait:
+            logger.debug(
+                f"Waiting for index {index_name2} to be ready. Waited {time_waited} seconds..."
+            )
+            desc = pc.db.index.describe(name=index_name2)
+            logger.debug(f"Index {index_name2} status: {desc['status']}")
+            index_ready = desc["status"]["ready"] == True
+            if not index_ready:
+                time.sleep(10)
+                time_waited += 10
+        if not index_ready:
+            raise Exception(f"Index {index_name2} is not ready after {max_wait} seconds")
+
+        new_index_desc = pc.db.index.describe(name=index_name2)
+        logger.debug(f"New index desc: {new_index_desc}")
+        assert new_index_desc["name"] == index_name2
+        assert new_index_desc["status"]["ready"] == True
+
+        new_idx = pc.Index(name=index_name2)
+
+        # Verify stats reflect the vectors present in the collection
+        stats = new_idx.describe_index_stats()
+        logger.debug(f"New index stats: {stats}")
+        assert stats.total_vector_count == len(namespaces) * num_vectors
+
+        # Verify the vectors from the collection can be fetched
+        for namespace in namespaces:
+            results = new_idx.fetch(ids=[v[0] for v in vectors], namespace=namespace)
+            logger.debug(f"Results for namespace {namespace}: {results}")
+            assert len(results.vectors) != 0
+
+        # Verify the vectors from the collection can be queried by id
+        for namespace in namespaces:
+            for i in range(num_vectors):
+                results = new_idx.query(top_k=3, id=str(i), namespace=namespace)
+                logger.debug(
+                    f"Query results for namespace {namespace} and id {i} in index {index_name2}: {results}"
+                )
+                assert len(results.matches) == 3
+
+                # Compare with results from original index
+                original_results = idx.query(top_k=3, id=str(i), namespace=namespace)
+                logger.debug(
+                    f"Original query results for namespace {namespace} and id {i} in index {index_name}: {original_results}"
+                )
+                assert len(original_results.matches) == 3
+                assert original_results.matches[0].id == results.matches[0].id
+                assert original_results.matches[1].id == results.matches[1].id
+                assert 
original_results.matches[2].id == results.matches[2].id + + # Cleanup + attempt_cleanup_collection(pc, collection_name) + attempt_cleanup_index(pc, index_name) + attempt_cleanup_index(pc, index_name2) diff --git a/tests/integration/control/index/__init__.py b/tests/integration/control/index/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/control/index/conftest.py b/tests/integration/control/index/conftest.py new file mode 100644 index 00000000..805795f6 --- /dev/null +++ b/tests/integration/control/index/conftest.py @@ -0,0 +1,178 @@ +import pytest +import uuid +import time +import logging +import dotenv +import os +from datetime import datetime +from pinecone import Pinecone, NotFoundException, PineconeApiException +from ...helpers import generate_index_name, get_environment_var + +dotenv.load_dotenv() + +logger = logging.getLogger(__name__) +""" @private """ + +# Generate a unique ID for the entire test run +RUN_ID = str(uuid.uuid4()) + + +@pytest.fixture() +def index_tags(request): + test_name = request.node.name + if test_name is None: + test_name = "" + else: + test_name = test_name.replace(":", "_").replace("[", "_").replace("]", "_") + + tags = { + "test-suite": "pinecone-python-client", + "test-run": RUN_ID, + "test": test_name, + "created-at": datetime.now().strftime("%Y-%m-%d"), + } + + if os.getenv("USER"): + tags["user"] = os.getenv("USER") + return tags + + +@pytest.fixture() +def pc(): + api_key = get_environment_var("PINECONE_API_KEY") + return Pinecone( + api_key=api_key, additional_headers={"sdk-test-suite": "pinecone-python-client"} + ) + + +@pytest.fixture() +def pod_environment(): + return get_environment_var("PINECONE_ENVIRONMENT", "us-east1-gcp") + + +@pytest.fixture() +def serverless_cloud(): + return get_environment_var("SERVERLESS_CLOUD", "aws") + + +@pytest.fixture() +def serverless_region(): + return get_environment_var("SERVERLESS_REGION", "us-west-2") + + +@pytest.fixture() +def create_sl_index_params(index_name, serverless_cloud, serverless_region, index_tags): + spec = {"serverless": {"cloud": serverless_cloud, "region": serverless_region}} + return dict(name=index_name, dimension=10, metric="cosine", spec=spec, tags=index_tags) + + +@pytest.fixture() +def index_name(request): + test_name = request.node.name + return generate_index_name(test_name) + + +@pytest.fixture() +def ready_sl_index(pc, index_name, create_sl_index_params): + create_sl_index_params["timeout"] = None + pc.create_index(**create_sl_index_params) + yield index_name + pc.db.index.delete(name=index_name, timeout=-1) + + +@pytest.fixture() +def notready_sl_index(pc, index_name, create_sl_index_params): + create_sl_index_params["timeout"] = -1 + pc.create_index(**create_sl_index_params) + yield index_name + + +def delete_with_retry(pc, index_name, retries=0, sleep_interval=5): + logger.debug( + "Deleting index " + + index_name + + ", retry " + + str(retries) + + ", next sleep interval " + + str(sleep_interval) + ) + try: + pc.db.index.delete(name=index_name, timeout=-1) + except NotFoundException: + pass + except PineconeApiException as e: + if e.error.code == "PRECONDITON_FAILED": + if retries > 5: + raise Exception("Unable to delete index " + index_name) + time.sleep(sleep_interval) + delete_with_retry(pc, index_name, retries + 1, sleep_interval * 2) + else: + logger.error(e.__class__) + logger.error(e) + raise Exception("Unable to delete index " + index_name) + except Exception as e: + logger.error(e.__class__) + logger.error(e) + raise 
Exception("Unable to delete index " + index_name) + + +@pytest.fixture(autouse=True) +def cleanup(pc, index_name): + yield + + try: + desc = pc.db.index.describe(name=index_name) + if desc.deletion_protection == "enabled": + logger.info(f"Disabling deletion protection for index: {index_name}") + pc.db.index.configure(name=index_name, deletion_protection="disabled") + logger.debug("Attempting to delete index with name: " + index_name) + pc.db.index.delete(name=index_name, timeout=-1) + except Exception: + pass + + +def pytest_sessionfinish(session, exitstatus): + """ + Hook that runs after all tests have completed. + This is a good place to clean up any resources that were created during the test session. + """ + logger.info("Running final cleanup after all tests...") + + try: + pc = Pinecone() + indexes = pc.db.index.list() + test_indexes = [ + idx for idx in indexes if idx.tags is not None and idx.tags.get("test-run") == RUN_ID + ] + + logger.info(f"Indexes to delete: {[idx.name for idx in test_indexes]}") + + for idx in test_indexes: + if idx.deletion_protection == "enabled": + logger.info(f"Disabling deletion protection for index: {idx.name}") + pc.db.index.configure(name=idx.name, deletion_protection="disabled") + # Wait for index to be updated with status ready + logger.info(f"Waiting for index {idx.name} to be ready...") + timeout = 60 + while True and timeout > 0: + is_ready = pc.db.index.describe(name=idx.name).ready + if is_ready: + break + time.sleep(1) + timeout -= 1 + if timeout <= 0: + logger.warning(f"Index {idx.name} did not become ready in time") + else: + logger.info(f"Deletion protection is already disabled for index: {idx.name}") + + for idx in test_indexes: + try: + logger.info(f"Deleting index: {idx.name}") + pc.db.index.delete(name=idx.name, timeout=-1) + except Exception as e: + logger.warning(f"Failed to delete index {idx.name}: {str(e)}") + + except Exception as e: + logger.error(f"Error during final cleanup: {str(e)}") + + logger.info("Final cleanup completed") diff --git a/tests/integration/control/index/test_configure.py b/tests/integration/control/index/test_configure.py new file mode 100644 index 00000000..f4c73094 --- /dev/null +++ b/tests/integration/control/index/test_configure.py @@ -0,0 +1,43 @@ +class TestConfigureIndexTags: + def test_add_index_tags(self, pc, ready_sl_index): + starting_tags = pc.db.index.describe(name=ready_sl_index).tags + assert "foo" not in starting_tags + assert "bar" not in starting_tags + + pc.db.index.configure(name=ready_sl_index, tags={"foo": "FOO", "bar": "BAR"}) + + found_tags = pc.db.index.describe(name=ready_sl_index).tags.to_dict() + assert found_tags is not None + assert found_tags["foo"] == "FOO" + assert found_tags["bar"] == "BAR" + + def test_remove_tags_by_setting_empty_value_for_key(self, pc, ready_sl_index): + pc.db.index.configure(name=ready_sl_index, tags={"foo": "FOO", "bar": "BAR"}) + pc.db.index.configure(name=ready_sl_index, tags={}) + found_tags = pc.db.index.describe(name=ready_sl_index).tags.to_dict() + assert found_tags is not None + assert found_tags.get("foo", None) == "FOO", "foo should not be removed" + assert found_tags.get("bar", None) == "BAR", "bar should not be removed" + + pc.db.index.configure(name=ready_sl_index, tags={"foo": ""}) + found_tags2 = pc.db.index.describe(name=ready_sl_index).tags.to_dict() + assert found_tags2 is not None + assert found_tags2.get("foo", None) is None, "foo should be removed" + assert found_tags2.get("bar", None) == "BAR", "bar should not be removed" + + def 
test_merge_new_tags_with_existing_tags(self, pc, ready_sl_index): + pc.db.index.configure(name=ready_sl_index, tags={"foo": "FOO", "bar": "BAR"}) + pc.db.index.configure(name=ready_sl_index, tags={"baz": "BAZ"}) + found_tags = pc.db.index.describe(name=ready_sl_index).tags.to_dict() + assert found_tags is not None + assert found_tags.get("foo", None) == "FOO", "foo should not be removed" + assert found_tags.get("bar", None) == "BAR", "bar should not be removed" + assert found_tags.get("baz", None) == "BAZ", "baz should be added" + + def test_remove_multiple_tags(self, pc, ready_sl_index): + pc.db.index.configure(name=ready_sl_index, tags={"foo": "FOO", "bar": "BAR"}) + pc.db.index.configure(name=ready_sl_index, tags={"foo": "", "bar": ""}) + found_tags = pc.db.index.describe(name=ready_sl_index).tags.to_dict() + assert found_tags is not None + assert found_tags.get("foo", None) is None, "foo should be removed" + assert found_tags.get("bar", None) is None, "bar should be removed" diff --git a/tests/integration/control/index/test_create.py b/tests/integration/control/index/test_create.py new file mode 100644 index 00000000..75ffabf2 --- /dev/null +++ b/tests/integration/control/index/test_create.py @@ -0,0 +1,328 @@ +import pytest +import time +from pinecone import ( + Pinecone, + Metric, + VectorType, + DeletionProtection, + ServerlessSpec, + PodSpec, + CloudProvider, + AwsRegion, + PineconeApiValueError, + PineconeApiException, + PineconeApiTypeError, + PodIndexEnvironment, +) + + +class TestCreateServerlessIndexHappyPath: + def test_create_index(self, pc: Pinecone, index_name): + resp = pc.db.index.create( + name=index_name, + dimension=10, + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + ) + assert resp.name == index_name + assert resp.dimension == 10 + assert resp.metric == "cosine" # default value + assert resp.vector_type == "dense" # default value + assert resp.deletion_protection == "disabled" # default value + + desc = pc.db.index.describe(name=index_name) + assert desc.name == index_name + assert desc.dimension == 10 + assert desc.metric == "cosine" + assert desc.deletion_protection == "disabled" # default value + assert desc.vector_type == "dense" # default value + + def test_create_skip_wait(self, pc, index_name): + resp = pc.db.index.create( + name=index_name, + dimension=10, + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + timeout=-1, + ) + assert resp.name == index_name + assert resp.dimension == 10 + assert resp.metric == "cosine" + + def test_create_infinite_wait(self, pc, index_name): + resp = pc.db.index.create( + name=index_name, + dimension=10, + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + timeout=None, + ) + assert resp.name == index_name + assert resp.dimension == 10 + assert resp.metric == "cosine" + + @pytest.mark.parametrize("metric", ["cosine", "euclidean", "dotproduct"]) + def test_create_default_index_with_metric(self, pc, create_sl_index_params, metric): + create_sl_index_params["metric"] = metric + pc.db.index.create(**create_sl_index_params) + desc = pc.db.index.describe(create_sl_index_params["name"]) + if isinstance(metric, str): + assert desc.metric == metric + else: + assert desc.metric == metric.value + assert desc.vector_type == "dense" + + @pytest.mark.parametrize( + "metric_enum,vector_type_enum,dim,tags", + [ + (Metric.COSINE, VectorType.DENSE, 10, None), + (Metric.EUCLIDEAN, VectorType.DENSE, 10, {"env": "prod"}), + (Metric.DOTPRODUCT, VectorType.SPARSE, None, 
{"env": "dev"}), + ], + ) + def test_create_with_enum_values( + self, pc, index_name, metric_enum, vector_type_enum, dim, tags + ): + args = { + "name": index_name, + "metric": metric_enum, + "vector_type": vector_type_enum, + "deletion_protection": DeletionProtection.DISABLED, + "spec": ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + "tags": tags, + } + if dim is not None: + args["dimension"] = dim + + pc.db.index.create(**args) + + desc = pc.db.index.describe(index_name) + assert desc.metric == metric_enum.value + assert desc.vector_type == vector_type_enum.value + assert desc.dimension == dim + assert desc.deletion_protection == DeletionProtection.DISABLED.value + assert desc.name == index_name + assert desc.spec.serverless.cloud == "aws" + assert desc.spec.serverless.region == "us-east-1" + if tags: + assert desc.tags.to_dict() == tags + + @pytest.mark.parametrize("metric", ["cosine", "euclidean", "dotproduct"]) + def test_create_dense_index_with_metric(self, pc, create_sl_index_params, metric): + create_sl_index_params["metric"] = metric + create_sl_index_params["vector_type"] = VectorType.DENSE + pc.db.index.create(**create_sl_index_params) + desc = pc.db.index.describe(create_sl_index_params["name"]) + assert desc.metric == metric + assert desc.vector_type == "dense" + + def test_create_with_optional_tags(self, pc, create_sl_index_params): + tags = {"foo": "FOO", "bar": "BAR"} + create_sl_index_params["tags"] = tags + pc.db.index.create(**create_sl_index_params) + desc = pc.db.index.describe(create_sl_index_params["name"]) + assert desc.tags.to_dict() == tags + + +class TestCreatePodIndexHappyPath: + def test_create_index_minimal_config( + self, pc: Pinecone, index_name, pod_environment, index_tags + ): + pc.db.index.create( + name=index_name, + dimension=10, + metric="cosine", + spec=PodSpec(environment=pod_environment), + tags=index_tags, + ) + + desc = pc.db.index.describe(name=index_name) + assert desc.name == index_name + assert desc.dimension == 10 + assert desc.metric == "cosine" + assert desc.spec.pod.environment == pod_environment + assert desc.tags.to_dict() == index_tags + assert desc.status.ready == True + assert desc.status.state == "Ready" + assert desc.vector_type == "dense" + + def test_create_index_with_spec_options( + self, pc: Pinecone, index_name, pod_environment, index_tags + ): + pc.db.index.create( + name=index_name, + dimension=10, + metric="cosine", + spec=PodSpec( + environment=pod_environment, + pod_type="p1.x2", + replicas=2, + metadata_config={"indexed": ["foo", "bar"]}, + ), + tags=index_tags, + ) + + desc = pc.db.index.describe(name=index_name) + assert desc.name == index_name + assert desc.dimension == 10 + assert desc.metric == "cosine" + assert desc.spec.pod.environment == pod_environment + assert desc.spec.pod.pod_type == "p1.x2" + assert desc.spec.pod.replicas == 2 + assert desc.spec.pod.metadata_config.indexed == ["foo", "bar"] + + def test_create_index_with_deletion_protection( + self, pc: Pinecone, index_name, pod_environment, index_tags + ): + pc.db.index.create( + name=index_name, + dimension=10, + metric="cosine", + spec=PodSpec(environment=pod_environment), + tags=index_tags, + deletion_protection=DeletionProtection.ENABLED, + ) + + try: + pc.db.index.delete(name=index_name) + except PineconeApiException as e: + assert "Deletion protection is enabled for this index" in str(e) + + pc.db.index.configure(name=index_name, deletion_protection=DeletionProtection.DISABLED) + max_wait_time = 60 + while 
pc.db.index.describe(name=index_name).status.ready == False: + time.sleep(1) + max_wait_time -= 1 + if max_wait_time <= 0: + raise Exception("Index did not become ready in time") + + pc.db.index.delete(name=index_name) + assert pc.db.index.has(name=index_name) == False + + +class TestCreatePodIndexApiErrorCases: + def test_pod_index_does_not_support_sparse_vectors(self, pc, index_name, index_tags): + with pytest.raises(PineconeApiException) as e: + pc.db.index.create( + name=index_name, + metric="dotproduct", + spec=PodSpec(environment=PodIndexEnvironment.AWS_US_EAST_1), + vector_type="sparse", + tags=index_tags, + ) + assert "Sparse vector type is not supported for pod indexes" in str(e.value) + + +class TestCreateServerlessIndexApiErrorCases: + def test_create_index_with_invalid_name(self, pc, create_sl_index_params): + create_sl_index_params["name"] = "Invalid-name" + with pytest.raises(PineconeApiException): + pc.db.index.create(**create_sl_index_params) + + def test_create_index_invalid_metric(self, pc, create_sl_index_params): + create_sl_index_params["metric"] = "invalid" + with pytest.raises(PineconeApiValueError): + pc.db.index.create(**create_sl_index_params) + + def test_create_index_with_invalid_neg_dimension(self, pc, create_sl_index_params): + create_sl_index_params["dimension"] = -1 + with pytest.raises(PineconeApiValueError): + pc.db.index.create(**create_sl_index_params) + + def test_create_index_that_already_exists(self, pc, create_sl_index_params): + pc.db.index.create(**create_sl_index_params) + with pytest.raises(PineconeApiException): + pc.db.index.create(**create_sl_index_params) + + +class TestCreateServerlessIndexWithTimeout: + def test_create_index_default_timeout(self, pc, create_sl_index_params): + create_sl_index_params["timeout"] = None + pc.db.index.create(**create_sl_index_params) + # Waits infinitely for index to be ready + desc = pc.db.index.describe(create_sl_index_params["name"]) + assert desc.status.ready == True + + def test_create_index_when_timeout_set(self, pc, create_sl_index_params): + create_sl_index_params["timeout"] = ( + 1000 # effectively infinite, but different code path from None + ) + pc.db.index.create(**create_sl_index_params) + desc = pc.db.index.describe(name=create_sl_index_params["name"]) + assert desc.status.ready == True + + def test_create_index_with_negative_timeout(self, pc, create_sl_index_params): + create_sl_index_params["timeout"] = -1 + pc.db.index.create(**create_sl_index_params) + desc = pc.db.index.describe(create_sl_index_params["name"]) + # Returns immediately without waiting for index to be ready + assert desc.status.ready in [False, True] + + +class TestCreateIndexTypeErrorCases: + def test_create_index_with_invalid_str_dimension(self, pc, create_sl_index_params): + create_sl_index_params["dimension"] = "10" + with pytest.raises(PineconeApiTypeError): + pc.db.index.create(**create_sl_index_params) + + def test_create_index_with_missing_dimension(self, pc, create_sl_index_params): + del create_sl_index_params["dimension"] + with pytest.raises(PineconeApiException): + pc.db.index.create(**create_sl_index_params) + + def test_create_index_w_incompatible_options(self, pc, create_sl_index_params): + create_sl_index_params["pod_type"] = "p1.x2" + create_sl_index_params["environment"] = "us-east1-gcp" + create_sl_index_params["replicas"] = 2 + with pytest.raises(TypeError): + pc.db.index.create(**create_sl_index_params) + + @pytest.mark.parametrize("required_option", ["name", "spec", "dimension"]) + def 
test_create_with_missing_required_options( + self, pc, create_sl_index_params, required_option + ): + del create_sl_index_params[required_option] + with pytest.raises(Exception) as e: + pc.db.index.create(**create_sl_index_params) + assert required_option.lower() in str(e.value).lower() + + +class TestSparseIndex: + def test_create_sparse_index_minimal_config(self, pc: Pinecone, index_name, index_tags): + pc.db.index.create( + name=index_name, + metric="dotproduct", + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + vector_type=VectorType.SPARSE, + tags=index_tags, + ) + + desc = pc.db.index.describe(name=index_name) + assert desc.name == index_name + assert desc.metric == "dotproduct" + assert desc.vector_type == "sparse" + + +class TestSparseIndexErrorCases: + def test_sending_dimension_with_sparse_index(self, pc, index_tags): + with pytest.raises(ValueError) as e: + pc.db.index.create( + name="test-index", + dimension=10, + metric="dotproduct", + vector_type=VectorType.SPARSE, + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + tags=index_tags, + ) + assert "dimension should not be specified for sparse indexes" in str(e.value) + + @pytest.mark.parametrize("bad_metric", ["cosine", "euclidean"]) + def test_sending_metric_other_than_dotproduct_with_sparse_index( + self, pc, index_tags, bad_metric + ): + with pytest.raises(PineconeApiException) as e: + pc.db.index.create( + name="test-index", + metric=bad_metric, + vector_type=VectorType.SPARSE, + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + tags=index_tags, + ) + assert "Sparse vector indexes must use the metric dotproduct" in str(e.value) diff --git a/tests/integration/control/index/test_delete.py b/tests/integration/control/index/test_delete.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/control/index/test_describe.py b/tests/integration/control/index/test_describe.py new file mode 100644 index 00000000..df7f5896 --- /dev/null +++ b/tests/integration/control/index/test_describe.py @@ -0,0 +1,46 @@ +from pinecone import IndexModel + + +class TestDescribeIndex: + def test_describe_index_when_ready(self, pc, ready_sl_index, create_sl_index_params): + description = pc.db.index.describe(ready_sl_index) + + assert isinstance(description, IndexModel) + assert description.name == ready_sl_index + assert description.dimension == create_sl_index_params["dimension"] + assert description.metric == create_sl_index_params["metric"] + assert ( + description.spec.serverless["cloud"] + == create_sl_index_params["spec"]["serverless"]["cloud"] + ) + assert ( + description.spec.serverless["region"] + == create_sl_index_params["spec"]["serverless"]["region"] + ) + + assert isinstance(description.host, str) + assert description.host != "" + assert ready_sl_index in description.host + + assert description.status.state == "Ready" + assert description.status.ready == True + + def test_describe_index_when_not_ready(self, pc, notready_sl_index, create_sl_index_params): + description = pc.db.index.describe(notready_sl_index) + + assert isinstance(description, IndexModel) + assert description.name == notready_sl_index + assert description.dimension == create_sl_index_params["dimension"] + assert description.metric == create_sl_index_params["metric"] + assert ( + description.spec.serverless["cloud"] + == create_sl_index_params["spec"]["serverless"]["cloud"] + ) + assert ( + description.spec.serverless["region"] + == 
create_sl_index_params["spec"]["serverless"]["region"] + ) + + assert isinstance(description.host, str) + assert description.host != "" + assert notready_sl_index in description.host diff --git a/tests/integration/control/index/test_has.py b/tests/integration/control/index/test_has.py new file mode 100644 index 00000000..1a356a99 --- /dev/null +++ b/tests/integration/control/index/test_has.py @@ -0,0 +1,18 @@ +from tests.integration.helpers import random_string + + +class TestHasIndex: + def test_index_exists_success(self, pc, create_sl_index_params): + name = create_sl_index_params["name"] + pc.db.index.create(**create_sl_index_params) + has_index = pc.db.index.has(name) + assert has_index == True + + def test_index_does_not_exist(self, pc): + name = random_string(8) + has_index = pc.db.index.has(name) + assert has_index == False + + def test_has_index_with_null_index_name(self, pc): + has_index = pc.db.index.has("") + assert has_index == False diff --git a/tests/integration/control/index/test_list.py b/tests/integration/control/index/test_list.py new file mode 100644 index 00000000..4e217ea5 --- /dev/null +++ b/tests/integration/control/index/test_list.py @@ -0,0 +1,27 @@ +from pinecone import IndexModel + + +class TestListIndexes: + def test_list_indexes_includes_ready_indexes(self, pc, ready_sl_index, create_sl_index_params): + list_response = pc.db.index.list() + assert len(list_response.indexes) != 0 + assert isinstance(list_response.indexes[0], IndexModel) + + created_index = [index for index in list_response.indexes if index.name == ready_sl_index][ + 0 + ] + assert created_index.name == ready_sl_index + assert created_index.dimension == create_sl_index_params["dimension"] + assert created_index.metric == create_sl_index_params["metric"] + assert ready_sl_index in created_index.host + + def test_list_indexes_includes_not_ready_indexes(self, pc, notready_sl_index): + list_response = pc.db.index.list() + assert len(list_response.indexes) != 0 + assert isinstance(list_response.indexes[0], IndexModel) + + created_index = [ + index for index in list_response.indexes if index.name == notready_sl_index + ][0] + assert created_index.name == notready_sl_index + assert notready_sl_index in created_index.name