diff --git a/pinecone/__init__.py b/pinecone/__init__.py index 13a65bd1..f228eddf 100644 --- a/pinecone/__init__.py +++ b/pinecone/__init__.py @@ -2,22 +2,183 @@ .. include:: ../pdoc/README.md """ -from .deprecated_plugins import check_for_deprecated_plugins +from .deprecated_plugins import check_for_deprecated_plugins as _check_for_deprecated_plugins from .deprecation_warnings import * -from .config import * +from .pinecone import Pinecone +from .pinecone_asyncio import PineconeAsyncio from .exceptions import * -from .control import * -from .data import * -from .models import * -from .enums import * from .utils import __version__ import logging +# Set up lazy import handling +from .utils.lazy_imports import setup_lazy_imports as _setup_lazy_imports + +_inference_lazy_imports = { + "RerankModel": ("pinecone.inference", "RerankModel"), + "EmbedModel": ("pinecone.inference", "EmbedModel"), +} + +_db_data_lazy_imports = { + "Vector": ("pinecone.db_data.dataclasses", "Vector"), + "SparseValues": ("pinecone.db_data.dataclasses", "SparseValues"), + "SearchQuery": ("pinecone.db_data.dataclasses", "SearchQuery"), + "SearchQueryVector": ("pinecone.db_data.dataclasses", "SearchQueryVector"), + "SearchRerank": ("pinecone.db_data.dataclasses", "SearchRerank"), + "FetchResponse": ("pinecone.db_data.models", "FetchResponse"), + "DeleteRequest": ("pinecone.db_data.models", "DeleteRequest"), + "DescribeIndexStatsRequest": ("pinecone.db_data.models", "DescribeIndexStatsRequest"), + "DescribeIndexStatsResponse": ("pinecone.db_data.models", "IndexDescription"), + "RpcStatus": ("pinecone.db_data.models", "RpcStatus"), + "ScoredVector": ("pinecone.db_data.models", "ScoredVector"), + "SingleQueryResults": ("pinecone.db_data.models", "SingleQueryResults"), + "QueryRequest": ("pinecone.db_data.models", "QueryRequest"), + "QueryResponse": ("pinecone.db_data.models", "QueryResponse"), + "UpsertResponse": ("pinecone.db_data.models", "UpsertResponse"), + "UpdateRequest": ("pinecone.db_data.models", "UpdateRequest"), + "ImportErrorMode": ("pinecone.core.openapi.db_data.model", "ImportErrorMode"), + "VectorDictionaryMissingKeysError": ( + "pinecone.db_data.errors", + "VectorDictionaryMissingKeysError", + ), + "VectorDictionaryExcessKeysError": ( + "pinecone.db_data.errors", + "VectorDictionaryExcessKeysError", + ), + "VectorTupleLengthError": ("pinecone.db_data.errors", "VectorTupleLengthError"), + "SparseValuesTypeError": ("pinecone.db_data.errors", "SparseValuesTypeError"), + "SparseValuesMissingKeysError": ("pinecone.db_data.errors", "SparseValuesMissingKeysError"), + "SparseValuesDictionaryExpectedError": ( + "pinecone.db_data.errors", + "SparseValuesDictionaryExpectedError", + ), +} + +_db_control_lazy_imports = { + "CloudProvider": ("pinecone.db_control.enums", "CloudProvider"), + "AwsRegion": ("pinecone.db_control.enums", "AwsRegion"), + "GcpRegion": ("pinecone.db_control.enums", "GcpRegion"), + "AzureRegion": ("pinecone.db_control.enums", "AzureRegion"), + "PodIndexEnvironment": ("pinecone.db_control.enums", "PodIndexEnvironment"), + "Metric": ("pinecone.db_control.enums", "Metric"), + "VectorType": ("pinecone.db_control.enums", "VectorType"), + "DeletionProtection": ("pinecone.db_control.enums", "DeletionProtection"), + "CollectionDescription": ("pinecone.db_control.models", "CollectionDescription"), + "CollectionList": ("pinecone.db_control.models", "CollectionList"), + "IndexList": ("pinecone.db_control.models", "IndexList"), + "IndexModel": ("pinecone.db_control.models", "IndexModel"), + "IndexEmbed": 
("pinecone.db_control.models", "IndexEmbed"), + "ServerlessSpec": ("pinecone.db_control.models", "ServerlessSpec"), + "ServerlessSpecDefinition": ("pinecone.db_control.models", "ServerlessSpecDefinition"), + "PodSpec": ("pinecone.db_control.models", "PodSpec"), + "PodSpecDefinition": ("pinecone.db_control.models", "PodSpecDefinition"), + "PodType": ("pinecone.db_control.enums", "PodType"), +} + +_config_lazy_imports = { + "Config": ("pinecone.config", "Config"), + "ConfigBuilder": ("pinecone.config", "ConfigBuilder"), + "PineconeConfig": ("pinecone.config", "PineconeConfig"), +} + +# Define imports to be lazily loaded +_LAZY_IMPORTS = { + **_inference_lazy_imports, + **_db_data_lazy_imports, + **_db_control_lazy_imports, + **_config_lazy_imports, +} + +# Set up the lazy import handler +_setup_lazy_imports(_LAZY_IMPORTS) + # Raise an exception if the user is attempting to use the SDK with # deprecated plugins installed in their project. -check_for_deprecated_plugins() +_check_for_deprecated_plugins() # Silence annoying log messages from the plugin interface logging.getLogger("pinecone_plugin_interface").setLevel(logging.CRITICAL) + +__all__ = [ + "__version__", + # Deprecated top-levelfunctions + "init", + "create_index", + "delete_index", + "list_indexes", + "describe_index", + "configure_index", + "scale_index", + "create_collection", + "delete_collection", + "describe_collection", + "list_collections", + # Primary client classes + "Pinecone", + "PineconeAsyncio", + # Config classes + "Config", + "ConfigBuilder", + "PineconeConfig", + # OpenAPI classes + "CloudProvider", + "AwsRegion", + "GcpRegion", + "AzureRegion", + "PodIndexEnvironment", + "Metric", + "VectorType", + "DeletionProtection", + "CollectionDescription", + "CollectionList", + "IndexList", + "IndexModel", + "IndexEmbed", + "ImportErrorMode", + "ServerlessSpec", + "ServerlessSpecDefinition", + "PodSpec", + "PodSpecDefinition", + "PodType", + "Vector", + "FetchResponse", + "DeleteRequest", + "DescribeIndexStatsRequest", + "DescribeIndexStatsResponse", + "RpcStatus", + "ScoredVector", + "SingleQueryResults", + "QueryRequest", + "QueryResponse", + "SearchQuery", + "SearchQueryVector", + "SearchRerank", + "UpsertResponse", + "UpdateRequest", + "SparseValues", + # Inference classes + "RerankModel", + "EmbedModel", + # Exception classes + "PineconeException", + "PineconeApiException", + "PineconeConfigurationError", + "PineconeProtocolError", + "PineconeApiAttributeError", + "PineconeApiTypeError", + "PineconeApiValueError", + "PineconeApiKeyError", + "PineconeApiException", + "NotFoundException", + "UnauthorizedException", + "ForbiddenException", + "ServiceException", + "ListConversionException", + "VectorDictionaryMissingKeysError", + "VectorDictionaryExcessKeysError", + "VectorTupleLengthError", + "SparseValuesTypeError", + "SparseValuesMissingKeysError", + "SparseValuesDictionaryExpectedError", +] diff --git a/pinecone/config/__init__.py b/pinecone/config/__init__.py index 7abb7278..f292622f 100644 --- a/pinecone/config/__init__.py +++ b/pinecone/config/__init__.py @@ -2,6 +2,7 @@ import os from .config import ConfigBuilder, Config +from .openapi_configuration import Configuration as OpenApiConfiguration from .pinecone_config import PineconeConfig if os.getenv("PINECONE_DEBUG") is not None: diff --git a/pinecone/config/config.py b/pinecone/config/config.py index 01a703e0..9029c45a 100644 --- a/pinecone/config/config.py +++ b/pinecone/config/config.py @@ -1,9 +1,11 @@ -from typing import NamedTuple, Optional, Dict +from typing 
import NamedTuple, Optional, Dict, TYPE_CHECKING import os -from pinecone.exceptions.exceptions import PineconeConfigurationError -from pinecone.config.openapi import OpenApiConfigFactory -from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration +from pinecone.exceptions import PineconeConfigurationError +from pinecone.config.openapi_config_factory import OpenApiConfigFactory + +if TYPE_CHECKING: + from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration # Duplicated this util to help resolve circular imports @@ -81,8 +83,8 @@ def build( @staticmethod def build_openapi_config( - config: Config, openapi_config: Optional[OpenApiConfiguration] = None, **kwargs - ) -> OpenApiConfiguration: + config: Config, openapi_config: Optional["OpenApiConfiguration"] = None, **kwargs + ) -> "OpenApiConfiguration": if openapi_config: openapi_config = OpenApiConfigFactory.copy( openapi_config=openapi_config, api_key=config.api_key, host=config.host diff --git a/pinecone/config/openapi.py b/pinecone/config/openapi_config_factory.py similarity index 93% rename from pinecone/config/openapi.py rename to pinecone/config/openapi_config_factory.py index d6bdf702..56a1de64 100644 --- a/pinecone/config/openapi.py +++ b/pinecone/config/openapi_config_factory.py @@ -1,13 +1,11 @@ import sys -from typing import List, Optional +from typing import List, Optional, Tuple import certifi import socket import copy -from urllib3.connection import HTTPConnection - -from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration +from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration TCP_KEEPINTVL = 60 # Sec TCP_KEEPIDLE = 300 # Sec @@ -58,7 +56,7 @@ def _get_socket_options( keep_alive_idle_sec: int = TCP_KEEPIDLE, keep_alive_interval_sec: int = TCP_KEEPINTVL, keep_alive_tries: int = TCP_KEEPCNT, - ) -> List[tuple]: + ) -> List[Tuple[int, int, int]]: """ Returns the socket options to pass to OpenAPI's Rest client Args: @@ -72,7 +70,8 @@ def _get_socket_options( """ # Source: https://www.finbourne.com/blog/the-mysterious-hanging-client-tcp-keep-alives - socket_params = HTTPConnection.default_socket_options + # urllib3.connection.HTTPConnection.default_socket_options + socket_params = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] if not do_keep_alive: return socket_params diff --git a/pinecone/config/openapi_configuration.py b/pinecone/config/openapi_configuration.py new file mode 100644 index 00000000..fce6defc --- /dev/null +++ b/pinecone/config/openapi_configuration.py @@ -0,0 +1,441 @@ +import copy +import logging +import multiprocessing + +from http import client as http_client +from pinecone.exceptions import PineconeApiValueError +from typing import TypedDict + + +class HostSetting(TypedDict): + url: str + description: str + + +JSON_SCHEMA_VALIDATION_KEYWORDS = { + "multipleOf", + "maximum", + "exclusiveMaximum", + "minimum", + "exclusiveMinimum", + "maxLength", + "minLength", + "pattern", + "maxItems", + "minItems", +} + + +class Configuration: + """Class to hold the configuration of the API client. + + :param host: Base url + :param api_key: Dict to store API key(s). + Each entry in the dict specifies an API key. + The dict key is the name of the security scheme in the OAS specification. + The dict value is the API key secret. + :param api_key_prefix: Dict to store API prefix (e.g. Bearer) + The dict key is the name of the security scheme in the OAS specification. 
+ The dict value is an API key prefix when generating the auth data. + :param discard_unknown_keys: Boolean value indicating whether to discard + unknown properties. A server may send a response that includes additional + properties that are not known by the client in the following scenarios: + 1. The OpenAPI document is incomplete, i.e. it does not match the server + implementation. + 2. The client was generated using an older version of the OpenAPI document + and the server has been upgraded since then. + If a schema in the OpenAPI document defines the additionalProperties attribute, + then all undeclared properties received from the server are injected into the + additional properties map. In that case, there are undeclared properties, and + nothing to discard. + :param disabled_client_side_validations (string): Comma-separated list of + JSON schema validation keywords to disable JSON schema structural validation + rules. The following keywords may be specified: multipleOf, maximum, + exclusiveMaximum, minimum, exclusiveMinimum, maxLength, minLength, pattern, + maxItems, minItems. + By default, the validation is performed for data generated locally by the client + and data received from the server, independent of any validation performed by + the server side. If the input data does not satisfy the JSON schema validation + rules specified in the OpenAPI document, an exception is raised. + If disabled_client_side_validations is set, structural validation is + disabled. This can be useful to troubleshoot data validation problems, such as + when the OpenAPI document validation rules do not match the actual API data + received from the server. + :param server_operation_index: Mapping from operation ID to an index to server + configuration. + :param server_operation_variables: Mapping from operation ID to a mapping with + string values to replace variables in templated server configuration. + Enum validation is performed beforehand for variables with defined enum values. + :param ssl_ca_cert: str - the path to a file of concatenated CA certificates + in PEM format + + :Example: + + API Key Authentication Example. 
+ Given the following security scheme in the OpenAPI specification: + components: + securitySchemes: + cookieAuth: # name for the security scheme + type: apiKey + in: cookie + name: JSESSIONID # cookie name + + You can programmatically set the cookie: + + conf = pinecone.config.openapi_configuration.Configuration( + api_key={'cookieAuth': 'abc123'}, + api_key_prefix={'cookieAuth': 'JSESSIONID'} + ) + + The following cookie will be added to the HTTP request: + Cookie: JSESSIONID abc123 + """ + + _default = None + + def __init__( + self, + host=None, + api_key=None, + api_key_prefix=None, + discard_unknown_keys=False, + disabled_client_side_validations="", + server_index=None, + server_variables=None, + server_operation_index=None, + server_operation_variables=None, + ssl_ca_cert=None, + ): + """Constructor""" + self._base_path = "https://api.pinecone.io" if host is None else host + """Default Base url + """ + self.server_index = 0 if server_index is None and host is None else server_index + self.server_operation_index = server_operation_index or {} + """Default server index + """ + self.server_variables = server_variables or {} + self.server_operation_variables = server_operation_variables or {} + """Default server variables + """ + self.temp_folder_path = None + """Temp file folder for downloading files + """ + # Authentication Settings + self.api_key = {} + if api_key: + self.api_key = api_key + """dict to store API key(s) + """ + self.api_key_prefix = {} + if api_key_prefix: + self.api_key_prefix = api_key_prefix + """dict to store API prefix (e.g. Bearer) + """ + self.refresh_api_key_hook = None + """function hook to refresh API key if expired + """ + self.discard_unknown_keys = discard_unknown_keys + self.disabled_client_side_validations = disabled_client_side_validations + self.logger = {} + """Logging Settings + """ + self.logger["package_logger"] = logging.getLogger("pinecone.openapi_support") + self.logger["urllib3_logger"] = logging.getLogger("urllib3") + self.logger_format = "%(asctime)s %(levelname)s %(message)s" + """Log format + """ + self.logger_stream_handler = None + """Log stream handler + """ + self.logger_file_handler = None + """Log file handler + """ + self.logger_file = None + """Debug file location + """ + self.debug = False + """Debug switch + """ + + self.verify_ssl = True + """SSL/TLS verification + Set this to false to skip verifying SSL certificate when calling API + from https server. + """ + self.ssl_ca_cert = ssl_ca_cert + """Set this to customize the certificate file to verify the peer. + """ + self.cert_file = None + """client certificate file + """ + self.key_file = None + """client key file + """ + self.assert_hostname = None + """Set this to True/False to enable/disable SSL hostname verification. + """ + + self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 + """urllib3 connection pool's maximum number of connections saved + per pool. urllib3 uses 1 connection as default value, but this is + not the best value when you are making a lot of possibly parallel + requests to the same host, which is often the case here. + cpu_count * 5 is used as default value to increase performance. 
+ """ + + self.proxy = None + """Proxy URL + """ + self.proxy_headers = None + """Proxy headers + """ + self.safe_chars_for_path_param = "" + """Safe chars for path_param + """ + self.retries = None + """Adding retries to override urllib3 default value 3 + """ + # Enable client side validation + self.client_side_validation = True + + # Options to pass down to the underlying urllib3 socket + self.socket_options = None + + def __deepcopy__(self, memo): + cls = self.__class__ + result = cls.__new__(cls) + memo[id(self)] = result + for k, v in self.__dict__.items(): + if k not in ("logger", "logger_file_handler"): + setattr(result, k, copy.deepcopy(v, memo)) + # shallow copy of loggers + result.logger = copy.copy(self.logger) + # use setters to configure loggers + result.logger_file = self.logger_file + result.debug = self.debug + return result + + def __setattr__(self, name, value): + object.__setattr__(self, name, value) + if name == "disabled_client_side_validations": + s = set(filter(None, value.split(","))) + for v in s: + if v not in JSON_SCHEMA_VALIDATION_KEYWORDS: + raise PineconeApiValueError("Invalid keyword: '{0}''".format(v)) + self._disabled_client_side_validations = s + + @classmethod + def set_default(cls, default): + """Set default instance of configuration. + + It stores default configuration, which can be + returned by get_default_copy method. + + :param default: object of Configuration + """ + cls._default = copy.deepcopy(default) + + @classmethod + def get_default_copy(cls): + """Return new instance of configuration. + + This method returns newly created, based on default constructor, + object of Configuration class or returns a copy of default + configuration passed by the set_default method. + + :return: The configuration object. + """ + if cls._default is not None: + return copy.deepcopy(cls._default) + return Configuration() + + @property + def logger_file(self): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + return self.__logger_file + + @logger_file.setter + def logger_file(self, value): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + self.__logger_file = value + if self.__logger_file: + # If set logging file, + # then add file handler and remove stream handler. + self.logger_file_handler = logging.FileHandler(self.__logger_file) + self.logger_file_handler.setFormatter(self.logger_formatter) + for _, logger in self.logger.items(): + logger.addHandler(self.logger_file_handler) + + @property + def debug(self): + """Debug status + + :param value: The debug status, True or False. + :type: bool + """ + return self.__debug + + @debug.setter + def debug(self, value): + """Debug status + + :param value: The debug status, True or False. 
+ :type: bool + """ + self.__debug = value + if self.__debug: + # if debug status is True, turn on debug logging + for _, logger in self.logger.items(): + logger.setLevel(logging.DEBUG) + # turn on http_client debug + http_client.HTTPConnection.debuglevel = 1 + else: + # if debug status is False, turn off debug logging, + # setting log level to default `logging.WARNING` + for _, logger in self.logger.items(): + logger.setLevel(logging.WARNING) + # turn off http_client debug + http_client.HTTPConnection.debuglevel = 0 + + @property + def logger_format(self): + """The logger format. + + The logger_formatter will be updated when logger_format is set. + + :param value: The format string. + :type: str + """ + return self.__logger_format + + @logger_format.setter + def logger_format(self, value): + """The logger format. + + The logger_formatter will be updated when logger_format is set. + + :param value: The format string. + :type: str + """ + self.__logger_format = value + self.logger_formatter = logging.Formatter(self.__logger_format) + + def get_api_key_with_prefix(self, identifier, alias=None): + """Gets API key (with prefix if set). + + :param identifier: The identifier of apiKey. + :param alias: The alternative identifier of apiKey. + :return: The token for api key authentication. + """ + if self.refresh_api_key_hook is not None: + self.refresh_api_key_hook(self) + key = self.api_key.get(identifier, self.api_key.get(alias) if alias is not None else None) + if key: + prefix = self.api_key_prefix.get(identifier) + if prefix: + return "%s %s" % (prefix, key) + else: + return key + + def auth_settings(self): + """Gets Auth Settings dict for api client. + + :return: The Auth Settings information dict. + """ + auth = {} + if "ApiKeyAuth" in self.api_key: + auth["ApiKeyAuth"] = { + "type": "api_key", + "in": "header", + "key": "Api-Key", + "value": self.get_api_key_with_prefix("ApiKeyAuth"), + } + return auth + + def get_host_settings(self): + """Gets an array of host settings + + :return: An array of host settings + """ + return [{"url": "https://api.pinecone.io", "description": "Production API endpoints"}] + + def get_host_from_settings(self, index, variables=None, servers=None): + """Gets host URL based on the index and variables + :param index: array index of the host settings + :param variables: hash of variable and the corresponding value + :param servers: an array of host settings or None + :return: URL based on host settings + """ + if index is None: + return self._base_path + + variables = {} if variables is None else variables + servers = self.get_host_settings() if servers is None else servers + + try: + server = servers[index] + except IndexError: + raise ValueError( + "Invalid index {0} when selecting the host settings. Must be less than {1}".format( + index, len(servers) + ) + ) + + url = server["url"] + + # go through variables and replace placeholders + for variable_name, variable in server.get("variables", {}).items(): + used_value = variables.get(variable_name, variable["default_value"]) + + if "enum_values" in variable and used_value not in variable["enum_values"]: + raise ValueError( + "The variable `{0}` in the host URL has invalid value {1}. 
Must be {2}.".format( + variable_name, variables[variable_name], variable["enum_values"] + ) + ) + + url = url.replace("{" + variable_name + "}", used_value) + + return url + + @property + def host(self): + """Return generated host.""" + return self.get_host_from_settings(self.server_index, variables=self.server_variables) + + @host.setter + def host(self, value): + """Fix base path.""" + self._base_path = value + self.server_index = None + + def __repr__(self): + attrs = [ + f"host={self.host}", + "api_key=***", + f"api_key_prefix={self.api_key_prefix}", + f"connection_pool_maxsize={self.connection_pool_maxsize}", + f"discard_unknown_keys={self.discard_unknown_keys}", + f"disabled_client_side_validations={self.disabled_client_side_validations}", + f"server_index={self.server_index}", + f"server_variables={self.server_variables}", + f"server_operation_index={self.server_operation_index}", + f"server_operation_variables={self.server_operation_variables}", + f"ssl_ca_cert={self.ssl_ca_cert}", + ] + return f"Configuration({', '.join(attrs)})" diff --git a/pinecone/control/__init__.py b/pinecone/control/__init__.py index a26e352a..4f04e477 100644 --- a/pinecone/control/__init__.py +++ b/pinecone/control/__init__.py @@ -1,6 +1,9 @@ -from .pinecone import Pinecone -from .pinecone_asyncio import PineconeAsyncio +import warnings -from .repr_overrides import install_repr_overrides +warnings.warn( + "The module at `pinecone.control` has moved to `pinecone.db_control`. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) -install_repr_overrides() +from pinecone.db_control import * diff --git a/pinecone/control/pinecone.py b/pinecone/control/pinecone.py deleted file mode 100644 index f3c8f404..00000000 --- a/pinecone/control/pinecone.py +++ /dev/null @@ -1,354 +0,0 @@ -import time -import logging -from typing import Optional, Dict, Union -from multiprocessing import cpu_count - -from .index_host_store import IndexHostStore -from .pinecone_interface import PineconeDBControlInterface - -from pinecone.config import PineconeConfig, ConfigBuilder - -from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi -from pinecone.openapi_support.api_client import ApiClient - - -from pinecone.utils import normalize_host, setup_openapi_client, PluginAware -from pinecone.core.openapi.db_control import API_VERSION -from pinecone.models import ( - ServerlessSpec, - PodSpec, - IndexModel, - IndexList, - CollectionList, - IndexEmbed, -) -from .langchain_import_warnings import _build_langchain_attribute_error_message -from pinecone.utils import docslinks -from pinecone.data import _Index, _Inference, _IndexAsyncio - -from pinecone.enums import ( - Metric, - VectorType, - DeletionProtection, - PodType, - CloudProvider, - AwsRegion, - GcpRegion, - AzureRegion, -) -from .types import CreateIndexForModelEmbedTypedDict -from .request_factory import PineconeDBControlRequestFactory - -logger = logging.getLogger(__name__) -""" @private """ - - -class Pinecone(PineconeDBControlInterface, PluginAware): - """ - A client for interacting with Pinecone's vector database. - - This class implements methods for managing and interacting with Pinecone resources - such as collections and indexes. 
- """ - - def __init__( - self, - api_key: Optional[str] = None, - host: Optional[str] = None, - proxy_url: Optional[str] = None, - proxy_headers: Optional[Dict[str, str]] = None, - ssl_ca_certs: Optional[str] = None, - ssl_verify: Optional[bool] = None, - additional_headers: Optional[Dict[str, str]] = {}, - pool_threads: Optional[int] = None, - **kwargs, - ): - for deprecated_kwarg in {"config", "openapi_config", "index_api"}: - if deprecated_kwarg in kwargs: - raise NotImplementedError( - f"Passing {deprecated_kwarg} is no longer supported. Please pass individual settings such as proxy_url, proxy_headers, ssl_ca_certs, and ssl_verify directly to the Pinecone constructor as keyword arguments. See the README at {docslinks['README']} for examples." - ) - - self.config = PineconeConfig.build( - api_key=api_key, - host=host, - additional_headers=additional_headers, - proxy_url=proxy_url, - proxy_headers=proxy_headers, - ssl_ca_certs=ssl_ca_certs, - ssl_verify=ssl_verify, - **kwargs, - ) - """ @private """ - - self.openapi_config = ConfigBuilder.build_openapi_config(self.config, **kwargs) - """ @private """ - - if pool_threads is None: - self.pool_threads = 5 * cpu_count() - """ @private """ - else: - self.pool_threads = pool_threads - """ @private """ - - self._inference = None # Lazy initialization - """ @private """ - - self.index_api = setup_openapi_client( - api_client_klass=ApiClient, - api_klass=ManageIndexesApi, - config=self.config, - openapi_config=self.openapi_config, - pool_threads=pool_threads, - api_version=API_VERSION, - ) - """ @private """ - - self.index_host_store = IndexHostStore() - """ @private """ - - self.load_plugins( - config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads - ) - - @property - def inference(self): - """ - Inference is a namespace where an instance of the `pinecone.data.features.inference.inference.Inference` class is lazily created and cached. 
- """ - if self._inference is None: - self._inference = _Inference(config=self.config, openapi_config=self.openapi_config) - return self._inference - - def create_index( - self, - name: str, - spec: Union[Dict, ServerlessSpec, PodSpec], - dimension: Optional[int] = None, - metric: Optional[Union[Metric, str]] = Metric.COSINE, - timeout: Optional[int] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, - tags: Optional[Dict[str, str]] = None, - ) -> IndexModel: - req = PineconeDBControlRequestFactory.create_index_request( - name=name, - spec=spec, - dimension=dimension, - metric=metric, - deletion_protection=deletion_protection, - vector_type=vector_type, - tags=tags, - ) - resp = self.index_api.create_index(create_index_request=req) - - if timeout == -1: - return IndexModel(resp) - return self.__poll_describe_index_until_ready(name, timeout) - - def create_index_for_model( - self, - name: str, - cloud: Union[CloudProvider, str], - region: Union[AwsRegion, GcpRegion, AzureRegion, str], - embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], - tags: Optional[Dict[str, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - timeout: Optional[int] = None, - ) -> IndexModel: - req = PineconeDBControlRequestFactory.create_index_for_model_request( - name=name, - cloud=cloud, - region=region, - embed=embed, - tags=tags, - deletion_protection=deletion_protection, - ) - resp = self.index_api.create_index_for_model(req) - - if timeout == -1: - return IndexModel(resp) - return self.__poll_describe_index_until_ready(name, timeout) - - def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): - description = None - - def is_ready() -> bool: - nonlocal description - description = self.describe_index(name=name) - return description.status.ready - - total_wait_time = 0 - if timeout is None: - # Wait indefinitely - while not is_ready(): - logger.debug( - f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." - ) - total_wait_time += 5 - time.sleep(5) - - else: - # Wait for a maximum of timeout seconds - while not is_ready(): - if timeout < 0: - logger.error(f"Index {name} is not ready. Timeout reached.") - link = docslinks["API_DESCRIBE_INDEX"] - timeout_msg = ( - f"Please call describe_index() to confirm index status. See docs at {link}" - ) - raise TimeoutError(timeout_msg) - - logger.debug( - f"Waiting for index {name} to be ready. 
Total wait time: {total_wait_time}" - ) - total_wait_time += 5 - time.sleep(5) - timeout -= 5 - - return description - - def delete_index(self, name: str, timeout: Optional[int] = None): - self.index_api.delete_index(name) - self.index_host_store.delete_host(self.config, name) - - if timeout == -1: - return - - if timeout is None: - while self.has_index(name): - time.sleep(5) - else: - while self.has_index(name) and timeout >= 0: - time.sleep(5) - timeout -= 5 - if timeout and timeout < 0: - raise ( - TimeoutError( - "Please call the list_indexes API ({}) to confirm if index is deleted".format( - "https://www.pinecone.io/docs/api/operation/list_indexes/" - ) - ) - ) - - def list_indexes(self) -> IndexList: - response = self.index_api.list_indexes() - return IndexList(response) - - def describe_index(self, name: str) -> IndexModel: - api_instance = self.index_api - description = api_instance.describe_index(name) - host = description.host - self.index_host_store.set_host(self.config, name, host) - - return IndexModel(description) - - def has_index(self, name: str) -> bool: - if name in self.list_indexes().names(): - return True - else: - return False - - def configure_index( - self, - name: str, - replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, - tags: Optional[Dict[str, str]] = None, - ): - api_instance = self.index_api - description = self.describe_index(name=name) - - req = PineconeDBControlRequestFactory.configure_index_request( - description=description, - replicas=replicas, - pod_type=pod_type, - deletion_protection=deletion_protection, - tags=tags, - ) - api_instance.configure_index(name, configure_index_request=req) - - def create_collection(self, name: str, source: str) -> None: - req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) - self.index_api.create_collection(create_collection_request=req) - - def list_collections(self) -> CollectionList: - response = self.index_api.list_collections() - return CollectionList(response) - - def delete_collection(self, name: str) -> None: - self.index_api.delete_collection(name) - - def describe_collection(self, name: str): - return self.index_api.describe_collection(name).to_dict() - - @staticmethod - def from_texts(*args, **kwargs): - """@private""" - raise AttributeError(_build_langchain_attribute_error_message("from_texts")) - - @staticmethod - def from_documents(*args, **kwargs): - """@private""" - raise AttributeError(_build_langchain_attribute_error_message("from_documents")) - - def Index(self, name: str = "", host: str = "", **kwargs): - if name == "" and host == "": - raise ValueError("Either name or host must be specified") - - pt = kwargs.pop("pool_threads", None) or self.pool_threads - api_key = self.config.api_key - openapi_config = self.openapi_config - - if host != "": - check_realistic_host(host) - - # Use host url if it is provided - index_host = normalize_host(host) - else: - # Otherwise, get host url from describe_index using the index name - index_host = self.index_host_store.get_host(self.index_api, self.config, name) - - return _Index( - host=index_host, - api_key=api_key, - pool_threads=pt, - openapi_config=openapi_config, - source_tag=self.config.source_tag, - **kwargs, - ) - - def IndexAsyncio(self, host: str, **kwargs): - api_key = self.config.api_key - openapi_config = self.openapi_config - - if host is None or host == "": - raise ValueError("A host must be specified") - - 
check_realistic_host(host) - index_host = normalize_host(host) - - return _IndexAsyncio( - host=index_host, - api_key=api_key, - openapi_config=openapi_config, - source_tag=self.config.source_tag, - **kwargs, - ) - - -def check_realistic_host(host: str) -> None: - """@private - - Checks whether a user-provided host string seems plausible. - Someone could erroneously pass an index name as the host by - mistake, and if they have done that we'd like to give them a - simple error message as feedback rather than attempting to - call the url and getting a more cryptic DNS resolution error. - """ - - if "." not in host and "localhost" not in host: - raise ValueError( - f"You passed '{host}' as the host but this does not appear to be valid. Call describe_index() to confirm the host of the index." - ) diff --git a/pinecone/core/openapi/db_control/__init__.py b/pinecone/core/openapi/db_control/__init__.py index 1a6949bb..31408552 100644 --- a/pinecone/core/openapi/db_control/__init__.py +++ b/pinecone/core/openapi/db_control/__init__.py @@ -17,7 +17,7 @@ from pinecone.openapi_support.api_client import ApiClient # import Configuration -from pinecone.openapi_support.configuration import Configuration +from pinecone.config.openapi_configuration import Configuration # import exceptions from pinecone.openapi_support.exceptions import PineconeException diff --git a/pinecone/core/openapi/db_data/__init__.py b/pinecone/core/openapi/db_data/__init__.py index e8cbbfe1..76701561 100644 --- a/pinecone/core/openapi/db_data/__init__.py +++ b/pinecone/core/openapi/db_data/__init__.py @@ -17,7 +17,7 @@ from pinecone.openapi_support.api_client import ApiClient # import Configuration -from pinecone.openapi_support.configuration import Configuration +from pinecone.config.openapi_configuration import Configuration # import exceptions from pinecone.openapi_support.exceptions import PineconeException diff --git a/pinecone/core/openapi/inference/__init__.py b/pinecone/core/openapi/inference/__init__.py index d878080c..9bf0fcdb 100644 --- a/pinecone/core/openapi/inference/__init__.py +++ b/pinecone/core/openapi/inference/__init__.py @@ -17,7 +17,7 @@ from pinecone.openapi_support.api_client import ApiClient # import Configuration -from pinecone.openapi_support.configuration import Configuration +from pinecone.config.openapi_configuration import Configuration # import exceptions from pinecone.openapi_support.exceptions import PineconeException diff --git a/pinecone/data/__init__.py b/pinecone/data/__init__.py index 8e040056..0268ac16 100644 --- a/pinecone/data/__init__.py +++ b/pinecone/data/__init__.py @@ -1,34 +1,10 @@ -from .index import ( - Index as _Index, - FetchResponse, - QueryResponse, - DescribeIndexStatsResponse, - UpsertResponse, - SparseValues, - Vector, -) -from .dataclasses import * -from .import_error import ( - Index, - IndexClientInstantiationError, - Inference, - InferenceInstantiationError, -) -from .index_asyncio import * -from .errors import ( - VectorDictionaryMissingKeysError, - VectorDictionaryExcessKeysError, - VectorTupleLengthError, - SparseValuesTypeError, - SparseValuesMissingKeysError, - SparseValuesDictionaryExpectedError, - MetadataDictionaryExpectedError, -) +import warnings -from .features.bulk_import import ImportErrorMode -from .features.inference import ( - Inference as _Inference, - AsyncioInference as _AsyncioInference, - RerankModel, - EmbedModel, +warnings.warn( + "The module at `pinecone.data` has moved to `pinecone.db_data`. " + "Please update your imports. 
" + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, ) + +from pinecone.db_data import * diff --git a/pinecone/data/features/__init__.py b/pinecone/data/features/__init__.py index e69de29b..fd64a554 100644 --- a/pinecone/data/features/__init__.py +++ b/pinecone/data/features/__init__.py @@ -0,0 +1,10 @@ +import warnings + +warnings.warn( + "The module at `pinecone.data.features` has moved to `pinecone.db_data.features`. " + "Please update your imports. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +from pinecone.db_data.features import * diff --git a/pinecone/data/features/bulk_imports/__init__.py b/pinecone/data/features/bulk_imports/__init__.py new file mode 100644 index 00000000..740d503b --- /dev/null +++ b/pinecone/data/features/bulk_imports/__init__.py @@ -0,0 +1,10 @@ +import warnings + +warnings.warn( + "The module at `pinecone.data.features.bulk_import` has moved to `pinecone.db_data.features.bulk_import`. " + "Please update your imports. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +from pinecone.db_data.features.bulk_import import * diff --git a/pinecone/data/features/inference/__init__.py b/pinecone/data/features/inference/__init__.py index 30e93330..b0918dd5 100644 --- a/pinecone/data/features/inference/__init__.py +++ b/pinecone/data/features/inference/__init__.py @@ -1,6 +1,10 @@ -from .repl_overrides import install_repl_overrides -from .inference import Inference -from .inference_asyncio import AsyncioInference -from .inference_request_builder import RerankModel, EmbedModel +import warnings -install_repl_overrides() +warnings.warn( + "The module at `pinecone.data.features.inference` has moved to `pinecone.inference`. " + "Please update your imports. 
" + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +from pinecone.inference import * diff --git a/pinecone/db_control/__init__.py b/pinecone/db_control/__init__.py new file mode 100644 index 00000000..73d82468 --- /dev/null +++ b/pinecone/db_control/__init__.py @@ -0,0 +1,7 @@ +from .enums import * +from .models import * +from .db_control import DBControl +from .db_control_asyncio import DBControlAsyncio +from .repr_overrides import install_repr_overrides + +install_repr_overrides() diff --git a/pinecone/db_control/db_control.py b/pinecone/db_control/db_control.py new file mode 100644 index 00000000..f03f349d --- /dev/null +++ b/pinecone/db_control/db_control.py @@ -0,0 +1,60 @@ +import logging +from typing import Optional, TYPE_CHECKING + +from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi +from pinecone.openapi_support.api_client import ApiClient + +from pinecone.utils import setup_openapi_client +from pinecone.core.openapi.db_control import API_VERSION + + +logger = logging.getLogger(__name__) +""" @private """ + +if TYPE_CHECKING: + from .resources.sync.index import IndexResource + from .resources.sync.collection import CollectionResource + + +class DBControl: + def __init__(self, config, openapi_config, pool_threads): + self.config = config + """ @private """ + + self.openapi_config = openapi_config + """ @private """ + + self.pool_threads = pool_threads + """ @private """ + + self.index_api = setup_openapi_client( + api_client_klass=ApiClient, + api_klass=ManageIndexesApi, + config=self.config, + openapi_config=self.openapi_config, + pool_threads=pool_threads, + api_version=API_VERSION, + ) + """ @private """ + + self._index_resource: Optional["IndexResource"] = None + """ @private """ + + self._collection_resource: Optional["CollectionResource"] = None + """ @private """ + + @property + def index(self) -> "IndexResource": + if self._index_resource is None: + from .resources.sync.index import IndexResource + + self._index_resource = IndexResource(index_api=self.index_api, config=self.config) + return self._index_resource + + @property + def collection(self) -> "CollectionResource": + if self._collection_resource is None: + from .resources.sync.collection import CollectionResource + + self._collection_resource = CollectionResource(self.index_api) + return self._collection_resource diff --git a/pinecone/db_control/db_control_asyncio.py b/pinecone/db_control/db_control_asyncio.py new file mode 100644 index 00000000..2fce306e --- /dev/null +++ b/pinecone/db_control/db_control_asyncio.py @@ -0,0 +1,55 @@ +import logging +from typing import Optional, TYPE_CHECKING + +from pinecone.core.openapi.db_control.api.manage_indexes_api import AsyncioManageIndexesApi +from pinecone.openapi_support import AsyncioApiClient + +from pinecone.utils import setup_async_openapi_client +from pinecone.core.openapi.db_control import API_VERSION + +logger = logging.getLogger(__name__) +""" @private """ + + +if TYPE_CHECKING: + from .resources.asyncio.index import IndexResourceAsyncio + from .resources.asyncio.collection import CollectionResourceAsyncio + + +class DBControlAsyncio: + def __init__(self, config, openapi_config, pool_threads): + self.config = config + """ @private """ + + self.index_api = setup_async_openapi_client( + api_client_klass=AsyncioApiClient, + api_klass=AsyncioManageIndexesApi, + config=self.config, + openapi_config=self.openapi_config, + api_version=API_VERSION, + ) + """ @private """ + + 
self._index_resource: Optional["IndexResourceAsyncio"] = None + """ @private """ + + self._collection_resource: Optional["CollectionResourceAsyncio"] = None + """ @private """ + + @property + def index(self) -> "IndexResourceAsyncio": + if self._index_resource is None: + from .resources.asyncio.index import IndexResourceAsyncio + + self._index_resource = IndexResourceAsyncio( + index_api=self.index_api, config=self.config + ) + return self._index_resource + + @property + def collection(self) -> "CollectionResourceAsyncio": + if self._collection_resource is None: + from .resources.asyncio.collection import CollectionResourceAsyncio + + self._collection_resource = CollectionResourceAsyncio(self.index_api) + return self._collection_resource diff --git a/pinecone/enums/__init__.py b/pinecone/db_control/enums/__init__.py similarity index 100% rename from pinecone/enums/__init__.py rename to pinecone/db_control/enums/__init__.py diff --git a/pinecone/enums/clouds.py b/pinecone/db_control/enums/clouds.py similarity index 92% rename from pinecone/enums/clouds.py rename to pinecone/db_control/enums/clouds.py index 192b3da5..8903f40a 100644 --- a/pinecone/enums/clouds.py +++ b/pinecone/db_control/enums/clouds.py @@ -3,10 +3,10 @@ class CloudProvider(Enum): """Cloud providers available for use with Pinecone serverless indexes - + This list could expand or change over time as more cloud providers are supported. - Check the Pinecone documentation for the most up-to-date list of supported cloud - providers. If you want to use a cloud provider that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported cloud + providers. If you want to use a cloud provider that is not listed here, you can pass a string value directly without using this enum. """ @@ -17,10 +17,10 @@ class CloudProvider(Enum): class AwsRegion(Enum): """AWS (Amazon Web Services) regions available for use with Pinecone serverless indexes - + This list could expand or change over time as more regions are supported. - Check the Pinecone documentation for the most up-to-date list of supported - regions. If you want to use a region that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported + regions. If you want to use a region that is not listed here, you can pass a string value directly without using this enum. """ @@ -31,10 +31,10 @@ class AwsRegion(Enum): class GcpRegion(Enum): """GCP (Google Cloud Platform) regions available for use with Pinecone serverless indexes - + This list could expand or change over time as more regions are supported. - Check the Pinecone documentation for the most up-to-date list of supported - regions. If you want to use a region that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported + regions. If you want to use a region that is not listed here, you can pass a string value directly without using this enum. """ @@ -44,10 +44,10 @@ class GcpRegion(Enum): class AzureRegion(Enum): """Azure regions available for use with Pinecone serverless indexes - + This list could expand or change over time as more regions are supported. - Check the Pinecone documentation for the most up-to-date list of supported - regions. If you want to use a region that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported + regions. 
If you want to use a region that is not listed here, you can pass a string value directly without using this enum. """ diff --git a/pinecone/enums/deletion_protection.py b/pinecone/db_control/enums/deletion_protection.py similarity index 100% rename from pinecone/enums/deletion_protection.py rename to pinecone/db_control/enums/deletion_protection.py diff --git a/pinecone/enums/metric.py b/pinecone/db_control/enums/metric.py similarity index 100% rename from pinecone/enums/metric.py rename to pinecone/db_control/enums/metric.py diff --git a/pinecone/enums/pod_index_environment.py b/pinecone/db_control/enums/pod_index_environment.py similarity index 100% rename from pinecone/enums/pod_index_environment.py rename to pinecone/db_control/enums/pod_index_environment.py diff --git a/pinecone/enums/pod_type.py b/pinecone/db_control/enums/pod_type.py similarity index 100% rename from pinecone/enums/pod_type.py rename to pinecone/db_control/enums/pod_type.py diff --git a/pinecone/enums/vector_type.py b/pinecone/db_control/enums/vector_type.py similarity index 100% rename from pinecone/enums/vector_type.py rename to pinecone/db_control/enums/vector_type.py diff --git a/pinecone/control/index_host_store.py b/pinecone/db_control/index_host_store.py similarity index 100% rename from pinecone/control/index_host_store.py rename to pinecone/db_control/index_host_store.py diff --git a/pinecone/db_control/models/__init__.py b/pinecone/db_control/models/__init__.py new file mode 100644 index 00000000..34003bfe --- /dev/null +++ b/pinecone/db_control/models/__init__.py @@ -0,0 +1,20 @@ +from .index_description import ServerlessSpecDefinition, PodSpecDefinition +from .collection_description import CollectionDescription +from .serverless_spec import ServerlessSpec +from .pod_spec import PodSpec +from .index_list import IndexList +from .collection_list import CollectionList +from .index_model import IndexModel +from ...inference.models.index_embed import IndexEmbed + +__all__ = [ + "CollectionDescription", + "PodSpec", + "PodSpecDefinition", + "ServerlessSpec", + "ServerlessSpecDefinition", + "IndexList", + "CollectionList", + "IndexModel", + "IndexEmbed", +] diff --git a/pinecone/models/collection_description.py b/pinecone/db_control/models/collection_description.py similarity index 100% rename from pinecone/models/collection_description.py rename to pinecone/db_control/models/collection_description.py diff --git a/pinecone/models/collection_list.py b/pinecone/db_control/models/collection_list.py similarity index 87% rename from pinecone/models/collection_list.py rename to pinecone/db_control/models/collection_list.py index 508ec685..f36a9708 100644 --- a/pinecone/models/collection_list.py +++ b/pinecone/db_control/models/collection_list.py @@ -1,5 +1,7 @@ import json -from pinecone.core.openapi.db_control.models import CollectionList as OpenAPICollectionList +from pinecone.core.openapi.db_control.model.collection_list import ( + CollectionList as OpenAPICollectionList, +) class CollectionList: diff --git a/pinecone/models/index_description.py b/pinecone/db_control/models/index_description.py similarity index 100% rename from pinecone/models/index_description.py rename to pinecone/db_control/models/index_description.py diff --git a/pinecone/models/index_list.py b/pinecone/db_control/models/index_list.py similarity index 89% rename from pinecone/models/index_list.py rename to pinecone/db_control/models/index_list.py index 71242e24..e918b4f5 100644 --- a/pinecone/models/index_list.py +++ 
b/pinecone/db_control/models/index_list.py @@ -1,5 +1,5 @@ import json -from pinecone.core.openapi.db_control.models import IndexList as OpenAPIIndexList +from pinecone.core.openapi.db_control.model.index_list import IndexList as OpenAPIIndexList from .index_model import IndexModel from typing import List diff --git a/pinecone/models/index_model.py b/pinecone/db_control/models/index_model.py similarity index 81% rename from pinecone/models/index_model.py rename to pinecone/db_control/models/index_model.py index 7deb2d7d..75ba1f30 100644 --- a/pinecone/models/index_model.py +++ b/pinecone/db_control/models/index_model.py @@ -1,4 +1,4 @@ -from pinecone.core.openapi.db_control.models import IndexModel as OpenAPIIndexModel +from pinecone.core.openapi.db_control.model.index_model import IndexModel as OpenAPIIndexModel class IndexModel: diff --git a/pinecone/models/list_response.py b/pinecone/db_control/models/list_response.py similarity index 100% rename from pinecone/models/list_response.py rename to pinecone/db_control/models/list_response.py diff --git a/pinecone/models/pod_spec.py b/pinecone/db_control/models/pod_spec.py similarity index 100% rename from pinecone/models/pod_spec.py rename to pinecone/db_control/models/pod_spec.py diff --git a/pinecone/models/serverless_spec.py b/pinecone/db_control/models/serverless_spec.py similarity index 100% rename from pinecone/models/serverless_spec.py rename to pinecone/db_control/models/serverless_spec.py diff --git a/pinecone/control/repr_overrides.py b/pinecone/db_control/repr_overrides.py similarity index 79% rename from pinecone/control/repr_overrides.py rename to pinecone/db_control/repr_overrides.py index 98e4b4d4..714b8dfb 100644 --- a/pinecone/control/repr_overrides.py +++ b/pinecone/db_control/repr_overrides.py @@ -1,6 +1,6 @@ from pinecone.utils import install_json_repr_override -from pinecone.models.index_model import IndexModel -from pinecone.core.openapi.db_control.models import CollectionModel +from pinecone.db_control.models.index_model import IndexModel +from pinecone.core.openapi.db_control.model.collection_model import CollectionModel def install_repr_overrides(): diff --git a/pinecone/control/request_factory.py b/pinecone/db_control/request_factory.py similarity index 87% rename from pinecone/control/request_factory.py rename to pinecone/db_control/request_factory.py index d4d0ce63..c2ecc905 100644 --- a/pinecone/control/request_factory.py +++ b/pinecone/db_control/request_factory.py @@ -2,27 +2,37 @@ from typing import Optional, Dict, Any, Union from enum import Enum +from pinecone.utils import parse_non_empty_args, convert_enum_to_string -from pinecone.utils import convert_enum_to_string -from pinecone.core.openapi.db_control.models import ( - CreateCollectionRequest, +from pinecone.core.openapi.db_control.model.create_collection_request import CreateCollectionRequest +from pinecone.core.openapi.db_control.model.create_index_for_model_request import ( CreateIndexForModelRequest, +) +from pinecone.core.openapi.db_control.model.create_index_for_model_request_embed import ( CreateIndexForModelRequestEmbed, - CreateIndexRequest, - ConfigureIndexRequest, +) +from pinecone.core.openapi.db_control.model.create_index_request import CreateIndexRequest +from pinecone.core.openapi.db_control.model.configure_index_request import ConfigureIndexRequest +from pinecone.core.openapi.db_control.model.configure_index_request_spec import ( ConfigureIndexRequestSpec, +) +from pinecone.core.openapi.db_control.model.configure_index_request_spec_pod 
import ( ConfigureIndexRequestSpecPod, +) +from pinecone.core.openapi.db_control.model.deletion_protection import ( DeletionProtection as DeletionProtectionModel, - IndexSpec, - IndexTags, +) +from pinecone.core.openapi.db_control.model.index_spec import IndexSpec +from pinecone.core.openapi.db_control.model.index_tags import IndexTags +from pinecone.core.openapi.db_control.model.serverless_spec import ( ServerlessSpec as ServerlessSpecModel, - PodSpec as PodSpecModel, - PodSpecMetadataConfig, ) +from pinecone.core.openapi.db_control.model.pod_spec import PodSpec as PodSpecModel +from pinecone.core.openapi.db_control.model.pod_spec_metadata_config import PodSpecMetadataConfig + +from pinecone.db_control.models import ServerlessSpec, PodSpec, IndexModel, IndexEmbed -from pinecone.utils import parse_non_empty_args -from pinecone.enums import ( +from pinecone.db_control.enums import ( Metric, VectorType, DeletionProtection, diff --git a/pinecone/db_control/resources/__init__.py b/pinecone/db_control/resources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pinecone/db_control/resources/asyncio/__init__.py b/pinecone/db_control/resources/asyncio/__init__.py new file mode 100644 index 00000000..9a4841d3 --- /dev/null +++ b/pinecone/db_control/resources/asyncio/__init__.py @@ -0,0 +1,2 @@ +from .index import IndexResourceAsyncio +from .collection import CollectionResourceAsyncio diff --git a/pinecone/db_control/resources/asyncio/collection.py b/pinecone/db_control/resources/asyncio/collection.py new file mode 100644 index 00000000..33c1f3d0 --- /dev/null +++ b/pinecone/db_control/resources/asyncio/collection.py @@ -0,0 +1,32 @@ +import logging +from typing import TYPE_CHECKING + + +from pinecone.db_control.models import CollectionList + +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory + +logger = logging.getLogger(__name__) +""" @private """ + +if TYPE_CHECKING: + pass + + +class CollectionResourceAsyncio: + def __init__(self, index_api): + self.index_api = index_api + + async def create(self, name: str, source: str): + req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) + await self.index_api.create_collection(create_collection_request=req) + + async def list(self) -> CollectionList: + response = await self.index_api.list_collections() + return CollectionList(response) + + async def delete(self, name: str): + await self.index_api.delete_collection(name) + + async def describe(self, name: str): + return (await self.index_api.describe_collection(name)).to_dict() diff --git a/pinecone/db_control/resources/asyncio/index.py b/pinecone/db_control/resources/asyncio/index.py new file mode 100644 index 00000000..2d93ae01 --- /dev/null +++ b/pinecone/db_control/resources/asyncio/index.py @@ -0,0 +1,175 @@ +import logging +import asyncio +from typing import Optional, Dict, Union + + +from pinecone.db_control.models import ServerlessSpec, PodSpec, IndexModel, IndexList, IndexEmbed +from pinecone.utils import docslinks + +from pinecone.db_control.enums import ( + Metric, + VectorType, + DeletionProtection, + 
PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, +) +from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory +from pinecone.core.openapi.db_control import API_VERSION + + +logger = logging.getLogger(__name__) +""" @private """ + + +class IndexResourceAsyncio: + def __init__(self, index_api, config): + self.index_api = index_api + self.config = config + + async def create( + self, + name: str, + spec: Union[Dict, ServerlessSpec, PodSpec], + dimension: Optional[int] = None, + metric: Optional[Union[Metric, str]] = Metric.COSINE, + timeout: Optional[int] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + tags: Optional[Dict[str, str]] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_request( + name=name, + spec=spec, + dimension=dimension, + metric=metric, + deletion_protection=deletion_protection, + vector_type=vector_type, + tags=tags, + ) + resp = await self.index_api.create_index(create_index_request=req) + + if timeout == -1: + return IndexModel(resp) + return await self.__poll_describe_index_until_ready(name, timeout) + + async def create_for_model( + self, + name: str, + cloud: Union[CloudProvider, str], + region: Union[AwsRegion, GcpRegion, AzureRegion, str], + embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + tags: Optional[Dict[str, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + timeout: Optional[int] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_for_model_request( + name=name, + cloud=cloud, + region=region, + embed=embed, + tags=tags, + deletion_protection=deletion_protection, + ) + resp = await self.index_api.create_index_for_model(req) + + if timeout == -1: + return IndexModel(resp) + return await self.__poll_describe_index_until_ready(name, timeout) + + async def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): + description = None + + async def is_ready() -> bool: + nonlocal description + description = await self.describe(name=name) + return description.status.ready + + total_wait_time = 0 + if timeout is None: + # Wait indefinitely + while not await is_ready(): + logger.debug( + f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." + ) + total_wait_time += 5 + await asyncio.sleep(5) + + else: + # Wait for a maximum of timeout seconds + while not await is_ready(): + if timeout < 0: + logger.error(f"Index {name} is not ready. Timeout reached.") + link = docslinks["API_DESCRIBE_INDEX"](API_VERSION) + timeout_msg = ( + f"Please call describe_index() to confirm index status. See docs at {link}" + ) + raise TimeoutError(timeout_msg) + + logger.debug( + f"Waiting for index {name} to be ready. 
Total wait time: {total_wait_time}" + ) + total_wait_time += 5 + await asyncio.sleep(5) + timeout -= 5 + + return description + + async def delete(self, name: str, timeout: Optional[int] = None): + await self.index_api.delete_index(name) + + if timeout == -1: + return + + if timeout is None: + while await self.has(name): + await asyncio.sleep(5) + else: + while await self.has(name) and timeout >= 0: + await asyncio.sleep(5) + timeout -= 5 + if timeout and timeout < 0: + raise ( + TimeoutError( + "Please call the list_indexes API ({}) to confirm if index is deleted".format( + "https://www.pinecone.io/docs/api/operation/list_indexes/" + ) + ) + ) + + async def list(self) -> IndexList: + response = await self.index_api.list_indexes() + return IndexList(response) + + async def describe(self, name: str) -> IndexModel: + description = await self.index_api.describe_index(name) + return IndexModel(description) + + async def has(self, name: str) -> bool: + available_indexes = await self.list() + if name in available_indexes.names(): + return True + else: + return False + + async def configure( + self, + name: str, + replicas: Optional[int] = None, + pod_type: Optional[Union[PodType, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = None, + tags: Optional[Dict[str, str]] = None, + ): + description = await self.describe(name=name) + + req = PineconeDBControlRequestFactory.configure_index_request( + description=description, + replicas=replicas, + pod_type=pod_type, + deletion_protection=deletion_protection, + tags=tags, + ) + await self.index_api.configure_index(name, configure_index_request=req) diff --git a/pinecone/db_control/resources/sync/__init__.py b/pinecone/db_control/resources/sync/__init__.py new file mode 100644 index 00000000..cc904d53 --- /dev/null +++ b/pinecone/db_control/resources/sync/__init__.py @@ -0,0 +1,2 @@ +from .index import IndexResource +from .collection import CollectionResource diff --git a/pinecone/db_control/resources/sync/collection.py b/pinecone/db_control/resources/sync/collection.py new file mode 100644 index 00000000..1d8d11d8 --- /dev/null +++ b/pinecone/db_control/resources/sync/collection.py @@ -0,0 +1,27 @@ +import logging + +from pinecone.db_control.models import CollectionList +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory + +logger = logging.getLogger(__name__) +""" @private """ + + +class CollectionResource: + def __init__(self, index_api): + self.index_api = index_api + """ @private """ + + def create(self, name: str, source: str) -> None: + req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) + self.index_api.create_collection(create_collection_request=req) + + def list(self) -> CollectionList: + response = self.index_api.list_collections() + return CollectionList(response) + + def delete(self, name: str) -> None: + self.index_api.delete_collection(name) + + def describe(self, name: str): + return self.index_api.describe_collection(name).to_dict() diff --git a/pinecone/db_control/resources/sync/index.py b/pinecone/db_control/resources/sync/index.py new file mode 100644 index 00000000..85876d6c --- /dev/null +++ b/pinecone/db_control/resources/sync/index.py @@ -0,0 +1,192 @@ +import time +import logging +from typing import Optional, Dict, Union + +from pinecone.db_control.index_host_store import IndexHostStore + +from pinecone.db_control.models import ServerlessSpec, PodSpec, IndexModel, IndexList, IndexEmbed +from pinecone.utils import docslinks + +from 
pinecone.db_control.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, +) +from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory +from pinecone.core.openapi.db_control import API_VERSION + +logger = logging.getLogger(__name__) +""" @private """ + + +class IndexResource: + def __init__(self, index_api, config): + self.index_api = index_api + """ @private """ + + self.config = config + """ @private """ + + self.index_host_store = IndexHostStore() + """ @private """ + + def create( + self, + name: str, + spec: Union[Dict, ServerlessSpec, PodSpec], + dimension: Optional[int] = None, + metric: Optional[Union[Metric, str]] = Metric.COSINE, + timeout: Optional[int] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + tags: Optional[Dict[str, str]] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_request( + name=name, + spec=spec, + dimension=dimension, + metric=metric, + deletion_protection=deletion_protection, + vector_type=vector_type, + tags=tags, + ) + resp = self.index_api.create_index(create_index_request=req) + + if timeout == -1: + return IndexModel(resp) + return self.__poll_describe_index_until_ready(name, timeout) + + def create_for_model( + self, + name: str, + cloud: Union[CloudProvider, str], + region: Union[AwsRegion, GcpRegion, AzureRegion, str], + embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + tags: Optional[Dict[str, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + timeout: Optional[int] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_for_model_request( + name=name, + cloud=cloud, + region=region, + embed=embed, + tags=tags, + deletion_protection=deletion_protection, + ) + resp = self.index_api.create_index_for_model(req) + + if timeout == -1: + return IndexModel(resp) + return self.__poll_describe_index_until_ready(name, timeout) + + def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): + description = None + + def is_ready() -> bool: + nonlocal description + description = self.describe(name=name) + return description.status.ready + + total_wait_time = 0 + if timeout is None: + # Wait indefinitely + while not is_ready(): + logger.debug( + f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." + ) + total_wait_time += 5 + time.sleep(5) + + else: + # Wait for a maximum of timeout seconds + while not is_ready(): + if timeout < 0: + logger.error(f"Index {name} is not ready. Timeout reached.") + link = docslinks["API_DESCRIBE_INDEX"](API_VERSION) + timeout_msg = ( + f"Please call describe_index() to confirm index status. See docs at {link}" + ) + raise TimeoutError(timeout_msg) + + logger.debug( + f"Waiting for index {name} to be ready. 
Total wait time: {total_wait_time}" + ) + total_wait_time += 5 + time.sleep(5) + timeout -= 5 + + return description + + def delete(self, name: str, timeout: Optional[int] = None): + self.index_api.delete_index(name) + self.index_host_store.delete_host(self.config, name) + + if timeout == -1: + return + + if timeout is None: + while self.has(name): + time.sleep(5) + else: + while self.has(name) and timeout >= 0: + time.sleep(5) + timeout -= 5 + if timeout and timeout < 0: + raise ( + TimeoutError( + "Please call the list_indexes API ({}) to confirm if index is deleted".format( + "https://www.pinecone.io/docs/api/operation/list_indexes/" + ) + ) + ) + + def list(self) -> IndexList: + response = self.index_api.list_indexes() + return IndexList(response) + + def describe(self, name: str) -> IndexModel: + api_instance = self.index_api + description = api_instance.describe_index(name) + host = description.host + self.index_host_store.set_host(self.config, name, host) + + return IndexModel(description) + + def has(self, name: str) -> bool: + if name in self.list().names(): + return True + else: + return False + + def configure( + self, + name: str, + replicas: Optional[int] = None, + pod_type: Optional[Union[PodType, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = None, + tags: Optional[Dict[str, str]] = None, + ): + api_instance = self.index_api + description = self.describe(name=name) + + req = PineconeDBControlRequestFactory.configure_index_request( + description=description, + replicas=replicas, + pod_type=pod_type, + deletion_protection=deletion_protection, + tags=tags, + ) + api_instance.configure_index(name, configure_index_request=req) + + def _get_host(self, name: str) -> str: + """@private""" + return self.index_host_store.get_host( + api=self.index_api, config=self.config, index_name=name + ) diff --git a/pinecone/control/types/__init__.py b/pinecone/db_control/types/__init__.py similarity index 100% rename from pinecone/control/types/__init__.py rename to pinecone/db_control/types/__init__.py diff --git a/pinecone/control/types/create_index_for_model_embed.py b/pinecone/db_control/types/create_index_for_model_embed.py similarity index 72% rename from pinecone/control/types/create_index_for_model_embed.py rename to pinecone/db_control/types/create_index_for_model_embed.py index 123474a0..ab7e43ac 100644 --- a/pinecone/control/types/create_index_for_model_embed.py +++ b/pinecone/db_control/types/create_index_for_model_embed.py @@ -1,6 +1,6 @@ from typing import TypedDict, Dict, Union -from ...enums import Metric -from ...data.features.inference import EmbedModel +from pinecone.db_control.enums import Metric +from pinecone.inference import EmbedModel class CreateIndexForModelEmbedTypedDict(TypedDict): diff --git a/pinecone/db_data/__init__.py b/pinecone/db_data/__init__.py new file mode 100644 index 00000000..f2db9a63 --- /dev/null +++ b/pinecone/db_data/__init__.py @@ -0,0 +1,61 @@ +from .index import ( + Index as _Index, + FetchResponse, + QueryResponse, + DescribeIndexStatsResponse, + UpsertResponse, + SparseValues, + Vector, +) +from .dataclasses import * +from .import_error import ( + Index, + IndexClientInstantiationError, + Inference, + InferenceInstantiationError, +) +from .index_asyncio import * +from .errors import ( + VectorDictionaryMissingKeysError, + VectorDictionaryExcessKeysError, + VectorTupleLengthError, + SparseValuesTypeError, + SparseValuesMissingKeysError, + SparseValuesDictionaryExpectedError, + MetadataDictionaryExpectedError, +) + 
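# ---------------------------------------------------------------------------
# A minimal usage sketch for the IndexResource / IndexResourceAsyncio classes
# introduced above. `pc.db.index` is an assumption here, inferred from the
# `self.db.index._get_host(name)` call in the grpc changes further down this
# diff. The timeout contract comes straight from the resource code:
# timeout=-1 returns without polling, timeout=None polls describe_index every
# 5 seconds indefinitely, and a positive timeout raises TimeoutError once the
# budget is exhausted.
from pinecone import Pinecone, ServerlessSpec, CloudProvider, AwsRegion, Metric

pc = Pinecone(api_key="YOUR_API_KEY")
index_model = pc.db.index.create(
    name="docs-example",
    dimension=1536,
    metric=Metric.COSINE,
    spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1),
    timeout=120,  # poll describe_index for up to ~120 seconds before raising
)
print(index_model.status.ready)
# ---------------------------------------------------------------------------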
+from .features.bulk_import import ImportErrorMode
+
+
+import warnings
+
+
+def _get_deprecated_import(name, from_module, to_module):
+    warnings.warn(
+        f"The import of `{name}` from `{from_module}` has moved to `{to_module}`. "
+        f"Please update your imports from `from {from_module} import {name}` "
+        f"to `from {to_module} import {name}`. "
+        "This warning will become an error in a future version of the Pinecone Python SDK.",
+        DeprecationWarning,
+    )
+    # Import from the new location
+    from pinecone.inference import (
+        Inference as _Inference,
+        AsyncioInference as _AsyncioInference,
+        RerankModel,
+        EmbedModel,
+    )
+
+    return locals()[name]
+
+
+moved = ["_Inference", "_AsyncioInference", "RerankModel", "EmbedModel"]
+
+
+def __getattr__(name):
+    if name in globals():
+        return globals()[name]
+    elif name in moved:
+        return _get_deprecated_import(name, "pinecone.data", "pinecone.inference")
+    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
diff --git a/pinecone/data/dataclasses/__init__.py b/pinecone/db_data/dataclasses/__init__.py
similarity index 100%
rename from pinecone/data/dataclasses/__init__.py
rename to pinecone/db_data/dataclasses/__init__.py
diff --git a/pinecone/data/dataclasses/fetch_response.py b/pinecone/db_data/dataclasses/fetch_response.py
similarity index 100%
rename from pinecone/data/dataclasses/fetch_response.py
rename to pinecone/db_data/dataclasses/fetch_response.py
diff --git a/pinecone/data/dataclasses/search_query.py b/pinecone/db_data/dataclasses/search_query.py
similarity index 100%
rename from pinecone/data/dataclasses/search_query.py
rename to pinecone/db_data/dataclasses/search_query.py
diff --git a/pinecone/data/dataclasses/search_query_vector.py b/pinecone/db_data/dataclasses/search_query_vector.py
similarity index 100%
rename from pinecone/data/dataclasses/search_query_vector.py
rename to pinecone/db_data/dataclasses/search_query_vector.py
diff --git a/pinecone/data/dataclasses/search_rerank.py b/pinecone/db_data/dataclasses/search_rerank.py
similarity index 97%
rename from pinecone/data/dataclasses/search_rerank.py
rename to pinecone/db_data/dataclasses/search_rerank.py
index 1b9534ba..0ac4ca4e 100644
--- a/pinecone/data/dataclasses/search_rerank.py
+++ b/pinecone/db_data/dataclasses/search_rerank.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from typing import Optional, Dict, Any, List
-from ..features.inference import RerankModel
+from pinecone.inference import RerankModel
 
 
 @dataclass
diff --git a/pinecone/data/dataclasses/sparse_values.py b/pinecone/db_data/dataclasses/sparse_values.py
similarity index 100%
rename from pinecone/data/dataclasses/sparse_values.py
rename to pinecone/db_data/dataclasses/sparse_values.py
diff --git a/pinecone/data/dataclasses/utils.py b/pinecone/db_data/dataclasses/utils.py
similarity index 100%
rename from pinecone/data/dataclasses/utils.py
rename to pinecone/db_data/dataclasses/utils.py
diff --git a/pinecone/data/dataclasses/vector.py b/pinecone/db_data/dataclasses/vector.py
similarity index 100%
rename from pinecone/data/dataclasses/vector.py
rename to pinecone/db_data/dataclasses/vector.py
diff --git a/pinecone/data/errors.py b/pinecone/db_data/errors.py
similarity index 100%
rename from pinecone/data/errors.py
rename to pinecone/db_data/errors.py
diff --git a/pinecone/data/features/bulk_import/__init__.py b/pinecone/db_data/features/bulk_import/__init__.py
similarity index 100%
rename from pinecone/data/features/bulk_import/__init__.py
rename to
pinecone/db_data/features/bulk_import/__init__.py diff --git a/pinecone/data/features/bulk_import/bulk_import.py b/pinecone/db_data/features/bulk_import/bulk_import.py similarity index 100% rename from pinecone/data/features/bulk_import/bulk_import.py rename to pinecone/db_data/features/bulk_import/bulk_import.py diff --git a/pinecone/data/features/bulk_import/bulk_import_asyncio.py b/pinecone/db_data/features/bulk_import/bulk_import_asyncio.py similarity index 100% rename from pinecone/data/features/bulk_import/bulk_import_asyncio.py rename to pinecone/db_data/features/bulk_import/bulk_import_asyncio.py diff --git a/pinecone/data/features/bulk_import/bulk_import_request_factory.py b/pinecone/db_data/features/bulk_import/bulk_import_request_factory.py similarity index 100% rename from pinecone/data/features/bulk_import/bulk_import_request_factory.py rename to pinecone/db_data/features/bulk_import/bulk_import_request_factory.py diff --git a/pinecone/data/import_error.py b/pinecone/db_data/import_error.py similarity index 100% rename from pinecone/data/import_error.py rename to pinecone/db_data/import_error.py diff --git a/pinecone/data/index.py b/pinecone/db_data/index.py similarity index 98% rename from pinecone/data/index.py rename to pinecone/db_data/index.py index ebd5cecd..a228bfbe 100644 --- a/pinecone/data/index.py +++ b/pinecone/db_data/index.py @@ -55,7 +55,7 @@ def parse_query_response(response: QueryResponse): return response -class Index(IndexInterface, ImportFeatureMixin, PluginAware): +class Index(PluginAware, IndexInterface, ImportFeatureMixin): """ A client for interacting with a Pinecone index via REST API. For improved performance, use the Pinecone GRPC index client. @@ -101,10 +101,6 @@ def __init__( # Pass the same api_client to the ImportFeatureMixin super().__init__(api_client=self._api_client) - self.load_plugins( - config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads - ) - def _openapi_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, Any]: return filter_dict(kwargs, OPENAPI_ENDPOINT_PARAMS) diff --git a/pinecone/data/index_asyncio.py b/pinecone/db_data/index_asyncio.py similarity index 100% rename from pinecone/data/index_asyncio.py rename to pinecone/db_data/index_asyncio.py diff --git a/pinecone/data/index_asyncio_interface.py b/pinecone/db_data/index_asyncio_interface.py similarity index 100% rename from pinecone/data/index_asyncio_interface.py rename to pinecone/db_data/index_asyncio_interface.py diff --git a/pinecone/data/interfaces.py b/pinecone/db_data/interfaces.py similarity index 100% rename from pinecone/data/interfaces.py rename to pinecone/db_data/interfaces.py diff --git a/pinecone/db_data/models/__init__.py b/pinecone/db_data/models/__init__.py new file mode 100644 index 00000000..a14d3600 --- /dev/null +++ b/pinecone/db_data/models/__init__.py @@ -0,0 +1 @@ +from pinecone.core.openapi.db_data.models import * diff --git a/pinecone/data/query_results_aggregator.py b/pinecone/db_data/query_results_aggregator.py similarity index 100% rename from pinecone/data/query_results_aggregator.py rename to pinecone/db_data/query_results_aggregator.py diff --git a/pinecone/data/request_factory.py b/pinecone/db_data/request_factory.py similarity index 100% rename from pinecone/data/request_factory.py rename to pinecone/db_data/request_factory.py diff --git a/pinecone/data/sparse_values_factory.py b/pinecone/db_data/sparse_values_factory.py similarity index 100% rename from pinecone/data/sparse_values_factory.py rename to 
pinecone/db_data/sparse_values_factory.py diff --git a/pinecone/data/types/__init__.py b/pinecone/db_data/types/__init__.py similarity index 100% rename from pinecone/data/types/__init__.py rename to pinecone/db_data/types/__init__.py diff --git a/pinecone/data/types/query_filter.py b/pinecone/db_data/types/query_filter.py similarity index 100% rename from pinecone/data/types/query_filter.py rename to pinecone/db_data/types/query_filter.py diff --git a/pinecone/data/types/search_query_typed_dict.py b/pinecone/db_data/types/search_query_typed_dict.py similarity index 100% rename from pinecone/data/types/search_query_typed_dict.py rename to pinecone/db_data/types/search_query_typed_dict.py diff --git a/pinecone/data/types/search_query_vector_typed_dict.py b/pinecone/db_data/types/search_query_vector_typed_dict.py similarity index 100% rename from pinecone/data/types/search_query_vector_typed_dict.py rename to pinecone/db_data/types/search_query_vector_typed_dict.py diff --git a/pinecone/data/types/search_rerank_typed_dict.py b/pinecone/db_data/types/search_rerank_typed_dict.py similarity index 96% rename from pinecone/data/types/search_rerank_typed_dict.py rename to pinecone/db_data/types/search_rerank_typed_dict.py index 89c4f8d8..2d04fe82 100644 --- a/pinecone/data/types/search_rerank_typed_dict.py +++ b/pinecone/db_data/types/search_rerank_typed_dict.py @@ -1,5 +1,5 @@ from typing import TypedDict, Optional, Union, Dict, Any -from ..features.inference import RerankModel +from pinecone.inference import RerankModel class SearchRerankTypedDict(TypedDict): diff --git a/pinecone/data/types/sparse_vector_typed_dict.py b/pinecone/db_data/types/sparse_vector_typed_dict.py similarity index 100% rename from pinecone/data/types/sparse_vector_typed_dict.py rename to pinecone/db_data/types/sparse_vector_typed_dict.py diff --git a/pinecone/data/types/vector_metadata_dict.py b/pinecone/db_data/types/vector_metadata_dict.py similarity index 100% rename from pinecone/data/types/vector_metadata_dict.py rename to pinecone/db_data/types/vector_metadata_dict.py diff --git a/pinecone/data/types/vector_tuple.py b/pinecone/db_data/types/vector_tuple.py similarity index 100% rename from pinecone/data/types/vector_tuple.py rename to pinecone/db_data/types/vector_tuple.py diff --git a/pinecone/data/types/vector_typed_dict.py b/pinecone/db_data/types/vector_typed_dict.py similarity index 100% rename from pinecone/data/types/vector_typed_dict.py rename to pinecone/db_data/types/vector_typed_dict.py diff --git a/pinecone/data/vector_factory.py b/pinecone/db_data/vector_factory.py similarity index 100% rename from pinecone/data/vector_factory.py rename to pinecone/db_data/vector_factory.py diff --git a/pinecone/exceptions/__init__.py b/pinecone/exceptions/__init__.py index 92b05fd7..f437e90b 100644 --- a/pinecone/exceptions/__init__.py +++ b/pinecone/exceptions/__init__.py @@ -1,4 +1,7 @@ -from pinecone.openapi_support.exceptions import ( +from .exceptions import ( + PineconeConfigurationError, + PineconeProtocolError, + ListConversionException, PineconeException, PineconeApiAttributeError, PineconeApiTypeError, @@ -10,7 +13,6 @@ ForbiddenException, ServiceException, ) -from .exceptions import PineconeConfigurationError, PineconeProtocolError, ListConversionException __all__ = [ "PineconeConfigurationError", diff --git a/pinecone/exceptions/exceptions.py b/pinecone/exceptions/exceptions.py index 3860dc8b..32eed99f 100644 --- a/pinecone/exceptions/exceptions.py +++ b/pinecone/exceptions/exceptions.py @@ -1,4 +1,143 @@ 
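# ---------------------------------------------------------------------------
# A short sketch of consuming the exception hierarchy defined in the hunk
# below. The status/reason/body attributes come straight from
# PineconeApiException; mapping a missing index to NotFoundException is an
# assumption based on the class name, not something this diff shows.
from pinecone import Pinecone
from pinecone.exceptions import NotFoundException, PineconeApiException

pc = Pinecone(api_key="YOUR_API_KEY")
try:
    pc.describe_index("no-such-index")
except NotFoundException:
    print("index does not exist")       # control plane returned 404
except PineconeApiException as e:
    print(e.status, e.reason, e.body)   # any other non-2xx response
# ---------------------------------------------------------------------------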
-from pinecone.openapi_support.exceptions import PineconeException +class PineconeException(Exception): + """The base exception class for all exceptions in the Pinecone Python SDK""" + + +class PineconeApiTypeError(PineconeException, TypeError): + def __init__(self, msg, path_to_item=None, valid_classes=None, key_type=None) -> None: + """Raises an exception for TypeErrors + + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (list): a list of keys an indices to get to the + current_item + None if unset + valid_classes (tuple): the primitive classes that current item + should be an instance of + None if unset + key_type (bool): False if our value is a value in a dict + True if it is a key in a dict + False if our item is an item in a list + None if unset + """ + self.path_to_item = path_to_item + self.valid_classes = valid_classes + self.key_type = key_type + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiTypeError, self).__init__(full_msg) + + +class PineconeApiValueError(PineconeException, ValueError): + def __init__(self, msg, path_to_item=None) -> None: + """ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (list) the path to the exception in the + received_data dict. None if unset + """ + + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiValueError, self).__init__(full_msg) + + +class PineconeApiAttributeError(PineconeException, AttributeError): + def __init__(self, msg, path_to_item=None) -> None: + """ + Raised when an attribute reference or assignment fails. + + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (None/list) the path to the exception in the + received_data dict + """ + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiAttributeError, self).__init__(full_msg) + + +class PineconeApiKeyError(PineconeException, KeyError): + def __init__(self, msg, path_to_item=None) -> None: + """ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (None/list) the path to the exception in the + received_data dict + """ + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiKeyError, self).__init__(full_msg) + + +class PineconeApiException(PineconeException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + if http_resp: + self.status = http_resp.status + self.reason = http_resp.reason + self.body = http_resp.data + self.headers = http_resp.getheaders() + else: + self.status = status + self.reason = reason + self.body = None + self.headers = None + + def __str__(self): + """Custom error messages for exception""" + error_message = "({0})\nReason: {1}\n".format(self.status, self.reason) + if self.headers: + error_message += "HTTP response headers: {0}\n".format(self.headers) + + if self.body: + error_message += "HTTP response body: {0}\n".format(self.body) + + return error_message + + +class NotFoundException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(NotFoundException, self).__init__(status, reason, http_resp) + + +class UnauthorizedException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + 
super(UnauthorizedException, self).__init__(status, reason, http_resp) + + +class ForbiddenException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(ForbiddenException, self).__init__(status, reason, http_resp) + + +class ServiceException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(ServiceException, self).__init__(status, reason, http_resp) + + +def render_path(path_to_item): + """Returns a string representation of a path""" + result = "" + for pth in path_to_item: + if isinstance(pth, int): + result += "[{0}]".format(pth) + else: + result += "['{0}']".format(pth) + return result class PineconeProtocolError(PineconeException): diff --git a/pinecone/grpc/__init__.py b/pinecone/grpc/__init__.py index 350047ca..66adb916 100644 --- a/pinecone/grpc/__init__.py +++ b/pinecone/grpc/__init__.py @@ -49,7 +49,7 @@ from .config import GRPCClientConfig from .future import PineconeGrpcFuture -from pinecone.data.dataclasses import Vector, SparseValues +from pinecone.db_data.dataclasses import Vector, SparseValues from pinecone.core.grpc.protos.db_data_2025_01_pb2 import ( Vector as GRPCVector, diff --git a/pinecone/grpc/index_grpc.py b/pinecone/grpc/index_grpc.py index 9b68c0b6..bfaf8fff 100644 --- a/pinecone/grpc/index_grpc.py +++ b/pinecone/grpc/index_grpc.py @@ -21,7 +21,7 @@ QueryResponse, IndexDescription as DescribeIndexStatsResponse, ) -from pinecone.models.list_response import ListResponse as SimpleListResponse, Pagination +from pinecone.db_control.models.list_response import ListResponse as SimpleListResponse, Pagination from pinecone.core.grpc.protos.db_data_2025_01_pb2 import ( Vector as GRPCVector, QueryVector as GRPCQueryVector, @@ -38,11 +38,11 @@ SparseValues as GRPCSparseValues, ) from pinecone import Vector, SparseValues -from pinecone.data.query_results_aggregator import QueryNamespacesResults, QueryResultsAggregator +from pinecone.db_data.query_results_aggregator import QueryNamespacesResults, QueryResultsAggregator from pinecone.core.grpc.protos.db_data_2025_01_pb2_grpc import VectorServiceStub from .base import GRPCIndexBase from .future import PineconeGrpcFuture -from ..data.types import ( +from ..db_data.types import ( SparseVectorTypedDict, VectorTypedDict, VectorTuple, diff --git a/pinecone/grpc/pinecone.py b/pinecone/grpc/pinecone.py index c78481ff..28a13622 100644 --- a/pinecone/grpc/pinecone.py +++ b/pinecone/grpc/pinecone.py @@ -1,5 +1,5 @@ -from ..control.pinecone import Pinecone -from ..config.config import ConfigBuilder +from pinecone import Pinecone +from pinecone.config import ConfigBuilder from .index_grpc import GRPCIndex @@ -122,7 +122,7 @@ def Index(self, name: str = "", host: str = "", **kwargs): raise ValueError("Either name or host must be specified") # Use host if it is provided, otherwise get host from describe_index - index_host = host or self.index_host_store.get_host(self.index_api, self.config, name) + index_host = host or self.db.index._get_host(name) pt = kwargs.pop("pool_threads", None) or self.pool_threads diff --git a/pinecone/grpc/sparse_values_factory.py b/pinecone/grpc/sparse_values_factory.py index 240cd8e1..5bb14685 100644 --- a/pinecone/grpc/sparse_values_factory.py +++ b/pinecone/grpc/sparse_values_factory.py @@ -3,8 +3,8 @@ from ..utils import convert_to_list -from ..data import SparseValuesTypeError, SparseValuesMissingKeysError -from ..data.types import SparseVectorTypedDict +from ..db_data import SparseValuesTypeError, 
SparseValuesMissingKeysError +from ..db_data.types import SparseVectorTypedDict from pinecone.core.grpc.protos.db_data_2025_01_pb2 import SparseValues as GRPCSparseValues from pinecone.core.openapi.db_data.models import SparseValues as OpenApiSparseValues diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py index dcd19710..c2869e73 100644 --- a/pinecone/grpc/utils.py +++ b/pinecone/grpc/utils.py @@ -13,7 +13,7 @@ IndexDescription as DescribeIndexStatsResponse, NamespaceSummary, ) -from pinecone.data.dataclasses import FetchResponse +from pinecone.db_data.dataclasses import FetchResponse from google.protobuf.struct_pb2 import Struct diff --git a/pinecone/grpc/vector_factory_grpc.py b/pinecone/grpc/vector_factory_grpc.py index 1fe9572b..22efd269 100644 --- a/pinecone/grpc/vector_factory_grpc.py +++ b/pinecone/grpc/vector_factory_grpc.py @@ -8,13 +8,13 @@ from .utils import dict_to_proto_struct from ..utils import fix_tuple_length, convert_to_list from ..utils.constants import REQUIRED_VECTOR_FIELDS, OPTIONAL_VECTOR_FIELDS -from ..data import ( +from ..db_data import ( VectorDictionaryMissingKeysError, VectorDictionaryExcessKeysError, VectorTupleLengthError, MetadataDictionaryExpectedError, ) -from ..data.types import VectorTuple, VectorTypedDict +from ..db_data.types import VectorTuple, VectorTypedDict from .sparse_values_factory import SparseValuesFactory from pinecone.core.grpc.protos.db_data_2025_01_pb2 import ( diff --git a/pinecone/inference/__init__.py b/pinecone/inference/__init__.py new file mode 100644 index 00000000..30e93330 --- /dev/null +++ b/pinecone/inference/__init__.py @@ -0,0 +1,6 @@ +from .repl_overrides import install_repl_overrides +from .inference import Inference +from .inference_asyncio import AsyncioInference +from .inference_request_builder import RerankModel, EmbedModel + +install_repl_overrides() diff --git a/pinecone/data/features/inference/inference.py b/pinecone/inference/inference.py similarity index 98% rename from pinecone/data/features/inference/inference.py rename to pinecone/inference/inference.py index 71ada564..9ab34e33 100644 --- a/pinecone/data/features/inference/inference.py +++ b/pinecone/inference/inference.py @@ -63,9 +63,7 @@ def __init__(self, config, openapi_config, **kwargs) -> None: api_version=API_VERSION, ) - self.load_plugins( - config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads - ) + super().__init__() # Initialize PluginAware def embed( self, diff --git a/pinecone/data/features/inference/inference_asyncio.py b/pinecone/inference/inference_asyncio.py similarity index 100% rename from pinecone/data/features/inference/inference_asyncio.py rename to pinecone/inference/inference_asyncio.py diff --git a/pinecone/data/features/inference/inference_request_builder.py b/pinecone/inference/inference_request_builder.py similarity index 100% rename from pinecone/data/features/inference/inference_request_builder.py rename to pinecone/inference/inference_request_builder.py diff --git a/pinecone/data/features/inference/models/__init__.py b/pinecone/inference/models/__init__.py similarity index 100% rename from pinecone/data/features/inference/models/__init__.py rename to pinecone/inference/models/__init__.py diff --git a/pinecone/data/features/inference/models/embedding_list.py b/pinecone/inference/models/embedding_list.py similarity index 100% rename from pinecone/data/features/inference/models/embedding_list.py rename to pinecone/inference/models/embedding_list.py diff --git a/pinecone/models/index_embed.py 
b/pinecone/inference/models/index_embed.py similarity index 94% rename from pinecone/models/index_embed.py rename to pinecone/inference/models/index_embed.py index 4d1ccfe3..4c3306d0 100644 --- a/pinecone/models/index_embed.py +++ b/pinecone/inference/models/index_embed.py @@ -1,8 +1,8 @@ from dataclasses import dataclass from typing import Optional, Dict, Any, Union -from ..enums import Metric -from ..data.features.inference import EmbedModel +from pinecone.db_control.enums import Metric +from pinecone.inference.inference_request_builder import EmbedModel @dataclass(frozen=True) diff --git a/pinecone/data/features/inference/models/rerank_result.py b/pinecone/inference/models/rerank_result.py similarity index 100% rename from pinecone/data/features/inference/models/rerank_result.py rename to pinecone/inference/models/rerank_result.py diff --git a/pinecone/data/features/inference/repl_overrides.py b/pinecone/inference/repl_overrides.py similarity index 100% rename from pinecone/data/features/inference/repl_overrides.py rename to pinecone/inference/repl_overrides.py diff --git a/pinecone/control/langchain_import_warnings.py b/pinecone/langchain_import_warnings.py similarity index 100% rename from pinecone/control/langchain_import_warnings.py rename to pinecone/langchain_import_warnings.py diff --git a/pinecone/control/pinecone_interface.py b/pinecone/legacy_pinecone_interface.py similarity index 94% rename from pinecone/control/pinecone_interface.py rename to pinecone/legacy_pinecone_interface.py index c183e611..0b097261 100644 --- a/pinecone/control/pinecone_interface.py +++ b/pinecone/legacy_pinecone_interface.py @@ -1,30 +1,30 @@ from abc import ABC, abstractmethod -from typing import Optional, Dict, Union - - -from pinecone.models import ( - ServerlessSpec, - PodSpec, - IndexList, - CollectionList, - IndexModel, - IndexEmbed, -) -from pinecone.enums import ( - Metric, - VectorType, - DeletionProtection, - PodType, - CloudProvider, - AwsRegion, - GcpRegion, - AzureRegion, -) -from .types import CreateIndexForModelEmbedTypedDict - - -class PineconeDBControlInterface(ABC): +from typing import Optional, Dict, Union, TYPE_CHECKING + +if TYPE_CHECKING: + from pinecone.db_control.models import ( + ServerlessSpec, + PodSpec, + IndexList, + CollectionList, + IndexModel, + IndexEmbed, + ) + from pinecone.db_control.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, + ) + from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict + + +class LegacyPineconeDBControlInterface(ABC): @abstractmethod def __init__( self, @@ -190,14 +190,16 @@ def __init__( def create_index( self, name: str, - spec: Union[Dict, ServerlessSpec, PodSpec], + spec: Union[Dict, "ServerlessSpec", "PodSpec"], dimension: Optional[int], - metric: Optional[Union[Metric, str]] = Metric.COSINE, + metric: Optional[Union["Metric", str]] = "Metric.COSINE", timeout: Optional[int] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", + vector_type: Optional[Union["VectorType", str]] = "VectorType.DENSE", tags: Optional[Dict[str, str]] = None, - ) -> IndexModel: + ) -> "IndexModel": """Creates a Pinecone index. :param name: The name of the index to create. 
Must be unique within your project and @@ -299,13 +301,15 @@ def create_index( def create_index_for_model( self, name: str, - cloud: Union[CloudProvider, str], - region: Union[AwsRegion, GcpRegion, AzureRegion, str], - embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + cloud: Union["CloudProvider", str], + region: Union["AwsRegion", "GcpRegion", "AzureRegion", str], + embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"], tags: Optional[Dict[str, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", timeout: Optional[int] = None, - ) -> IndexModel: + ) -> "IndexModel": """ :param name: The name of the index to create. Must be unique within your project and cannot be changed once created. Allowed characters are lowercase letters, numbers, @@ -414,7 +418,7 @@ def delete_index(self, name: str, timeout: Optional[int] = None): pass @abstractmethod - def list_indexes(self) -> IndexList: + def list_indexes(self) -> "IndexList": """ :return: Returns an `IndexList` object, which is iterable and contains a list of `IndexModel` objects. The `IndexList` also has a convenience method `names()` @@ -447,7 +451,7 @@ def list_indexes(self) -> IndexList: pass @abstractmethod - def describe_index(self, name: str) -> IndexModel: + def describe_index(self, name: str) -> "IndexModel": """ :param name: the name of the index to describe. :return: Returns an `IndexModel` object @@ -534,8 +538,8 @@ def configure_index( self, name: str, replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, ): """ @@ -622,7 +626,7 @@ def configure_index( pass @abstractmethod - def create_collection(self, name: str, source: str): + def create_collection(self, name: str, source: str) -> None: """Create a collection from a pod-based index :param name: Name of the collection @@ -631,7 +635,7 @@ def create_collection(self, name: str, source: str): pass @abstractmethod - def list_collections(self) -> CollectionList: + def list_collections(self) -> "CollectionList": """List all collections ```python diff --git a/pinecone/models/__init__.py b/pinecone/models/__init__.py index 86306c1e..74a1658c 100644 --- a/pinecone/models/__init__.py +++ b/pinecone/models/__init__.py @@ -1,20 +1,9 @@ -from .index_description import ServerlessSpecDefinition, PodSpecDefinition -from .collection_description import CollectionDescription -from .serverless_spec import ServerlessSpec -from .pod_spec import PodSpec -from .index_list import IndexList -from .collection_list import CollectionList -from .index_model import IndexModel -from .index_embed import IndexEmbed +import warnings -__all__ = [ - "CollectionDescription", - "PodSpec", - "PodSpecDefinition", - "ServerlessSpec", - "ServerlessSpecDefinition", - "IndexList", - "CollectionList", - "IndexModel", - "IndexEmbed", -] +warnings.warn( + "The module at `pinecone.models` has moved to `pinecone.db_control.models`. 
" + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) + +from pinecone.db_control.models import * diff --git a/pinecone/openapi_support/__init__.py b/pinecone/openapi_support/__init__.py index 63e3fb0a..890c3007 100644 --- a/pinecone/openapi_support/__init__.py +++ b/pinecone/openapi_support/__init__.py @@ -8,7 +8,7 @@ from .endpoint_utils import ExtraOpenApiKwargsTypedDict, KwargsWithOpenApiKwargDefaultsTypedDict from .asyncio_api_client import AsyncioApiClient from .asyncio_endpoint import AsyncioEndpoint -from .configuration import Configuration +from .configuration_lazy import Configuration from .exceptions import ( PineconeException, PineconeApiAttributeError, diff --git a/pinecone/openapi_support/api_client.py b/pinecone/openapi_support/api_client.py index 421d56cc..ee1e4649 100644 --- a/pinecone/openapi_support/api_client.py +++ b/pinecone/openapi_support/api_client.py @@ -1,14 +1,14 @@ import atexit -from multiprocessing.pool import ThreadPool -from concurrent.futures import ThreadPoolExecutor import io -from typing import Optional, List, Tuple, Dict, Any, Union -from .deserializer import Deserializer +from typing import Optional, List, Tuple, Dict, Any, Union, TYPE_CHECKING +if TYPE_CHECKING: + from multiprocessing.pool import ThreadPool + from concurrent.futures import ThreadPoolExecutor from .rest_urllib3 import Urllib3RestClient -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .exceptions import PineconeApiValueError, PineconeApiException from .api_client_utils import ( parameters_to_tuples, @@ -30,8 +30,8 @@ class ApiClient(object): to the API. More threads means more concurrent API requests. """ - _pool: Optional[ThreadPool] = None - _threadpool_executor: Optional[ThreadPoolExecutor] = None + _pool: Optional["ThreadPool"] = None + _threadpool_executor: Optional["ThreadPoolExecutor"] = None def __init__( self, configuration: Optional[Configuration] = None, pool_threads: Optional[int] = 1 @@ -64,18 +64,22 @@ def close(self): atexit.unregister(self.close) @property - def pool(self): + def pool(self) -> "ThreadPool": """Create thread pool on first request avoids instantiating unused threadpool for blocking clients. 
""" if self._pool is None: + from multiprocessing.pool import ThreadPool + atexit.register(self.close) self._pool = ThreadPool(self.pool_threads) return self._pool @property - def threadpool_executor(self): + def threadpool_executor(self) -> "ThreadPoolExecutor": if self._threadpool_executor is None: + from concurrent.futures import ThreadPoolExecutor + self._threadpool_executor = ThreadPoolExecutor(max_workers=self.pool_threads) return self._threadpool_executor @@ -186,6 +190,8 @@ def __call_api( # deserialize response data if response_type: + from .deserializer import Deserializer + Deserializer.decode_response(response_type=response_type, response=response_data) return_data = Deserializer.deserialize( response=response_data, diff --git a/pinecone/openapi_support/asyncio_api_client.py b/pinecone/openapi_support/asyncio_api_client.py index 51f2e0ce..43c8e17b 100644 --- a/pinecone/openapi_support/asyncio_api_client.py +++ b/pinecone/openapi_support/asyncio_api_client.py @@ -7,7 +7,7 @@ from .rest_aiohttp import AiohttpRestClient -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .exceptions import PineconeApiValueError, PineconeApiException from .api_client_utils import ( parameters_to_tuples, diff --git a/pinecone/openapi_support/configuration.py b/pinecone/openapi_support/configuration.py index fb6d7d19..e69de29b 100644 --- a/pinecone/openapi_support/configuration.py +++ b/pinecone/openapi_support/configuration.py @@ -1,441 +0,0 @@ -import copy -import logging -import multiprocessing - -from http import client as http_client -from .exceptions import PineconeApiValueError -from typing import TypedDict - - -class HostSetting(TypedDict): - url: str - description: str - - -JSON_SCHEMA_VALIDATION_KEYWORDS = { - "multipleOf", - "maximum", - "exclusiveMaximum", - "minimum", - "exclusiveMinimum", - "maxLength", - "minLength", - "pattern", - "maxItems", - "minItems", -} - - -class Configuration: - """Class to hold the configuration of the API client. - - :param host: Base url - :param api_key: Dict to store API key(s). - Each entry in the dict specifies an API key. - The dict key is the name of the security scheme in the OAS specification. - The dict value is the API key secret. - :param api_key_prefix: Dict to store API prefix (e.g. Bearer) - The dict key is the name of the security scheme in the OAS specification. - The dict value is an API key prefix when generating the auth data. - :param discard_unknown_keys: Boolean value indicating whether to discard - unknown properties. A server may send a response that includes additional - properties that are not known by the client in the following scenarios: - 1. The OpenAPI document is incomplete, i.e. it does not match the server - implementation. - 2. The client was generated using an older version of the OpenAPI document - and the server has been upgraded since then. - If a schema in the OpenAPI document defines the additionalProperties attribute, - then all undeclared properties received by the server are injected into the - additional properties map. In that case, there are undeclared properties, and - nothing to discard. - :param disabled_client_side_validations (string): Comma-separated list of - JSON schema validation keywords to disable JSON schema structural validation - rules. The following keywords may be specified: multipleOf, maximum, - exclusiveMaximum, minimum, exclusiveMinimum, maxLength, minLength, pattern, - maxItems, minItems. 
- By default, the validation is performed for data generated locally by the client - and data received from the server, independent of any validation performed by - the server side. If the input data does not satisfy the JSON schema validation - rules specified in the OpenAPI document, an exception is raised. - If disabled_client_side_validations is set, structural validation is - disabled. This can be useful to troubleshoot data validation problem, such as - when the OpenAPI document validation rules do not match the actual API data - received by the server. - :param server_operation_index: Mapping from operation ID to an index to server - configuration. - :param server_operation_variables: Mapping from operation ID to a mapping with - string values to replace variables in templated server configuration. - The validation of enums is performed for variables with defined enum values before. - :param ssl_ca_cert: str - the path to a file of concatenated CA certificates - in PEM format - - :Example: - - API Key Authentication Example. - Given the following security scheme in the OpenAPI specification: - components: - securitySchemes: - cookieAuth: # name for the security scheme - type: apiKey - in: cookie - name: JSESSIONID # cookie name - - You can programmatically set the cookie: - - conf = pinecone.openapi_support.Configuration( - api_key={'cookieAuth': 'abc123'} - api_key_prefix={'cookieAuth': 'JSESSIONID'} - ) - - The following cookie will be added to the HTTP request: - Cookie: JSESSIONID abc123 - """ - - _default = None - - def __init__( - self, - host=None, - api_key=None, - api_key_prefix=None, - discard_unknown_keys=False, - disabled_client_side_validations="", - server_index=None, - server_variables=None, - server_operation_index=None, - server_operation_variables=None, - ssl_ca_cert=None, - ): - """Constructor""" - self._base_path = "https://api.pinecone.io" if host is None else host - """Default Base url - """ - self.server_index = 0 if server_index is None and host is None else server_index - self.server_operation_index = server_operation_index or {} - """Default server index - """ - self.server_variables = server_variables or {} - self.server_operation_variables = server_operation_variables or {} - """Default server variables - """ - self.temp_folder_path = None - """Temp file folder for downloading files - """ - # Authentication Settings - self.api_key = {} - if api_key: - self.api_key = api_key - """dict to store API key(s) - """ - self.api_key_prefix = {} - if api_key_prefix: - self.api_key_prefix = api_key_prefix - """dict to store API prefix (e.g. Bearer) - """ - self.refresh_api_key_hook = None - """function hook to refresh API key if expired - """ - self.discard_unknown_keys = discard_unknown_keys - self.disabled_client_side_validations = disabled_client_side_validations - self.logger = {} - """Logging Settings - """ - self.logger["package_logger"] = logging.getLogger("pinecone.openapi_support") - self.logger["urllib3_logger"] = logging.getLogger("urllib3") - self.logger_format = "%(asctime)s %(levelname)s %(message)s" - """Log format - """ - self.logger_stream_handler = None - """Log stream handler - """ - self.logger_file_handler = None - """Log file handler - """ - self.logger_file = None - """Debug file location - """ - self.debug = False - """Debug switch - """ - - self.verify_ssl = True - """SSL/TLS verification - Set this to false to skip verifying SSL certificate when calling API - from https server. 
- """ - self.ssl_ca_cert = ssl_ca_cert - """Set this to customize the certificate file to verify the peer. - """ - self.cert_file = None - """client certificate file - """ - self.key_file = None - """client key file - """ - self.assert_hostname = None - """Set this to True/False to enable/disable SSL hostname verification. - """ - - self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 - """urllib3 connection pool's maximum number of connections saved - per pool. urllib3 uses 1 connection as default value, but this is - not the best value when you are making a lot of possibly parallel - requests to the same host, which is often the case here. - cpu_count * 5 is used as default value to increase performance. - """ - - self.proxy = None - """Proxy URL - """ - self.proxy_headers = None - """Proxy headers - """ - self.safe_chars_for_path_param = "" - """Safe chars for path_param - """ - self.retries = None - """Adding retries to override urllib3 default value 3 - """ - # Enable client side validation - self.client_side_validation = True - - # Options to pass down to the underlying urllib3 socket - self.socket_options = None - - def __deepcopy__(self, memo): - cls = self.__class__ - result = cls.__new__(cls) - memo[id(self)] = result - for k, v in self.__dict__.items(): - if k not in ("logger", "logger_file_handler"): - setattr(result, k, copy.deepcopy(v, memo)) - # shallow copy of loggers - result.logger = copy.copy(self.logger) - # use setters to configure loggers - result.logger_file = self.logger_file - result.debug = self.debug - return result - - def __setattr__(self, name, value): - object.__setattr__(self, name, value) - if name == "disabled_client_side_validations": - s = set(filter(None, value.split(","))) - for v in s: - if v not in JSON_SCHEMA_VALIDATION_KEYWORDS: - raise PineconeApiValueError("Invalid keyword: '{0}''".format(v)) - self._disabled_client_side_validations = s - - @classmethod - def set_default(cls, default): - """Set default instance of configuration. - - It stores default configuration, which can be - returned by get_default_copy method. - - :param default: object of Configuration - """ - cls._default = copy.deepcopy(default) - - @classmethod - def get_default_copy(cls): - """Return new instance of configuration. - - This method returns newly created, based on default constructor, - object of Configuration class or returns a copy of default - configuration passed by the set_default method. - - :return: The configuration object. - """ - if cls._default is not None: - return copy.deepcopy(cls._default) - return Configuration() - - @property - def logger_file(self): - """The logger file. - - If the logger_file is None, then add stream handler and remove file - handler. Otherwise, add file handler and remove stream handler. - - :param value: The logger_file path. - :type: str - """ - return self.__logger_file - - @logger_file.setter - def logger_file(self, value): - """The logger file. - - If the logger_file is None, then add stream handler and remove file - handler. Otherwise, add file handler and remove stream handler. - - :param value: The logger_file path. - :type: str - """ - self.__logger_file = value - if self.__logger_file: - # If set logging file, - # then add file handler and remove stream handler. 
- self.logger_file_handler = logging.FileHandler(self.__logger_file) - self.logger_file_handler.setFormatter(self.logger_formatter) - for _, logger in self.logger.items(): - logger.addHandler(self.logger_file_handler) - - @property - def debug(self): - """Debug status - - :param value: The debug status, True or False. - :type: bool - """ - return self.__debug - - @debug.setter - def debug(self, value): - """Debug status - - :param value: The debug status, True or False. - :type: bool - """ - self.__debug = value - if self.__debug: - # if debug status is True, turn on debug logging - for _, logger in self.logger.items(): - logger.setLevel(logging.DEBUG) - # turn on http_client debug - http_client.HTTPConnection.debuglevel = 1 - else: - # if debug status is False, turn off debug logging, - # setting log level to default `logging.WARNING` - for _, logger in self.logger.items(): - logger.setLevel(logging.WARNING) - # turn off http_client debug - http_client.HTTPConnection.debuglevel = 0 - - @property - def logger_format(self): - """The logger format. - - The logger_formatter will be updated when sets logger_format. - - :param value: The format string. - :type: str - """ - return self.__logger_format - - @logger_format.setter - def logger_format(self, value): - """The logger format. - - The logger_formatter will be updated when sets logger_format. - - :param value: The format string. - :type: str - """ - self.__logger_format = value - self.logger_formatter = logging.Formatter(self.__logger_format) - - def get_api_key_with_prefix(self, identifier, alias=None): - """Gets API key (with prefix if set). - - :param identifier: The identifier of apiKey. - :param alias: The alternative identifier of apiKey. - :return: The token for api key authentication. - """ - if self.refresh_api_key_hook is not None: - self.refresh_api_key_hook(self) - key = self.api_key.get(identifier, self.api_key.get(alias) if alias is not None else None) - if key: - prefix = self.api_key_prefix.get(identifier) - if prefix: - return "%s %s" % (prefix, key) - else: - return key - - def auth_settings(self): - """Gets Auth Settings dict for api client. - - :return: The Auth Settings information dict. - """ - auth = {} - if "ApiKeyAuth" in self.api_key: - auth["ApiKeyAuth"] = { - "type": "api_key", - "in": "header", - "key": "Api-Key", - "value": self.get_api_key_with_prefix("ApiKeyAuth"), - } - return auth - - def get_host_settings(self): - """Gets an array of host settings - - :return: An array of host settings - """ - return [{"url": "https://api.pinecone.io", "description": "Production API endpoints"}] - - def get_host_from_settings(self, index, variables=None, servers=None): - """Gets host URL based on the index and variables - :param index: array index of the host settings - :param variables: hash of variable and the corresponding value - :param servers: an array of host settings or None - :return: URL based on host settings - """ - if index is None: - return self._base_path - - variables = {} if variables is None else variables - servers = self.get_host_settings() if servers is None else servers - - try: - server = servers[index] - except IndexError: - raise ValueError( - "Invalid index {0} when selecting the host settings. 
Must be less than {1}".format( - index, len(servers) - ) - ) - - url = server["url"] - - # go through variables and replace placeholders - for variable_name, variable in server.get("variables", {}).items(): - used_value = variables.get(variable_name, variable["default_value"]) - - if "enum_values" in variable and used_value not in variable["enum_values"]: - raise ValueError( - "The variable `{0}` in the host URL has invalid value {1}. Must be {2}.".format( - variable_name, variables[variable_name], variable["enum_values"] - ) - ) - - url = url.replace("{" + variable_name + "}", used_value) - - return url - - @property - def host(self): - """Return generated host.""" - return self.get_host_from_settings(self.server_index, variables=self.server_variables) - - @host.setter - def host(self, value): - """Fix base path.""" - self._base_path = value - self.server_index = None - - def __repr__(self): - attrs = [ - f"host={self.host}", - "api_key=***", - f"api_key_prefix={self.api_key_prefix}", - f"connection_pool_maxsize={self.connection_pool_maxsize}", - f"discard_unknown_keys={self.discard_unknown_keys}", - f"disabled_client_side_validations={self.disabled_client_side_validations}", - f"server_index={self.server_index}", - f"server_variables={self.server_variables}", - f"server_operation_index={self.server_operation_index}", - f"server_operation_variables={self.server_operation_variables}", - f"ssl_ca_cert={self.ssl_ca_cert}", - ] - return f"Configuration({', '.join(attrs)})" diff --git a/pinecone/openapi_support/configuration_lazy.py b/pinecone/openapi_support/configuration_lazy.py new file mode 100644 index 00000000..27e90a34 --- /dev/null +++ b/pinecone/openapi_support/configuration_lazy.py @@ -0,0 +1,7 @@ +""" +Lazy import for the Configuration class to avoid loading the entire openapi_support package. 
+""" + +from ..config.openapi_configuration import Configuration + +__all__ = ["Configuration"] diff --git a/pinecone/openapi_support/endpoint_utils.py b/pinecone/openapi_support/endpoint_utils.py index 13522e85..867232b6 100644 --- a/pinecone/openapi_support/endpoint_utils.py +++ b/pinecone/openapi_support/endpoint_utils.py @@ -2,7 +2,7 @@ from .exceptions import PineconeApiTypeError, PineconeApiValueError from typing import Optional, Dict, Tuple, TypedDict, List, Literal, Any from .types import PropertyValidationTypedDict -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .model_utils import validate_and_convert_types, check_allowed_values, check_validations diff --git a/pinecone/openapi_support/exceptions.py b/pinecone/openapi_support/exceptions.py index fcc37da3..5729b13e 100644 --- a/pinecone/openapi_support/exceptions.py +++ b/pinecone/openapi_support/exceptions.py @@ -1,140 +1 @@ -class PineconeException(Exception): - """The base exception class for all exceptions in the Pinecone Python SDK""" - - -class PineconeApiTypeError(PineconeException, TypeError): - def __init__(self, msg, path_to_item=None, valid_classes=None, key_type=None) -> None: - """Raises an exception for TypeErrors - - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (list): a list of keys an indices to get to the - current_item - None if unset - valid_classes (tuple): the primitive classes that current item - should be an instance of - None if unset - key_type (bool): False if our value is a value in a dict - True if it is a key in a dict - False if our item is an item in a list - None if unset - """ - self.path_to_item = path_to_item - self.valid_classes = valid_classes - self.key_type = key_type - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiTypeError, self).__init__(full_msg) - - -class PineconeApiValueError(PineconeException, ValueError): - def __init__(self, msg, path_to_item=None) -> None: - """ - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (list) the path to the exception in the - received_data dict. None if unset - """ - - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiValueError, self).__init__(full_msg) - - -class PineconeApiAttributeError(PineconeException, AttributeError): - def __init__(self, msg, path_to_item=None) -> None: - """ - Raised when an attribute reference or assignment fails. 
- - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (None/list) the path to the exception in the - received_data dict - """ - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiAttributeError, self).__init__(full_msg) - - -class PineconeApiKeyError(PineconeException, KeyError): - def __init__(self, msg, path_to_item=None) -> None: - """ - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (None/list) the path to the exception in the - received_data dict - """ - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiKeyError, self).__init__(full_msg) - - -class PineconeApiException(PineconeException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - if http_resp: - self.status = http_resp.status - self.reason = http_resp.reason - self.body = http_resp.data - self.headers = http_resp.getheaders() - else: - self.status = status - self.reason = reason - self.body = None - self.headers = None - - def __str__(self): - """Custom error messages for exception""" - error_message = "({0})\nReason: {1}\n".format(self.status, self.reason) - if self.headers: - error_message += "HTTP response headers: {0}\n".format(self.headers) - - if self.body: - error_message += "HTTP response body: {0}\n".format(self.body) - - return error_message - - -class NotFoundException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(NotFoundException, self).__init__(status, reason, http_resp) - - -class UnauthorizedException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(UnauthorizedException, self).__init__(status, reason, http_resp) - - -class ForbiddenException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(ForbiddenException, self).__init__(status, reason, http_resp) - - -class ServiceException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(ServiceException, self).__init__(status, reason, http_resp) - - -def render_path(path_to_item): - """Returns a string representation of a path""" - result = "" - for pth in path_to_item: - if isinstance(pth, int): - result += "[{0}]".format(pth) - else: - result += "['{0}']".format(pth) - return result +from pinecone.exceptions import * diff --git a/pinecone/openapi_support/model_utils.py b/pinecone/openapi_support/model_utils.py index 4fc4cf0f..163f94b4 100644 --- a/pinecone/openapi_support/model_utils.py +++ b/pinecone/openapi_support/model_utils.py @@ -1,5 +1,4 @@ from datetime import date, datetime # noqa: F401 -from dateutil.parser import parse import inspect import io @@ -1149,6 +1148,8 @@ def deserialize_primitive(data, klass, path_to_item): additional_message = "" try: if klass in {datetime, date}: + from dateutil.parser import parse + additional_message = ( "If you need your parameter to have a fallback " "string value, please set its type as `type: {}` in your " diff --git a/pinecone/openapi_support/rest_aiohttp.py b/pinecone/openapi_support/rest_aiohttp.py index c7121a11..3cab099a 100644 --- a/pinecone/openapi_support/rest_aiohttp.py +++ b/pinecone/openapi_support/rest_aiohttp.py @@ -2,7 +2,7 @@ import certifi import json from .rest_utils import RestClientInterface, RESTResponse, 
raise_exceptions_or_return -from .configuration import Configuration +from ..config.openapi_configuration import Configuration class AiohttpRestClient(RestClientInterface): diff --git a/pinecone/openapi_support/rest_urllib3.py b/pinecone/openapi_support/rest_urllib3.py index 85d008da..0c1a1c5a 100644 --- a/pinecone/openapi_support/rest_urllib3.py +++ b/pinecone/openapi_support/rest_urllib3.py @@ -4,7 +4,7 @@ import os from typing import Optional from urllib.parse import urlencode, quote -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .rest_utils import raise_exceptions_or_return, RESTResponse, RestClientInterface import urllib3 diff --git a/pinecone/pinecone.py b/pinecone/pinecone.py new file mode 100644 index 00000000..38462390 --- /dev/null +++ b/pinecone/pinecone.py @@ -0,0 +1,278 @@ +import logging +from typing import Optional, Dict, Union, TYPE_CHECKING +from multiprocessing import cpu_count + +from .legacy_pinecone_interface import LegacyPineconeDBControlInterface + +from pinecone.config import PineconeConfig, ConfigBuilder + +from pinecone.utils import normalize_host, PluginAware, docslinks +from .langchain_import_warnings import _build_langchain_attribute_error_message + +logger = logging.getLogger(__name__) +""" @private """ + +if TYPE_CHECKING: + from pinecone.db_data import ( + _Index as Index, + _Inference as Inference, + _IndexAsyncio as IndexAsyncio, + ) + from pinecone.db_control import DBControl + from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict + from pinecone.db_control.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, + ) + from pinecone.db_control.models import ( + ServerlessSpec, + PodSpec, + IndexModel, + IndexList, + CollectionList, + IndexEmbed, + ) + + +class Pinecone(PluginAware, LegacyPineconeDBControlInterface): + """ + A client for interacting with Pinecone APIs. + """ + + def __init__( + self, + api_key: Optional[str] = None, + host: Optional[str] = None, + proxy_url: Optional[str] = None, + proxy_headers: Optional[Dict[str, str]] = None, + ssl_ca_certs: Optional[str] = None, + ssl_verify: Optional[bool] = None, + additional_headers: Optional[Dict[str, str]] = {}, + pool_threads: Optional[int] = None, + **kwargs, + ): + for deprecated_kwarg in {"config", "openapi_config", "index_api"}: + if deprecated_kwarg in kwargs: + raise NotImplementedError( + f"Passing {deprecated_kwarg} is no longer supported. Please pass individual settings such as proxy_url, proxy_headers, ssl_ca_certs, and ssl_verify directly to the Pinecone constructor as keyword arguments. See the README at {docslinks['README']} for examples." 
+                )
+
+        self.config = PineconeConfig.build(
+            api_key=api_key,
+            host=host,
+            additional_headers=additional_headers,
+            proxy_url=proxy_url,
+            proxy_headers=proxy_headers,
+            ssl_ca_certs=ssl_ca_certs,
+            ssl_verify=ssl_verify,
+            **kwargs,
+        )
+        """ @private """
+
+        self.openapi_config = ConfigBuilder.build_openapi_config(self.config, **kwargs)
+        """ @private """
+
+        if pool_threads is None:
+            self.pool_threads = 5 * cpu_count()
+            """ @private """
+        else:
+            self.pool_threads = pool_threads
+            """ @private """
+
+        self._inference: Optional["Inference"] = None  # Lazy initialization
+        """ @private """
+
+        self._db_control: Optional["DBControl"] = None  # Lazy initialization
+        """ @private """
+
+        super().__init__()  # Initialize PluginAware
+
+    @property
+    def inference(self) -> "Inference":
+        """
+        Inference is a namespace where an instance of the `pinecone.db_data._Inference` class is lazily created and cached.
+        """
+        if self._inference is None:
+            from pinecone.db_data import _Inference
+
+            self._inference = _Inference(config=self.config, openapi_config=self.openapi_config)
+        return self._inference
+
+    @property
+    def db(self) -> "DBControl":
+        """
+        DBControl is a namespace where an instance of the `pinecone.db_control.DBControl` class is lazily created and cached.
+        """
+        if self._db_control is None:
+            from pinecone.db_control import DBControl
+
+            self._db_control = DBControl(
+                config=self.config,
+                openapi_config=self.openapi_config,
+                pool_threads=self.pool_threads,
+            )
+        return self._db_control
+
+    def create_index(
+        self,
+        name: str,
+        spec: Union[Dict, "ServerlessSpec", "PodSpec"],
+        dimension: Optional[int] = None,
+        metric: Optional[Union["Metric", str]] = "cosine",
+        timeout: Optional[int] = None,
+        deletion_protection: Optional[Union["DeletionProtection", str]] = "disabled",
+        vector_type: Optional[Union["VectorType", str]] = "dense",
+        tags: Optional[Dict[str, str]] = None,
+    ) -> "IndexModel":
+        return self.db.index.create(
+            name=name,
+            spec=spec,
+            dimension=dimension,
+            metric=metric,
+            timeout=timeout,
+            deletion_protection=deletion_protection,
+            vector_type=vector_type,
+            tags=tags,
+        )
+
+    def create_index_for_model(
+        self,
+        name: str,
+        cloud: Union["CloudProvider", str],
+        region: Union["AwsRegion", "GcpRegion", "AzureRegion", str],
+        embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"],
+        tags: Optional[Dict[str, str]] = None,
+        deletion_protection: Optional[
+            Union["DeletionProtection", str]
+        ] = "disabled",
+        timeout: Optional[int] = None,
+    ) -> "IndexModel":
+        return self.db.index.create_for_model(
+            name=name,
+            cloud=cloud,
+            region=region,
+            embed=embed,
+            tags=tags,
+            deletion_protection=deletion_protection,
+            timeout=timeout,
+        )
+
+    def delete_index(self, name: str, timeout: Optional[int] = None):
+        return self.db.index.delete(name=name, timeout=timeout)
+
+    def list_indexes(self) -> "IndexList":
+        return self.db.index.list()
+
+    def describe_index(self, name: str) -> "IndexModel":
+        return self.db.index.describe(name=name)
+
+    def has_index(self, name: str) -> bool:
+        return self.db.index.has(name=name)
+
+    def configure_index(
+        self,
+        name: str,
+        replicas: Optional[int] = None,
+        pod_type: Optional[Union["PodType", str]] = None,
+        deletion_protection: Optional[Union["DeletionProtection", str]] = None,
+        tags: Optional[Dict[str, str]] = None,
+    ):
+        return self.db.index.configure(
+            name=name,
+            replicas=replicas,
+            pod_type=pod_type,
+            deletion_protection=deletion_protection,
+            tags=tags,
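All of the control-plane wrappers above delegate to the lazily constructed `db` namespace, so `pc.db` can also be used directly. A short sketch (API key value hypothetical):

```python
from pinecone import Pinecone

pc = Pinecone(api_key="YOUR_API_KEY")  # hypothetical key

# First access of pc.db constructs the DBControl instance; subsequent
# accesses reuse the cached object.
pc.db.index.list()        # same call path as pc.list_indexes()
pc.db.collection.list()   # same call path as pc.list_collections()
```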
+ ) + + def create_collection(self, name: str, source: str) -> None: + return self.db.collection.create(name=name, source=source) + + def list_collections(self) -> "CollectionList": + return self.db.collection.list() + + def delete_collection(self, name: str) -> None: + return self.db.collection.delete(name=name) + + def describe_collection(self, name: str): + return self.db.collection.describe(name=name) + + @staticmethod + def from_texts(*args, **kwargs): + """@private""" + raise AttributeError(_build_langchain_attribute_error_message("from_texts")) + + @staticmethod + def from_documents(*args, **kwargs): + """@private""" + raise AttributeError(_build_langchain_attribute_error_message("from_documents")) + + def Index(self, name: str = "", host: str = "", **kwargs) -> "Index": + from pinecone.db_data import _Index + + if name == "" and host == "": + raise ValueError("Either name or host must be specified") + + pt = kwargs.pop("pool_threads", None) or self.pool_threads + api_key = self.config.api_key + openapi_config = self.openapi_config + + if host != "": + check_realistic_host(host) + + # Use host url if it is provided + index_host = normalize_host(host) + else: + # Otherwise, get host url from describe_index using the index name + index_host = self.db.index._get_host(name) + + return _Index( + host=index_host, + api_key=api_key, + pool_threads=pt, + openapi_config=openapi_config, + source_tag=self.config.source_tag, + **kwargs, + ) + + def IndexAsyncio(self, host: str, **kwargs) -> "IndexAsyncio": + from pinecone.db_data import _IndexAsyncio + + api_key = self.config.api_key + openapi_config = self.openapi_config + + if host is None or host == "": + raise ValueError("A host must be specified") + + check_realistic_host(host) + index_host = normalize_host(host) + + return _IndexAsyncio( + host=index_host, + api_key=api_key, + openapi_config=openapi_config, + source_tag=self.config.source_tag, + **kwargs, + ) + + +def check_realistic_host(host: str) -> None: + """@private + + Checks whether a user-provided host string seems plausible. + Someone could erroneously pass an index name as the host by + mistake, and if they have done that we'd like to give them a + simple error message as feedback rather than attempting to + call the url and getting a more cryptic DNS resolution error. + """ + + if "." not in host and "localhost" not in host: + raise ValueError( + f"You passed '{host}' as the host but this does not appear to be valid. Call describe_index() to confirm the host of the index." 
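The heuristic above only checks for a dot or the substring `localhost`, so its behavior is easy to characterize (host values hypothetical):

```python
# Illustrative inputs for the plausibility check defined above.
check_realistic_host("my-index-abc123.svc.us-east1-aws.pinecone.io")  # passes: contains a dot
check_realistic_host("localhost:5080")  # passes: local development host
check_realistic_host("my-index")        # raises ValueError: looks like an index name
```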
+        )
diff --git a/pinecone/control/pinecone_asyncio.py b/pinecone/pinecone_asyncio.py
similarity index 51%
rename from pinecone/control/pinecone_asyncio.py
rename to pinecone/pinecone_asyncio.py
index 1373c8e4..3da739f7 100644
--- a/pinecone/control/pinecone_asyncio.py
+++ b/pinecone/pinecone_asyncio.py
@@ -1,40 +1,36 @@
 import logging
-import asyncio
-from typing import Optional, Dict, Union
+from typing import Optional, Dict, Union, TYPE_CHECKING
 
 from pinecone.config import PineconeConfig, ConfigBuilder
 
-from pinecone.core.openapi.db_control.api.manage_indexes_api import AsyncioManageIndexesApi
-from pinecone.openapi_support import AsyncioApiClient
-
-from pinecone.utils import normalize_host, setup_async_openapi_client
-from pinecone.core.openapi.db_control import API_VERSION
-from pinecone.models import (
-    ServerlessSpec,
-    PodSpec,
-    IndexModel,
-    IndexList,
-    CollectionList,
-    IndexEmbed,
-)
+from pinecone.utils import normalize_host
 from pinecone.utils import docslinks
 
-from pinecone.data import _IndexAsyncio, _AsyncioInference
-from pinecone.enums import (
-    Metric,
-    VectorType,
-    DeletionProtection,
-    PodType,
-    CloudProvider,
-    AwsRegion,
-    GcpRegion,
-    AzureRegion,
-)
-from .types import CreateIndexForModelEmbedTypedDict
-from .request_factory import PineconeDBControlRequestFactory
 from .pinecone_interface_asyncio import PineconeAsyncioDBControlInterface
 from .pinecone import check_realistic_host
 
+if TYPE_CHECKING:
+    from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
+    from pinecone.db_data import _IndexAsyncio
+    from pinecone.db_control.enums import (
+        Metric,
+        VectorType,
+        DeletionProtection,
+        PodType,
+        CloudProvider,
+        AwsRegion,
+        GcpRegion,
+        AzureRegion,
+    )
+    from pinecone.db_control.models import (
+        ServerlessSpec,
+        PodSpec,
+        IndexModel,
+        IndexList,
+        CollectionList,
+        IndexEmbed,
+    )
+
 logger = logging.getLogger(__name__)
 """ @private """
 
@@ -102,13 +98,7 @@ def __init__(
         self._inference = None  # Lazy initialization
         """ @private """
 
-        self.index_api = setup_async_openapi_client(
-            api_client_klass=AsyncioApiClient,
-            api_klass=AsyncioManageIndexesApi,
-            config=self.config,
-            openapi_config=self.openapi_config,
-            api_version=API_VERSION,
-        )
+        self._db_control = None  # Lazy initialization
         """ @private """
 
     async def __aenter__(self):
@@ -158,23 +148,39 @@ async def main():
 
     @property
     def inference(self):
-        """Dynamically create and cache the Inference instance."""
+        """Dynamically create and cache the AsyncioInference instance."""
         if self._inference is None:
-            self._inference = _AsyncioInference(api_client=self.index_api.api_client)
+            from pinecone.db_data import _AsyncioInference
+
+            self._inference = _AsyncioInference(api_client=self.db.index_api.api_client)
         return self._inference
 
+    @property
+    def db(self):
+        if self._db_control is None:
+            from .db_control.db_control_asyncio import DBControlAsyncio
+
+            self._db_control = DBControlAsyncio(
+                config=self.config,
+                openapi_config=self.openapi_config,
+                pool_threads=self.pool_threads,
+            )
+        return self._db_control
+
     async def create_index(
         self,
         name: str,
-        spec: Union[Dict, ServerlessSpec, PodSpec],
+        spec: Union[Dict, "ServerlessSpec", "PodSpec"],
         dimension: Optional[int] = None,
-        metric: Optional[Union[Metric, str]] = Metric.COSINE,
+        metric: Optional[Union["Metric", str]] = "cosine",
         timeout: Optional[int] = None,
-        deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED,
-        vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE,
+        deletion_protection: Optional[
+            Union["DeletionProtection", str]
+        ] = "disabled",
+        vector_type: Optional[Union["VectorType", str]] = "dense",
         tags: Optional[Dict[str, str]] = None,
-    ) -> IndexModel:
-        req = PineconeDBControlRequestFactory.create_index_request(
+    ) -> "IndexModel":
+        resp = await self.db.index.create(
             name=name,
             spec=spec,
             dimension=dimension,
@@ -182,146 +188,75 @@ async def create_index(
             deletion_protection=deletion_protection,
             vector_type=vector_type,
             tags=tags,
+            timeout=timeout,
         )
-        resp = await self.index_api.create_index(create_index_request=req)
-
-        if timeout == -1:
-            return IndexModel(resp)
-        return await self.__poll_describe_index_until_ready(name, timeout)
+        return resp
 
     async def create_index_for_model(
         self,
         name: str,
-        cloud: Union[CloudProvider, str],
-        region: Union[AwsRegion, GcpRegion, AzureRegion, str],
-        embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict],
+        cloud: Union["CloudProvider", str],
+        region: Union["AwsRegion", "GcpRegion", "AzureRegion", str],
+        embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"],
         tags: Optional[Dict[str, str]] = None,
-        deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED,
+        deletion_protection: Optional[
+            Union["DeletionProtection", str]
+        ] = "disabled",
         timeout: Optional[int] = None,
-    ) -> IndexModel:
-        req = PineconeDBControlRequestFactory.create_index_for_model_request(
+    ) -> "IndexModel":
+        return await self.db.index.create_for_model(
             name=name,
             cloud=cloud,
             region=region,
             embed=embed,
             tags=tags,
             deletion_protection=deletion_protection,
+            timeout=timeout,
         )
-        resp = await self.index_api.create_index_for_model(req)
-
-        if timeout == -1:
-            return IndexModel(resp)
-        return await self.__poll_describe_index_until_ready(name, timeout)
-
-    async def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None):
-        description = None
-
-        async def is_ready() -> bool:
-            nonlocal description
-            description = await self.describe_index(name=name)
-            return description.status.ready
-
-        total_wait_time = 0
-        if timeout is None:
-            # Wait indefinitely
-            while not await is_ready():
-                logger.debug(
-                    f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds."
-                )
-                total_wait_time += 5
-                await asyncio.sleep(5)
-
-        else:
-            # Wait for a maximum of timeout seconds
-            while not await is_ready():
-                if timeout < 0:
-                    logger.error(f"Index {name} is not ready. Timeout reached.")
-                    link = docslinks["API_DESCRIBE_INDEX"]
-                    timeout_msg = (
-                        f"Please call describe_index() to confirm index status. See docs at {link}"
-                    )
-                    raise TimeoutError(timeout_msg)
-
-                logger.debug(
-                    f"Waiting for index {name} to be ready.
Total wait time: {total_wait_time}" - ) - total_wait_time += 5 - await asyncio.sleep(5) - timeout -= 5 - - return description async def delete_index(self, name: str, timeout: Optional[int] = None): - await self.index_api.delete_index(name) - - if timeout == -1: - return - - if timeout is None: - while await self.has_index(name): - await asyncio.sleep(5) - else: - while await self.has_index(name) and timeout >= 0: - await asyncio.sleep(5) - timeout -= 5 - if timeout and timeout < 0: - raise ( - TimeoutError( - "Please call the list_indexes API ({}) to confirm if index is deleted".format( - "https://www.pinecone.io/docs/api/operation/list_indexes/" - ) - ) - ) + return await self.db.index.delete(name=name, timeout=timeout) - async def list_indexes(self) -> IndexList: - response = await self.index_api.list_indexes() - return IndexList(response) + async def list_indexes(self) -> "IndexList": + return await self.db.index.list() - async def describe_index(self, name: str) -> IndexModel: - description = await self.index_api.describe_index(name) - return IndexModel(description) + async def describe_index(self, name: str) -> "IndexModel": + return await self.db.index.describe(name=name) async def has_index(self, name: str) -> bool: - available_indexes = await self.list_indexes() - if name in available_indexes.names(): - return True - else: - return False + return await self.db.index.has(name=name) async def configure_index( self, name: str, replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, ): - description = await self.describe_index(name=name) - - req = PineconeDBControlRequestFactory.configure_index_request( - description=description, + return await self.db.index.configure( + name=name, replicas=replicas, pod_type=pod_type, deletion_protection=deletion_protection, tags=tags, ) - await self.index_api.configure_index(name, configure_index_request=req) async def create_collection(self, name: str, source: str): - req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) - await self.index_api.create_collection(create_collection_request=req) + return await self.db.collection.create(name=name, source=source) - async def list_collections(self) -> CollectionList: - response = await self.index_api.list_collections() - return CollectionList(response) + async def list_collections(self) -> "CollectionList": + return await self.db.collection.list() async def delete_collection(self, name: str): - await self.index_api.delete_collection(name) + return await self.db.collection.delete(name=name) async def describe_collection(self, name: str): - return await self.index_api.describe_collection(name).to_dict() + return await self.db.collection.describe(name=name) + + def IndexAsyncio(self, host: str, **kwargs) -> "_IndexAsyncio": + from pinecone.db_data import _IndexAsyncio - def IndexAsyncio(self, host: str, **kwargs) -> _IndexAsyncio: api_key = self.config.api_key openapi_config = self.openapi_config diff --git a/pinecone/control/pinecone_interface_asyncio.py b/pinecone/pinecone_interface_asyncio.py similarity index 95% rename from pinecone/control/pinecone_interface_asyncio.py rename to pinecone/pinecone_interface_asyncio.py index a732bce9..31d1feba 100644 --- a/pinecone/control/pinecone_interface_asyncio.py +++ 
b/pinecone/pinecone_interface_asyncio.py
@@ -1,32 +1,31 @@
 from abc import ABC, abstractmethod
-from typing import Optional, Dict, Union
-
-
-from pinecone.config import Config
-
-from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi
-
-
-from pinecone.models import (
-    ServerlessSpec,
-    PodSpec,
-    IndexList,
-    CollectionList,
-    IndexModel,
-    IndexEmbed,
-)
-from pinecone.enums import (
-    Metric,
-    VectorType,
-    DeletionProtection,
-    PodType,
-    CloudProvider,
-    AwsRegion,
-    GcpRegion,
-    AzureRegion,
-)
-from .types import CreateIndexForModelEmbedTypedDict
+from typing import Optional, Dict, Union, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pinecone.config import Config
+
+    from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi
+
+    from pinecone.db_control.models import (
+        ServerlessSpec,
+        PodSpec,
+        IndexList,
+        CollectionList,
+        IndexModel,
+        IndexEmbed,
+    )
+    from pinecone.db_control.enums import (
+        Metric,
+        VectorType,
+        DeletionProtection,
+        PodType,
+        CloudProvider,
+        AwsRegion,
+        GcpRegion,
+        AzureRegion,
+    )
+    from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
 
 
 class PineconeAsyncioDBControlInterface(ABC):
@@ -39,10 +38,10 @@ def __init__(
         proxy_headers: Optional[Dict[str, str]] = None,
         ssl_ca_certs: Optional[str] = None,
         ssl_verify: Optional[bool] = None,
-        config: Optional[Config] = None,
+        config: Optional["Config"] = None,
         additional_headers: Optional[Dict[str, str]] = {},
         pool_threads: Optional[int] = 1,
-        index_api: Optional[ManageIndexesApi] = None,
+        index_api: Optional["ManageIndexesApi"] = None,
         **kwargs,
     ):
         """
@@ -291,12 +290,14 @@ async def main():
     async def create_index(
         self,
         name: str,
-        spec: Union[Dict, ServerlessSpec, PodSpec],
+        spec: Union[Dict, "ServerlessSpec", "PodSpec"],
         dimension: Optional[int],
-        metric: Optional[Union[Metric, str]] = Metric.COSINE,
+        metric: Optional[Union["Metric", str]] = "cosine",
         timeout: Optional[int] = None,
-        deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED,
-        vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE,
+        deletion_protection: Optional[
+            Union["DeletionProtection", str]
+        ] = "disabled",
+        vector_type: Optional[Union["VectorType", str]] = "dense",
         tags: Optional[Dict[str, str]] = None,
     ):
         """Creates a Pinecone index.
@@ -408,13 +409,15 @@ async def main():
     async def create_index_for_model(
         self,
         name: str,
-        cloud: Union[CloudProvider, str],
-        region: Union[AwsRegion, GcpRegion, AzureRegion, str],
-        embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict],
+        cloud: Union["CloudProvider", str],
+        region: Union["AwsRegion", "GcpRegion", "AzureRegion", str],
+        embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"],
         tags: Optional[Dict[str, str]] = None,
-        deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED,
+        deletion_protection: Optional[
+            Union["DeletionProtection", str]
+        ] = "disabled",
         timeout: Optional[int] = None,
-    ) -> IndexModel:
+    ) -> "IndexModel":
         """
         :param name: The name of the index to create. Must be unique within your project and
             cannot be changed once created. Allowed characters are lowercase letters, numbers,
@@ -533,7 +536,7 @@ async def main():
         pass
 
     @abstractmethod
-    async def list_indexes(self) -> IndexList:
+    async def list_indexes(self) -> "IndexList":
         """
         :return: Returns an `IndexList` object, which is iterable and contains a list of `IndexModel` objects.
The `IndexList` also has a convenience method `names()` @@ -574,7 +577,7 @@ async def main(): pass @abstractmethod - async def describe_index(self, name: str) -> IndexModel: + async def describe_index(self, name: str) -> "IndexModel": """ :param name: the name of the index to describe. :return: Returns an `IndexModel` object @@ -669,8 +672,8 @@ async def configure_index( self, name: str, replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, ): """ @@ -779,7 +782,7 @@ async def create_collection(self, name: str, source: str): pass @abstractmethod - async def list_collections(self) -> CollectionList: + async def list_collections(self) -> "CollectionList": """List all collections ```python diff --git a/pinecone/utils/docslinks.py b/pinecone/utils/docslinks.py index a86dd1da..cdfe66cd 100644 --- a/pinecone/utils/docslinks.py +++ b/pinecone/utils/docslinks.py @@ -1,10 +1,12 @@ -from pinecone.core.openapi.db_control import API_VERSION +def versioned_url(template: str): + return lambda version: template.format(version) + docslinks = { "README": "https://github.com/pinecone-io/pinecone-python-client/blob/main/README.md", "GITHUB_REPO": "https://github.com/pinecone-io/pinecone-python-client", "LANGCHAIN_IMPORT_KB_ARTICLE": "https://docs.pinecone.io/troubleshooting/pinecone-attribute-errors-with-langchain", - "API_DESCRIBE_INDEX": "https://docs.pinecone.io/reference/api/{}/control-plane/describe_index".format( - API_VERSION + "API_DESCRIBE_INDEX": versioned_url( + "https://docs.pinecone.io/reference/api/{}/control-plane/describe_index" ), } diff --git a/pinecone/utils/find_legacy_imports.py b/pinecone/utils/find_legacy_imports.py new file mode 100755 index 00000000..5421de28 --- /dev/null +++ b/pinecone/utils/find_legacy_imports.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +""" +Script to identify legacy imports that were previously available via star imports. + +This script analyzes the codebase to find all imports that were previously available +via star imports but are no longer imported at the top level. +""" + +import ast +import os +from typing import Set + + +def find_star_imports(file_path: str) -> Set[str]: + """ + Find all star imports in a file. + + Args: + file_path: Path to the file to analyze. + + Returns: + Set of module names that are imported with star imports. + """ + with open(file_path, "r") as f: + content = f.read() + + try: + tree = ast.parse(content) + except SyntaxError: + print(f"Warning: Could not parse {file_path}") + return set() + + star_imports = set() + + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom) and node.names[0].name == "*": + module_name = node.module + if module_name: + star_imports.add(module_name) + + return star_imports + + +def find_imported_names(file_path: str) -> Set[str]: + """ + Find all names that are imported in a file. + + Args: + file_path: Path to the file to analyze. + + Returns: + Set of imported names. 
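With the `docslinks` change above, `API_DESCRIBE_INDEX` is now a callable of the API version instead of a string formatted at import time, which lets the module drop its import of `pinecone.core.openapi.db_control`. A call sketch (version string hypothetical):

```python
from pinecone.utils.docslinks import docslinks

# The stored value is a lambda; callers supply the API version at use time.
url = docslinks["API_DESCRIBE_INDEX"]("2025-01")  # hypothetical version
assert url == "https://docs.pinecone.io/reference/api/2025-01/control-plane/describe_index"
```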
+ """ + with open(file_path, "r") as f: + content = f.read() + + try: + tree = ast.parse(content) + except SyntaxError: + print(f"Warning: Could not parse {file_path}") + return set() + + imported_names = set() + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for name in node.names: + imported_names.add(name.name) + elif isinstance(node, ast.ImportFrom): + for name in node.names: + if name.name != "*": + imported_names.add(name.name) + + return imported_names + + +def find_module_exports(module_path: str) -> Set[str]: + """ + Find all names that are exported by a module. + + Args: + module_path: Path to the module to analyze. + + Returns: + Set of exported names. + """ + try: + module = __import__(module_path, fromlist=["*"]) + return set(dir(module)) + except ImportError: + print(f"Warning: Could not import {module_path}") + return set() + + +def main(): + """ + Main function to find legacy imports. + """ + # Get the package root directory + package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # Find the __init__.py file + init_file = os.path.join(package_root, "__init__.py") + + # Find star imports in the __init__.py file + star_imports = find_star_imports(init_file) + + # Find all imported names in the __init__.py file + imported_names = find_imported_names(init_file) + + # Find all module exports + module_exports = {} + for module_name in star_imports: + module_exports[module_name] = find_module_exports(module_name) + + # Find all files in the package + package_files = [] + for root, _, files in os.walk(package_root): + for file in files: + if file.endswith(".py") and not file.startswith("__"): + package_files.append(os.path.join(root, file)) + + # Find all imports in the package + package_imports = set() + for file in package_files: + package_imports.update(find_imported_names(file)) + + # Find legacy imports + legacy_imports = {} + for module_name, exports in module_exports.items(): + for export in exports: + if export in package_imports and export not in imported_names: + legacy_imports[f"pinecone.{export}"] = (module_name, export) + + # Print the legacy imports + print("LEGACY_IMPORTS = {") + for legacy_name, (module_path, actual_name) in sorted(legacy_imports.items()): + print(f" '{legacy_name}': ('{module_path}', '{actual_name}'),") + print("}") + + +if __name__ == "__main__": + main() diff --git a/pinecone/utils/lazy_imports.py b/pinecone/utils/lazy_imports.py new file mode 100644 index 00000000..0a55c8f4 --- /dev/null +++ b/pinecone/utils/lazy_imports.py @@ -0,0 +1,76 @@ +""" +Lazy import handler for Pinecone. + +This module provides a way to lazily load imports that were previously +available via star imports but are no longer imported at the top level. 
+""" + +import importlib +import sys +from types import ModuleType +from typing import Dict, Optional, Tuple, cast + +# Dictionary mapping import names to their actual module paths +# Format: 'name': ('module_path', 'actual_name') +LAZY_IMPORTS: Dict[str, Tuple[str, str]] = { + # Example: 'Vector': ('pinecone.db_data.models', 'Vector') + # Add all your lazy imports here +} + + +class LazyModule: + def __init__(self, original_module, lazy_imports): + self._original_module = original_module + self._lazy_imports = lazy_imports + self._loaded_attrs = {} + + def __dir__(self): + # Get the base directory listing from the original module + base_dir = dir(self._original_module) + + # Add lazy-loaded items + lazy_dir = list(self._lazy_imports.keys()) + + # Return combined list + return sorted(set(base_dir + lazy_dir)) + + def __getattr__(self, name): + # First try the original module + try: + return getattr(self._original_module, name) + except AttributeError: + pass + + # Then try lazy imports + if name in self._lazy_imports: + if name not in self._loaded_attrs: + module_path, item_name = self._lazy_imports[name] + module = importlib.import_module(module_path) + self._loaded_attrs[name] = getattr(module, item_name) + return self._loaded_attrs[name] + + raise AttributeError(f"module '{self._original_module.__name__}' has no attribute '{name}'") + + +def setup_lazy_imports(lazy_imports: Optional[Dict[str, Tuple[str, str]]] = None) -> None: + """ + Set up the lazy import handler. + + Args: + lazy_imports: Optional dictionary of imports to handle lazily. + If None, uses the default LAZY_IMPORTS dictionary. + """ + if lazy_imports is None: + lazy_imports = LAZY_IMPORTS + + # Only proceed if the pinecone module is already loaded + if "pinecone" not in sys.modules: + return + + # Create a proxy for the pinecone module + original_module = sys.modules["pinecone"] + proxy = LazyModule(original_module, lazy_imports) + + # Replace the pinecone module with our proxy + # Use a type cast to satisfy the type checker + sys.modules["pinecone"] = cast(ModuleType, proxy) diff --git a/pinecone/utils/legacy_imports.py b/pinecone/utils/legacy_imports.py new file mode 100644 index 00000000..9013acdd --- /dev/null +++ b/pinecone/utils/legacy_imports.py @@ -0,0 +1,112 @@ +""" +Legacy import handler for Pinecone. + +This module provides a simple way to handle legacy imports that were previously +available via star imports but are no longer imported at the top level. +""" + +import importlib +import sys +from types import ModuleType +from typing import Dict, Optional, Set, Any, Tuple, cast + +# Dictionary mapping legacy import names to their actual module paths +# Format: 'name': ('module_path', 'actual_name') +LEGACY_IMPORTS: Dict[str, Tuple[str, str]] = { + # Example: 'Vector': ('pinecone.db_data.models', 'Vector') + # Add all your legacy imports here +} + + +class LegacyImportProxy: + """ + A proxy module that handles legacy imports with warnings. + + This class is used to replace the pinecone module in sys.modules + to handle legacy imports that were previously available via star imports. + """ + + def __init__(self, original_module: Any, legacy_imports: Dict[str, Tuple[str, str]]): + """ + Initialize the proxy module. + + Args: + original_module: The original module to proxy. + legacy_imports: Dictionary of legacy imports to handle. 
+ """ + self._original_module = original_module + self._legacy_imports = legacy_imports + self._warned_imports: Set[str] = set() + self._loaded_modules: Dict[str, Any] = {} + + def __getattr__(self, name: str) -> Any: + """ + Handle attribute access for legacy imports. + + Args: + name: The name of the attribute being accessed. + + Returns: + The requested attribute. + + Raises: + AttributeError: If the attribute cannot be found. + """ + # First, try to get the attribute from the original module + try: + return getattr(self._original_module, name) + except AttributeError: + pass + + # Check if this is a legacy import + if name in self._legacy_imports: + module_path, actual_name = self._legacy_imports[name] + + # Only warn once per import + # if name not in self._warned_imports: + # warnings.warn( + # f"Importing '{name}' directly from 'pinecone' is deprecated. " + # f"Please import it from '{module_path}' instead. " + # f"This import will be removed in a future version.", + # DeprecationWarning, + # stacklevel=2 + # ) + # self._warned_imports.add(name) + + # Load the module if not already loaded + if module_path not in self._loaded_modules: + try: + self._loaded_modules[module_path] = importlib.import_module(module_path) + except ImportError: + raise AttributeError(f"module 'pinecone' has no attribute '{name}'") + + # Get the actual object + module = self._loaded_modules[module_path] + if hasattr(module, actual_name): + return getattr(module, actual_name) + + raise AttributeError(f"module 'pinecone' has no attribute '{name}'") + + +def setup_legacy_imports(legacy_imports: Optional[Dict[str, Tuple[str, str]]] = None) -> None: + """ + Set up the legacy import handler. + + Args: + legacy_imports: Optional dictionary of legacy imports to handle. + If None, uses the default LEGACY_IMPORTS dictionary. + """ + if legacy_imports is None: + legacy_imports = LEGACY_IMPORTS + + # Only proceed if the pinecone module is already loaded + if "pinecone" not in sys.modules: + return + + # Create a proxy for the pinecone module + original_module = sys.modules["pinecone"] + proxy = LegacyImportProxy(original_module, legacy_imports) + + # Replace the pinecone module with our proxy + # Use a type cast to satisfy the type checker + sys.modules["pinecone"] = cast(ModuleType, proxy) diff --git a/pinecone/utils/plugin_aware.py b/pinecone/utils/plugin_aware.py index ce1e4b87..92093fcb 100644 --- a/pinecone/utils/plugin_aware.py +++ b/pinecone/utils/plugin_aware.py @@ -1,7 +1,7 @@ +from typing import Any from .setup_openapi_client import build_plugin_setup_client from pinecone.config import Config -from pinecone.openapi_support.configuration import Configuration as OpenApiConfig - +from pinecone.config.openapi_configuration import Configuration as OpenApiConfig from pinecone_plugin_interface import load_and_install as install_plugins import logging @@ -11,17 +11,112 @@ class PluginAware: + """ + Base class for classes that support plugin loading. + + This class provides functionality to lazily load plugins when they are first accessed. + Subclasses must set the following attributes before calling super().__init__(): + - config: Config + - openapi_config: OpenApiConfig + - pool_threads: int + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + """ + Initialize the PluginAware class. + + Args: + *args: Variable length argument list. + **kwargs: Arbitrary keyword arguments. + + Raises: + AttributeError: If required attributes are not set in the subclass. 
+ """ + logger.debug("PluginAware __init__ called for %s", self.__class__.__name__) + + self._plugins_loaded = False + """ @private """ + + # Check for required attributes after super().__init__ has been called + missing_attrs = [] + if not hasattr(self, "config"): + missing_attrs.append("config") + if not hasattr(self, "openapi_config"): + missing_attrs.append("openapi_config") + if not hasattr(self, "pool_threads"): + missing_attrs.append("pool_threads") + + if missing_attrs: + raise AttributeError( + f"PluginAware class requires the following attributes: {', '.join(missing_attrs)}. " + f"These must be set in the {self.__class__.__name__} class's __init__ method " + f"before calling super().__init__()." + ) + + def __getattr__(self, name: str) -> Any: + """ + Called when an attribute is not found through the normal lookup process. + This allows for lazy loading of plugins when they are first accessed. + + Args: + name: The name of the attribute being accessed. + + Returns: + The requested attribute. + + Raises: + AttributeError: If the attribute cannot be found after loading plugins. + """ + # Check if this is one of the required attributes that should be set by subclasses + required_attrs = ["config", "openapi_config", "pool_threads"] + if name in required_attrs: + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{name}'. " + f"This attribute must be set in the subclass's __init__ method " + f"before calling super().__init__()." + ) + + if not self._plugins_loaded: + logger.debug("Loading plugins for %s", self.__class__.__name__) + # Use object.__getattribute__ to avoid triggering __getattr__ again + try: + config = object.__getattribute__(self, "config") + openapi_config = object.__getattribute__(self, "openapi_config") + pool_threads = object.__getattribute__(self, "pool_threads") + self.load_plugins( + config=config, openapi_config=openapi_config, pool_threads=pool_threads + ) + self._plugins_loaded = True + try: + return object.__getattribute__(self, name) + except AttributeError: + pass + except AttributeError: + # If we can't get the required attributes, we can't load plugins + pass + + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") + def load_plugins( self, config: Config, openapi_config: OpenApiConfig, pool_threads: int ) -> None: - """@private""" + """ + Load plugins for the parent class. + + Args: + config: The Pinecone configuration. + openapi_config: The OpenAPI configuration. + pool_threads: The number of threads in the pool. + """ try: - # I don't expect this to ever throw, but wrapping this in a - # try block just in case to make sure a bad plugin doesn't - # halt client initialization. 
+ # Build the OpenAPI client for plugin setup openapi_client_builder = build_plugin_setup_client( config=config, openapi_config=openapi_config, pool_threads=pool_threads ) + # Install plugins install_plugins(self, openapi_client_builder) + logger.debug("Plugins loaded successfully for %s", self.__class__.__name__) + except ImportError as e: + logger.warning("Failed to import plugin module: %s", e) except Exception as e: - logger.error(f"Error loading plugins: {e}") + logger.error("Error loading plugins: %s", e, exc_info=True) diff --git a/poetry.lock b/poetry.lock index 427dc1e2..e923876f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1001,13 +1001,13 @@ files = [ [[package]] name = "packaging" -version = "23.2" +version = "24.2" description = "Core utilities for Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, - {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -1130,6 +1130,21 @@ pygments = ">=2.12.0" [package.extras] dev = ["hypothesis", "mypy", "pdoc-pyo3-sample-library (==1.0.11)", "pygments (>=2.14.0)", "pytest", "pytest-cov", "pytest-timeout", "ruff", "tox", "types-pygments"] +[[package]] +name = "pinecone-plugin-assistant" +version = "1.6.0" +description = "Assistant plugin for Pinecone SDK" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "pinecone_plugin_assistant-1.6.0-py3-none-any.whl", hash = "sha256:d742273d136fba66d020f1af01af2c6bfbc802f7ff9ddf46c590b7ea26932175"}, + {file = "pinecone_plugin_assistant-1.6.0.tar.gz", hash = "sha256:b7c531743f87269ba567dd6084b1464b62636a011564d414bc53147571b2f2c1"}, +] + +[package.dependencies] +packaging = ">=24.2,<25.0" +requests = ">=2.32.3,<3.0.0" + [[package]] name = "pinecone-plugin-interface" version = "0.0.7" @@ -1332,6 +1347,29 @@ files = [ googleapis-common-protos = "*" protobuf = ">=4.21.0" +[[package]] +name = "psutil" +version = "7.0.0" +description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, + {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993"}, + {file = "psutil-7.0.0-cp36-cp36m-win32.whl", hash = "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17"}, + {file = "psutil-7.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e"}, + {file = "psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99"}, + {file = "psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553"}, + {file = "psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456"}, +] + +[package.extras] +dev = ["abi3audit", "black (==24.10.0)", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest", "pytest-cov", "pytest-xdist", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] +test = ["pytest", "pytest-xdist", "setuptools"] + [[package]] name = "py-cpuinfo" version = "9.0.0" @@ -1668,6 +1706,17 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tuna" +version = "0.5.11" +description = "Visualize Python performance profiles" +optional = false +python-versions = ">=3.6" +files = [ + {file = "tuna-0.5.11-py3-none-any.whl", hash = "sha256:ab352a6d836014ace585ecd882148f1f7c68be9ea4bf9e9298b7127594dab2ef"}, + {file = "tuna-0.5.11.tar.gz", hash = "sha256:d47f3e39e80af961c8df016ac97d1643c3c60b5eb451299da0ab5fe411d8866c"}, +] + [[package]] name = "types-protobuf" version = "4.24.0.4" @@ -1796,6 +1845,20 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +[[package]] +name = "vprof" +version = "0.38" +description = "Visual profiler for Python" +optional = false +python-versions = "*" +files = [ + {file = "vprof-0.38-py3-none-any.whl", hash = "sha256:91b91d8868176c29e0fe3426c9239d11cd192c7144c7baf26a211e48923a5ee8"}, + {file = "vprof-0.38.tar.gz", hash = "sha256:7f1000912eeb7a450c7c94d3cc96739af45ad0ff01d5abcc0b09a175d40ffadb"}, +] + +[package.dependencies] +psutil = ">=3" + 
[[package]] name = "yarl" version = "1.17.2" @@ -1899,4 +1962,4 @@ grpc = ["googleapis-common-protos", "grpcio", "grpcio", "grpcio", "lz4", "protob [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "8a10046c5826a9773836e6b3ee50271bb0077d0faf32d709f1e65c4bb1fc53ea" +content-hash = "33aa755910ac34e4443a3e03a180ac1ece72735367f9c53d76908ca95ea2fd48" diff --git a/pyproject.toml b/pyproject.toml index 0525d08d..7a1b9a0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,9 @@ urllib3_mock = "0.3.3" responses = ">=0.8.1" ruff = "^0.9.3" beautifulsoup4 = "^4.13.3" +pinecone-plugin-assistant = "^1.6.0" +vprof = "^0.38" +tuna = "^0.5.11" [tool.poetry.extras] @@ -152,7 +155,7 @@ docstring-code-line-length = "dynamic" [tool.ruff.lint.per-file-ignores] # F403 Allow star imports # F401 allow imported but unused -"__init__.py" = ["F401", "F403"] +"__init__.py" = ["F401", "F403", "F405"] # E402 Allow module level import not at top of file so # tqdm warnings can be disabled ahead of loading any code diff --git a/tests/integration/data/test_query_namespaces_sparse.py b/tests/integration/data/test_query_namespaces_sparse.py index 607798ea..958368b5 100644 --- a/tests/integration/data/test_query_namespaces_sparse.py +++ b/tests/integration/data/test_query_namespaces_sparse.py @@ -1,6 +1,6 @@ import pytest from ..helpers import random_string, poll_stats_for_namespace -from pinecone.data.query_results_aggregator import QueryResultsAggregatorInvalidTopKError +from pinecone.db_data.query_results_aggregator import QueryResultsAggregatorInvalidTopKError from pinecone import Vector, SparseValues diff --git a/tests/integration/data/test_search_and_upsert_records.py b/tests/integration/data/test_search_and_upsert_records.py index e83a5cd8..0a269a49 100644 --- a/tests/integration/data/test_search_and_upsert_records.py +++ b/tests/integration/data/test_search_and_upsert_records.py @@ -6,7 +6,7 @@ import os from pinecone import RerankModel, PineconeApiException -from pinecone.data import _Index +from pinecone.db_data import _Index logger = logging.getLogger(__name__) diff --git a/tests/integration/data/test_upsert_from_dataframe.py b/tests/integration/data/test_upsert_from_dataframe.py index 49bc9abc..4534bc4f 100644 --- a/tests/integration/data/test_upsert_from_dataframe.py +++ b/tests/integration/data/test_upsert_from_dataframe.py @@ -1,5 +1,5 @@ import pandas as pd -from pinecone.data import _Index +from pinecone.db_data import _Index from ..helpers import embedding_values, random_string diff --git a/tests/integration/data_asyncio/conftest.py b/tests/integration/data_asyncio/conftest.py index 6401e073..9769a5e9 100644 --- a/tests/integration/data_asyncio/conftest.py +++ b/tests/integration/data_asyncio/conftest.py @@ -2,7 +2,7 @@ import json import asyncio from ..helpers import get_environment_var, generate_index_name -from pinecone.data import _IndexAsyncio +from pinecone.db_data import _IndexAsyncio import logging from typing import Callable, Optional, Awaitable, Union diff --git a/tests/integration/helpers/helpers.py b/tests/integration/helpers/helpers.py index 480585e5..6688f288 100644 --- a/tests/integration/helpers/helpers.py +++ b/tests/integration/helpers/helpers.py @@ -7,7 +7,7 @@ from typing import Any from datetime import datetime import json -from pinecone.data import _Index +from pinecone.db_data import _Index from typing import List logger = logging.getLogger(__name__) diff --git a/tests/perf/test_query_results_aggregator.py b/tests/perf/test_query_results_aggregator.py 
index 29ac4c35..9f33c149 100644 --- a/tests/perf/test_query_results_aggregator.py +++ b/tests/perf/test_query_results_aggregator.py @@ -1,5 +1,5 @@ import random -from pinecone.data.query_results_aggregator import QueryResultsAggregator +from pinecone.db_data.query_results_aggregator import QueryResultsAggregator def fake_results(i): diff --git a/tests/unit/data/test_bulk_import.py b/tests/unit/data/test_bulk_import.py index b1bcd4cc..c7ad5a14 100644 --- a/tests/unit/data/test_bulk_import.py +++ b/tests/unit/data/test_bulk_import.py @@ -6,7 +6,7 @@ ImportErrorMode as ImportErrorModeGeneratedClass, ) -from pinecone.data.features.bulk_import import ImportFeatureMixin, ImportErrorMode +from pinecone.db_data.features.bulk_import import ImportFeatureMixin, ImportErrorMode def build_client_w_faked_response(mocker, body: str, status: int = 200): diff --git a/tests/unit/data/test_request_factory.py b/tests/unit/data/test_request_factory.py index 087436c9..ea04acdf 100644 --- a/tests/unit/data/test_request_factory.py +++ b/tests/unit/data/test_request_factory.py @@ -1,5 +1,5 @@ import pytest -from pinecone.data.request_factory import ( +from pinecone.db_data.request_factory import ( IndexRequestFactory, SearchQuery, SearchQueryVector, diff --git a/tests/unit/data/test_vector_factory.py b/tests/unit/data/test_vector_factory.py index 52fd1eac..adeeaf9c 100644 --- a/tests/unit/data/test_vector_factory.py +++ b/tests/unit/data/test_vector_factory.py @@ -2,7 +2,7 @@ import pandas as pd import pytest -from pinecone.data.vector_factory import VectorFactory +from pinecone.db_data.vector_factory import VectorFactory from pinecone import Vector, SparseValues, ListConversionException from pinecone.core.openapi.db_data.models import ( Vector as OpenApiVector, diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index f33519b6..1da981ad 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,7 +1,7 @@ from pinecone import Pinecone from pinecone.exceptions.exceptions import PineconeConfigurationError from pinecone.config import PineconeConfig -from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration +from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration import pytest import os @@ -103,7 +103,11 @@ def test_config_pool_threads(self): pc = Pinecone( api_key="test-api-key", host="test-controller-host.pinecone.io", pool_threads=10 ) - assert pc.index_api.api_client.pool_threads == 10 + # DBControl object is created lazily, so we need to access this property + # to trigger the setup so we can inspect the config + assert pc.db is not None + + assert pc.db.index_api.api_client.pool_threads == 10 idx = pc.Index(host="my-index-host.pinecone.io", name="my-index-name") assert idx._vector_api.api_client.pool_threads == 10 @@ -146,5 +150,9 @@ def test_proxy_config(self): assert pc.openapi_config.proxy == "http://localhost:8080" assert pc.openapi_config.ssl_ca_cert == "path/to/cert-bundle.pem" - assert pc.index_api.api_client.configuration.proxy == "http://localhost:8080" - assert pc.index_api.api_client.configuration.ssl_ca_cert == "path/to/cert-bundle.pem" + # DBControl object is created lazily, so we need to access this property + # to trigger the setup so we can inspect the config + assert pc.db is not None + + assert pc.db.index_api.api_client.configuration.proxy == "http://localhost:8080" + assert pc.db.index_api.api_client.configuration.ssl_ca_cert == "path/to/cert-bundle.pem" diff --git 
a/tests/unit/test_config_builder.py b/tests/unit/test_config_builder.py index 3122c080..7307f153 100644 --- a/tests/unit/test_config_builder.py +++ b/tests/unit/test_config_builder.py @@ -1,6 +1,6 @@ import pytest -from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration +from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration from pinecone.config import ConfigBuilder from pinecone import PineconeConfigurationError diff --git a/tests/unit/test_control.py b/tests/unit/test_control.py index c0b909dd..da252063 100644 --- a/tests/unit/test_control.py +++ b/tests/unit/test_control.py @@ -77,44 +77,48 @@ def index_list_response(): class TestControl: - def test_plugins_are_installed(self): + def test_plugins_are_lazily_loaded(self): with patch.object(PluginAware, "load_plugins") as mock_install_plugins: - Pinecone(api_key="asdf") + pc = Pinecone(api_key="asdf") + mock_install_plugins.assert_not_called() + with pytest.raises(AttributeError): + pc.foo() # Accessing a non-existent attribute should raise an AttributeError after PluginAware installs any applicable plugins mock_install_plugins.assert_called_once() def test_default_host(self): p = Pinecone(api_key="123-456-789") - assert p.index_api.api_client.configuration.host == "https://api.pinecone.io" + assert p.db.index_api.api_client.configuration.host == "https://api.pinecone.io" def test_passing_host(self): p = Pinecone(api_key="123-456-789", host="my-host.pinecone.io") - assert p.index_api.api_client.configuration.host == "https://my-host.pinecone.io" + assert p.db.index_api.api_client.configuration.host == "https://my-host.pinecone.io" def test_passing_additional_headers(self): extras = {"header1": "my-value", "header2": "my-value2"} p = Pinecone(api_key="123-456-789", additional_headers=extras) for key, value in extras.items(): - assert p.index_api.api_client.default_headers[key] == value - assert "User-Agent" in p.index_api.api_client.default_headers - assert "X-Pinecone-API-Version" in p.index_api.api_client.default_headers - assert "header1" in p.index_api.api_client.default_headers - assert "header2" in p.index_api.api_client.default_headers - assert len(p.index_api.api_client.default_headers) == 4 + assert p.db.index_api.api_client.default_headers[key] == value + assert "User-Agent" in p.db.index_api.api_client.default_headers + assert "X-Pinecone-API-Version" in p.db.index_api.api_client.default_headers + assert "header1" in p.db.index_api.api_client.default_headers + assert "header2" in p.db.index_api.api_client.default_headers + assert len(p.db.index_api.api_client.default_headers) == 4 def test_overwrite_useragent(self): # This doesn't seem like a common use case, but we may want to allow this # when embedding the client in other pinecone tools such as canopy. 
extras = {"User-Agent": "test-user-agent"} p = Pinecone(api_key="123-456-789", additional_headers=extras) - assert "X-Pinecone-API-Version" in p.index_api.api_client.default_headers - assert p.index_api.api_client.default_headers["User-Agent"] == "test-user-agent" - assert len(p.index_api.api_client.default_headers) == 2 + assert "X-Pinecone-API-Version" in p.db.index_api.api_client.default_headers + assert p.db.index_api.api_client.default_headers["User-Agent"] == "test-user-agent" + assert len(p.db.index_api.api_client.default_headers) == 2 def test_set_source_tag_in_useragent(self): p = Pinecone(api_key="123-456-789", source_tag="test_source_tag") assert ( - re.search(r"source_tag=test_source_tag", p.index_api.api_client.user_agent) is not None + re.search(r"source_tag=test_source_tag", p.db.index_api.api_client.user_agent) + is not None ) @pytest.mark.parametrize( @@ -146,8 +150,8 @@ def test_create_index_with_timeout( expected_sleep_calls, ): p = Pinecone(api_key="123-456-789") - mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_responses) - mocker.patch.object(p.index_api, "create_index") + mocker.patch.object(p.db.index_api, "describe_index", side_effect=describe_index_responses) + mocker.patch.object(p.db.index_api, "create_index") mocker.patch("time.sleep") p.create_index( @@ -157,8 +161,8 @@ def test_create_index_with_timeout( timeout=timeout_value, ) - assert p.index_api.create_index.call_count == 1 - assert p.index_api.describe_index.call_count == expected_describe_index_calls + assert p.db.index_api.create_index.call_count == 1 + assert p.db.index_api.describe_index.call_count == expected_describe_index_calls assert time.sleep.call_count == expected_sleep_calls @pytest.mark.parametrize( @@ -207,7 +211,7 @@ def test_create_index_with_spec_dictionary(self, mocker, index_spec): p = Pinecone(api_key="123-456-789") mock_api = MagicMock() - mocker.patch.object(p, "index_api", mock_api) + mocker.patch.object(p.db, "index_api", mock_api) p.create_index(name="my-index", dimension=10, spec=index_spec) @@ -242,8 +246,8 @@ def test_create_index_from_source_collection( expected_sleep_calls, ): p = Pinecone(api_key="123-456-789") - mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_responses) - mocker.patch.object(p.index_api, "create_index") + mocker.patch.object(p.db.index_api, "describe_index", side_effect=describe_index_responses) + mocker.patch.object(p.db.index_api, "create_index") mocker.patch("time.sleep") p.create_index( @@ -253,17 +257,19 @@ def test_create_index_from_source_collection( timeout=timeout_value, ) - assert p.index_api.create_index.call_count == 1 - assert p.index_api.describe_index.call_count == expected_describe_index_calls + assert p.db.index_api.create_index.call_count == 1 + assert p.db.index_api.describe_index.call_count == expected_describe_index_calls assert time.sleep.call_count == expected_sleep_calls def test_create_index_when_timeout_exceeded(self, mocker): with pytest.raises(TimeoutError): p = Pinecone(api_key="123-456-789") - mocker.patch.object(p.index_api, "create_index") + mocker.patch.object(p.db.index_api, "create_index") describe_index_response = [description_with_status(False)] * 5 - mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_response) + mocker.patch.object( + p.db.index_api, "describe_index", side_effect=describe_index_response + ) mocker.patch("time.sleep") p.create_index( @@ -273,7 +279,7 @@ def test_create_index_when_timeout_exceeded(self, mocker): def 
diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py
index 72ed7422..6e880016 100644
--- a/tests/unit/test_index.py
+++ b/tests/unit/test_index.py
@@ -1,7 +1,7 @@
 import pandas as pd
 import pytest

-from pinecone.data import _Index
+from pinecone.db_data import _Index
 import pinecone.core.openapi.db_data.models as oai
 from pinecone import QueryResponse, UpsertResponse, Vector

diff --git a/tests/unit/test_index_initialization.py b/tests/unit/test_index_initialization.py
index 3d10d636..29928fbc 100644
--- a/tests/unit/test_index_initialization.py
+++ b/tests/unit/test_index_initialization.py
@@ -51,5 +51,6 @@ def test_overwrite_useragent(self):
     def test_set_source_tag(self):
         pc = Pinecone(api_key="123-456-789", source_tag="test_source_tag")
         assert (
-            re.search(r"source_tag=test_source_tag", pc.index_api.api_client.user_agent) is not None
+            re.search(r"source_tag=test_source_tag", pc.db.index_api.api_client.user_agent)
+            is not None
         )
diff --git a/tests/unit/test_plugin_aware.py b/tests/unit/test_plugin_aware.py
new file mode 100644
index 00000000..315bd225
--- /dev/null
+++ b/tests/unit/test_plugin_aware.py
@@ -0,0 +1,48 @@
+import pytest
+from pinecone.utils.plugin_aware import PluginAware
+from pinecone.config import Config, OpenApiConfiguration
+
+
+class TestPluginAware:
+    def test_errors_when_required_attributes_are_missing(self):
+        class Foo(PluginAware):
+            def __init__(self):
+                # does not set config, openapi_config, or pool_threads
+                super().__init__()
+
+        with pytest.raises(AttributeError) as e:
+            Foo()
+
+        assert "config" in str(e.value)
+        assert "openapi_config" in str(e.value)
+        assert "pool_threads" in str(e.value)
+
+    def test_correctly_raises_attribute_errors(self):
+        class Foo(PluginAware):
+            def __init__(self):
+                self.config = Config()
+                self.openapi_config = OpenApiConfiguration()
+                self.pool_threads = 1
+
+                super().__init__()
+
+        foo = Foo()
+
+        with pytest.raises(AttributeError) as e:
+            foo.bar()
+
+        assert "bar" in str(e.value)
+
+    def test_plugins_are_lazily_loaded(self):
+        class Pinecone(PluginAware):
+            def __init__(self):
+                self.config = Config()
+                self.openapi_config = OpenApiConfiguration()
+                self.pool_threads = 10
+
+                super().__init__()
+
+        pc = Pinecone()
+        assert "assistant" not in dir(pc)
+
+        assert pc.assistant is not None
diff --git a/tests/unit/test_query_results_aggregator.py b/tests/unit/test_query_results_aggregator.py
index b40a11d2..d3c97f87 100644
--- a/tests/unit/test_query_results_aggregator.py
+++ b/tests/unit/test_query_results_aggregator.py
@@ -1,4 +1,4 @@
-from pinecone.data.query_results_aggregator import (
+from pinecone.db_data.query_results_aggregator import (
     QueryResultsAggregator,
     QueryResultsAggregatorInvalidTopKError,
 )
diff --git a/tests/unit/utils/test_docs_links.py b/tests/unit/utils/test_docs_links.py
index 478ba3b2..c1d01b21 100644
--- a/tests/unit/utils/test_docs_links.py
+++ b/tests/unit/utils/test_docs_links.py
@@ -1,11 +1,17 @@
 import pytest
 import requests
 from pinecone.utils import docslinks
+from pinecone import __version__

 urls = list(docslinks.values())


 @pytest.mark.parametrize("url", urls)
 def test_valid_links(url):
-    response = requests.get(url)
-    assert response.status_code == 200, f"Docs link is invalid: {url}"
+    if isinstance(url, str):
+        response = requests.get(url)
+        assert response.status_code == 200, f"Docs link is invalid: {url}"
+    else:
+        versioned_url = url(__version__)
+        response = requests.get(versioned_url)
+        assert response.status_code == 200, f"Docs link is invalid: {versioned_url}"
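The test_docs_links.py change above implies that `docslinks` values now come in two shapes: plain URL strings and callables that take the package version and return a versioned URL. A sketch of the shape the test handles, with made-up entries (the keys and URLs below are illustrative, not the SDK's actual docslinks contents):

from typing import Callable, Dict, Union

DocLink = Union[str, Callable[[str], str]]

docslinks: Dict[str, DocLink] = {
    "README": "https://github.com/pinecone-io/pinecone-python-client/blob/main/README.md",
    "GITHUB_REPO": lambda version: (
        f"https://github.com/pinecone-io/pinecone-python-client/blob/release/v{version}/README.md"
    ),
}

for link in docslinks.values():
    # Resolve callables with the installed version, mirroring test_valid_links
    url = link("6.0.0") if callable(link) else link
    assert url.startswith("https://")

Making some links version-aware lets error messages and docs references point at the release the user actually has installed.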
diff --git a/tests/upgrade/test_all.py b/tests/upgrade/test_all.py
new file mode 100644
index 00000000..acabf620
--- /dev/null
+++ b/tests/upgrade/test_all.py
@@ -0,0 +1,28 @@
+class TestAll:
+    def test_all_is_complete(self):
+        """Test that __all__ is complete and accurate."""
+        # Import the module
+        import pinecone
+
+        # Get all public names (those that don't start with _)
+        public_names = {name for name in dir(pinecone) if not name.startswith("_")}
+
+        # Get __all__ if it exists, otherwise empty set
+        all_names = set(getattr(pinecone, "__all__", []))
+
+        # Check that __all__ exists
+        assert hasattr(pinecone, "__all__"), "Module should have __all__ defined"
+
+        # Check that all names in __all__ are actually importable
+        for name in all_names:
+            assert getattr(pinecone, name) is not None, f"Name {name} in __all__ is not importable"
+
+        # Check that all public names are in __all__
+        missing_from_all = public_names - all_names
+        for name in missing_from_all:
+            print(f"Public name {name} is not in __all__")
+        assert not missing_from_all, f"Public names not in __all__: {missing_from_all}"
+
+        # Check that __all__ doesn't contain any private names
+        private_in_all = {name for name in all_names if name.startswith("_")}
+        assert not private_in_all, f"Private names in __all__: {private_in_all}"
diff --git a/tests/upgrade/test_reorganization.py b/tests/upgrade/test_reorganization.py
new file mode 100644
index 00000000..331681b7
--- /dev/null
+++ b/tests/upgrade/test_reorganization.py
@@ -0,0 +1,19 @@
+import pytest
+
+
+class TestReorganization:
+    def test_data(self):
+        with pytest.warns(DeprecationWarning) as warning_info:
+            from pinecone.data import Index
+
+            assert Index is not None
+            assert len(warning_info) > 0
+            assert "has moved to" in str(warning_info[0].message)
+
+    def test_config(self):
+        with pytest.warns(DeprecationWarning) as warning_info:
+            from pinecone.config import PineconeConfig
+
+            assert PineconeConfig is not None
+            assert len(warning_info) > 0
+            assert "has moved to" in str(warning_info[0].message)
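test_reorganization.py expects imports from the old locations to keep working while emitting a DeprecationWarning whose message contains "has moved to". One way to get exactly that behavior is a PEP 562 module-level `__getattr__` in the legacy module; the sketch below is an assumed shim, not the SDK's actual code:

# Contents of a hypothetical legacy module, e.g. pinecone/data/__init__.py
import importlib
import warnings

_MOVED = {"Index": "pinecone.db_data"}  # old name -> new module (illustrative mapping)


def __getattr__(name: str):
    if name in _MOVED:
        new_module = _MOVED[name]
        warnings.warn(
            f"{__name__}.{name} has moved to {new_module}.{name}",
            DeprecationWarning,
            stacklevel=2,
        )
        # Delegate to the new location so old imports keep returning the real object
        return getattr(importlib.import_module(new_module), name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

Because `from pinecone.data import Index` falls back to the module's `__getattr__` when the name is not in the module dict, the warning fires at import time, which is what pytest.warns captures in the tests above.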
"PineconeConfig", + "PineconeConfigurationError", + "PineconeException", + "PineconeProtocolError", + "PineconeApiAttributeError", + "PineconeApiException", + ] + + exception_imports = [ + "PineconeConfigurationError", + "PineconeProtocolError", + "PineconeException", + "PineconeApiAttributeError", + "PineconeApiTypeError", + "PineconeApiValueError", + "PineconeApiKeyError", + "PineconeApiException", + "NotFoundException", + "UnauthorizedException", + "ForbiddenException", + "ServiceException", + "ListConversionException", + ] + mapped_imports = data_imports + control_imports + config_imports + exception_imports + + for import_name in mapped_imports: + assert hasattr(pinecone, import_name), f"Import {import_name} not found in pinecone" + + def test_v6_upgrade_root_imports(self): + v6_dir_items = [ + "CollectionDescription", + "CollectionList", + "Config", + "ConfigBuilder", + "DeleteRequest", + "DescribeIndexStatsRequest", + "DescribeIndexStatsResponse", + "FetchResponse", + "ForbiddenException", + "ImportErrorMode", + "Index", + "IndexList", + "IndexModel", + "ListConversionException", + "MetadataDictionaryExpectedError", + "NotFoundException", + "Pinecone", + "PineconeApiAttributeError", + "PineconeApiException", + "PineconeApiKeyError", + "PineconeApiTypeError", + "PineconeApiValueError", + "PineconeConfig", + "PineconeConfigurationError", + "PineconeException", + "PineconeProtocolError", + "PodSpec", + "PodSpecDefinition", + "QueryRequest", + "QueryResponse", + "RpcStatus", + "ScoredVector", + "ServerlessSpec", + "ServerlessSpecDefinition", + "ServiceException", + "SingleQueryResults", + "SparseValues", + "SparseValuesDictionaryExpectedError", + "SparseValuesMissingKeysError", + "SparseValuesTypeError", + "TqdmExperimentalWarning", + "UnauthorizedException", + "UpdateRequest", + "UpsertRequest", + "UpsertResponse", + "Vector", + "VectorDictionaryExcessKeysError", + "VectorDictionaryMissingKeysError", + "VectorTupleLengthError", + "__builtins__", + "__cached__", + "__doc__", + "__file__", + "__loader__", + "__name__", + "__package__", + "__path__", + "__spec__", + "__version__", + "config", + "configure_index", + "control", + "core", + "core_ea", + "create_collection", + "create_index", + "data", + "delete_collection", + "delete_index", + "deprecation_warnings", + "describe_collection", + "describe_index", + "errors", + "exceptions", + "features", + "index", + "index_host_store", + "init", + "install_repr_overrides", + "langchain_import_warnings", + "list_collections", + "list_indexes", + "logging", + "models", + "openapi", + "os", + "pinecone", + "pinecone_config", + "repr_overrides", + "scale_index", + "sparse_vector_factory", + "utils", + "vector_factory", + "warnings", + ] + + intentionally_removed_items = ["os"] + + expected_items = [item for item in v6_dir_items if item not in intentionally_removed_items] + + missing_items = [] + for item in expected_items: + if not hasattr(pinecone, item): + missing_items.append(item) + logger.debug(f"Exported: ❌ {item}") + else: + logger.debug(f"Exported: ✅ {item}") + + extra_items = [] + for item in intentionally_removed_items: + if hasattr(pinecone, item): + extra_items.append(item) + logger.debug(f"Removed: ❌ {item}") + else: + logger.debug(f"Removed: ✅ {item}") + + assert len(missing_items) == 0, f"Missing items: {missing_items}" + assert len(extra_items) == 0, f"Extra items: {extra_items}" + + # def test_v6_upgrade_data_imports(self): + # v6_data_dir_items = [ + # "DescribeIndexStatsResponse", + # "EmbedModel", + # "FetchResponse", + # 
"ImportErrorMode", + # "Index", + # "IndexClientInstantiationError", + # "Inference", + # "InferenceInstantiationError", + # "MetadataDictionaryExpectedError", + # "QueryResponse", + # "RerankModel", + # "SearchQuery", + # "SearchQueryVector", + # "SearchRerank", + # "SparseValues", + # "SparseValuesDictionaryExpectedError", + # "SparseValuesMissingKeysError", + # "SparseValuesTypeError", + # "UpsertResponse", + # "Vector", + # "VectorDictionaryExcessKeysError", + # "VectorDictionaryMissingKeysError", + # "VectorTupleLengthError", + # "_AsyncioInference", + # "_Index", + # "_IndexAsyncio", + # "_Inference", + # "__builtins__", + # "__cached__", + # "__doc__", + # "__file__", + # "__loader__", + # "__name__", + # "__package__", + # "__path__", + # "__spec__", + # "dataclasses", + # "errors", + # "features", + # "fetch_response", + # "import_error", + # "index", + # "index_asyncio", + # "index_asyncio_interface", + # "interfaces", + # "query_results_aggregator", + # "request_factory", + # "search_query", + # "search_query_vector", + # "search_rerank", + # "sparse_values", + # "sparse_values_factory", + # "types", + # "utils", + # "vector", + # "vector_factory", + # ] + + # missing_items = [] + # for item in v6_data_dir_items: + # if item not in dir(pinecone.db_data): + # missing_items.append(item) + + # assert len(missing_items) == 0, f"Missing items: {missing_items}"