diff --git a/Makefile b/Makefile index 24c03d9..f142424 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ PIP := $(PYTHON) -m pip ifneq ("$(wildcard .env)","") include .env else - $(shell echo -e "OPENAI_API_ORGANIZATION=PLEASE-ADD-ME\nOPENAI_API_KEY=PLEASE-ADD-ME\nPINECONE_API_KEY=PLEASE-ADD-ME\nPINECONE_ENVIRONMENT=gcp-starter\nGOOGLE_MAPS_API_KEY=PLEASE-ADD-ME\nDEBUG_MODE=True\n" >> .env) + $(shell echo -e "OPENAI_API_ORGANIZATION=PLEASE-ADD-ME\nOPENAI_API_KEY=PLEASE-ADD-ME\nPINECONE_API_KEY=PLEASE-ADD-ME\nPINECONE_ENVIRONMENT=gcp-starter\nGOOGLE_MAPS_API_KEY=PLEASE-ADD-ME\nDEBUG_MODE=True\nSETTINGS_AWS_S3_BUCKET=PLEASE-ADD-ME\n" >> .env) endif .PHONY: analyze pre-commit api-init api-activate api-lint api-clean api-test client-init client-lint client-update client-run client-build client-release diff --git a/api/terraform/python/openai_api/common/conf.py b/api/terraform/python/openai_api/common/conf.py index 697a56b..13f6643 100644 --- a/api/terraform/python/openai_api/common/conf.py +++ b/api/terraform/python/openai_api/common/conf.py @@ -170,6 +170,7 @@ class SettingsDefaults: GOOGLE_MAPS_API_KEY: str = TFVARS.get("google_maps_api_key", None) LANGCHAIN_MEMORY_KEY = "chat_history" + SETTINGS_AWS_S3_BUCKET: str = None OPENAI_API_ORGANIZATION: str = None OPENAI_API_KEY = SecretStr(None) OPENAI_ENDPOINT_IMAGE_N = 4 @@ -357,6 +358,7 @@ def __init__(self, **data: Any): # noqa: C901 env="GOOGLE_MAPS_API_KEY", ) langchain_memory_key: Optional[str] = Field(SettingsDefaults.LANGCHAIN_MEMORY_KEY, env="LANGCHAIN_MEMORY_KEY") + settings_aws_bucket: Optional[str] = Field(SettingsDefaults.SETTINGS_AWS_S3_BUCKET, env="SETTINGS_AWS_S3_BUCKET") openai_api_organization: Optional[str] = Field( SettingsDefaults.OPENAI_API_ORGANIZATION, env="OPENAI_API_ORGANIZATION" ) @@ -564,6 +566,7 @@ def get_installed_packages(): "google_maps_api_key": self.google_maps_api_key, }, "openai_api": { + "settings_aws_bucket": self.settings_aws_bucket, "langchain_memory_key": 
self.langchain_memory_key, "openai_endpoint_image_n": self.openai_endpoint_image_n, "openai_endpoint_image_size": self.openai_endpoint_image_size, @@ -688,6 +691,13 @@ def check_langchain_memory_key(cls, v) -> str: return SettingsDefaults.LANGCHAIN_MEMORY_KEY return v + @field_validator("settings_aws_bucket") + def check_lambda_openai_function_config_url(cls, v) -> str: + """Check settings_aws_bucket""" + if v in [None, ""]: + return SettingsDefaults.SETTINGS_AWS_S3_BUCKET + return v + @field_validator("openai_api_organization") def check_openai_api_organization(cls, v) -> str: """Check openai_api_organization""" diff --git a/api/terraform/python/openai_api/lambda_openai_function/function_refers_to.py b/api/terraform/python/openai_api/lambda_openai_function/function_refers_to.py index fbaec41..ee7108e 100644 --- a/api/terraform/python/openai_api/lambda_openai_function/function_refers_to.py +++ b/api/terraform/python/openai_api/lambda_openai_function/function_refers_to.py @@ -7,7 +7,7 @@ from openai_api.common.const import PYTHON_ROOT from openai_api.lambda_openai_function.natural_language_processing import does_refer_to -from openai_api.lambda_openai_function.refers_to import RefersTo +from openai_api.lambda_openai_function.refers_to import CustomConfig from openai_api.lambda_openai_function.refers_to import config as refers_to_config @@ -34,7 +34,7 @@ def search_terms_are_in_messages(messages: list, search_terms: list = None, sear return False -def customized_prompt(config: RefersTo, messages: list) -> list: +def customized_prompt(config: CustomConfig, messages: list) -> list: """Modify the system prompt based on the custom configuration object""" for i, message in enumerate(messages): @@ -65,7 +65,7 @@ def get_additional_info(inquiry_type: str) -> str: raise KeyError(f"Invalid inquiry_type: {inquiry_type}") -def info_tool_factory(config: RefersTo): +def info_tool_factory(config: CustomConfig): """ Return a dictionary of chat completion tools. 
""" diff --git a/api/terraform/python/openai_api/lambda_openai_function/refers_to.py b/api/terraform/python/openai_api/lambda_openai_function/refers_to.py index 033f2cb..72a6a89 100644 --- a/api/terraform/python/openai_api/lambda_openai_function/refers_to.py +++ b/api/terraform/python/openai_api/lambda_openai_function/refers_to.py @@ -1,32 +1,65 @@ # -*- coding: utf-8 -*- """ -This module contains the RefersTo class, which is used to parse YAML config files for +This module contains the CustomConfig class, which is used to parse YAML config files for function_refers_to.get_additional_info(). """ import json +import logging import os import re import yaml +from openai_api.common.conf import settings from openai_api.common.const import PYTHON_ROOT +log = logging.getLogger(__name__) CONFIG_PATH = PYTHON_ROOT + "/openai_api/lambda_openai_function/config/" -class SystemPrompt: - """System prompt of a RefersTo object""" +class CustomConfigBase: + """Base class for CustomConfig and CustomConfigs""" + + def __init__(self) -> None: + pass + + def __repr__(self): + return f"{self.__class__.__name__}({self.__dict__})" + + def __str__(self): + return f"{self.__dict__}" + + def do_error(self, err: str) -> None: + """Print the error message and raise a ValueError""" + print(err) + log.error(err) + raise ValueError(err) + + +class SystemPrompt(CustomConfigBase): + """System prompt of a CustomConfig object""" config: str = None def __init__(self, system_prompt=None): + super().__init__() + system_prompt = system_prompt or "" self.config = system_prompt self.validate() + @property + def is_valid(self) -> bool: + """Return True if the config file is valid""" + try: + self.validate() + return True + except ValueError: + return False + def validate(self) -> None: """Validate the config file""" if not isinstance(self.system_prompt, str): - raise ValueError(f"Expected a string but received {type(self.config)}") + self.do_error(f"Expected a string but received 
{type(self.system_prompt)}") @property def system_prompt(self) -> str: @@ -37,10 +70,11 @@ def __str__(self): return f"{self.config}" -class SearchTerms: - """Search terms of a RefersTo object""" +class SearchTerms(CustomConfigBase): + """Search terms of a CustomConfig object""" def __init__(self, search_terms: dict = None): + super().__init__() self.config_json = search_terms self.validate() @@ -54,24 +88,33 @@ def pairs(self) -> list: """Return a list of search terms""" return self.config_json["pairs"] + @property + def is_valid(self) -> bool: + """Return True if the config file is valid""" + try: + self.validate() + return True + except ValueError: + return False + def validate(self) -> None: """Validate the config file""" required_keys = ["strings", "pairs"] if not self.config_json: - raise ValueError("search_terms is empty") + self.do_error("search_terms is empty") for key in required_keys: if key not in self.config_json: - raise ValueError(f"Invalid search_terms: {self.config_json}. Missing key: {key}.") + self.do_error(f"Invalid search_terms: {self.config_json}. Missing key: {key}.") if not all(isinstance(item, str) for item in self.strings): - raise ValueError(f"Invalid config file: {self.config_json}. 'strings' should be a list of strings.") + self.do_error(f"Invalid config file: {self.config_json}. 'strings' should be a list of strings.") if not all( isinstance(pair, list) and len(pair) == 2 and all(isinstance(item, str) for item in pair) for pair in self.pairs ): - raise ValueError(f"Invalid config file: {self.config_json}. 'pairs' should be a list of pairs of strings.") + self.do_error(f"Invalid config file: {self.config_json}. 
'pairs' should be a list of pairs of strings.") def to_json(self) -> json: """Return the config file as a JSON object""" @@ -82,12 +125,13 @@ def __str__(self): return f"{self.to_json()}" -class AdditionalInformation: - """Additional information of a RefersTo object""" +class AdditionalInformation(CustomConfigBase): + """Additional information of a CustomConfig object""" config_json: dict = None def __init__(self, additional_information: dict = None): + super().__init__() self.config_json = additional_information self.validate() @@ -96,10 +140,19 @@ def keys(self) -> list: """Return a list of keys for additional information""" return list(self.config_json.keys()) + @property + def is_valid(self) -> bool: + """Return True if the config file is valid""" + try: + self.validate() + return True + except ValueError: + return False + def validate(self) -> None: """Validate the config file""" if not isinstance(self.config_json, dict): - raise ValueError(f"Expected a dict but received {type(self.config_json)}") + self.do_error(f"Expected a dict but received {type(self.config_json)}") def to_json(self) -> json: """Return the config file as a JSON object""" @@ -110,7 +163,7 @@ def __str__(self): return f"{self.to_json()}" -class RefersTo: +class CustomConfig(CustomConfigBase): """Parse the YAML config file for a given Lambda function""" additional_information: str = None @@ -119,15 +172,24 @@ class RefersTo: additional_information: AdditionalInformation = None system_prompt: SystemPrompt = None - def __init__(self, config_path: str, index: int = 0): + def __init__(self, config_path: str = None, config_json: dict = None, index: int = 0): + super().__init__() self.config_path = config_path self.index = index + if not config_path and not config_json: + raise ValueError("Expected a config_path or config_json") + if config_path and config_json: + raise ValueError("Expected a config_path or config_json, not both") + + if config_path: + if not self.file_name.endswith((".yaml", 
".yml")): + raise ValueError(f"Invalid file type: {self.file_name}. Expected a YAML file.") - if not self.file_name.endswith((".yaml", ".yml")): - raise ValueError(f"Invalid file type: {self.file_name}. Expected a YAML file.") + with open(self.config_path, "r", encoding="utf-8") as file: + self.config_json = yaml.safe_load(file) - with open(self.config_path, "r", encoding="utf-8") as file: - self.config_json = yaml.safe_load(file) + if config_json: + self.config_json = config_json self.validate() self.search_terms = SearchTerms(search_terms=self.config_json["search_terms"]) @@ -140,32 +202,47 @@ def __init__(self, config_path: str, index: int = 0): @property def name(self) -> str: """Return a name in the format: "WillyWonka""" - return self.parsed_filename.replace(" ", "") + if self.parsed_filename: + return self.parsed_filename.replace(" ", "") + return None @property def file_name(self) -> str: """Return the name of the config file""" - return os.path.basename(self.config_path) + if self.config_path: + return os.path.basename(self.config_path) + return None @property def parsed_filename(self) -> str: """Return a name in the format: "Willy Wonka" """ - name = os.path.splitext(self.file_name)[0] - name = re.sub("-", " ", name) - name = re.sub(r"(?<=[a-z])(?=[A-Z])", " ", name) - name = name.title() - name = "".join(name.split()) - return name + if self.file_name: + name = os.path.splitext(self.file_name)[0] + name = re.sub("-", " ", name) + name = re.sub(r"(?<=[a-z])(?=[A-Z])", " ", name) + name = name.title() + name = "".join(name.split()) + return name + return None + + @property + def is_valid(self) -> bool: + """Return True if the config file is valid""" + try: + self.validate() + except ValueError: + return False + return self.additional_information.is_valid and self.search_terms.is_valid and self.system_prompt.is_valid def validate(self) -> None: """Validate the config file""" required_keys = ["search_terms", "system_prompt", "function_description", 
"additional_information"] if not self.config_json: - raise ValueError(f"Invalid config file: {self.config_path}") + self.do_error(f"Invalid config file: {self.config_path}") for key in required_keys: if key not in self.config_json: - raise ValueError(f"Invalid config file: {self.config_path}. Missing key: {key}.") + self.do_error(f"Invalid config file: {self.config_path}. Missing key: {key}.") def to_json(self) -> json: """Return the config file as a JSON object""" @@ -181,17 +258,116 @@ def __str__(self): return f"{self.to_json()}" -def loader() -> list[RefersTo]: - """Load all the config files""" - i = 0 - retval = [] - for filename in os.listdir(CONFIG_PATH): - if filename.endswith((".yaml", ".yml")): - i += 1 - config_path = os.path.join(CONFIG_PATH, filename) - refers_to = RefersTo(config_path=config_path, index=i) - retval.append(refers_to) - return retval +class CustomConfigs: + """List of CustomConfig objects""" + + _custom_configs: list[CustomConfig] = None + _aws_bucket_name: str = None + _aws_bucket_path: str = "/aws_openai/lambda_openai_function/custom_configs/" + _aws_bucket_path_validated: bool = False + + def __init__(self, config_path: str = None, aws_s3_bucket_name: str = None): + i = 0 + self._custom_configs = [] + self._aws_bucket_name = aws_s3_bucket_name + self.verify_bucket(bucket_name=aws_s3_bucket_name) + + # Load all the config files in this repo + for filename in os.listdir(config_path): + if filename.endswith((".yaml", ".yml")): + i += 1 + full_config_path = os.path.join(config_path, filename) + custom_config = CustomConfig(config_path=full_config_path, index=i) + self._custom_configs.append(custom_config) + + # Load config files from the AWS S3 bucket + if self.aws_bucket_path_validated: + s3 = settings.aws_session.resource("s3") + bucket = s3.Bucket(self._aws_bucket_name) + + for obj in bucket.objects.filter(Prefix=self._aws_bucket_path): + i += 1 + file_content = obj.get()["Body"].read().decode("utf-8") + config_json = 
yaml.safe_load(file_content) + custom_config = CustomConfig(config_json=config_json, index=i) + self._custom_configs.append(custom_config) + + def list_yaml_files(bucket_name): + """List all the YAML files in the AWS S3 bucket""" + s3 = settings.aws_session.resource("s3") + bucket = s3.Bucket(bucket_name) + + for obj in bucket.objects.all(): + if obj.key.endswith(".yaml") or obj.key.endswith(".yml"): + print("Found YAML file:", obj.key) + + @property + def valid_configs(self) -> list[CustomConfig]: + """Return a list of valid configs""" + return [config for config in self._custom_configs if config.is_valid] + + @property + def invalid_configs(self) -> list[CustomConfig]: + """Return a list of invalid configs""" + return [config for config in self._custom_configs if not config.is_valid] + + @property + def aws_bucket_path_validated(self) -> bool: + """Return True if the remote host is valid""" + return self._aws_bucket_path_validated + + @property + def aws_bucket_full_path(self) -> str: + """Return the remote host""" + if self.aws_bucket_path_validated: + return self._aws_bucket_name + self._aws_bucket_path + return None + + def verify_bucket(self, bucket_name: str): + """Verify that the remote host is valid""" + s3 = settings.aws_session.resource("s3") + bucket = s3.Bucket(bucket_name) + folder_path = self._aws_bucket_path + try: + # Check if bucket exists + s3.meta.client.head_bucket(Bucket=bucket_name) + # pylint: disable=broad-exception-caught + except Exception: + return + + try: + # Create any missing folders + if not any(s3_object.key.startswith(folder_path) for s3_object in bucket.objects.all()): + s3.Object(bucket_name, folder_path).put() + self._aws_bucket_path_validated = True + # pylint: disable=broad-exception-caught + except Exception: + pass + + def to_json(self) -> json: + """Return the _custom_configs list as a JSON object""" + return self.valid_configs + + +class SingletonCustomConfigs: + """Singleton for Settings""" + + _instance = None + 
_custom_configs = None + + def __new__(cls): + """Create a new instance of Settings""" + if cls._instance is None: + cls._instance = super(SingletonCustomConfigs, cls).__new__(cls) + cls._instance._custom_configs = CustomConfigs( + config_path=CONFIG_PATH, aws_s3_bucket_name=settings.aws_s3_bucket_name + ) + return cls._instance + + @property + def custom_configs(self) -> CustomConfigs: + """Return the settings""" + return self._custom_configs -config: list[RefersTo] = loader() +config = SingletonCustomConfigs().custom_configs.valid_configs diff --git a/api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_function.py b/api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_function.py index 4997a7d..f23f2c7 100644 --- a/api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_function.py +++ b/api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_function.py @@ -23,7 +23,7 @@ search_terms_are_in_messages, ) from openai_api.lambda_openai_function.natural_language_processing import does_refer_to -from openai_api.lambda_openai_function.refers_to import RefersTo +from openai_api.lambda_openai_function.refers_to import CustomConfig from openai_api.lambda_openai_function.tests.test_setup import ( get_test_file, get_test_file_path, @@ -36,7 +36,7 @@ class TestLambdaOpenai(unittest.TestCase): def setUp(self): """Set up test fixtures.""" self.config_path = get_test_file_path("config/everlasting-gobbstopper.yaml") - self.config = RefersTo(config_path=self.config_path) + self.config = CustomConfig(config_path=self.config_path) def check_response(self, response): """Check response structure from lambda_handler.""" diff --git a/api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_refers_to.py b/api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_refers_to.py index c19f24d..8f7af48 100644 --- 
a/api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_refers_to.py +++ b/api/terraform/python/openai_api/lambda_openai_function/tests/test_lambda_openai_refers_to.py @@ -25,7 +25,7 @@ get_additional_info, info_tool_factory, ) -from openai_api.lambda_openai_function.refers_to import RefersTo +from openai_api.lambda_openai_function.refers_to import CustomConfig from openai_api.lambda_openai_function.tests.test_setup import get_test_file_path @@ -35,7 +35,7 @@ class TestLambdaOpenaiFunctionRefersTo(unittest.TestCase): def setUp(self): """Set up test fixtures.""" self.config_path = get_test_file_path("config/everlasting-gobbstopper.yaml") - self.config = RefersTo(config_path=self.config_path) + self.config = CustomConfig(config_path=self.config_path) # pylint: disable=broad-exception-caught def test_get_additional_info(self): diff --git a/api/terraform/python/openai_api/lambda_openai_function/tests/test_refers_to.py b/api/terraform/python/openai_api/lambda_openai_function/tests/test_refers_to.py index 8b71d44..dedee47 100644 --- a/api/terraform/python/openai_api/lambda_openai_function/tests/test_refers_to.py +++ b/api/terraform/python/openai_api/lambda_openai_function/tests/test_refers_to.py @@ -23,7 +23,7 @@ # pylint: disable=no-name-in-module from openai_api.lambda_openai_function.refers_to import ( AdditionalInformation, - RefersTo, + CustomConfig, SearchTerms, SystemPrompt, ) @@ -99,7 +99,7 @@ def test_additional_information_invalid(self): def test_refers_to(self): """Test refers_to.""" - refers_to = RefersTo(config_path=get_test_file_path("config/everlasting-gobbstopper.yaml")) + refers_to = CustomConfig(config_path=get_test_file_path("config/everlasting-gobbstopper.yaml")) self.assertEqual(refers_to.name, "EverlastingGobbstopper") self.assertEqual(refers_to.file_name, "everlasting-gobbstopper.yaml")