From 7ac46e44f32d37a79fe1c6effb9101599d43907d Mon Sep 17 00:00:00 2001 From: Himanshu Gupta <161701569+himanshugt16@users.noreply.github.com> Date: Mon, 13 May 2024 11:55:16 +0530 Subject: [PATCH] Upload Download Bot Content (#1196) * Upload and Download Bot Content * Upload and Download Bot Content * Updated Test Cases * Updated code to accommodate codacy suggestions * Removed unused import and replaced print statements * Fixed Test Cases --- kairon/importer/data_importer.py | 4 +- .../validator/bot_content_schema.yaml | 15 + kairon/importer/validator/file_validator.py | 133 ++++- kairon/shared/data/constant.py | 4 +- kairon/shared/data/processor.py | 194 +++++- kairon/shared/data/utils.py | 27 +- kairon/shared/importer/data_objects.py | 1 + kairon/shared/importer/processor.py | 2 +- kairon/shared/utils.py | 45 +- requirements/dev.txt | 1 + requirements/prod.txt | 1 + tests/integration_test/services_test.py | 558 +++++++++++++++--- tests/testing_data/all/bot_content.yml | 31 + .../testing_data/bot_content/bot_content.yml | 31 + .../bot_content/invalid_bot_content.yml | 28 + .../validator/append/bot_content.yml | 31 + .../yml_training_files/bot_content.yml | 31 + .../data_processor/data_processor_test.py | 77 ++- tests/unit_test/events/events_test.py | 6 +- tests/unit_test/utility_test.py | 5 + .../validator/training_data_validator_test.py | 59 ++ 21 files changed, 1114 insertions(+), 170 deletions(-) create mode 100644 kairon/importer/validator/bot_content_schema.yaml create mode 100644 tests/testing_data/all/bot_content.yml create mode 100644 tests/testing_data/bot_content/bot_content.yml create mode 100644 tests/testing_data/bot_content/invalid_bot_content.yml create mode 100644 tests/testing_data/validator/append/bot_content.yml create mode 100644 tests/testing_data/yml_training_files/bot_content.yml diff --git a/kairon/importer/data_importer.py b/kairon/importer/data_importer.py index b84e0ee6e..d864d8d7e 100644 --- a/kairon/importer/data_importer.py +++
b/kairon/importer/data_importer.py @@ -2,7 +2,6 @@ from typing import Text from rasa.shared.constants import DEFAULT_DOMAIN_PATH, DEFAULT_CONFIG_PATH, DEFAULT_DATA_PATH - from kairon.shared.data.constant import REQUIREMENTS from kairon.shared.data.processor import MongoProcessor from .validator.file_validator import TrainingDataValidator @@ -38,7 +37,7 @@ async def validate(self): TrainingDataValidator.validate_domain(domain_path) self.validator = await TrainingDataValidator.from_training_files(data_path, domain_path, config_path, self.path) - self.validator.validate_training_data(False) + self.validator.validate_training_data(False, self.bot, self.user, self.save_data, self.overwrite) return self.validator.summary, self.validator.component_count def import_data(self): @@ -54,5 +53,6 @@ def import_data(self): self.validator.intents, self.validator.actions, self.validator.multiflow_stories, + self.validator.bot_content, self.validator.chat_client_config.get('config'), self.overwrite, self.files_to_save) diff --git a/kairon/importer/validator/bot_content_schema.yaml b/kairon/importer/validator/bot_content_schema.yaml new file mode 100644 index 000000000..018d6e898 --- /dev/null +++ b/kairon/importer/validator/bot_content_schema.yaml @@ -0,0 +1,15 @@ +type: seq +sequence: + - type: map + mapping: + collection: + required: true + type: str + data: + type: any + metadata: + required: true + type: any + type: + required: true + type: str \ No newline at end of file diff --git a/kairon/importer/validator/file_validator.py b/kairon/importer/validator/file_validator.py index f0b51298b..b55b3f0e6 100644 --- a/kairon/importer/validator/file_validator.py +++ b/kairon/importer/validator/file_validator.py @@ -1,6 +1,6 @@ import os from collections import defaultdict -from typing import Optional, Dict, Text +from typing import Optional, Dict, Text, List from loguru import logger from rasa.core.training.story_conflict import find_story_conflicts @@ -15,15 +15,18 @@ from 
rasa.shared.nlu import constants from rasa.shared.utils.validation import YamlValidationException from rasa.validator import Validator +from pykwalify.core import Core from kairon.exceptions import AppException from kairon.shared.actions.data_objects import FormValidationAction, SlotSetAction, JiraAction, GoogleSearchAction, \ ZendeskAction, EmailActionConfig, HttpActionConfig, PipedriveLeadsAction, PromptAction, RazorpayAction, \ PyscriptActionConfig, DatabaseAction from kairon.shared.actions.models import ActionType, ActionParameterType, DbActionOperationType +from kairon.shared.cognition.data_objects import CognitionSchema from kairon.shared.constants import DEFAULT_ACTIONS, DEFAULT_INTENTS, SYSTEM_TRIGGERED_UTTERANCES, SLOT_SET_TYPE from kairon.shared.data.constant import KAIRON_TWO_STAGE_FALLBACK from kairon.shared.data.data_objects import MultiflowStories +from kairon.shared.data.processor import MongoProcessor from kairon.shared.data.utils import DataUtility from kairon.shared.models import StoryStepType from kairon.shared.utils import Utility, StoryValidator @@ -79,6 +82,8 @@ async def from_training_files(cls, training_data_paths: str, domain_path: str, c multiflow_stories = Utility.read_yaml(os.path.join(root_dir, 'multiflow_stories.yml')) cls.multiflow_stories = multiflow_stories if multiflow_stories else {} cls.multiflow_stories_graph = StoryValidator.create_multiflow_story_graphs(multiflow_stories) + bot_content = Utility.read_yaml(os.path.join(root_dir, 'bot_content.yml')) + cls.bot_content = bot_content if bot_content else {} return await TrainingDataValidator.from_importer(file_importer) except YamlValidationException as e: @@ -267,14 +272,16 @@ def verify_utterances_in_stories(self, raise_exception: bool = True): multiflow_utterance = set() multiflow_actions = set() if self.multiflow_stories: - multiflow_utterance, multiflow_actions = self.verify_utterance_and_actions_in_multiflow_stories(raise_exception) + multiflow_utterance, multiflow_actions = 
self.verify_utterance_and_actions_in_multiflow_stories( + raise_exception) for story in self.story_graph.story_steps: for event in story.events: if not isinstance(event, ActionExecuted): continue if not event.action_name.startswith(UTTER_PREFIX): - if event.action_name != 'action_restart' and event.action_name != '...' and not event.action_name.startswith('intent'): + if event.action_name != 'action_restart' and event.action_name != '...' and not event.action_name.startswith( + 'intent'): story_actions.add(event.action_name) continue @@ -305,14 +312,17 @@ def verify_utterances_in_stories(self, raise_exception: bool = True): for slot in form_data.get('required_slots', {}): form_utterances.add(f"utter_ask_{form}_{slot}") - unused_utterances = set(utterance_in_domain) - form_utterances.union(set(self.domain.form_names)) - stories_utterances - multiflow_utterance - system_triggered_actions.union(fallback_action) + unused_utterances = set(utterance_in_domain) - form_utterances.union( + set(self.domain.form_names)) - stories_utterances - multiflow_utterance - system_triggered_actions.union( + fallback_action) for utterance in unused_utterances: msg = f"The utterance '{utterance}' is not used in any story." if raise_exception: raise AppException(msg) utterance_mismatch_summary.append(msg) - unused_actions = user_actions - utterance_in_domain - set(story_actions) - set(multiflow_actions) - {f'validate_{name}' for name in self.domain.form_names} + unused_actions = user_actions - utterance_in_domain - set(story_actions) - set(multiflow_actions) - { + f'validate_{name}' for name in self.domain.form_names} for action in unused_actions: if action not in system_triggered_actions.union(fallback_action): msg = f"The action '{action}' is not used in any story." 
@@ -357,9 +367,9 @@ def verify_utterance_and_actions_in_multiflow_stories(self, raise_exception: boo for utterance in utterances: if utterance not in user_actions: msg = f"The action '{utterance}' is used in the multiflow_stories, " \ - f"but is not a valid utterance action. Please make sure " \ - f"the action is listed in your domain and there is a " \ - f"template defined with its name." + f"but is not a valid utterance action. Please make sure " \ + f"the action is listed in your domain and there is a " \ + f"template defined with its name." if raise_exception: raise AppException(msg) utterance_mismatch_summary.append(msg) @@ -454,7 +464,8 @@ def verify_multiflow_story_structure(multiflow_story: list): for story in multiflow_story: if isinstance(story, dict): if len(required_fields.difference(set(story.keys()))) > 0: - story_error.append(f'Required fields {required_fields} not found in story: {story.get("block_name")}') + story_error.append( + f'Required fields {required_fields} not found in story: {story.get("block_name")}') continue if story.get('events'): errors = StoryValidator.validate_multiflow_story_steps_file_validator(story.get('events'), @@ -599,7 +610,8 @@ def __validate_slot_set_actions(slot_set_actions: list): data_error = [] actions_present = set() - required_fields = {k for k, v in SlotSetAction._fields.items() if v.required and k not in {'bot', 'user', 'timestamp', 'status'}} + required_fields = {k for k, v in SlotSetAction._fields.items() if + v.required and k not in {'bot', 'user', 'timestamp', 'status'}} for action in slot_set_actions: if isinstance(action, dict): if len(required_fields.difference(set(action.keys()))) > 0: @@ -630,7 +642,8 @@ def __validate_form_validation_actions(form_actions: list): data_error = [] actions_present = set() - required_fields = {k for k, v in FormValidationAction._fields.items() if v.required and k not in {'bot', 'user', 'timestamp', 'status'}} + required_fields = {k for k, v in 
FormValidationAction._fields.items() if + v.required and k not in {'bot', 'user', 'timestamp', 'status'}} for action in form_actions: if isinstance(action, dict): if len(required_fields.difference(set(action.keys()))) > 0: @@ -641,16 +654,19 @@ def __validate_form_validation_actions(form_actions: list): if action.get('slot_set'): if Utility.check_empty_string(action['slot_set'].get('type')): data_error.append('slot_set should have type current as default!') - if action['slot_set'].get('type') == 'current' and not Utility.check_empty_string(action['slot_set'].get('value')): + if action['slot_set'].get('type') == 'current' and not Utility.check_empty_string( + action['slot_set'].get('value')): data_error.append('slot_set with type current should not have any value!') - if action['slot_set'].get('type') == 'slot' and Utility.check_empty_string(action['slot_set'].get('value')): + if action['slot_set'].get('type') == 'slot' and Utility.check_empty_string( + action['slot_set'].get('value')): data_error.append('slot_set with type slot should have a valid slot value!') if action['slot_set'].get('type') not in ['current', 'custom', 'slot']: data_error.append('Invalid slot_set type!') else: data_error.append('slot_set must be present') if f"{action['name']}_{action['slot']}" in actions_present: - data_error.append(f"Duplicate form validation action found for slot {action['slot']}: {action['name']}") + data_error.append( + f"Duplicate form validation action found for slot {action['slot']}: {action['name']}") actions_present.add(f"{action['name']}_{action['slot']}") else: data_error.append('Invalid action configuration format. 
Dictionary expected.') @@ -671,13 +687,17 @@ def __validate_prompt_actions(prompt_actions: list): for action in prompt_actions: if isinstance(action, dict): if len(required_fields.difference(set(action.keys()))) > 0: - data_error.append(f'Required fields {sorted(required_fields)} not found in action: {action.get("name")}') + data_error.append( + f'Required fields {sorted(required_fields)} not found in action: {action.get("name")}') continue - if action.get('num_bot_responses') and (action['num_bot_responses'] > 5 or not isinstance(action['num_bot_responses'], int)): - data_error.append(f'num_bot_responses should not be greater than 5 and of type int: {action.get("name")}') + if action.get('num_bot_responses') and ( + action['num_bot_responses'] > 5 or not isinstance(action['num_bot_responses'], int)): + data_error.append( + f'num_bot_responses should not be greater than 5 and of type int: {action.get("name")}') llm_prompts_errors = TrainingDataValidator.__validate_llm_prompts(action['llm_prompts']) if action.get('hyperparameters') is not None: - llm_hyperparameters_errors = TrainingDataValidator.__validate_llm_prompts_hyperparamters(action.get('hyperparameters')) + llm_hyperparameters_errors = TrainingDataValidator.__validate_llm_prompts_hyperparamters( + action.get('hyperparameters')) data_error.extend(llm_hyperparameters_errors) data_error.extend(llm_prompts_errors) if action['name'] in actions_present: @@ -784,7 +804,8 @@ def __validate_llm_prompts_hyperparamters(hyperparameters: dict): error_list.append("logit_bias must be a dictionary!") elif key == 'stop': if value and (not isinstance(value, (str, int, list)) or (isinstance(value, list) and len(value) > 4)): - error_list.append("Stop must be None, a string, an integer, or an array of 4 or fewer strings or integers.") + error_list.append( + "Stop must be None, a string, an integer, or an array of 4 or fewer strings or integers.") return error_list @staticmethod @@ -1028,11 +1049,77 @@ def 
validate_actions(self, raise_exception: bool = True): if not is_data_invalid and raise_exception: raise AppException("Invalid actions.yml. Check logs!") - def validate_training_data(self, raise_exception: bool = True): + @staticmethod + def validate_content(bot: Text, user: Text, bot_content: List, save_data: bool = False, + overwrite: bool = True): + + bot_content_errors = [] + + settings = MongoProcessor.get_bot_settings(bot, user) + if not settings.to_mongo().to_dict()['llm_settings'].get('enable_faq'): + bot_content_errors.append("Please enable GPT on bot before uploading") + + current_dir = os.path.dirname(os.path.realpath(__file__)) + bot_content_schema_file_path = os.path.join(current_dir, "bot_content_schema.yaml") + schema_validator = Core(source_data=bot_content, schema_files=[bot_content_schema_file_path]) + try: + schema_validator.validate(raise_exception=True) + logger.info("Validation successful!") + except Exception as e: + logger.info(f"Validation failed: {e}") + bot_content_errors.append(f"Invalid bot_content.yml. Content does not match required schema: {e}") + + if save_data and not overwrite: + for item in bot_content: + if item.get('type') == 'json': + collection_name = item.get('collection') + existing_schema = CognitionSchema.objects(bot=bot, collection_name=collection_name).first() + if existing_schema: + existing_metadata = existing_schema.metadata + uploaded_metadata = item.get('metadata') + if len(existing_metadata) == len(uploaded_metadata): + for existing_meta, uploaded_meta in zip(existing_metadata, uploaded_metadata): + if existing_meta.column_name != uploaded_meta['column_name'] or \ + existing_meta.create_embeddings != uploaded_meta['create_embeddings'] or \ + existing_meta.data_type != uploaded_meta['data_type'] or \ + existing_meta.enable_search != uploaded_meta['enable_search']: + bot_content_errors.append("Invalid bot_content.yml. 
Collection with same name and " + "different metadata cannot be uploaded") + break + else: + bot_content_errors.append("Invalid bot_content.yml. Collection with same name and " + "different metadata cannot be uploaded") + + return bot_content_errors + + def validate_bot_content(self, bot: Text, user: Text, save_data: bool = True, + overwrite: bool = True, raise_exception: bool = True): + """ + Validates bot_content.yml. + :param bot: bot id + :param user: user id + :param save_data: flag to save data + :param overwrite: flag to overwrite data + :param raise_exception: Set this flag to false to prevent raising exceptions. + :return: + """ + if self.bot_content: + errors = TrainingDataValidator.validate_content(bot, user, self.bot_content, save_data, overwrite) + self.summary['bot_content'] = errors + if errors and raise_exception: + raise AppException("Invalid bot_content.yml. Check logs!") + + def validate_training_data(self, raise_exception: bool = True, bot: Text = None, user: Text = None, + save_data: bool = True, + overwrite: bool = True): """ Validate training data. - @param raise_exception: Set this flag to false to prevent raising exceptions. - @return: + :param raise_exception: Set this flag to false to prevent raising exceptions. 
+ :param bot: bot id + :param user: user id + :param save_data: flag to save data + :param overwrite: flag to overwrite data + :return: """ try: self.verify_story_structure(raise_exception) @@ -1041,6 +1128,8 @@ def validate_training_data(self, raise_exception: bool = True): self.validate_actions(raise_exception) self.validate_config(raise_exception) self.validate_multiflow(raise_exception) + self.validate_bot_content(bot, user, save_data, overwrite, raise_exception) + except Exception as e: logger.error(str(e)) raise AppException(e) diff --git a/kairon/shared/data/constant.py b/kairon/shared/data/constant.py index a94319920..00b573548 100644 --- a/kairon/shared/data/constant.py +++ b/kairon/shared/data/constant.py @@ -180,7 +180,9 @@ class ModelTestType(str, Enum): ALLOWED_ACTIONS_FORMATS = {'actions.yaml', 'actions.yml'} ALLOWED_CHAT_CLIENT_CONFIG_FORMATS = {'chat_client_config.yml', 'chat_client_config.yaml'} ALLOWED_MULTIFLOW_STORIES_FORMATS = {'multiflow_stories.yaml', 'multiflow_stories.yml'} -REQUIREMENTS = {'nlu', 'domain', 'config', 'stories', 'rules', 'actions', 'chat_client_config', 'multiflow_stories'} +ALLOWED_BOT_CONTENT_FORMATS = {'bot_content.yaml', 'bot_content.yml'} +REQUIREMENTS = {'nlu', 'domain', 'config', 'stories', 'rules', 'actions', 'chat_client_config', 'multiflow_stories', + 'bot_content'} COMPONENT_COUNT = {'intents': 0, 'utterances': 0, 'stories': 0, 'training_examples': 0, 'http_actions': 0, 'jira_actions': 0, 'google_search_actions': 0, 'zendesk_actions': 0, 'email_actions': 0, 'slot_set_actions': 0, 'form_validation_actions': 0, 'rules': 0, diff --git a/kairon/shared/data/processor.py b/kairon/shared/data/processor.py index 65c780b6e..34c01c134 100644 --- a/kairon/shared/data/processor.py +++ b/kairon/shared/data/processor.py @@ -6,7 +6,7 @@ from copy import deepcopy from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import Text, Dict, List +from typing import Text, Dict, List, Any from 
urllib.parse import urljoin import networkx as nx @@ -154,11 +154,10 @@ ) from .utils import DataUtility from ..chat.broadcast.data_objects import MessageBroadcastLogs -from ..cognition.data_objects import CognitionSchema +from ..cognition.data_objects import CognitionSchema, CognitionData, ColumnMetadata from ..constants import KaironSystemSlots, PluginTypes, EventClass from ..custom_widgets.data_objects import CustomWidgets from ..importer.data_objects import ValidationLogs -from ..live_agent.live_agent import LiveAgentHandler from ..multilingual.data_objects import BotReplicationLogs from ..test.data_objects import ModelTestingLogs @@ -169,17 +168,18 @@ class MongoProcessor: """ async def upload_and_save( - self, - nlu: File, - domain: File, - stories: File, - config: File, - rules: File, - http_action: File, - multiflow_stories: File, - bot: Text, - user: Text, - overwrite: bool = True, + self, + nlu: File, + domain: File, + stories: File, + config: File, + rules: File, + http_action: File, + multiflow_stories: File, + bot_content: File, + bot: Text, + user: Text, + overwrite: bool = True, ): """ loads the training data into database @@ -191,13 +191,14 @@ async def upload_and_save( :param http_action: http_actions data :param config: config data :param multiflow_stories: multiflow_stories data + :param bot_content: bot_content data :param bot: bot id :param user: user id :param overwrite: whether to append or overwrite, default is overwite :return: None """ training_file_loc = await DataUtility.save_training_files( - nlu, domain, config, stories, rules, http_action, multiflow_stories + nlu, domain, config, stories, rules, http_action, multiflow_stories, bot_content ) await self.save_from_path(training_file_loc["root"], bot, overwrite, user) Utility.delete_directory(training_file_loc["root"]) @@ -223,6 +224,7 @@ def download_files(self, bot: Text, user: Text, download_multiflow: bool = False rules = rules.merge(multiflow_stories[1]) multiflow_stories = 
self.load_multiflow_stories_yaml(bot) actions = self.load_action_configurations(bot) + bot_content = self.load_bot_content(bot) return Utility.create_zip_file( nlu, domain, @@ -233,6 +235,7 @@ def download_files(self, bot: Text, user: Text, download_multiflow: bool = False actions, multiflow_stories, chat_client_config, + bot_content ) async def apply_template(self, template: Text, bot: Text, user: Text): @@ -272,6 +275,7 @@ async def save_from_path( config_path = os.path.join(path, DEFAULT_CONFIG_PATH) actions_yml = os.path.join(path, "actions.yml") multiflow_stories_yml = os.path.join(path, "multiflow_stories.yml") + bot_content_yml = os.path.join(path, "bot_content.yml") importer = RasaFileImporter.load_from_config( config_path=config_path, domain_path=domain_path, @@ -287,6 +291,11 @@ async def save_from_path( if multiflow_stories_yml else None ) + bot_content = ( + Utility.read_yaml(bot_content_yml) + if bot_content_yml + else None + ) TrainingDataValidator.validate_custom_actions(actions) self.save_training_data( @@ -298,6 +307,7 @@ async def save_from_path( nlu, actions, multiflow_stories, + bot_content, overwrite=overwrite, what=REQUIREMENTS.copy() - {"chat_client_config"}, ) @@ -306,18 +316,19 @@ async def save_from_path( raise AppException(e) def save_training_data( - self, - bot: Text, - user: Text, - config: dict = None, - domain: Domain = None, - story_graph: StoryGraph = None, - nlu: TrainingData = None, - actions: dict = None, - multiflow_stories: dict = None, - chat_client_config: dict = None, - overwrite: bool = False, - what: set = REQUIREMENTS.copy(), + self, + bot: Text, + user: Text, + config: dict = None, + domain: Domain = None, + story_graph: StoryGraph = None, + nlu: TrainingData = None, + actions: dict = None, + multiflow_stories: dict = None, + bot_content: list = None, + chat_client_config: dict = None, + overwrite: bool = False, + what: set = REQUIREMENTS.copy(), ): if overwrite: self.delete_bot_data(bot, user, what) @@ -338,6 +349,8 @@ 
def save_training_data( self.save_chat_client_config(chat_client_config, bot, user) if "multiflow_stories" in what: self.save_multiflow_stories(multiflow_stories, bot, user) + if "bot_content" in what: + self.save_bot_content(bot_content, bot, user) def apply_config(self, template: Text, bot: Text, user: Text): """ @@ -366,6 +379,67 @@ def load_multiflow_stories_yaml(self, bot: Text): multiflow = json.loads(multiflow) return {"multiflow_story": multiflow} + def load_bot_content(self, bot: Text): + doc = BotSettings.objects(bot=bot).get().to_mongo().to_dict() + bot_content = [] + if doc['llm_settings'].get('enable_faq'): + bot_content = self.__prepare_cognition_data_for_bot(bot) + return bot_content + + def __prepare_cognition_data_for_bot(self, bot: Text) -> List[Dict[str, Any]]: + """ + Aggregate cognition data for a specific bot. + This function queries the cognition schema database to get collections and metadata + for a particular bot, and then queries the cognition data database to fetch content_type + and data field values for each collection. It returns the aggregated data in the form + of a list of dictionaries, where each dictionary contains collection, type, metadata, + and data fields. + :param bot: The ID of the bot for which to aggregate data. + :return: A list of dictionaries containing aggregated data for the bot. 
+ """ + schema_results = CognitionSchema.objects(bot=bot).only("collection_name", "metadata") + + formatted_result = [] + for schema_result in schema_results: + collection_name = schema_result.collection_name + metadata = [{"column_name": meta.column_name, + "data_type": meta.data_type, + "enable_search": meta.enable_search, + "create_embeddings": meta.create_embeddings} + for meta in schema_result.metadata] + + if not metadata: + type_value = "text" + else: + type_value = "json" + + collection_data = { + "collection": collection_name, + "type": type_value, + "metadata": metadata, + "data": [] + } + + data_results = CognitionData.objects(bot=bot, collection=collection_name).only("content_type", "data") + for data_result in data_results: + collection_data["data"].append(data_result.data) + + formatted_result.append(collection_data) + + data_results_no_collection = CognitionData.objects(bot=bot, collection=None).only("content_type", "data") + default_collection_data = { + "collection": "Default", + "type": "text", + "metadata": [], + "data": [] + } + for data_result in data_results_no_collection: + default_collection_data["data"].append(data_result.data) + + formatted_result.append(default_collection_data) + + return formatted_result + def get_config_templates(self): """ fetches list of available config template @@ -401,6 +475,8 @@ def delete_bot_data(self, bot: Text, user: Text, what=REQUIREMENTS.copy()): self.delete_bot_actions(bot, user) if "multiflow_stories" in what: self.delete_multiflow_stories(bot, user) + if "bot_content" in what: + self.delete_bot_content(bot, user) def save_nlu(self, nlu: TrainingData, bot: Text, user: Text): """ @@ -553,6 +629,16 @@ def delete_multiflow_stories(self, bot: Text, user: Text): """ Utility.hard_delete_document([MultiflowStories], bot=bot) + def delete_bot_content(self, bot: Text, user: Text): + """ + hard deletes bot content (cognition schema and cognition data) + :param bot: bot id + :param user: user id + :return: None + """ +
Utility.hard_delete_document([CognitionSchema], bot=bot) + Utility.hard_delete_document([CognitionData], bot=bot) + def save_multiflow_stories(self, multiflow_stories: dict, bot: Text, user: Text): """ saves multiflow stories data @@ -566,6 +652,60 @@ def save_multiflow_stories(self, multiflow_stories: dict, bot: Text, user: Text) multiflow_stories["multiflow_story"], bot, user ) + def __save_cognition_schema(self, bot_content: list, bot: Text, user: Text): + for data_item in bot_content: + if data_item['collection'] != 'Default': + existing_schema = CognitionSchema.objects(bot=bot, collection_name=data_item['collection']).first() + if not existing_schema: + cognition_schema = CognitionSchema( + metadata=[ColumnMetadata(**md) for md in data_item['metadata']], + collection_name=data_item['collection'], + user=user, + bot=bot, + timestamp=datetime.utcnow() + ) + cognition_schema.save() + + def __save_cognition_data(self, bot_content: list, bot: Text, user: Text): + for data_item in bot_content: + + collection_name = data_item['collection'] + if collection_name == 'Default': + collection_name = None + + if data_item['type'] == 'text': + for text_data in data_item['data']: + cognition_data = CognitionData( + data=text_data, + content_type='text', + collection=collection_name, + user=user, + bot=bot + ) + cognition_data.save() + elif data_item['type'] == 'json': + for json_data in data_item['data']: + cognition_data = CognitionData( + data=json_data, + content_type='json', + collection=collection_name, + user=user, + bot=bot + ) + cognition_data.save() + + def save_bot_content(self, bot_content: list, bot: Text, user: Text): + """ + saves bot content data + :param bot_content: bot content data + :param bot: bot id + :param user: user id + :return: None + """ + if bot_content: + self.__save_cognition_schema(bot_content, bot, user) + self.__save_cognition_data(bot_content, bot, user) + def load_linear_flows_from_multiflow_stories( self, bot: Text ) -> (StoryGraph, 
StoryGraph): diff --git a/kairon/shared/data/utils.py b/kairon/shared/data/utils.py index d3b2cabb9..98d5bbc57 100644 --- a/kairon/shared/data/utils.py +++ b/kairon/shared/data/utils.py @@ -16,7 +16,8 @@ from .constant import ALLOWED_NLU_FORMATS, ALLOWED_STORIES_FORMATS, \ ALLOWED_DOMAIN_FORMATS, ALLOWED_CONFIG_FORMATS, EVENT_STATUS, ALLOWED_RULES_FORMATS, ALLOWED_ACTIONS_FORMATS, \ - REQUIREMENTS, ACCESS_ROLES, TOKEN_TYPE, ALLOWED_CHAT_CLIENT_CONFIG_FORMATS, ALLOWED_MULTIFLOW_STORIES_FORMATS + REQUIREMENTS, ACCESS_ROLES, TOKEN_TYPE, ALLOWED_CHAT_CLIENT_CONFIG_FORMATS, ALLOWED_MULTIFLOW_STORIES_FORMATS, \ + ALLOWED_BOT_CONTENT_FORMATS from .constant import RESPONSE from .data_objects import MultiflowStories from .training_data_generation_processor import TrainingDataGenerationProcessor @@ -66,7 +67,7 @@ async def save_uploaded_data(bot: Text, training_files: [File]): path = os.path.join(data_path, file.filename) Utility.write_to_file(path, await file.read()) elif file.filename in ALLOWED_CONFIG_FORMATS.union(ALLOWED_DOMAIN_FORMATS).union( - ALLOWED_ACTIONS_FORMATS, ALLOWED_CHAT_CLIENT_CONFIG_FORMATS, ALLOWED_MULTIFLOW_STORIES_FORMATS): + ALLOWED_ACTIONS_FORMATS, ALLOWED_CHAT_CLIENT_CONFIG_FORMATS, ALLOWED_MULTIFLOW_STORIES_FORMATS, ALLOWED_BOT_CONTENT_FORMATS): path = os.path.join(bot_data_home_dir, file.filename) Utility.write_to_file(path, await file.read()) @@ -123,6 +124,8 @@ def validate_and_get_requirements(bot_data_home_dir: Text, delete_dir_on_excepti requirements.add('chat_client_config') if ALLOWED_MULTIFLOW_STORIES_FORMATS.intersection(files_received).__len__() < 1: requirements.add('multiflow_stories') + if ALLOWED_BOT_CONTENT_FORMATS.intersection(files_received).__len__() < 1: + requirements.add('bot_content') if requirements == REQUIREMENTS: if delete_dir_on_exception: @@ -132,7 +135,7 @@ def validate_and_get_requirements(bot_data_home_dir: Text, delete_dir_on_excepti @staticmethod async def save_training_files(nlu: File, domain: File, config: File, 
stories: File, rules: File = None, - http_action: File = None, multiflow_stories: File = None): + http_action: File = None, multiflow_stories: File = None, bot_content: File = None): """ convert mongo data to individual files @@ -142,7 +145,8 @@ async def save_training_files(nlu: File, domain: File, config: File, stories: Fi :param config: config data :param rules: rules data :param http_action: http actions data - param multiflow_stories: multiflow_stories data + :param multiflow_stories: multiflow_stories data + :param bot_content: bot_content data :return: files path """ from rasa.shared.constants import DEFAULT_DATA_PATH @@ -165,6 +169,8 @@ async def save_training_files(nlu: File, domain: File, config: File, stories: Fi training_file_loc['http_action'] = await DataUtility.write_http_data(tmp_dir, http_action) training_file_loc['multiflow_stories'] = await DataUtility.write_multiflow_stories_data(tmp_dir, multiflow_stories) + training_file_loc['bot_content'] = await DataUtility.write_bot_content_data(tmp_dir, + bot_content) training_file_loc['nlu'] = nlu_path training_file_loc['config'] = config_path training_file_loc['stories'] = stories_path @@ -213,6 +219,19 @@ async def write_multiflow_stories_data(data_path: str, multiflow_stories: File = Utility.write_to_file(multiflow_stories_path, await multiflow_stories.read()) return multiflow_stories_path + @staticmethod + async def write_bot_content_data(temp_path: str, bot_content: File = None): + """ + writes the bot content data to file and returns the file path + :param temp_path: path of the data files + :param bot_content: bot_content data + :return: bot_content file path + """ + if bot_content and bot_content.filename: + bot_content_path: str = os.path.join(temp_path, bot_content.filename) + Utility.write_to_file(bot_content_path, await bot_content.read()) + return bot_content_path + @staticmethod def extract_text_and_entities(text: Text): """ diff --git a/kairon/shared/importer/data_objects.py 
b/kairon/shared/importer/data_objects.py index 9e4b0150d..e143a1b51 100644 --- a/kairon/shared/importer/data_objects.py +++ b/kairon/shared/importer/data_objects.py @@ -31,6 +31,7 @@ class ValidationLogs(DynamicDocument): rules = EmbeddedDocumentField(TrainingComponentLog, default=TrainingComponentLog) actions = ListField() multiflow_stories = EmbeddedDocumentField(TrainingComponentLog, default=TrainingComponentLog) + bot_content = EmbeddedDocumentField(TrainingComponentLog, default=TrainingComponentLog) user_actions = EmbeddedDocumentField(TrainingComponentLog, default=TrainingComponentLog) exception = StringField(default="") is_data_uploaded = BooleanField(default=False) diff --git a/kairon/shared/importer/processor.py b/kairon/shared/importer/processor.py index b43ccbb81..df014f1fd 100644 --- a/kairon/shared/importer/processor.py +++ b/kairon/shared/importer/processor.py @@ -7,7 +7,6 @@ from kairon.shared.data.constant import EVENT_STATUS from kairon.exceptions import AppException from kairon.shared.data.data_objects import BotSettings -from kairon.shared.utils import Utility from kairon.shared.importer.data_objects import ValidationLogs, TrainingComponentLog, DomainLog @@ -85,6 +84,7 @@ def update_summary(bot: str, user: str, component_count: dict, summary: dict, st summary.keys() if s in {f'{a_type.value}s' for a_type in ActionType}] doc.multiflow_stories = TrainingComponentLog(count=component_count.get('multiflow_stories'), data=summary.get('multiflow_stories')) + doc.bot_content = TrainingComponentLog(data=summary.get('bot_content')) doc.user_actions = TrainingComponentLog(count=component_count.get('user_actions'), data=summary.get('user_actions')) doc.actions = action_summary diff --git a/kairon/shared/utils.py b/kairon/shared/utils.py index 37d3964aa..9392f6fda 100644 --- a/kairon/shared/utils.py +++ b/kairon/shared/utils.py @@ -1108,14 +1108,15 @@ def is_data_import_allowed(summary: dict, bot: Text, user: Text): @staticmethod def write_training_data( - 
nlu, - domain, - config: dict, - stories, - rules=None, - actions: dict = None, - chat_client_config: dict = None, - multiflow_stories: dict = None, + nlu, + domain, + config: dict, + stories, + rules=None, + actions: dict = None, + chat_client_config: dict = None, + multiflow_stories: dict = None, + bot_content: list = None, ): """ convert mongo data to individual files @@ -1128,6 +1129,7 @@ def write_training_data( :param rules: rules data :param actions: action configuration data :param multiflow_stories: multiflow_stories configurations + :param bot_content: bot content :return: files path """ from rasa.shared.core.training_data.story_writer.yaml_story_writer import ( @@ -1151,7 +1153,7 @@ def write_training_data( actions_path = os.path.join(temp_path, "actions.yml") chat_client_config_path = os.path.join(temp_path, "chat_client_config.yml") multiflow_stories_config_path = os.path.join(temp_path, "multiflow_stories.yml") - + bot_content_path = os.path.join(temp_path, "bot_content.yml") nlu_as_str = nlu.nlu_as_yaml().encode() config_as_str = yaml.dump(config).encode() @@ -1176,19 +1178,24 @@ def write_training_data( Utility.write_to_file( multiflow_stories_config_path, multiflow_stories_as_str ) + bot_content_as_str = yaml.dump(bot_content).encode() + Utility.write_to_file( + bot_content_path, bot_content_as_str + ) return temp_path @staticmethod def create_zip_file( - nlu, - domain, - stories, - config: Dict, - bot: Text, - rules=None, - actions: Dict = None, - multiflow_stories: Dict = None, - chat_client_config: Dict = None, + nlu, + domain, + stories, + config: Dict, + bot: Text, + rules=None, + actions: Dict = None, + multiflow_stories: Dict = None, + chat_client_config: Dict = None, + bot_content: List = None, ): """ adds training files to zip @@ -1202,6 +1209,7 @@ def create_zip_file( :param rules: rules data :param actions: action configurations :param multiflow_stories: multiflow_stories configurations + :param bot_content: bot_content :return: None """ 
directory = Utility.write_training_data( @@ -1213,6 +1221,7 @@ def create_zip_file( actions, chat_client_config, multiflow_stories, + bot_content, ) zip_path = os.path.join(tempfile.gettempdir(), bot) zip_file = shutil.make_archive(zip_path, format="zip", root_dir=directory) diff --git a/requirements/dev.txt b/requirements/dev.txt index 0943e58a7..d411ab022 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -13,3 +13,4 @@ pytest-cov==5.0.0 pytest-html==4.1.1 pytest-aioresponses==0.2.0 aioresponses==0.7.6 +pykwalify==1.8.0 \ No newline at end of file diff --git a/requirements/prod.txt b/requirements/prod.txt index 1f6251416..99d38795a 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -62,4 +62,5 @@ opentelemetry-instrumentation-system-metrics==0.45b0 opentelemetry-instrumentation-starlette==0.45b0 opentelemetry-instrumentation-grpc==0.45b0 +pykwalify==1.8.0 diff --git a/tests/integration_test/services_test.py b/tests/integration_test/services_test.py index 35bfa672d..bf4577282 100644 --- a/tests/integration_test/services_test.py +++ b/tests/integration_test/services_test.py @@ -33,6 +33,7 @@ from kairon.shared.actions.utils import ActionUtility from kairon.shared.auth import Authentication from kairon.shared.cloud.utils import CloudUtility +from kairon.shared.cognition.data_objects import CognitionSchema, CognitionData from kairon.shared.constants import EventClass, ChannelTypes from kairon.shared.data.audit.data_objects import AuditLogData from kairon.shared.data.constant import ( @@ -106,8 +107,9 @@ def complete_end_to_end_event_execution(bot, user, event_class, **kwargs): from kairon.events.definitions.model_testing import ModelTestingEvent from kairon.events.definitions.history_delete import DeleteHistoryEvent + overwrite = kwargs.get('overwrite', True) if event_class == EventClass.data_importer: - TrainingDataImporterEvent(bot, user, import_data=True, overwrite=True).execute() + TrainingDataImporterEvent(bot, user, import_data=True, 
overwrite=overwrite).execute() elif event_class == EventClass.model_training: ModelTrainingEvent(bot, user).execute() elif event_class == EventClass.model_testing: @@ -497,6 +499,7 @@ def test_account_registation_invalid_email(): } ] + @responses.activate def test_account_registation_invalid_email_quick_email_valid(): email = "test@temporay.com" @@ -645,7 +648,7 @@ def test_api_wrong_password(): assert not actual["success"] assert actual["message"] == "Incorrect username or password" value = list(AuditLogData.objects(user="integration@demo.ai", action='activity', entity='invalid_login')) - + assert value[0]["entity"] == "invalid_login" assert value[0]["timestamp"] assert len(value) == 1 @@ -1123,6 +1126,428 @@ def test_list_bots(): assert response["data"]["account_owned"][1]["_id"] assert response["data"]["shared"] == [] + +def test_get_client_config_with_nudge_server_url(): + expected_app_server_url = Utility.environment['app']['server_url'] + expected_nudge_server_url = Utility.environment['nudge']['server_url'] + expected_chat_server_url = Utility.environment['model']['agent']['url'] + + response = client.get(f"/api/bot/{pytest.bot}/chat/client/config", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}) + actual = response.json() + assert actual["success"] + assert actual["error_code"] == 0 + assert actual["data"] + assert actual["data"]["welcomeMessage"] == 'Hello! How are you?' 
+ assert actual["data"]["name"] == 'kairon' + assert actual["data"]["buttonType"] == 'button' + assert actual["data"]["whitelist"] == ["*"] + assert actual["data"]["nudge_server_url"] == expected_nudge_server_url + assert actual["data"]["api_server_host_url"] == expected_app_server_url + assert actual["data"]["chat_server_base_url"] == expected_chat_server_url + + +@responses.activate +def test_upload_with_bot_content_only_validate_content_data(): + bot_settings = BotSettings.objects(bot=pytest.bot).get() + bot_settings.data_importer_limit_per_day = 10 + bot_settings.llm_settings['enable_faq'] = True + bot_settings.save() + + event_url = urljoin( + Utility.environment["events"]["server_url"], + f"/api/events/execute/{EventClass.data_importer}", + ) + responses.add( + "POST", + event_url, + json={"success": True, "message": "Event triggered successfully!"}, + ) + + files = ( + ( + "training_files", + ( + "bot_content.yml", + open("tests/testing_data/all/bot_content.yml", "rb"), + ), + ), + ) + response = client.post( + f"/api/bot/{pytest.bot}/upload?import_data=true&overwrite=true", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + files=files, + ) + actual = response.json() + assert actual["message"] == "Upload in progress! Check logs." 
+ assert actual["error_code"] == 0 + assert actual["data"] is None + assert actual["success"] + + complete_end_to_end_event_execution( + pytest.bot, "integration@demo.ai", EventClass.data_importer + ) + + response = client.get( + f"/api/bot/{pytest.bot}/importer/logs?start_idx=0&page_size=10", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + ) + actual = response.json() + assert actual["success"] + assert actual["error_code"] == 0 + assert actual["data"]["logs"][0]["event_status"] == EVENT_STATUS.COMPLETED.value + assert set(actual["data"]["logs"][0]["files_received"]) == {"bot_content"} + assert actual["data"]["logs"][0]["is_data_uploaded"] + assert actual["data"]["logs"][0]["start_timestamp"] + assert actual["data"]["logs"][0]["end_timestamp"] + + response = client.get( + f"/api/bot/{pytest.bot}/data/cognition", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + ) + actual = response.json() + print(actual) + + assert actual["success"] + assert actual["error_code"] == 0 + assert "data" in actual + assert "rows" in actual["data"] + assert len(actual["data"]["rows"]) == 3 + assert all(row["content_type"] == "text" for row in actual["data"]["rows"]) + assert actual["data"]["rows"][0]["data"] == "I am testing upload download bot content in Default Collection 3" + assert all(row["collection"] is None for row in actual["data"]["rows"]) + + CognitionData.objects(bot=pytest.bot).delete() + CognitionSchema.objects(bot=pytest.bot).delete() + + bot_settings = BotSettings.objects(bot=pytest.bot).get() + bot_settings.llm_settings['enable_faq'] = False + bot_settings.save() + + +@responses.activate +def test_upload_with_bot_content_valifdate_payload_data(): + bot_settings = BotSettings.objects(bot=pytest.bot).get() + bot_settings.data_importer_limit_per_day = 10 + bot_settings.llm_settings['enable_faq'] = True + bot_settings.save() + + event_url = urljoin( + Utility.environment["events"]["server_url"], + 
f"/api/events/execute/{EventClass.data_importer}", + ) + responses.add( + "POST", + event_url, + json={"success": True, "message": "Event triggered successfully!"}, + ) + + files = ( + ( + "training_files", + ("nlu.yml", open("template/use-cases/Hi-Hello/data/nlu.yml", "rb")), + ), + ( + "training_files", + ("domain.yml", open("template/use-cases/Hi-Hello/domain.yml", "rb")), + ), + ( + "training_files", + ("stories.yml", open("template/use-cases/Hi-Hello/data/stories.yml", "rb")), + ), + ( + "training_files", + ("config.yml", open("template/use-cases/Hi-Hello/config.yml", "rb")), + ), + ( + "training_files", + ( + "chat_client_config.yml", + open("tests/testing_data/all/chat_client_config.yml", "rb"), + ), + ), + ( + "training_files", + ( + "bot_content.yml", + open("tests/testing_data/all/bot_content.yml", "rb"), + ), + ), + ) + response = client.post( + f"/api/bot/{pytest.bot}/upload?import_data=true&overwrite=true", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + files=files, + ) + actual = response.json() + assert actual["message"] == "Upload in progress! Check logs." 
+ assert actual["error_code"] == 0 + assert actual["data"] is None + assert actual["success"] + + complete_end_to_end_event_execution( + pytest.bot, "integration@demo.ai", EventClass.data_importer + ) + + response = client.get( + f"/api/bot/{pytest.bot}/importer/logs?start_idx=0&page_size=10", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + ) + actual = response.json() + assert actual["success"] + assert actual["error_code"] == 0 + assert actual["data"]["logs"][0]["event_status"] == EVENT_STATUS.COMPLETED.value + assert actual["data"]["logs"][0]["is_data_uploaded"] + assert actual["data"]["logs"][0]["start_timestamp"] + assert actual["data"]["logs"][0]["end_timestamp"] + + response = client.get( + f"/api/bot/{pytest.bot}/data/cognition", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + params={"collection": "test_payload_collection"} + + ) + actual = response.json() + print(actual) + + assert actual["success"] + assert actual["error_code"] == 0 + assert "data" in actual + assert "rows" in actual["data"] + assert len(actual["data"]["rows"]) == 3 + assert all(row["content_type"] == "json" for row in actual["data"]["rows"]) + assert actual["data"]["rows"][0]["data"]["city"] == "City 3" + assert actual["data"]["rows"][1]["data"]["population"] == "200" + assert all(row["collection"] == "test_payload_collection" for row in actual["data"]["rows"]) + + CognitionData.objects(bot=pytest.bot).delete() + CognitionSchema.objects(bot=pytest.bot).delete() + + bot_settings = BotSettings.objects(bot=pytest.bot).get() + bot_settings.llm_settings['enable_faq'] = False + bot_settings.save() + + +@responses.activate +def test_upload_with_bot_content_using_event_append_validate_content_data(): + bot_settings = BotSettings.objects(bot=pytest.bot).get() + bot_settings.data_importer_limit_per_day = 10 + bot_settings.llm_settings['enable_faq'] = True + bot_settings.save() + + event_url = urljoin( + 
Utility.environment["events"]["server_url"], + f"/api/events/execute/{EventClass.data_importer}", + ) + responses.add( + "POST", + event_url, + json={"success": True, "message": "Event triggered successfully!"}, + ) + + files = ( + ( + "training_files", + ("nlu.yml", open("template/use-cases/Hi-Hello/data/nlu.yml", "rb")), + ), + ( + "training_files", + ("domain.yml", open("template/use-cases/Hi-Hello/domain.yml", "rb")), + ), + ( + "training_files", + ("stories.yml", open("template/use-cases/Hi-Hello/data/stories.yml", "rb")), + ), + ( + "training_files", + ("config.yml", open("template/use-cases/Hi-Hello/config.yml", "rb")), + ), + ( + "training_files", + ( + "chat_client_config.yml", + open("tests/testing_data/all/chat_client_config.yml", "rb"), + ), + ), + ( + "training_files", + ( + "bot_content.yml", + open("tests/testing_data/all/bot_content.yml", "rb"), + ), + ), + ) + response = client.post( + f"/api/bot/{pytest.bot}/upload?import_data=true&overwrite=true", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + files=files, + ) + complete_end_to_end_event_execution( + pytest.bot, "integration@demo.ai", EventClass.data_importer + ) + + response = client.post( + f"/api/bot/{pytest.bot}/upload?import_data=true&overwrite=false", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + files=files, + ) + + actual = response.json() + assert actual["message"] == "Upload in progress! Check logs." 
+ assert actual["error_code"] == 0 + assert actual["data"] is None + assert actual["success"] + + complete_end_to_end_event_execution( + pytest.bot, "integration@demo.ai", EventClass.data_importer, overwrite=False + ) + + response = client.get( + f"/api/bot/{pytest.bot}/importer/logs?start_idx=0&page_size=10", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + ) + actual = response.json() + assert actual["success"] + assert actual["error_code"] == 0 + assert actual["data"]["logs"][0]["event_status"] == EVENT_STATUS.COMPLETED.value + assert actual["data"]["logs"][0]["is_data_uploaded"] + assert actual["data"]["logs"][0]["start_timestamp"] + assert actual["data"]["logs"][0]["end_timestamp"] + + response = client.get( + f"/api/bot/{pytest.bot}/data/cognition", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + params={"collection": "test_content_collection"} + ) + actual = response.json() + + assert actual["success"] + assert actual["error_code"] == 0 + assert "data" in actual + assert "rows" in actual["data"] + assert len(actual["data"]["rows"]) == 4 + assert all(row["content_type"] == "text" for row in actual["data"]["rows"]) + assert actual["data"]["rows"][2]["data"] == "I am testing upload download bot content in Content Collection 2" + assert all(row["collection"] == "test_content_collection" for row in actual["data"]["rows"]) + + CognitionData.objects(bot=pytest.bot).delete() + CognitionSchema.objects(bot=pytest.bot).delete() + + bot_settings = BotSettings.objects(bot=pytest.bot).get() + bot_settings.llm_settings['enable_faq'] = False + bot_settings.save() + + +@responses.activate +def test_upload_with_bot_content_event_append_validate_payload_data(): + bot_settings = BotSettings.objects(bot=pytest.bot).get() + bot_settings.data_importer_limit_per_day = 10 + bot_settings.llm_settings['enable_faq'] = True + bot_settings.save() + + event_url = urljoin( + Utility.environment["events"]["server_url"], + 
f"/api/events/execute/{EventClass.data_importer}", + ) + responses.add( + "POST", + event_url, + json={"success": True, "message": "Event triggered successfully!"}, + ) + + files = ( + ( + "training_files", + ("nlu.yml", open("template/use-cases/Hi-Hello/data/nlu.yml", "rb")), + ), + ( + "training_files", + ("domain.yml", open("template/use-cases/Hi-Hello/domain.yml", "rb")), + ), + ( + "training_files", + ("stories.yml", open("template/use-cases/Hi-Hello/data/stories.yml", "rb")), + ), + ( + "training_files", + ("config.yml", open("template/use-cases/Hi-Hello/config.yml", "rb")), + ), + ( + "training_files", + ( + "chat_client_config.yml", + open("tests/testing_data/all/chat_client_config.yml", "rb"), + ), + ), + ( + "training_files", + ( + "bot_content.yml", + open("tests/testing_data/all/bot_content.yml", "rb"), + ), + ), + ) + response = client.post( + f"/api/bot/{pytest.bot}/upload?import_data=true&overwrite=true", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + files=files, + ) + complete_end_to_end_event_execution( + pytest.bot, "integration@demo.ai", EventClass.data_importer + ) + + response = client.post( + f"/api/bot/{pytest.bot}/upload?import_data=true&overwrite=false", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + files=files, + ) + actual = response.json() + assert actual["message"] == "Upload in progress! Check logs." 
+ assert actual["error_code"] == 0 + assert actual["data"] is None + assert actual["success"] + + complete_end_to_end_event_execution( + pytest.bot, "integration@demo.ai", EventClass.data_importer, overwrite=False + ) + + response = client.get( + f"/api/bot/{pytest.bot}/importer/logs?start_idx=0&page_size=10", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + ) + actual = response.json() + assert actual["success"] + assert actual["error_code"] == 0 + assert actual["data"]["logs"][0]["event_status"] == EVENT_STATUS.COMPLETED.value + assert actual["data"]["logs"][0]["is_data_uploaded"] + assert actual["data"]["logs"][0]["start_timestamp"] + assert actual["data"]["logs"][0]["end_timestamp"] + + response = client.get( + f"/api/bot/{pytest.bot}/data/cognition", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + params={"collection": "test_payload_collection"} + ) + actual = response.json() + + assert actual["success"] + assert actual["error_code"] == 0 + assert "data" in actual + assert "rows" in actual["data"] + assert len(actual["data"]["rows"]) == 6 + assert all(row["content_type"] == "json" for row in actual["data"]["rows"]) + assert actual["data"]["rows"][4]["data"]["city"] == "City 2" + assert actual["data"]["rows"][1]["data"]["population"] == "200" + assert all(row["collection"] == "test_payload_collection" for row in actual["data"]["rows"]) + + CognitionData.objects(bot=pytest.bot).delete() + CognitionSchema.objects(bot=pytest.bot).delete() + + bot_settings = BotSettings.objects(bot=pytest.bot).get() + bot_settings.llm_settings['enable_faq'] = False + bot_settings.save() + + def test_get_live_agent_with_no_live_agent(): response = client.get( url=f"/api/bot/{pytest.bot}/action/live_agent", @@ -1205,6 +1630,7 @@ def test_update_live_agent(): assert actual["message"] == 'Action updated!' 
assert actual["success"] + def test_get_live_agent_after_updated(): response = client.get( url=f"/api/bot/{pytest.bot}/action/live_agent", @@ -1523,26 +1949,6 @@ def test_list_pyscript_actions_after_action_deleted(): assert actual["data"][0]["dispatch_response"] -def test_get_client_config_with_nudge_server_url(): - expected_app_server_url = Utility.environment['app']['server_url'] - expected_nudge_server_url = Utility.environment['nudge']['server_url'] - expected_chat_server_url = Utility.environment['model']['agent']['url'] - - response = client.get(f"/api/bot/{pytest.bot}/chat/client/config", - headers={"Authorization": pytest.token_type + " " + pytest.access_token}) - actual = response.json() - assert actual["success"] - assert actual["error_code"] == 0 - assert actual["data"] - assert actual["data"]["welcomeMessage"] == 'Hello! How are you?' - assert actual["data"]["name"] == 'kairon' - assert actual["data"]["buttonType"] == 'button' - assert actual["data"]["whitelist"] == ["*"] - assert actual["data"]["nudge_server_url"] == expected_nudge_server_url - assert actual["data"]["api_server_host_url"] == expected_app_server_url - assert actual["data"]["chat_server_base_url"] == expected_chat_server_url - - def test_get_client_config_url_with_ip_info(monkeypatch): monkeypatch.setitem( Utility.environment["model"]["agent"], "url", "http://localhost" @@ -3751,10 +4157,12 @@ def _mock_get_bot_settings(*args, **kwargs): {'name': 'Similarity Prompt', 'data': 'default', 'instructions': 'Answer question based on the context above, if answer is not in the context go check previous logs.', 'type': 'user', 'source': 'bot_content', 'is_enabled': True}, - {'name': 'Query Prompt', 'data': 'A programming language is a system of notation for writing computer programs.[1] Most programming languages are text-based formal languages, but they may also be graphical. 
They are a kind of computer language.', + {'name': 'Query Prompt', + 'data': 'A programming language is a system of notation for writing computer programs.[1] Most programming languages are text-based formal languages, but they may also be graphical. They are a kind of computer language.', 'instructions': 'Answer according to the context', 'type': 'query', 'source': 'static', 'is_enabled': True}, - {'name': 'Query Prompt', 'data': 'If there is no specific query, assume that user is aking about java programming.', + {'name': 'Query Prompt', + 'data': 'If there is no specific query, assume that user is aking about java programming.', 'instructions': 'Answer according to the context', 'type': 'query', 'source': 'static', 'is_enabled': True}], 'instructions': ['Answer in a short manner.', 'Keep it simple.'], 'set_slots': [], @@ -3837,10 +4245,12 @@ def _mock_get_bot_settings(*args, **kwargs): {'name': 'Similarity Prompt', 'data': 'states', 'instructions': 'Answer question based on the context above, if answer is not in the context go check previous logs.', 'type': 'user', 'source': 'bot_content', 'is_enabled': True}, - {'name': 'Query Prompt', 'data': 'A programming language is a system of notation for writing computer programs.[1] Most programming languages are text-based formal languages, but they may also be graphical. They are a kind of computer language.', + {'name': 'Query Prompt', + 'data': 'A programming language is a system of notation for writing computer programs.[1] Most programming languages are text-based formal languages, but they may also be graphical. 
They are a kind of computer language.', 'instructions': 'Answer according to the context', 'type': 'query', 'source': 'static', 'is_enabled': True}, - {'name': 'Query Prompt', 'data': 'If there is no specific query, assume that user is aking about java programming.', + {'name': 'Query Prompt', + 'data': 'If there is no specific query, assume that user is aking about java programming.', 'instructions': 'Answer according to the context', 'type': 'query', 'source': 'static', 'is_enabled': True}], 'instructions': ['Answer in a short manner.', 'Keep it simple.'], 'set_slots': [], 'dispatch_response': True, 'status': True} @@ -3850,7 +4260,6 @@ def _mock_get_bot_settings(*args, **kwargs): def test_add_prompt_action_with_bot_content_prompt_with_content(monkeypatch): - def _mock_get_bot_settings(*args, **kwargs): return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True), cognition_collections_limit=5) @@ -3927,10 +4336,12 @@ def _mock_get_bot_settings(*args, **kwargs): {'name': 'Similarity Prompt', 'data': 'python', 'instructions': 'Answer question based on the context above, if answer is not in the context go check previous logs.', 'type': 'user', 'source': 'bot_content', 'is_enabled': True}, - {'name': 'Query Prompt', 'data': 'A programming language is a system of notation for writing computer programs.[1] Most programming languages are text-based formal languages, but they may also be graphical. They are a kind of computer language.', + {'name': 'Query Prompt', + 'data': 'A programming language is a system of notation for writing computer programs.[1] Most programming languages are text-based formal languages, but they may also be graphical. 
They are a kind of computer language.', 'instructions': 'Answer according to the context', 'type': 'query', 'source': 'static', 'is_enabled': True}, - {'name': 'Query Prompt', 'data': 'If there is no specific query, assume that user is aking about java programming.', + {'name': 'Query Prompt', + 'data': 'If there is no specific query, assume that user is aking about java programming.', 'instructions': 'Answer according to the context', 'type': 'query', 'source': 'static', 'is_enabled': True}], 'instructions': ['Answer in a short manner.', 'Keep it simple.'], 'set_slots': [], @@ -4081,6 +4492,13 @@ def test_upload(): open("tests/testing_data/all/chat_client_config.yml", "rb"), ), ), + ( + "training_files", + ( + "bot_content.yml", + open("tests/testing_data/all/bot_content.yml", "rb"), + ), + ), ) response = client.post( f"/api/bot/{pytest.bot}/upload?import_data=true&overwrite=true", @@ -4227,7 +4645,7 @@ def test_upload_limit_exceeded(monkeypatch): @responses.activate def test_upload_using_event_failure(monkeypatch): bot_settings = BotSettings.objects(bot=pytest.bot).get() - bot_settings.data_importer_limit_per_day = 5 + bot_settings.data_importer_limit_per_day = 15 bot_settings.save() event_url = urljoin( Utility.environment["events"]["server_url"], @@ -4292,6 +4710,10 @@ def test_upload_using_event_failure(monkeypatch): @responses.activate def test_upload_using_event_append(monkeypatch): + bot_settings = BotSettings.objects(bot=pytest.bot).get() + bot_settings.data_importer_limit_per_day = 15 + bot_settings.save() + event_url = urljoin( Utility.environment["events"]["server_url"], f"/api/events/execute/{EventClass.data_importer}", @@ -4504,8 +4926,8 @@ def test_get_data_importer_logs(): actual = response.json() assert actual["success"] assert actual["error_code"] == 0 - assert len(actual["data"]["logs"]) == 4 - assert actual["data"]["total"] == 4 + assert len(actual["data"]["logs"]) == 10 + assert actual["data"]["total"] == 10 assert 
actual["data"]["logs"][0]["event_status"] == EVENT_STATUS.COMPLETED.value assert set(actual["data"]["logs"][0]["files_received"]) == { "stories", @@ -4546,6 +4968,7 @@ def test_get_data_importer_logs(): {'type': 'razorpay_actions', 'count': 0, 'data': []}, {'type': 'pyscript_actions', 'count': 0, 'data': []}], 'multiflow_stories': {'count': 0, 'data': []}, + 'bot_content': {'count': 0, 'data': []}, 'user_actions': {'count': 7, 'data': []}, 'exception': '', 'is_data_uploaded': True, @@ -4553,7 +4976,7 @@ def test_get_data_importer_logs(): assert actual['data']["logs"][2]['event_status'] == EVENT_STATUS.COMPLETED.value assert actual['data']["logs"][2]['status'] == 'Failure' assert set(actual['data']["logs"][2]['files_received']) == {'stories', 'nlu', 'domain', 'config', - 'chat_client_config'} + 'chat_client_config', 'bot_content'} assert actual['data']["logs"][2]['is_data_uploaded'] assert actual['data']["logs"][2]['start_timestamp'] assert actual['data']["logs"][2]['end_timestamp'] @@ -4561,7 +4984,7 @@ def test_get_data_importer_logs(): assert actual['data']["logs"][3]['event_status'] == EVENT_STATUS.COMPLETED.value assert actual['data']["logs"][3]['status'] == 'Failure' assert set(actual['data']["logs"][3]['files_received']) == {'rules', 'stories', 'nlu', 'domain', 'config', - 'actions', 'chat_client_config', 'multiflow_stories'} + 'actions', 'chat_client_config', 'multiflow_stories', 'bot_content'} assert actual['data']["logs"][3]['is_data_uploaded'] assert actual['data']["logs"][3]['start_timestamp'] assert actual['data']["logs"][3]['end_timestamp'] @@ -4593,7 +5016,7 @@ def test_get_data_importer_logs(): ] assert actual['data']["logs"][3]['is_data_uploaded'] assert set(actual['data']["logs"][3]['files_received']) == {'rules', 'stories', 'nlu', 'config', 'domain', - 'actions', 'chat_client_config', 'multiflow_stories'} + 'actions', 'chat_client_config', 'multiflow_stories','bot_content'} @responses.activate @@ -4760,11 +5183,12 @@ def 
test_download_data_with_chat_client_config(): ) file_bytes = BytesIO(response.content) zip_file = ZipFile(file_bytes, mode="r") - assert zip_file.filelist.__len__() == 9 + assert zip_file.filelist.__len__() == 10 assert zip_file.getinfo("chat_client_config.yml") assert zip_file.getinfo("config.yml") assert zip_file.getinfo("domain.yml") assert zip_file.getinfo("actions.yml") + assert zip_file.getinfo("bot_content.yml") assert zip_file.getinfo("multiflow_stories.yml") assert zip_file.getinfo("data/stories.yml") assert zip_file.getinfo("data/rules.yml") @@ -7903,7 +8327,7 @@ def test_download_data(): ) file_bytes = BytesIO(response.content) zip_file = ZipFile(file_bytes, mode="r") - assert zip_file.filelist.__len__() == 9 + assert zip_file.filelist.__len__() == 10 zip_file.close() file_bytes.close() @@ -8422,7 +8846,8 @@ def test_account_registration_with_confirmation(monkeypatch): response = client.post( "/api/account/bot", - headers={"Authorization": pytest.add_member_token_type + " " + pytest.add_member_token, 'Content-Type': 'application/json'}, + headers={"Authorization": pytest.add_member_token_type + " " + pytest.add_member_token, + 'Content-Type': 'application/json'}, json={"name": "Hi-Hello", "from_template": "Hi-Hello"}, ).json() assert response['message'] == "Bot created" @@ -14501,7 +14926,6 @@ def test_add_form(): assert actual["message"] == "Slot mapping added" assert actual["success"] - response = client.post( f"/api/bot/{pytest.bot}/slots", json={"name": "preferences", "type": "text"}, @@ -14611,9 +15035,9 @@ def test_add_form_with_any_slot(): json={ "slot": "user_feedback", "mapping": - {"type": "from_entity", "entity": "user_feedback"}, - }, - headers = {"Authorization": pytest.token_type + " " + pytest.access_token}, + {"type": "from_entity", "entity": "user_feedback"}, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, ) actual = response.json() assert actual["message"] == "Slot mapping added" @@ -15602,9 +16026,9 @@ 
def test_edit_form_with_any_slot(): json={ "slot": "account_required", "mapping": - {"type": "from_intent", "intent": ["deny"], "value": False}, - }, - headers = {"Authorization": pytest.token_type + " " + pytest.access_token}, + {"type": "from_intent", "intent": ["deny"], "value": False}, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, ) actual = response.json() assert actual["message"] == "Slot mapping added" @@ -15680,8 +16104,6 @@ def test_edit_form_with_any_slot(): def test_edit_slot_mapping(): - - response = client.post( f"/api/bot/{pytest.bot}/slots/mapping", json={ @@ -15713,7 +16135,6 @@ def test_edit_slot_mapping(): assert actual["error_code"] == 0 - def test_get_slot_mapping(): response = client.get( f"/api/bot/{pytest.bot}/slots/mapping", @@ -15725,7 +16146,6 @@ def test_get_slot_mapping(): for m in obj['mapping']: m.pop('_id') - assert not DeepDiff( actual["data"], [{'slot': 'ac_required', @@ -15733,7 +16153,7 @@ def test_get_slot_mapping(): {'type': 'from_intent', 'value': False, 'intent': ['deny']}]}, {'slot': 'account_required', 'mapping': [{'type': 'from_intent', 'value': True, 'intent': ['affirm']}, - {'type': 'from_intent', 'value': False, 'intent': ['deny'] }]}, + {'type': 'from_intent', 'value': False, 'intent': ['deny']}]}, {'slot': 'age', 'mapping': [ {'type': 'from_intent', 'value': '18', 'intent': ['get_age'], }]}, {'slot': 'cuisine', @@ -15741,11 +16161,11 @@ def test_get_slot_mapping(): {'type': 'from_intent', 'value': 'cuisine', 'intent': ['order', 'menu'], }]}, {'slot': 'feedback', 'mapping': [ - {'type': 'from_entity', 'entity': 'feedback'}]}, {'slot': 'location', - 'mapping': [{ - 'type': 'from_entity', - 'entity': 'location', - }]}, + {'type': 'from_entity', 'entity': 'feedback'}]}, {'slot': 'location', + 'mapping': [{ + 'type': 'from_entity', + 'entity': 'location', + }]}, {'slot': 'name', 'mapping': [{'type': 'from_text', 'value': 'user'}]}, {'slot': 'num_people', 'mapping': [ {'type': 
'from_entity', 'entity': 'number', 'intent': ['inform', 'request_restaurant'], @@ -15757,14 +16177,14 @@ def test_get_slot_mapping(): {'type': 'from_trigger_intent', 'value': 'tester', 'intent': ['get_business', 'is_engineer', 'is_tester'], 'not_intent': ['get_age', 'get_name']}]}, {'slot': 'outdoor_seating', - 'mapping': [ - {'type': 'from_text', - 'not_intent': [ - 'affirm'], - 'conditions': [{ - 'active_loop': 'booking', - 'requested_slot': 'outdoor_seating'}], - }]}, + 'mapping': [ + {'type': 'from_text', + 'not_intent': [ + 'affirm'], + 'conditions': [{ + 'active_loop': 'booking', + 'requested_slot': 'outdoor_seating'}], + }]}, {'slot': 'preferences', 'mapping': [{'type': 'from_text', 'not_intent': ['affirm'], 'conditions': [ {'active_loop': 'booking', 'requested_slot': 'preferences'}]}]}, {'slot': 'user_feedback', 'mapping': [{'type': 'from_text'}, @@ -15783,7 +16203,12 @@ def test_get_slot_mapping(): for m in obj['mapping']: m.pop('_id') assert actual["success"] - assert actual["data"] ==[{'slot': 'outdoor_seating', 'mapping': [{'type': 'from_text', 'not_intent': ['affirm'], 'conditions': [{'active_loop': 'booking', 'requested_slot': 'outdoor_seating'}]}]}, {'slot': 'preferences', 'mapping': [{'type': 'from_text', 'not_intent': ['affirm'], 'conditions': [{'active_loop': 'booking', 'requested_slot': 'preferences'}]}]}] + assert actual["data"] == [{'slot': 'outdoor_seating', 'mapping': [{'type': 'from_text', 'not_intent': ['affirm'], + 'conditions': [{'active_loop': 'booking', + 'requested_slot': 'outdoor_seating'}]}]}, + {'slot': 'preferences', 'mapping': [{'type': 'from_text', 'not_intent': ['affirm'], + 'conditions': [{'active_loop': 'booking', + 'requested_slot': 'preferences'}]}]}] assert actual["error_code"] == 0 @@ -22135,9 +22560,9 @@ def test_get_auditlog_for_bot(): headers={"Authorization": pytest.token_type + " " + pytest.access_token}, ) actual = response.json() - + audit_log_data = actual["data"]["logs"] - + assert audit_log_data is not None 
actions = [d["action"] for d in audit_log_data] from collections import Counter @@ -22179,7 +22604,6 @@ def _password_reset(*args, **kwargs): assert counter.get(AuditlogActions.SOFT_DELETE.value) >= 1 assert counter.get(AuditlogActions.UPDATE.value) > 5 - assert audit_log_data[0]["action"] == AuditlogActions.ACTIVITY.value assert audit_log_data[0]["entity"] == "login" assert audit_log_data[0]["user"] == email diff --git a/tests/testing_data/all/bot_content.yml b/tests/testing_data/all/bot_content.yml new file mode 100644 index 000000000..8900b6b76 --- /dev/null +++ b/tests/testing_data/all/bot_content.yml @@ -0,0 +1,31 @@ +- collection: test_content_collection + data: + - I am testing upload download bot content in Content Collection 1 + - I am testing upload download bot content in Content Collection 2 + metadata: [] + type: text +- collection: test_payload_collection + data: + - city: City 1 + population: '100' + - city: City 2 + population: '200' + - city: City 3 + population: '300' + metadata: + - column_name: city + create_embeddings: true + data_type: str + enable_search: true + - column_name: population + create_embeddings: true + data_type: int + enable_search: true + type: json +- collection: Default + data: + - I am testing upload download bot content in Default Collection 1 + - I am testing upload download bot content in Default Collection 2 + - I am testing upload download bot content in Default Collection 3 + metadata: [] + type: text \ No newline at end of file diff --git a/tests/testing_data/bot_content/bot_content.yml b/tests/testing_data/bot_content/bot_content.yml new file mode 100644 index 000000000..8900b6b76 --- /dev/null +++ b/tests/testing_data/bot_content/bot_content.yml @@ -0,0 +1,31 @@ +- collection: test_content_collection + data: + - I am testing upload download bot content in Content Collection 1 + - I am testing upload download bot content in Content Collection 2 + metadata: [] + type: text +- collection: test_payload_collection + 
data: + - city: City 1 + population: '100' + - city: City 2 + population: '200' + - city: City 3 + population: '300' + metadata: + - column_name: city + create_embeddings: true + data_type: str + enable_search: true + - column_name: population + create_embeddings: true + data_type: int + enable_search: true + type: json +- collection: Default + data: + - I am testing upload download bot content in Default Collection 1 + - I am testing upload download bot content in Default Collection 2 + - I am testing upload download bot content in Default Collection 3 + metadata: [] + type: text \ No newline at end of file diff --git a/tests/testing_data/bot_content/invalid_bot_content.yml b/tests/testing_data/bot_content/invalid_bot_content.yml new file mode 100644 index 000000000..377690c8d --- /dev/null +++ b/tests/testing_data/bot_content/invalid_bot_content.yml @@ -0,0 +1,28 @@ +- collection: test_content_collection + data: + - I am testing upload download bot content in Content Collection 1 + - I am testing upload download bot content in Content Collection 2 + metadata: [] + type: text +- data: + - city: City 1 + population: '100' + - city: City 2 + population: '200' + - city: City 3 + population: '300' + metadata: + - column_name: city + create_embeddings: true + data_type: str + enable_search: true + - column_name: population + create_embeddings: true + data_type: int + enable_search: true +- collection: Default + data: + - I am testing upload download bot content in Default Collection 1 + - I am testing upload download bot content in Default Collection 2 + - I am testing upload download bot content in Default Collection 3 + type: text \ No newline at end of file diff --git a/tests/testing_data/validator/append/bot_content.yml b/tests/testing_data/validator/append/bot_content.yml new file mode 100644 index 000000000..8900b6b76 --- /dev/null +++ b/tests/testing_data/validator/append/bot_content.yml @@ -0,0 +1,31 @@ +- collection: test_content_collection + data: + - I am 
testing upload download bot content in Content Collection 1 + - I am testing upload download bot content in Content Collection 2 + metadata: [] + type: text +- collection: test_payload_collection + data: + - city: City 1 + population: '100' + - city: City 2 + population: '200' + - city: City 3 + population: '300' + metadata: + - column_name: city + create_embeddings: true + data_type: str + enable_search: true + - column_name: population + create_embeddings: true + data_type: int + enable_search: true + type: json +- collection: Default + data: + - I am testing upload download bot content in Default Collection 1 + - I am testing upload download bot content in Default Collection 2 + - I am testing upload download bot content in Default Collection 3 + metadata: [] + type: text \ No newline at end of file diff --git a/tests/testing_data/yml_training_files/bot_content.yml b/tests/testing_data/yml_training_files/bot_content.yml new file mode 100644 index 000000000..8900b6b76 --- /dev/null +++ b/tests/testing_data/yml_training_files/bot_content.yml @@ -0,0 +1,31 @@ +- collection: test_content_collection + data: + - I am testing upload download bot content in Content Collection 1 + - I am testing upload download bot content in Content Collection 2 + metadata: [] + type: text +- collection: test_payload_collection + data: + - city: City 1 + population: '100' + - city: City 2 + population: '200' + - city: City 3 + population: '300' + metadata: + - column_name: city + create_embeddings: true + data_type: str + enable_search: true + - column_name: population + create_embeddings: true + data_type: int + enable_search: true + type: json +- collection: Default + data: + - I am testing upload download bot content in Default Collection 1 + - I am testing upload download bot content in Default Collection 2 + - I am testing upload download bot content in Default Collection 3 + metadata: [] + type: text \ No newline at end of file diff --git 
a/tests/unit_test/data_processor/data_processor_test.py b/tests/unit_test/data_processor/data_processor_test.py index 9c8b17839..8500f0d1f 100644 --- a/tests/unit_test/data_processor/data_processor_test.py +++ b/tests/unit_test/data_processor/data_processor_test.py @@ -106,6 +106,7 @@ async def _read_and_get_data(path: str): chat_client_config_path = os.path.join(path, "chat_client_config.yml") http_actions_path = os.path.join(path, 'actions.yml') multiflow_story_path = os.path.join(path, 'multiflow_stories.yml') + bot_content_path = os.path.join(path, 'bot_content.yml') importer = RasaFileImporter.load_from_config(config_path=config_path, domain_path=domain_path, training_data_paths=training_data_path) @@ -115,8 +116,9 @@ async def _read_and_get_data(path: str): nlu = importer.get_nlu_data(config.get('language')) http_actions = Utility.read_yaml(http_actions_path) multiflow_stories = Utility.read_yaml(multiflow_story_path) + bot_content = Utility.read_yaml(bot_content_path) chat_client_config = Utility.read_yaml(chat_client_config_path) - return nlu, story_graph, domain, config, http_actions, multiflow_stories, chat_client_config + return nlu, story_graph, domain, config, http_actions, multiflow_stories, bot_content, chat_client_config return _read_and_get_data @@ -982,14 +984,14 @@ def test_delete_prompt_action_not_present(self): user = 'test_user' with pytest.raises(AppException, match=f'Action with name "non_existent_kairon_faq_action" not found'): processor.delete_action('non_existent_kairon_faq_action', bot, user) - + def test_get_live_agent(self): processor = MongoProcessor() bot = 'test_bot' user = 'test_user' live_agent = processor.get_live_agent(bot=bot) assert live_agent == [] - + def test_enable_live_agent(self): processor = MongoProcessor() bot = 'test_bot' @@ -1021,7 +1023,7 @@ def test_enable_live_agent_already_exist(self): } result = processor.enable_live_agent(request_data=request_data, bot=bot, user=user) assert result is False - + def 
test_edit_live_agent(self): processor = MongoProcessor() bot = 'test_bot' @@ -3053,7 +3055,7 @@ def _mock_bot_info(*args, **kwargs): file = processor.download_files("tests_download_empty_data", "user@integration.com") assert file.endswith(".zip") zip_file = ZipFile(file, mode='r') - assert zip_file.filelist.__len__() == 9 + assert zip_file.filelist.__len__() == 10 assert zip_file.getinfo('data/stories.yml') assert zip_file.getinfo('data/rules.yml') file_info_stories = zip_file.getinfo('data/stories.yml') @@ -3142,12 +3144,13 @@ def _mock_bot_info(*args, **kwargs): file_path = processor.download_files("tests", "user@integration.com") assert file_path.endswith(".zip") zip_file = ZipFile(file_path, mode='r') - assert zip_file.filelist.__len__() == 9 + assert zip_file.filelist.__len__() == 10 assert zip_file.getinfo('chat_client_config.yml') assert zip_file.getinfo('config.yml') assert zip_file.getinfo('domain.yml') assert zip_file.getinfo('actions.yml') assert zip_file.getinfo('multiflow_stories.yml') + assert zip_file.getinfo('bot_content.yml') assert zip_file.getinfo('data/stories.yml') assert zip_file.getinfo('data/rules.yml') assert zip_file.getinfo('data/nlu.yml') @@ -4443,7 +4446,7 @@ async def test_upload_and_save(self): stories = UploadFile(filename="stories.yml", file=BytesIO(stories_content)) config = UploadFile(filename="config.yml", file=BytesIO(config_content)) domain = UploadFile(filename="domain.yml", file=BytesIO(domain_content)) - await processor.upload_and_save(nlu, domain, stories, config, None, None, None, "test_upload_and_save", + await processor.upload_and_save(nlu, domain, stories, config, None, None, None, None, "test_upload_and_save", "rules_creator") assert len(list(Intents.objects(bot="test_upload_and_save", user="rules_creator"))) == 6 assert len(list(Stories.objects(bot="test_upload_and_save", user="rules_creator"))) == 1 @@ -4464,7 +4467,7 @@ async def test_upload_and_save_with_rules(self): config = UploadFile(filename="config.yml", 
file=BytesIO(config_content)) domain = UploadFile(filename="domain.yml", file=BytesIO(domain_content)) rules = UploadFile(filename="rules.yml", file=BytesIO(rules_content)) - await processor.upload_and_save(nlu, domain, stories, config, rules, None, None, "test_upload_and_save", + await processor.upload_and_save(nlu, domain, stories, config, rules, None, None, None, "test_upload_and_save", "rules_creator") assert len(list(Intents.objects(bot="test_upload_and_save", user="rules_creator", status=True))) == 6 assert len(list(Stories.objects(bot="test_upload_and_save", user="rules_creator", status=True))) == 1 @@ -4487,7 +4490,7 @@ async def test_upload_and_save_with_http_action(self): config = UploadFile(filename="config.yml", file=BytesIO(config_content)) domain = UploadFile(filename="domain.yml", file=BytesIO(domain_content)) http_action = UploadFile(filename="actions.yml", file=BytesIO(http_action_content)) - await processor.upload_and_save(nlu, domain, stories, config, None, http_action, None, "test_upload_and_save", + await processor.upload_and_save(nlu, domain, stories, config, None, http_action, None, None, "test_upload_and_save", "rules_creator") assert len(list(Intents.objects(bot="test_upload_and_save", user="rules_creator", status=True))) == 6 assert len(list(Stories.objects(bot="test_upload_and_save", user="rules_creator", status=True))) == 1 @@ -4512,7 +4515,7 @@ async def test_upload_and_save_with_empty_multiflow_stories(self): domain = UploadFile(filename="domain.yml", file=BytesIO(domain_content)) http_action = UploadFile(filename="actions.yml", file=BytesIO(http_action_content)) multiflow_story = UploadFile(filename="multiflow_stories.yml", file=BytesIO(multiflow_stories_content)) - await processor.upload_and_save(nlu, domain, stories, config, None, http_action, multiflow_story, + await processor.upload_and_save(nlu, domain, stories, config, None, http_action, multiflow_story, None, "test_upload_and_save", "rules_creator") assert 
len(list(Intents.objects(bot="test_upload_and_save", user="rules_creator", status=True))) == 6 @@ -4538,7 +4541,7 @@ async def test_upload_and_save_with_empty_multiflow_stories_none(self): domain = UploadFile(filename="domain.yml", file=BytesIO(domain_content)) http_action = UploadFile(filename="actions.yml", file=BytesIO(http_action_content)) multiflow_story = UploadFile(filename="multiflow_stories.yml", file=BytesIO(multiflow_stories_content)) - await processor.upload_and_save(nlu, domain, stories, config, None, http_action, multiflow_story, + await processor.upload_and_save(nlu, domain, stories, config, None, http_action, multiflow_story, None, "test_upload_and_save", "rules_creator") assert len(list(Intents.objects(bot="test_upload_and_save", user="rules_creator", status=True))) == 6 @@ -4564,7 +4567,7 @@ async def test_upload_and_save_with_multiflow_stories(self): domain = UploadFile(filename="domain.yml", file=BytesIO(domain_content)) http_action = UploadFile(filename="actions.yml", file=BytesIO(http_action_content)) multiflow_story = UploadFile(filename="multiflow_stories.yml", file=BytesIO(multiflow_stories_content)) - await processor.upload_and_save(nlu, domain, stories, config, None, http_action, multiflow_story, + await processor.upload_and_save(nlu, domain, stories, config, None, http_action, multiflow_story, None, "test_upload_and_save", "rules_creator") assert len(list(Intents.objects(bot="test_upload_and_save", user="rules_creator", status=True))) == 9 @@ -4766,11 +4769,11 @@ def _mock_bot_info(*args, **kwargs): path = 'tests/testing_data/yml_training_files' bot = 'test' user = 'test' - nlu, story_graph, domain, config, http_actions, multiflow_stories, chat_client_config = await get_training_data( + nlu, story_graph, domain, config, http_actions, multiflow_stories, bot_content, chat_client_config = await get_training_data( path) mongo_processor = MongoProcessor() - mongo_processor.save_training_data(bot, user, config, domain, story_graph, nlu, 
http_actions, multiflow_stories, + mongo_processor.save_training_data(bot, user, config, domain, story_graph, nlu, http_actions, multiflow_stories, bot_content, chat_client_config, True) training_data = mongo_processor.load_nlu(bot) @@ -4820,6 +4823,8 @@ def _mock_bot_info(*args, **kwargs): assert len(Actions.objects(type='http_action', bot=bot)) == 5 multiflow_stories = mongo_processor.load_multiflow_stories_yaml(bot) assert isinstance(multiflow_stories, dict) is True + bot_content = mongo_processor.load_bot_content(bot) + assert isinstance(bot_content, list) is True @pytest.mark.asyncio async def test_save_training_data_no_rules_and_http_actions(self, get_training_data, monkeypatch): @@ -4833,11 +4838,11 @@ def _mock_bot_info(*args, **kwargs): path = 'tests/testing_data/all' bot = 'test' user = 'test' - nlu, story_graph, domain, config, http_actions, multiflow_stories, chat_client_config = await get_training_data( + nlu, story_graph, domain, config, http_actions, multiflow_stories, bot_content, chat_client_config = await get_training_data( path) mongo_processor = MongoProcessor() - mongo_processor.save_training_data(bot, user, config, domain, story_graph, nlu, http_actions, multiflow_stories, + mongo_processor.save_training_data(bot, user, config, domain, story_graph, nlu, http_actions, multiflow_stories, bot_content, chat_client_config, True) training_data = mongo_processor.load_nlu(bot) @@ -4886,11 +4891,11 @@ def _mock_bot_info(*args, **kwargs): path = 'tests/testing_data/yml_training_files' bot = 'test' user = 'test' - nlu, story_graph, domain, config, http_actions, multiflow_stories, chat_client_config = await get_training_data( + nlu, story_graph, domain, config, http_actions, multiflow_stories, bot_content, chat_client_config = await get_training_data( path) mongo_processor = MongoProcessor() - mongo_processor.save_training_data(bot, user, config, domain, story_graph, nlu, http_actions, multiflow_stories, + mongo_processor.save_training_data(bot, user, 
config, domain, story_graph, nlu, http_actions, multiflow_stories, bot_content, chat_client_config, True) training_data = mongo_processor.load_nlu(bot) @@ -4950,11 +4955,11 @@ def _mock_bot_info(*args, **kwargs): path = 'tests/testing_data/validator/append' bot = 'test' user = 'test' - nlu, story_graph, domain, config, http_actions, multiflow_stories, chat_client_config = await get_training_data( + nlu, story_graph, domain, config, http_actions, multiflow_stories, bot_content, chat_client_config = await get_training_data( path) mongo_processor = MongoProcessor() - mongo_processor.save_training_data(bot, user, config, domain, story_graph, nlu, http_actions, multiflow_stories, + mongo_processor.save_training_data(bot, user, config, domain, story_graph, nlu, http_actions, multiflow_stories, bot_content, chat_client_config, False, REQUIREMENTS.copy() - {"chat_client_config"}) training_data = mongo_processor.load_nlu(bot) @@ -5057,7 +5062,7 @@ async def test_save_nlu_only(self, get_training_data): path = 'tests/testing_data/yml_training_files' bot = 'test' user = 'test' - nlu, story_graph, domain, config, http_actions, multiflow_stories, chat_client_config = await get_training_data( + nlu, story_graph, domain, config, http_actions, multiflow_stories, bot_content, chat_client_config = await get_training_data( path) mongo_processor = MongoProcessor() @@ -5150,7 +5155,7 @@ async def test_save_stories_only(self, get_training_data): path = 'tests/testing_data/yml_training_files' bot = 'test' user = 'test' - nlu, story_graph, domain, config, http_actions, multiflow_stories, chat_client_config = await get_training_data( + nlu, story_graph, domain, config, http_actions, multiflow_stories, bot_content, chat_client_config = await get_training_data( path) mongo_processor = MongoProcessor() @@ -5205,7 +5210,7 @@ async def test_save_actions_and_config_only(self, get_training_data): path = 'tests/testing_data/yml_training_files' bot = 'test' user = 'test' - nlu, story_graph, domain, 
config, http_actions, multiflow_stories, chat_client_config = await get_training_data( + nlu, story_graph, domain, config, http_actions, multiflow_stories, bot_content, chat_client_config = await get_training_data( path) config['language'] = 'fr' @@ -5253,7 +5258,7 @@ async def test_save_rules_and_domain_only(self, get_training_data): path = 'tests/testing_data/yml_training_files' bot = 'test' user = 'test' - nlu, story_graph, domain, config, http_actions, multiflow_stories, chat_client_config = await get_training_data( + nlu, story_graph, domain, config, http_actions, multiflow_stories, bot_content, chat_client_config = await get_training_data( path) mongo_processor = MongoProcessor() @@ -5463,7 +5468,7 @@ def _mock_bot_info(*args, **kwargs): file_path = processor.download_files(pytest.bot, "user@integration.com") assert file_path.endswith(".zip") zip_file = ZipFile(file_path, mode='r') - assert zip_file.filelist.__len__() == 9 + assert zip_file.filelist.__len__() == 10 assert zip_file.getinfo('chat_client_config.yml') @pytest.fixture() @@ -5488,7 +5493,7 @@ async def test_validate_and_prepare_data_save_training_files(self, resource_save pytest.chat_client_config] files_received, is_event_data, non_event_validation_summary = await processor.validate_and_prepare_data( pytest.bot, 'test', training_file, True) - assert REQUIREMENTS - {'multiflow_stories'} == files_received + assert REQUIREMENTS - {'multiflow_stories','bot_content'} == files_received assert is_event_data bot_data_home_dir = Utility.get_latest_file(os.path.join('training_data', pytest.bot)) assert os.path.exists(os.path.join(bot_data_home_dir, 'domain.yml')) @@ -14643,6 +14648,7 @@ def test_save_payload_metadata(self): user = 'testUser' settings = BotSettings.objects(bot=bot).get() settings.llm_settings = LLMSettings(enable_faq=True) + settings.cognition_collections_limit = 5 settings.save() schema = { "metadata": [ @@ -14709,6 +14715,25 @@ def test_save_payload_metadata(self): } with 
pytest.raises(AppException, match="Collection already exists!"): processor.save_cognition_schema(schema, user, bot) + + data = list(processor.list_cognition_schema(bot)) + + # Fetch all schema IDs + schema_ids = [schema['_id'] for schema in processor.list_cognition_schema(bot)] + + # Fetch all collection names + collection_names = [schema['collection_name'] for schema in processor.list_cognition_schema(bot)] + + # Delete all collection except the last one + for collection_name in collection_names[:-1]: + for data in CognitionData.objects(bot=bot, collection=collection_name): + data.delete() + + # Delete all schema except the last one + for schema_id in schema_ids[:-1]: + processor.delete_cognition_schema(schema_id, bot) + + data = list(processor.list_cognition_schema(bot)) settings = BotSettings.objects(bot=bot).get() settings.llm_settings = LLMSettings(enable_faq=False) settings.save() diff --git a/tests/unit_test/events/events_test.py b/tests/unit_test/events/events_test.py index f7997b8ea..fdbf2f264 100644 --- a/tests/unit_test/events/events_test.py +++ b/tests/unit_test/events/events_test.py @@ -18,10 +18,8 @@ from rasa.shared.importers.rasa import RasaFileImporter from responses import matchers -from kairon.events.definitions.scheduled_base import ScheduledEventsBase from kairon.shared.channels.broadcast.whatsapp import WhatsappBroadcast from kairon.shared.chat.data_objects import ChannelLogs -from kairon.shared.utils import Utility os.environ["system_file"] = "./tests/testing_data/system.yaml" @@ -266,6 +264,10 @@ def test_trigger_data_importer_validate_and_save_append(self, monkeypatch): test_data_path = os.path.join(pytest.tmp_dir, str(uuid.uuid4())) shutil.copytree('tests/testing_data/validator/append', test_data_path) + bot_content_path = os.path.join(test_data_path, 'bot_content.yml') + if os.path.exists(bot_content_path): + os.remove(bot_content_path) + def _path(*args, **kwargs): return test_data_path diff --git a/tests/unit_test/utility_test.py 
b/tests/unit_test/utility_test.py index 94083650a..6275974a6 100644 --- a/tests/unit_test/utility_test.py +++ b/tests/unit_test/utility_test.py @@ -505,6 +505,7 @@ def test_validate_only_stories_and_nlu( "domain", "chat_client_config", "multiflow_stories", + "bot_content", } == requirements def test_validate_only_http_actions(self, resource_validate_only_http_actions): @@ -519,6 +520,7 @@ def test_validate_only_http_actions(self, resource_validate_only_http_actions): "nlu", "chat_client_config", "multiflow_stories", + "bot_content", } == requirements def test_validate_only_multiflow_stories( @@ -535,6 +537,7 @@ def test_validate_only_multiflow_stories( "nlu", "rules", "domain", + "bot_content", } == requirements def test_validate_only_domain(self, resource_validate_only_domain): @@ -549,6 +552,7 @@ def test_validate_only_domain(self, resource_validate_only_domain): "nlu", "chat_client_config", "multiflow_stories", + "bot_content", } == requirements def test_validate_only_config(self, resource_validate_only_config): @@ -563,6 +567,7 @@ def test_validate_only_config(self, resource_validate_only_config): "nlu", "chat_client_config", "multiflow_stories", + "bot_content", } == requirements @pytest.mark.asyncio diff --git a/tests/unit_test/validator/training_data_validator_test.py b/tests/unit_test/validator/training_data_validator_test.py index c97341cf9..2633c01f1 100644 --- a/tests/unit_test/validator/training_data_validator_test.py +++ b/tests/unit_test/validator/training_data_validator_test.py @@ -1,12 +1,16 @@ import ujson as json import re import pytest +import yaml +from mongoengine import connect from kairon.exceptions import AppException from kairon.importer.validator.file_validator import TrainingDataValidator +from kairon.shared.data.processor import MongoProcessor from kairon.shared.utils import Utility import os from deepdiff import DeepDiff +from kairon.shared.data.data_objects import BotSettings, LLMSettings class TestTrainingDataValidator: @@ -14,6 +18,7 
@@ class TestTrainingDataValidator: def init_connection(self): os.environ["system_file"] = "./tests/testing_data/system.yaml" Utility.load_environment() + connect(**Utility.mongoengine_connection(Utility.environment['database']["url"])) def test_config_validation(self): config = Utility.load_yaml("./tests/testing_data/yml_training_files/config.yml") @@ -862,3 +867,57 @@ def test_validate_multiflow_stories_empty_content(self): assert TrainingDataValidator.validate_multiflow_stories([{}]) test = {None} assert TrainingDataValidator.validate_multiflow_stories(test) + + def test_validate_content_disabled_gpt(self, monkeypatch): + def _mock_get_bot_settings(*args, **kwargs): + return BotSettings( + bot="your_bot_name", + user="integration@demo.ai", + llm_settings=LLMSettings(enable_faq=False), + ) + + monkeypatch.setattr(MongoProcessor, "get_bot_settings", _mock_get_bot_settings) + + yaml_file_path = "tests/testing_data/bot_content/bot_content.yml" + with open(yaml_file_path, "r") as file: + bot_content = yaml.safe_load(file) + + errors = TrainingDataValidator.validate_content("your_bot_name", "integration@demo.ai", bot_content) + + assert errors == ["Please enable GPT on bot before uploading"] + + def test_validate_content_valid_content(self, monkeypatch): + def _mock_get_bot_settings(*args, **kwargs): + return BotSettings( + bot="your_bot_name", + user="integration@demo.ai", + llm_settings=LLMSettings(enable_faq=True), + ) + + monkeypatch.setattr(MongoProcessor, "get_bot_settings", _mock_get_bot_settings) + + yaml_file_path = "tests/testing_data/bot_content/bot_content.yml" + with open(yaml_file_path, "r") as file: + bot_content = yaml.safe_load(file) + + errors = TrainingDataValidator.validate_content("your_bot_name", "integration@demo.ai", bot_content) + + assert not errors + + def test_validate_content_invalid_content(self, monkeypatch): + def _mock_get_bot_settings(*args, **kwargs): + return BotSettings( + bot="your_bot_name", + user="integration@demo.ai", + 
llm_settings=LLMSettings(enable_faq=True), + ) + + monkeypatch.setattr(MongoProcessor, "get_bot_settings", _mock_get_bot_settings) + + yaml_file_path = "tests/testing_data/bot_content/invalid_bot_content.yml" + with open(yaml_file_path, "r") as file: + bot_content = yaml.safe_load(file) + + errors = TrainingDataValidator.validate_content("your_bot_name", "integration@demo.ai", bot_content) + + assert errors