From 06602440178b5b646d2d650f3fddf30862a73402 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 20 Sep 2023 22:14:59 +1000 Subject: [PATCH 01/22] fabulous does not work on Windows --- pilot/database/database.py | 2 +- pilot/helpers/AgentConvo.py | 6 ++-- pilot/helpers/Project.py | 12 ++++---- pilot/helpers/agents/Architect.py | 4 +-- pilot/helpers/agents/Developer.py | 26 ++++++++-------- pilot/helpers/agents/ProductOwner.py | 8 ++--- pilot/helpers/agents/TechLead.py | 4 +-- pilot/helpers/cli.py | 8 ++--- pilot/helpers/files.py | 2 +- pilot/main.py | 4 +-- pilot/prompts/prompts.py | 5 +--- pilot/utils/llm_connection.py | 2 +- pilot/utils/questionary.py | 6 ++-- pilot/utils/style.py | 45 ++++++++++++++++++++++++++++ pilot/utils/utils.py | 4 +-- 15 files changed, 89 insertions(+), 49 deletions(-) create mode 100644 pilot/utils/style.py diff --git a/pilot/database/database.py b/pilot/database/database.py index 83bd5d993..fce56026b 100644 --- a/pilot/database/database.py +++ b/pilot/database/database.py @@ -1,6 +1,6 @@ from playhouse.shortcuts import model_to_dict from peewee import * -from fabulous.color import yellow, red +from utils.style import yellow, red from functools import reduce import operator import psycopg2 diff --git a/pilot/helpers/AgentConvo.py b/pilot/helpers/AgentConvo.py index ceffb6d8b..48f2682f7 100644 --- a/pilot/helpers/AgentConvo.py +++ b/pilot/helpers/AgentConvo.py @@ -1,7 +1,7 @@ import re import subprocess import uuid -from fabulous.color import yellow, bold +from utils.style import yellow, yellow_bold from database.database import get_saved_development_step, save_development_step, delete_all_subsequent_steps from helpers.files import get_files_content @@ -126,7 +126,7 @@ def continuous_conversation(self, prompt_path, prompt_data, function_calls=None) # Continue conversation until GPT response equals END_RESPONSE while response != END_RESPONSE: - print(yellow("Do you want to add anything else? If not, ") + yellow(bold('just press ENTER.'))) + print(yellow("Do you want to add anything else? 
If not, ") + yellow_bold('just press ENTER.')) user_message = ask_user(self.agent.project, response, False) if user_message == "": @@ -204,7 +204,7 @@ def log_message(self, content): print_msg = capitalize_first_word_with_underscores(self.high_level_step) if self.log_to_user: if self.agent.project.checkpoints['last_development_step'] is not None: - print(yellow("\nDev step ") + yellow(bold(str(self.agent.project.checkpoints['last_development_step']))) + '\n', end='') + print(yellow("\nDev step ") + yellow_bold(str(self.agent.project.checkpoints['last_development_step'])) + '\n', end='') print(f"\n{content}\n") logger.info(f"{print_msg}: {content}\n") diff --git a/pilot/helpers/Project.py b/pilot/helpers/Project.py index 4843c93e2..7497cd07d 100644 --- a/pilot/helpers/Project.py +++ b/pilot/helpers/Project.py @@ -1,6 +1,6 @@ import json -from fabulous.color import bold, green, yellow, cyan, white +from utils.style import green_bold, yellow_bold, cyan, white_bold from const.common import IGNORE_FOLDERS, STEPS from database.database import delete_unconnected_steps_from, delete_all_app_development_data from const.ipc import MESSAGE_TYPE @@ -67,10 +67,10 @@ def __init__(self, args, name=None, description=None, user_stories=None, user_ta # if development_plan is not None: # self.development_plan = development_plan - print(green(bold('\n------------------ STARTING NEW PROJECT ----------------------'))) + print(green_bold('\n------------------ STARTING NEW PROJECT ----------------------')) print(f"If you wish to continue with this project in future run:") - print(green(bold(f'python main.py app_id={args["app_id"]}'))) - print(green(bold('--------------------------------------------------------------\n'))) + print(green_bold(f'python main.py app_id={args["app_id"]}')) + print(green_bold('--------------------------------------------------------------\n')) def start(self): """ @@ -306,10 +306,10 @@ def ask_for_human_intervention(self, message, description=None, cbs={}, convo=No reset_branch_id = convo.save_branch() while answer != 'continue': - print(yellow(bold(message))) + print(yellow_bold(message)) if description is not None: print('\n' + '-'*100 + '\n' + - white(bold(description)) + + white_bold(description) + '\n' + '-'*100 + '\n') answer = styled_text( diff --git a/pilot/helpers/agents/Architect.py b/pilot/helpers/agents/Architect.py index 667fa64c1..0d4be0549 100644 --- a/pilot/helpers/agents/Architect.py +++ b/pilot/helpers/agents/Architect.py @@ -1,7 +1,7 @@ from utils.utils import step_already_finished from helpers.Agent import Agent import json -from fabulous.color import green, bold +from utils.style import green_bold from const.function_calls import ARCHITECTURE from utils.utils import should_execute_step, find_role_from_step, generate_app_data @@ -28,7 +28,7 @@ def get_architecture(self): return step['architecture'] # ARCHITECTURE - print(green(bold(f"Planning project architecture...\n"))) + print(green_bold(f"Planning project architecture...\n")) logger.info(f"Planning project architecture...") self.convo_architecture = AgentConvo(self) diff --git a/pilot/helpers/agents/Developer.py b/pilot/helpers/agents/Developer.py index db42aae6f..19af737e9 100644 --- a/pilot/helpers/agents/Developer.py +++ b/pilot/helpers/agents/Developer.py @@ -1,5 +1,5 @@ import uuid -from fabulous.color import yellow, green, red, bold, blue, white +from utils.style import yellow, green, red, blue, white, green_bold, yellow_bold, red_bold, blue_bold, white_bold from helpers.exceptions.TokenLimitError import 
TokenLimitError from const.code_execution import MAX_COMMAND_DEBUG_TRIES from helpers.exceptions.TooDeepRecursionError import TooDeepRecursionError @@ -31,7 +31,7 @@ def start_coding(self): self.project.skip_steps = False if ('skip_until_dev_step' in self.project.args and self.project.args['skip_until_dev_step'] == '0') else True # DEVELOPMENT - print(green(bold(f"Ok, great, now, let's start with the actual development...\n"))) + print(green_bold(f"Ok, great, now, let's start with the actual development...\n")) logger.info(f"Starting to create the actual code...") for i, dev_task in enumerate(self.project.development_plan): @@ -42,7 +42,7 @@ def start_coding(self): logger.info('The app is DONE!!! Yay...you can use it now.') def implement_task(self, i, development_task=None): - print(green(bold(f'Implementing task #{i + 1}: ')) + green(f' {development_task["description"]}\n')) + print(green_bold(f'Implementing task #{i + 1}: ') + green(f' {development_task["description"]}\n')) convo_dev_task = AgentConvo(self) task_description = convo_dev_task.send_message('development/task/breakdown.prompt', { @@ -96,7 +96,7 @@ def step_command_run(self, convo, step, i): def step_human_intervention(self, convo, step): while True: - human_intervention_description = step['human_intervention_description'] + yellow(bold('\n\nIf you want to run the app, just type "r" and press ENTER and that will run `' + self.run_command + '`')) if self.run_command is not None else step['human_intervention_description'] + human_intervention_description = step['human_intervention_description'] + yellow_bold('\n\nIf you want to run the app, just type "r" and press ENTER and that will run `' + self.run_command + '`') if self.run_command is not None else step['human_intervention_description'] response = self.project.ask_for_human_intervention('I need human intervention:', human_intervention_description, cbs={ 'r': lambda conv: run_command_until_success(self.run_command, None, conv, force=True, return_cli_response=True) }, @@ -151,8 +151,8 @@ def should_retry_step_implementation(self, step, step_implementation_try): if step_implementation_try >= MAX_COMMAND_DEBUG_TRIES: self.dev_help_needed(step) - print(red(bold(f'\n--------- LLM Reached Token Limit ----------'))) - print(red(bold(f'Can I retry implementing the entire development step?'))) + print(red_bold(f'\n--------- LLM Reached Token Limit ----------')) + print(red_bold(f'Can I retry implementing the entire development step?')) answer = '' while answer != 'y': @@ -169,9 +169,9 @@ def should_retry_step_implementation(self, step, step_implementation_try): def dev_help_needed(self, step): if step['type'] == 'command': - help_description = (red(bold(f'I tried running the following command but it doesn\'t seem to work:\n\n')) + - white(bold(step['command']['command'])) + - red(bold(f'\n\nCan you please make it work?'))) + help_description = (red_bold(f'I tried running the following command but it doesn\'t seem to work:\n\n') + + white_bold(step['command']['command']) + + red_bold(f'\n\nCan you please make it work?')) elif step['type'] == 'code_change': help_description = step['code_change_description'] elif step['type'] == 'human_intervention': @@ -190,9 +190,9 @@ def extract_substring(s): answer = '' while answer != 'continue': - print(red(bold(f'\n----------------------------- I need your help ------------------------------'))) + print(red_bold(f'\n----------------------------- I need your help ------------------------------')) print(extract_substring(str(help_description))) - 
print(red(bold(f'\n-----------------------------------------------------------------------------'))) + print(red_bold(f'\n-----------------------------------------------------------------------------')) answer = styled_text( self.project, 'Once you\'re done, type "continue"?' @@ -256,8 +256,8 @@ def execute_task(self, convo, task_steps, test_command=None, reset_convo=True, def continue_development(self, iteration_convo, last_branch_name, continue_description=''): while True: iteration_convo.load_branch(last_branch_name) - user_description = ('Here is a description of what should be working: \n\n' + blue(bold(continue_description)) + '\n') if continue_description != '' else '' - user_description = 'Can you check if the app works please? ' + user_description + '\nIf you want to run the app, ' + yellow(bold('just type "r" and press ENTER and that will run `' + self.run_command + '`')) + user_description = ('Here is a description of what should be working: \n\n' + blue_bold(continue_description) + '\n') if continue_description != '' else '' + user_description = 'Can you check if the app works please? ' + user_description + '\nIf you want to run the app, ' + yellow_bold('just type "r" and press ENTER and that will run `' + self.run_command + '`') # continue_description = '' response = self.project.ask_for_human_intervention( user_description, diff --git a/pilot/helpers/agents/ProductOwner.py b/pilot/helpers/agents/ProductOwner.py index b69bbd463..956e1ff03 100644 --- a/pilot/helpers/agents/ProductOwner.py +++ b/pilot/helpers/agents/ProductOwner.py @@ -1,4 +1,4 @@ -from fabulous.color import bold, green, yellow +from utils.style import green_bold from helpers.AgentConvo import AgentConvo from helpers.Agent import Agent @@ -48,7 +48,7 @@ def get_project_description(self): self.project, generate_messages_from_description(main_prompt, self.project.args['app_type'], self.project.args['name'])) - print(green(bold('Project Summary:\n'))) + print(green_bold('Project Summary:\n')) convo_project_description = AgentConvo(self) high_level_summary = convo_project_description.send_message('utils/summary.prompt', {'conversation': '\n'.join( @@ -80,7 +80,7 @@ def get_user_stories(self): # USER STORIES msg = f"User Stories:\n" - print(green(bold(msg))) + print(green_bold(msg)) logger.info(msg) self.project.user_stories = self.convo_user_stories.continuous_conversation('user_stories/specs.prompt', { @@ -114,7 +114,7 @@ def get_user_tasks(self): # USER TASKS msg = f"User Tasks:\n" - print(green(bold(msg))) + print(green_bold(msg)) logger.info(msg) self.project.user_tasks = self.convo_user_stories.continuous_conversation('user_stories/user_tasks.prompt', diff --git a/pilot/helpers/agents/TechLead.py b/pilot/helpers/agents/TechLead.py index 6e8eb450f..64237187c 100644 --- a/pilot/helpers/agents/TechLead.py +++ b/pilot/helpers/agents/TechLead.py @@ -1,7 +1,7 @@ from utils.utils import step_already_finished from helpers.Agent import Agent import json -from fabulous.color import green, bold +from utils.style import green_bold from const.function_calls import DEV_STEPS from helpers.cli import build_directory_tree from helpers.AgentConvo import AgentConvo @@ -32,7 +32,7 @@ def create_development_plan(self): return step['development_plan'] # DEVELOPMENT PLANNING - print(green(bold(f"Starting to create the action plan for development...\n"))) + print(green_bold(f"Starting to create the action plan for development...\n")) logger.info(f"Starting to create the action plan for development...") # TODO add clarifications diff 
--git a/pilot/helpers/cli.py b/pilot/helpers/cli.py index 8fbf44cf3..e85fa0b16 100644 --- a/pilot/helpers/cli.py +++ b/pilot/helpers/cli.py @@ -7,7 +7,7 @@ import uuid import platform -from fabulous.color import yellow, green, white, red, bold +from utils.style import yellow, green, white, red, yellow_bold, white_bold from database.database import get_saved_command_run, save_command_run from const.function_calls import DEBUG_STEPS_BREAKDOWN from helpers.exceptions.TooDeepRecursionError import TooDeepRecursionError @@ -101,8 +101,8 @@ def execute_command(project, command, timeout=None, force=False): timeout = min(max(timeout, MIN_COMMAND_RUN_TIME), MAX_COMMAND_RUN_TIME) if not force: - print(yellow(bold(f'\n--------- EXECUTE COMMAND ----------'))) - print(f'Can i execute the command: `' + yellow(bold(command)) + f'` with {timeout}ms timeout?') + print(yellow_bold(f'\n--------- EXECUTE COMMAND ----------')) + print(f'Can i execute the command: `' + yellow_bold(command) + f'` with {timeout}ms timeout?') answer = styled_text( project, @@ -143,7 +143,7 @@ def execute_command(project, command, timeout=None, force=False): while True and return_value is None: elapsed_time = time.time() - start_time if timeout is not None: - print(white(bold(f'\rt: {round(elapsed_time * 1000)}ms : ')), end='', flush=True) + print(white_bold(f'\rt: {round(elapsed_time * 1000)}ms : '), end='', flush=True) # Check if process has finished if process.poll() is not None: diff --git a/pilot/helpers/files.py b/pilot/helpers/files.py index 9145e996c..b5948877a 100644 --- a/pilot/helpers/files.py +++ b/pilot/helpers/files.py @@ -1,4 +1,4 @@ -from fabulous.color import green +from utils.style import green import os diff --git a/pilot/main.py b/pilot/main.py index 973c1539f..ea485f42d 100644 --- a/pilot/main.py +++ b/pilot/main.py @@ -11,7 +11,7 @@ from helpers.ipc import IPCClient from const.ipc import MESSAGE_TYPE from utils.utils import json_serial -from fabulous.color import red +from utils.style import red from helpers.Project import Project from utils.arguments import get_arguments @@ -36,8 +36,6 @@ def init(): return arguments - - def get_custom_print(args): built_in_print = builtins.print diff --git a/pilot/prompts/prompts.py b/pilot/prompts/prompts.py index 38a88b166..446689141 100644 --- a/pilot/prompts/prompts.py +++ b/pilot/prompts/prompts.py @@ -1,8 +1,5 @@ # prompts/prompts.py - -from fabulous.color import yellow -import questionary - +from utils.style import yellow from const import common from const.llm import MAX_QUESTIONS, END_RESPONSE from utils.llm_connection import create_gpt_chat_completion, get_prompt diff --git a/pilot/utils/llm_connection.py b/pilot/utils/llm_connection.py index bb84a4c6c..2cb6a4e38 100644 --- a/pilot/utils/llm_connection.py +++ b/pilot/utils/llm_connection.py @@ -7,12 +7,12 @@ import tiktoken import questionary +from utils.style import red from typing import List from jinja2 import Environment, FileSystemLoader from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS, MAX_QUESTIONS, END_RESPONSE from logger.logger import logger -from fabulous.color import red from helpers.exceptions.TokenLimitError import TokenLimitError from utils.utils import get_prompt_components, fix_json from utils.spinner import spinner_start, spinner_stop diff --git a/pilot/utils/questionary.py b/pilot/utils/questionary.py index d29214c71..fbfcd860c 100644 --- a/pilot/utils/questionary.py +++ b/pilot/utils/questionary.py @@ -1,6 +1,6 @@ from prompt_toolkit.styles import Style import questionary 
-from fabulous.color import yellow, bold +from utils.style import yellow_bold from database.database import save_user_input, get_saved_user_input from const.ipc import MESSAGE_TYPE @@ -26,8 +26,8 @@ def styled_text(project, question, ignore_user_input_count=False): if user_input is not None and user_input.user_input is not None and project.skip_steps: # if we do, use it project.checkpoints['last_user_input'] = user_input - print(yellow(bold(f'Restoring user input id {user_input.id}: ')), end='') - print(yellow(bold(f'{user_input.user_input}'))) + print(yellow_bold(f'Restoring user input id {user_input.id}: '), end='') + print(yellow_bold(f'{user_input.user_input}')) return user_input.user_input if project.ipc_client_instance is None or project.ipc_client_instance.client is None: diff --git a/pilot/utils/style.py b/pilot/utils/style.py new file mode 100644 index 000000000..157c2553b --- /dev/null +++ b/pilot/utils/style.py @@ -0,0 +1,45 @@ +from termcolor import colored + + +def red(text): + return colored(text, 'red') + + +def red_bold(text): + return colored(text, 'red', attrs=['bold']) + + +def yellow(text): + return colored(text, 'yellow') + + +def yellow_bold(text): + return colored(text, 'yellow', attrs=['bold']) + + +def green(text): + return colored(text, 'green') + + +def green_bold(text): + return colored(text, 'green', attrs=['bold']) + + +def blue(text): + return colored(text, 'blue') + + +def blue_bold(text): + return colored(text, 'blue', attrs=['bold']) + + +def cyan(text): + return colored(text, 'light_cyan') + + +def white(text): + return colored(text, 'white') + + +def white_bold(text): + return colored(text, 'white', attrs=['bold']) diff --git a/pilot/utils/utils.py b/pilot/utils/utils.py index db4b09ab0..89f019d7a 100644 --- a/pilot/utils/utils.py +++ b/pilot/utils/utils.py @@ -9,7 +9,7 @@ import hashlib import re from jinja2 import Environment, FileSystemLoader -from fabulous.color import green +from termcolor import colored from const.llm import MAX_QUESTIONS, END_RESPONSE from const.common import ROLES, STEPS @@ -126,7 +126,7 @@ def step_already_finished(args, step): args.update(step['app_data']) message = f"{capitalize_first_word_with_underscores(step['step'])} already done for this app_id: {args['app_id']}. Moving to next step..." 
- print(green(message)) + print(colored(message, 'green')) logger.info(message) From 0234c5f7e12501f806f77e2df151724accf7e1c8 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 20 Sep 2023 22:16:18 +1000 Subject: [PATCH 02/22] fixed logging for unit tests --- pilot/logger/logger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pilot/logger/logger.py b/pilot/logger/logger.py index 448b09205..a2b054b86 100644 --- a/pilot/logger/logger.py +++ b/pilot/logger/logger.py @@ -1,4 +1,4 @@ -# logger.py +import os import logging @@ -7,7 +7,7 @@ def setup_logger(): log_format = "%(asctime)s [%(filename)s:%(lineno)s - %(funcName)20s() ] %(levelname)s: %(message)s" # Create a log handler for file output - file_handler = logging.FileHandler(filename='logger/debug.log', mode='w') + file_handler = logging.FileHandler(filename=os.path.join(os.path.dirname(__file__), 'debug.log'), mode='w') file_handler.setLevel(logging.DEBUG) # Apply the custom format to the handler From 4b7aa2df222339d53fc104e70e1bbaf4637aa2be Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 20 Sep 2023 22:17:37 +1000 Subject: [PATCH 03/22] `get_prompt()` moved from `llm_connection` to `utils` and works from unit tests --- pilot/helpers/AgentConvo.py | 4 ++-- pilot/helpers/agents/Developer.py | 2 +- pilot/prompts/prompts.py | 4 ++-- pilot/utils/llm_connection.py | 27 ++-------------------- pilot/utils/utils.py | 38 ++++++++++++++++++++----------- 5 files changed, 32 insertions(+), 43 deletions(-) diff --git a/pilot/helpers/AgentConvo.py b/pilot/helpers/AgentConvo.py index 48f2682f7..d6163d8a7 100644 --- a/pilot/helpers/AgentConvo.py +++ b/pilot/helpers/AgentConvo.py @@ -7,8 +7,8 @@ from helpers.files import get_files_content from const.common import IGNORE_FOLDERS from helpers.exceptions.TokenLimitError import TokenLimitError -from utils.utils import array_of_objects_to_string -from utils.llm_connection import get_prompt, create_gpt_chat_completion +from utils.utils import array_of_objects_to_string, get_prompt +from utils.llm_connection import create_gpt_chat_completion from utils.utils import get_sys_message, find_role_from_step, capitalize_first_word_with_underscores from logger.logger import logger from prompts.prompts import ask_user diff --git a/pilot/helpers/agents/Developer.py b/pilot/helpers/agents/Developer.py index 19af737e9..53b3bcdec 100644 --- a/pilot/helpers/agents/Developer.py +++ b/pilot/helpers/agents/Developer.py @@ -11,7 +11,7 @@ from helpers.Agent import Agent from helpers.AgentConvo import AgentConvo from utils.utils import should_execute_step, array_of_objects_to_string, generate_app_data -from helpers.cli import run_command_until_success, execute_command_and_check_cli_response, debug +from helpers.cli import run_command_until_success, execute_command_and_check_cli_response from const.function_calls import FILTER_OS_TECHNOLOGIES, EXECUTE_COMMANDS, GET_TEST_TYPE, IMPLEMENT_TASK from database.database import save_progress, get_progress_steps from utils.utils import get_os_info diff --git a/pilot/prompts/prompts.py b/pilot/prompts/prompts.py index 446689141..0491080ca 100644 --- a/pilot/prompts/prompts.py +++ b/pilot/prompts/prompts.py @@ -2,8 +2,8 @@ from utils.style import yellow from const import common from const.llm import MAX_QUESTIONS, END_RESPONSE -from utils.llm_connection import create_gpt_chat_completion, get_prompt -from utils.utils import capitalize_first_word_with_underscores, get_sys_message, find_role_from_step +from utils.llm_connection import 
create_gpt_chat_completion +from utils.utils import capitalize_first_word_with_underscores, get_sys_message, find_role_from_step, get_prompt from utils.questionary import styled_select, styled_text from logger.logger import logger diff --git a/pilot/utils/llm_connection.py b/pilot/utils/llm_connection.py index 2cb6a4e38..cf00ef142 100644 --- a/pilot/utils/llm_connection.py +++ b/pilot/utils/llm_connection.py @@ -9,35 +9,12 @@ from utils.style import red from typing import List -from jinja2 import Environment, FileSystemLoader - -from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS, MAX_QUESTIONS, END_RESPONSE +from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS from logger.logger import logger from helpers.exceptions.TokenLimitError import TokenLimitError -from utils.utils import get_prompt_components, fix_json -from utils.spinner import spinner_start, spinner_stop - - -def get_prompt(prompt_name, data=None): - if data is None: - data = {} - - data.update(get_prompt_components()) - - logger.debug(f"Getting prompt for {prompt_name}") # logging here - # Create a file system loader with the directory of the templates - file_loader = FileSystemLoader('prompts') - - # Create the Jinja2 environment - env = Environment(loader=file_loader) - - # Load the template - template = env.get_template(prompt_name) +from utils.utils import fix_json - # Render the template with the provided data - output = template.render(data) - return output def get_tokens_in_messages(messages: List[str]) -> int: diff --git a/pilot/utils/utils.py b/pilot/utils/utils.py index 89f019d7a..1a2eb1c34 100644 --- a/pilot/utils/utils.py +++ b/pilot/utils/utils.py @@ -15,6 +15,10 @@ from const.common import ROLES, STEPS from logger.logger import logger +prompts_path = os.path.join(os.path.dirname(__file__), '..', 'prompts') +file_loader = FileSystemLoader(prompts_path) +env = Environment(loader=file_loader) + def capitalize_first_word_with_underscores(s): # Split the string into words based on underscores. 
@@ -29,6 +33,23 @@ def capitalize_first_word_with_underscores(s): return capitalized_string +def get_prompt(prompt_name, data=None): + if data is None: + data = {} + + data.update(get_prompt_components()) + + logger.debug(f"Getting prompt for {prompt_name}") # logging here + + # Load the template + template = env.get_template(prompt_name) + + # Render the template with the provided data + output = template.render(data) + + return output + + def get_prompt_components(): # This function reads and renders all prompts inside /prompts/components and returns them in dictionary @@ -40,7 +61,8 @@ def get_prompt_components(): } # Create a FileSystemLoader - file_loader = FileSystemLoader('prompts/components') + prompts_path = os.path.join(os.path.dirname(__file__), '..', 'prompts/components') + file_loader = FileSystemLoader(prompts_path) # Create the Jinja2 environment env = Environment(loader=file_loader) @@ -63,17 +85,7 @@ def get_prompt_components(): def get_sys_message(role): - # Create a FileSystemLoader - file_loader = FileSystemLoader('prompts/system_messages') - - # Create the Jinja2 environment - env = Environment(loader=file_loader) - - # Load the template - template = env.get_template(f'{role}.prompt') - - # Render the template with no variables - content = template.render() + content = get_prompt(f'system_messages/{role}.prompt') return { "role": "system", @@ -186,4 +198,4 @@ def json_serial(obj): elif isinstance(obj, uuid.UUID): return str(obj) else: - return str(obj) \ No newline at end of file + return str(obj) From 82690b2a171a941843d4220dea4b89a8eafd9d00 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 20 Sep 2023 22:20:05 +1000 Subject: [PATCH 04/22] added `test_end_to_end()` and llm_connection test --- .github/workflows/ci.yml | 2 +- pilot/test_main_e2e.py | 71 ++++++++++++++++++++++++++++++ pilot/utils/test_llm_connection.py | 43 ++++++++++++++++++ pytest.ini | 7 +++ 4 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 pilot/test_main_e2e.py create mode 100644 pilot/utils/test_llm_connection.py create mode 100644 pytest.ini diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c22a4400a..da98d3f7e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,4 +41,4 @@ jobs: run: | pip install pytest cd pilot - PYTHONPATH=. pytest + PYTHONPATH=. 
pytest -m "not slow" diff --git a/pilot/test_main_e2e.py b/pilot/test_main_e2e.py new file mode 100644 index 000000000..beeba9ef0 --- /dev/null +++ b/pilot/test_main_e2e.py @@ -0,0 +1,71 @@ +import builtins +import pytest +from unittest.mock import patch +from dotenv import load_dotenv +load_dotenv() + +from database.database import create_tables, drop_tables +from helpers.Project import Project +from .main import init, get_custom_print + + +def test_init(): + # When + args = init() + + # Then + for field in ['app_id', 'user_id', 'email']: + assert args[field] is not None + + for field in ['workspace', 'step']: + assert args[field] is None + + +class MockQuestionary(): + def __init__(self, answers=[]): + self.answers = iter(answers) + self.state = 'project_description' + + def text(self, question: str, style=None): + print('AI: ' + question) + if question.startswith('User Story'): + self.state = 'user_stories' + elif question.endswith('write "DONE"'): + self.state = 'DONE' + return self + + def unsafe_ask(self): + if self.state == 'user_stories': + answer = '' + elif self.state == 'DONE': + answer = 'DONE' + else: # if self.state == 'project_description': + answer = next(self.answers) + + print('User:', answer) + return answer + + +@pytest.mark.slow +@pytest.mark.skip(reason="Uses lots of tokens") +def test_end_to_end(): + # Given + create_tables() + args = init() + builtins.print, ipc_client_instance = get_custom_print(args) + project = Project(args) + mock_questionary = MockQuestionary([ + 'Test App', + 'A web-based chat app', + # 5 clarifying questions + 'Users can send direct messages to each other but with no group chat functionality', + 'No authentication is required at this stage', + 'Use your best judgement', + 'Use your best judgement', + 'Use your best judgement', + ]) + + # When + with patch('utils.questionary.questionary', mock_questionary): + project.start() + diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py new file mode 100644 index 000000000..38e16afb6 --- /dev/null +++ b/pilot/utils/test_llm_connection.py @@ -0,0 +1,43 @@ +from dotenv import load_dotenv +from const.function_calls import ARCHITECTURE +from helpers.AgentConvo import AgentConvo +from helpers.Project import Project +from helpers.agents.Architect import Architect +from .llm_connection import create_gpt_chat_completion + +load_dotenv() + +project = Project({'app_id': 'test-app'}, current_step='test') + + +class TestLlmConnection: + """Test the LLM connection class.""" + + def test_chat_completion_Architect(self): + """Test the chat completion method.""" + # Given + agent = Architect(project) + convo = AgentConvo(agent) + convo.construct_and_add_message_from_prompt('architecture/technologies.prompt', + { + 'name': 'Test App', + 'prompt': 'A web-based chat app', + 'app_type': 'web app', + 'user_stories': [ + 'As a user I want to be able view messages sent and received' + ] + }) + + messages = convo.messages + # messages = [{"role": "user", "content": "I want to create a website"}] + + # When + response = create_gpt_chat_completion(messages, '', function_calls=ARCHITECTURE) + # Then + assert response is not None + assert len(response) > 0 + # assert response != prompt + + + def _create_convo(self, agent): + convo = AgentConvo(agent) \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..a3b504a84 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,7 @@ +[pytest] +testpaths = . 
+python_files = test_*.py + +markers = + slow: marks tests as slow (deselect with '-m "not slow"') + daily: tests which should be run daily From 2b4186a4f89ee9ba9299d0d33e986eb746159653 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Wed, 20 Sep 2023 23:15:37 +1000 Subject: [PATCH 05/22] test_chat_completion_Architect passes --- pilot/test_main_e2e.py | 4 ++-- pilot/utils/llm_connection.py | 6 ++---- pilot/utils/test_llm_connection.py | 32 ++++++++++++++++++++++++++---- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/pilot/test_main_e2e.py b/pilot/test_main_e2e.py index beeba9ef0..4df59d509 100644 --- a/pilot/test_main_e2e.py +++ b/pilot/test_main_e2e.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv load_dotenv() -from database.database import create_tables, drop_tables +from database.database import create_tables from helpers.Project import Project from .main import init, get_custom_print @@ -40,7 +40,7 @@ def unsafe_ask(self): elif self.state == 'DONE': answer = 'DONE' else: # if self.state == 'project_description': - answer = next(self.answers) + answer = next(self.answers, '') print('User:', answer) return answer diff --git a/pilot/utils/llm_connection.py b/pilot/utils/llm_connection.py index cf00ef142..624ad78bc 100644 --- a/pilot/utils/llm_connection.py +++ b/pilot/utils/llm_connection.py @@ -14,7 +14,8 @@ from helpers.exceptions.TokenLimitError import TokenLimitError from utils.utils import fix_json - +model = os.getenv('MODEL_NAME') +endpoint = os.getenv('ENDPOINT') def get_tokens_in_messages(messages: List[str]) -> int: @@ -22,9 +23,6 @@ def get_tokens_in_messages(messages: List[str]) -> int: tokenized_messages = [tokenizer.encode(message['content']) for message in messages] return sum(len(tokens) for tokens in tokenized_messages) -#get endpoint and model name from .ENV file -model = os.getenv('MODEL_NAME') -endpoint = os.getenv('ENDPOINT') def num_tokens_from_functions(functions, model=model): """Return the number of tokens used by a list of functions.""" diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index 38e16afb6..93cdc5d8c 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -1,9 +1,11 @@ +import builtins from dotenv import load_dotenv from const.function_calls import ARCHITECTURE from helpers.AgentConvo import AgentConvo from helpers.Project import Project from helpers.agents.Architect import Architect from .llm_connection import create_gpt_chat_completion +from main import get_custom_print load_dotenv() @@ -11,7 +13,8 @@ class TestLlmConnection: - """Test the LLM connection class.""" + def setup_method(self): + builtins.print, ipc_client_instance = get_custom_print({}) def test_chat_completion_Architect(self): """Test the chat completion method.""" @@ -21,10 +24,27 @@ def test_chat_completion_Architect(self): convo.construct_and_add_message_from_prompt('architecture/technologies.prompt', { 'name': 'Test App', - 'prompt': 'A web-based chat app', + 'prompt': ''' + The project involves the development of a web-based chat application named "Test_App". + In this application, users can send direct messages to each other. + However, it does not include a group chat functionality. + Multimedia messaging, such as the exchange of images and videos, is not a requirement for this application. + No clear instructions were given for the inclusion of user profile customization features like profile + picture and status updates, as well as a feature for chat history. 
The project must be developed strictly + as a monolithic application, regardless of any other suggested methods. + The project's specifications are subject to the project manager's discretion, implying a need for + solution-oriented decision-making in areas where precise instructions were not provided.''', 'app_type': 'web app', 'user_stories': [ - 'As a user I want to be able view messages sent and received' + 'User will be able to send direct messages to another user.', + 'User will receive direct messages from other users.', + 'User will view the sent and received messages in a conversation view.', + 'User will select a user to send a direct message.', + 'User will be able to search for users to send direct messages to.', + 'Users can view the online status of other users.', + 'User will be able to log into the application using their credentials.', + 'User will be able to logout from the Test_App.', + 'User will be able to register a new account on Test_App.', ] }) @@ -34,9 +54,13 @@ def test_chat_completion_Architect(self): # When response = create_gpt_chat_completion(messages, '', function_calls=ARCHITECTURE) # Then + # You are and experienced software architect... + # You are working in a software development agency... + assert len(convo.messages) == 2 assert response is not None assert len(response) > 0 - # assert response != prompt + technologies: list[str] = response['function_calls']['arguments']['technologies'] + assert 'Node.js' in technologies def _create_convo(self, agent): From 714658a0c80e733f82a5a1cb7f33c84574251eb1 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Thu, 21 Sep 2023 00:25:25 +1000 Subject: [PATCH 06/22] tidy up tests --- pilot/helpers/AgentConvo.py | 3 ++- pilot/utils/test_llm_connection.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pilot/helpers/AgentConvo.py b/pilot/helpers/AgentConvo.py index d6163d8a7..59ce3ae0a 100644 --- a/pilot/helpers/AgentConvo.py +++ b/pilot/helpers/AgentConvo.py @@ -23,7 +23,8 @@ class AgentConvo: agent: An instance of the agent participating in the conversation. """ def __init__(self, agent): - self.messages = [] + # [{'role': 'system'|'user'|'assistant', 'content': ''}, ...] + self.messages: list[dict] = [] self.branches = {} self.log_to_user = True self.agent = agent diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index 93cdc5d8c..35c86e7f7 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -49,14 +49,14 @@ def test_chat_completion_Architect(self): }) messages = convo.messages - # messages = [{"role": "user", "content": "I want to create a website"}] # When response = create_gpt_chat_completion(messages, '', function_calls=ARCHITECTURE) + # Then - # You are and experienced software architect... - # You are working in a software development agency... 
assert len(convo.messages) == 2 + assert convo.messages[0]['content'].startswith('You are an experienced software architect') + assert convo.messages[1]['content'].startswith('You are working in a software development agency') assert response is not None assert len(response) > 0 technologies: list[str] = response['function_calls']['arguments']['technologies'] From d4879a04b7c1357c509cdcfed336fe380fcc4ed1 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Thu, 21 Sep 2023 23:05:37 +1000 Subject: [PATCH 07/22] ARCHITECTURE function_calls works on meta-llama/codellama-34b-instruct --- .github/workflows/ci.yml | 4 +- README.md | 2 +- pilot/helpers/AgentConvo.py | 14 +- .../prompts/system_messages/architect.prompt | 2 +- pilot/utils/function_calling.py | 169 ++++++++++++++++++ pilot/utils/llm_connection.py | 33 ++-- pilot/utils/test_llm_connection.py | 104 ++++++++++- requirements.txt | 1 + 8 files changed, 297 insertions(+), 32 deletions(-) create mode 100644 pilot/utils/function_calling.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da98d3f7e..54cf21e11 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,9 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + # 3.10 - 04 Oct 2021 + # 3.11 - 24 Oct 2022 + python-version: ['3.11'] steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index 7ca495c11..dc9ab9399 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ https://github.com/Pythagora-io/gpt-pilot/assets/10895136/0495631b-511e-451b-93d # 🔌 Requirements -- **Python** +- **Python >= 3.11** - **PostgreSQL** (optional, projects default is SQLite) - DB is needed for multiple reasons like continuing app development if you had to stop at any point or app crashed, going back to specific step so you can change some later steps in development, easier debugging, for future we will add functionality to update project (change some things in existing project or add new features to the project and so on)... 
diff --git a/pilot/helpers/AgentConvo.py b/pilot/helpers/AgentConvo.py index 59ce3ae0a..1b2900d31 100644 --- a/pilot/helpers/AgentConvo.py +++ b/pilot/helpers/AgentConvo.py @@ -1,11 +1,10 @@ +import json import re import subprocess import uuid from utils.style import yellow, yellow_bold from database.database import get_saved_development_step, save_development_step, delete_all_subsequent_steps -from helpers.files import get_files_content -from const.common import IGNORE_FOLDERS from helpers.exceptions.TokenLimitError import TokenLimitError from utils.utils import array_of_objects_to_string, get_prompt from utils.llm_connection import create_gpt_chat_completion @@ -188,10 +187,17 @@ def postprocess_response(self, response, function_calls): """ if 'function_calls' in response and function_calls is not None: if 'send_convo' in function_calls: - response['function_calls']['arguments']['convo'] = self + response['function_calls']['arguments']['convo'] = self response = function_calls['functions'][response['function_calls']['name']](**response['function_calls']['arguments']) elif 'text' in response: - response = response['text'] + if function_calls: + values = list(json.loads(response['text']).values()) + if len(values) == 1: + return values[0] + else: + return tuple(values) + else: + response = response['text'] return response diff --git a/pilot/prompts/system_messages/architect.prompt b/pilot/prompts/system_messages/architect.prompt index 4d5700db1..343432d26 100644 --- a/pilot/prompts/system_messages/architect.prompt +++ b/pilot/prompts/system_messages/architect.prompt @@ -1,7 +1,7 @@ You are an experienced software architect. Your expertise is in creating an architecture for an MVP (minimum viable products) for {{ app_type }}s that can be developed as fast as possible by using as many ready-made technologies as possible. The technologies that you prefer using when other technologies are not explicitly specified are: **Scripts**: you prefer using Node.js for writing scripts that are meant to be ran just with the CLI. -**Backend**: you prefer using Node.js with Mongo database if not explicitely specified otherwise. When you're using Mongo, you always use Mongoose and when you're using Postgresql, you always use PeeWee as an ORM. +**Backend**: you prefer using Node.js with Mongo database if not explicitly specified otherwise. When you're using Mongo, you always use Mongoose and when you're using Postgresql, you always use PeeWee as an ORM. **Testing**: To create unit and integration tests, you prefer using Jest for Node.js projects and pytest for Python projects. To create end-to-end tests, you prefer using Cypress. 
diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py new file mode 100644 index 000000000..dd36bc93b --- /dev/null +++ b/pilot/utils/function_calling.py @@ -0,0 +1,169 @@ +import json +# from local_llm_function_calling import Generator +# from local_llm_function_calling.model.llama import LlamaModel +# from local_llm_function_calling.model.huggingface import HuggingfaceModel +from local_llm_function_calling.prompter import FunctionType, CompletionModelPrompter, InstructModelPrompter +# from local_llm_function_calling.model.llama import LlamaInstructPrompter + +from typing import Literal, NotRequired, Protocol, TypeVar, TypedDict, Callable + + +class FunctionCallSet(TypedDict): + definitions: list[FunctionType] + functions: dict[str, Callable] + + +def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | None): + if function_calls is None: + return + + if gpt_data['model'] == 'gpt-4': + gpt_data['functions'] = function_calls['definitions'] + if len(function_calls['definitions']) > 1: + gpt_data['function_call'] = 'auto' + else: + gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']} + return + + # prompter = CompletionModelPrompter() + # prompter = InstructModelPrompter() + prompter = LlamaInstructPrompter() + + if len(function_calls['definitions']) > 1: + function_call = None + else: + function_call = function_calls['definitions'][0]['name'] + + gpt_data['messages'].append({ + 'role': 'user', + 'content': prompter.prompt('', function_calls['definitions'], function_call) + }) + + +class LlamaInstructPrompter: + """ + A prompter for Llama2 instruct models. + Adapted from local_llm_function_calling + """ + + def function_descriptions( + self, functions: list[FunctionType], function_to_call: str + ) -> list[str]: + """Get the descriptions of the functions + + Args: + functions (list[FunctionType]): The functions to get the descriptions of + function_to_call (str): The function to call + + Returns: + list[str]: The descriptions of the functions + (empty if the function doesn't exist or has no description) + """ + return [ + "Function description: " + function["description"] + for function in functions + if function["name"] == function_to_call and "description" in function + ] + + def function_parameters( + self, functions: list[FunctionType], function_to_call: str + ) -> str: + """Get the parameters of the function + + Args: + functions (list[FunctionType]): The functions to get the parameters of + function_to_call (str): The function to call + + Returns: + str: The parameters of the function as a JSON schema + """ + return next( + json.dumps(function["parameters"]["properties"], indent=4) + for function in functions + if function["name"] == function_to_call + ) + + def function_data( + self, functions: list[FunctionType], function_to_call: str + ) -> str: + """Get the data for the function + + Args: + functions (list[FunctionType]): The functions to get the data for + function_to_call (str): The function to call + + Returns: + str: The data necessary to generate the arguments for the function + """ + return "\n".join( + self.function_descriptions(functions, function_to_call) + + [ + "Function parameters should follow this schema:", + "```jsonschema", + self.function_parameters(functions, function_to_call), + "```", + ] + ) + + def function_summary(self, function: FunctionType) -> str: + """Get a summary of a function + + Args: + function (FunctionType): The function to get the summary of + + Returns: + str: The summary of 
the function, as a bullet point + """ + return f"- {function['name']}" + ( + f" - {function['description']}" if "description" in function else "" + ) + + def functions_summary(self, functions: list[FunctionType]) -> str: + """Get a summary of the functions + + Args: + functions (list[FunctionType]): The functions to get the summary of + + Returns: + str: The summary of the functions, as a bulleted list + """ + return "Available functions:\n" + "\n".join( + self.function_summary(function) for function in functions + ) + + def prompt( + self, + prompt: str, + functions: list[FunctionType], + function_to_call: str | None = None, + ) -> str: + """Generate the llama prompt + + Args: + prompt (str): The prompt to generate the response to + functions (list[FunctionType]): The functions to generate the response from + function_to_call (str | None): The function to call. Defaults to None. + + Returns: + list[bytes | int]: The llama prompt, a function selection prompt if no + function is specified, or a function argument prompt if a function is + specified + """ + system = ( + "Help choose the appropriate function to call to answer the user's question." + if function_to_call is None + else f"Define the arguments for {function_to_call} to answer the user's question." + ) + "In your response you must only use JSON output and provide no notes or commentary." + data = ( + self.function_data(functions, function_to_call) + if function_to_call + else self.functions_summary(functions) + ) + response_start = ( + f"Here are the arguments for the `{function_to_call}` function: ```json\n" + if function_to_call + else "Here's the function the user should call: " + ) + return f"[INST] <>\n{system}\n\n{data}\n<>\n\n{prompt} [/INST]" + # {response_start}" + diff --git a/pilot/utils/llm_connection.py b/pilot/utils/llm_connection.py index 624ad78bc..cf20bcb3f 100644 --- a/pilot/utils/llm_connection.py +++ b/pilot/utils/llm_connection.py @@ -7,16 +7,14 @@ import tiktoken import questionary + from utils.style import red from typing import List from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS from logger.logger import logger from helpers.exceptions.TokenLimitError import TokenLimitError from utils.utils import fix_json - -model = os.getenv('MODEL_NAME') -endpoint = os.getenv('ENDPOINT') - +from utils.function_calling import add_function_calls_to_request def get_tokens_in_messages(messages: List[str]) -> int: tokenizer = tiktoken.get_encoding("cl100k_base") # GPT-4 tokenizer @@ -24,7 +22,7 @@ def get_tokens_in_messages(messages: List[str]) -> int: return sum(len(tokens) for tokens in tokenized_messages) -def num_tokens_from_functions(functions, model=model): +def num_tokens_from_functions(functions): """Return the number of tokens used by a list of functions.""" encoding = tiktoken.get_encoding("cl100k_base") @@ -96,13 +94,7 @@ def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TO if key in gpt_data: del gpt_data[key] - if function_calls is not None: - # Advise the LLM of the JSON response schema we are expecting - gpt_data['functions'] = function_calls['definitions'] - if len(function_calls['definitions']) > 1: - gpt_data['function_call'] = 'auto' - else: - gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']} + add_function_calls_to_request(gpt_data, function_calls) try: response = stream_gpt_completion(gpt_data, req_type) @@ -110,7 +102,7 @@ def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TO except TokenLimitError 
as e: raise e except Exception as e: - print('The request to OpenAI API failed. Here is the error message:') + print(f'The request to {os.getenv("ENDPOINT")} API failed. Here is the error message:') print(e) @@ -126,6 +118,7 @@ def count_lines_based_on_width(content, width): lines_required = sum(len(line) // width + 1 for line in content.split('\n')) return lines_required + def get_tokens_in_messages_from_openai_error(error_message): """ Extract the token count from a message. @@ -208,7 +201,10 @@ def return_result(result_data, lines_printed): logger.info(f'Request data: {data}') - # Check if the ENDPOINT is AZURE + # Configure for the selected ENDPOINT + model = os.getenv('MODEL_NAME') + endpoint = os.getenv('ENDPOINT') + if endpoint == 'AZURE': # If yes, get the AZURE_ENDPOINT from .ENV file endpoint_url = os.getenv('AZURE_ENDPOINT') + '/openai/deployments/' + model + '/chat/completions?api-version=2023-05-15' @@ -239,10 +235,9 @@ def return_result(result_data, lines_printed): gpt_response = '' function_calls = {'name': '', 'arguments': ''} - for line in response.iter_lines(): # Ignore keep-alive new lines - if line: + if line and line != b': OPENROUTER PROCESSING': line = line.decode("utf-8") # decode the bytes to string if line.startswith('data: '): @@ -262,11 +257,13 @@ def return_result(result_data, lines_printed): logger.error(f'Error in LLM response: {json_line}') raise ValueError(f'Error in LLM response: {json_line["error"]["message"]}') - if json_line['choices'][0]['finish_reason'] == 'function_call': + choice = json_line['choices'][0] + + if 'finish_reason' in choice and choice['finish_reason'] == 'function_call': function_calls['arguments'] = load_data_to_json(function_calls['arguments']) return return_result({'function_calls': function_calls}, lines_printed) - json_line = json_line['choices'][0]['delta'] + json_line = choice['delta'] except json.JSONDecodeError: logger.error(f'Unable to decode line: {line}') diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index 35c86e7f7..3c1ea32c0 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -1,9 +1,14 @@ import builtins +import os from dotenv import load_dotenv -from const.function_calls import ARCHITECTURE +from unittest.mock import patch +from local_llm_function_calling.prompter import CompletionModelPrompter, InstructModelPrompter + +from const.function_calls import ARCHITECTURE, DEV_STEPS from helpers.AgentConvo import AgentConvo from helpers.Project import Project from helpers.agents.Architect import Architect +from helpers.agents.Developer import Developer from .llm_connection import create_gpt_chat_completion from main import get_custom_print @@ -16,7 +21,31 @@ class TestLlmConnection: def setup_method(self): builtins.print, ipc_client_instance = get_custom_print({}) - def test_chat_completion_Architect(self): + # def test_break_down_development_task(self): + # # Given + # agent = Developer(project) + # convo = AgentConvo(agent) + # # convo.construct_and_add_message_from_prompt('architecture/technologies.prompt', + # # { + # # 'name': 'Test App', + # # 'prompt': ''' + # + # messages = convo.messages + # function_calls = DEV_STEPS + # + # # When + # # response = create_gpt_chat_completion(messages, '', function_calls=function_calls) + # response = {'function_calls': { + # 'name': 'break_down_development_task', + # 'arguments': {'tasks': [{'type': 'command', 'description': 'Run the app'}]} + # }} + # response = convo.postprocess_response(response, 
function_calls) + # + # # Then + # # assert len(convo.messages) == 2 + # assert response == ([{'type': 'command', 'description': 'Run the app'}], 'more_tasks') + + def test_chat_completion_Architect(self, monkeypatch): """Test the chat completion method.""" # Given agent = Architect(project) @@ -49,19 +78,80 @@ def test_chat_completion_Architect(self): }) messages = convo.messages + function_calls = ARCHITECTURE + endpoint = 'OPENROUTER' + # monkeypatch.setattr('utils.llm_connection.endpoint', endpoint) + monkeypatch.setenv('ENDPOINT', endpoint) + monkeypatch.setenv('MODEL_NAME', 'meta-llama/codellama-34b-instruct') + # with patch('.llm_connection.endpoint', endpoint): # When - response = create_gpt_chat_completion(messages, '', function_calls=ARCHITECTURE) + response = create_gpt_chat_completion(messages, '', function_calls=function_calls) # Then - assert len(convo.messages) == 2 assert convo.messages[0]['content'].startswith('You are an experienced software architect') assert convo.messages[1]['content'].startswith('You are working in a software development agency') + assert response is not None - assert len(response) > 0 - technologies: list[str] = response['function_calls']['arguments']['technologies'] - assert 'Node.js' in technologies + response = convo.postprocess_response(response, function_calls) + # response = response['function_calls']['arguments']['technologies'] + assert 'Node.js' in response + + def test_completion_function_prompt(self): + # Given + prompter = CompletionModelPrompter() + + # When + prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') + + # Then + assert prompt == '''Create a web-based chat app + +Available functions: +process_technologies - Print the list of technologies that are created. +```jsonschema +{ + "technologies": { + "type": "array", + "description": "List of technologies that are created in a list.", + "items": { + "type": "string", + "description": "technology" + } + } +} +``` + +Function call: + +Function call: ''' + + def test_instruct_function_prompter(self): + # Given + prompter = InstructModelPrompter() + + # When + prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') + + # Then + assert prompt == '''Your task is to call a function when needed. You will be provided with a list of functions. Available functions: +process_technologies - Print the list of technologies that are created. 
+```jsonschema +{ + "technologies": { + "type": "array", + "description": "List of technologies that are created in a list.", + "items": { + "type": "string", + "description": "technology" + } + } +} +``` + +Create a web-based chat app +Function call: ''' def _create_convo(self, agent): convo = AgentConvo(agent) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 7a4eecaf2..602600180 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ charset-normalizer==3.2.0 distro==1.8.0 idna==3.4 Jinja2==3.1.2 +local_llm_function_calling==0.1.14 MarkupSafe==2.1.3 peewee==3.16.2 prompt-toolkit==3.0.39 From 4d91f381c13a278c78dc81e40cde15bd0520fdef Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Fri, 22 Sep 2023 19:11:50 +1000 Subject: [PATCH 08/22] Tested Developer.install_technology before refactoring without function_calling --- .github/workflows/ci.yml | 2 +- pilot/helpers/agents/test_Developer.py | 61 ++++++++++++++ pilot/test/mock_questionary.py | 25 ++++++ pilot/test_main_e2e.py | 27 +----- pilot/utils/test_function_calling.py | 112 +++++++++++++++++++++++++ pilot/utils/test_llm_connection.py | 56 +------------ pytest.ini | 1 + 7 files changed, 203 insertions(+), 81 deletions(-) create mode 100644 pilot/helpers/agents/test_Developer.py create mode 100644 pilot/test/mock_questionary.py create mode 100644 pilot/utils/test_function_calling.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 54cf21e11..c27c8fbec 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,4 +43,4 @@ jobs: run: | pip install pytest cd pilot - PYTHONPATH=. pytest -m "not slow" + PYTHONPATH=. pytest -m "not slow and not uses_tokens" diff --git a/pilot/helpers/agents/test_Developer.py b/pilot/helpers/agents/test_Developer.py new file mode 100644 index 000000000..0dc603b99 --- /dev/null +++ b/pilot/helpers/agents/test_Developer.py @@ -0,0 +1,61 @@ +import builtins +import os +from unittest.mock import patch, Mock + +from helpers.AgentConvo import AgentConvo +from dotenv import load_dotenv +load_dotenv() + +from main import get_custom_print +from .Developer import Developer, ENVIRONMENT_SETUP_STEP +from helpers.Project import Project + + +def mock_terminal_size(): + mock_size = Mock() + mock_size.columns = 80 # or whatever width you want + return mock_size + + +class TestDeveloper: + def setup_method(self): + builtins.print, ipc_client_instance = get_custom_print({}) + + name = 'TestDeveloper' + self.project = Project({ + 'app_id': 'test-developer', + 'name': name, + 'app_type': '' + }, + name=name, + architecture=[], + user_stories=[] + ) + + self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), + '../../../workspace/TestDeveloper')) + self.project.technologies = [] + self.project.current_step = ENVIRONMENT_SETUP_STEP + self.developer = Developer(self.project) + + # @pytest.mark.uses_tokens + @patch('helpers.AgentConvo.get_saved_development_step') + @patch('helpers.AgentConvo.save_development_step') + @patch('helpers.AgentConvo.create_gpt_chat_completion', + return_value={'function_calls': { + 'name': 'execute_command', + 'arguments': {'command': 'python --version', 'timeout': 10} + }}) + @patch('helpers.cli.styled_text', return_value='no') + @patch('helpers.cli.execute_command', return_value=('', 'DONE')) + def test_install_technology(self, mock_execute_command, mock_styled_text, + mock_completion, mock_save, mock_get_saved_step): + # Given + self.developer.convo_os_specific_tech = 
AgentConvo(self.developer) + + # When + llm_response = self.developer.install_technology('python') + + # Then + assert llm_response == 'DONE' + mock_execute_command.assert_called_once_with(self.project, 'python --version', 10) diff --git a/pilot/test/mock_questionary.py b/pilot/test/mock_questionary.py new file mode 100644 index 000000000..56f1ed9b2 --- /dev/null +++ b/pilot/test/mock_questionary.py @@ -0,0 +1,25 @@ +class MockQuestionary: + def __init__(self, answers=None): + if answers is None: + answers = [] + self.answers = iter(answers) + self.state = 'project_description' + + def text(self, question: str, style=None): + print('AI: ' + question) + if question.startswith('User Story'): + self.state = 'user_stories' + elif question.endswith('write "DONE"'): + self.state = 'DONE' + return self + + def unsafe_ask(self): + if self.state == 'user_stories': + answer = '' + elif self.state == 'DONE': + answer = 'DONE' + else: # if self.state == 'project_description': + answer = next(self.answers, '') + + print('User:', answer) + return answer diff --git a/pilot/test_main_e2e.py b/pilot/test_main_e2e.py index 4df59d509..e865e7035 100644 --- a/pilot/test_main_e2e.py +++ b/pilot/test_main_e2e.py @@ -6,6 +6,7 @@ from database.database import create_tables from helpers.Project import Project +from test.mock_questionary import MockQuestionary from .main import init, get_custom_print @@ -21,32 +22,8 @@ def test_init(): assert args[field] is None -class MockQuestionary(): - def __init__(self, answers=[]): - self.answers = iter(answers) - self.state = 'project_description' - - def text(self, question: str, style=None): - print('AI: ' + question) - if question.startswith('User Story'): - self.state = 'user_stories' - elif question.endswith('write "DONE"'): - self.state = 'DONE' - return self - - def unsafe_ask(self): - if self.state == 'user_stories': - answer = '' - elif self.state == 'DONE': - answer = 'DONE' - else: # if self.state == 'project_description': - answer = next(self.answers, '') - - print('User:', answer) - return answer - - @pytest.mark.slow +@pytest.mark.uses_tokens @pytest.mark.skip(reason="Uses lots of tokens") def test_end_to_end(): # Given diff --git a/pilot/utils/test_function_calling.py b/pilot/utils/test_function_calling.py new file mode 100644 index 000000000..978e68a59 --- /dev/null +++ b/pilot/utils/test_function_calling.py @@ -0,0 +1,112 @@ +from local_llm_function_calling.prompter import CompletionModelPrompter, InstructModelPrompter + +from const.function_calls import ARCHITECTURE, DEV_STEPS +from .function_calling import JsonPrompter + + +def test_completion_function_prompt(): + # Given + prompter = CompletionModelPrompter() + + # When + prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') + + # Then + assert prompt == '''Create a web-based chat app + +Available functions: +process_technologies - Print the list of technologies that are created. +```jsonschema +{ + "technologies": { + "type": "array", + "description": "List of technologies that are created in a list.", + "items": { + "type": "string", + "description": "technology" + } + } +} +``` + +Function call: + +Function call: ''' + + +def test_instruct_function_prompter(): + # Given + prompter = InstructModelPrompter() + + # When + prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') + + # Then + assert prompt == '''Your task is to call a function when needed. 
You will be provided with a list of functions. Available functions: +process_technologies - Print the list of technologies that are created. +```jsonschema +{ + "technologies": { + "type": "array", + "description": "List of technologies that are created in a list.", + "items": { + "type": "string", + "description": "technology" + } + } +} +``` + +Create a web-based chat app + +Function call: ''' + + +def test_json_prompter(): + # Given + prompter = JsonPrompter() + + # When + prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') + + # Then + assert prompt == '''[INST] <> +Help choose the appropriate function to call to answer the user's question. +In your response you must only use JSON output and provide no notes or commentary. + +Available functions: +- process_technologies - Print the list of technologies that are created. +<> + +Create a web-based chat app [/INST]''' + + +def test_llama_instruct_function_prompter_named(): + # Given + prompter = LlamaInstructPrompter() + + # When + prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies') + + # Then + assert prompt == '''[INST] <> +Define the arguments for process_technologies to answer the user's question. +In your response you must only use JSON output and provide no notes or commentary. + +Function description: Print the list of technologies that are created. +Function parameters should follow this schema: +```jsonschema +{ + "technologies": { + "type": "array", + "description": "List of technologies that are created in a list.", + "items": { + "type": "string", + "description": "technology" + } + } +} +``` +<> + +Create a web-based chat app [/INST]''' diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index 3c1ea32c0..a5c82daae 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -2,7 +2,7 @@ import os from dotenv import load_dotenv from unittest.mock import patch -from local_llm_function_calling.prompter import CompletionModelPrompter, InstructModelPrompter + from const.function_calls import ARCHITECTURE, DEV_STEPS from helpers.AgentConvo import AgentConvo @@ -97,61 +97,7 @@ def test_chat_completion_Architect(self, monkeypatch): # response = response['function_calls']['arguments']['technologies'] assert 'Node.js' in response - def test_completion_function_prompt(self): - # Given - prompter = CompletionModelPrompter() - - # When - prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') - - # Then - assert prompt == '''Create a web-based chat app - -Available functions: -process_technologies - Print the list of technologies that are created. -```jsonschema -{ - "technologies": { - "type": "array", - "description": "List of technologies that are created in a list.", - "items": { - "type": "string", - "description": "technology" - } - } -} -``` - -Function call: - -Function call: ''' - - def test_instruct_function_prompter(self): - # Given - prompter = InstructModelPrompter() - - # When - prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') - - # Then - assert prompt == '''Your task is to call a function when needed. You will be provided with a list of functions. Available functions: -process_technologies - Print the list of technologies that are created. 
-```jsonschema -{ - "technologies": { - "type": "array", - "description": "List of technologies that are created in a list.", - "items": { - "type": "string", - "description": "technology" - } - } -} -``` - -Create a web-based chat app -Function call: ''' def _create_convo(self, agent): convo = AgentConvo(agent) \ No newline at end of file diff --git a/pytest.ini b/pytest.ini index a3b504a84..b0c4c733a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -4,4 +4,5 @@ python_files = test_*.py markers = slow: marks tests as slow (deselect with '-m "not slow"') + uses_tokens: Integration tests which use tokens daily: tests which should be run daily From 575c25902ea15ca02f24ed0d84e1016460c78652 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Fri, 22 Sep 2023 22:41:38 +1000 Subject: [PATCH 09/22] Refactored AgentConvo.postprecess_response() into function_calling.process_json_response() --- pilot/helpers/AgentConvo.py | 33 +--------- pilot/utils/function_calling.py | 24 +++++++ pilot/utils/test_function_calling.py | 93 ++++++++++++++++++++++------ 3 files changed, 102 insertions(+), 48 deletions(-) diff --git a/pilot/helpers/AgentConvo.py b/pilot/helpers/AgentConvo.py index 1b2900d31..25e309b74 100644 --- a/pilot/helpers/AgentConvo.py +++ b/pilot/helpers/AgentConvo.py @@ -1,4 +1,3 @@ -import json import re import subprocess import uuid @@ -6,9 +5,9 @@ from database.database import get_saved_development_step, save_development_step, delete_all_subsequent_steps from helpers.exceptions.TokenLimitError import TokenLimitError -from utils.utils import array_of_objects_to_string, get_prompt +from utils.function_calling import parse_agent_response from utils.llm_connection import create_gpt_chat_completion -from utils.utils import get_sys_message, find_role_from_step, capitalize_first_word_with_underscores +from utils.utils import array_of_objects_to_string, get_prompt, get_sys_message, capitalize_first_word_with_underscores from logger.logger import logger from prompts.prompts import ask_user from const.llm import END_RESPONSE @@ -83,7 +82,7 @@ def send_message(self, prompt_path=None, prompt_data=None, function_calls=None): if response == {}: raise Exception("OpenAI API error happened.") - response = self.postprocess_response(response, function_calls) + response = parse_agent_response(response, function_calls) # TODO remove this once the database is set up properly message_content = response[0] if type(response) == tuple else response @@ -174,32 +173,6 @@ def replace_file_content(self, message, file_path, new_content): def convo_length(self): return len([msg for msg in self.messages if msg['role'] != 'system']) - def postprocess_response(self, response, function_calls): - """ - Post-processes the response from the agent. - - Args: - response: The response from the agent. - function_calls: Optional function calls associated with the response. - - Returns: - The post-processed response. 
- """ - if 'function_calls' in response and function_calls is not None: - if 'send_convo' in function_calls: - response['function_calls']['arguments']['convo'] = self - response = function_calls['functions'][response['function_calls']['name']](**response['function_calls']['arguments']) - elif 'text' in response: - if function_calls: - values = list(json.loads(response['text']).values()) - if len(values) == 1: - return values[0] - else: - return tuple(values) - else: - response = response['text'] - - return response def log_message(self, content): """ diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py index dd36bc93b..0ec33603b 100644 --- a/pilot/utils/function_calling.py +++ b/pilot/utils/function_calling.py @@ -1,4 +1,5 @@ import json +import re # from local_llm_function_calling import Generator # from local_llm_function_calling.model.llama import LlamaModel # from local_llm_function_calling.model.huggingface import HuggingfaceModel @@ -40,6 +41,29 @@ def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | No }) +def parse_agent_response(response, function_calls: FunctionCallSet | None): + """ + Post-processes the response from the agent. + + Args: + response: The response from the agent. + function_calls: Optional function calls associated with the response. + + Returns: + The post-processed response. + """ + + if function_calls: + text = re.sub(r'^```json\n', '', response['text']) + values = list(json.loads(text.strip('` \n')).values()) + if len(values) == 1: + return values[0] + else: + return tuple(values) + + return response['text'] + + class LlamaInstructPrompter: """ A prompter for Llama2 instruct models. diff --git a/pilot/utils/test_function_calling.py b/pilot/utils/test_function_calling.py index 978e68a59..635e1c61d 100644 --- a/pilot/utils/test_function_calling.py +++ b/pilot/utils/test_function_calling.py @@ -1,7 +1,64 @@ from local_llm_function_calling.prompter import CompletionModelPrompter, InstructModelPrompter from const.function_calls import ARCHITECTURE, DEV_STEPS -from .function_calling import JsonPrompter +from .function_calling import parse_agent_response, LlamaInstructPrompter + + +class TestFunctionCalling: + def test_parse_agent_response_text(self): + # Given + response = {'text': 'Hello world!'} + + # When + response = parse_agent_response(response, None) + + # Then + assert response == 'Hello world!' + + def test_parse_agent_response_json(self): + # Given + response = {'text': '{"greeting": "Hello world!"}'} + function_calls = {'definitions': [], 'functions': {}} + + # When + response = parse_agent_response(response, function_calls) + + # Then + assert response == 'Hello world!' + + def test_parse_agent_response_json_markdown(self): + # Given + response = {'text': '```json\n{"greeting": "Hello world!"}\n```'} + function_calls = {'definitions': [], 'functions': {}} + + # When + response = parse_agent_response(response, function_calls) + + # Then + assert response == 'Hello world!' + + def test_parse_agent_response_markdown(self): + # Given + response = {'text': '```\n{"greeting": "Hello world!"}\n```'} + function_calls = {'definitions': [], 'functions': {}} + + # When + response = parse_agent_response(response, function_calls) + + # Then + assert response == 'Hello world!' 
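+
+    # A minimal extra check, added for illustration: parse_agent_response strips an
+    # optional leading "```json" fence and then trims stray backticks, spaces and
+    # newlines before calling json.loads, so a fenced response with a trailing
+    # newline decodes the same way. When the decoded object has several keys the
+    # values come back as a tuple, as the next test shows.
+    def test_parse_agent_response_json_markdown_trailing_newline(self):
+        # Given
+        response = {'text': '```json\n{"greeting": "Hello world!"}\n```\n'}
+        function_calls = {'definitions': [], 'functions': {}}
+
+        # When
+        response = parse_agent_response(response, function_calls)
+
+        # Then
+        assert response == 'Hello world!'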
+ + def test_parse_agent_response_multiple_args(self): + # Given + response = {'text': '{"greeting": "Hello", "name": "John"}'} + function_calls = {'definitions': [], 'functions': {}} + + # When + greeting, name = parse_agent_response(response, function_calls) + + # Then + assert greeting == 'Hello' + assert name == 'John' def test_completion_function_prompt(): @@ -62,23 +119,23 @@ def test_instruct_function_prompter(): Function call: ''' -def test_json_prompter(): - # Given - prompter = JsonPrompter() - - # When - prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') - - # Then - assert prompt == '''[INST] <> -Help choose the appropriate function to call to answer the user's question. -In your response you must only use JSON output and provide no notes or commentary. - -Available functions: -- process_technologies - Print the list of technologies that are created. -<> - -Create a web-based chat app [/INST]''' +# def test_json_prompter(): +# # Given +# prompter = JsonPrompter() +# +# # When +# prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') +# +# # Then +# assert prompt == '''[INST] <> +# Help choose the appropriate function to call to answer the user's question. +# In your response you must only use JSON output and provide no notes or commentary. +# +# Available functions: +# - process_technologies - Print the list of technologies that are created. +# <> +# +# Create a web-based chat app [/INST]''' def test_llama_instruct_function_prompter_named(): From 156b36126363974817ad944298f0fb159d006b2e Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 00:45:23 +1000 Subject: [PATCH 10/22] function_call-style JSON response from gpt-4, gpt-3_5, codellama, palm-2-chat-bison --- .github/workflows/ci.yml | 3 +- pilot/utils/function_calling.py | 43 +++++++----- pilot/utils/llm_connection.py | 2 + pilot/utils/test_function_calling.py | 4 +- pilot/utils/test_llm_connection.py | 97 ++++++++++++++++++---------- 5 files changed, 96 insertions(+), 53 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c27c8fbec..2fca91108 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,9 +21,10 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + cache: 'pip' - name: Install dependencies run: | diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py index 0ec33603b..03eac2715 100644 --- a/pilot/utils/function_calling.py +++ b/pilot/utils/function_calling.py @@ -18,25 +18,33 @@ def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | No if function_calls is None: return - if gpt_data['model'] == 'gpt-4': - gpt_data['functions'] = function_calls['definitions'] - if len(function_calls['definitions']) > 1: - gpt_data['function_call'] = 'auto' - else: - gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']} - return + model: str = gpt_data['model'] + is_llama = 'llama' in model + + # if model == 'gpt-4': + # gpt_data['functions'] = function_calls['definitions'] + # if len(function_calls['definitions']) > 1: + # gpt_data['function_call'] = 'auto' + # else: + # gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']} + # return # prompter = CompletionModelPrompter() # prompter = 
InstructModelPrompter() - prompter = LlamaInstructPrompter() + prompter = JsonPrompter(is_llama) if len(function_calls['definitions']) > 1: function_call = None else: function_call = function_calls['definitions'][0]['name'] + role = 'user' if '/' in model else 'system' + # role = 'user' + # role = 'system' + # is_llama = True + gpt_data['messages'].append({ - 'role': 'user', + 'role': role, 'content': prompter.prompt('', function_calls['definitions'], function_call) }) @@ -54,7 +62,7 @@ def parse_agent_response(response, function_calls: FunctionCallSet | None): """ if function_calls: - text = re.sub(r'^```json\n', '', response['text']) + text = re.sub(r'^.*```json\s*', '', response['text'], flags=re.DOTALL) values = list(json.loads(text.strip('` \n')).values()) if len(values) == 1: return values[0] @@ -64,11 +72,12 @@ def parse_agent_response(response, function_calls: FunctionCallSet | None): return response['text'] -class LlamaInstructPrompter: +class JsonPrompter: """ - A prompter for Llama2 instruct models. Adapted from local_llm_function_calling """ + def __init__(self, is_llama: bool = False): + self.is_llama = is_llama def function_descriptions( self, functions: list[FunctionType], function_to_call: str @@ -177,7 +186,9 @@ def prompt( "Help choose the appropriate function to call to answer the user's question." if function_to_call is None else f"Define the arguments for {function_to_call} to answer the user's question." - ) + "In your response you must only use JSON output and provide no notes or commentary." + # ) + "\nYou must return a JSON object without notes or commentary." + ) + " \nIn your response you must only use JSON output and provide no explanation or commentary." + data = ( self.function_data(functions, function_to_call) if function_to_call @@ -188,6 +199,8 @@ def prompt( if function_to_call else "Here's the function the user should call: " ) - return f"[INST] <>\n{system}\n\n{data}\n<>\n\n{prompt} [/INST]" - # {response_start}" + if self.is_llama: + return f"[INST] <>\n{system}\n\n{data}\n<>\n\n{prompt} [/INST]" + else: + return f"{system}\n\n{data}\n\n{prompt}" diff --git a/pilot/utils/llm_connection.py b/pilot/utils/llm_connection.py index cf20bcb3f..2aac35aca 100644 --- a/pilot/utils/llm_connection.py +++ b/pilot/utils/llm_connection.py @@ -264,6 +264,8 @@ def return_result(result_data, lines_printed): return return_result({'function_calls': function_calls}, lines_printed) json_line = choice['delta'] + # TODO: token healing? https://github.com/1rgs/jsonformer-claude + # ...Is this what local_llm_function_calling.constrainer is for? 
except json.JSONDecodeError: logger.error(f'Unable to decode line: {line}') diff --git a/pilot/utils/test_function_calling.py b/pilot/utils/test_function_calling.py index 635e1c61d..c64b2ce99 100644 --- a/pilot/utils/test_function_calling.py +++ b/pilot/utils/test_function_calling.py @@ -1,7 +1,7 @@ from local_llm_function_calling.prompter import CompletionModelPrompter, InstructModelPrompter from const.function_calls import ARCHITECTURE, DEV_STEPS -from .function_calling import parse_agent_response, LlamaInstructPrompter +from .function_calling import parse_agent_response, JsonPrompter class TestFunctionCalling: @@ -140,7 +140,7 @@ def test_instruct_function_prompter(): def test_llama_instruct_function_prompter_named(): # Given - prompter = LlamaInstructPrompter() + prompter = JsonPrompter() # When prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies') diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index a5c82daae..fcba855b1 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -1,14 +1,14 @@ import builtins import os +import pytest from dotenv import load_dotenv -from unittest.mock import patch - from const.function_calls import ARCHITECTURE, DEV_STEPS from helpers.AgentConvo import AgentConvo from helpers.Project import Project from helpers.agents.Architect import Architect from helpers.agents.Developer import Developer +from utils.function_calling import parse_agent_response from .llm_connection import create_gpt_chat_completion from main import get_custom_print @@ -45,44 +45,72 @@ def setup_method(self): # # assert len(convo.messages) == 2 # assert response == ([{'type': 'command', 'description': 'Run the app'}], 'more_tasks') - def test_chat_completion_Architect(self, monkeypatch): - """Test the chat completion method.""" + # @pytest.fixture(params=[ + # {"endpoint": "OPENAI", "model": "gpt-4"}, + # {"endpoint": "OPENROUTER", "model": "openai/gpt-3.5-turbo"}, + # {"endpoint": "OPENROUTER", "model": "meta-llama/codellama-34b-instruct"}, + # {"endpoint": "OPENROUTER", "model": "anthropic/claude-2"}, + # {"endpoint": "OPENROUTER", "model": "google/palm-2-codechat-bison"}, + # {"endpoint": "OPENROUTER", "model": "google/palm-2-chat-bison"}, + # ]) + # def params(self, request): + # return request.param + + @pytest.mark.slow + @pytest.mark.uses_tokens + @pytest.mark.parametrize("endpoint, model", [ + ("OPENAI", "gpt-4"), # role: system + ("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user + ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama + ("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system + + # See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py + # ("OPENROUTER", "anthropic/claude-2"), # role: user, prompt 2 + # ("OPENROUTER", "google/palm-2-codechat-bison"), # not working + ]) + def test_chat_completion_Architect(self, endpoint, model, monkeypatch): # Given agent = Architect(project) convo = AgentConvo(agent) convo.construct_and_add_message_from_prompt('architecture/technologies.prompt', - { - 'name': 'Test App', - 'prompt': ''' - The project involves the development of a web-based chat application named "Test_App". - In this application, users can send direct messages to each other. - However, it does not include a group chat functionality. - Multimedia messaging, such as the exchange of images and videos, is not a requirement for this application. 
- No clear instructions were given for the inclusion of user profile customization features like profile - picture and status updates, as well as a feature for chat history. The project must be developed strictly - as a monolithic application, regardless of any other suggested methods. - The project's specifications are subject to the project manager's discretion, implying a need for - solution-oriented decision-making in areas where precise instructions were not provided.''', - 'app_type': 'web app', - 'user_stories': [ - 'User will be able to send direct messages to another user.', - 'User will receive direct messages from other users.', - 'User will view the sent and received messages in a conversation view.', - 'User will select a user to send a direct message.', - 'User will be able to search for users to send direct messages to.', - 'Users can view the online status of other users.', - 'User will be able to log into the application using their credentials.', - 'User will be able to logout from the Test_App.', - 'User will be able to register a new account on Test_App.', - ] - }) + { + 'name': 'Test App', + 'prompt': ''' +The project involves the development of a web-based chat application named "Test_App". +In this application, users can send direct messages to each other. +However, it does not include a group chat functionality. +Multimedia messaging, such as the exchange of images and videos, is not a requirement for this application. +No clear instructions were given for the inclusion of user profile customization features like profile +picture and status updates, as well as a feature for chat history. The project must be developed strictly +as a monolithic application, regardless of any other suggested methods. +The project's specifications are subject to the project manager's discretion, implying a need for +solution-oriented decision-making in areas where precise instructions were not provided.''', + 'app_type': 'web app', + 'user_stories': [ + 'User will be able to send direct messages to another user.', + 'User will receive direct messages from other users.', + 'User will view the sent and received messages in a conversation view.', + 'User will select a user to send a direct message.', + 'User will be able to search for users to send direct messages to.', + 'Users can view the online status of other users.', + 'User will be able to log into the application using their credentials.', + 'User will be able to logout from the Test_App.', + 'User will be able to register a new account on Test_App.', + ] + }) - messages = convo.messages - function_calls = ARCHITECTURE - endpoint = 'OPENROUTER' + # endpoint = 'OPENROUTER' # monkeypatch.setattr('utils.llm_connection.endpoint', endpoint) monkeypatch.setenv('ENDPOINT', endpoint) - monkeypatch.setenv('MODEL_NAME', 'meta-llama/codellama-34b-instruct') + monkeypatch.setenv('MODEL_NAME', model) + # monkeypatch.setenv('MODEL_NAME', 'meta-llama/codellama-34b-instruct') + # monkeypatch.setenv('MODEL_NAME', 'openai/gpt-3.5-turbo-16k-0613') + # monkeypatch.setenv('MODEL_NAME', 'anthropic/claude-2') # TODO: remove ```json\n ... 
``` + # monkeypatch.setenv('MODEL_NAME', 'google/palm-2-codechat-bison') # TODO: not JSON + # monkeypatch.setenv('MODEL_NAME', 'google/palm-2-chat-bison') # TODO: not JSON + + messages = convo.messages + function_calls = ARCHITECTURE # with patch('.llm_connection.endpoint', endpoint): # When @@ -93,11 +121,10 @@ def test_chat_completion_Architect(self, monkeypatch): assert convo.messages[1]['content'].startswith('You are working in a software development agency') assert response is not None - response = convo.postprocess_response(response, function_calls) + response = parse_agent_response(response, function_calls) # response = response['function_calls']['arguments']['technologies'] assert 'Node.js' in response - def _create_convo(self, agent): convo = AgentConvo(agent) \ No newline at end of file From 18aec507e8eb75d60ee133722069461321318777 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 14:34:36 +1000 Subject: [PATCH 11/22] anthropic/claude-2 returns JSON gpt-3_5 returns technologies such as "Backend: Node.js with Mongo database (Mongoose)" codellama throws an error due to missing `choices` --- pilot/utils/function_calling.py | 11 ++++++----- pilot/utils/test_llm_connection.py | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py index 03eac2715..e22b20e54 100644 --- a/pilot/utils/function_calling.py +++ b/pilot/utils/function_calling.py @@ -19,7 +19,7 @@ def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | No return model: str = gpt_data['model'] - is_llama = 'llama' in model + is_llama = 'llama' in model or 'anthropic' in model # if model == 'gpt-4': # gpt_data['functions'] = function_calls['definitions'] @@ -93,7 +93,7 @@ def function_descriptions( (empty if the function doesn't exist or has no description) """ return [ - "Function description: " + function["description"] + function["description"] for function in functions if function["name"] == function_to_call and "description" in function ] @@ -131,8 +131,8 @@ def function_data( return "\n".join( self.function_descriptions(functions, function_to_call) + [ - "Function parameters should follow this schema:", - "```jsonschema", + "The response should be a JSON object matching this schema:", + "```json", self.function_parameters(functions, function_to_call), "```", ] @@ -187,7 +187,8 @@ def prompt( if function_to_call is None else f"Define the arguments for {function_to_call} to answer the user's question." # ) + "\nYou must return a JSON object without notes or commentary." - ) + " \nIn your response you must only use JSON output and provide no explanation or commentary." + # ) + " \nIn your response you must only use JSON output and provide no explanation or commentary." + ) + " \nThe response should contain only the JSON object, with no additional text or explanation." 
data = ( self.function_data(functions, function_to_call) diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index fcba855b1..e896b2cf4 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -61,11 +61,11 @@ def setup_method(self): @pytest.mark.parametrize("endpoint, model", [ ("OPENAI", "gpt-4"), # role: system ("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user - ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama + ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama missed "choices" ("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system # See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py - # ("OPENROUTER", "anthropic/claude-2"), # role: user, prompt 2 + ("OPENROUTER", "anthropic/claude-2"), # role: user, prompt 2 - sometimes JSON, sometimes Python to generate JSON # ("OPENROUTER", "google/palm-2-codechat-bison"), # not working ]) def test_chat_completion_Architect(self, endpoint, model, monkeypatch): From f4789538a32fea0787113eed33138a043079c7a0 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 14:52:57 +1000 Subject: [PATCH 12/22] 6 models returning JSON as per ARCHITECTURE function_calls schema --- pilot/const/function_calls.py | 2 +- pilot/helpers/agents/Developer.py | 22 +++++-------------- .../prompts/architecture/technologies.prompt | 2 +- pilot/utils/test_llm_connection.py | 8 +++---- 4 files changed, 11 insertions(+), 23 deletions(-) diff --git a/pilot/const/function_calls.py b/pilot/const/function_calls.py index 4bbf730db..c27194351 100644 --- a/pilot/const/function_calls.py +++ b/pilot/const/function_calls.py @@ -28,7 +28,7 @@ def return_array_from_prompt(name_plural, name_singular, return_var_name): "properties": { f"{return_var_name}": { "type": "array", - "description": f"List of {name_plural} that are created in a list.", + "description": f"List of {name_plural}.", "items": { "type": "string", "description": f"{name_singular}" diff --git a/pilot/helpers/agents/Developer.py b/pilot/helpers/agents/Developer.py index 53b3bcdec..69e536414 100644 --- a/pilot/helpers/agents/Developer.py +++ b/pilot/helpers/agents/Developer.py @@ -339,32 +339,20 @@ def set_up_environment(self): }, 'timeout': { 'type': 'number', - 'description': 'Timeout in seconds for the approcimate time this command takes to finish.', + 'description': 'Timeout in seconds for the approximate time this command takes to finish.', } }, 'required': ['command', 'timeout'], }, }], 'functions': { - 'execute_command': execute_command_and_check_cli_response - }, - 'send_convo': True + 'execute_command': lambda command, timeout: (command, timeout) + } }) - if llm_response != 'DONE': - installation_commands = self.convo_os_specific_tech.send_message('development/env_setup/unsuccessful_installation.prompt', - { 'technology': technology }, EXECUTE_COMMANDS) - if installation_commands is not None: - for cmd in installation_commands: - run_command_until_success(cmd['command'], cmd['timeout'], self.convo_os_specific_tech) + cli_response, llm_response = execute_command_and_check_cli_response(command, timeout, self.convo_os_specific_tech) - logger.info('The entire tech stack needed is installed and ready to be used.') - - save_progress(self.project.args['app_id'], self.project.current_step, { - "os_specific_technologies": os_specific_technologies, "newly_installed_technologies": [], "app_data": generate_app_data(self.project.args) - }) - - # ENVIRONMENT 
SETUP END + return llm_response def test_code_changes(self, code_monkey, convo): (test_type, command, automated_test_description, manual_test_description) = convo.send_message( diff --git a/pilot/prompts/architecture/technologies.prompt b/pilot/prompts/architecture/technologies.prompt index 2fd17dfc4..c041a6a05 100644 --- a/pilot/prompts/architecture/technologies.prompt +++ b/pilot/prompts/architecture/technologies.prompt @@ -27,7 +27,7 @@ Here are user tasks that specify what users need to do to interact with "{{ name {% endfor %} ```#} -Now, based on the app's description, user stories and user tasks, think step by step and write up all technologies that will be used by your development team to create the app "{{ name }}". Do not write any explanations behind your choices but only a list of technologies that will be used. +Now, based on the app's description, user stories and user tasks, think step by step and list the names of the technologies that will be used by your development team to create the app "{{ name }}". Do not write any explanations behind your choices but only a list of technologies that will be used. You do not need to list any technologies related to automated tests like Jest, Cypress, Mocha, Selenium, etc. diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index e896b2cf4..2505bdd01 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -63,10 +63,10 @@ def setup_method(self): ("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama missed "choices" ("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system - - # See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py - ("OPENROUTER", "anthropic/claude-2"), # role: user, prompt 2 - sometimes JSON, sometimes Python to generate JSON - # ("OPENROUTER", "google/palm-2-codechat-bison"), # not working + ("OPENROUTER", "google/palm-2-codechat-bison"), + # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py + # https://github.com/guidance-ai/guidance - token healing + ("OPENROUTER", "anthropic/claude-2"), # role: user, is_llama ]) def test_chat_completion_Architect(self, endpoint, model, monkeypatch): # Given From b317f5855050c49c232e007de61e02da9f5d6813 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 16:27:02 +1000 Subject: [PATCH 13/22] clean up --- .github/workflows/ci.yml | 2 +- README.md | 2 +- pilot/utils/function_calling.py | 38 +++++++++----- pilot/utils/llm_connection.py | 2 +- pilot/utils/test_llm_connection.py | 79 ++++++++++-------------------- requirements.txt | 1 - 6 files changed, 55 insertions(+), 69 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2fca91108..db4ef81ab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: matrix: # 3.10 - 04 Oct 2021 # 3.11 - 24 Oct 2022 - python-version: ['3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index dc9ab9399..a6c0bd394 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ https://github.com/Pythagora-io/gpt-pilot/assets/10895136/0495631b-511e-451b-93d # 🔌 Requirements -- **Python >= 3.11** +- **Python >= 3.8** - **PostgreSQL** (optional, projects default is SQLite) - DB is needed for multiple reasons like continuing app development if you had to stop at any point or app crashed, going 
back to specific step so you can change some later steps in development, easier debugging, for future we will add functionality to update project (change some things in existing project or add new features to the project and so on)... diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py index e22b20e54..98ba0be12 100644 --- a/pilot/utils/function_calling.py +++ b/pilot/utils/function_calling.py @@ -1,12 +1,31 @@ import json import re -# from local_llm_function_calling import Generator -# from local_llm_function_calling.model.llama import LlamaModel -# from local_llm_function_calling.model.huggingface import HuggingfaceModel -from local_llm_function_calling.prompter import FunctionType, CompletionModelPrompter, InstructModelPrompter -# from local_llm_function_calling.model.llama import LlamaInstructPrompter +from typing import Literal, NotRequired, TypedDict, Callable -from typing import Literal, NotRequired, Protocol, TypeVar, TypedDict, Callable +JsonType = str | int | float | bool | None | list["JsonType"] | dict[str, "JsonType"] + + +class FunctionParameters(TypedDict): + """Function parameters""" + + type: Literal["object"] + properties: dict[str, JsonType] + required: NotRequired[list[str]] + + +class FunctionType(TypedDict): + """Function type""" + + name: str + description: NotRequired[str] + parameters: FunctionParameters + + +class FunctionCall(TypedDict): + """Function call""" + + name: str + parameters: str class FunctionCallSet(TypedDict): @@ -29,8 +48,6 @@ def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | No # gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']} # return - # prompter = CompletionModelPrompter() - # prompter = InstructModelPrompter() prompter = JsonPrompter(is_llama) if len(function_calls['definitions']) > 1: @@ -39,9 +56,6 @@ def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | No function_call = function_calls['definitions'][0]['name'] role = 'user' if '/' in model else 'system' - # role = 'user' - # role = 'system' - # is_llama = True gpt_data['messages'].append({ 'role': role, @@ -186,8 +200,6 @@ def prompt( "Help choose the appropriate function to call to answer the user's question." if function_to_call is None else f"Define the arguments for {function_to_call} to answer the user's question." - # ) + "\nYou must return a JSON object without notes or commentary." - # ) + " \nIn your response you must only use JSON output and provide no explanation or commentary." ) + " \nThe response should contain only the JSON object, with no additional text or explanation." 
data = ( diff --git a/pilot/utils/llm_connection.py b/pilot/utils/llm_connection.py index 2aac35aca..8a59a724d 100644 --- a/pilot/utils/llm_connection.py +++ b/pilot/utils/llm_connection.py @@ -88,7 +88,7 @@ def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TO } # delete some keys if using "OpenRouter" API - if os.getenv('ENDPOINT') == "OPENROUTER": + if os.getenv('ENDPOINT') == 'OPENROUTER': keys_to_delete = ['n', 'max_tokens', 'temperature', 'top_p', 'presence_penalty', 'frequency_penalty'] for key in keys_to_delete: if key in gpt_data: diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index 2505bdd01..6d1d2aea1 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -1,5 +1,4 @@ import builtins -import os import pytest from dotenv import load_dotenv @@ -21,42 +20,6 @@ class TestLlmConnection: def setup_method(self): builtins.print, ipc_client_instance = get_custom_print({}) - # def test_break_down_development_task(self): - # # Given - # agent = Developer(project) - # convo = AgentConvo(agent) - # # convo.construct_and_add_message_from_prompt('architecture/technologies.prompt', - # # { - # # 'name': 'Test App', - # # 'prompt': ''' - # - # messages = convo.messages - # function_calls = DEV_STEPS - # - # # When - # # response = create_gpt_chat_completion(messages, '', function_calls=function_calls) - # response = {'function_calls': { - # 'name': 'break_down_development_task', - # 'arguments': {'tasks': [{'type': 'command', 'description': 'Run the app'}]} - # }} - # response = convo.postprocess_response(response, function_calls) - # - # # Then - # # assert len(convo.messages) == 2 - # assert response == ([{'type': 'command', 'description': 'Run the app'}], 'more_tasks') - - # @pytest.fixture(params=[ - # {"endpoint": "OPENAI", "model": "gpt-4"}, - # {"endpoint": "OPENROUTER", "model": "openai/gpt-3.5-turbo"}, - # {"endpoint": "OPENROUTER", "model": "meta-llama/codellama-34b-instruct"}, - # {"endpoint": "OPENROUTER", "model": "anthropic/claude-2"}, - # {"endpoint": "OPENROUTER", "model": "google/palm-2-codechat-bison"}, - # {"endpoint": "OPENROUTER", "model": "google/palm-2-chat-bison"}, - # ]) - # def params(self, request): - # return request.param - - @pytest.mark.slow @pytest.mark.uses_tokens @pytest.mark.parametrize("endpoint, model", [ ("OPENAI", "gpt-4"), # role: system @@ -70,6 +33,9 @@ def setup_method(self): ]) def test_chat_completion_Architect(self, endpoint, model, monkeypatch): # Given + monkeypatch.setenv('ENDPOINT', endpoint) + monkeypatch.setenv('MODEL_NAME', model) + agent = Architect(project) convo = AgentConvo(agent) convo.construct_and_add_message_from_prompt('architecture/technologies.prompt', @@ -99,22 +65,10 @@ def test_chat_completion_Architect(self, endpoint, model, monkeypatch): ] }) - # endpoint = 'OPENROUTER' - # monkeypatch.setattr('utils.llm_connection.endpoint', endpoint) - monkeypatch.setenv('ENDPOINT', endpoint) - monkeypatch.setenv('MODEL_NAME', model) - # monkeypatch.setenv('MODEL_NAME', 'meta-llama/codellama-34b-instruct') - # monkeypatch.setenv('MODEL_NAME', 'openai/gpt-3.5-turbo-16k-0613') - # monkeypatch.setenv('MODEL_NAME', 'anthropic/claude-2') # TODO: remove ```json\n ... 
``` - # monkeypatch.setenv('MODEL_NAME', 'google/palm-2-codechat-bison') # TODO: not JSON - # monkeypatch.setenv('MODEL_NAME', 'google/palm-2-chat-bison') # TODO: not JSON - - messages = convo.messages function_calls = ARCHITECTURE - # with patch('.llm_connection.endpoint', endpoint): # When - response = create_gpt_chat_completion(messages, '', function_calls=function_calls) + response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls) # Then assert convo.messages[0]['content'].startswith('You are an experienced software architect') @@ -122,9 +76,30 @@ def test_chat_completion_Architect(self, endpoint, model, monkeypatch): assert response is not None response = parse_agent_response(response, function_calls) - # response = response['function_calls']['arguments']['technologies'] assert 'Node.js' in response + # def test_break_down_development_task(self): + # # Given + # agent = Developer(project) + # convo = AgentConvo(agent) + # # convo.construct_and_add_message_from_prompt('architecture/technologies.prompt', + # # { + # # 'name': 'Test App', + # # 'prompt': ''' + # + # function_calls = DEV_STEPS + # + # # When + # response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls) + # # response = {'function_calls': { + # # 'name': 'break_down_development_task', + # # 'arguments': {'tasks': [{'type': 'command', 'description': 'Run the app'}]} + # # }} + # response = parse_agent_response(response, function_calls) + # + # # Then + # # assert len(convo.messages) == 2 + # assert response == ([{'type': 'command', 'description': 'Run the app'}], 'more_tasks') def _create_convo(self, agent): - convo = AgentConvo(agent) \ No newline at end of file + convo = AgentConvo(agent) diff --git a/requirements.txt b/requirements.txt index 602600180..7a4eecaf2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,6 @@ charset-normalizer==3.2.0 distro==1.8.0 idna==3.4 Jinja2==3.1.2 -local_llm_function_calling==0.1.14 MarkupSafe==2.1.3 peewee==3.16.2 prompt-toolkit==3.0.39 From 623908d09331f865d004c530088cd4e57d20cdfe Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 17:08:22 +1000 Subject: [PATCH 14/22] fixed tests --- pilot/helpers/agents/CodeMonkey.py | 6 ++-- pilot/helpers/agents/test_CodeMonkey.py | 12 ++++--- pilot/test/__init__.py | 0 pilot/utils/function_calling.py | 2 +- pilot/utils/test_function_calling.py | 45 ++++++++++++++++++++----- 5 files changed, 48 insertions(+), 17 deletions(-) create mode 100644 pilot/test/__init__.py diff --git a/pilot/helpers/agents/CodeMonkey.py b/pilot/helpers/agents/CodeMonkey.py index c3116d8f9..69cab6869 100644 --- a/pilot/helpers/agents/CodeMonkey.py +++ b/pilot/helpers/agents/CodeMonkey.py @@ -1,9 +1,8 @@ from const.function_calls import GET_FILES, DEV_STEPS, IMPLEMENT_CHANGES, CODE_CHANGES -from database.models.files import File -from helpers.files import update_file from helpers.AgentConvo import AgentConvo from helpers.Agent import Agent + class CodeMonkey(Agent): def __init__(self, project, developer): super().__init__('code_monkey', project) @@ -20,12 +19,11 @@ def implement_code_changes(self, convo, code_changes_description, step_index=0): # "finished_steps": ', '.join(f"#{j}" for j in range(step_index)) # }, GET_FILES) - changes = convo.send_message('development/implement_changes.prompt', { "step_description": code_changes_description, "step_index": step_index, "directory_tree": self.project.get_directory_tree(True), - "files": []#self.project.get_files(files_needed), + "files": 
[] # self.project.get_files(files_needed), }, IMPLEMENT_CHANGES) convo.remove_last_x_messages(1) diff --git a/pilot/helpers/agents/test_CodeMonkey.py b/pilot/helpers/agents/test_CodeMonkey.py index 187bdf758..8c65e7f84 100644 --- a/pilot/helpers/agents/test_CodeMonkey.py +++ b/pilot/helpers/agents/test_CodeMonkey.py @@ -7,6 +7,7 @@ from .CodeMonkey import CodeMonkey from .Developer import Developer from database.models.files import File +from database.models.development_steps import DevelopmentSteps from helpers.Project import Project, update_file, clear_directory from helpers.AgentConvo import AgentConvo @@ -37,11 +38,14 @@ def setup_method(self): self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../../workspace/TestDeveloper')) self.project.technologies = [] + last_step = DevelopmentSteps() + last_step.id = 1 + self.project.checkpoints = {'last_development_step': last_step} self.project.app = None self.developer = Developer(self.project) self.codeMonkey = CodeMonkey(self.project, developer=self.developer) - @patch('helpers.AgentConvo.get_development_step_from_hash_id', return_value=None) + @patch('helpers.AgentConvo.get_saved_development_step', return_value=None) @patch('helpers.AgentConvo.save_development_step', return_value=None) @patch('os.get_terminal_size', mock_terminal_size) @patch.object(File, 'insert') @@ -54,7 +58,7 @@ def test_implement_code_changes(self, mock_get_dev, mock_save_dev, mock_file_ins else: convo = MagicMock() mock_responses = [ - [], + # [], [{ 'content': 'Washington', 'description': "A new .txt file with the word 'Washington' in it.", @@ -79,7 +83,7 @@ def test_implement_code_changes(self, mock_get_dev, mock_save_dev, mock_file_ins assert (called_data['path'] == '/' or called_data['path'] == called_data['name']) assert called_data['content'] == 'Washington' - @patch('helpers.AgentConvo.get_development_step_from_hash_id', return_value=None) + @patch('helpers.AgentConvo.get_saved_development_step', return_value=None) @patch('helpers.AgentConvo.save_development_step', return_value=None) @patch('os.get_terminal_size', mock_terminal_size) @patch.object(File, 'insert') @@ -94,7 +98,7 @@ def test_implement_code_changes_with_read(self, mock_get_dev, mock_save_dev, moc else: convo = MagicMock() mock_responses = [ - ['file_to_read.txt', 'output.txt'], + # ['file_to_read.txt', 'output.txt'], [{ 'content': 'Hello World!\n', 'description': 'This file is the output file. The content of file_to_read.txt is copied into this file.', diff --git a/pilot/test/__init__.py b/pilot/test/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py index 98ba0be12..3c7c3d269 100644 --- a/pilot/utils/function_calling.py +++ b/pilot/utils/function_calling.py @@ -200,7 +200,7 @@ def prompt( "Help choose the appropriate function to call to answer the user's question." if function_to_call is None else f"Define the arguments for {function_to_call} to answer the user's question." - ) + " \nThe response should contain only the JSON object, with no additional text or explanation." + ) + "\nThe response should contain only the JSON object, with no additional text or explanation." 
data = ( self.function_data(functions, function_to_call) diff --git a/pilot/utils/test_function_calling.py b/pilot/utils/test_function_calling.py index c64b2ce99..dfd038255 100644 --- a/pilot/utils/test_function_calling.py +++ b/pilot/utils/test_function_calling.py @@ -77,7 +77,7 @@ def test_completion_function_prompt(): { "technologies": { "type": "array", - "description": "List of technologies that are created in a list.", + "description": "List of technologies.", "items": { "type": "string", "description": "technology" @@ -105,7 +105,7 @@ def test_instruct_function_prompter(): { "technologies": { "type": "array", - "description": "List of technologies that are created in a list.", + "description": "List of technologies.", "items": { "type": "string", "description": "technology" @@ -138,25 +138,54 @@ def test_instruct_function_prompter(): # Create a web-based chat app [/INST]''' -def test_llama_instruct_function_prompter_named(): +def test_json_prompter_named(): # Given prompter = JsonPrompter() # When prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies') + # Then + assert prompt == '''Define the arguments for process_technologies to answer the user's question. +The response should contain only the JSON object, with no additional text or explanation. + +Print the list of technologies that are created. +The response should be a JSON object matching this schema: +```json +{ + "technologies": { + "type": "array", + "description": "List of technologies.", + "items": { + "type": "string", + "description": "technology" + } + } +} +``` + +Create a web-based chat app''' + + +def test_llama_json_prompter_named(): + # Given + prompter = JsonPrompter(is_llama=True) + + # When + prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies') + # Then assert prompt == '''[INST] <> Define the arguments for process_technologies to answer the user's question. -In your response you must only use JSON output and provide no notes or commentary. +The response should contain only the JSON object, with no additional text or explanation. -Function description: Print the list of technologies that are created. -Function parameters should follow this schema: -```jsonschema +Print the list of technologies that are created. 
+The response should be a JSON object matching this schema: +```json { "technologies": { "type": "array", - "description": "List of technologies that are created in a list.", + "description": "List of technologies.", "items": { "type": "string", "description": "technology" From c59f068a288b3df57dddee078e4e204ba27c90d7 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 17:09:54 +1000 Subject: [PATCH 15/22] run CI --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index db4ef81ab..4aa69450f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,7 @@ on: pull_request: branches: - main + - debugging_ipc jobs: build: From 323b2669c0dd95d2dba38678d75d9aff1a823934 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 17:14:54 +1000 Subject: [PATCH 16/22] updated test --- pilot/helpers/agents/test_Developer.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pilot/helpers/agents/test_Developer.py b/pilot/helpers/agents/test_Developer.py index 0dc603b99..f326b5630 100644 --- a/pilot/helpers/agents/test_Developer.py +++ b/pilot/helpers/agents/test_Developer.py @@ -42,10 +42,7 @@ def setup_method(self): @patch('helpers.AgentConvo.get_saved_development_step') @patch('helpers.AgentConvo.save_development_step') @patch('helpers.AgentConvo.create_gpt_chat_completion', - return_value={'function_calls': { - 'name': 'execute_command', - 'arguments': {'command': 'python --version', 'timeout': 10} - }}) + return_value={'text': '{"command": "python --version", "timeout": 10}'}) @patch('helpers.cli.styled_text', return_value='no') @patch('helpers.cli.execute_command', return_value=('', 'DONE')) def test_install_technology(self, mock_execute_command, mock_styled_text, From b5cebb95dcdc2f76154eb5d32dbc88442a9e3e83 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 17:21:06 +1000 Subject: [PATCH 17/22] linting --- pilot/helpers/agents/Developer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pilot/helpers/agents/Developer.py b/pilot/helpers/agents/Developer.py index 69e536414..bda64e883 100644 --- a/pilot/helpers/agents/Developer.py +++ b/pilot/helpers/agents/Developer.py @@ -350,7 +350,7 @@ def set_up_environment(self): } }) - cli_response, llm_response = execute_command_and_check_cli_response(command, timeout, self.convo_os_specific_tech) + cli_response, llm_response = execute_command_and_check_cli_response(cmd, timeout_val, self.convo_os_specific_tech) return llm_response From 1c290c86a0fea60a1f9e8a6a5c1228c65be4f8fa Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 17:22:40 +1000 Subject: [PATCH 18/22] linting --- pilot/helpers/agents/Developer.py | 68 +++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/pilot/helpers/agents/Developer.py b/pilot/helpers/agents/Developer.py index bda64e883..cc0064747 100644 --- a/pilot/helpers/agents/Developer.py +++ b/pilot/helpers/agents/Developer.py @@ -324,31 +324,57 @@ def set_up_environment(self): }, FILTER_OS_TECHNOLOGIES) for technology in os_specific_technologies: - # TODO move the functions definitions to function_calls.py - cli_response, llm_response = self.convo_os_specific_tech.send_message('development/env_setup/install_next_technology.prompt', - { 'technology': technology}, { - 'definitions': [{ - 'name': 'execute_command', - 'description': f'Executes a command that should check if {technology} is 
installed on the machine. ', - 'parameters': { - 'type': 'object', - 'properties': { - 'command': { - 'type': 'string', - 'description': f'Command that needs to be executed to check if {technology} is installed on the machine.', - }, - 'timeout': { - 'type': 'number', - 'description': 'Timeout in seconds for the approximate time this command takes to finish.', - } + llm_response = self.install_technology(technology) + + # TODO: I don't think llm_response would ever be 'DONE'? + if llm_response != 'DONE': + installation_commands = self.convo_os_specific_tech.send_message( + 'development/env_setup/unsuccessful_installation.prompt', + {'technology': technology}, + EXECUTE_COMMANDS) + + if installation_commands is not None: + for cmd in installation_commands: + run_command_until_success(cmd['command'], cmd['timeout'], self.convo_os_specific_tech) + + logger.info('The entire tech stack needed is installed and ready to be used.') + + save_progress(self.project.args['app_id'], self.project.current_step, { + "os_specific_technologies": os_specific_technologies, + "newly_installed_technologies": [], + "app_data": generate_app_data(self.project.args) + }) + + # ENVIRONMENT SETUP END + + # TODO: This is only called from the unreachable section of set_up_environment() + def install_technology(self, technology): + # TODO move the functions definitions to function_calls.py + cmd, timeout_val = self.convo_os_specific_tech.send_message( + 'development/env_setup/install_next_technology.prompt', + {'technology': technology}, { + 'definitions': [{ + 'name': 'execute_command', + 'description': f'Executes a command that should check if {technology} is installed on the machine. ', + 'parameters': { + 'type': 'object', + 'properties': { + 'command': { + 'type': 'string', + 'description': f'Command that needs to be executed to check if {technology} is installed on the machine.', }, - 'required': ['command', 'timeout'], + 'timeout': { + 'type': 'number', + 'description': 'Timeout in seconds for the approximate time this command takes to finish.', + } }, - }], - 'functions': { + 'required': ['command', 'timeout'], + }, + }], + 'functions': { 'execute_command': lambda command, timeout: (command, timeout) } - }) + }) cli_response, llm_response = execute_command_and_check_cli_response(cmd, timeout_val, self.convo_os_specific_tech) From dd10b4ad2417a8e5fdc5ca6d8690be1b8e17809b Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 20:20:58 +1000 Subject: [PATCH 19/22] removed local_llm_function_calling --- pilot/utils/function_calling.py | 6 +-- pilot/utils/test_function_calling.py | 75 ++++++---------------------- 2 files changed, 19 insertions(+), 62 deletions(-) diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py index 3c7c3d269..4ef7ae6de 100644 --- a/pilot/utils/function_calling.py +++ b/pilot/utils/function_calling.py @@ -1,6 +1,6 @@ import json import re -from typing import Literal, NotRequired, TypedDict, Callable +from typing import Literal, Optional, TypedDict, Callable JsonType = str | int | float | bool | None | list["JsonType"] | dict[str, "JsonType"] @@ -10,14 +10,14 @@ class FunctionParameters(TypedDict): type: Literal["object"] properties: dict[str, JsonType] - required: NotRequired[list[str]] + required: Optional[list[str]] class FunctionType(TypedDict): """Function type""" name: str - description: NotRequired[str] + description: Optional[str] parameters: FunctionParameters diff --git a/pilot/utils/test_function_calling.py b/pilot/utils/test_function_calling.py 
index dfd038255..36b54912d 100644 --- a/pilot/utils/test_function_calling.py +++ b/pilot/utils/test_function_calling.py @@ -1,5 +1,3 @@ -from local_llm_function_calling.prompter import CompletionModelPrompter, InstructModelPrompter - from const.function_calls import ARCHITECTURE, DEV_STEPS from .function_calling import parse_agent_response, JsonPrompter @@ -61,81 +59,40 @@ def test_parse_agent_response_multiple_args(self): assert name == 'John' -def test_completion_function_prompt(): +def test_json_prompter(): # Given - prompter = CompletionModelPrompter() + prompter = JsonPrompter() # When prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') # Then - assert prompt == '''Create a web-based chat app + assert prompt == '''Help choose the appropriate function to call to answer the user's question. +The response should contain only the JSON object, with no additional text or explanation. Available functions: -process_technologies - Print the list of technologies that are created. -```jsonschema -{ - "technologies": { - "type": "array", - "description": "List of technologies.", - "items": { - "type": "string", - "description": "technology" - } - } -} -``` - -Function call: +- process_technologies - Print the list of technologies that are created. -Function call: ''' +Create a web-based chat app''' -def test_instruct_function_prompter(): +def test_llama_json_prompter(): # Given - prompter = InstructModelPrompter() + prompter = JsonPrompter(is_llama=True) # When prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') # Then - assert prompt == '''Your task is to call a function when needed. You will be provided with a list of functions. Available functions: -process_technologies - Print the list of technologies that are created. -```jsonschema -{ - "technologies": { - "type": "array", - "description": "List of technologies.", - "items": { - "type": "string", - "description": "technology" - } - } -} -``` + assert prompt == '''[INST] <> +Help choose the appropriate function to call to answer the user's question. +The response should contain only the JSON object, with no additional text or explanation. -Create a web-based chat app - -Function call: ''' - - -# def test_json_prompter(): -# # Given -# prompter = JsonPrompter() -# -# # When -# prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') -# -# # Then -# assert prompt == '''[INST] <> -# Help choose the appropriate function to call to answer the user's question. -# In your response you must only use JSON output and provide no notes or commentary. -# -# Available functions: -# - process_technologies - Print the list of technologies that are created. -# <> -# -# Create a web-based chat app [/INST]''' +Available functions: +- process_technologies - Print the list of technologies that are created. 
+<> + +Create a web-based chat app [/INST]''' def test_json_prompter_named(): From 68f8368eaf5ece8e177c09cc7cee200a1146b3b9 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Sat, 23 Sep 2023 20:26:49 +1000 Subject: [PATCH 20/22] Require Python >= 3.10 --- .github/workflows/ci.yml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4aa69450f..026a8a3d6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: matrix: # 3.10 - 04 Oct 2021 # 3.11 - 24 Oct 2022 - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.10', '3.11'] steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index a6c0bd394..5c74d72fc 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ https://github.com/Pythagora-io/gpt-pilot/assets/10895136/0495631b-511e-451b-93d # 🔌 Requirements -- **Python >= 3.8** +- **Python >= 3.10** - **PostgreSQL** (optional, projects default is SQLite) - DB is needed for multiple reasons like continuing app development if you had to stop at any point or app crashed, going back to specific step so you can change some later steps in development, easier debugging, for future we will add functionality to update project (change some things in existing project or add new features to the project and so on)... From b8965f527d9802c1bc088980c9c78ae43d7371e4 Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Mon, 25 Sep 2023 10:36:37 +1000 Subject: [PATCH 21/22] notes and testing --- pilot/helpers/cli.py | 19 ++++++++++++++----- pilot/test_main_e2e.py | 15 ++++++++++++++- pilot/utils/test_llm_connection.py | 2 +- pilot/utils/utils.py | 4 ++-- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/pilot/helpers/cli.py b/pilot/helpers/cli.py index e85fa0b16..5ff36db94 100644 --- a/pilot/helpers/cli.py +++ b/pilot/helpers/cli.py @@ -93,7 +93,10 @@ def execute_command(project, command, timeout=None, force=False): force (bool, optional): Whether to execute the command without confirmation. Default is False. Returns: - str: The command output. + cli_response (str): The command output + or: '', 'DONE' if user answered 'no' or 'skip' + llm_response (str): The response from the agent. + TODO: this seems to be 'DONE' (no or skip) or None """ if timeout is not None: if timeout < 1000: @@ -109,6 +112,9 @@ def execute_command(project, command, timeout=None, force=False): 'If yes, just press ENTER' ) + # TODO: I think AutoGPT allows other feedback here, like: + # "That's not going to work, let's do X instead" + # We don't explicitly make "no" or "skip" options to the user if answer == 'no': return '', 'DONE' elif answer == 'skip': @@ -252,12 +258,15 @@ def execute_command_and_check_cli_response(command, timeout, convo): Returns: tuple: A tuple containing the CLI response and the agent's response. + - cli_response (str): The command output. + - llm_response (str): 'DONE' or 'NEEDS_DEBUGGING' """ - cli_response, response = execute_command(convo.agent.project, command, timeout) - if response is None: - response = convo.send_message('dev_ops/ran_command.prompt', + # TODO: Prompt mentions `command` could be `INSTALLED` or `NOT_INSTALLED`, where is this handled? 
+ cli_response, llm_response = execute_command(convo.agent.project, command, timeout) + if llm_response is None: + llm_response = convo.send_message('dev_ops/ran_command.prompt', { 'cli_response': cli_response, 'command': command }) - return cli_response, response + return cli_response, llm_response def run_command_until_success(command, timeout, convo, additional_message=None, force=False, return_cli_response=False, is_root_task=False): """ diff --git a/pilot/test_main_e2e.py b/pilot/test_main_e2e.py index e865e7035..10086e6aa 100644 --- a/pilot/test_main_e2e.py +++ b/pilot/test_main_e2e.py @@ -25,8 +25,21 @@ def test_init(): @pytest.mark.slow @pytest.mark.uses_tokens @pytest.mark.skip(reason="Uses lots of tokens") -def test_end_to_end(): +@pytest.mark.parametrize("endpoint, model", [ + # ("OPENAI", "gpt-4"), + # ("OPENROUTER", "openai/gpt-3.5-turbo"), + # ("OPENROUTER", "meta-llama/codellama-34b-instruct"), + ("OPENROUTER", "google/palm-2-chat-bison"), + ("OPENROUTER", "google/palm-2-codechat-bison"), + # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py + # https://github.com/guidance-ai/guidance - token healing + ("OPENROUTER", "anthropic/claude-2"), +]) +def test_end_to_end(endpoint, model, monkeypatch): # Given + monkeypatch.setenv('ENDPOINT', endpoint) + monkeypatch.setenv('MODEL_NAME', model) + create_tables() args = init() builtins.print, ipc_client_instance = get_custom_print(args) diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index 6d1d2aea1..f5ec80f6e 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -24,7 +24,7 @@ def setup_method(self): @pytest.mark.parametrize("endpoint, model", [ ("OPENAI", "gpt-4"), # role: system ("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user - ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama missed "choices" + ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama ("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system ("OPENROUTER", "google/palm-2-codechat-bison"), # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py diff --git a/pilot/utils/utils.py b/pilot/utils/utils.py index 1a2eb1c34..a3bb728bf 100644 --- a/pilot/utils/utils.py +++ b/pilot/utils/utils.py @@ -9,7 +9,7 @@ import hashlib import re from jinja2 import Environment, FileSystemLoader -from termcolor import colored +from .style import green from const.llm import MAX_QUESTIONS, END_RESPONSE from const.common import ROLES, STEPS @@ -138,7 +138,7 @@ def step_already_finished(args, step): args.update(step['app_data']) message = f"{capitalize_first_word_with_underscores(step['step'])} already done for this app_id: {args['app_id']}. Moving to next step..." - print(colored(message, 'green')) + print(green(message)) logger.info(message) From 8a024c2ff26f85342c3108441faf225e86040bda Mon Sep 17 00:00:00 2001 From: Nicholas Albion Date: Tue, 26 Sep 2023 17:27:54 +1000 Subject: [PATCH 22/22] rejecting responses that are not JSON. 
Need to fix prompts for GPT-4 --- README.md | 2 +- pilot/helpers/AgentConvo.py | 4 +- pilot/helpers/agents/test_CodeMonkey.py | 7 +- pilot/helpers/agents/test_Developer.py | 7 +- pilot/helpers/agents/test_TechLead.py | 72 +++++++++++++++ pilot/test/mock_questionary.py | 11 ++- pilot/test/test_utils.py | 11 +++ pilot/utils/function_calling.py | 23 ++--- pilot/utils/llm_connection.py | 111 +++++++++++++++++------ pilot/utils/test_function_calling.py | 4 +- pilot/utils/test_llm_connection.py | 112 ++++++++++++++++++++++-- 11 files changed, 298 insertions(+), 66 deletions(-) create mode 100644 pilot/helpers/agents/test_TechLead.py create mode 100644 pilot/test/test_utils.py diff --git a/README.md b/README.md index 5c74d72fc..791f7063e 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ After you have Python and PostgreSQL installed, follow these steps: 1. `git clone https://github.com/Pythagora-io/gpt-pilot.git` (clone the repo) 2. `cd gpt-pilot` 3. `python -m venv pilot-env` (create a virtual environment) -4. `source pilot-env/bin/activate` (activate the virtual environment) +4. `source pilot-env/bin/activate` (or on Windows `pilot-env\Scripts\activate`) (activate the virtual environment) 5. `pip install -r requirements.txt` (install the dependencies) 6. `cd pilot` 7. `mv .env.example .env` (create the .env file) diff --git a/pilot/helpers/AgentConvo.py b/pilot/helpers/AgentConvo.py index 25e309b74..928320b30 100644 --- a/pilot/helpers/AgentConvo.py +++ b/pilot/helpers/AgentConvo.py @@ -5,7 +5,7 @@ from database.database import get_saved_development_step, save_development_step, delete_all_subsequent_steps from helpers.exceptions.TokenLimitError import TokenLimitError -from utils.function_calling import parse_agent_response +from utils.function_calling import parse_agent_response, FunctionCallSet from utils.llm_connection import create_gpt_chat_completion from utils.utils import array_of_objects_to_string, get_prompt, get_sys_message, capitalize_first_word_with_underscores from logger.logger import logger @@ -31,7 +31,7 @@ def __init__(self, agent): # add system message self.messages.append(get_sys_message(self.agent.role)) - def send_message(self, prompt_path=None, prompt_data=None, function_calls=None): + def send_message(self, prompt_path=None, prompt_data=None, function_calls: FunctionCallSet = None): """ Sends a message in the conversation. 
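# A minimal sketch of the FunctionCallSet shape that the new send_message()
# signature above refers to (FunctionCallSet lives in pilot/utils/function_calling.py):
# a dict with JSON-schema style 'definitions' plus a 'functions' map of callables.
# The entry below is adapted from the execute_command definition used by the
# Developer agent earlier in this series; it is illustrative only, not part of the patch.
example_function_calls = {
    'definitions': [{
        'name': 'execute_command',
        'description': 'Executes a command that should check if a technology is installed.',
        'parameters': {
            'type': 'object',
            'properties': {
                'command': {'type': 'string', 'description': 'Command to execute.'},
                'timeout': {'type': 'number', 'description': 'Timeout in seconds.'},
            },
            'required': ['command', 'timeout'],
        },
    }],
    'functions': {
        'execute_command': lambda command, timeout: (command, timeout),
    },
}
# A caller would pass a dict of this shape as the function_calls argument of
# AgentConvo.send_message(), e.g. convo.send_message('some.prompt', {...}, example_function_calls).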
diff --git a/pilot/helpers/agents/test_CodeMonkey.py b/pilot/helpers/agents/test_CodeMonkey.py index 8c65e7f84..bb04b9711 100644 --- a/pilot/helpers/agents/test_CodeMonkey.py +++ b/pilot/helpers/agents/test_CodeMonkey.py @@ -10,17 +10,12 @@ from database.models.development_steps import DevelopmentSteps from helpers.Project import Project, update_file, clear_directory from helpers.AgentConvo import AgentConvo +from test.mock_terminal_size import mock_terminal_size SEND_TO_LLM = False WRITE_TO_FILE = False -def mock_terminal_size(): - mock_size = Mock() - mock_size.columns = 80 # or whatever width you want - return mock_size - - class TestCodeMonkey: def setup_method(self): name = 'TestDeveloper' diff --git a/pilot/helpers/agents/test_Developer.py b/pilot/helpers/agents/test_Developer.py index f326b5630..2009bce81 100644 --- a/pilot/helpers/agents/test_Developer.py +++ b/pilot/helpers/agents/test_Developer.py @@ -9,12 +9,7 @@ from main import get_custom_print from .Developer import Developer, ENVIRONMENT_SETUP_STEP from helpers.Project import Project - - -def mock_terminal_size(): - mock_size = Mock() - mock_size.columns = 80 # or whatever width you want - return mock_size +from test.mock_terminal_size import mock_terminal_size class TestDeveloper: diff --git a/pilot/helpers/agents/test_TechLead.py b/pilot/helpers/agents/test_TechLead.py new file mode 100644 index 000000000..f06d93cab --- /dev/null +++ b/pilot/helpers/agents/test_TechLead.py @@ -0,0 +1,72 @@ +import builtins +import os +import pytest +from unittest.mock import patch +from dotenv import load_dotenv +load_dotenv() + +from main import get_custom_print +from helpers.agents.TechLead import TechLead, DEVELOPMENT_PLANNING_STEP +from helpers.Project import Project +from test.test_utils import assert_non_empty_string, mock_terminal_size +from test.mock_questionary import MockQuestionary +from utils.function_calling import parse_agent_response + + +class TestTechLead: + def setup_method(self): + builtins.print, ipc_client_instance = get_custom_print({}) + + name = 'TestTechLead' + self.project = Project({ + 'app_id': 'test-tech-lead', + 'name': name, + 'app_type': '' + }, + name=name, + architecture=[], + user_stories=[] + ) + + self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), + '../../../workspace/TestTechLead')) + self.project.technologies = [] + self.project.project_description = ''' +The project entails creating a web-based chat application, tentatively named "chat_app." +This application does not require user authentication or chat history storage. +It solely supports one-on-one messaging, excluding group chats or multimedia sharing like photos, videos, or files. +Additionally, there are no specific requirements for real-time functionality, like live typing indicators or read receipts. +The development of this application will strictly follow a monolithic structure, avoiding the use of microservices, as per the client's demand. +The development process will include the creation of user stories and tasks, based on detailed discussions with the client. + ''' + self.project.user_stories = [ + 'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.', + 'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? 
If not, just press ENTER.', + 'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.', + 'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.', + 'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.', + 'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.', + ] + self.project.architecture = ['Node.js', 'Socket.io', 'Bootstrap', 'JavaScript', 'HTML5', 'CSS3'] + self.project.current_step = DEVELOPMENT_PLANNING_STEP + + @pytest.mark.uses_tokens + # @patch('database.database.get_progress_steps', return_value=None) + @patch('helpers.AgentConvo.get_saved_development_step', return_value=None) + @patch('helpers.agents.TechLead.save_progress', return_value=None) + # @patch('os.get_terminal_size', mock_terminal_size) + @patch('helpers.agents.TechLead.get_progress_steps', return_value=None) + def test_create_development_plan(self, mock_get_saved_step, mock_save_progress, mock_get_progress_steps): + self.techLead = TechLead(self.project) + + mock_questionary = MockQuestionary(['', '', 'no']) + + with patch('utils.llm_connection.questionary', mock_questionary): + # When + development_plan = self.techLead.create_development_plan() + + # Then + assert development_plan is not None + assert_non_empty_string(development_plan[0]['description']) + assert_non_empty_string(development_plan[0]['programmatic_goal']) + assert_non_empty_string(development_plan[0]['user_review_goal']) diff --git a/pilot/test/mock_questionary.py b/pilot/test/mock_questionary.py index 56f1ed9b2..59aa995eb 100644 --- a/pilot/test/mock_questionary.py +++ b/pilot/test/mock_questionary.py @@ -1,9 +1,13 @@ class MockQuestionary: - def __init__(self, answers=None): + def __init__(self, answers=None, initial_state='project_description'): if answers is None: answers = [] self.answers = iter(answers) - self.state = 'project_description' + self.state = initial_state + + class Style: + def __init__(self, *args, **kwargs): + pass def text(self, question: str, style=None): print('AI: ' + question) @@ -13,6 +17,9 @@ def text(self, question: str, style=None): self.state = 'DONE' return self + def ask(self): + return self.unsafe_ask() + def unsafe_ask(self): if self.state == 'user_stories': answer = '' diff --git a/pilot/test/test_utils.py b/pilot/test/test_utils.py new file mode 100644 index 000000000..a08354f67 --- /dev/null +++ b/pilot/test/test_utils.py @@ -0,0 +1,11 @@ +from unittest.mock import Mock + + +def mock_terminal_size(): + mock_size = Mock() + mock_size.columns = 80 # or whatever width you want + return mock_size + +def assert_non_empty_string(value): + assert isinstance(value, str) + assert len(value) > 0 diff --git a/pilot/utils/function_calling.py b/pilot/utils/function_calling.py index 4ef7ae6de..469bc53dc 100644 --- a/pilot/utils/function_calling.py +++ b/pilot/utils/function_calling.py @@ -38,17 +38,11 @@ def add_function_calls_to_request(gpt_data, function_calls: FunctionCallSet | No return model: 
str = gpt_data['model'] - is_llama = 'llama' in model or 'anthropic' in model + is_instruct = 'llama' in model or 'anthropic' in model - # if model == 'gpt-4': - # gpt_data['functions'] = function_calls['definitions'] - # if len(function_calls['definitions']) > 1: - # gpt_data['function_call'] = 'auto' - # else: - # gpt_data['function_call'] = {'name': function_calls['definitions'][0]['name']} - # return + gpt_data['functions'] = function_calls['definitions'] - prompter = JsonPrompter(is_llama) + prompter = JsonPrompter(is_instruct) if len(function_calls['definitions']) > 1: function_call = None @@ -77,7 +71,8 @@ def parse_agent_response(response, function_calls: FunctionCallSet | None): if function_calls: text = re.sub(r'^.*```json\s*', '', response['text'], flags=re.DOTALL) - values = list(json.loads(text.strip('` \n')).values()) + text = text.strip('` \n') + values = list(json.loads(text).values()) if len(values) == 1: return values[0] else: @@ -90,8 +85,8 @@ class JsonPrompter: """ Adapted from local_llm_function_calling """ - def __init__(self, is_llama: bool = False): - self.is_llama = is_llama + def __init__(self, is_instruct: bool = False): + self.is_instruct = is_instruct def function_descriptions( self, functions: list[FunctionType], function_to_call: str @@ -107,7 +102,7 @@ def function_descriptions( (empty if the function doesn't exist or has no description) """ return [ - function["description"] + f'# {function["name"]}: {function["description"]}' for function in functions if function["name"] == function_to_call and "description" in function ] @@ -213,7 +208,7 @@ def prompt( else "Here's the function the user should call: " ) - if self.is_llama: + if self.is_instruct: return f"[INST] <>\n{system}\n\n{data}\n<>\n\n{prompt} [/INST]" else: return f"{system}\n\n{data}\n\n{prompt}" diff --git a/pilot/utils/llm_connection.py b/pilot/utils/llm_connection.py index 8a59a724d..249259863 100644 --- a/pilot/utils/llm_connection.py +++ b/pilot/utils/llm_connection.py @@ -7,14 +7,13 @@ import tiktoken import questionary - from utils.style import red from typing import List from const.llm import MIN_TOKENS_FOR_GPT_RESPONSE, MAX_GPT_MODEL_TOKENS from logger.logger import logger from helpers.exceptions.TokenLimitError import TokenLimitError from utils.utils import fix_json -from utils.function_calling import add_function_calls_to_request +from utils.function_calling import add_function_calls_to_request, FunctionCallSet, FunctionType def get_tokens_in_messages(messages: List[str]) -> int: tokenizer = tiktoken.get_encoding("cl100k_base") # GPT-4 tokenizer @@ -58,7 +57,7 @@ def num_tokens_from_functions(functions): def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TOKENS_FOR_GPT_RESPONSE, - function_calls=None): + function_calls: FunctionCallSet = None): """ Called from: - AgentConvo.send_message() - these calls often have `function_calls`, usually from `pilot/const/function_calls.py` @@ -167,6 +166,7 @@ def wrapper(*args, **kwargs): ('answer', 'fg:orange') ])).ask() + # TODO: take user's input into consideration - send to LLM? 
if user_message != '': return {} @@ -183,9 +183,16 @@ def stream_gpt_completion(data, req_type): """ # TODO add type dynamically - this isn't working when connected to the external process - terminal_width = 50#os.get_terminal_size().columns + terminal_width = 50 # os.get_terminal_size().columns lines_printed = 2 - buffer = "" # A buffer to accumulate incoming data + buffer = '' # A buffer to accumulate incoming data + expecting_json = False + received_json = False + + if 'functions' in data: + expecting_json = data['functions'] + # Don't send the `functions` parameter to Open AI, but don't remove it from `data` in case we need to retry + data = {key: value for key, value in data.items() if key != "functions"} def return_result(result_data, lines_printed): if buffer: @@ -197,7 +204,6 @@ def return_result(result_data, lines_printed): # spinner = spinner_start(yellow("Waiting for OpenAI API response...")) # print(yellow("Stream response from OpenAI:")) - api_key = os.getenv("OPENAI_API_KEY") logger.info(f'Request data: {data}') @@ -208,15 +214,26 @@ def return_result(result_data, lines_printed): if endpoint == 'AZURE': # If yes, get the AZURE_ENDPOINT from .ENV file endpoint_url = os.getenv('AZURE_ENDPOINT') + '/openai/deployments/' + model + '/chat/completions?api-version=2023-05-15' - headers = {'Content-Type': 'application/json', 'api-key': os.getenv('AZURE_API_KEY')} + headers = { + 'Content-Type': 'application/json', + 'api-key': os.getenv('AZURE_API_KEY') + } elif endpoint == 'OPENROUTER': # If so, send the request to the OpenRouter API endpoint - headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENROUTER_API_KEY"), 'HTTP-Referer': 'http://localhost:3000', 'X-Title': 'GPT Pilot (LOCAL)'} - endpoint_url = os.getenv("OPENROUTER_ENDPOINT", 'https://openrouter.ai/api/v1/chat/completions') + endpoint_url = os.getenv('OPENROUTER_ENDPOINT', 'https://openrouter.ai/api/v1/chat/completions') + headers = { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer ' + os.getenv('OPENROUTER_API_KEY'), + 'HTTP-Referer': 'http://localhost:3000', + 'X-Title': 'GPT Pilot (LOCAL)' + } else: # If not, send the request to the OpenAI endpoint - headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + os.getenv("OPENAI_API_KEY")} - endpoint_url = os.getenv("OPENAI_ENDPOINT", 'https://api.openai.com/v1/chat/completions') + endpoint_url = os.getenv('OPENAI_ENDPOINT', 'https://api.openai.com/v1/chat/completions') + headers = { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer ' + os.getenv('OPENAI_API_KEY') + } response = requests.post( endpoint_url, @@ -233,7 +250,7 @@ def return_result(result_data, lines_printed): raise Exception(f"API responded with status code: {response.status_code}. 
Response text: {response.text}") gpt_response = '' - function_calls = {'name': '', 'arguments': ''} + # function_calls = {'name': '', 'arguments': ''} for line in response.iter_lines(): # Ignore keep-alive new lines @@ -259,9 +276,9 @@ def return_result(result_data, lines_printed): choice = json_line['choices'][0] - if 'finish_reason' in choice and choice['finish_reason'] == 'function_call': - function_calls['arguments'] = load_data_to_json(function_calls['arguments']) - return return_result({'function_calls': function_calls}, lines_printed) + # if 'finish_reason' in choice and choice['finish_reason'] == 'function_call': + # function_calls['arguments'] = load_data_to_json(function_calls['arguments']) + # return return_result({'function_calls': function_calls}, lines_printed) json_line = choice['delta'] # TODO: token healing? https://github.com/1rgs/jsonformer-claude @@ -272,14 +289,14 @@ def return_result(result_data, lines_printed): continue # skip to the next line # handle the streaming response - if 'function_call' in json_line: - if 'name' in json_line['function_call']: - function_calls['name'] = json_line['function_call']['name'] - print(f'Function call: {function_calls["name"]}') - - if 'arguments' in json_line['function_call']: - function_calls['arguments'] += json_line['function_call']['arguments'] - print(json_line['function_call']['arguments'], type='stream', end='', flush=True) + # if 'function_call' in json_line: + # if 'name' in json_line['function_call']: + # function_calls['name'] = json_line['function_call']['name'] + # print(f'Function call: {function_calls["name"]}') + # + # if 'arguments' in json_line['function_call']: + # function_calls['arguments'] += json_line['function_call']['arguments'] + # print(json_line['function_call']['arguments'], type='stream', end='', flush=True) if 'content' in json_line: content = json_line.get('content') @@ -287,7 +304,18 @@ def return_result(result_data, lines_printed): buffer += content # accumulate the data # If you detect a natural breakpoint (e.g., line break or end of a response object), print & count: - if buffer.endswith("\n"): # or some other condition that denotes a breakpoint + if buffer.endswith("\n"): + if expecting_json and not received_json: + received_json = assert_json_response(buffer, lines_printed > 2) + if received_json: + gpt_response = "" + # if not received_json: + # # Don't append to gpt_response, but increment lines_printed + # lines_printed += 1 + # buffer = "" + # continue + + # or some other condition that denotes a breakpoint lines_printed += count_lines_based_on_width(buffer, terminal_width) buffer = "" # reset the buffer @@ -295,15 +323,42 @@ def return_result(result_data, lines_printed): print(content, type='stream', end='', flush=True) print('\n', type='stream') - if function_calls['arguments'] != '': - logger.info(f'Response via function call: {function_calls["arguments"]}') - function_calls['arguments'] = load_data_to_json(function_calls['arguments']) - return return_result({'function_calls': function_calls}, lines_printed) + + # if function_calls['arguments'] != '': + # logger.info(f'Response via function call: {function_calls["arguments"]}') + # function_calls['arguments'] = load_data_to_json(function_calls['arguments']) + # return return_result({'function_calls': function_calls}, lines_printed) logger.info(f'Response message: {gpt_response}') + + if expecting_json: + assert_json_schema(gpt_response, expecting_json) + new_code = postprocessing(gpt_response, req_type) # TODO add type dynamically return 
return_result({'text': new_code}, lines_printed) +def assert_json_response(response: str, or_fail=True) -> bool: + if re.match(r'.*(```(json)?|{|\[)', response): + return True + elif or_fail: + raise ValueError('LLM did not respond with JSON') + else: + return False + + +def assert_json_schema(response: str, functions: list[FunctionType]) -> True: + return True + # TODO: validation always fails + # for function in functions: + # schema = function['parameters'] + # parser = parser_for_schema(schema) + # validated = parser.validate(response) + # if validated.valid and validated.end_index: + # return True + # + # raise ValueError('LLM responded with invalid JSON') + + def postprocessing(gpt_response, req_type): return gpt_response diff --git a/pilot/utils/test_function_calling.py b/pilot/utils/test_function_calling.py index 36b54912d..0e5ea694c 100644 --- a/pilot/utils/test_function_calling.py +++ b/pilot/utils/test_function_calling.py @@ -78,7 +78,7 @@ def test_json_prompter(): def test_llama_json_prompter(): # Given - prompter = JsonPrompter(is_llama=True) + prompter = JsonPrompter(is_instruct=True) # When prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions']) # , 'process_technologies') @@ -126,7 +126,7 @@ def test_json_prompter_named(): def test_llama_json_prompter_named(): # Given - prompter = JsonPrompter(is_llama=True) + prompter = JsonPrompter(is_instruct=True) # When prompt = prompter.prompt('Create a web-based chat app', ARCHITECTURE['definitions'], 'process_technologies') diff --git a/pilot/utils/test_llm_connection.py b/pilot/utils/test_llm_connection.py index f5ec80f6e..ec55633fe 100644 --- a/pilot/utils/test_llm_connection.py +++ b/pilot/utils/test_llm_connection.py @@ -2,13 +2,14 @@ import pytest from dotenv import load_dotenv -from const.function_calls import ARCHITECTURE, DEV_STEPS +from const.function_calls import ARCHITECTURE, DEVELOPMENT_PLAN from helpers.AgentConvo import AgentConvo from helpers.Project import Project from helpers.agents.Architect import Architect -from helpers.agents.Developer import Developer -from utils.function_calling import parse_agent_response -from .llm_connection import create_gpt_chat_completion +from helpers.agents.TechLead import TechLead +from utils.function_calling import parse_agent_response, FunctionType +from test.test_utils import assert_non_empty_string +from .llm_connection import create_gpt_chat_completion, assert_json_response, assert_json_schema from main import get_custom_print load_dotenv() @@ -16,10 +17,58 @@ project = Project({'app_id': 'test-app'}, current_step='test') +class TestSchemaValidation: + def setup_method(self): + self.function: FunctionType = { + 'name': 'test', + 'description': 'test schema', + 'parameters': { + 'type': 'object', + 'properties': {'foo': {'type': 'string'}}, + 'required': ['foo'] + } + } + + def test_assert_json_response(self): + assert assert_json_response('{"foo": "bar"}') + assert assert_json_response('{\n"foo": "bar"}') + assert assert_json_response('```\n{"foo": "bar"}') + assert assert_json_response('```json\n{\n"foo": "bar"}') + with pytest.raises(ValueError, match='LLM did not respond with JSON'): + assert assert_json_response('# Foo\n bar') + + def test_assert_json_schema(self): + # When assert_json_schema is called with valid JSON + # Then no errors + assert(assert_json_schema('{"foo": "bar"}', [self.function])) + + def test_assert_json_schema_invalid(self): + # When assert_json_schema is called with invalid JSON + # Then error is raised + with 
pytest.raises(ValueError, match='LLM responded with invalid JSON'): + assert_json_schema('{"foo": 1}', [self.function]) + + def test_assert_json_schema_incomplete(self): + # When assert_json_schema is called with incomplete JSON + # Then error is raised + with pytest.raises(ValueError, match='LLM responded with invalid JSON'): + assert_json_schema('{"foo": "b', [self.function]) + + def test_assert_json_schema_required(self): + # When assert_json_schema is called with missing required property + # Then error is raised + self.function['parameters']['properties']['other'] = {'type': 'string'} + self.function['parameters']['required'] = ['foo', 'other'] + + with pytest.raises(ValueError, match='LLM responded with invalid JSON'): + assert_json_schema('{"foo": "bar"}', [self.function]) + class TestLlmConnection: def setup_method(self): builtins.print, ipc_client_instance = get_custom_print({}) + + @pytest.mark.uses_tokens @pytest.mark.parametrize("endpoint, model", [ ("OPENAI", "gpt-4"), # role: system @@ -64,7 +113,6 @@ def test_chat_completion_Architect(self, endpoint, model, monkeypatch): 'User will be able to register a new account on Test_App.', ] }) - function_calls = ARCHITECTURE # When @@ -78,6 +126,60 @@ def test_chat_completion_Architect(self, endpoint, model, monkeypatch): response = parse_agent_response(response, function_calls) assert 'Node.js' in response + @pytest.mark.uses_tokens + @pytest.mark.parametrize("endpoint, model", [ + ("OPENAI", "gpt-4"), # role: system + ("OPENROUTER", "openai/gpt-3.5-turbo"), # role: user + ("OPENROUTER", "meta-llama/codellama-34b-instruct"), # rule: user, is_llama + ("OPENROUTER", "google/palm-2-chat-bison"), # role: user/system + ("OPENROUTER", "google/palm-2-codechat-bison"), + # TODO: See https://github.com/1rgs/jsonformer-claude/blob/main/jsonformer_claude/main.py + # https://github.com/guidance-ai/guidance - token healing + ("OPENROUTER", "anthropic/claude-2"), # role: user, is_llama + ]) + def test_chat_completion_TechLead(self, endpoint, model, monkeypatch): + # Given + monkeypatch.setenv('ENDPOINT', endpoint) + monkeypatch.setenv('MODEL_NAME', model) + + agent = TechLead(project) + convo = AgentConvo(agent) + convo.construct_and_add_message_from_prompt('development/plan.prompt', + { + 'name': 'Test App', + 'app_summary': ''' + The project entails creating a web-based chat application, tentatively named "chat_app." +This application does not require user authentication or chat history storage. +It solely supports one-on-one messaging, excluding group chats or multimedia sharing like photos, videos, or files. +Additionally, there are no specific requirements for real-time functionality, like live typing indicators or read receipts. +The development of this application will strictly follow a monolithic structure, avoiding the use of microservices, as per the client's demand. +The development process will include the creation of user stories and tasks, based on detailed discussions with the client.''', + 'app_type': 'web app', + 'user_stories': [ + 'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.', + 'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.', + 'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? 
If not, just press ENTER.', + 'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.', + 'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.', + 'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.', + ] + }) + function_calls = DEVELOPMENT_PLAN + + # When + response = create_gpt_chat_completion(convo.messages, '', function_calls=function_calls) + + # Then + assert convo.messages[0]['content'].startswith('You are a tech lead in a software development agency') + assert convo.messages[1]['content'].startswith('You are working in a software development agency and a project manager and software architect approach you') + + assert response is not None + response = parse_agent_response(response, function_calls) + assert_non_empty_string(response[0]['description']) + assert_non_empty_string(response[0]['programmatic_goal']) + assert_non_empty_string(response[0]['user_review_goal']) + + # def test_break_down_development_task(self): # # Given # agent = Developer(project)
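# A small, self-contained sketch of the JSON guard introduced by this patch:
# assert_json_response() in pilot/utils/llm_connection.py checks the streamed
# buffer once a line break arrives and rejects answers that do not look like JSON.
# looks_like_json below is a hypothetical stand-in that mirrors the same regex so
# the snippet runs outside the repo; the sample strings are invented.
import re

def looks_like_json(response: str) -> bool:
    # Same heuristic as assert_json_response(): accept a ``` fence, '{' or '['
    # appearing before the first line break of the response.
    return bool(re.match(r'.*(```(json)?|{|\[)', response))

assert looks_like_json('```json\n{"technologies": ["Node.js"]}')
assert looks_like_json('{"foo": "bar"}')
assert not looks_like_json('Sure! Here is the plan you asked for.')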