From 2cc35a43006ff72eae77b3db3ac89ae339ced660 Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Wed, 16 Apr 2025 16:08:10 -0700 Subject: [PATCH 01/10] checkpoint --- app/backend/app.py | 16 +- app/backend/approaches/approach.py | 23 +- app/backend/approaches/chatapproach.py | 181 +++++++----- .../approaches/chatreadretrieveread.py | 105 +++---- .../approaches/chatreadretrievereadvision.py | 86 +++--- app/backend/approaches/retrievethenread.py | 73 ++--- .../approaches/retrievethenreadvision.py | 76 ++--- app/backend/error.py | 1 + app/frontend/src/api/api.ts | 6 +- app/frontend/src/api/models.ts | 27 +- .../AnalysisPanel/AnalysisPanel.tsx | 13 +- app/frontend/src/components/Answer/Answer.tsx | 18 +- .../src/components/Answer/AnswerParser.tsx | 6 +- app/frontend/src/index.tsx | 4 +- app/frontend/src/pages/ask/Ask.tsx | 22 +- app/frontend/src/pages/chat/Chat.tsx | 260 ++++++++++-------- 16 files changed, 501 insertions(+), 416 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 263fcf06a6..3d6ce2dc25 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -8,6 +8,7 @@ from collections.abc import AsyncGenerator from pathlib import Path from typing import Any, Union, cast +import traceback from azure.cognitiveservices.speech import ( ResultReason, @@ -188,10 +189,11 @@ async def ask(auth_claims: dict[str, Any]): approach = cast(Approach, current_app.config[CONFIG_ASK_VISION_APPROACH]) else: approach = cast(Approach, current_app.config[CONFIG_ASK_APPROACH]) - r = await approach.run( + result = await approach.run( request_json["messages"], context=context, session_state=request_json.get("session_state") ) - return jsonify(r) + results = [r async for r in result] + return jsonify({"value": results}) except Exception as error: return error_response(error, "/ask") @@ -208,6 +210,7 @@ async def format_as_ndjson(r: AsyncGenerator[dict, None]) -> AsyncGenerator[str, async for event in r: yield json.dumps(event, ensure_ascii=False, cls=JSONEncoder) + "\n" except Exception as error: + traceback.print_exc() logging.exception("Exception while generating response stream: %s", error) yield json.dumps(error_dict(error)) @@ -241,7 +244,8 @@ async def chat(auth_claims: dict[str, Any]): context=context, session_state=session_state, ) - return jsonify(result) + results = [r async for r in result] + return jsonify({"value": results}) except Exception as error: return error_response(error, "/chat") @@ -790,12 +794,12 @@ def create_app(): # Log levels should be one of https://docs.python.org/3/library/logging.html#logging-levels # Set root level to WARNING to avoid seeing overly verbose logs from SDKS - logging.basicConfig(level=logging.WARNING) + logging.basicConfig(level=logging.INFO) # Set our own logger levels to INFO by default - app_level = os.getenv("APP_LOG_LEVEL", "INFO") + app_level = os.getenv("APP_LOG_LEVEL", "DEBUG") app.logger.setLevel(os.getenv("APP_LOG_LEVEL", app_level)) + app.logger.setLevel("DEBUG") logging.getLogger("scripts").setLevel(app_level) - if allowed_origin := os.getenv("ALLOWED_ORIGIN"): allowed_origins = allowed_origin.split(";") if len(allowed_origins) > 0: diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py index 59f1909a54..3d3ed6edca 100644 --- a/app/backend/approaches/approach.py +++ b/app/backend/approaches/approach.py @@ -88,31 +88,23 @@ def trim_embedding(cls, embedding: Optional[list[float]]) -> Optional[str]: return None +@dataclass +class DataPoints: + text: Optional[list[str]] = None + images: Optional[list] = None 
@dataclass class ThoughtStep: title: str description: Optional[Any] props: Optional[dict[str, Any]] = None + data_points: Optional[DataPoints] = None def update_token_usage(self, usage: CompletionUsage) -> None: if self.props: self.props["token_usage"] = TokenUsageProps.from_completion_usage(usage) -@dataclass -class DataPoints: - text: Optional[list[str]] = None - images: Optional[list] = None - - -@dataclass -class ExtraInfo: - data_points: DataPoints - thoughts: Optional[list[ThoughtStep]] = None - followup_questions: Optional[list[Any]] = None - - @dataclass class TokenUsageProps: prompt_tokens: int @@ -403,6 +395,7 @@ def format_thought_step_for_chatcompletion( deployment: Optional[str], usage: Optional[CompletionUsage] = None, reasoning_effort: Optional[ChatCompletionReasoningEffort] = None, + data_points: Optional[DataPoints] = None, ) -> ThoughtStep: properties: dict[str, Any] = {"model": model} if deployment: @@ -414,14 +407,14 @@ def format_thought_step_for_chatcompletion( ) if usage: properties["token_usage"] = TokenUsageProps.from_completion_usage(usage) - return ThoughtStep(title, messages, properties) + return ThoughtStep(title, messages, properties, data_points) async def run( self, messages: list[ChatCompletionMessageParam], session_state: Any = None, context: dict[str, Any] = {}, - ) -> dict[str, Any]: + ) -> AsyncGenerator[dict[str, Any], None]: raise NotImplementedError async def run_stream( diff --git a/app/backend/approaches/chatapproach.py b/app/backend/approaches/chatapproach.py index 346c9f3b0a..1559c20a5a 100644 --- a/app/backend/approaches/chatapproach.py +++ b/app/backend/approaches/chatapproach.py @@ -2,7 +2,7 @@ import re from abc import ABC, abstractmethod from collections.abc import AsyncGenerator, Awaitable -from typing import Any, Optional, Union, cast +from typing import Any, Optional, List, Union from openai import AsyncStream from openai.types.chat import ( @@ -13,9 +13,57 @@ from approaches.approach import ( Approach, - ExtraInfo, + ThoughtStep ) +class StreamingThoughtStep: + def __init__(self, step: ThoughtStep, chat_completion: Optional[Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]] = None, role: Optional[str] = "assistant"): + self.step = step + self.chat_completion = chat_completion + self.role = role + self._stream = None + self._is_streaming = None + + def has_content(self) -> bool: + return self.chat_completion is not None + + def __aiter__(self): + return self + + async def start(self): + if self._stream is None and self.chat_completion is not None: + self._stream = await self.chat_completion + self._is_streaming = True + + async def __anext__(self) -> Union[ChatCompletion, ChatCompletionChunk, ThoughtStep]: + if self._is_streaming: + # Streaming Implementation: yield each chunk, then the step with token usage + if self._stream is None: + raise StopAsyncIteration + + try: + # Get the next chunk from the async stream + chunk = await self._stream.__anext__() + if len(chunk.choices) == 0 and chunk.usage: + self.step.update_token_usage(chunk.usage) + return chunk + except StopAsyncIteration: + # If the stream is exhausted, yield the step with token usage + self._stream = None + return self.step + + # Non-Streaming Implementation: return the entire response, then the step with token usage + if self._stream is None: + if self.step is None: + raise StopAsyncIteration + + result = self.step + self.step = None + return result + + result = self._stream + self._stream = None + return result class ChatApproach(Approach, 
ABC): @@ -24,7 +72,7 @@ class ChatApproach(Approach, ABC): @abstractmethod async def run_until_final_call( self, messages, overrides, auth_claims, should_stream - ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]: + ) -> AsyncGenerator[StreamingThoughtStep, None]: pass def get_search_query(self, chat_completion: ChatCompletion, user_query: str): @@ -45,7 +93,7 @@ def get_search_query(self, chat_completion: ChatCompletion, user_query: str): return query_text return user_query - def extract_followup_questions(self, content: Optional[str]): + def extract_followup_questions(self, content: Optional[str]) -> Optional[List[str]]: if content is None: return content, [] return content.split("<<")[0], re.findall(r"<<([^>>]+)>>", content) @@ -56,25 +104,32 @@ async def run_without_streaming( overrides: dict[str, Any], auth_claims: dict[str, Any], session_state: Any = None, - ) -> dict[str, Any]: - extra_info, chat_coroutine = await self.run_until_final_call( + ) -> AsyncGenerator[dict[str, Any], None]: + thoughts = self.run_until_final_call( messages, overrides, auth_claims, should_stream=False ) - chat_completion_response: ChatCompletion = await cast(Awaitable[ChatCompletion], chat_coroutine) - content = chat_completion_response.choices[0].message.content - role = chat_completion_response.choices[0].message.role - if overrides.get("suggest_followup_questions"): - content, followup_questions = self.extract_followup_questions(content) - extra_info.followup_questions = followup_questions - # Assume last thought is for generating answer - if self.include_token_usage and extra_info.thoughts and chat_completion_response.usage: - extra_info.thoughts[-1].update_token_usage(chat_completion_response.usage) - chat_app_response = { - "message": {"content": content, "role": role}, - "context": extra_info, - "session_state": session_state, - } - return chat_app_response + async for thought in thoughts: + content = None + role = None + thought_step = None + followup_questions = None + await thought.start() + async for chunk in thought: + if isinstance(chunk, ChatCompletion): + content = chunk.choices[0].message.content + role = chunk.choices[0].message.role + elif isinstance(chunk, ThoughtStep): + thought_step = chunk + + if overrides.get("suggest_followup_questions"): + content, followup_questions = self.extract_followup_questions(content) + followup_questions = followup_questions + + yield { + "message": {"content": content, "role": role}, + "context": { "thought": thought_step, "followup_questions": followup_questions }, + "session_state": session_state, + } async def run_with_streaming( self, @@ -82,53 +137,47 @@ async def run_with_streaming( overrides: dict[str, Any], auth_claims: dict[str, Any], session_state: Any = None, - ) -> AsyncGenerator[dict, None]: - extra_info, chat_coroutine = await self.run_until_final_call( + ) -> AsyncGenerator[dict[str, Any], None]: + thoughts = self.run_until_final_call( messages, overrides, auth_claims, should_stream=True ) - chat_coroutine = cast(Awaitable[AsyncStream[ChatCompletionChunk]], chat_coroutine) - yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} - - followup_questions_started = False - followup_content = "" - async for event_chunk in await chat_coroutine: - # "2023-07-01-preview" API version has a bug where first response has empty choices - event = event_chunk.model_dump() # Convert pydantic model to dict - if event["choices"]: - # No usage during streaming - 
completion = { - "delta": { - "content": event["choices"][0]["delta"].get("content"), - "role": event["choices"][0]["delta"]["role"], - } - } - # if event contains << and not >>, it is start of follow-up question, truncate - content = completion["delta"].get("content") - content = content or "" # content may either not exist in delta, or explicitly be None - if overrides.get("suggest_followup_questions") and "<<" in content: - followup_questions_started = True - earlier_content = content[: content.index("<<")] - if earlier_content: - completion["delta"]["content"] = earlier_content - yield completion - followup_content += content[content.index("<<") :] - elif followup_questions_started: - followup_content += content - else: - yield completion - else: - # Final chunk at end of streaming should contain usage - # https://cookbook.openai.com/examples/how_to_stream_completions#4-how-to-get-token-usage-data-for-streamed-chat-completion-response - if event_chunk.usage and extra_info.thoughts and self.include_token_usage: - extra_info.thoughts[-1].update_token_usage(event_chunk.usage) - yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} - - if followup_content: - _, followup_questions = self.extract_followup_questions(followup_content) - yield { - "delta": {"role": "assistant"}, - "context": {"context": extra_info, "followup_questions": followup_questions}, - } + async for thought in thoughts: + yield { "delta": { "role": thought.role }, "has_content": thought.has_content() } + + followup_questions_started = False + followup_content = "" + thought_step = None + await thought.start() + async for event in thought: + if isinstance(event, ChatCompletionChunk): + if event.choices: + completion = { + "delta": { + "content": event.choices[0].delta.content, + "role": event.choices[0].delta.role + } + } + # if event contains << and not >>, it is start of follow-up question, truncate + content = completion["delta"].get("content") + content = content or "" # content may either not exist in delta, or explicitly be None + if overrides.get("suggest_followup_questions") and "<<" in content: + followup_questions_started = True + earlier_content = content[: content.index("<<")] + if earlier_content: + completion["delta"]["content"] = earlier_content + yield completion + followup_content += content[content.index("<<") :] + elif followup_questions_started: + followup_content += content + else: + yield completion + elif isinstance(event, ThoughtStep): + thought_step = event + + followup_questions = None + if followup_content: + _, followup_questions = self.extract_followup_questions(followup_content) + yield {"delta": {"role": thought.role, "finish_reason": "stop" }, "context": { "thought": thought_step, "followup_questions": followup_questions }, "session_state": session_state } async def run( self, diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 249c7247b2..9f25c57a36 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -1,5 +1,5 @@ from collections.abc import Awaitable -from typing import Any, Optional, Union, cast +from typing import Any, Optional, Union, cast, AsyncGenerator from azure.search.documents.aio import SearchClient from azure.search.documents.models import VectorQuery @@ -11,8 +11,8 @@ ChatCompletionToolParam, ) -from approaches.approach import DataPoints, ExtraInfo, ThoughtStep -from approaches.chatapproach import ChatApproach 
+from approaches.approach import DataPoints, ThoughtStep +from approaches.chatapproach import ChatApproach, StreamingThoughtStep from approaches.promptmanager import PromptManager from core.authentication import AuthenticationHelper @@ -67,7 +67,7 @@ async def run_until_final_call( overrides: dict[str, Any], auth_claims: dict[str, Any], should_stream: bool = False, - ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]: + ) -> AsyncGenerator[StreamingThoughtStep, None]: use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None] use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None] use_semantic_ranker = True if overrides.get("semantic_ranker") else False @@ -88,13 +88,12 @@ async def run_until_final_call( f"{self.chatgpt_model} does not support streaming. Please use a different model or disable streaming." ) + # STEP 1: Generate an optimized keyword search query based on the chat history and the last question query_messages = self.prompt_manager.render_prompt( self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]} ) tools: list[ChatCompletionToolParam] = self.query_rewrite_tools - # STEP 1: Generate an optimized keyword search query based on the chat history and the last question - chat_completion = cast( ChatCompletion, await self.create_chat_completion( @@ -111,9 +110,38 @@ async def run_until_final_call( ), ) + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Prompt to generate search query", + messages=query_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + usage=chat_completion.usage, + reasoning_effort="low", + ), + role="tool" + ) + query_text = self.get_search_query(chat_completion, original_user_query) # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query + yield StreamingThoughtStep( + step=ThoughtStep( + "Search using generated search query", + query_text, + { + "use_semantic_captions": use_semantic_captions, + "use_semantic_ranker": use_semantic_ranker, + "use_query_rewriting": use_query_rewriting, + "top": top, + "filter": filter, + "use_vector_search": use_vector_search, + "use_text_search": use_text_search, + }, + ), + role="tool" + ) # If retrieval mode includes vectors, compute an embedding for the query vectors: list[VectorQuery] = [] @@ -134,6 +162,14 @@ async def run_until_final_call( use_query_rewriting, ) + yield StreamingThoughtStep( + step=ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results], + ), + role="tool" + ) + # STEP 3: Generate a contextual and content specific answer using the search results and chat history text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False) messages = self.prompt_manager.render_prompt( @@ -147,55 +183,22 @@ async def run_until_final_call( }, ) - extra_info = ExtraInfo( - DataPoints(text=text_sources), - thoughts=[ - self.format_thought_step_for_chatcompletion( - title="Prompt to generate search query", - messages=query_messages, - overrides=overrides, - model=self.chatgpt_model, - deployment=self.chatgpt_deployment, - usage=chat_completion.usage, - reasoning_effort="low", - ), - ThoughtStep( - "Search using generated search query", - query_text, - { - "use_semantic_captions": use_semantic_captions, - "use_semantic_ranker": use_semantic_ranker, - "use_query_rewriting": use_query_rewriting, - "top": 
top, - "filter": filter, - "use_vector_search": use_vector_search, - "use_text_search": use_text_search, - }, - ), - ThoughtStep( - "Search results", - [result.serialize_for_results() for result in results], - ), - self.format_thought_step_for_chatcompletion( - title="Prompt to generate answer", - messages=messages, - overrides=overrides, - model=self.chatgpt_model, - deployment=self.chatgpt_deployment, - usage=None, - ), - ], - ) - - chat_coroutine = cast( - Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]], - self.create_chat_completion( + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Prompt to generate answer", + messages=messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + usage=None, + data_points=DataPoints(text=text_sources) + ), + chat_completion=self.create_chat_completion( self.chatgpt_deployment, self.chatgpt_model, messages, overrides, self.get_response_token_limit(self.chatgpt_model, 1024), should_stream, - ), + ) ) - return (extra_info, chat_coroutine) diff --git a/app/backend/approaches/chatreadretrievereadvision.py b/app/backend/approaches/chatreadretrievereadvision.py index b56d773a6f..1c6a360ed8 100644 --- a/app/backend/approaches/chatreadretrievereadvision.py +++ b/app/backend/approaches/chatreadretrievereadvision.py @@ -1,5 +1,5 @@ from collections.abc import Awaitable -from typing import Any, Callable, Optional, Union, cast +from typing import Any, Callable, Optional, Union, cast, AsyncGenerator from azure.search.documents.aio import SearchClient from azure.storage.blob.aio import ContainerClient @@ -11,8 +11,8 @@ ChatCompletionToolParam, ) -from approaches.approach import DataPoints, ExtraInfo, ThoughtStep -from approaches.chatapproach import ChatApproach +from approaches.approach import DataPoints, ThoughtStep +from approaches.chatapproach import ChatApproach, StreamingThoughtStep from approaches.promptmanager import PromptManager from core.authentication import AuthenticationHelper from core.imageshelper import fetch_image @@ -77,7 +77,7 @@ async def run_until_final_call( overrides: dict[str, Any], auth_claims: dict[str, Any], should_stream: bool = False, - ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]: + ) -> AsyncGenerator[StreamingThoughtStep, None]: seed = overrides.get("seed", None) use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None] use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None] @@ -104,6 +104,19 @@ async def run_until_final_call( tools: list[ChatCompletionToolParam] = self.query_rewrite_tools # STEP 1: Generate an optimized keyword search query based on the chat history and the last question + yield StreamingThoughtStep( + step=ThoughtStep( + "Prompt to generate search query", + query_messages, + ( + {"model": self.chatgpt_model, "deployment": self.chatgpt_deployment} + if self.chatgpt_deployment + else {"model": self.chatgpt_model} + ), + ), + role="tool" + ) + chat_completion: ChatCompletion = await self.openai_client.chat.completions.create( messages=query_messages, # Azure OpenAI takes the deployment name as the model name @@ -144,6 +157,14 @@ async def run_until_final_call( use_query_rewriting, ) + yield StreamingThoughtStep( + step=ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results] + ), + role="tool" + ) + # STEP 3: Generate a contextual and content specific answer using the search 
results and chat history text_sources = [] image_sources = [] @@ -167,50 +188,18 @@ async def run_until_final_call( }, ) - extra_info = ExtraInfo( - DataPoints(text=text_sources, images=image_sources), - [ - ThoughtStep( - "Prompt to generate search query", - query_messages, - ( - {"model": self.chatgpt_model, "deployment": self.chatgpt_deployment} - if self.chatgpt_deployment - else {"model": self.chatgpt_model} - ), + yield StreamingThoughtStep( + step=ThoughtStep( + "Prompt to generate answer", + messages, + ( + {"model": self.gpt4v_model, "deployment": self.gpt4v_deployment} + if self.gpt4v_deployment + else {"model": self.gpt4v_model} ), - ThoughtStep( - "Search using generated search query", - query_text, - { - "use_semantic_captions": use_semantic_captions, - "use_semantic_ranker": use_semantic_ranker, - "use_query_rewriting": use_query_rewriting, - "top": top, - "filter": filter, - "vector_fields": vector_fields, - "use_text_search": use_text_search, - }, - ), - ThoughtStep( - "Search results", - [result.serialize_for_results() for result in results], - ), - ThoughtStep( - "Prompt to generate answer", - messages, - ( - {"model": self.gpt4v_model, "deployment": self.gpt4v_deployment} - if self.gpt4v_deployment - else {"model": self.gpt4v_model} - ), - ), - ], - ) - - chat_coroutine = cast( - Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]], - self.openai_client.chat.completions.create( + data_points=DataPoints(text=text_sources, images=image_sources) + ), + chat_completion=self.openai_client.chat.completions.create( model=self.gpt4v_deployment if self.gpt4v_deployment else self.gpt4v_model, messages=messages, temperature=overrides.get("temperature", 0.3), @@ -218,6 +207,5 @@ async def run_until_final_call( n=1, stream=should_stream, seed=seed, - ), + ) ) - return (extra_info, chat_coroutine) diff --git a/app/backend/approaches/retrievethenread.py b/app/backend/approaches/retrievethenread.py index 8bdbb9785e..8eb74a69d4 100644 --- a/app/backend/approaches/retrievethenread.py +++ b/app/backend/approaches/retrievethenread.py @@ -1,11 +1,11 @@ -from typing import Any, Optional, cast +from typing import Any, Optional, cast, AsyncGenerator from azure.search.documents.aio import SearchClient from azure.search.documents.models import VectorQuery from openai import AsyncOpenAI from openai.types.chat import ChatCompletion, ChatCompletionMessageParam -from approaches.approach import Approach, DataPoints, ExtraInfo, ThoughtStep +from approaches.approach import Approach, DataPoints, ThoughtStep from approaches.promptmanager import PromptManager from core.authentication import AuthenticationHelper @@ -58,7 +58,7 @@ async def run( messages: list[ChatCompletionMessageParam], session_state: Any = None, context: dict[str, Any] = {}, - ) -> dict[str, Any]: + ) -> AsyncGenerator[dict[str, Any], None]: q = messages[-1]["content"] if not isinstance(q, str): raise ValueError("The most recent message content must be a string.") @@ -73,6 +73,26 @@ async def run( minimum_search_score = overrides.get("minimum_search_score", 0.0) minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0) filter = self.build_filter(overrides, auth_claims) + + yield { + "context": { + "thought": ThoughtStep( + "Search using user query", + q, + { + "use_semantic_captions": use_semantic_captions, + "use_semantic_ranker": use_semantic_ranker, + "use_query_rewriting": use_query_rewriting, + "top": top, + "filter": filter, + "use_vector_search": use_vector_search, + "use_text_search": 
use_text_search, + }, + ) + }, + "session_state": session_state, + } + # If retrieval mode includes vectors, compute an embedding for the query vectors: list[VectorQuery] = [] @@ -101,6 +121,16 @@ async def run( | {"user_query": q, "text_sources": text_sources}, ) + yield { + "context": { + "thought": ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results], + ) + }, + "session_state": session_state + } + chat_completion = cast( ChatCompletion, await self.create_chat_completion( @@ -112,42 +142,21 @@ async def run( ), ) - extra_info = ExtraInfo( - DataPoints(text=text_sources), - thoughts=[ - ThoughtStep( - "Search using user query", - q, - { - "use_semantic_captions": use_semantic_captions, - "use_semantic_ranker": use_semantic_ranker, - "use_query_rewriting": use_query_rewriting, - "top": top, - "filter": filter, - "use_vector_search": use_vector_search, - "use_text_search": use_text_search, - }, - ), - ThoughtStep( - "Search results", - [result.serialize_for_results() for result in results], - ), - self.format_thought_step_for_chatcompletion( + yield { + "message": { + "content": chat_completion.choices[0].message.content, + "role": chat_completion.choices[0].message.role, + }, + "context": { + "thought": self.format_thought_step_for_chatcompletion( title="Prompt to generate answer", messages=messages, overrides=overrides, model=self.chatgpt_model, deployment=self.chatgpt_deployment, usage=chat_completion.usage, + data_points=DataPoints(text=text_sources) ), - ], - ) - - return { - "message": { - "content": chat_completion.choices[0].message.content, - "role": chat_completion.choices[0].message.role, }, - "context": extra_info, "session_state": session_state, } diff --git a/app/backend/approaches/retrievethenreadvision.py b/app/backend/approaches/retrievethenreadvision.py index a556fd8b6c..af92a7a509 100644 --- a/app/backend/approaches/retrievethenreadvision.py +++ b/app/backend/approaches/retrievethenreadvision.py @@ -1,5 +1,5 @@ from collections.abc import Awaitable -from typing import Any, Callable, Optional +from typing import Any, Callable, Optional, AsyncGenerator from azure.search.documents.aio import SearchClient from azure.storage.blob.aio import ContainerClient @@ -8,7 +8,7 @@ ChatCompletionMessageParam, ) -from approaches.approach import Approach, DataPoints, ExtraInfo, ThoughtStep +from approaches.approach import Approach, DataPoints, ThoughtStep from approaches.promptmanager import PromptManager from core.authentication import AuthenticationHelper from core.imageshelper import fetch_image @@ -66,7 +66,7 @@ async def run( messages: list[ChatCompletionMessageParam], session_state: Any = None, context: dict[str, Any] = {}, - ) -> dict[str, Any]: + ) -> AsyncGenerator[dict[str, Any], None]: q = messages[-1]["content"] if not isinstance(q, str): raise ValueError("The most recent message content must be a string.") @@ -87,6 +87,26 @@ async def run( vector_fields = overrides.get("vector_fields", ["embedding"]) send_text_to_gptvision = overrides.get("gpt4v_input") in ["textAndImages", "texts", None] send_images_to_gptvision = overrides.get("gpt4v_input") in ["textAndImages", "images", None] + + yield { + "context": { + "thought": ThoughtStep( + "Search using user query", + q, + { + "use_semantic_captions": use_semantic_captions, + "use_semantic_ranker": use_semantic_ranker, + "use_query_rewriting": use_query_rewriting, + "top": top, + "filter": filter, + "vector_fields": vector_fields, + "use_vector_search": use_vector_search, + "use_text_search": 
use_text_search, + }, + ), + }, + "session_state": session_state, + } # If retrieval mode includes vectors, compute an embedding for the query vectors = [] @@ -113,6 +133,16 @@ async def run( use_query_rewriting, ) + yield { + "context": { + "thought": ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results], + ) + }, + "session_state": session_state + } + # Process results text_sources = [] image_sources = [] @@ -139,28 +169,13 @@ async def run( seed=seed, ) - extra_info = ExtraInfo( - DataPoints(text=text_sources, images=image_sources), - [ - ThoughtStep( - "Search using user query", - q, - { - "use_semantic_captions": use_semantic_captions, - "use_semantic_ranker": use_semantic_ranker, - "use_query_rewriting": use_query_rewriting, - "top": top, - "filter": filter, - "vector_fields": vector_fields, - "use_vector_search": use_vector_search, - "use_text_search": use_text_search, - }, - ), - ThoughtStep( - "Search results", - [result.serialize_for_results() for result in results], - ), - ThoughtStep( + yield { + "message": { + "content": chat_completion.choices[0].message.content, + "role": chat_completion.choices[0].message.role, + }, + "context": { + "thought": ThoughtStep( "Prompt to generate answer", messages, ( @@ -168,15 +183,8 @@ async def run( if self.gpt4v_deployment else {"model": self.gpt4v_model} ), - ), - ], - ) - - return { - "message": { - "content": chat_completion.choices[0].message.content, - "role": chat_completion.choices[0].message.role, + data_points=DataPoints(text=text_sources, images=image_sources), + ) }, - "context": extra_info, "session_state": session_state, } diff --git a/app/backend/error.py b/app/backend/error.py index 0a21afe6b7..e761847e73 100644 --- a/app/backend/error.py +++ b/app/backend/error.py @@ -2,6 +2,7 @@ from openai import APIError from quart import jsonify +import traceback ERROR_MESSAGE = """The app encountered an error processing your request. If you are an administrator of the app, view the full error in the logs. See aka.ms/appservice-logs for more information. 
diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index df95f801b5..dc4c30ffc2 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -22,7 +22,7 @@ export async function configApi(): Promise { return (await response.json()) as Config; } -export async function askApi(request: ChatAppRequest, idToken: string | undefined): Promise { +export async function askApi(request: ChatAppRequest, idToken: string | undefined): Promise { const headers = await getHeaders(idToken); const response = await fetch(`${BACKEND_URI}/ask`, { method: "POST", @@ -34,11 +34,11 @@ export async function askApi(request: ChatAppRequest, idToken: string | undefine throw Error(`Request failed with status ${response.status}`); } const parsedResponse: ChatAppResponseOrError = await response.json(); - if (parsedResponse.error) { + if ("error" in parsedResponse) { throw Error(parsedResponse.error); } - return parsedResponse as ChatAppResponse; + return parsedResponse as ChatAppResponse[]; } export async function chatApi(request: ChatAppRequest, shouldStream: boolean, idToken: string | undefined): Promise { diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index c915a19ee5..bf0bd8bcf1 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -46,33 +46,38 @@ export type ResponseMessage = { role: string; }; -export type Thoughts = { +export type Thought = { title: string; description: any; // It can be any output from the api props?: { [key: string]: any }; + data_points: string[] | null; }; export type ResponseContext = { - data_points: string[]; followup_questions: string[] | null; - thoughts: Thoughts[]; + thought: Thought | null; }; -export type ChatAppResponseOrError = { +export type ChatAppResponseItem = { message: ResponseMessage; - delta: ResponseMessage; - context: ResponseContext; + delta: ResponseMessage | null; + context: ResponseContext | null; session_state: any; - error?: string; }; export type ChatAppResponse = { - message: ResponseMessage; - delta: ResponseMessage; - context: ResponseContext; - session_state: any; + value: ChatAppResponseItem[]; +}; + +export function getLastResponse(response: ChatAppResponse): ChatAppResponseItem | null { + return response.value.length > 0 ? 
response.value[response.value.length - 1] : null; +} +export type ChatAppError = { + error?: string; }; +export type ChatAppResponseOrError = ChatAppResponse | ChatAppError; + export type ChatAppRequestContext = { overrides?: ChatAppRequestOverrides; }; diff --git a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx index 2cee00c761..1ec4a27da3 100644 --- a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx +++ b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx @@ -18,14 +18,14 @@ interface Props { onActiveTabChanged: (tab: AnalysisPanelTabs) => void; activeCitation: string | undefined; citationHeight: string; - answer: ChatAppResponse; + response: ChatAppResponse; } const pivotItemDisabledStyle = { disabled: true, style: { color: "grey" } }; -export const AnalysisPanel = ({ answer, activeTab, activeCitation, citationHeight, className, onActiveTabChanged }: Props) => { - const isDisabledThoughtProcessTab: boolean = !answer.context.thoughts; - const isDisabledSupportingContentTab: boolean = !answer.context.data_points; +export const AnalysisPanel = ({ response, activeTab, activeCitation, citationHeight, className, onActiveTabChanged }: Props) => { + const isDisabledThoughtProcessTab: boolean = !response.value.some(item => item.context?.thought); + const isDisabledSupportingContentTab: boolean = !response.value.some(item => item.context?.thought?.data_points); const isDisabledCitationTab: boolean = !activeCitation; const [citation, setCitation] = useState(""); @@ -82,14 +82,15 @@ export const AnalysisPanel = ({ answer, activeTab, activeCitation, citationHeigh headerText={t("headerTexts.thoughtProcess")} headerButtonProps={isDisabledThoughtProcessTab ? pivotItemDisabledStyle : undefined} > - + item.context?.thought ?? 
[])} /> - + {/* TODO: How do we handle citations?*/} + { const followupQuestions = answer.context?.followup_questions; - const parsedAnswer = useMemo(() => parseAnswerToHtml(answer, isStreaming, onCitationClicked), [answer]); + const parsedAnswer = useMemo(() => parseAnswerToHtml(answer, isStreaming, onCitationClicked), [answer.message.content]); const { t } = useTranslation(); const sanitizedAnswerHtml = DOMPurify.sanitize(parsedAnswer.answerHtml); const [copied, setCopied] = useState(false); @@ -80,7 +82,7 @@ export const Answer = ({ title={t("tooltips.showThoughtProcess")} ariaLabel={t("tooltips.showThoughtProcess")} onClick={() => onThoughtProcessClicked()} - disabled={!answer.context.thoughts?.length} + disabled={!response.value.some(thought => thought.context?.thought)} /> onSupportingContentClicked()} - disabled={!answer.context.data_points} + disabled={!answer.context?.thought?.data_points?.length} /> {showSpeechOutputAzure && ( - + )} {showSpeechOutputBrowser && } diff --git a/app/frontend/src/components/Answer/AnswerParser.tsx b/app/frontend/src/components/Answer/AnswerParser.tsx index 3807592f6d..c44690ea4e 100644 --- a/app/frontend/src/components/Answer/AnswerParser.tsx +++ b/app/frontend/src/components/Answer/AnswerParser.tsx @@ -1,5 +1,5 @@ import { renderToStaticMarkup } from "react-dom/server"; -import { ChatAppResponse, getCitationFilePath } from "../../api"; +import { ChatAppResponseItem, getCitationFilePath } from "../../api"; type HtmlParsedAnswer = { answerHtml: string; @@ -30,8 +30,8 @@ function isCitationValid(contextDataPoints: any, citationCandidate: string): boo return isValidCitation; } -export function parseAnswerToHtml(answer: ChatAppResponse, isStreaming: boolean, onCitationClicked: (citationFilePath: string) => void): HtmlParsedAnswer { - const contextDataPoints = answer.context.data_points; +export function parseAnswerToHtml(answer: ChatAppResponseItem, isStreaming: boolean, onCitationClicked: (citationFilePath: string) => void): HtmlParsedAnswer { + const contextDataPoints = answer.context?.thought?.data_points ?? []; const citations: string[] = []; // Trim any whitespace from the end of the answer after removing follow-up questions diff --git a/app/frontend/src/index.tsx b/app/frontend/src/index.tsx index a8821c8c45..706c28532e 100644 --- a/app/frontend/src/index.tsx +++ b/app/frontend/src/index.tsx @@ -23,8 +23,8 @@ const router = createHashRouter([ element: }, { - path: "qa", - lazy: () => import("./pages/ask/Ask") + path: "qa" + //lazy: () => import("./pages/ask/Ask") }, { path: "*", diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx index 8e38076adb..b339d9c05a 100644 --- a/app/frontend/src/pages/ask/Ask.tsx +++ b/app/frontend/src/pages/ask/Ask.tsx @@ -146,12 +146,12 @@ export function Component(): JSX.Element { language: i18n.language, ...(seed !== null ? { seed: seed } : {}) } - }, + } // AI Chat Protocol: Client must pass on any session state received from the server - session_state: answer ? answer.session_state : null + //session_state: answer ? answer.session_state : null }; const result = await askApi(request, token); - setAnswer(result); + //setAnswer(result); setSpeechUrls([null]); } catch (e) { setError(e); @@ -287,21 +287,7 @@ export function Component(): JSX.Element { )} - {!isLoading && answer && !error && ( -
-                            onShowCitation(x)}
-                            onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab)}
-                            onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab)}
-                            showSpeechOutputAzure={showSpeechOutputAzure}
-                            showSpeechOutputBrowser={showSpeechOutputBrowser}
-                        />
-
-                    )}
+                    {!isLoading && answer && !error && }

                    {error ? (
makeApiRequest(lastQuestionRef.current)} /> diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 5d00c2c914..784444b351 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -11,13 +11,16 @@ import { chatApi, configApi, RetrievalMode, + ChatAppError, ChatAppResponse, + ChatAppResponseItem, ChatAppResponseOrError, ChatAppRequest, ResponseMessage, VectorFieldOptions, GPT4VInput, - SpeechConfig + SpeechConfig, + getLastResponse } from "../../api"; import { Answer, AnswerError, AnswerLoading } from "../../components/Answer"; import { QuestionInput } from "../../components/QuestionInput"; @@ -37,6 +40,12 @@ import { LoginContext } from "../../loginContext"; import { LanguagePicker } from "../../i18n/LanguagePicker"; import { Settings } from "../../components/Settings/Settings"; +const enum LoadingType { + None = "none", + Generating = "generating", + Thinking = "thinking" +} + const Chat = () => { const [isConfigPanelOpen, setIsConfigPanelOpen] = useState(false); const [isHistoryPanelOpen, setIsHistoryPanelOpen] = useState(false); @@ -65,16 +74,16 @@ const Chat = () => { const lastQuestionRef = useRef(""); const chatMessageStreamEnd = useRef(null); - const [isLoading, setIsLoading] = useState(false); + const [isLoading, setIsLoading] = useState(LoadingType.None); const [isStreaming, setIsStreaming] = useState(false); const [error, setError] = useState(); const [activeCitation, setActiveCitation] = useState(); const [activeAnalysisPanelTab, setActiveAnalysisPanelTab] = useState(undefined); - const [selectedAnswer, setSelectedAnswer] = useState(0); - const [answers, setAnswers] = useState<[user: string, response: ChatAppResponse][]>([]); - const [streamedAnswers, setStreamedAnswers] = useState<[user: string, response: ChatAppResponse][]>([]); + const [selectedResponse, setSelectedResponse] = useState(0); + const [responses, setResponses] = useState<[user: string, response: ChatAppResponse][]>([]); + const [streamedResponses, setStreamedResponses] = useState<[user: string, response: ChatAppResponse][]>([]); const [speechUrls, setSpeechUrls] = useState<(string | null)[]>([]); const [showGPT4VOptions, setShowGPT4VOptions] = useState(false); @@ -129,47 +138,62 @@ const Chat = () => { }); }; - const handleAsyncRequest = async (question: string, answers: [string, ChatAppResponse][], responseBody: ReadableStream) => { - let answer: string = ""; - let askResponse: ChatAppResponse = {} as ChatAppResponse; - - const updateState = (newContent: string) => { + const handleAsyncRequest = async (question: string, responses: [string, ChatAppResponse][], responseBody: ReadableStream) => { + var response: ChatAppResponse = { + value: [] + }; + const updateState = () => { return new Promise(resolve => { setTimeout(() => { - answer += newContent; - const latestResponse: ChatAppResponse = { - ...askResponse, - message: { content: answer, role: askResponse.message.role } - }; - setStreamedAnswers([...answers, [question, latestResponse]]); + setStreamedResponses([...responses, [question, response]]); resolve(null); }, 33); }); }; try { setIsStreaming(true); + for await (const event of readNDJSONStream(responseBody)) { - if (event["context"] && event["context"]["data_points"]) { - event["message"] = event["delta"]; - askResponse = event as ChatAppResponse; - } else if (event["delta"] && event["delta"]["content"]) { - setIsLoading(false); - await updateState(event["delta"]["content"]); - } else if (event["context"]) { - // Update context 
with new keys from latest event - askResponse.context = { ...askResponse.context, ...event["context"] }; - } else if (event["error"]) { - throw Error(event["error"]); + if (event["error"]) { + return { error: event["error"] } as ChatAppError; + } + + if (event["delta"]) { + if (event["delta"]["finish_reason"] == "stop") { + setIsLoading(LoadingType.None); + response.value[response.value.length - 1].context = event["context"]; + response.value[response.value.length - 1].session_state = event["session_state"]; + + await updateState(); + continue; + } + + if ("has_content" in event) { + setIsLoading(event["has_content"] ? LoadingType.Generating : LoadingType.Thinking); + var responseItem: ChatAppResponseItem = { + message: event["has_content"] ? { content: "", role: "" } : null + } as ChatAppResponseItem; + response.value.push(responseItem); + + continue; + } + + if (event["delta"]["content"]) { + setIsLoading(LoadingType.None); + response.value[response.value.length - 1].message.content += event["delta"]["content"]; + response.value[response.value.length - 1].message.role = event["delta"]["role"]; + + await updateState(); + continue; + } } } } finally { setIsStreaming(false); + setIsLoading(LoadingType.None); } - const fullResponse: ChatAppResponse = { - ...askResponse, - message: { content: answer, role: askResponse.message.role } - }; - return fullResponse; + + return response; }; const client = useLogin ? useMsal().instance : undefined; @@ -186,17 +210,20 @@ const Chat = () => { lastQuestionRef.current = question; error && setError(undefined); - setIsLoading(true); + setIsLoading(shouldStream ? LoadingType.Thinking : LoadingType.Generating); setActiveCitation(undefined); setActiveAnalysisPanelTab(undefined); const token = client ? await getToken(client) : undefined; try { - const messages: ResponseMessage[] = answers.flatMap(a => [ - { content: a[0], role: "user" }, - { content: a[1].message.content, role: "assistant" } - ]); + const messages: ResponseMessage[] = responses.flatMap(a => { + let lastMessage = getLastResponse(a[1])?.message; + return [ + { content: a[0], role: "user" }, + { content: lastMessage?.content ?? "", role: lastMessage?.role ?? "assistant" } + ]; + }); const request: ChatAppRequest = { messages: [...messages, { content: question, role: "user" }], @@ -225,7 +252,7 @@ const Chat = () => { } }, // AI Chat Protocol: Client must pass on any session state received from the server - session_state: answers.length ? answers[answers.length - 1][1].session_state : null + session_state: responses.length ? getLastResponse(responses[responses.length - 1][1])?.session_state : null }; const response = await chatApi(request, shouldStream, token); @@ -235,29 +262,22 @@ const Chat = () => { if (response.status > 299 || !response.ok) { throw Error(`Request failed with status ${response.status}`); } - if (shouldStream) { - const parsedResponse: ChatAppResponse = await handleAsyncRequest(question, answers, response.body); - setAnswers([...answers, [question, parsedResponse]]); - if (typeof parsedResponse.session_state === "string" && parsedResponse.session_state !== "") { - const token = client ? 
await getToken(client) : undefined; - historyManager.addItem(parsedResponse.session_state, [...answers, [question, parsedResponse]], token); - } - } else { - const parsedResponse: ChatAppResponseOrError = await response.json(); - if (parsedResponse.error) { - throw Error(parsedResponse.error); - } - setAnswers([...answers, [question, parsedResponse as ChatAppResponse]]); - if (typeof parsedResponse.session_state === "string" && parsedResponse.session_state !== "") { - const token = client ? await getToken(client) : undefined; - historyManager.addItem(parsedResponse.session_state, [...answers, [question, parsedResponse as ChatAppResponse]], token); - } + var parsedResponse: ChatAppResponseOrError = shouldStream ? await handleAsyncRequest(question, responses, response.body) : await response.json(); + + if ("error" in parsedResponse) { + throw Error(parsedResponse.error); + } + setResponses([...responses, [question, parsedResponse as ChatAppResponse]]); + let lastMessage = getLastResponse(parsedResponse as ChatAppResponse) ?? ({} as ChatAppResponseItem); + if (typeof lastMessage.session_state === "string" && lastMessage.session_state !== "") { + const token = client ? await getToken(client) : undefined; + historyManager.addItem(lastMessage.session_state, [...responses, [question, parsedResponse as ChatAppResponse]], token); } setSpeechUrls([...speechUrls, null]); } catch (e) { setError(e); } finally { - setIsLoading(false); + setIsLoading(LoadingType.None); } }; @@ -266,15 +286,15 @@ const Chat = () => { error && setError(undefined); setActiveCitation(undefined); setActiveAnalysisPanelTab(undefined); - setAnswers([]); + setResponses([]); setSpeechUrls([]); - setStreamedAnswers([]); - setIsLoading(false); + setStreamedResponses([]); + setIsLoading(LoadingType.None); setIsStreaming(false); }; useEffect(() => chatMessageStreamEnd.current?.scrollIntoView({ behavior: "smooth" }), [isLoading]); - useEffect(() => chatMessageStreamEnd.current?.scrollIntoView({ behavior: "auto" }), [streamedAnswers]); + useEffect(() => chatMessageStreamEnd.current?.scrollIntoView({ behavior: "auto" }), [streamedResponses]); useEffect(() => { getConfig(); }, []); @@ -349,24 +369,24 @@ const Chat = () => { }; const onShowCitation = (citation: string, index: number) => { - if (activeCitation === citation && activeAnalysisPanelTab === AnalysisPanelTabs.CitationTab && selectedAnswer === index) { + if (activeCitation === citation && activeAnalysisPanelTab === AnalysisPanelTabs.CitationTab && selectedResponse === index) { setActiveAnalysisPanelTab(undefined); } else { setActiveCitation(citation); setActiveAnalysisPanelTab(AnalysisPanelTabs.CitationTab); } - setSelectedAnswer(index); + setSelectedResponse(index); }; const onToggleTab = (tab: AnalysisPanelTabs, index: number) => { - if (activeAnalysisPanelTab === tab && selectedAnswer === index) { + if (activeAnalysisPanelTab === tab && selectedResponse === index) { setActiveAnalysisPanelTab(undefined); } else { setActiveAnalysisPanelTab(tab); } - setSelectedAnswer(index); + setSelectedResponse(index); }; const { t, i18n } = useTranslation(); @@ -384,7 +404,11 @@ const Chat = () => { )}
- + {showUserUpload && } setIsConfigPanelOpen(!isConfigPanelOpen)} />
@@ -404,52 +428,64 @@ const Chat = () => { ) : (
                    {isStreaming &&
-                        streamedAnswers.map((streamedAnswer, index) => (
- -
-                                    onShowCitation(c, index)}
-                                    onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)}
-                                    onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab, index)}
-                                    onFollowupQuestionClicked={q => makeApiRequest(q)}
-                                    showFollowupQuestions={useSuggestFollowupQuestions && answers.length - 1 === index}
-                                    showSpeechOutputAzure={showSpeechOutputAzure}
-                                    showSpeechOutputBrowser={showSpeechOutputBrowser}
-                                />
-
-
+ streamedResponses.map((streamedResponse, responseIndex) => ( + <> + + {streamedResponse[1].value.map( + (thought, thoughtIndex) => + thought.message && ( + onShowCitation(c, responseIndex)} + onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, responseIndex)} + onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab, responseIndex)} + onFollowupQuestionClicked={q => makeApiRequest(q)} + showFollowupQuestions={useSuggestFollowupQuestions && responses.length - 1 === responseIndex} + showSpeechOutputAzure={showSpeechOutputAzure} + showSpeechOutputBrowser={showSpeechOutputBrowser} + /> + ) + )} + ))} {!isStreaming && - answers.map((answer, index) => ( -
- -
-                                    onShowCitation(c, index)}
-                                    onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)}
-                                    onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab, index)}
-                                    onFollowupQuestionClicked={q => makeApiRequest(q)}
-                                    showFollowupQuestions={useSuggestFollowupQuestions && answers.length - 1 === index}
-                                    showSpeechOutputAzure={showSpeechOutputAzure}
-                                    showSpeechOutputBrowser={showSpeechOutputBrowser}
-                                />
-
+ responses.map((response, responseIndex) => ( +
+ + <> + {response[1].value.map( + (thought, thoughtIndex) => + thought.message && ( + onShowCitation(c, responseIndex)} + onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, responseIndex)} + onSupportingContentClicked={() => + onToggleTab(AnalysisPanelTabs.SupportingContentTab, responseIndex) + } + onFollowupQuestionClicked={q => makeApiRequest(q)} + showFollowupQuestions={useSuggestFollowupQuestions && responses.length - 1 === responseIndex} + showSpeechOutputAzure={showSpeechOutputAzure} + showSpeechOutputBrowser={showSpeechOutputBrowser} + /> + ) + )} +
))} - {isLoading && ( + {isLoading == LoadingType.Generating && ( <>
@@ -473,20 +509,20 @@ const Chat = () => { makeApiRequest(question)} showSpeechInput={showSpeechInput} />
- {answers.length > 0 && activeAnalysisPanelTab && ( + {responses.length > 0 && activeAnalysisPanelTab && ( onToggleTab(x, selectedAnswer)} + onActiveTabChanged={x => onToggleTab(x, selectedResponse)} citationHeight="810px" - answer={answers[selectedAnswer][1]} + response={responses[selectedResponse][1]} activeTab={activeAnalysisPanelTab} /> )} @@ -499,7 +535,7 @@ const Chat = () => { onClose={() => setIsHistoryPanelOpen(false)} onChatSelected={answers => { if (answers.length === 0) return; - setAnswers(answers); + setResponses(answers); lastQuestionRef.current = answers[answers.length - 1][0]; }} /> From 7684fc7363dfa17302e2e7b6487384c676e8c23e Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Thu, 17 Apr 2025 21:45:38 -0700 Subject: [PATCH 02/10] revert --- app/frontend/src/api/api.ts | 6 +- app/frontend/src/api/models.ts | 27 +- .../AnalysisPanel/AnalysisPanel.tsx | 13 +- app/frontend/src/components/Answer/Answer.tsx | 18 +- .../src/components/Answer/AnswerParser.tsx | 6 +- app/frontend/src/index.tsx | 4 +- app/frontend/src/pages/ask/Ask.tsx | 22 +- app/frontend/src/pages/chat/Chat.tsx | 260 ++++++++---------- 8 files changed, 163 insertions(+), 193 deletions(-) diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index dc4c30ffc2..df95f801b5 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -22,7 +22,7 @@ export async function configApi(): Promise { return (await response.json()) as Config; } -export async function askApi(request: ChatAppRequest, idToken: string | undefined): Promise { +export async function askApi(request: ChatAppRequest, idToken: string | undefined): Promise { const headers = await getHeaders(idToken); const response = await fetch(`${BACKEND_URI}/ask`, { method: "POST", @@ -34,11 +34,11 @@ export async function askApi(request: ChatAppRequest, idToken: string | undefine throw Error(`Request failed with status ${response.status}`); } const parsedResponse: ChatAppResponseOrError = await response.json(); - if ("error" in parsedResponse) { + if (parsedResponse.error) { throw Error(parsedResponse.error); } - return parsedResponse as ChatAppResponse[]; + return parsedResponse as ChatAppResponse; } export async function chatApi(request: ChatAppRequest, shouldStream: boolean, idToken: string | undefined): Promise { diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index bf0bd8bcf1..c915a19ee5 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -46,38 +46,33 @@ export type ResponseMessage = { role: string; }; -export type Thought = { +export type Thoughts = { title: string; description: any; // It can be any output from the api props?: { [key: string]: any }; - data_points: string[] | null; }; export type ResponseContext = { + data_points: string[]; followup_questions: string[] | null; - thought: Thought | null; + thoughts: Thoughts[]; }; -export type ChatAppResponseItem = { +export type ChatAppResponseOrError = { message: ResponseMessage; - delta: ResponseMessage | null; - context: ResponseContext | null; + delta: ResponseMessage; + context: ResponseContext; session_state: any; + error?: string; }; export type ChatAppResponse = { - value: ChatAppResponseItem[]; -}; - -export function getLastResponse(response: ChatAppResponse): ChatAppResponseItem | null { - return response.value.length > 0 ? 
response.value[response.value.length - 1] : null; -} -export type ChatAppError = { - error?: string; + message: ResponseMessage; + delta: ResponseMessage; + context: ResponseContext; + session_state: any; }; -export type ChatAppResponseOrError = ChatAppResponse | ChatAppError; - export type ChatAppRequestContext = { overrides?: ChatAppRequestOverrides; }; diff --git a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx index 1ec4a27da3..2cee00c761 100644 --- a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx +++ b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx @@ -18,14 +18,14 @@ interface Props { onActiveTabChanged: (tab: AnalysisPanelTabs) => void; activeCitation: string | undefined; citationHeight: string; - response: ChatAppResponse; + answer: ChatAppResponse; } const pivotItemDisabledStyle = { disabled: true, style: { color: "grey" } }; -export const AnalysisPanel = ({ response, activeTab, activeCitation, citationHeight, className, onActiveTabChanged }: Props) => { - const isDisabledThoughtProcessTab: boolean = !response.value.some(item => item.context?.thought); - const isDisabledSupportingContentTab: boolean = !response.value.some(item => item.context?.thought?.data_points); +export const AnalysisPanel = ({ answer, activeTab, activeCitation, citationHeight, className, onActiveTabChanged }: Props) => { + const isDisabledThoughtProcessTab: boolean = !answer.context.thoughts; + const isDisabledSupportingContentTab: boolean = !answer.context.data_points; const isDisabledCitationTab: boolean = !activeCitation; const [citation, setCitation] = useState(""); @@ -82,15 +82,14 @@ export const AnalysisPanel = ({ response, activeTab, activeCitation, citationHei headerText={t("headerTexts.thoughtProcess")} headerButtonProps={isDisabledThoughtProcessTab ? pivotItemDisabledStyle : undefined} > - item.context?.thought ?? [])} /> + - {/* TODO: How do we handle citations?*/} - + { const followupQuestions = answer.context?.followup_questions; - const parsedAnswer = useMemo(() => parseAnswerToHtml(answer, isStreaming, onCitationClicked), [answer.message.content]); + const parsedAnswer = useMemo(() => parseAnswerToHtml(answer, isStreaming, onCitationClicked), [answer]); const { t } = useTranslation(); const sanitizedAnswerHtml = DOMPurify.sanitize(parsedAnswer.answerHtml); const [copied, setCopied] = useState(false); @@ -82,7 +80,7 @@ export const Answer = ({ title={t("tooltips.showThoughtProcess")} ariaLabel={t("tooltips.showThoughtProcess")} onClick={() => onThoughtProcessClicked()} - disabled={!response.value.some(thought => thought.context?.thought)} + disabled={!answer.context.thoughts?.length} /> onSupportingContentClicked()} - disabled={!answer.context?.thought?.data_points?.length} + disabled={!answer.context.data_points} /> {showSpeechOutputAzure && ( - + )} {showSpeechOutputBrowser && }
diff --git a/app/frontend/src/components/Answer/AnswerParser.tsx b/app/frontend/src/components/Answer/AnswerParser.tsx index c44690ea4e..3807592f6d 100644 --- a/app/frontend/src/components/Answer/AnswerParser.tsx +++ b/app/frontend/src/components/Answer/AnswerParser.tsx @@ -1,5 +1,5 @@ import { renderToStaticMarkup } from "react-dom/server"; -import { ChatAppResponseItem, getCitationFilePath } from "../../api"; +import { ChatAppResponse, getCitationFilePath } from "../../api"; type HtmlParsedAnswer = { answerHtml: string; @@ -30,8 +30,8 @@ function isCitationValid(contextDataPoints: any, citationCandidate: string): boo return isValidCitation; } -export function parseAnswerToHtml(answer: ChatAppResponseItem, isStreaming: boolean, onCitationClicked: (citationFilePath: string) => void): HtmlParsedAnswer { - const contextDataPoints = answer.context?.thought?.data_points ?? []; +export function parseAnswerToHtml(answer: ChatAppResponse, isStreaming: boolean, onCitationClicked: (citationFilePath: string) => void): HtmlParsedAnswer { + const contextDataPoints = answer.context.data_points; const citations: string[] = []; // Trim any whitespace from the end of the answer after removing follow-up questions diff --git a/app/frontend/src/index.tsx b/app/frontend/src/index.tsx index 706c28532e..a8821c8c45 100644 --- a/app/frontend/src/index.tsx +++ b/app/frontend/src/index.tsx @@ -23,8 +23,8 @@ const router = createHashRouter([ element: }, { - path: "qa" - //lazy: () => import("./pages/ask/Ask") + path: "qa", + lazy: () => import("./pages/ask/Ask") }, { path: "*", diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx index b339d9c05a..8e38076adb 100644 --- a/app/frontend/src/pages/ask/Ask.tsx +++ b/app/frontend/src/pages/ask/Ask.tsx @@ -146,12 +146,12 @@ export function Component(): JSX.Element { language: i18n.language, ...(seed !== null ? { seed: seed } : {}) } - } + }, // AI Chat Protocol: Client must pass on any session state received from the server - //session_state: answer ? answer.session_state : null + session_state: answer ? answer.session_state : null }; const result = await askApi(request, token); - //setAnswer(result); + setAnswer(result); setSpeechUrls([null]); } catch (e) { setError(e); @@ -287,7 +287,21 @@ export function Component(): JSX.Element { )} - {!isLoading && answer && !error &&
} + {!isLoading && answer && !error && ( +
+ onShowCitation(x)} + onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab)} + onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab)} + showSpeechOutputAzure={showSpeechOutputAzure} + showSpeechOutputBrowser={showSpeechOutputBrowser} + /> +
+ )} {error ? (
makeApiRequest(lastQuestionRef.current)} /> diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 784444b351..5d00c2c914 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -11,16 +11,13 @@ import { chatApi, configApi, RetrievalMode, - ChatAppError, ChatAppResponse, - ChatAppResponseItem, ChatAppResponseOrError, ChatAppRequest, ResponseMessage, VectorFieldOptions, GPT4VInput, - SpeechConfig, - getLastResponse + SpeechConfig } from "../../api"; import { Answer, AnswerError, AnswerLoading } from "../../components/Answer"; import { QuestionInput } from "../../components/QuestionInput"; @@ -40,12 +37,6 @@ import { LoginContext } from "../../loginContext"; import { LanguagePicker } from "../../i18n/LanguagePicker"; import { Settings } from "../../components/Settings/Settings"; -const enum LoadingType { - None = "none", - Generating = "generating", - Thinking = "thinking" -} - const Chat = () => { const [isConfigPanelOpen, setIsConfigPanelOpen] = useState(false); const [isHistoryPanelOpen, setIsHistoryPanelOpen] = useState(false); @@ -74,16 +65,16 @@ const Chat = () => { const lastQuestionRef = useRef(""); const chatMessageStreamEnd = useRef(null); - const [isLoading, setIsLoading] = useState(LoadingType.None); + const [isLoading, setIsLoading] = useState(false); const [isStreaming, setIsStreaming] = useState(false); const [error, setError] = useState(); const [activeCitation, setActiveCitation] = useState(); const [activeAnalysisPanelTab, setActiveAnalysisPanelTab] = useState(undefined); - const [selectedResponse, setSelectedResponse] = useState(0); - const [responses, setResponses] = useState<[user: string, response: ChatAppResponse][]>([]); - const [streamedResponses, setStreamedResponses] = useState<[user: string, response: ChatAppResponse][]>([]); + const [selectedAnswer, setSelectedAnswer] = useState(0); + const [answers, setAnswers] = useState<[user: string, response: ChatAppResponse][]>([]); + const [streamedAnswers, setStreamedAnswers] = useState<[user: string, response: ChatAppResponse][]>([]); const [speechUrls, setSpeechUrls] = useState<(string | null)[]>([]); const [showGPT4VOptions, setShowGPT4VOptions] = useState(false); @@ -138,62 +129,47 @@ const Chat = () => { }); }; - const handleAsyncRequest = async (question: string, responses: [string, ChatAppResponse][], responseBody: ReadableStream) => { - var response: ChatAppResponse = { - value: [] - }; - const updateState = () => { + const handleAsyncRequest = async (question: string, answers: [string, ChatAppResponse][], responseBody: ReadableStream) => { + let answer: string = ""; + let askResponse: ChatAppResponse = {} as ChatAppResponse; + + const updateState = (newContent: string) => { return new Promise(resolve => { setTimeout(() => { - setStreamedResponses([...responses, [question, response]]); + answer += newContent; + const latestResponse: ChatAppResponse = { + ...askResponse, + message: { content: answer, role: askResponse.message.role } + }; + setStreamedAnswers([...answers, [question, latestResponse]]); resolve(null); }, 33); }); }; try { setIsStreaming(true); - for await (const event of readNDJSONStream(responseBody)) { - if (event["error"]) { - return { error: event["error"] } as ChatAppError; - } - - if (event["delta"]) { - if (event["delta"]["finish_reason"] == "stop") { - setIsLoading(LoadingType.None); - response.value[response.value.length - 1].context = event["context"]; - response.value[response.value.length - 1].session_state = 
event["session_state"]; - - await updateState(); - continue; - } - - if ("has_content" in event) { - setIsLoading(event["has_content"] ? LoadingType.Generating : LoadingType.Thinking); - var responseItem: ChatAppResponseItem = { - message: event["has_content"] ? { content: "", role: "" } : null - } as ChatAppResponseItem; - response.value.push(responseItem); - - continue; - } - - if (event["delta"]["content"]) { - setIsLoading(LoadingType.None); - response.value[response.value.length - 1].message.content += event["delta"]["content"]; - response.value[response.value.length - 1].message.role = event["delta"]["role"]; - - await updateState(); - continue; - } + if (event["context"] && event["context"]["data_points"]) { + event["message"] = event["delta"]; + askResponse = event as ChatAppResponse; + } else if (event["delta"] && event["delta"]["content"]) { + setIsLoading(false); + await updateState(event["delta"]["content"]); + } else if (event["context"]) { + // Update context with new keys from latest event + askResponse.context = { ...askResponse.context, ...event["context"] }; + } else if (event["error"]) { + throw Error(event["error"]); } } } finally { setIsStreaming(false); - setIsLoading(LoadingType.None); } - - return response; + const fullResponse: ChatAppResponse = { + ...askResponse, + message: { content: answer, role: askResponse.message.role } + }; + return fullResponse; }; const client = useLogin ? useMsal().instance : undefined; @@ -210,20 +186,17 @@ const Chat = () => { lastQuestionRef.current = question; error && setError(undefined); - setIsLoading(shouldStream ? LoadingType.Thinking : LoadingType.Generating); + setIsLoading(true); setActiveCitation(undefined); setActiveAnalysisPanelTab(undefined); const token = client ? await getToken(client) : undefined; try { - const messages: ResponseMessage[] = responses.flatMap(a => { - let lastMessage = getLastResponse(a[1])?.message; - return [ - { content: a[0], role: "user" }, - { content: lastMessage?.content ?? "", role: lastMessage?.role ?? "assistant" } - ]; - }); + const messages: ResponseMessage[] = answers.flatMap(a => [ + { content: a[0], role: "user" }, + { content: a[1].message.content, role: "assistant" } + ]); const request: ChatAppRequest = { messages: [...messages, { content: question, role: "user" }], @@ -252,7 +225,7 @@ const Chat = () => { } }, // AI Chat Protocol: Client must pass on any session state received from the server - session_state: responses.length ? getLastResponse(responses[responses.length - 1][1])?.session_state : null + session_state: answers.length ? answers[answers.length - 1][1].session_state : null }; const response = await chatApi(request, shouldStream, token); @@ -262,22 +235,29 @@ const Chat = () => { if (response.status > 299 || !response.ok) { throw Error(`Request failed with status ${response.status}`); } - var parsedResponse: ChatAppResponseOrError = shouldStream ? await handleAsyncRequest(question, responses, response.body) : await response.json(); - - if ("error" in parsedResponse) { - throw Error(parsedResponse.error); - } - setResponses([...responses, [question, parsedResponse as ChatAppResponse]]); - let lastMessage = getLastResponse(parsedResponse as ChatAppResponse) ?? ({} as ChatAppResponseItem); - if (typeof lastMessage.session_state === "string" && lastMessage.session_state !== "") { - const token = client ? 
await getToken(client) : undefined; - historyManager.addItem(lastMessage.session_state, [...responses, [question, parsedResponse as ChatAppResponse]], token); + if (shouldStream) { + const parsedResponse: ChatAppResponse = await handleAsyncRequest(question, answers, response.body); + setAnswers([...answers, [question, parsedResponse]]); + if (typeof parsedResponse.session_state === "string" && parsedResponse.session_state !== "") { + const token = client ? await getToken(client) : undefined; + historyManager.addItem(parsedResponse.session_state, [...answers, [question, parsedResponse]], token); + } + } else { + const parsedResponse: ChatAppResponseOrError = await response.json(); + if (parsedResponse.error) { + throw Error(parsedResponse.error); + } + setAnswers([...answers, [question, parsedResponse as ChatAppResponse]]); + if (typeof parsedResponse.session_state === "string" && parsedResponse.session_state !== "") { + const token = client ? await getToken(client) : undefined; + historyManager.addItem(parsedResponse.session_state, [...answers, [question, parsedResponse as ChatAppResponse]], token); + } } setSpeechUrls([...speechUrls, null]); } catch (e) { setError(e); } finally { - setIsLoading(LoadingType.None); + setIsLoading(false); } }; @@ -286,15 +266,15 @@ const Chat = () => { error && setError(undefined); setActiveCitation(undefined); setActiveAnalysisPanelTab(undefined); - setResponses([]); + setAnswers([]); setSpeechUrls([]); - setStreamedResponses([]); - setIsLoading(LoadingType.None); + setStreamedAnswers([]); + setIsLoading(false); setIsStreaming(false); }; useEffect(() => chatMessageStreamEnd.current?.scrollIntoView({ behavior: "smooth" }), [isLoading]); - useEffect(() => chatMessageStreamEnd.current?.scrollIntoView({ behavior: "auto" }), [streamedResponses]); + useEffect(() => chatMessageStreamEnd.current?.scrollIntoView({ behavior: "auto" }), [streamedAnswers]); useEffect(() => { getConfig(); }, []); @@ -369,24 +349,24 @@ const Chat = () => { }; const onShowCitation = (citation: string, index: number) => { - if (activeCitation === citation && activeAnalysisPanelTab === AnalysisPanelTabs.CitationTab && selectedResponse === index) { + if (activeCitation === citation && activeAnalysisPanelTab === AnalysisPanelTabs.CitationTab && selectedAnswer === index) { setActiveAnalysisPanelTab(undefined); } else { setActiveCitation(citation); setActiveAnalysisPanelTab(AnalysisPanelTabs.CitationTab); } - setSelectedResponse(index); + setSelectedAnswer(index); }; const onToggleTab = (tab: AnalysisPanelTabs, index: number) => { - if (activeAnalysisPanelTab === tab && selectedResponse === index) { + if (activeAnalysisPanelTab === tab && selectedAnswer === index) { setActiveAnalysisPanelTab(undefined); } else { setActiveAnalysisPanelTab(tab); } - setSelectedResponse(index); + setSelectedAnswer(index); }; const { t, i18n } = useTranslation(); @@ -404,11 +384,7 @@ const Chat = () => { )}
- + {showUserUpload && } setIsConfigPanelOpen(!isConfigPanelOpen)} />
@@ -428,64 +404,52 @@ const Chat = () => { ) : (
{isStreaming && - streamedResponses.map((streamedResponse, responseIndex) => ( - <> - - {streamedResponse[1].value.map( - (thought, thoughtIndex) => - thought.message && ( - onShowCitation(c, responseIndex)} - onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, responseIndex)} - onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab, responseIndex)} - onFollowupQuestionClicked={q => makeApiRequest(q)} - showFollowupQuestions={useSuggestFollowupQuestions && responses.length - 1 === responseIndex} - showSpeechOutputAzure={showSpeechOutputAzure} - showSpeechOutputBrowser={showSpeechOutputBrowser} - /> - ) - )} - + streamedAnswers.map((streamedAnswer, index) => ( +
+ +
+ onShowCitation(c, index)} + onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} + onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab, index)} + onFollowupQuestionClicked={q => makeApiRequest(q)} + showFollowupQuestions={useSuggestFollowupQuestions && answers.length - 1 === index} + showSpeechOutputAzure={showSpeechOutputAzure} + showSpeechOutputBrowser={showSpeechOutputBrowser} + /> +
+
))} {!isStreaming && - responses.map((response, responseIndex) => ( -
- - <> - {response[1].value.map( - (thought, thoughtIndex) => - thought.message && ( - onShowCitation(c, responseIndex)} - onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, responseIndex)} - onSupportingContentClicked={() => - onToggleTab(AnalysisPanelTabs.SupportingContentTab, responseIndex) - } - onFollowupQuestionClicked={q => makeApiRequest(q)} - showFollowupQuestions={useSuggestFollowupQuestions && responses.length - 1 === responseIndex} - showSpeechOutputAzure={showSpeechOutputAzure} - showSpeechOutputBrowser={showSpeechOutputBrowser} - /> - ) - )} - + answers.map((answer, index) => ( +
+ +
+ onShowCitation(c, index)} + onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} + onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab, index)} + onFollowupQuestionClicked={q => makeApiRequest(q)} + showFollowupQuestions={useSuggestFollowupQuestions && answers.length - 1 === index} + showSpeechOutputAzure={showSpeechOutputAzure} + showSpeechOutputBrowser={showSpeechOutputBrowser} + /> +
))} - {isLoading == LoadingType.Generating && ( + {isLoading && ( <>
@@ -509,20 +473,20 @@ const Chat = () => { makeApiRequest(question)} showSpeechInput={showSpeechInput} />
- {responses.length > 0 && activeAnalysisPanelTab && ( + {answers.length > 0 && activeAnalysisPanelTab && ( onToggleTab(x, selectedResponse)} + onActiveTabChanged={x => onToggleTab(x, selectedAnswer)} citationHeight="810px" - response={responses[selectedResponse][1]} + answer={answers[selectedAnswer][1]} activeTab={activeAnalysisPanelTab} /> )} @@ -535,7 +499,7 @@ const Chat = () => { onClose={() => setIsHistoryPanelOpen(false)} onChatSelected={answers => { if (answers.length === 0) return; - setResponses(answers); + setAnswers(answers); lastQuestionRef.current = answers[answers.length - 1][0]; }} /> From 9688143193be60fd4a490d9e4636e322d1f97906 Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Thu, 17 Apr 2025 22:26:51 -0700 Subject: [PATCH 03/10] update --- app/backend/approaches/approach.py | 18 +-- app/backend/approaches/chatapproach.py | 117 ++++++++++-------- .../approaches/chatreadretrieveread.py | 6 +- .../approaches/chatreadretrievereadvision.py | 6 +- app/backend/approaches/retrievethenread.py | 73 +++++------ .../approaches/retrievethenreadvision.py | 76 +++++------- app/frontend/src/pages/chat/Chat.tsx | 17 +-- 7 files changed, 154 insertions(+), 159 deletions(-) diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py index 3d3ed6edca..050a9381c4 100644 --- a/app/backend/approaches/approach.py +++ b/app/backend/approaches/approach.py @@ -88,22 +88,26 @@ def trim_embedding(cls, embedding: Optional[list[float]]) -> Optional[str]: return None -@dataclass -class DataPoints: - text: Optional[list[str]] = None - images: Optional[list] = None - @dataclass class ThoughtStep: title: str description: Optional[Any] props: Optional[dict[str, Any]] = None - data_points: Optional[DataPoints] = None - def update_token_usage(self, usage: CompletionUsage) -> None: if self.props: self.props["token_usage"] = TokenUsageProps.from_completion_usage(usage) +@dataclass +class DataPoints: + text: Optional[list[str]] = None + images: Optional[list] = None + + +@dataclass +class ExtraInfo: + data_points: DataPoints + thoughts: Optional[list[ThoughtStep]] = None + followup_questions: Optional[list[Any]] = None @dataclass class TokenUsageProps: diff --git a/app/backend/approaches/chatapproach.py b/app/backend/approaches/chatapproach.py index 1559c20a5a..1b50964afd 100644 --- a/app/backend/approaches/chatapproach.py +++ b/app/backend/approaches/chatapproach.py @@ -13,14 +13,23 @@ from approaches.approach import ( Approach, + DataPoints, + ExtraInfo, ThoughtStep ) class StreamingThoughtStep: - def __init__(self, step: ThoughtStep, chat_completion: Optional[Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]] = None, role: Optional[str] = "assistant"): + def __init__( + self, + step: ThoughtStep, + chat_completion: Optional[Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]] = None, + role: Optional[str] = "assistant", + data_points: Optional[DataPoints] = None): + self.step = step self.chat_completion = chat_completion self.role = role + self.data_points = data_points self._stream = None self._is_streaming = None @@ -35,7 +44,7 @@ async def start(self): self._stream = await self.chat_completion self._is_streaming = True - async def __anext__(self) -> Union[ChatCompletion, ChatCompletionChunk, ThoughtStep]: + async def __anext__(self) -> Union[ChatCompletion, ChatCompletionChunk, DataPoints, ThoughtStep]: if self._is_streaming: # Streaming Implementation: yield each chunk, then the step with token usage if self._stream is 
None: @@ -54,9 +63,14 @@ async def __anext__(self) -> Union[ChatCompletion, ChatCompletionChunk, ThoughtS # Non-Streaming Implementation: return the entire response, then the step with token usage if self._stream is None: - if self.step is None: + if self.step is None and self.data_points is None: raise StopAsyncIteration + if self.data_points is not None: + result = self.data_points + self.data_points = None + return result + result = self.step self.step = None return result @@ -93,7 +107,7 @@ def get_search_query(self, chat_completion: ChatCompletion, user_query: str): return query_text return user_query - def extract_followup_questions(self, content: Optional[str]) -> Optional[List[str]]: + def extract_followup_questions(self, content: Optional[str]): if content is None: return content, [] return content.split("<<")[0], re.findall(r"<<([^>>]+)>>", content) @@ -104,32 +118,32 @@ async def run_without_streaming( overrides: dict[str, Any], auth_claims: dict[str, Any], session_state: Any = None, - ) -> AsyncGenerator[dict[str, Any], None]: + ) -> dict[str, Any]: thoughts = self.run_until_final_call( messages, overrides, auth_claims, should_stream=False ) + content = None + role = None + extra_info = ExtraInfo() async for thought in thoughts: - content = None - role = None - thought_step = None - followup_questions = None await thought.start() async for chunk in thought: if isinstance(chunk, ChatCompletion): content = chunk.choices[0].message.content role = chunk.choices[0].message.role elif isinstance(chunk, ThoughtStep): - thought_step = chunk - - if overrides.get("suggest_followup_questions"): - content, followup_questions = self.extract_followup_questions(content) - followup_questions = followup_questions - - yield { - "message": {"content": content, "role": role}, - "context": { "thought": thought_step, "followup_questions": followup_questions }, - "session_state": session_state, - } + extra_info.thoughts.append(chunk) + elif isinstance(chunk, DataPoints): + extra_info.data_points = chunk + + if overrides.get("suggest_followup_questions"): + content, followup_questions = self.extract_followup_questions(content) + followup_questions = followup_questions + return { + "message": {"content": content, "role": role}, + "context": extra_info, + "session_state": session_state, + } async def run_with_streaming( self, @@ -137,47 +151,48 @@ async def run_with_streaming( overrides: dict[str, Any], auth_claims: dict[str, Any], session_state: Any = None, - ) -> AsyncGenerator[dict[str, Any], None]: + ) -> AsyncGenerator[dict, None]: thoughts = self.run_until_final_call( messages, overrides, auth_claims, should_stream=True ) - async for thought in thoughts: - yield { "delta": { "role": thought.role }, "has_content": thought.has_content() } + extra_info = ExtraInfo() - followup_questions_started = False - followup_content = "" - thought_step = None + yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} + followup_questions_started = False + followup_content = "" + async for thought in thoughts: await thought.start() - async for event in thought: - if isinstance(event, ChatCompletionChunk): - if event.choices: - completion = { - "delta": { - "content": event.choices[0].delta.content, - "role": event.choices[0].delta.role - } - } + async for chunk in thought: + if isinstance(chunk, ChatCompletionChunk): + content = chunk.choices[0].delta.content + role = chunk.choices[0].delta.role + content = content or "" # content may either not exist in delta, or explicitly be 
None + completion = { "delta": {"content": content, "role": role} } + if overrides.get("suggest_followup_questions") and "<<" in content: # if event contains << and not >>, it is start of follow-up question, truncate - content = completion["delta"].get("content") - content = content or "" # content may either not exist in delta, or explicitly be None - if overrides.get("suggest_followup_questions") and "<<" in content: - followup_questions_started = True - earlier_content = content[: content.index("<<")] - if earlier_content: - completion["delta"]["content"] = earlier_content - yield completion - followup_content += content[content.index("<<") :] - elif followup_questions_started: - followup_content += content - else: + followup_questions_started = True + earlier_content = content[: content.index("<<")] + if earlier_content: + completion["delta"]["content"] = earlier_content yield completion - elif isinstance(event, ThoughtStep): - thought_step = event + followup_content += content[content.index("<<") :] + elif followup_questions_started: + followup_content += content + else: + yield completion + elif isinstance(chunk, ThoughtStep): + extra_info.thoughts.append(chunk) + yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} + elif isinstance(chunk, DataPoints): + extra_info.data_points = chunk + yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} - followup_questions = None if followup_content: _, followup_questions = self.extract_followup_questions(followup_content) - yield {"delta": {"role": thought.role, "finish_reason": "stop" }, "context": { "thought": thought_step, "followup_questions": followup_questions }, "session_state": session_state } + yield { + "delta": {"role": "assistant"}, + "context": {"context": extra_info, "followup_questions": followup_questions}, + } async def run( self, diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 9f25c57a36..529eec120f 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -190,8 +190,7 @@ async def run_until_final_call( overrides=overrides, model=self.chatgpt_model, deployment=self.chatgpt_deployment, - usage=None, - data_points=DataPoints(text=text_sources) + usage=None ), chat_completion=self.create_chat_completion( self.chatgpt_deployment, @@ -200,5 +199,6 @@ async def run_until_final_call( overrides, self.get_response_token_limit(self.chatgpt_model, 1024), should_stream, - ) + ), + data_points=DataPoints(text=text_sources) ) diff --git a/app/backend/approaches/chatreadretrievereadvision.py b/app/backend/approaches/chatreadretrievereadvision.py index 1c6a360ed8..517b84c708 100644 --- a/app/backend/approaches/chatreadretrievereadvision.py +++ b/app/backend/approaches/chatreadretrievereadvision.py @@ -196,8 +196,7 @@ async def run_until_final_call( {"model": self.gpt4v_model, "deployment": self.gpt4v_deployment} if self.gpt4v_deployment else {"model": self.gpt4v_model} - ), - data_points=DataPoints(text=text_sources, images=image_sources) + ) ), chat_completion=self.openai_client.chat.completions.create( model=self.gpt4v_deployment if self.gpt4v_deployment else self.gpt4v_model, @@ -207,5 +206,6 @@ async def run_until_final_call( n=1, stream=should_stream, seed=seed, - ) + ), + data_points=DataPoints(text=text_sources, images=image_sources) ) diff --git a/app/backend/approaches/retrievethenread.py 
b/app/backend/approaches/retrievethenread.py index 8eb74a69d4..8bdbb9785e 100644 --- a/app/backend/approaches/retrievethenread.py +++ b/app/backend/approaches/retrievethenread.py @@ -1,11 +1,11 @@ -from typing import Any, Optional, cast, AsyncGenerator +from typing import Any, Optional, cast from azure.search.documents.aio import SearchClient from azure.search.documents.models import VectorQuery from openai import AsyncOpenAI from openai.types.chat import ChatCompletion, ChatCompletionMessageParam -from approaches.approach import Approach, DataPoints, ThoughtStep +from approaches.approach import Approach, DataPoints, ExtraInfo, ThoughtStep from approaches.promptmanager import PromptManager from core.authentication import AuthenticationHelper @@ -58,7 +58,7 @@ async def run( messages: list[ChatCompletionMessageParam], session_state: Any = None, context: dict[str, Any] = {}, - ) -> AsyncGenerator[dict[str, Any], None]: + ) -> dict[str, Any]: q = messages[-1]["content"] if not isinstance(q, str): raise ValueError("The most recent message content must be a string.") @@ -73,26 +73,6 @@ async def run( minimum_search_score = overrides.get("minimum_search_score", 0.0) minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0) filter = self.build_filter(overrides, auth_claims) - - yield { - "context": { - "thought": ThoughtStep( - "Search using user query", - q, - { - "use_semantic_captions": use_semantic_captions, - "use_semantic_ranker": use_semantic_ranker, - "use_query_rewriting": use_query_rewriting, - "top": top, - "filter": filter, - "use_vector_search": use_vector_search, - "use_text_search": use_text_search, - }, - ) - }, - "session_state": session_state, - } - # If retrieval mode includes vectors, compute an embedding for the query vectors: list[VectorQuery] = [] @@ -121,16 +101,6 @@ async def run( | {"user_query": q, "text_sources": text_sources}, ) - yield { - "context": { - "thought": ThoughtStep( - "Search results", - [result.serialize_for_results() for result in results], - ) - }, - "session_state": session_state - } - chat_completion = cast( ChatCompletion, await self.create_chat_completion( @@ -142,21 +112,42 @@ async def run( ), ) - yield { - "message": { - "content": chat_completion.choices[0].message.content, - "role": chat_completion.choices[0].message.role, - }, - "context": { - "thought": self.format_thought_step_for_chatcompletion( + extra_info = ExtraInfo( + DataPoints(text=text_sources), + thoughts=[ + ThoughtStep( + "Search using user query", + q, + { + "use_semantic_captions": use_semantic_captions, + "use_semantic_ranker": use_semantic_ranker, + "use_query_rewriting": use_query_rewriting, + "top": top, + "filter": filter, + "use_vector_search": use_vector_search, + "use_text_search": use_text_search, + }, + ), + ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results], + ), + self.format_thought_step_for_chatcompletion( title="Prompt to generate answer", messages=messages, overrides=overrides, model=self.chatgpt_model, deployment=self.chatgpt_deployment, usage=chat_completion.usage, - data_points=DataPoints(text=text_sources) ), + ], + ) + + return { + "message": { + "content": chat_completion.choices[0].message.content, + "role": chat_completion.choices[0].message.role, }, + "context": extra_info, "session_state": session_state, } diff --git a/app/backend/approaches/retrievethenreadvision.py b/app/backend/approaches/retrievethenreadvision.py index af92a7a509..a556fd8b6c 100644 --- 
a/app/backend/approaches/retrievethenreadvision.py +++ b/app/backend/approaches/retrievethenreadvision.py @@ -1,5 +1,5 @@ from collections.abc import Awaitable -from typing import Any, Callable, Optional, AsyncGenerator +from typing import Any, Callable, Optional from azure.search.documents.aio import SearchClient from azure.storage.blob.aio import ContainerClient @@ -8,7 +8,7 @@ ChatCompletionMessageParam, ) -from approaches.approach import Approach, DataPoints, ThoughtStep +from approaches.approach import Approach, DataPoints, ExtraInfo, ThoughtStep from approaches.promptmanager import PromptManager from core.authentication import AuthenticationHelper from core.imageshelper import fetch_image @@ -66,7 +66,7 @@ async def run( messages: list[ChatCompletionMessageParam], session_state: Any = None, context: dict[str, Any] = {}, - ) -> AsyncGenerator[dict[str, Any], None]: + ) -> dict[str, Any]: q = messages[-1]["content"] if not isinstance(q, str): raise ValueError("The most recent message content must be a string.") @@ -87,26 +87,6 @@ async def run( vector_fields = overrides.get("vector_fields", ["embedding"]) send_text_to_gptvision = overrides.get("gpt4v_input") in ["textAndImages", "texts", None] send_images_to_gptvision = overrides.get("gpt4v_input") in ["textAndImages", "images", None] - - yield { - "context": { - "thought": ThoughtStep( - "Search using user query", - q, - { - "use_semantic_captions": use_semantic_captions, - "use_semantic_ranker": use_semantic_ranker, - "use_query_rewriting": use_query_rewriting, - "top": top, - "filter": filter, - "vector_fields": vector_fields, - "use_vector_search": use_vector_search, - "use_text_search": use_text_search, - }, - ), - }, - "session_state": session_state, - } # If retrieval mode includes vectors, compute an embedding for the query vectors = [] @@ -133,16 +113,6 @@ async def run( use_query_rewriting, ) - yield { - "context": { - "thought": ThoughtStep( - "Search results", - [result.serialize_for_results() for result in results], - ) - }, - "session_state": session_state - } - # Process results text_sources = [] image_sources = [] @@ -169,13 +139,28 @@ async def run( seed=seed, ) - yield { - "message": { - "content": chat_completion.choices[0].message.content, - "role": chat_completion.choices[0].message.role, - }, - "context": { - "thought": ThoughtStep( + extra_info = ExtraInfo( + DataPoints(text=text_sources, images=image_sources), + [ + ThoughtStep( + "Search using user query", + q, + { + "use_semantic_captions": use_semantic_captions, + "use_semantic_ranker": use_semantic_ranker, + "use_query_rewriting": use_query_rewriting, + "top": top, + "filter": filter, + "vector_fields": vector_fields, + "use_vector_search": use_vector_search, + "use_text_search": use_text_search, + }, + ), + ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results], + ), + ThoughtStep( "Prompt to generate answer", messages, ( @@ -183,8 +168,15 @@ async def run( if self.gpt4v_deployment else {"model": self.gpt4v_model} ), - data_points=DataPoints(text=text_sources, images=image_sources), - ) + ), + ], + ) + + return { + "message": { + "content": chat_completion.choices[0].message.content, + "role": chat_completion.choices[0].message.role, }, + "context": extra_info, "session_state": session_state, } diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 5d00c2c914..6b8a710d29 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -133,13 +133,13 @@ 
const Chat = () => { let answer: string = ""; let askResponse: ChatAppResponse = {} as ChatAppResponse; - const updateState = (newContent: string) => { + const updateState = (newContent: string, role: string) => { return new Promise(resolve => { setTimeout(() => { answer += newContent; const latestResponse: ChatAppResponse = { ...askResponse, - message: { content: answer, role: askResponse.message.role } + message: { content: answer, role: role } }; setStreamedAnswers([...answers, [question, latestResponse]]); resolve(null); @@ -149,12 +149,9 @@ const Chat = () => { try { setIsStreaming(true); for await (const event of readNDJSONStream(responseBody)) { - if (event["context"] && event["context"]["data_points"]) { - event["message"] = event["delta"]; - askResponse = event as ChatAppResponse; - } else if (event["delta"] && event["delta"]["content"]) { + if (event["delta"] && event["delta"]["content"]) { setIsLoading(false); - await updateState(event["delta"]["content"]); + await updateState(event["delta"]["content"], event["delta"]["role"]); } else if (event["context"]) { // Update context with new keys from latest event askResponse.context = { ...askResponse.context, ...event["context"] }; @@ -165,11 +162,7 @@ const Chat = () => { } finally { setIsStreaming(false); } - const fullResponse: ChatAppResponse = { - ...askResponse, - message: { content: answer, role: askResponse.message.role } - }; - return fullResponse; + return askResponse; }; const client = useLogin ? useMsal().instance : undefined; From fcf959e30dc57d3578cd542cb3f08e3b1b63094a Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Thu, 17 Apr 2025 23:00:07 -0700 Subject: [PATCH 04/10] fix streaming --- app/backend/approaches/approach.py | 11 ++- app/backend/approaches/chatapproach.py | 92 +++++++++++++------------- app/frontend/src/pages/chat/Chat.tsx | 7 +- 3 files changed, 52 insertions(+), 58 deletions(-) diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py index 050a9381c4..fdafc19e54 100644 --- a/app/backend/approaches/approach.py +++ b/app/backend/approaches/approach.py @@ -1,7 +1,7 @@ import os from abc import ABC from collections.abc import AsyncGenerator, Awaitable -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import ( Any, Callable, @@ -105,8 +105,8 @@ class DataPoints: @dataclass class ExtraInfo: - data_points: DataPoints - thoughts: Optional[list[ThoughtStep]] = None + data_points: DataPoints = None + thoughts: list[ThoughtStep] = field(default_factory=list) followup_questions: Optional[list[Any]] = None @dataclass @@ -398,8 +398,7 @@ def format_thought_step_for_chatcompletion( model: str, deployment: Optional[str], usage: Optional[CompletionUsage] = None, - reasoning_effort: Optional[ChatCompletionReasoningEffort] = None, - data_points: Optional[DataPoints] = None, + reasoning_effort: Optional[ChatCompletionReasoningEffort] = None ) -> ThoughtStep: properties: dict[str, Any] = {"model": model} if deployment: @@ -411,7 +410,7 @@ def format_thought_step_for_chatcompletion( ) if usage: properties["token_usage"] = TokenUsageProps.from_completion_usage(usage) - return ThoughtStep(title, messages, properties, data_points) + return ThoughtStep(title, messages, properties) async def run( self, diff --git a/app/backend/approaches/chatapproach.py b/app/backend/approaches/chatapproach.py index 1b50964afd..51261d6c77 100644 --- a/app/backend/approaches/chatapproach.py +++ b/app/backend/approaches/chatapproach.py @@ -32,9 +32,6 @@ def __init__( 
self.data_points = data_points self._stream = None self._is_streaming = None - - def has_content(self) -> bool: - return self.chat_completion is not None def __aiter__(self): return self @@ -45,39 +42,38 @@ async def start(self): self._is_streaming = True async def __anext__(self) -> Union[ChatCompletion, ChatCompletionChunk, DataPoints, ThoughtStep]: - if self._is_streaming: - # Streaming Implementation: yield each chunk, then the step with token usage - if self._stream is None: - raise StopAsyncIteration - - try: - # Get the next chunk from the async stream - chunk = await self._stream.__anext__() - if len(chunk.choices) == 0 and chunk.usage: - self.step.update_token_usage(chunk.usage) - return chunk - except StopAsyncIteration: - # If the stream is exhausted, yield the step with token usage - self._stream = None - return self.step + # If there are data points, return them first to render citations + if self.data_points is not None: + result = self.data_points + self.data_points = None + return result - # Non-Streaming Implementation: return the entire response, then the step with token usage - if self._stream is None: - if self.step is None and self.data_points is None: - raise StopAsyncIteration - - if self.data_points is not None: - result = self.data_points - self.data_points = None - return result - + if self._stream is not None: + if self._is_streaming: + try: + # Get the next chunk from the async stream + chunk = await self._stream.__anext__() + if len(chunk.choices) == 0 and chunk.usage: + self.step.update_token_usage(chunk.usage) + return chunk + except StopAsyncIteration: + # If the stream is exhausted, yield the step with token usage + self._stream = None + return self.step + + # Non-Streaming Implementation: return the entire response, then the step with token usage + result = self._stream + self._stream = None + return result + + if self.step is not None: result = self.step self.step = None return result + + # No more items to yield + raise StopAsyncIteration - result = self._stream - self._stream = None - return result class ChatApproach(Approach, ABC): @@ -164,22 +160,23 @@ async def run_with_streaming( await thought.start() async for chunk in thought: if isinstance(chunk, ChatCompletionChunk): - content = chunk.choices[0].delta.content - role = chunk.choices[0].delta.role - content = content or "" # content may either not exist in delta, or explicitly be None - completion = { "delta": {"content": content, "role": role} } - if overrides.get("suggest_followup_questions") and "<<" in content: - # if event contains << and not >>, it is start of follow-up question, truncate - followup_questions_started = True - earlier_content = content[: content.index("<<")] - if earlier_content: - completion["delta"]["content"] = earlier_content + if chunk.choices: + content = chunk.choices[0].delta.content + role = chunk.choices[0].delta.role + content = content or "" # content may either not exist in delta, or explicitly be None + completion = { "delta": {"content": content, "role": role} } + if overrides.get("suggest_followup_questions") and "<<" in content: + # if event contains << and not >>, it is start of follow-up question, truncate + followup_questions_started = True + earlier_content = content[: content.index("<<")] + if earlier_content: + completion["delta"]["content"] = earlier_content + yield completion + followup_content += content[content.index("<<") :] + elif followup_questions_started: + followup_content += content + else: yield completion - followup_content += 
content[content.index("<<") :] - elif followup_questions_started: - followup_content += content - else: - yield completion elif isinstance(chunk, ThoughtStep): extra_info.thoughts.append(chunk) yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} @@ -189,9 +186,10 @@ async def run_with_streaming( if followup_content: _, followup_questions = self.extract_followup_questions(followup_content) + extra_info.followup_questions = followup_questions yield { "delta": {"role": "assistant"}, - "context": {"context": extra_info, "followup_questions": followup_questions}, + "context": extra_info, } async def run( diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 6b8a710d29..7ab010f337 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -137,11 +137,8 @@ const Chat = () => { return new Promise(resolve => { setTimeout(() => { answer += newContent; - const latestResponse: ChatAppResponse = { - ...askResponse, - message: { content: answer, role: role } - }; - setStreamedAnswers([...answers, [question, latestResponse]]); + askResponse.message = { content: answer, role: role }; + setStreamedAnswers([...answers, [question, { ...askResponse }]]); resolve(null); }, 33); }); From 1a5c7382aeaaa2c1610e2b515eb577552a9ef023 Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Thu, 17 Apr 2025 23:14:15 -0700 Subject: [PATCH 05/10] initial reflection prompt --- .../approaches/chatreadretrievereflectread.py | 206 ++++++++++++++++++ .../prompts/chat_reflect_answer.prompty | 65 ++++++ 2 files changed, 271 insertions(+) create mode 100644 app/backend/approaches/chatreadretrievereflectread.py create mode 100644 app/backend/approaches/prompts/chat_reflect_answer.prompty diff --git a/app/backend/approaches/chatreadretrievereflectread.py b/app/backend/approaches/chatreadretrievereflectread.py new file mode 100644 index 0000000000..647cf3cdd1 --- /dev/null +++ b/app/backend/approaches/chatreadretrievereflectread.py @@ -0,0 +1,206 @@ +from collections.abc import Awaitable +from typing import Any, Optional, Union, cast, AsyncGenerator + +from azure.search.documents.aio import SearchClient +from azure.search.documents.models import VectorQuery +from openai import AsyncOpenAI, AsyncStream +from openai.types.chat import ( + ChatCompletion, + ChatCompletionChunk, + ChatCompletionMessageParam, + ChatCompletionToolParam, +) + +from approaches.approach import DataPoints, ThoughtStep +from approaches.chatapproach import ChatApproach, StreamingThoughtStep +from approaches.promptmanager import PromptManager +from core.authentication import AuthenticationHelper + + +class ChatReadRetrieveReflectReadApproach(ChatApproach): + """ + A multi-step approach that first uses OpenAI to turn the user's question into a search query, + then uses Azure AI Search to retrieve relevant documents, and then sends the conversation history, + original user question, and search results to OpenAI to generate a response. 
+ """ + + def __init__( + self, + *, + search_client: SearchClient, + auth_helper: AuthenticationHelper, + openai_client: AsyncOpenAI, + chatgpt_model: str, + chatgpt_deployment: Optional[str], # Not needed for non-Azure OpenAI + embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text" + embedding_model: str, + embedding_dimensions: int, + sourcepage_field: str, + content_field: str, + query_language: str, + query_speller: str, + prompt_manager: PromptManager, + reasoning_effort: Optional[str] = None, + max_steps: Optional[int] = None, + ): + self.search_client = search_client + self.openai_client = openai_client + self.auth_helper = auth_helper + self.chatgpt_model = chatgpt_model + self.chatgpt_deployment = chatgpt_deployment + self.embedding_deployment = embedding_deployment + self.embedding_model = embedding_model + self.embedding_dimensions = embedding_dimensions + self.sourcepage_field = sourcepage_field + self.content_field = content_field + self.query_language = query_language + self.query_speller = query_speller + self.prompt_manager = prompt_manager + self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty") + self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json") + self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty") + self.reasoning_effort = reasoning_effort + self.include_token_usage = True + self.max_steps = max_steps or 3 + + async def run_until_final_call( + self, + messages: list[ChatCompletionMessageParam], + overrides: dict[str, Any], + auth_claims: dict[str, Any], + should_stream: bool = False, + ) -> AsyncGenerator[StreamingThoughtStep, None]: + use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None] + use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None] + use_semantic_ranker = True if overrides.get("semantic_ranker") else False + use_semantic_captions = True if overrides.get("semantic_captions") else False + use_query_rewriting = True if overrides.get("query_rewriting") else False + top = overrides.get("top", 3) + minimum_search_score = overrides.get("minimum_search_score", 0.0) + minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0) + filter = self.build_filter(overrides, auth_claims) + + original_user_query = messages[-1]["content"] + if not isinstance(original_user_query, str): + raise ValueError("The most recent message content must be a string.") + + reasoning_model_support = self.GPT_REASONING_MODELS.get(self.chatgpt_model) + if reasoning_model_support and (not reasoning_model_support.streaming and should_stream): + raise Exception( + f"{self.chatgpt_model} does not support streaming. Please use a different model or disable streaming." 
+ ) + + # STEP 1: Generate an optimized keyword search query based on the chat history and the last question + query_messages = self.prompt_manager.render_prompt( + self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]} + ) + tools: list[ChatCompletionToolParam] = self.query_rewrite_tools + + chat_completion = cast( + ChatCompletion, + await self.create_chat_completion( + self.chatgpt_deployment, + self.chatgpt_model, + messages=query_messages, + overrides=overrides, + response_token_limit=self.get_response_token_limit( + self.chatgpt_model, 100 + ), # Setting too low risks malformed JSON, setting too high may affect performance + temperature=0.0, # Minimize creativity for search query generation + tools=tools, + reasoning_effort="low", # Minimize reasoning for search query generation + ), + ) + + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Prompt to generate search query", + messages=query_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + usage=chat_completion.usage, + reasoning_effort="low", + ), + role="tool" + ) + + query_text = self.get_search_query(chat_completion, original_user_query) + + # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query + yield StreamingThoughtStep( + step=ThoughtStep( + "Search using generated search query", + query_text, + { + "use_semantic_captions": use_semantic_captions, + "use_semantic_ranker": use_semantic_ranker, + "use_query_rewriting": use_query_rewriting, + "top": top, + "filter": filter, + "use_vector_search": use_vector_search, + "use_text_search": use_text_search, + }, + ), + role="tool" + ) + + # If retrieval mode includes vectors, compute an embedding for the query + vectors: list[VectorQuery] = [] + if use_vector_search: + vectors.append(await self.compute_text_embedding(query_text)) + + results = await self.search( + top, + query_text, + filter, + vectors, + use_text_search, + use_vector_search, + use_semantic_ranker, + use_semantic_captions, + minimum_search_score, + minimum_reranker_score, + use_query_rewriting, + ) + + yield StreamingThoughtStep( + step=ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results], + ), + role="tool" + ) + + # STEP 3: Generate a contextual and content specific answer using the search results and chat history + text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False) + messages = self.prompt_manager.render_prompt( + self.answer_prompt, + self.get_system_prompt_variables(overrides.get("prompt_template")) + | { + "include_follow_up_questions": bool(overrides.get("suggest_followup_questions")), + "past_messages": messages[:-1], + "user_query": original_user_query, + "text_sources": text_sources, + }, + ) + + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Prompt to generate answer", + messages=messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + usage=None + ), + chat_completion=self.create_chat_completion( + self.chatgpt_deployment, + self.chatgpt_model, + messages, + overrides, + self.get_response_token_limit(self.chatgpt_model, 1024), + should_stream, + ), + data_points=DataPoints(text=text_sources) + ) diff --git a/app/backend/approaches/prompts/chat_reflect_answer.prompty b/app/backend/approaches/prompts/chat_reflect_answer.prompty new file mode 100644 index 0000000000..d5149ceb2f 
--- /dev/null +++ b/app/backend/approaches/prompts/chat_reflect_answer.prompty @@ -0,0 +1,65 @@ +--- +name: Chat +description: Answer a question (with chat history) using solely text sources. +model: + api: chat +--- +system: +SYSTEM: +You are an expert in evaluating the quality of a RESPONSE from an intelligent system based on three communication traits: Relevance, Groundedness, and Correctness. Your job is to assign each trait a score from 1 to 5 using the definitions below. + +USER: +# Definitions + +## Relevance +1 - Irrelevant Response: Unrelated to the question. +2 - Incorrect Response: Attempts to answer but gives wrong info. +3 - Incomplete Response: Addresses the question but omits key details. +4 - Complete Response: Fully addresses the question with accurate, essential details. +5 - Comprehensive Response with Insights: Fully accurate and adds relevant insights or implications. + +## Groundedness +1 - Completely Unrelated Response: No relation to context or question. +2 - Related Topic but Does Not Respond: Mentions context topic but fails to answer. +3 - Attempts to Respond but Contains Incorrect Info: Tries to answer but misstates facts. +4 - Partially Correct Response: Correct but omits specific context details. +5 - Fully Correct and Complete Response: Thoroughly accurate and includes all context details. + +## Correctness +1 - Completely Incorrect: Contains no correct or relevant facts. +2 - Mostly Incorrect: Major factual or logical errors overshadow any correct parts. +3 - Partially Correct: Some facts are right but others are wrong or misleading. +4 - Mostly Correct: Largely accurate with only minor inaccuracies. +5 - Fully Correct: Entirely accurate, fact-based, and logically consistent. + +# Data +CONTEXT: {{context}} +QUERY: {{query}} +RESPONSE: {{response}} + +# Tasks +For each trait—Relevance, Groundedness, Correctness—produce: +• ThoughtChain: start with “Let's think step by step:” and give a concise chain of reasoning. +• Explanation: a very short justification. +• Score: an integer from 1 to 5. + +Wrap your outputs in these tags: + +Relevance: +…ThoughtChain… +…Explanation… +…Score… + +Groundedness: +…ThoughtChain… +…Explanation… +…Score… + +Correctness: +…ThoughtChain… +…Explanation… +…Score… + +Finally, based on your reflections, propose a new query to search a knowledge base for any potentially missing context. 
Wrap it in: + +…your proposed search query… From 885da6d8caaeddc058890480fd58e8eb6459b760 Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Fri, 18 Apr 2025 13:38:49 -0700 Subject: [PATCH 06/10] remove double yield on stream --- app/backend/approaches/chatapproach.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/app/backend/approaches/chatapproach.py b/app/backend/approaches/chatapproach.py index 51261d6c77..33b0c82855 100644 --- a/app/backend/approaches/chatapproach.py +++ b/app/backend/approaches/chatapproach.py @@ -57,14 +57,13 @@ async def __anext__(self) -> Union[ChatCompletion, ChatCompletionChunk, DataPoin self.step.update_token_usage(chunk.usage) return chunk except StopAsyncIteration: - # If the stream is exhausted, yield the step with token usage + # Stream is exhausted self._stream = None - return self.step - - # Non-Streaming Implementation: return the entire response, then the step with token usage - result = self._stream - self._stream = None - return result + else: + # Non-Streaming Implementation: return the entire response, then the step with token usage + result = self._stream + self._stream = None + return result if self.step is not None: result = self.step From d7fbd4295076ff66bd080d75e95bbeb59fdfafb5 Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Mon, 21 Apr 2025 23:20:49 -0700 Subject: [PATCH 07/10] WIP --- app/backend/approaches/approach.py | 5 +- app/backend/approaches/chatapproach.py | 89 +++++++- .../approaches/chatreadretrieveread.py | 69 +++++- .../approaches/chatreadretrievereadvision.py | 3 +- .../approaches/chatreadretrievereflectread.py | 206 ------------------ .../prompts/chat_reflect_answer.prompty | 51 ++--- .../prompts/chat_reflect_answer_tools.json | 111 ++++++++++ 7 files changed, 289 insertions(+), 245 deletions(-) delete mode 100644 app/backend/approaches/chatreadretrievereflectread.py create mode 100644 app/backend/approaches/prompts/chat_reflect_answer_tools.json diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py index fdafc19e54..74cb711a56 100644 --- a/app/backend/approaches/approach.py +++ b/app/backend/approaches/approach.py @@ -398,7 +398,8 @@ def format_thought_step_for_chatcompletion( model: str, deployment: Optional[str], usage: Optional[CompletionUsage] = None, - reasoning_effort: Optional[ChatCompletionReasoningEffort] = None + reasoning_effort: Optional[ChatCompletionReasoningEffort] = None, + additional_properties: Optional[dict[str, Any]] = None, ) -> ThoughtStep: properties: dict[str, Any] = {"model": model} if deployment: @@ -410,6 +411,8 @@ def format_thought_step_for_chatcompletion( ) if usage: properties["token_usage"] = TokenUsageProps.from_completion_usage(usage) + if additional_properties: + properties.update(additional_properties) return ThoughtStep(title, messages, properties) async def run( diff --git a/app/backend/approaches/chatapproach.py b/app/backend/approaches/chatapproach.py index 33b0c82855..163a6758b7 100644 --- a/app/backend/approaches/chatapproach.py +++ b/app/backend/approaches/chatapproach.py @@ -3,6 +3,7 @@ from abc import ABC, abstractmethod from collections.abc import AsyncGenerator, Awaitable from typing import Any, Optional, List, Union +from dataclasses import dataclass from openai import AsyncStream from openai.types.chat import ( @@ -24,37 +25,60 @@ def __init__( step: ThoughtStep, chat_completion: Optional[Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]] = None, role: Optional[str] = "assistant", - 
data_points: Optional[DataPoints] = None): + data_points: Optional[DataPoints] = None, + should_stream: bool = True): self.step = step self.chat_completion = chat_completion self.role = role self.data_points = data_points self._stream = None - self._is_streaming = None + self.should_stream = should_stream + self._steps = [] + self._step_i = -1 + self._completion = "" def __aiter__(self): return self async def start(self): - if self._stream is None and self.chat_completion is not None: + if self._step_i < 0 and self._stream is None and self.chat_completion is not None: self._stream = await self.chat_completion - self._is_streaming = True + + def rewind(self): + if not self._steps: + raise ValueError("Cannot rewind: no steps recorded.") + self._step_i = 0 + + def get_completion(self) -> Optional[str]: + return self._completion async def __anext__(self) -> Union[ChatCompletion, ChatCompletionChunk, DataPoints, ThoughtStep]: + if self._step_i >= 0: + if self._step_i < len(self._steps): + # Return the next step in the recorded steps + self._step_i += 1 + return self._steps[self._step_i - 1] + + raise StopAsyncIteration() + # If there are data points, return them first to render citations if self.data_points is not None: result = self.data_points self.data_points = None + self._steps.append(result) return result if self._stream is not None: - if self._is_streaming: + if self.should_stream: try: # Get the next chunk from the async stream chunk = await self._stream.__anext__() if len(chunk.choices) == 0 and chunk.usage: self.step.update_token_usage(chunk.usage) + elif len(chunk.choices) > 0 and chunk.choices[0].delta.content: + self._completion += chunk.choices[0].delta.content + self._steps.append(chunk) return chunk except StopAsyncIteration: # Stream is exhausted @@ -63,16 +87,32 @@ async def __anext__(self) -> Union[ChatCompletion, ChatCompletionChunk, DataPoin # Non-Streaming Implementation: return the entire response, then the step with token usage result = self._stream self._stream = None + self._completion = result.choices[0].message.content if result.choices else "" + self._steps.append(result) return result if self.step is not None: result = self.step self.step = None + self._steps.append(result) return result # No more items to yield raise StopAsyncIteration +@dataclass +class Reflection: + score: Optional[int] = None + thought_chain: Optional[str] = None + explanation: Optional[str] = None + +@dataclass +class ReflectionResponse: + relevance: Optional[Reflection] = None + groundedness: Optional[Reflection] = None + correctness: Optional[Reflection] = None + next_query: Optional[str] = None + next_answer: Optional[str] = None class ChatApproach(Approach, ABC): @@ -102,6 +142,45 @@ def get_search_query(self, chat_completion: ChatCompletion, user_query: str): return query_text return user_query + def get_reflection(self, chat_completion: ChatCompletion) -> Optional[ReflectionResponse]: + response_message = chat_completion.choices[0].message + reflection_response = ReflectionResponse() + + print(response_message.model_dump()) + if response_message.tool_calls: + for tool in response_message.tool_calls: + if tool.type != "function": + continue + function = tool.function + if function.name == "reflect_answer": + arg = json.loads(function.arguments) + if relevance_reflection := arg.get("relevance"): + reflection_response.relevance = Reflection( + score=relevance_reflection.get("score"), + thought_chain=relevance_reflection.get("thoughtChain"), + 
explanation=relevance_reflection.get("explanation") + ) + if groundedness_reflection := arg.get("groundedness"): + reflection_response.groundedness = Reflection( + score=groundedness_reflection.get("score"), + thought_chain=groundedness_reflection.get("thoughtChain"), + explanation=groundedness_reflection.get("explanation") + ) + if correctness_reflection := arg.get("correctness"): + reflection_response.correctness = Reflection( + score=correctness_reflection.get("score"), + thought_chain=correctness_reflection.get("thoughtChain"), + explanation=correctness_reflection.get("explanation") + ) + if function.name == "search_index": + arg = json.loads(function.arguments) + reflection_response.next_query = arg.get("query") + if function.name == "rewrite_answer": + arg = json.loads(function.arguments) + reflection_response.next_answer = arg.get("answer") + + return reflection_response + def extract_followup_questions(self, content: Optional[str]): if content is None: return content, [] diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 529eec120f..8dc64bf4d0 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -1,5 +1,6 @@ from collections.abc import Awaitable from typing import Any, Optional, Union, cast, AsyncGenerator +import asyncio from azure.search.documents.aio import SearchClient from azure.search.documents.models import VectorQuery @@ -15,6 +16,7 @@ from approaches.chatapproach import ChatApproach, StreamingThoughtStep from approaches.promptmanager import PromptManager from core.authentication import AuthenticationHelper +import dataclasses class ChatReadRetrieveReadApproach(ChatApproach): @@ -41,6 +43,7 @@ def __init__( query_speller: str, prompt_manager: PromptManager, reasoning_effort: Optional[str] = None, + reflection_max_steps: Optional[int] = None, ): self.search_client = search_client self.openai_client = openai_client @@ -58,8 +61,11 @@ def __init__( self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty") self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json") self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty") + self.reflect_prompt = self.prompt_manager.load_prompt("chat_reflect_answer.prompty") + self.reflect_tools = self.prompt_manager.load_tools("chat_reflect_answer_tools.json") self.reasoning_effort = reasoning_effort self.include_token_usage = True + self.reflection_max_steps = reflection_max_steps or 3 async def run_until_final_call( self, @@ -73,6 +79,8 @@ async def run_until_final_call( use_semantic_ranker = True if overrides.get("semantic_ranker") else False use_semantic_captions = True if overrides.get("semantic_captions") else False use_query_rewriting = True if overrides.get("query_rewriting") else False + use_reflection = True if overrides.get("reflection") else True + reflection_max_steps = overrides.get("reflection_max_steps", self.reflection_max_steps) top = overrides.get("top", 3) minimum_search_score = overrides.get("minimum_search_score", 0.0) minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0) @@ -183,14 +191,13 @@ async def run_until_final_call( }, ) - yield StreamingThoughtStep( + answer_step = StreamingThoughtStep( step=self.format_thought_step_for_chatcompletion( title="Prompt to generate answer", messages=messages, overrides=overrides, model=self.chatgpt_model, deployment=self.chatgpt_deployment, - usage=None 
), chat_completion=self.create_chat_completion( self.chatgpt_deployment, @@ -200,5 +207,61 @@ async def run_until_final_call( self.get_response_token_limit(self.chatgpt_model, 1024), should_stream, ), - data_points=DataPoints(text=text_sources) + data_points=DataPoints(text=text_sources), + should_stream=should_stream ) + if not use_reflection: + yield answer_step + return + + # Step 4: Reflection loop to improve the answer + for i in range(reflection_max_steps): + answer_chunks = [] + + # Read the answer + await answer_step.start() + async for chunk in answer_step: + if isinstance(chunk, ThoughtStep): + answer_thought = chunk + + # STEP 5: Determine the next action to take + reflect_messages = self.prompt_manager.render_prompt( + self.reflect_prompt, {"text_sources": text_sources, "query": original_user_query, "response": answer_step.get_completion(), "past_messages": messages[:-1]} + ) + tools: list[ChatCompletionToolParam] = self.reflect_tools + + chat_completion = cast( + ChatCompletion, + await self.create_chat_completion( + self.chatgpt_deployment, + self.chatgpt_model, + messages=reflect_messages, + overrides=overrides, + response_token_limit=self.get_response_token_limit(self.chatgpt_model, 1024), + temperature=0.0, # Minimize creativity for reflection + tools=tools + ) + ) + reflection = self.get_reflection(chat_completion) + + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Prompt to reflect on answer", + messages=reflect_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + usage=chat_completion.usage, + additional_properties=dataclasses.asdict(reflection) + ), + role="tool" + ) + + answer_step.rewind() + yield answer_step + return + + + + + diff --git a/app/backend/approaches/chatreadretrievereadvision.py b/app/backend/approaches/chatreadretrievereadvision.py index 517b84c708..3472bc6d3c 100644 --- a/app/backend/approaches/chatreadretrievereadvision.py +++ b/app/backend/approaches/chatreadretrievereadvision.py @@ -207,5 +207,6 @@ async def run_until_final_call( stream=should_stream, seed=seed, ), - data_points=DataPoints(text=text_sources, images=image_sources) + data_points=DataPoints(text=text_sources, images=image_sources), + should_stream=should_stream ) diff --git a/app/backend/approaches/chatreadretrievereflectread.py b/app/backend/approaches/chatreadretrievereflectread.py deleted file mode 100644 index 647cf3cdd1..0000000000 --- a/app/backend/approaches/chatreadretrievereflectread.py +++ /dev/null @@ -1,206 +0,0 @@ -from collections.abc import Awaitable -from typing import Any, Optional, Union, cast, AsyncGenerator - -from azure.search.documents.aio import SearchClient -from azure.search.documents.models import VectorQuery -from openai import AsyncOpenAI, AsyncStream -from openai.types.chat import ( - ChatCompletion, - ChatCompletionChunk, - ChatCompletionMessageParam, - ChatCompletionToolParam, -) - -from approaches.approach import DataPoints, ThoughtStep -from approaches.chatapproach import ChatApproach, StreamingThoughtStep -from approaches.promptmanager import PromptManager -from core.authentication import AuthenticationHelper - - -class ChatReadRetrieveReflectReadApproach(ChatApproach): - """ - A multi-step approach that first uses OpenAI to turn the user's question into a search query, - then uses Azure AI Search to retrieve relevant documents, and then sends the conversation history, - original user question, and search results to OpenAI to generate a response. 
- """ - - def __init__( - self, - *, - search_client: SearchClient, - auth_helper: AuthenticationHelper, - openai_client: AsyncOpenAI, - chatgpt_model: str, - chatgpt_deployment: Optional[str], # Not needed for non-Azure OpenAI - embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text" - embedding_model: str, - embedding_dimensions: int, - sourcepage_field: str, - content_field: str, - query_language: str, - query_speller: str, - prompt_manager: PromptManager, - reasoning_effort: Optional[str] = None, - max_steps: Optional[int] = None, - ): - self.search_client = search_client - self.openai_client = openai_client - self.auth_helper = auth_helper - self.chatgpt_model = chatgpt_model - self.chatgpt_deployment = chatgpt_deployment - self.embedding_deployment = embedding_deployment - self.embedding_model = embedding_model - self.embedding_dimensions = embedding_dimensions - self.sourcepage_field = sourcepage_field - self.content_field = content_field - self.query_language = query_language - self.query_speller = query_speller - self.prompt_manager = prompt_manager - self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty") - self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json") - self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty") - self.reasoning_effort = reasoning_effort - self.include_token_usage = True - self.max_steps = max_steps or 3 - - async def run_until_final_call( - self, - messages: list[ChatCompletionMessageParam], - overrides: dict[str, Any], - auth_claims: dict[str, Any], - should_stream: bool = False, - ) -> AsyncGenerator[StreamingThoughtStep, None]: - use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None] - use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None] - use_semantic_ranker = True if overrides.get("semantic_ranker") else False - use_semantic_captions = True if overrides.get("semantic_captions") else False - use_query_rewriting = True if overrides.get("query_rewriting") else False - top = overrides.get("top", 3) - minimum_search_score = overrides.get("minimum_search_score", 0.0) - minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0) - filter = self.build_filter(overrides, auth_claims) - - original_user_query = messages[-1]["content"] - if not isinstance(original_user_query, str): - raise ValueError("The most recent message content must be a string.") - - reasoning_model_support = self.GPT_REASONING_MODELS.get(self.chatgpt_model) - if reasoning_model_support and (not reasoning_model_support.streaming and should_stream): - raise Exception( - f"{self.chatgpt_model} does not support streaming. Please use a different model or disable streaming." 
- ) - - # STEP 1: Generate an optimized keyword search query based on the chat history and the last question - query_messages = self.prompt_manager.render_prompt( - self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]} - ) - tools: list[ChatCompletionToolParam] = self.query_rewrite_tools - - chat_completion = cast( - ChatCompletion, - await self.create_chat_completion( - self.chatgpt_deployment, - self.chatgpt_model, - messages=query_messages, - overrides=overrides, - response_token_limit=self.get_response_token_limit( - self.chatgpt_model, 100 - ), # Setting too low risks malformed JSON, setting too high may affect performance - temperature=0.0, # Minimize creativity for search query generation - tools=tools, - reasoning_effort="low", # Minimize reasoning for search query generation - ), - ) - - yield StreamingThoughtStep( - step=self.format_thought_step_for_chatcompletion( - title="Prompt to generate search query", - messages=query_messages, - overrides=overrides, - model=self.chatgpt_model, - deployment=self.chatgpt_deployment, - usage=chat_completion.usage, - reasoning_effort="low", - ), - role="tool" - ) - - query_text = self.get_search_query(chat_completion, original_user_query) - - # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query - yield StreamingThoughtStep( - step=ThoughtStep( - "Search using generated search query", - query_text, - { - "use_semantic_captions": use_semantic_captions, - "use_semantic_ranker": use_semantic_ranker, - "use_query_rewriting": use_query_rewriting, - "top": top, - "filter": filter, - "use_vector_search": use_vector_search, - "use_text_search": use_text_search, - }, - ), - role="tool" - ) - - # If retrieval mode includes vectors, compute an embedding for the query - vectors: list[VectorQuery] = [] - if use_vector_search: - vectors.append(await self.compute_text_embedding(query_text)) - - results = await self.search( - top, - query_text, - filter, - vectors, - use_text_search, - use_vector_search, - use_semantic_ranker, - use_semantic_captions, - minimum_search_score, - minimum_reranker_score, - use_query_rewriting, - ) - - yield StreamingThoughtStep( - step=ThoughtStep( - "Search results", - [result.serialize_for_results() for result in results], - ), - role="tool" - ) - - # STEP 3: Generate a contextual and content specific answer using the search results and chat history - text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False) - messages = self.prompt_manager.render_prompt( - self.answer_prompt, - self.get_system_prompt_variables(overrides.get("prompt_template")) - | { - "include_follow_up_questions": bool(overrides.get("suggest_followup_questions")), - "past_messages": messages[:-1], - "user_query": original_user_query, - "text_sources": text_sources, - }, - ) - - yield StreamingThoughtStep( - step=self.format_thought_step_for_chatcompletion( - title="Prompt to generate answer", - messages=messages, - overrides=overrides, - model=self.chatgpt_model, - deployment=self.chatgpt_deployment, - usage=None - ), - chat_completion=self.create_chat_completion( - self.chatgpt_deployment, - self.chatgpt_model, - messages, - overrides, - self.get_response_token_limit(self.chatgpt_model, 1024), - should_stream, - ), - data_points=DataPoints(text=text_sources) - ) diff --git a/app/backend/approaches/prompts/chat_reflect_answer.prompty b/app/backend/approaches/prompts/chat_reflect_answer.prompty index d5149ceb2f..f95175eb41 100644 --- 
a/app/backend/approaches/prompts/chat_reflect_answer.prompty +++ b/app/backend/approaches/prompts/chat_reflect_answer.prompty @@ -5,10 +5,8 @@ model: api: chat --- system: -SYSTEM: You are an expert in evaluating the quality of a RESPONSE from an intelligent system based on three communication traits: Relevance, Groundedness, and Correctness. Your job is to assign each trait a score from 1 to 5 using the definitions below. -USER: # Definitions ## Relevance @@ -32,34 +30,29 @@ USER: 4 - Mostly Correct: Largely accurate with only minor inaccuracies. 5 - Fully Correct: Entirely accurate, fact-based, and logically consistent. -# Data -CONTEXT: {{context}} -QUERY: {{query}} -RESPONSE: {{response}} - # Tasks For each trait—Relevance, Groundedness, Correctness—produce: • ThoughtChain: start with “Let's think step by step:” and give a concise chain of reasoning. • Explanation: a very short justification. -• Score: an integer from 1 to 5. - -Wrap your outputs in these tags: - -Relevance: -…ThoughtChain… -…Explanation… -…Score… - -Groundedness: -…ThoughtChain… -…Explanation… -…Score… - -Correctness: -…ThoughtChain… -…Explanation… -…Score… - -Finally, based on your reflections, propose a new query to search a knowledge base for any potentially missing context. Wrap it in: - -…your proposed search query… +• Score: an integer from 1 to 5. + +Based on your reflection, if it is necessary to search a knowledge base for any potentially missing context, propose it. + +Based on your reflection, if it is necessary to adjust the final answer to improve the quality of the response, propose the adjusted answer + +# Data +CONTEXT: +{% for text_source in text_sources %} +{{ text_source }} +{% endfor %} +QUERY: {{query}} +RESPONSE: {{response}} + +user: + +Conversation History: + +{% for message in past_messages %} +{{ message["role"] }}: +{{ message["content"] }} +{% endfor %} diff --git a/app/backend/approaches/prompts/chat_reflect_answer_tools.json b/app/backend/approaches/prompts/chat_reflect_answer_tools.json new file mode 100644 index 0000000000..92549c91c8 --- /dev/null +++ b/app/backend/approaches/prompts/chat_reflect_answer_tools.json @@ -0,0 +1,111 @@ +[ + { + "type": "function", + "function": { + "name": "reflect_answer", + "description": "Reflect on an answer to a question and decide on the next step based on the context of the conversation.", + "parameters": { + "type": "object", + "properties": { + "relevance": { + "type": "object", + "description": "Relevance of the answer to the question asked.", + "properties": { + "thoughtChain": { + "type": "string", + "description": "A chain of thoughts that led to the conclusion about the relevance of the answer." + }, + "explanation": { + "type": "string", + "description": "An explanation of why the answer is relevant or not relevant to the question asked." + }, + "score": { + "type": "number", + "description": "A score indicating how relevant the answer is to the question asked, on a scale from 1 to 5." + } + } + }, + "groundedness": { + "type": "object", + "description": "Groundedness of the answer based on the context for the question.", + "properties": { + "thoughtChain": { + "type": "string", + "description": "A chain of thoughts that led to the conclusion about the groundedness of the answer." + }, + "explanation": { + "type": "string", + "description": "An explanation of why the answer is grounded based on the context." 
+ }, + "score": { + "type": "number", + "description": "A score indicating how grounded the answer is to the context given, on a scale from 1 to 5." + } + } + }, + "correctness": { + "type": "object", + "description": "Correctness of the answer based on the context for the question.", + "properties": { + "thoughtChain": { + "type": "string", + "description": "A chain of thoughts that led to the conclusion about the correctness of the answer." + }, + "explanation": { + "type": "string", + "description": "An explanation of why the answer is correct or not correct based on the context." + }, + "score": { + "type": "number", + "description": "A score indicating how correct the answer is to the context given, on a scale from 1 to 5." + } + } + } + }, + "required": [ + "relevance", + "groundedness", + "correctness" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "search_index", + "description": "Search the index to find relevant information based on the question asked.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The query to search in the index." + } + }, + "required": [ + "query" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "rewrite_answer", + "description": "Rewrite an answer to a question based on the context of the conversation and the relevance, groundedness, and correctness of the original answer.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "The updated answer to the query" + } + }, + "required": [ + "answer" + ] + } + } + } +] \ No newline at end of file From bce7553cd215b8b82b135c25a41c3907a1f4b823 Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Tue, 22 Apr 2025 13:46:35 -0700 Subject: [PATCH 08/10] checkpoint --- app/backend/app.py | 6 + app/backend/approaches/approach.py | 11 +- app/backend/approaches/chatapproach.py | 29 ++- .../approaches/chatreadretrieveread.py | 166 +++++++++++++++--- .../prompts/chat_answer_question.prompty | 22 +++ .../prompts/chat_reflect_answer.prompty | 16 +- .../prompts/chat_reflect_answer_tools.json | 50 ++---- .../AnalysisPanel/AnalysisPanel.module.css | 27 +++ .../AnalysisPanel/CandidateAnswer.tsx | 19 ++ .../components/AnalysisPanel/Evaluation.tsx | 24 +++ .../components/AnalysisPanel/Reflection.tsx | 19 ++ .../AnalysisPanel/ThoughtProcess.tsx | 12 +- 12 files changed, 329 insertions(+), 72 deletions(-) create mode 100644 app/frontend/src/components/AnalysisPanel/CandidateAnswer.tsx create mode 100644 app/frontend/src/components/AnalysisPanel/Evaluation.tsx create mode 100644 app/frontend/src/components/AnalysisPanel/Reflection.tsx diff --git a/app/backend/app.py b/app/backend/app.py index 3d6ce2dc25..01c3bf23e7 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -432,6 +432,7 @@ async def setup_clients(): # Shared by all OpenAI deployments OPENAI_HOST = os.getenv("OPENAI_HOST", "azure") OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"] + OPENAI_CHATGPT_REFLECTION_MODEL = os.environ.get("AZURE_OPENAI_CHATGPT_REFLECTION_MODEL") OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002") OPENAI_EMB_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMB_DIMENSIONS") or 1536) OPENAI_REASONING_EFFORT = os.getenv("AZURE_OPENAI_REASONING_EFFORT") @@ -442,6 +443,9 @@ async def setup_clients(): AZURE_OPENAI_CHATGPT_DEPLOYMENT = ( os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None ) + 
AZURE_OPENAI_CHATGPT_REFLECTION_DEPLOYMENT = ( + os.getenv("AZURE_OPENAI_CHATGPT_REFLECTION_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None + ) AZURE_OPENAI_EMB_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None AZURE_OPENAI_CUSTOM_URL = os.getenv("AZURE_OPENAI_CUSTOM_URL") # https://learn.microsoft.com/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release @@ -696,6 +700,8 @@ async def setup_clients(): auth_helper=auth_helper, chatgpt_model=OPENAI_CHATGPT_MODEL, chatgpt_deployment=AZURE_OPENAI_CHATGPT_DEPLOYMENT, + chatgpt_reflection_model=OPENAI_CHATGPT_REFLECTION_MODEL, + chatgpt_reflection_deployment=AZURE_OPENAI_CHATGPT_REFLECTION_DEPLOYMENT, embedding_model=OPENAI_EMB_MODEL, embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT, embedding_dimensions=OPENAI_EMB_DIMENSIONS, diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py index 74cb711a56..e66104ad37 100644 --- a/app/backend/approaches/approach.py +++ b/app/backend/approaches/approach.py @@ -28,6 +28,7 @@ ChatCompletionMessageParam, ChatCompletionReasoningEffort, ChatCompletionToolParam, + ChatCompletionNamedToolChoiceParam ) from approaches.promptmanager import PromptManager @@ -266,17 +267,21 @@ def nonewlines(s: str) -> str: return s.replace("\n", " ").replace("\r", " ") if use_semantic_captions: - return [ + results = [ (self.get_citation((doc.sourcepage or ""), use_image_citation)) + ": " + nonewlines(" . ".join([cast(str, c.text) for c in (doc.captions or [])])) for doc in results ] else: - return [ + results = [ (self.get_citation((doc.sourcepage or ""), use_image_citation)) + ": " + nonewlines(doc.content or "") for doc in results ] + + # Remove duplicates + results = list(set(results)) + return results def get_citation(self, sourcepage: str, use_image_citation: bool) -> str: if use_image_citation: @@ -352,6 +357,7 @@ def create_chat_completion( response_token_limit: int, should_stream: bool = False, tools: Optional[list[ChatCompletionToolParam]] = None, + tool_choice: Optional[ChatCompletionNamedToolChoiceParam] = None, temperature: Optional[float] = None, n: Optional[int] = None, reasoning_effort: Optional[ChatCompletionReasoningEffort] = None, @@ -380,6 +386,7 @@ def create_chat_completion( params["stream_options"] = {"include_usage": True} params["tools"] = tools + params["tool_choice"] = tool_choice # Azure OpenAI takes the deployment name as the model name return self.openai_client.chat.completions.create( diff --git a/app/backend/approaches/chatapproach.py b/app/backend/approaches/chatapproach.py index 163a6758b7..2c9e70967a 100644 --- a/app/backend/approaches/chatapproach.py +++ b/app/backend/approaches/chatapproach.py @@ -26,7 +26,8 @@ def __init__( chat_completion: Optional[Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]] = None, role: Optional[str] = "assistant", data_points: Optional[DataPoints] = None, - should_stream: bool = True): + should_stream: bool = True, + completion: Optional[str] = None): self.step = step self.chat_completion = chat_completion @@ -36,7 +37,8 @@ def __init__( self.should_stream = should_stream self._steps = [] self._step_i = -1 - self._completion = "" + self._completion = completion or "" + self._has_existing_completion = completion is not None def __aiter__(self): return self @@ -90,6 +92,10 @@ async def __anext__(self) -> Union[ChatCompletion, ChatCompletionChunk, DataPoin self._completion = result.choices[0].message.content if result.choices else "" 
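+                # Capture the final text so the reflection loop can read it via get_completion(),
+                # and record the result so rewind() can replay this step without re-calling the model.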
self._steps.append(result) return result + elif self._has_existing_completion: + # Stream is none - yield already done completion + self._has_existing_completion = False + return self._completion if self.step is not None: result = self.step @@ -146,7 +152,6 @@ def get_reflection(self, chat_completion: ChatCompletion) -> Optional[Reflection response_message = chat_completion.choices[0].message reflection_response = ReflectionResponse() - print(response_message.model_dump()) if response_message.tool_calls: for tool in response_message.tool_calls: if tool.type != "function": @@ -172,12 +177,10 @@ def get_reflection(self, chat_completion: ChatCompletion) -> Optional[Reflection thought_chain=correctness_reflection.get("thoughtChain"), explanation=correctness_reflection.get("explanation") ) - if function.name == "search_index": - arg = json.loads(function.arguments) - reflection_response.next_query = arg.get("query") - if function.name == "rewrite_answer": - arg = json.loads(function.arguments) - reflection_response.next_answer = arg.get("answer") + if next_answer := arg.get("next_answer"): + reflection_response.next_answer = next_answer + if next_query := arg.get("next_query"): + reflection_response.next_query = next_query return reflection_response @@ -205,6 +208,9 @@ async def run_without_streaming( if isinstance(chunk, ChatCompletion): content = chunk.choices[0].message.content role = chunk.choices[0].message.role + elif isinstance(chunk, str): + content = chunk + role = "assistant" elif isinstance(chunk, ThoughtStep): extra_info.thoughts.append(chunk) elif isinstance(chunk, DataPoints): @@ -255,6 +261,11 @@ async def run_with_streaming( followup_content += content else: yield completion + elif isinstance(chunk, str): + content = chunk + role = "assistant" + completion = { "delta": {"content": content, "role": role} } + yield completion elif isinstance(chunk, ThoughtStep): extra_info.thoughts.append(chunk) yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 8dc64bf4d0..7df341d5a9 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -1,15 +1,15 @@ from collections.abc import Awaitable from typing import Any, Optional, Union, cast, AsyncGenerator -import asyncio +from copy import deepcopy from azure.search.documents.aio import SearchClient from azure.search.documents.models import VectorQuery -from openai import AsyncOpenAI, AsyncStream +from openai import AsyncOpenAI from openai.types.chat import ( ChatCompletion, - ChatCompletionChunk, ChatCompletionMessageParam, ChatCompletionToolParam, + ChatCompletionNamedToolChoiceParam ) from approaches.approach import DataPoints, ThoughtStep @@ -34,6 +34,8 @@ def __init__( openai_client: AsyncOpenAI, chatgpt_model: str, chatgpt_deployment: Optional[str], # Not needed for non-Azure OpenAI + chatgpt_reflection_model: Optional[str], + chatgpt_reflection_deployment: Optional[str], # Not needed for non-Azure OpenAI embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text" embedding_model: str, embedding_dimensions: int, @@ -50,6 +52,8 @@ def __init__( self.auth_helper = auth_helper self.chatgpt_model = chatgpt_model self.chatgpt_deployment = chatgpt_deployment + self.chatgpt_reflection_model = chatgpt_reflection_model + self.chatgpt_reflection_deployment = chatgpt_reflection_deployment 
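+        # Evaluator model/deployment for the reflection calls, configured in app.py via
+        # AZURE_OPENAI_CHATGPT_REFLECTION_MODEL and AZURE_OPENAI_CHATGPT_REFLECTION_DEPLOYMENT.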
self.embedding_deployment = embedding_deployment self.embedding_model = embedding_model self.embedding_dimensions = embedding_dimensions @@ -180,12 +184,13 @@ async def run_until_final_call( # STEP 3: Generate a contextual and content specific answer using the search results and chat history text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False) - messages = self.prompt_manager.render_prompt( + answer_messages = deepcopy(messages) + answer_messages = self.prompt_manager.render_prompt( self.answer_prompt, self.get_system_prompt_variables(overrides.get("prompt_template")) | { "include_follow_up_questions": bool(overrides.get("suggest_followup_questions")), - "past_messages": messages[:-1], + "past_messages": answer_messages[:-1], "user_query": original_user_query, "text_sources": text_sources, }, @@ -194,7 +199,7 @@ async def run_until_final_call( answer_step = StreamingThoughtStep( step=self.format_thought_step_for_chatcompletion( title="Prompt to generate answer", - messages=messages, + messages=answer_messages, overrides=overrides, model=self.chatgpt_model, deployment=self.chatgpt_deployment, @@ -202,7 +207,7 @@ async def run_until_final_call( chat_completion=self.create_chat_completion( self.chatgpt_deployment, self.chatgpt_model, - messages, + answer_messages, overrides, self.get_response_token_limit(self.chatgpt_model, 1024), should_stream, @@ -214,16 +219,30 @@ async def run_until_final_call( yield answer_step return + answer_passed_eval = False + next_answer = "" # Step 4: Reflection loop to improve the answer for i in range(reflection_max_steps): - answer_chunks = [] - - # Read the answer + # Read the candidate answer step await answer_step.start() async for chunk in answer_step: - if isinstance(chunk, ThoughtStep): - answer_thought = chunk - + pass + + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Generate candidate answer", + messages=answer_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + additional_properties={ + "candidate_answer": answer_step.get_completion() + } + ), + data_points=DataPoints(text=text_sources), + should_stream=False + ) + # STEP 5: Determine the next action to take reflect_messages = self.prompt_manager.render_prompt( self.reflect_prompt, {"text_sources": text_sources, "query": original_user_query, "response": answer_step.get_completion(), "past_messages": messages[:-1]} @@ -233,13 +252,14 @@ async def run_until_final_call( chat_completion = cast( ChatCompletion, await self.create_chat_completion( - self.chatgpt_deployment, - self.chatgpt_model, + self.chatgpt_reflection_deployment, + self.chatgpt_reflection_model, messages=reflect_messages, overrides=overrides, response_token_limit=self.get_response_token_limit(self.chatgpt_model, 1024), temperature=0.0, # Minimize creativity for reflection - tools=tools + tools=tools, + tool_choice=ChatCompletionNamedToolChoiceParam(function={"name": self.reflect_tools[0]["function"]["name"]}, type="function"), ) ) reflection = self.get_reflection(chat_completion) @@ -249,18 +269,124 @@ async def run_until_final_call( title="Prompt to reflect on answer", messages=reflect_messages, overrides=overrides, - model=self.chatgpt_model, - deployment=self.chatgpt_deployment, + model=self.chatgpt_reflection_model, + deployment=self.chatgpt_reflection_deployment, usage=chat_completion.usage, additional_properties=dataclasses.asdict(reflection) ), role="tool" ) + # If the reflection was good, stop 
generating + answer_passed_eval = reflection.groundedness.score >= 4 and reflection.correctness.score >= 4 and reflection.relevance.score >= 4 + if answer_passed_eval: + break + + if reflection.next_answer: + next_answer = reflection.next_answer + if reflection.next_query: + # Repeat STEP 2: Retrieve relevant documents from the search index with the GPT optimized query + yield StreamingThoughtStep( + step=ThoughtStep( + "Updated search using reflected search query", + reflection.next_query, + { + "use_semantic_captions": use_semantic_captions, + "use_semantic_ranker": use_semantic_ranker, + "use_query_rewriting": use_query_rewriting, + "top": top, + "filter": filter, + "use_vector_search": use_vector_search, + "use_text_search": use_text_search, + }, + ), + role="tool" + ) + + # If retrieval mode includes vectors, compute an embedding for the query + vectors: list[VectorQuery] = [] + if use_vector_search: + vectors.append(await self.compute_text_embedding(reflection.next_query)) + + reflection_results = await self.search( + top, + reflection.next_query, + filter, + vectors, + use_text_search, + use_vector_search, + use_semantic_ranker, + use_semantic_captions, + minimum_search_score, + minimum_reranker_score, + use_query_rewriting, + ) + + # Repeat STEP 3: Generate a contextual and content specific answer using the search results and chat history + results.extend(reflection_results) + text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False) + answer_messages = deepcopy(messages) + answer_messages = self.prompt_manager.render_prompt( + self.answer_prompt, + self.get_system_prompt_variables(overrides.get("prompt_template")) + | { + "include_follow_up_questions": bool(overrides.get("suggest_followup_questions")), + "past_messages": answer_messages[:-1], + "user_query": original_user_query, + "text_sources": text_sources, + "previous_answer": answer_step.get_completion(), + "previous_answer_evaluations": [ + { "label": "Groundedness", "score": reflection.groundedness.score, "explanation": reflection.groundedness.explanation }, + { "label": "Correctness", "score": reflection.correctness.score, "explanation": reflection.correctness.explanation }, + { "label": "Relevance", "score": reflection.relevance.score, "explanation": reflection.relevance.explanation }, + ], + "revised_answer": reflection.next_answer + }, + ) + + answer_step = StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Prompt to generate updated reflected answer", + messages=answer_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + ), + chat_completion=self.create_chat_completion( + self.chatgpt_deployment, + self.chatgpt_model, + answer_messages, + overrides, + self.get_response_token_limit(self.chatgpt_model, 1024), + should_stream, + ), + data_points=DataPoints(text=text_sources), + should_stream=should_stream + ) + else: + # No new query, yield revised answer + break + + if answer_passed_eval: answer_step.rewind() yield answer_step - return - + else: + next_answer = reflection.next_answer or next_answer + if next_answer: + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Using reflection revised answer", + messages=answer_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + ), + completion=next_answer, + data_points=DataPoints(text=text_sources), + should_stream=False + ) + else: + yield answer_step diff --git 
a/app/backend/approaches/prompts/chat_answer_question.prompty b/app/backend/approaches/prompts/chat_answer_question.prompty index 3dcb05ae21..91a7733a6e 100644 --- a/app/backend/approaches/prompts/chat_answer_question.prompty +++ b/app/backend/approaches/prompts/chat_answer_question.prompty @@ -37,6 +37,28 @@ Do not repeat questions that have already been asked. Make sure the last question ends with ">>". {% endif %} +{% if previous_answer %} +You've previously attempted to answer this question and it has been evalauted that the previous answer was not sufficient +Previous answer: +{{ previous_answer }} + +Why was the previous answer insufficient? +{% for evaluation in previous_answer_evaluations %} +Evaluation: {{ evaluation["label"] }} +Score: {{ evaluation["score"] }} +Explanation: {{ evaluation["explanation"] }} +{% endfor %} + +{% if revised_answer %} +The previous answer was revised to improve the quality of the response. Use this to help generate a better answer: +Revised answer: +{{ revised_answer }} + +{% endif %} + +Use this information to improve the answer next time +{% endif %} + {% for message in past_messages %} {{ message["role"] }}: {{ message["content"] }} diff --git a/app/backend/approaches/prompts/chat_reflect_answer.prompty b/app/backend/approaches/prompts/chat_reflect_answer.prompty index f95175eb41..8f78f3d18e 100644 --- a/app/backend/approaches/prompts/chat_reflect_answer.prompty +++ b/app/backend/approaches/prompts/chat_reflect_answer.prompty @@ -34,11 +34,25 @@ You are an expert in evaluating the quality of a RESPONSE from an intelligent sy For each trait—Relevance, Groundedness, Correctness—produce: • ThoughtChain: start with “Let's think step by step:” and give a concise chain of reasoning. • Explanation: a very short justification. -• Score: an integer from 1 to 5. +• Score: an integer from 1 to 5. A response like "I don't know" can never achieve a high score Based on your reflection, if it is necessary to search a knowledge base for any potentially missing context, propose it. +Query generation guidelines: +You have access to Azure AI Search index with 100's of documents. +Generate a search query based on the conversation and the new question. +Do not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms. +Do not include any text inside [] or <<>> in the search query terms. +Do not include any special characters like '+'. +If the question is not in English, translate the question to English before generating the search query. Based on your reflection, if it is necessary to adjust the final answer to improve the quality of the response, propose the adjusted answer +If the answer cannot be improved, return an empty string. +Answer generation guidelines: +Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers. +Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. +If the question is not in English, answer in the language used in the question. +Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. 
Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]. + # Data CONTEXT: diff --git a/app/backend/approaches/prompts/chat_reflect_answer_tools.json b/app/backend/approaches/prompts/chat_reflect_answer_tools.json index 92549c91c8..8a405fae11 100644 --- a/app/backend/approaches/prompts/chat_reflect_answer_tools.json +++ b/app/backend/approaches/prompts/chat_reflect_answer_tools.json @@ -60,50 +60,22 @@ "description": "A score indicating how correct the answer is to the context given, on a scale from 1 to 5." } } - } - }, - "required": [ - "relevance", - "groundedness", - "correctness" - ] - } - } - }, - { - "type": "function", - "function": { - "name": "search_index", - "description": "Search the index to find relevant information based on the question asked.", - "parameters": { - "type": "object", - "properties": { - "query": { + }, + "next_query": { "type": "string", - "description": "The query to search in the index." - } - }, - "required": [ - "query" - ] - } - } - }, - { - "type": "function", - "function": { - "name": "rewrite_answer", - "description": "Rewrite an answer to a question based on the context of the conversation and the relevance, groundedness, and correctness of the original answer.", - "parameters": { - "type": "object", - "properties": { - "answer": { + "description": "The query to search in the index if necessary. Output an empty string if it's not necessary" + }, + "next_answer": { "type": "string", - "description": "The updated answer to the query" + "description": "The answer to the question asked, which may be empty if no correction is necessary or a new one based on the context." } }, "required": [ - "answer" + "relevance", + "groundedness", + "correctness", + "next_query", + "next_answer" ] } } diff --git a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css index 84b9f110ea..17ad5e751e 100644 --- a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css +++ b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css @@ -134,3 +134,30 @@ background-color: #424242; color: #ffffff; } + +.evaluationContainer { + margin: 16px 0; + padding: 16px; + border: 1px solid #ddd; + border-radius: 8px; + background-color: #f9f9f9; +} + +.evaluationLabel { + font-size: 18px; + font-weight: bold; + margin-bottom: 8px; + color: #333; +} + +.evaluationList { + list-style-type: none; + padding: 0; + margin: 0; +} + +.evaluationList li { + margin: 4px 0; + font-size: 14px; + color: #555; +} diff --git a/app/frontend/src/components/AnalysisPanel/CandidateAnswer.tsx b/app/frontend/src/components/AnalysisPanel/CandidateAnswer.tsx new file mode 100644 index 0000000000..5d37fd649b --- /dev/null +++ b/app/frontend/src/components/AnalysisPanel/CandidateAnswer.tsx @@ -0,0 +1,19 @@ +import React from "react"; +import styles from "./AnalysisPanel.module.css"; + +interface CandidateAnswerProps { + candidate_answer: string | undefined; +} + +export const CandidateAnswer: React.FC = ({ candidate_answer }) => { + return ( +
+        <div className={styles.evaluationContainer}>
+            <div className={styles.evaluationLabel}>Candidate Answer</div>
+            {candidate_answer ? (
+                <div>{candidate_answer}</div>
+            ) : (
+                <div>No candidate answer available</div>
+            )}
+        </div>
+ ); +}; diff --git a/app/frontend/src/components/AnalysisPanel/Evaluation.tsx b/app/frontend/src/components/AnalysisPanel/Evaluation.tsx new file mode 100644 index 0000000000..6bc145f2c5 --- /dev/null +++ b/app/frontend/src/components/AnalysisPanel/Evaluation.tsx @@ -0,0 +1,24 @@ +import React from "react"; +import styles from "./AnalysisPanel.module.css"; + +interface EvaluationProps { + label: string; + value: { + thought_chain: string; + score: number; + explanation: string; + }; +} + +export const Evaluation: React.FC = ({ label, value }) => { + return ( +
+        <div className={styles.evaluationContainer}>
+            <div className={styles.evaluationLabel}>{label}</div>
+            <ul className={styles.evaluationList}>
+                <li>Thought Chain: {value.thought_chain}</li>
+                <li>Score: {value.score}</li>
+                <li>Explanation: {value.explanation}</li>
+            </ul>
+        </div>
+ ); +}; diff --git a/app/frontend/src/components/AnalysisPanel/Reflection.tsx b/app/frontend/src/components/AnalysisPanel/Reflection.tsx new file mode 100644 index 0000000000..3f057298d7 --- /dev/null +++ b/app/frontend/src/components/AnalysisPanel/Reflection.tsx @@ -0,0 +1,19 @@ +import React from "react"; +import styles from "./AnalysisPanel.module.css"; + +interface ReflectionProps { + next_answer: string | undefined; + next_query: string | undefined; +} + +export const Reflection: React.FC = ({ next_answer, next_query }) => { + return ( +
+        <div className={styles.evaluationContainer}>
+            <div className={styles.evaluationLabel}>Next Steps</div>
+            <ul className={styles.evaluationList}>
+                <li>Next Query: {next_query}</li>
+                <li>Revised Answer: {next_answer}</li>
+            </ul>
+        </div>
+ ); +}; diff --git a/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx b/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx index f666960da1..9b27c8a32c 100644 --- a/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx +++ b/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx @@ -7,6 +7,9 @@ import styles from "./AnalysisPanel.module.css"; import { Thoughts } from "../../api"; import { TokenUsageGraph } from "./TokenUsageGraph"; +import { Evaluation } from "./Evaluation"; +import { Reflection } from "./Reflection"; +import { CandidateAnswer } from "./CandidateAnswer"; SyntaxHighlighter.registerLanguage("json", json); @@ -14,6 +17,8 @@ interface Props { thoughts: Thoughts[]; } +const known_keys = ["token_usage", "reasoning_effort", "groundedness", "relevance", "correctness", "next_query", "next_answer", "candidate_answer"]; + export const ThoughtProcess = ({ thoughts }: Props) => { return (
    @@ -23,13 +28,18 @@ export const ThoughtProcess = ({ thoughts }: Props) => {
                        <div className={styles.tStep}>{t.title}</div>
    {t.props && - (Object.keys(t.props).filter(k => k !== "token_usage") || []).map((k: any) => ( + (Object.keys(t.props).filter(k => !known_keys.includes(k)) || []).map((k: any) => ( {k}: {JSON.stringify(t.props?.[k])} ))} {t.props?.token_usage && } + {t.props?.groundedness && } + {t.props?.relevance && } + {t.props?.correctness && } + {(t.props?.next_query || t.props?.next_answer) && } + {t.props?.candidate_answer && } {Array.isArray(t.description) ? ( {JSON.stringify(t.description, null, 2)} From 6380656185125f9aaf70665dd15f7d4d9b2d9323 Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Tue, 22 Apr 2025 14:12:52 -0700 Subject: [PATCH 09/10] checkpoint 2 --- app/backend/app.py | 4 ++++ .../approaches/chatreadretrieveread.py | 2 +- app/backend/config.py | 1 + app/frontend/src/api/models.ts | 2 ++ .../src/components/Settings/Settings.tsx | 19 +++++++++++++++++++ app/frontend/src/locales/en/translation.json | 3 +++ app/frontend/src/pages/ask/Ask.tsx | 9 +++++++++ app/frontend/src/pages/chat/Chat.tsx | 10 ++++++++++ 8 files changed, 49 insertions(+), 1 deletion(-) diff --git a/app/backend/app.py b/app/backend/app.py index 01c3bf23e7..194bcadeee 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -73,6 +73,7 @@ CONFIG_LANGUAGE_PICKER_ENABLED, CONFIG_OPENAI_CLIENT, CONFIG_QUERY_REWRITING_ENABLED, + CONFIG_REFLECTION_ENABLED, CONFIG_REASONING_EFFORT_ENABLED, CONFIG_SEARCH_CLIENT, CONFIG_SEMANTIC_RANKER_DEPLOYED, @@ -301,6 +302,7 @@ def config(): "showGPT4VOptions": current_app.config[CONFIG_GPT4V_DEPLOYED], "showSemanticRankerOption": current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED], "showQueryRewritingOption": current_app.config[CONFIG_QUERY_REWRITING_ENABLED], + "showReflectionOption": current_app.config[CONFIG_REFLECTION_ENABLED], "showReasoningEffortOption": current_app.config[CONFIG_REASONING_EFFORT_ENABLED], "streamingEnabled": current_app.config[CONFIG_STREAMING_ENABLED], "defaultReasoningEffort": current_app.config[CONFIG_DEFAULT_REASONING_EFFORT], @@ -479,6 +481,7 @@ async def setup_clients(): USE_GPT4V = os.getenv("USE_GPT4V", "").lower() == "true" USE_USER_UPLOAD = os.getenv("USE_USER_UPLOAD", "").lower() == "true" + USE_REFLECTION = os.getenv("USE_REFLECTION", "").lower() == "true" ENABLE_LANGUAGE_PICKER = os.getenv("ENABLE_LANGUAGE_PICKER", "").lower() == "true" USE_SPEECH_INPUT_BROWSER = os.getenv("USE_SPEECH_INPUT_BROWSER", "").lower() == "true" USE_SPEECH_OUTPUT_BROWSER = os.getenv("USE_SPEECH_OUTPUT_BROWSER", "").lower() == "true" @@ -663,6 +666,7 @@ async def setup_clients(): or OPENAI_CHATGPT_MODEL not in Approach.GPT_REASONING_MODELS or Approach.GPT_REASONING_MODELS[OPENAI_CHATGPT_MODEL].streaming ) + current_app.config[CONFIG_REFLECTION_ENABLED] = USE_REFLECTION current_app.config[CONFIG_VECTOR_SEARCH_ENABLED] = os.getenv("USE_VECTORS", "").lower() != "false" current_app.config[CONFIG_USER_UPLOAD_ENABLED] = bool(USE_USER_UPLOAD) current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED] = ENABLE_LANGUAGE_PICKER diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 7df341d5a9..7b9cb58200 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -83,7 +83,7 @@ async def run_until_final_call( use_semantic_ranker = True if overrides.get("semantic_ranker") else False use_semantic_captions = True if overrides.get("semantic_captions") else False use_query_rewriting = True if overrides.get("query_rewriting") else False - use_reflection = True if 
overrides.get("reflection") else True + use_reflection = True if overrides.get("reflection") else False reflection_max_steps = overrides.get("reflection_max_steps", self.reflection_max_steps) top = overrides.get("top", 3) minimum_search_score = overrides.get("minimum_search_score", 0.0) diff --git a/app/backend/config.py b/app/backend/config.py index 5f3354f2da..b190e11615 100644 --- a/app/backend/config.py +++ b/app/backend/config.py @@ -11,6 +11,7 @@ CONFIG_GPT4V_DEPLOYED = "gpt4v_deployed" CONFIG_SEMANTIC_RANKER_DEPLOYED = "semantic_ranker_deployed" CONFIG_QUERY_REWRITING_ENABLED = "query_rewriting_enabled" +CONFIG_REFLECTION_ENABLED = "reflection_enabled" CONFIG_REASONING_EFFORT_ENABLED = "reasoning_effort_enabled" CONFIG_VISION_REASONING_EFFORT_ENABLED = "vision_reasoning_effort_enabled" CONFIG_DEFAULT_REASONING_EFFORT = "default_reasoning_effort" diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index c915a19ee5..ef7847ea71 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -21,6 +21,7 @@ export type ChatAppRequestOverrides = { semantic_ranker?: boolean; semantic_captions?: boolean; query_rewriting?: boolean; + reflection?: boolean; reasoning_effort?: string; include_category?: string; exclude_category?: string; @@ -89,6 +90,7 @@ export type Config = { showSemanticRankerOption: boolean; showQueryRewritingOption: boolean; showReasoningEffortOption: boolean; + showReflectionOption: boolean; streamingEnabled: boolean; showVectorOption: boolean; showUserUpload: boolean; diff --git a/app/frontend/src/components/Settings/Settings.tsx b/app/frontend/src/components/Settings/Settings.tsx index b16beb0246..a06469d985 100644 --- a/app/frontend/src/components/Settings/Settings.tsx +++ b/app/frontend/src/components/Settings/Settings.tsx @@ -20,6 +20,7 @@ export interface SettingsProps { useSemanticRanker: boolean; useSemanticCaptions: boolean; useQueryRewriting: boolean; + useReflection: boolean; reasoningEffort: string; excludeCategory: string; includeCategory: string; @@ -30,6 +31,7 @@ export interface SettingsProps { showSemanticRankerOption: boolean; showQueryRewritingOption: boolean; showReasoningEffortOption: boolean; + showReflectionOption: boolean; showGPT4VOptions: boolean; showVectorOption: boolean; useOidSecurityFilter: boolean; @@ -57,6 +59,7 @@ export const Settings = ({ useSemanticRanker, useSemanticCaptions, useQueryRewriting, + useReflection, reasoningEffort, excludeCategory, includeCategory, @@ -67,6 +70,7 @@ export const Settings = ({ showSemanticRankerOption, showQueryRewritingOption, showReasoningEffortOption, + showReflectionOption, showGPT4VOptions, showVectorOption, useOidSecurityFilter, @@ -106,6 +110,7 @@ export const Settings = ({ const semanticRankerFieldId = useId("semanticRankerField"); const queryRewritingFieldId = useId("queryRewritingField"); const reasoningEffortFieldId = useId("reasoningEffortField"); + const reflectionField = useId("reflectionField"); const semanticCaptionsId = useId("semanticCaptions"); const semanticCaptionsFieldId = useId("semanticCaptionsField"); const useOidSecurityFilterId = useId("useOidSecurityFilter"); @@ -266,6 +271,20 @@ export const Settings = ({ )} + {showReflectionOption && ( + <> + onChange("useReflection", !!checked)} + aria-labelledby={reasoningEffortFieldId} + onRenderLabel={props => renderLabel(props, reasoningEffortFieldId, reasoningEffortFieldId, t("helpTexts.useReflection"))} + /> + + )} + {showReasoningEffortOption && ( (true); const [useSemanticCaptions, 
setUseSemanticCaptions] = useState(false); const [useQueryRewriting, setUseQueryRewriting] = useState(false); + const [useReflection, setUseReflection] = useState(false); const [reasoningEffort, setReasoningEffort] = useState(""); const [useGPT4V, setUseGPT4V] = useState(false); const [gpt4vInput, setGPT4VInput] = useState(GPT4VInput.TextAndImages); @@ -45,6 +46,7 @@ export function Component(): JSX.Element { const [showGPT4VOptions, setShowGPT4VOptions] = useState(false); const [showSemanticRankerOption, setShowSemanticRankerOption] = useState(false); const [showQueryRewritingOption, setShowQueryRewritingOption] = useState(false); + const [showReflectionOption, setShowReflectionOption] = useState(false); const [showReasoningEffortOption, setShowReasoningEffortOption] = useState(false); const [showVectorOption, setShowVectorOption] = useState(false); const [showUserUpload, setShowUserUpload] = useState(false); @@ -84,6 +86,8 @@ export function Component(): JSX.Element { setShowSemanticRankerOption(config.showSemanticRankerOption); setUseQueryRewriting(config.showQueryRewritingOption); setShowQueryRewritingOption(config.showQueryRewritingOption); + setUseReflection(config.showReflectionOption); + setShowReflectionOption(config.showReflectionOption); setShowReasoningEffortOption(config.showReasoningEffortOption); if (config.showReasoningEffortOption) { setReasoningEffort(config.defaultReasoningEffort); @@ -195,6 +199,9 @@ export function Component(): JSX.Element { case "useQueryRewriting": setUseQueryRewriting(value); break; + case "useReflection": + setUseReflection(value); + break; case "reasoningEffort": setReasoningEffort(value); break; @@ -340,6 +347,7 @@ export function Component(): JSX.Element { useSemanticRanker={useSemanticRanker} useSemanticCaptions={useSemanticCaptions} useQueryRewriting={useQueryRewriting} + useReflection={useReflection} reasoningEffort={reasoningEffort} excludeCategory={excludeCategory} includeCategory={includeCategory} @@ -350,6 +358,7 @@ export function Component(): JSX.Element { showSemanticRankerOption={showSemanticRankerOption} showQueryRewritingOption={showQueryRewritingOption} showReasoningEffortOption={showReasoningEffortOption} + showReflectionOption={showReflectionOption} showGPT4VOptions={showGPT4VOptions} showVectorOption={showVectorOption} useOidSecurityFilter={useOidSecurityFilter} diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 7ab010f337..09284a723a 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -49,6 +49,7 @@ const Chat = () => { const [retrievalMode, setRetrievalMode] = useState(RetrievalMode.Hybrid); const [useSemanticRanker, setUseSemanticRanker] = useState(true); const [useQueryRewriting, setUseQueryRewriting] = useState(false); + const [useReflection, setUseReflection] = useState(false); const [reasoningEffort, setReasoningEffort] = useState(""); const [streamingEnabled, setStreamingEnabled] = useState(true); const [shouldStream, setShouldStream] = useState(true); @@ -80,6 +81,7 @@ const Chat = () => { const [showGPT4VOptions, setShowGPT4VOptions] = useState(false); const [showSemanticRankerOption, setShowSemanticRankerOption] = useState(false); const [showQueryRewritingOption, setShowQueryRewritingOption] = useState(false); + const [showReflectionOption, setShowReflectionOption] = useState(false); const [showReasoningEffortOption, setShowReasoningEffortOption] = useState(false); const [showVectorOption, setShowVectorOption] = useState(false); 
const [showUserUpload, setShowUserUpload] = useState(false); @@ -107,6 +109,8 @@ const Chat = () => { setShowSemanticRankerOption(config.showSemanticRankerOption); setUseQueryRewriting(config.showQueryRewritingOption); setShowQueryRewritingOption(config.showQueryRewritingOption); + setUseReflection(config.showReflectionOption); + setShowReflectionOption(config.showReflectionOption); setShowReasoningEffortOption(config.showReasoningEffortOption); setStreamingEnabled(config.streamingEnabled); if (!config.streamingEnabled) { @@ -203,6 +207,7 @@ const Chat = () => { semantic_ranker: useSemanticRanker, semantic_captions: useSemanticCaptions, query_rewriting: useQueryRewriting, + reflection: useReflection, reasoning_effort: reasoningEffort, suggest_followup_questions: useSuggestFollowupQuestions, use_oid_security_filter: useOidSecurityFilter, @@ -298,6 +303,9 @@ const Chat = () => { case "reasoningEffort": setReasoningEffort(value); break; + case "useReflection": + setUseReflection(value); + break; case "useSemanticCaptions": setUseSemanticCaptions(value); break; @@ -514,6 +522,7 @@ const Chat = () => { useSemanticRanker={useSemanticRanker} useSemanticCaptions={useSemanticCaptions} useQueryRewriting={useQueryRewriting} + useReflection={useReflection} reasoningEffort={reasoningEffort} excludeCategory={excludeCategory} includeCategory={includeCategory} @@ -523,6 +532,7 @@ const Chat = () => { vectorFieldList={vectorFieldList} showSemanticRankerOption={showSemanticRankerOption} showQueryRewritingOption={showQueryRewritingOption} + showReflectionOption={showReflectionOption} showReasoningEffortOption={showReasoningEffortOption} showGPT4VOptions={showGPT4VOptions} showVectorOption={showVectorOption} From 1d599892de5435c80cac4bf85be189dc6ce50a8e Mon Sep 17 00:00:00 2001 From: Matt Gotteiner Date: Wed, 23 Apr 2025 11:01:00 -0700 Subject: [PATCH 10/10] fix order --- app/backend/approaches/chatreadretrieveread.py | 9 ++++++++- .../src/components/AnalysisPanel/ThoughtProcess.tsx | 12 +++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 7b9cb58200..521bc8c014 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -321,9 +321,16 @@ async def run_until_final_call( minimum_reranker_score, use_query_rewriting, ) + results.extend(reflection_results) + yield StreamingThoughtStep( + step=ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results], + ), + role="tool" + ) # Repeat STEP 3: Generate a contextual and content specific answer using the search results and chat history - results.extend(reflection_results) text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False) answer_messages = deepcopy(messages) answer_messages = self.prompt_manager.render_prompt( diff --git a/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx b/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx index 9b27c8a32c..b0e7517839 100644 --- a/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx +++ b/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx @@ -35,11 +35,7 @@ export const ThoughtProcess = ({ thoughts }: Props) => { ))} {t.props?.token_usage && } - {t.props?.groundedness && } - {t.props?.relevance && } - {t.props?.correctness && } - {(t.props?.next_query || t.props?.next_answer) && } - {t.props?.candidate_answer && } + 
                        {Array.isArray(t.description) ? (
                            <SyntaxHighlighter language="json" wrapLongLines>
                                {JSON.stringify(t.description, null, 2)}
                            </SyntaxHighlighter>
                        ) : (
                            <div>{t.description}</div>
                        )}
+
+                        {t.props?.groundedness && <Evaluation label="Groundedness" value={t.props.groundedness} />}
+                        {t.props?.relevance && <Evaluation label="Relevance" value={t.props.relevance} />}
+                        {t.props?.correctness && <Evaluation label="Correctness" value={t.props.correctness} />}
+                        {(t.props?.next_query || t.props?.next_answer) && <Reflection next_query={t.props.next_query} next_answer={t.props.next_answer} />}
+                        {t.props?.candidate_answer && <CandidateAnswer candidate_answer={t.props.candidate_answer} />}
                    </li>
                );
            })}
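Note on the reflection loop introduced in patches 07 through 10: the sketch below is a minimal, standalone illustration of how the reflect_answer tool-call arguments (the shape defined in chat_reflect_answer_tools.json) map onto the accept, re-search, or rewrite decision made in run_until_final_call. It is not part of the patch series; the helper names parse_reflection and decide_next_action, the example argument values, and the passing threshold of 4 (mirroring the answer_passed_eval check) are illustrative assumptions.

import json
from dataclasses import dataclass
from typing import Optional


@dataclass
class Reflection:
    score: Optional[int] = None
    thought_chain: Optional[str] = None
    explanation: Optional[str] = None


def parse_reflection(arguments_json: str) -> dict:
    # Parse the reflect_answer tool-call arguments (shape from chat_reflect_answer_tools.json).
    args = json.loads(arguments_json)
    parsed = {}
    for trait in ("relevance", "groundedness", "correctness"):
        raw = args.get(trait) or {}
        parsed[trait] = Reflection(
            score=raw.get("score"),
            thought_chain=raw.get("thoughtChain"),
            explanation=raw.get("explanation"),
        )
    parsed["next_query"] = args.get("next_query") or None
    parsed["next_answer"] = args.get("next_answer") or None
    return parsed


def decide_next_action(parsed: dict, passing_score: int = 4) -> str:
    # Mirrors answer_passed_eval: all three traits must reach the passing score to accept.
    scores = [parsed[t].score or 0 for t in ("relevance", "groundedness", "correctness")]
    if all(s >= passing_score for s in scores):
        return "accept"        # rewind the recorded answer step and replay it to the client
    if parsed["next_query"]:
        return "search"        # run another retrieval round and regenerate the answer
    if parsed["next_answer"]:
        return "use_revision"  # emit the evaluator's rewritten answer as a non-streamed completion
    return "accept"            # nothing better available; fall back to the candidate answer


if __name__ == "__main__":
    example_arguments = json.dumps({
        "relevance": {"score": 5, "thoughtChain": "Let's think step by step: ...", "explanation": "On topic."},
        "groundedness": {"score": 3, "thoughtChain": "Let's think step by step: ...", "explanation": "One claim has no source."},
        "correctness": {"score": 4, "thoughtChain": "Let's think step by step: ...", "explanation": "Mostly accurate."},
        "next_query": "employee handbook dental coverage limits",
        "next_answer": "",
    })
    print(decide_next_action(parse_reflection(example_arguments)))  # prints "search"

In the patches themselves the same decision drives whether the recorded answer step is rewound and replayed, a new retrieval round is run with the proposed query, or the evaluator's revised answer is emitted as a non-streamed completion, with the failing candidate answer and its evaluations fed back into chat_answer_question.prompty for the next attempt.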