diff --git a/app/backend/app.py b/app/backend/app.py index 263fcf06a6..194bcadeee 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -8,6 +8,7 @@ from collections.abc import AsyncGenerator from pathlib import Path from typing import Any, Union, cast +import traceback from azure.cognitiveservices.speech import ( ResultReason, @@ -72,6 +73,7 @@ CONFIG_LANGUAGE_PICKER_ENABLED, CONFIG_OPENAI_CLIENT, CONFIG_QUERY_REWRITING_ENABLED, + CONFIG_REFLECTION_ENABLED, CONFIG_REASONING_EFFORT_ENABLED, CONFIG_SEARCH_CLIENT, CONFIG_SEMANTIC_RANKER_DEPLOYED, @@ -188,10 +190,11 @@ async def ask(auth_claims: dict[str, Any]): approach = cast(Approach, current_app.config[CONFIG_ASK_VISION_APPROACH]) else: approach = cast(Approach, current_app.config[CONFIG_ASK_APPROACH]) - r = await approach.run( + result = await approach.run( request_json["messages"], context=context, session_state=request_json.get("session_state") ) - return jsonify(r) + results = [r async for r in result] + return jsonify({"value": results}) except Exception as error: return error_response(error, "/ask") @@ -208,6 +211,7 @@ async def format_as_ndjson(r: AsyncGenerator[dict, None]) -> AsyncGenerator[str, async for event in r: yield json.dumps(event, ensure_ascii=False, cls=JSONEncoder) + "\n" except Exception as error: + traceback.print_exc() logging.exception("Exception while generating response stream: %s", error) yield json.dumps(error_dict(error)) @@ -241,7 +245,8 @@ async def chat(auth_claims: dict[str, Any]): context=context, session_state=session_state, ) - return jsonify(result) + results = [r async for r in result] + return jsonify({"value": results}) except Exception as error: return error_response(error, "/chat") @@ -297,6 +302,7 @@ def config(): "showGPT4VOptions": current_app.config[CONFIG_GPT4V_DEPLOYED], "showSemanticRankerOption": current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED], "showQueryRewritingOption": current_app.config[CONFIG_QUERY_REWRITING_ENABLED], + "showReflectionOption": current_app.config[CONFIG_REFLECTION_ENABLED], "showReasoningEffortOption": current_app.config[CONFIG_REASONING_EFFORT_ENABLED], "streamingEnabled": current_app.config[CONFIG_STREAMING_ENABLED], "defaultReasoningEffort": current_app.config[CONFIG_DEFAULT_REASONING_EFFORT], @@ -428,6 +434,7 @@ async def setup_clients(): # Shared by all OpenAI deployments OPENAI_HOST = os.getenv("OPENAI_HOST", "azure") OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"] + OPENAI_CHATGPT_REFLECTION_MODEL = os.environ.get("AZURE_OPENAI_CHATGPT_REFLECTION_MODEL") OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002") OPENAI_EMB_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMB_DIMENSIONS") or 1536) OPENAI_REASONING_EFFORT = os.getenv("AZURE_OPENAI_REASONING_EFFORT") @@ -438,6 +445,9 @@ async def setup_clients(): AZURE_OPENAI_CHATGPT_DEPLOYMENT = ( os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None ) + AZURE_OPENAI_CHATGPT_REFLECTION_DEPLOYMENT = ( + os.getenv("AZURE_OPENAI_CHATGPT_REFLECTION_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None + ) AZURE_OPENAI_EMB_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None AZURE_OPENAI_CUSTOM_URL = os.getenv("AZURE_OPENAI_CUSTOM_URL") # https://learn.microsoft.com/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release @@ -471,6 +481,7 @@ async def setup_clients(): USE_GPT4V = os.getenv("USE_GPT4V", "").lower() == "true" USE_USER_UPLOAD = os.getenv("USE_USER_UPLOAD", 
"").lower() == "true" + USE_REFLECTION = os.getenv("USE_REFLECTION", "").lower() == "true" ENABLE_LANGUAGE_PICKER = os.getenv("ENABLE_LANGUAGE_PICKER", "").lower() == "true" USE_SPEECH_INPUT_BROWSER = os.getenv("USE_SPEECH_INPUT_BROWSER", "").lower() == "true" USE_SPEECH_OUTPUT_BROWSER = os.getenv("USE_SPEECH_OUTPUT_BROWSER", "").lower() == "true" @@ -655,6 +666,7 @@ async def setup_clients(): or OPENAI_CHATGPT_MODEL not in Approach.GPT_REASONING_MODELS or Approach.GPT_REASONING_MODELS[OPENAI_CHATGPT_MODEL].streaming ) + current_app.config[CONFIG_REFLECTION_ENABLED] = USE_REFLECTION current_app.config[CONFIG_VECTOR_SEARCH_ENABLED] = os.getenv("USE_VECTORS", "").lower() != "false" current_app.config[CONFIG_USER_UPLOAD_ENABLED] = bool(USE_USER_UPLOAD) current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED] = ENABLE_LANGUAGE_PICKER @@ -692,6 +704,8 @@ async def setup_clients(): auth_helper=auth_helper, chatgpt_model=OPENAI_CHATGPT_MODEL, chatgpt_deployment=AZURE_OPENAI_CHATGPT_DEPLOYMENT, + chatgpt_reflection_model=OPENAI_CHATGPT_REFLECTION_MODEL, + chatgpt_reflection_deployment=AZURE_OPENAI_CHATGPT_REFLECTION_DEPLOYMENT, embedding_model=OPENAI_EMB_MODEL, embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT, embedding_dimensions=OPENAI_EMB_DIMENSIONS, @@ -790,12 +804,12 @@ def create_app(): # Log levels should be one of https://docs.python.org/3/library/logging.html#logging-levels # Set root level to WARNING to avoid seeing overly verbose logs from SDKS - logging.basicConfig(level=logging.WARNING) + logging.basicConfig(level=logging.INFO) # Set our own logger levels to INFO by default - app_level = os.getenv("APP_LOG_LEVEL", "INFO") + app_level = os.getenv("APP_LOG_LEVEL", "DEBUG") app.logger.setLevel(os.getenv("APP_LOG_LEVEL", app_level)) + app.logger.setLevel("DEBUG") logging.getLogger("scripts").setLevel(app_level) - if allowed_origin := os.getenv("ALLOWED_ORIGIN"): allowed_origins = allowed_origin.split(";") if len(allowed_origins) > 0: diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py index 59f1909a54..e66104ad37 100644 --- a/app/backend/approaches/approach.py +++ b/app/backend/approaches/approach.py @@ -1,7 +1,7 @@ import os from abc import ABC from collections.abc import AsyncGenerator, Awaitable -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import ( Any, Callable, @@ -28,6 +28,7 @@ ChatCompletionMessageParam, ChatCompletionReasoningEffort, ChatCompletionToolParam, + ChatCompletionNamedToolChoiceParam ) from approaches.promptmanager import PromptManager @@ -88,18 +89,15 @@ def trim_embedding(cls, embedding: Optional[list[float]]) -> Optional[str]: return None - @dataclass class ThoughtStep: title: str description: Optional[Any] props: Optional[dict[str, Any]] = None - def update_token_usage(self, usage: CompletionUsage) -> None: if self.props: self.props["token_usage"] = TokenUsageProps.from_completion_usage(usage) - @dataclass class DataPoints: text: Optional[list[str]] = None @@ -108,11 +106,10 @@ class DataPoints: @dataclass class ExtraInfo: - data_points: DataPoints - thoughts: Optional[list[ThoughtStep]] = None + data_points: DataPoints = None + thoughts: list[ThoughtStep] = field(default_factory=list) followup_questions: Optional[list[Any]] = None - @dataclass class TokenUsageProps: prompt_tokens: int @@ -270,17 +267,21 @@ def nonewlines(s: str) -> str: return s.replace("\n", " ").replace("\r", " ") if use_semantic_captions: - return [ + results = [ (self.get_citation((doc.sourcepage or ""), use_image_citation)) 
+ ": " + nonewlines(" . ".join([cast(str, c.text) for c in (doc.captions or [])])) for doc in results ] else: - return [ + results = [ (self.get_citation((doc.sourcepage or ""), use_image_citation)) + ": " + nonewlines(doc.content or "") for doc in results ] + + # Remove duplicates + results = list(set(results)) + return results def get_citation(self, sourcepage: str, use_image_citation: bool) -> str: if use_image_citation: @@ -356,6 +357,7 @@ def create_chat_completion( response_token_limit: int, should_stream: bool = False, tools: Optional[list[ChatCompletionToolParam]] = None, + tool_choice: Optional[ChatCompletionNamedToolChoiceParam] = None, temperature: Optional[float] = None, n: Optional[int] = None, reasoning_effort: Optional[ChatCompletionReasoningEffort] = None, @@ -384,6 +386,7 @@ def create_chat_completion( params["stream_options"] = {"include_usage": True} params["tools"] = tools + params["tool_choice"] = tool_choice # Azure OpenAI takes the deployment name as the model name return self.openai_client.chat.completions.create( @@ -403,6 +406,7 @@ def format_thought_step_for_chatcompletion( deployment: Optional[str], usage: Optional[CompletionUsage] = None, reasoning_effort: Optional[ChatCompletionReasoningEffort] = None, + additional_properties: Optional[dict[str, Any]] = None, ) -> ThoughtStep: properties: dict[str, Any] = {"model": model} if deployment: @@ -414,6 +418,8 @@ def format_thought_step_for_chatcompletion( ) if usage: properties["token_usage"] = TokenUsageProps.from_completion_usage(usage) + if additional_properties: + properties.update(additional_properties) return ThoughtStep(title, messages, properties) async def run( @@ -421,7 +427,7 @@ async def run( messages: list[ChatCompletionMessageParam], session_state: Any = None, context: dict[str, Any] = {}, - ) -> dict[str, Any]: + ) -> AsyncGenerator[dict[str, Any], None]: raise NotImplementedError async def run_stream( diff --git a/app/backend/approaches/chatapproach.py b/app/backend/approaches/chatapproach.py index 346c9f3b0a..2c9e70967a 100644 --- a/app/backend/approaches/chatapproach.py +++ b/app/backend/approaches/chatapproach.py @@ -2,7 +2,8 @@ import re from abc import ABC, abstractmethod from collections.abc import AsyncGenerator, Awaitable -from typing import Any, Optional, Union, cast +from typing import Any, Optional, List, Union +from dataclasses import dataclass from openai import AsyncStream from openai.types.chat import ( @@ -13,9 +14,111 @@ from approaches.approach import ( Approach, + DataPoints, ExtraInfo, + ThoughtStep ) +class StreamingThoughtStep: + def __init__( + self, + step: ThoughtStep, + chat_completion: Optional[Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]] = None, + role: Optional[str] = "assistant", + data_points: Optional[DataPoints] = None, + should_stream: bool = True, + completion: Optional[str] = None): + + self.step = step + self.chat_completion = chat_completion + self.role = role + self.data_points = data_points + self._stream = None + self.should_stream = should_stream + self._steps = [] + self._step_i = -1 + self._completion = completion or "" + self._has_existing_completion = completion is not None + + def __aiter__(self): + return self + + async def start(self): + if self._step_i < 0 and self._stream is None and self.chat_completion is not None: + self._stream = await self.chat_completion + + def rewind(self): + if not self._steps: + raise ValueError("Cannot rewind: no steps recorded.") + self._step_i = 0 + + def get_completion(self) -> 
Optional[str]: + return self._completion + + async def __anext__(self) -> Union[ChatCompletion, ChatCompletionChunk, DataPoints, ThoughtStep]: + if self._step_i >= 0: + if self._step_i < len(self._steps): + # Return the next step in the recorded steps + self._step_i += 1 + return self._steps[self._step_i - 1] + + raise StopAsyncIteration() + + # If there are data points, return them first to render citations + if self.data_points is not None: + result = self.data_points + self.data_points = None + self._steps.append(result) + return result + + if self._stream is not None: + if self.should_stream: + try: + # Get the next chunk from the async stream + chunk = await self._stream.__anext__() + if len(chunk.choices) == 0 and chunk.usage: + self.step.update_token_usage(chunk.usage) + elif len(chunk.choices) > 0 and chunk.choices[0].delta.content: + self._completion += chunk.choices[0].delta.content + self._steps.append(chunk) + return chunk + except StopAsyncIteration: + # Stream is exhausted + self._stream = None + else: + # Non-Streaming Implementation: return the entire response, then the step with token usage + result = self._stream + self._stream = None + self._completion = result.choices[0].message.content if result.choices else "" + self._steps.append(result) + return result + elif self._has_existing_completion: + # Stream is none - yield already done completion + self._has_existing_completion = False + return self._completion + + if self.step is not None: + result = self.step + self.step = None + self._steps.append(result) + return result + + # No more items to yield + raise StopAsyncIteration + +@dataclass +class Reflection: + score: Optional[int] = None + thought_chain: Optional[str] = None + explanation: Optional[str] = None + +@dataclass +class ReflectionResponse: + relevance: Optional[Reflection] = None + groundedness: Optional[Reflection] = None + correctness: Optional[Reflection] = None + next_query: Optional[str] = None + next_answer: Optional[str] = None class ChatApproach(Approach, ABC): @@ -24,7 +127,7 @@ class ChatApproach(Approach, ABC): @abstractmethod async def run_until_final_call( self, messages, overrides, auth_claims, should_stream - ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]: + ) -> AsyncGenerator[StreamingThoughtStep, None]: pass def get_search_query(self, chat_completion: ChatCompletion, user_query: str): @@ -45,6 +148,42 @@ def get_search_query(self, chat_completion: ChatCompletion, user_query: str): return query_text return user_query + def get_reflection(self, chat_completion: ChatCompletion) -> Optional[ReflectionResponse]: + response_message = chat_completion.choices[0].message + reflection_response = ReflectionResponse() + + if response_message.tool_calls: + for tool in response_message.tool_calls: + if tool.type != "function": + continue + function = tool.function + if function.name == "reflect_answer": + arg = json.loads(function.arguments) + if relevance_reflection := arg.get("relevance"): + reflection_response.relevance = Reflection( + score=relevance_reflection.get("score"), + thought_chain=relevance_reflection.get("thoughtChain"), + explanation=relevance_reflection.get("explanation") + ) + if groundedness_reflection := arg.get("groundedness"): + reflection_response.groundedness = Reflection( + score=groundedness_reflection.get("score"), + thought_chain=groundedness_reflection.get("thoughtChain"), + explanation=groundedness_reflection.get("explanation") + ) + if correctness_reflection := 
arg.get("correctness"): + reflection_response.correctness = Reflection( + score=correctness_reflection.get("score"), + thought_chain=correctness_reflection.get("thoughtChain"), + explanation=correctness_reflection.get("explanation") + ) + if next_answer := arg.get("next_answer"): + reflection_response.next_answer = next_answer + if next_query := arg.get("next_query"): + reflection_response.next_query = next_query + + return reflection_response + def extract_followup_questions(self, content: Optional[str]): if content is None: return content, [] @@ -57,24 +196,34 @@ async def run_without_streaming( auth_claims: dict[str, Any], session_state: Any = None, ) -> dict[str, Any]: - extra_info, chat_coroutine = await self.run_until_final_call( + thoughts = self.run_until_final_call( messages, overrides, auth_claims, should_stream=False ) - chat_completion_response: ChatCompletion = await cast(Awaitable[ChatCompletion], chat_coroutine) - content = chat_completion_response.choices[0].message.content - role = chat_completion_response.choices[0].message.role + content = None + role = None + extra_info = ExtraInfo() + async for thought in thoughts: + await thought.start() + async for chunk in thought: + if isinstance(chunk, ChatCompletion): + content = chunk.choices[0].message.content + role = chunk.choices[0].message.role + elif isinstance(chunk, str): + content = chunk + role = "assistant" + elif isinstance(chunk, ThoughtStep): + extra_info.thoughts.append(chunk) + elif isinstance(chunk, DataPoints): + extra_info.data_points = chunk + if overrides.get("suggest_followup_questions"): content, followup_questions = self.extract_followup_questions(content) - extra_info.followup_questions = followup_questions - # Assume last thought is for generating answer - if self.include_token_usage and extra_info.thoughts and chat_completion_response.usage: - extra_info.thoughts[-1].update_token_usage(chat_completion_response.usage) - chat_app_response = { + followup_questions = followup_questions + return { "message": {"content": content, "role": role}, "context": extra_info, "session_state": session_state, } - return chat_app_response async def run_with_streaming( self, @@ -83,52 +232,54 @@ async def run_with_streaming( auth_claims: dict[str, Any], session_state: Any = None, ) -> AsyncGenerator[dict, None]: - extra_info, chat_coroutine = await self.run_until_final_call( + thoughts = self.run_until_final_call( messages, overrides, auth_claims, should_stream=True ) - chat_coroutine = cast(Awaitable[AsyncStream[ChatCompletionChunk]], chat_coroutine) - yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} + extra_info = ExtraInfo() + yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} followup_questions_started = False followup_content = "" - async for event_chunk in await chat_coroutine: - # "2023-07-01-preview" API version has a bug where first response has empty choices - event = event_chunk.model_dump() # Convert pydantic model to dict - if event["choices"]: - # No usage during streaming - completion = { - "delta": { - "content": event["choices"][0]["delta"].get("content"), - "role": event["choices"][0]["delta"]["role"], - } - } - # if event contains << and not >>, it is start of follow-up question, truncate - content = completion["delta"].get("content") - content = content or "" # content may either not exist in delta, or explicitly be None - if overrides.get("suggest_followup_questions") and "<<" in content: - 
followup_questions_started = True - earlier_content = content[: content.index("<<")] - if earlier_content: - completion["delta"]["content"] = earlier_content - yield completion - followup_content += content[content.index("<<") :] - elif followup_questions_started: - followup_content += content - else: + async for thought in thoughts: + await thought.start() + async for chunk in thought: + if isinstance(chunk, ChatCompletionChunk): + if chunk.choices: + content = chunk.choices[0].delta.content + role = chunk.choices[0].delta.role + content = content or "" # content may either not exist in delta, or explicitly be None + completion = { "delta": {"content": content, "role": role} } + if overrides.get("suggest_followup_questions") and "<<" in content: + # if event contains << and not >>, it is start of follow-up question, truncate + followup_questions_started = True + earlier_content = content[: content.index("<<")] + if earlier_content: + completion["delta"]["content"] = earlier_content + yield completion + followup_content += content[content.index("<<") :] + elif followup_questions_started: + followup_content += content + else: + yield completion + elif isinstance(chunk, str): + content = chunk + role = "assistant" + completion = { "delta": {"content": content, "role": role} } yield completion - else: - # Final chunk at end of streaming should contain usage - # https://cookbook.openai.com/examples/how_to_stream_completions#4-how-to-get-token-usage-data-for-streamed-chat-completion-response - if event_chunk.usage and extra_info.thoughts and self.include_token_usage: - extra_info.thoughts[-1].update_token_usage(event_chunk.usage) + elif isinstance(chunk, ThoughtStep): + extra_info.thoughts.append(chunk) + yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} + elif isinstance(chunk, DataPoints): + extra_info.data_points = chunk yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state} - if followup_content: - _, followup_questions = self.extract_followup_questions(followup_content) - yield { - "delta": {"role": "assistant"}, - "context": {"context": extra_info, "followup_questions": followup_questions}, - } + if followup_content: + _, followup_questions = self.extract_followup_questions(followup_content) + extra_info.followup_questions = followup_questions + yield { + "delta": {"role": "assistant"}, + "context": extra_info, + } async def run( self, diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 249c7247b2..521bc8c014 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -1,20 +1,22 @@ from collections.abc import Awaitable -from typing import Any, Optional, Union, cast +from typing import Any, Optional, Union, cast, AsyncGenerator +from copy import deepcopy from azure.search.documents.aio import SearchClient from azure.search.documents.models import VectorQuery -from openai import AsyncOpenAI, AsyncStream +from openai import AsyncOpenAI from openai.types.chat import ( ChatCompletion, - ChatCompletionChunk, ChatCompletionMessageParam, ChatCompletionToolParam, + ChatCompletionNamedToolChoiceParam ) -from approaches.approach import DataPoints, ExtraInfo, ThoughtStep -from approaches.chatapproach import ChatApproach +from approaches.approach import DataPoints, ThoughtStep +from approaches.chatapproach import ChatApproach, StreamingThoughtStep from approaches.promptmanager import PromptManager from 
core.authentication import AuthenticationHelper +import dataclasses class ChatReadRetrieveReadApproach(ChatApproach): @@ -32,6 +34,8 @@ def __init__( openai_client: AsyncOpenAI, chatgpt_model: str, chatgpt_deployment: Optional[str], # Not needed for non-Azure OpenAI + chatgpt_reflection_model: Optional[str], + chatgpt_reflection_deployment: Optional[str], # Not needed for non-Azure OpenAI embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text" embedding_model: str, embedding_dimensions: int, @@ -41,12 +45,15 @@ def __init__( query_speller: str, prompt_manager: PromptManager, reasoning_effort: Optional[str] = None, + reflection_max_steps: Optional[int] = None, ): self.search_client = search_client self.openai_client = openai_client self.auth_helper = auth_helper self.chatgpt_model = chatgpt_model self.chatgpt_deployment = chatgpt_deployment + self.chatgpt_reflection_model = chatgpt_reflection_model + self.chatgpt_reflection_deployment = chatgpt_reflection_deployment self.embedding_deployment = embedding_deployment self.embedding_model = embedding_model self.embedding_dimensions = embedding_dimensions @@ -58,8 +65,11 @@ def __init__( self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty") self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json") self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty") + self.reflect_prompt = self.prompt_manager.load_prompt("chat_reflect_answer.prompty") + self.reflect_tools = self.prompt_manager.load_tools("chat_reflect_answer_tools.json") self.reasoning_effort = reasoning_effort self.include_token_usage = True + self.reflection_max_steps = reflection_max_steps or 3 async def run_until_final_call( self, @@ -67,12 +77,14 @@ async def run_until_final_call( overrides: dict[str, Any], auth_claims: dict[str, Any], should_stream: bool = False, - ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]: + ) -> AsyncGenerator[StreamingThoughtStep, None]: use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None] use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None] use_semantic_ranker = True if overrides.get("semantic_ranker") else False use_semantic_captions = True if overrides.get("semantic_captions") else False use_query_rewriting = True if overrides.get("query_rewriting") else False + use_reflection = True if overrides.get("reflection") else False + reflection_max_steps = overrides.get("reflection_max_steps", self.reflection_max_steps) top = overrides.get("top", 3) minimum_search_score = overrides.get("minimum_search_score", 0.0) minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0) @@ -88,13 +100,12 @@ async def run_until_final_call( f"{self.chatgpt_model} does not support streaming. Please use a different model or disable streaming." 
) + # STEP 1: Generate an optimized keyword search query based on the chat history and the last question query_messages = self.prompt_manager.render_prompt( self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]} ) tools: list[ChatCompletionToolParam] = self.query_rewrite_tools - # STEP 1: Generate an optimized keyword search query based on the chat history and the last question - chat_completion = cast( ChatCompletion, await self.create_chat_completion( @@ -111,9 +122,38 @@ async def run_until_final_call( ), ) + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Prompt to generate search query", + messages=query_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + usage=chat_completion.usage, + reasoning_effort="low", + ), + role="tool" + ) + query_text = self.get_search_query(chat_completion, original_user_query) # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query + yield StreamingThoughtStep( + step=ThoughtStep( + "Search using generated search query", + query_text, + { + "use_semantic_captions": use_semantic_captions, + "use_semantic_ranker": use_semantic_ranker, + "use_query_rewriting": use_query_rewriting, + "top": top, + "filter": filter, + "use_vector_search": use_vector_search, + "use_text_search": use_text_search, + }, + ), + role="tool" + ) # If retrieval mode includes vectors, compute an embedding for the query vectors: list[VectorQuery] = [] @@ -134,68 +174,227 @@ async def run_until_final_call( use_query_rewriting, ) + yield StreamingThoughtStep( + step=ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results], + ), + role="tool" + ) + # STEP 3: Generate a contextual and content specific answer using the search results and chat history text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False) - messages = self.prompt_manager.render_prompt( + answer_messages = deepcopy(messages) + answer_messages = self.prompt_manager.render_prompt( self.answer_prompt, self.get_system_prompt_variables(overrides.get("prompt_template")) | { "include_follow_up_questions": bool(overrides.get("suggest_followup_questions")), - "past_messages": messages[:-1], + "past_messages": answer_messages[:-1], "user_query": original_user_query, "text_sources": text_sources, }, ) - extra_info = ExtraInfo( - DataPoints(text=text_sources), - thoughts=[ - self.format_thought_step_for_chatcompletion( - title="Prompt to generate search query", - messages=query_messages, - overrides=overrides, - model=self.chatgpt_model, - deployment=self.chatgpt_deployment, - usage=chat_completion.usage, - reasoning_effort="low", - ), - ThoughtStep( - "Search using generated search query", - query_text, - { - "use_semantic_captions": use_semantic_captions, - "use_semantic_ranker": use_semantic_ranker, - "use_query_rewriting": use_query_rewriting, - "top": top, - "filter": filter, - "use_vector_search": use_vector_search, - "use_text_search": use_text_search, - }, - ), - ThoughtStep( - "Search results", - [result.serialize_for_results() for result in results], - ), - self.format_thought_step_for_chatcompletion( - title="Prompt to generate answer", - messages=messages, - overrides=overrides, - model=self.chatgpt_model, - deployment=self.chatgpt_deployment, - usage=None, - ), - ], - ) - - chat_coroutine = cast( - Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]], - 
self.create_chat_completion( + answer_step = StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Prompt to generate answer", + messages=answer_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + ), + chat_completion=self.create_chat_completion( self.chatgpt_deployment, self.chatgpt_model, - messages, + answer_messages, overrides, self.get_response_token_limit(self.chatgpt_model, 1024), should_stream, ), + data_points=DataPoints(text=text_sources), + should_stream=should_stream ) - return (extra_info, chat_coroutine) + if not use_reflection: + yield answer_step + return + + answer_passed_eval = False + next_answer = "" + # Step 4: Reflection loop to improve the answer + for i in range(reflection_max_steps): + # Read the candidate answer step + await answer_step.start() + async for chunk in answer_step: + pass + + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Generate candidate answer", + messages=answer_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + additional_properties={ + "candidate_answer": answer_step.get_completion() + } + ), + data_points=DataPoints(text=text_sources), + should_stream=False + ) + + # STEP 5: Determine the next action to take + reflect_messages = self.prompt_manager.render_prompt( + self.reflect_prompt, {"text_sources": text_sources, "query": original_user_query, "response": answer_step.get_completion(), "past_messages": messages[:-1]} + ) + tools: list[ChatCompletionToolParam] = self.reflect_tools + + chat_completion = cast( + ChatCompletion, + await self.create_chat_completion( + self.chatgpt_reflection_deployment, + self.chatgpt_reflection_model, + messages=reflect_messages, + overrides=overrides, + response_token_limit=self.get_response_token_limit(self.chatgpt_model, 1024), + temperature=0.0, # Minimize creativity for reflection + tools=tools, + tool_choice=ChatCompletionNamedToolChoiceParam(function={"name": self.reflect_tools[0]["function"]["name"]}, type="function"), + ) + ) + reflection = self.get_reflection(chat_completion) + + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Prompt to reflect on answer", + messages=reflect_messages, + overrides=overrides, + model=self.chatgpt_reflection_model, + deployment=self.chatgpt_reflection_deployment, + usage=chat_completion.usage, + additional_properties=dataclasses.asdict(reflection) + ), + role="tool" + ) + + # If the reflection was good, stop generating + answer_passed_eval = reflection.groundedness.score >= 4 and reflection.correctness.score >= 4 and reflection.relevance.score >= 4 + if answer_passed_eval: + break + + if reflection.next_answer: + next_answer = reflection.next_answer + if reflection.next_query: + # Repeat STEP 2: Retrieve relevant documents from the search index with the GPT optimized query + yield StreamingThoughtStep( + step=ThoughtStep( + "Updated search using reflected search query", + reflection.next_query, + { + "use_semantic_captions": use_semantic_captions, + "use_semantic_ranker": use_semantic_ranker, + "use_query_rewriting": use_query_rewriting, + "top": top, + "filter": filter, + "use_vector_search": use_vector_search, + "use_text_search": use_text_search, + }, + ), + role="tool" + ) + + # If retrieval mode includes vectors, compute an embedding for the query + vectors: list[VectorQuery] = [] + if use_vector_search: + vectors.append(await 
self.compute_text_embedding(reflection.next_query)) + + reflection_results = await self.search( + top, + reflection.next_query, + filter, + vectors, + use_text_search, + use_vector_search, + use_semantic_ranker, + use_semantic_captions, + minimum_search_score, + minimum_reranker_score, + use_query_rewriting, + ) + results.extend(reflection_results) + yield StreamingThoughtStep( + step=ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results], + ), + role="tool" + ) + + # Repeat STEP 3: Generate a contextual and content specific answer using the search results and chat history + text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False) + answer_messages = deepcopy(messages) + answer_messages = self.prompt_manager.render_prompt( + self.answer_prompt, + self.get_system_prompt_variables(overrides.get("prompt_template")) + | { + "include_follow_up_questions": bool(overrides.get("suggest_followup_questions")), + "past_messages": answer_messages[:-1], + "user_query": original_user_query, + "text_sources": text_sources, + "previous_answer": answer_step.get_completion(), + "previous_answer_evaluations": [ + { "label": "Groundedness", "score": reflection.groundedness.score, "explanation": reflection.groundedness.explanation }, + { "label": "Correctness", "score": reflection.correctness.score, "explanation": reflection.correctness.explanation }, + { "label": "Relevance", "score": reflection.relevance.score, "explanation": reflection.relevance.explanation }, + ], + "revised_answer": reflection.next_answer + }, + ) + + answer_step = StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Prompt to generate updated reflected answer", + messages=answer_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + ), + chat_completion=self.create_chat_completion( + self.chatgpt_deployment, + self.chatgpt_model, + answer_messages, + overrides, + self.get_response_token_limit(self.chatgpt_model, 1024), + should_stream, + ), + data_points=DataPoints(text=text_sources), + should_stream=should_stream + ) + else: + # No new query, yield revised answer + break + + if answer_passed_eval: + answer_step.rewind() + yield answer_step + else: + next_answer = reflection.next_answer or next_answer + if next_answer: + yield StreamingThoughtStep( + step=self.format_thought_step_for_chatcompletion( + title="Using reflection revised answer", + messages=answer_messages, + overrides=overrides, + model=self.chatgpt_model, + deployment=self.chatgpt_deployment, + ), + completion=next_answer, + data_points=DataPoints(text=text_sources), + should_stream=False + ) + else: + yield answer_step + + + + diff --git a/app/backend/approaches/chatreadretrievereadvision.py b/app/backend/approaches/chatreadretrievereadvision.py index b56d773a6f..3472bc6d3c 100644 --- a/app/backend/approaches/chatreadretrievereadvision.py +++ b/app/backend/approaches/chatreadretrievereadvision.py @@ -1,5 +1,5 @@ from collections.abc import Awaitable -from typing import Any, Callable, Optional, Union, cast +from typing import Any, Callable, Optional, Union, cast, AsyncGenerator from azure.search.documents.aio import SearchClient from azure.storage.blob.aio import ContainerClient @@ -11,8 +11,8 @@ ChatCompletionToolParam, ) -from approaches.approach import DataPoints, ExtraInfo, ThoughtStep -from approaches.chatapproach import ChatApproach +from approaches.approach import DataPoints, ThoughtStep +from approaches.chatapproach 
import ChatApproach, StreamingThoughtStep from approaches.promptmanager import PromptManager from core.authentication import AuthenticationHelper from core.imageshelper import fetch_image @@ -77,7 +77,7 @@ async def run_until_final_call( overrides: dict[str, Any], auth_claims: dict[str, Any], should_stream: bool = False, - ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]: + ) -> AsyncGenerator[StreamingThoughtStep, None]: seed = overrides.get("seed", None) use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None] use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None] @@ -104,6 +104,19 @@ async def run_until_final_call( tools: list[ChatCompletionToolParam] = self.query_rewrite_tools # STEP 1: Generate an optimized keyword search query based on the chat history and the last question + yield StreamingThoughtStep( + step=ThoughtStep( + "Prompt to generate search query", + query_messages, + ( + {"model": self.chatgpt_model, "deployment": self.chatgpt_deployment} + if self.chatgpt_deployment + else {"model": self.chatgpt_model} + ), + ), + role="tool" + ) + chat_completion: ChatCompletion = await self.openai_client.chat.completions.create( messages=query_messages, # Azure OpenAI takes the deployment name as the model name @@ -144,6 +157,14 @@ async def run_until_final_call( use_query_rewriting, ) + yield StreamingThoughtStep( + step=ThoughtStep( + "Search results", + [result.serialize_for_results() for result in results] + ), + role="tool" + ) + # STEP 3: Generate a contextual and content specific answer using the search results and chat history text_sources = [] image_sources = [] @@ -167,50 +188,17 @@ async def run_until_final_call( }, ) - extra_info = ExtraInfo( - DataPoints(text=text_sources, images=image_sources), - [ - ThoughtStep( - "Prompt to generate search query", - query_messages, - ( - {"model": self.chatgpt_model, "deployment": self.chatgpt_deployment} - if self.chatgpt_deployment - else {"model": self.chatgpt_model} - ), - ), - ThoughtStep( - "Search using generated search query", - query_text, - { - "use_semantic_captions": use_semantic_captions, - "use_semantic_ranker": use_semantic_ranker, - "use_query_rewriting": use_query_rewriting, - "top": top, - "filter": filter, - "vector_fields": vector_fields, - "use_text_search": use_text_search, - }, - ), - ThoughtStep( - "Search results", - [result.serialize_for_results() for result in results], - ), - ThoughtStep( - "Prompt to generate answer", - messages, - ( - {"model": self.gpt4v_model, "deployment": self.gpt4v_deployment} - if self.gpt4v_deployment - else {"model": self.gpt4v_model} - ), - ), - ], - ) - - chat_coroutine = cast( - Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]], - self.openai_client.chat.completions.create( + yield StreamingThoughtStep( + step=ThoughtStep( + "Prompt to generate answer", + messages, + ( + {"model": self.gpt4v_model, "deployment": self.gpt4v_deployment} + if self.gpt4v_deployment + else {"model": self.gpt4v_model} + ) + ), + chat_completion=self.openai_client.chat.completions.create( model=self.gpt4v_deployment if self.gpt4v_deployment else self.gpt4v_model, messages=messages, temperature=overrides.get("temperature", 0.3), @@ -219,5 +207,6 @@ async def run_until_final_call( stream=should_stream, seed=seed, ), + data_points=DataPoints(text=text_sources, images=image_sources), + should_stream=should_stream ) - return (extra_info, chat_coroutine) diff --git 
a/app/backend/approaches/prompts/chat_answer_question.prompty b/app/backend/approaches/prompts/chat_answer_question.prompty index 3dcb05ae21..91a7733a6e 100644 --- a/app/backend/approaches/prompts/chat_answer_question.prompty +++ b/app/backend/approaches/prompts/chat_answer_question.prompty @@ -37,6 +37,28 @@ Do not repeat questions that have already been asked. Make sure the last question ends with ">>". {% endif %} +{% if previous_answer %} +You've previously attempted to answer this question, and it has been evaluated that the previous answer was not sufficient. +Previous answer: +{{ previous_answer }} + +Why was the previous answer insufficient? +{% for evaluation in previous_answer_evaluations %} +Evaluation: {{ evaluation["label"] }} +Score: {{ evaluation["score"] }} +Explanation: {{ evaluation["explanation"] }} +{% endfor %} + +{% if revised_answer %} +The previous answer was revised to improve the quality of the response. Use this to help generate a better answer: +Revised answer: +{{ revised_answer }} + +{% endif %} + +Use this information to improve the answer this time. +{% endif %} + {% for message in past_messages %} {{ message["role"] }}: {{ message["content"] }} diff --git a/app/backend/approaches/prompts/chat_reflect_answer.prompty b/app/backend/approaches/prompts/chat_reflect_answer.prompty new file mode 100644 index 0000000000..8f78f3d18e --- /dev/null +++ b/app/backend/approaches/prompts/chat_reflect_answer.prompty @@ -0,0 +1,72 @@ +--- +name: Chat +description: Reflect on an answer to a question (with chat history) and score its relevance, groundedness, and correctness using solely text sources. +model: + api: chat +--- +system: +You are an expert in evaluating the quality of a RESPONSE from an intelligent system based on three communication traits: Relevance, Groundedness, and Correctness. Your job is to assign each trait a score from 1 to 5 using the definitions below. + +# Definitions + +## Relevance +1 - Irrelevant Response: Unrelated to the question. +2 - Incorrect Response: Attempts to answer but gives wrong info. +3 - Incomplete Response: Addresses the question but omits key details. +4 - Complete Response: Fully addresses the question with accurate, essential details. +5 - Comprehensive Response with Insights: Fully accurate and adds relevant insights or implications. + +## Groundedness +1 - Completely Unrelated Response: No relation to context or question. +2 - Related Topic but Does Not Respond: Mentions context topic but fails to answer. +3 - Attempts to Respond but Contains Incorrect Info: Tries to answer but misstates facts. +4 - Partially Correct Response: Correct but omits specific context details. +5 - Fully Correct and Complete Response: Thoroughly accurate and includes all context details. +

## Correctness +1 - Completely Incorrect: Contains no correct or relevant facts. +2 - Mostly Incorrect: Major factual or logical errors overshadow any correct parts. +3 - Partially Correct: Some facts are right but others are wrong or misleading. +4 - Mostly Correct: Largely accurate with only minor inaccuracies. +5 - Fully Correct: Entirely accurate, fact-based, and logically consistent. + +# Tasks +For each trait—Relevance, Groundedness, Correctness—produce: +• ThoughtChain: start with “Let's think step by step:” and give a concise chain of reasoning. +• Explanation: a very short justification. +• Score: an integer from 1 to 5. A response like "I don't know" can never achieve a high score. + +Based on your reflection, if it is necessary to search a knowledge base for any potentially missing context, propose it. 
+Query generation guidelines: +You have access to Azure AI Search index with 100's of documents. +Generate a search query based on the conversation and the new question. +Do not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms. +Do not include any text inside [] or <<>> in the search query terms. +Do not include any special characters like '+'. +If the question is not in English, translate the question to English before generating the search query. + +Based on your reflection, if it is necessary to adjust the final answer to improve the quality of the response, propose the adjusted answer +If the answer cannot be improved, return an empty string. +Answer generation guidelines: +Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers. +Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. +If the question is not in English, answer in the language used in the question. +Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]. + + +# Data +CONTEXT: +{% for text_source in text_sources %} +{{ text_source }} +{% endfor %} +QUERY: {{query}} +RESPONSE: {{response}} + +user: + +Conversation History: + +{% for message in past_messages %} +{{ message["role"] }}: +{{ message["content"] }} +{% endfor %} diff --git a/app/backend/approaches/prompts/chat_reflect_answer_tools.json b/app/backend/approaches/prompts/chat_reflect_answer_tools.json new file mode 100644 index 0000000000..8a405fae11 --- /dev/null +++ b/app/backend/approaches/prompts/chat_reflect_answer_tools.json @@ -0,0 +1,83 @@ +[ + { + "type": "function", + "function": { + "name": "reflect_answer", + "description": "Reflect on an answer to a question and decide on the next step based on the context of the conversation.", + "parameters": { + "type": "object", + "properties": { + "relevance": { + "type": "object", + "description": "Relevance of the answer to the question asked.", + "properties": { + "thoughtChain": { + "type": "string", + "description": "A chain of thoughts that led to the conclusion about the relevance of the answer." + }, + "explanation": { + "type": "string", + "description": "An explanation of why the answer is relevant or not relevant to the question asked." + }, + "score": { + "type": "number", + "description": "A score indicating how relevant the answer is to the question asked, on a scale from 1 to 5." + } + } + }, + "groundedness": { + "type": "object", + "description": "Groundedness of the answer based on the context for the question.", + "properties": { + "thoughtChain": { + "type": "string", + "description": "A chain of thoughts that led to the conclusion about the groundedness of the answer." + }, + "explanation": { + "type": "string", + "description": "An explanation of why the answer is grounded based on the context." + }, + "score": { + "type": "number", + "description": "A score indicating how grounded the answer is to the context given, on a scale from 1 to 5." 
+ } + } + }, + "correctness": { + "type": "object", + "description": "Correctness of the answer based on the context for the question.", + "properties": { + "thoughtChain": { + "type": "string", + "description": "A chain of thoughts that led to the conclusion about the correctness of the answer." + }, + "explanation": { + "type": "string", + "description": "An explanation of why the answer is correct or not correct based on the context." + }, + "score": { + "type": "number", + "description": "A score indicating how correct the answer is to the context given, on a scale from 1 to 5." + } + } + }, + "next_query": { + "type": "string", + "description": "The query to search in the index if necessary. Output an empty string if it's not necessary" + }, + "next_answer": { + "type": "string", + "description": "The answer to the question asked, which may be empty if no correction is necessary or a new one based on the context." + } + }, + "required": [ + "relevance", + "groundedness", + "correctness", + "next_query", + "next_answer" + ] + } + } + } +] \ No newline at end of file diff --git a/app/backend/config.py b/app/backend/config.py index 5f3354f2da..b190e11615 100644 --- a/app/backend/config.py +++ b/app/backend/config.py @@ -11,6 +11,7 @@ CONFIG_GPT4V_DEPLOYED = "gpt4v_deployed" CONFIG_SEMANTIC_RANKER_DEPLOYED = "semantic_ranker_deployed" CONFIG_QUERY_REWRITING_ENABLED = "query_rewriting_enabled" +CONFIG_REFLECTION_ENABLED = "reflection_enabled" CONFIG_REASONING_EFFORT_ENABLED = "reasoning_effort_enabled" CONFIG_VISION_REASONING_EFFORT_ENABLED = "vision_reasoning_effort_enabled" CONFIG_DEFAULT_REASONING_EFFORT = "default_reasoning_effort" diff --git a/app/backend/error.py b/app/backend/error.py index 0a21afe6b7..e761847e73 100644 --- a/app/backend/error.py +++ b/app/backend/error.py @@ -2,6 +2,7 @@ from openai import APIError from quart import jsonify +import traceback ERROR_MESSAGE = """The app encountered an error processing your request. If you are an administrator of the app, view the full error in the logs. See aka.ms/appservice-logs for more information. 
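As a reading aid for the backend changes above, here is a minimal sketch of the arguments a reflect_answer tool call is expected to carry and how ChatApproach.get_reflection consumes them. The literal scores and strings are invented for illustration; only the key names come from chat_reflect_answer_tools.json, and the score threshold comes from the reflection loop in chatreadretrieveread.py.

# Illustrative sketch only: a fabricated "reflect_answer" tool-call payload.
# Key names mirror chat_reflect_answer_tools.json; all values are made up.
import json

example_arguments = json.dumps({
    "relevance": {"thoughtChain": "Let's think step by step: ...", "explanation": "Fully addresses the question.", "score": 4},
    "groundedness": {"thoughtChain": "Let's think step by step: ...", "explanation": "Every fact is cited from a source.", "score": 5},
    "correctness": {"thoughtChain": "Let's think step by step: ...", "explanation": "No factual errors found.", "score": 4},
    "next_query": "",   # empty string: no additional search is proposed
    "next_answer": "",  # empty string: the candidate answer needs no revision
})

# get_reflection reads these arguments from chat_completion.choices[0].message.tool_calls
# and maps them onto the Reflection/ReflectionResponse dataclasses; the reflection loop
# in ChatReadRetrieveReadApproach stops once all three scores are >= 4.
args = json.loads(example_arguments)
passed = all(args[trait]["score"] >= 4 for trait in ("relevance", "groundedness", "correctness"))
print(passed)  # True for this fabricated example
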
diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index c915a19ee5..ef7847ea71 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -21,6 +21,7 @@ export type ChatAppRequestOverrides = { semantic_ranker?: boolean; semantic_captions?: boolean; query_rewriting?: boolean; + reflection?: boolean; reasoning_effort?: string; include_category?: string; exclude_category?: string; @@ -89,6 +90,7 @@ export type Config = { showSemanticRankerOption: boolean; showQueryRewritingOption: boolean; showReasoningEffortOption: boolean; + showReflectionOption: boolean; streamingEnabled: boolean; showVectorOption: boolean; showUserUpload: boolean; diff --git a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css index 84b9f110ea..17ad5e751e 100644 --- a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css +++ b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css @@ -134,3 +134,30 @@ background-color: #424242; color: #ffffff; } + +.evaluationContainer { + margin: 16px 0; + padding: 16px; + border: 1px solid #ddd; + border-radius: 8px; + background-color: #f9f9f9; +} + +.evaluationLabel { + font-size: 18px; + font-weight: bold; + margin-bottom: 8px; + color: #333; +} + +.evaluationList { + list-style-type: none; + padding: 0; + margin: 0; +} + +.evaluationList li { + margin: 4px 0; + font-size: 14px; + color: #555; +} diff --git a/app/frontend/src/components/AnalysisPanel/CandidateAnswer.tsx b/app/frontend/src/components/AnalysisPanel/CandidateAnswer.tsx new file mode 100644 index 0000000000..5d37fd649b --- /dev/null +++ b/app/frontend/src/components/AnalysisPanel/CandidateAnswer.tsx @@ -0,0 +1,19 @@ +import React from "react"; +import styles from "./AnalysisPanel.module.css"; + +interface CandidateAnswerProps { + candidate_answer: string | undefined; +} + +export const CandidateAnswer: React.FC = ({ candidate_answer }) => { + return ( +
<div className={styles.evaluationContainer}>
+ <div className={styles.evaluationLabel}>Candidate Answer</div>
+ {candidate_answer ? (
+ <div>{candidate_answer}</div>
+ ) : (
+ <div>No candidate answer available</div>
+ )}
+ </div>
+ ); +}; diff --git a/app/frontend/src/components/AnalysisPanel/Evaluation.tsx b/app/frontend/src/components/AnalysisPanel/Evaluation.tsx new file mode 100644 index 0000000000..6bc145f2c5 --- /dev/null +++ b/app/frontend/src/components/AnalysisPanel/Evaluation.tsx @@ -0,0 +1,24 @@ +import React from "react"; +import styles from "./AnalysisPanel.module.css"; + +interface EvaluationProps { + label: string; + value: { + thought_chain: string; + score: number; + explanation: string; + }; +} + +export const Evaluation: React.FC<EvaluationProps> = ({ label, value }) => { + return (
+ <div className={styles.evaluationContainer}>
+ <div className={styles.evaluationLabel}>{label}</div>
+ <ul className={styles.evaluationList}>
+ <li>• Thought Chain: {value.thought_chain}</li>
+ <li>• Score: {value.score}</li>
+ <li>• Explanation: {value.explanation}</li>
+ </ul>
+ </div>
+ ); +}; diff --git a/app/frontend/src/components/AnalysisPanel/Reflection.tsx b/app/frontend/src/components/AnalysisPanel/Reflection.tsx new file mode 100644 index 0000000000..3f057298d7 --- /dev/null +++ b/app/frontend/src/components/AnalysisPanel/Reflection.tsx @@ -0,0 +1,19 @@ +import React from "react"; +import styles from "./AnalysisPanel.module.css"; + +interface ReflectionProps { + next_answer: string | undefined; + next_query: string | undefined; +} + +export const Reflection: React.FC<ReflectionProps> = ({ next_answer, next_query }) => { + return (
+ <div className={styles.evaluationContainer}>
+ <div className={styles.evaluationLabel}>Next Steps</div>
+ <ul className={styles.evaluationList}>
+ <li>• Next Query: {next_query}</li>
+ <li>• Revised Answer: {next_answer}</li>
+ </ul>
+ </div>
+ ); +}; diff --git a/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx b/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx index f666960da1..b0e7517839 100644 --- a/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx +++ b/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx @@ -7,6 +7,9 @@ import styles from "./AnalysisPanel.module.css"; import { Thoughts } from "../../api"; import { TokenUsageGraph } from "./TokenUsageGraph"; +import { Evaluation } from "./Evaluation"; +import { Reflection } from "./Reflection"; +import { CandidateAnswer } from "./CandidateAnswer"; SyntaxHighlighter.registerLanguage("json", json); @@ -14,6 +17,8 @@ interface Props { thoughts: Thoughts[]; } +const known_keys = ["token_usage", "reasoning_effort", "groundedness", "relevance", "correctness", "next_query", "next_answer", "candidate_answer"]; + export const ThoughtProcess = ({ thoughts }: Props) => { return (
    @@ -23,13 +28,14 @@ export const ThoughtProcess = ({ thoughts }: Props) => {
    {t.title}
    {t.props && - (Object.keys(t.props).filter(k => k !== "token_usage") || []).map((k: any) => ( + (Object.keys(t.props).filter(k => !known_keys.includes(k)) || []).map((k: any) => ( {k}: {JSON.stringify(t.props?.[k])} ))} {t.props?.token_usage && } + {Array.isArray(t.description) ? ( {JSON.stringify(t.description, null, 2)} @@ -37,6 +43,12 @@ export const ThoughtProcess = ({ thoughts }: Props) => { ) : (
    {t.description}
    )} + + {t.props?.groundedness && } + {t.props?.relevance && } + {t.props?.correctness && } + {(t.props?.next_query || t.props?.next_answer) && } + {t.props?.candidate_answer && } ); })} diff --git a/app/frontend/src/components/Settings/Settings.tsx b/app/frontend/src/components/Settings/Settings.tsx index b16beb0246..a06469d985 100644 --- a/app/frontend/src/components/Settings/Settings.tsx +++ b/app/frontend/src/components/Settings/Settings.tsx @@ -20,6 +20,7 @@ export interface SettingsProps { useSemanticRanker: boolean; useSemanticCaptions: boolean; useQueryRewriting: boolean; + useReflection: boolean; reasoningEffort: string; excludeCategory: string; includeCategory: string; @@ -30,6 +31,7 @@ export interface SettingsProps { showSemanticRankerOption: boolean; showQueryRewritingOption: boolean; showReasoningEffortOption: boolean; + showReflectionOption: boolean; showGPT4VOptions: boolean; showVectorOption: boolean; useOidSecurityFilter: boolean; @@ -57,6 +59,7 @@ export const Settings = ({ useSemanticRanker, useSemanticCaptions, useQueryRewriting, + useReflection, reasoningEffort, excludeCategory, includeCategory, @@ -67,6 +70,7 @@ export const Settings = ({ showSemanticRankerOption, showQueryRewritingOption, showReasoningEffortOption, + showReflectionOption, showGPT4VOptions, showVectorOption, useOidSecurityFilter, @@ -106,6 +110,7 @@ export const Settings = ({ const semanticRankerFieldId = useId("semanticRankerField"); const queryRewritingFieldId = useId("queryRewritingField"); const reasoningEffortFieldId = useId("reasoningEffortField"); + const reflectionField = useId("reflectionField"); const semanticCaptionsId = useId("semanticCaptions"); const semanticCaptionsFieldId = useId("semanticCaptionsField"); const useOidSecurityFilterId = useId("useOidSecurityFilter"); @@ -266,6 +271,20 @@ export const Settings = ({ )} + {showReflectionOption && ( + <> + onChange("useReflection", !!checked)} + aria-labelledby={reasoningEffortFieldId} + onRenderLabel={props => renderLabel(props, reasoningEffortFieldId, reasoningEffortFieldId, t("helpTexts.useReflection"))} + /> + + )} + {showReasoningEffortOption && ( (true); const [useSemanticCaptions, setUseSemanticCaptions] = useState(false); const [useQueryRewriting, setUseQueryRewriting] = useState(false); + const [useReflection, setUseReflection] = useState(false); const [reasoningEffort, setReasoningEffort] = useState(""); const [useGPT4V, setUseGPT4V] = useState(false); const [gpt4vInput, setGPT4VInput] = useState(GPT4VInput.TextAndImages); @@ -45,6 +46,7 @@ export function Component(): JSX.Element { const [showGPT4VOptions, setShowGPT4VOptions] = useState(false); const [showSemanticRankerOption, setShowSemanticRankerOption] = useState(false); const [showQueryRewritingOption, setShowQueryRewritingOption] = useState(false); + const [showReflectionOption, setShowReflectionOption] = useState(false); const [showReasoningEffortOption, setShowReasoningEffortOption] = useState(false); const [showVectorOption, setShowVectorOption] = useState(false); const [showUserUpload, setShowUserUpload] = useState(false); @@ -84,6 +86,8 @@ export function Component(): JSX.Element { setShowSemanticRankerOption(config.showSemanticRankerOption); setUseQueryRewriting(config.showQueryRewritingOption); setShowQueryRewritingOption(config.showQueryRewritingOption); + setUseReflection(config.showReflectionOption); + setShowReflectionOption(config.showReflectionOption); setShowReasoningEffortOption(config.showReasoningEffortOption); if (config.showReasoningEffortOption) { 
setReasoningEffort(config.defaultReasoningEffort); @@ -195,6 +199,9 @@ export function Component(): JSX.Element { case "useQueryRewriting": setUseQueryRewriting(value); break; + case "useReflection": + setUseReflection(value); + break; case "reasoningEffort": setReasoningEffort(value); break; @@ -340,6 +347,7 @@ export function Component(): JSX.Element { useSemanticRanker={useSemanticRanker} useSemanticCaptions={useSemanticCaptions} useQueryRewriting={useQueryRewriting} + useReflection={useReflection} reasoningEffort={reasoningEffort} excludeCategory={excludeCategory} includeCategory={includeCategory} @@ -350,6 +358,7 @@ export function Component(): JSX.Element { showSemanticRankerOption={showSemanticRankerOption} showQueryRewritingOption={showQueryRewritingOption} showReasoningEffortOption={showReasoningEffortOption} + showReflectionOption={showReflectionOption} showGPT4VOptions={showGPT4VOptions} showVectorOption={showVectorOption} useOidSecurityFilter={useOidSecurityFilter} diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 5d00c2c914..09284a723a 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -49,6 +49,7 @@ const Chat = () => { const [retrievalMode, setRetrievalMode] = useState(RetrievalMode.Hybrid); const [useSemanticRanker, setUseSemanticRanker] = useState(true); const [useQueryRewriting, setUseQueryRewriting] = useState(false); + const [useReflection, setUseReflection] = useState(false); const [reasoningEffort, setReasoningEffort] = useState(""); const [streamingEnabled, setStreamingEnabled] = useState(true); const [shouldStream, setShouldStream] = useState(true); @@ -80,6 +81,7 @@ const Chat = () => { const [showGPT4VOptions, setShowGPT4VOptions] = useState(false); const [showSemanticRankerOption, setShowSemanticRankerOption] = useState(false); const [showQueryRewritingOption, setShowQueryRewritingOption] = useState(false); + const [showReflectionOption, setShowReflectionOption] = useState(false); const [showReasoningEffortOption, setShowReasoningEffortOption] = useState(false); const [showVectorOption, setShowVectorOption] = useState(false); const [showUserUpload, setShowUserUpload] = useState(false); @@ -107,6 +109,8 @@ const Chat = () => { setShowSemanticRankerOption(config.showSemanticRankerOption); setUseQueryRewriting(config.showQueryRewritingOption); setShowQueryRewritingOption(config.showQueryRewritingOption); + setUseReflection(config.showReflectionOption); + setShowReflectionOption(config.showReflectionOption); setShowReasoningEffortOption(config.showReasoningEffortOption); setStreamingEnabled(config.streamingEnabled); if (!config.streamingEnabled) { @@ -133,15 +137,12 @@ const Chat = () => { let answer: string = ""; let askResponse: ChatAppResponse = {} as ChatAppResponse; - const updateState = (newContent: string) => { + const updateState = (newContent: string, role: string) => { return new Promise(resolve => { setTimeout(() => { answer += newContent; - const latestResponse: ChatAppResponse = { - ...askResponse, - message: { content: answer, role: askResponse.message.role } - }; - setStreamedAnswers([...answers, [question, latestResponse]]); + askResponse.message = { content: answer, role: role }; + setStreamedAnswers([...answers, [question, { ...askResponse }]]); resolve(null); }, 33); }); @@ -149,12 +150,9 @@ const Chat = () => { try { setIsStreaming(true); for await (const event of readNDJSONStream(responseBody)) { - if (event["context"] && event["context"]["data_points"]) { - 
event["message"] = event["delta"]; - askResponse = event as ChatAppResponse; - } else if (event["delta"] && event["delta"]["content"]) { + if (event["delta"] && event["delta"]["content"]) { setIsLoading(false); - await updateState(event["delta"]["content"]); + await updateState(event["delta"]["content"], event["delta"]["role"]); } else if (event["context"]) { // Update context with new keys from latest event askResponse.context = { ...askResponse.context, ...event["context"] }; @@ -165,11 +163,7 @@ const Chat = () => { } finally { setIsStreaming(false); } - const fullResponse: ChatAppResponse = { - ...askResponse, - message: { content: answer, role: askResponse.message.role } - }; - return fullResponse; + return askResponse; }; const client = useLogin ? useMsal().instance : undefined; @@ -213,6 +207,7 @@ const Chat = () => { semantic_ranker: useSemanticRanker, semantic_captions: useSemanticCaptions, query_rewriting: useQueryRewriting, + reflection: useReflection, reasoning_effort: reasoningEffort, suggest_followup_questions: useSuggestFollowupQuestions, use_oid_security_filter: useOidSecurityFilter, @@ -308,6 +303,9 @@ const Chat = () => { case "reasoningEffort": setReasoningEffort(value); break; + case "useReflection": + setUseReflection(value); + break; case "useSemanticCaptions": setUseSemanticCaptions(value); break; @@ -524,6 +522,7 @@ const Chat = () => { useSemanticRanker={useSemanticRanker} useSemanticCaptions={useSemanticCaptions} useQueryRewriting={useQueryRewriting} + useReflection={useReflection} reasoningEffort={reasoningEffort} excludeCategory={excludeCategory} includeCategory={includeCategory} @@ -533,6 +532,7 @@ const Chat = () => { vectorFieldList={vectorFieldList} showSemanticRankerOption={showSemanticRankerOption} showQueryRewritingOption={showQueryRewritingOption} + showReflectionOption={showReflectionOption} showReasoningEffortOption={showReasoningEffortOption} showGPT4VOptions={showGPT4VOptions} showVectorOption={showVectorOption}