Matt/reflection #2510

Draft: wants to merge 11 commits into main
26 changes: 20 additions & 6 deletions app/backend/app.py
@@ -8,6 +8,7 @@
 from collections.abc import AsyncGenerator
 from pathlib import Path
 from typing import Any, Union, cast
+import traceback

 from azure.cognitiveservices.speech import (
     ResultReason,
@@ -72,6 +73,7 @@
     CONFIG_LANGUAGE_PICKER_ENABLED,
     CONFIG_OPENAI_CLIENT,
     CONFIG_QUERY_REWRITING_ENABLED,
+    CONFIG_REFLECTION_ENABLED,
     CONFIG_REASONING_EFFORT_ENABLED,
     CONFIG_SEARCH_CLIENT,
     CONFIG_SEMANTIC_RANKER_DEPLOYED,
@@ -188,10 +190,11 @@ async def ask(auth_claims: dict[str, Any]):
             approach = cast(Approach, current_app.config[CONFIG_ASK_VISION_APPROACH])
         else:
             approach = cast(Approach, current_app.config[CONFIG_ASK_APPROACH])
-        r = await approach.run(
+        result = await approach.run(
             request_json["messages"], context=context, session_state=request_json.get("session_state")
         )
-        return jsonify(r)
+        results = [r async for r in result]
+        return jsonify({"value": results})
     except Exception as error:
         return error_response(error, "/ask")

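Since approach.run now hands back an async generator of events rather than a single dict, the non-streaming routes have to drain it before serializing. A minimal standalone sketch of that pattern (payloads hypothetical):

import asyncio
from collections.abc import AsyncGenerator

async def fake_run() -> AsyncGenerator[dict, None]:
    # Stand-in for an approach's event stream; a reflection approach might
    # emit one event per pass.
    yield {"message": {"content": "draft answer"}}
    yield {"message": {"content": "answer after reflection"}}

async def handle_ask() -> dict:
    results = [r async for r in fake_run()]  # drain the generator before serializing
    return {"value": results}  # same envelope the reworked /ask and /chat return

print(asyncio.run(handle_ask()))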
@@ -208,6 +211,7 @@ async def format_as_ndjson(r: AsyncGenerator[dict, None]) -> AsyncGenerator[str, None]:
         async for event in r:
             yield json.dumps(event, ensure_ascii=False, cls=JSONEncoder) + "\n"
     except Exception as error:
+        traceback.print_exc()
         logging.exception("Exception while generating response stream: %s", error)
         yield json.dumps(error_dict(error))

@@ -241,7 +245,8 @@ async def chat(auth_claims: dict[str, Any]):
             context=context,
             session_state=session_state,
         )
-        return jsonify(result)
+        results = [r async for r in result]
+        return jsonify({"value": results})
     except Exception as error:
         return error_response(error, "/chat")

@@ -297,6 +302,7 @@ def config():
             "showGPT4VOptions": current_app.config[CONFIG_GPT4V_DEPLOYED],
             "showSemanticRankerOption": current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED],
             "showQueryRewritingOption": current_app.config[CONFIG_QUERY_REWRITING_ENABLED],
+            "showReflectionOption": current_app.config[CONFIG_REFLECTION_ENABLED],
             "showReasoningEffortOption": current_app.config[CONFIG_REASONING_EFFORT_ENABLED],
             "streamingEnabled": current_app.config[CONFIG_STREAMING_ENABLED],
             "defaultReasoningEffort": current_app.config[CONFIG_DEFAULT_REASONING_EFFORT],
@@ -428,6 +434,7 @@ async def setup_clients():
     # Shared by all OpenAI deployments
     OPENAI_HOST = os.getenv("OPENAI_HOST", "azure")
     OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"]
+    OPENAI_CHATGPT_REFLECTION_MODEL = os.environ.get("AZURE_OPENAI_CHATGPT_REFLECTION_MODEL")
     OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002")
     OPENAI_EMB_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMB_DIMENSIONS") or 1536)
     OPENAI_REASONING_EFFORT = os.getenv("AZURE_OPENAI_REASONING_EFFORT")
@@ -438,6 +445,9 @@
     AZURE_OPENAI_CHATGPT_DEPLOYMENT = (
         os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None
     )
+    AZURE_OPENAI_CHATGPT_REFLECTION_DEPLOYMENT = (
+        os.getenv("AZURE_OPENAI_CHATGPT_REFLECTION_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None
+    )
     AZURE_OPENAI_EMB_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None
     AZURE_OPENAI_CUSTOM_URL = os.getenv("AZURE_OPENAI_CUSTOM_URL")
     # https://learn.microsoft.com/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
@@ -471,6 +481,7 @@

     USE_GPT4V = os.getenv("USE_GPT4V", "").lower() == "true"
     USE_USER_UPLOAD = os.getenv("USE_USER_UPLOAD", "").lower() == "true"
+    USE_REFLECTION = os.getenv("USE_REFLECTION", "").lower() == "true"
     ENABLE_LANGUAGE_PICKER = os.getenv("ENABLE_LANGUAGE_PICKER", "").lower() == "true"
     USE_SPEECH_INPUT_BROWSER = os.getenv("USE_SPEECH_INPUT_BROWSER", "").lower() == "true"
     USE_SPEECH_OUTPUT_BROWSER = os.getenv("USE_SPEECH_OUTPUT_BROWSER", "").lower() == "true"
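All of these feature flags share one parsing convention: only the literal string "true", in any casing, enables them; "1", "yes", or an unset variable leave the flag off. A tiny equivalent helper (hypothetical, not part of this PR) spells out the rule:

import os

def env_flag(name: str) -> bool:
    # Mirrors the app's convention: only "true"/"True"/"TRUE" enable a flag.
    return os.getenv(name, "").lower() == "true"

USE_REFLECTION = env_flag("USE_REFLECTION")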
@@ -655,6 +666,7 @@ async def setup_clients():
         or OPENAI_CHATGPT_MODEL not in Approach.GPT_REASONING_MODELS
         or Approach.GPT_REASONING_MODELS[OPENAI_CHATGPT_MODEL].streaming
     )
+    current_app.config[CONFIG_REFLECTION_ENABLED] = USE_REFLECTION
     current_app.config[CONFIG_VECTOR_SEARCH_ENABLED] = os.getenv("USE_VECTORS", "").lower() != "false"
     current_app.config[CONFIG_USER_UPLOAD_ENABLED] = bool(USE_USER_UPLOAD)
     current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED] = ENABLE_LANGUAGE_PICKER
@@ -692,6 +704,8 @@ async def setup_clients():
         auth_helper=auth_helper,
         chatgpt_model=OPENAI_CHATGPT_MODEL,
         chatgpt_deployment=AZURE_OPENAI_CHATGPT_DEPLOYMENT,
+        chatgpt_reflection_model=OPENAI_CHATGPT_REFLECTION_MODEL,
+        chatgpt_reflection_deployment=AZURE_OPENAI_CHATGPT_REFLECTION_DEPLOYMENT,
         embedding_model=OPENAI_EMB_MODEL,
         embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
         embedding_dimensions=OPENAI_EMB_DIMENSIONS,
@@ -790,12 +804,12 @@ def create_app():

     # Log levels should be one of https://docs.python.org/3/library/logging.html#logging-levels
     # Set root level to WARNING to avoid seeing overly verbose logs from SDKS
-    logging.basicConfig(level=logging.WARNING)
+    logging.basicConfig(level=logging.INFO)
     # Set our own logger levels to INFO by default
-    app_level = os.getenv("APP_LOG_LEVEL", "INFO")
+    app_level = os.getenv("APP_LOG_LEVEL", "DEBUG")
-    app.logger.setLevel(os.getenv("APP_LOG_LEVEL", app_level))
+    app.logger.setLevel("DEBUG")
     logging.getLogger("scripts").setLevel(app_level)

     if allowed_origin := os.getenv("ALLOWED_ORIGIN"):
         allowed_origins = allowed_origin.split(";")
         if len(allowed_origins) > 0:
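The app.py changes are plumbing only: a feature flag, a second model/deployment pair, and constructor wiring. How the reflection pass actually uses that second model is not shown in this file. Purely as a speculative sketch (all prompts, names, and the single-round structure are assumptions, not taken from this diff), one draft-critique-revise round might look like:

from openai import AsyncOpenAI

async def answer_with_reflection(
    client: AsyncOpenAI, chat_model: str, reflection_model: str, question: str
) -> str:
    # 1. Draft an answer with the primary chat model.
    draft = (
        await client.chat.completions.create(
            model=chat_model, messages=[{"role": "user", "content": question}]
        )
    ).choices[0].message.content

    # 2. Ask the reflection model to critique the draft.
    critique = (
        await client.chat.completions.create(
            model=reflection_model,
            messages=[
                {"role": "system", "content": "Critique this draft for accuracy and grounding."},
                {"role": "user", "content": f"Question: {question}\n\nDraft: {draft}"},
            ],
        )
    ).choices[0].message.content

    # 3. Revise once using the critique; a real implementation might iterate
    #    and yield each pass as a separate event.
    revised = (
        await client.chat.completions.create(
            model=chat_model,
            messages=[
                {
                    "role": "user",
                    "content": f"Question: {question}\n\nDraft: {draft}\n\n"
                    f"Critique: {critique}\n\nRewrite the draft addressing the critique.",
                }
            ],
        )
    ).choices[0].message.content
    return revised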
26 changes: 16 additions & 10 deletions app/backend/approaches/approach.py
@@ -1,7 +1,7 @@
 import os
 from abc import ABC
 from collections.abc import AsyncGenerator, Awaitable
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import (
     Any,
     Callable,
@@ -28,6 +28,7 @@
     ChatCompletionMessageParam,
     ChatCompletionReasoningEffort,
     ChatCompletionToolParam,
+    ChatCompletionNamedToolChoiceParam,
 )

 from approaches.promptmanager import PromptManager
@@ -88,18 +89,15 @@ def trim_embedding(cls, embedding: Optional[list[float]]) -> Optional[str]:
-
         return None
-

 @dataclass
 class ThoughtStep:
     title: str
     description: Optional[Any]
     props: Optional[dict[str, Any]] = None

     def update_token_usage(self, usage: CompletionUsage) -> None:
         if self.props:
             self.props["token_usage"] = TokenUsageProps.from_completion_usage(usage)
-

 @dataclass
 class DataPoints:
     text: Optional[list[str]] = None
@@ -108,11 +106,10 @@ class DataPoints:

 @dataclass
 class ExtraInfo:
-    data_points: DataPoints
-    thoughts: Optional[list[ThoughtStep]] = None
+    data_points: Optional[DataPoints] = None
+    thoughts: list[ThoughtStep] = field(default_factory=list)
     followup_questions: Optional[list[Any]] = None
-

 @dataclass
 class TokenUsageProps:
     prompt_tokens: int
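The switch to field(default_factory=list) matters because dataclasses forbid bare mutable defaults, which would otherwise be shared across every instance. A short self-contained illustration:

from dataclasses import dataclass, field

@dataclass
class WithFactory:
    thoughts: list[str] = field(default_factory=list)  # fresh list per instance

a = WithFactory()
b = WithFactory()
a.thoughts.append("step 1")
print(b.thoughts)  # [] -- no state shared between instances

# A bare `thoughts: list[str] = []` would not even get that far:
# dataclasses raise ValueError for mutable defaults at class creation time.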
@@ -270,17 +267,21 @@ def nonewlines(s: str) -> str:
             return s.replace("\n", " ").replace("\r", " ")

         if use_semantic_captions:
-            return [
+            results = [
                 (self.get_citation((doc.sourcepage or ""), use_image_citation))
                 + ": "
                 + nonewlines(" . ".join([cast(str, c.text) for c in (doc.captions or [])]))
                 for doc in results
             ]
         else:
-            return [
+            results = [
                 (self.get_citation((doc.sourcepage or ""), use_image_citation)) + ": " + nonewlines(doc.content or "")
                 for doc in results
             ]
+
+        # Remove duplicates
+        results = list(set(results))
+        return results

     def get_citation(self, sourcepage: str, use_image_citation: bool) -> str:
         if use_image_citation:
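One caveat worth noting: set() does not preserve order, so the deduplicated source list can be reshuffled between otherwise identical runs. An order-preserving alternative (a suggestion, not what this diff does) is dict.fromkeys:

results = ["a.pdf: alpha", "b.pdf: beta", "a.pdf: alpha", "c.pdf: gamma"]

# dict keys keep first-seen order (guaranteed since Python 3.7)
deduped = list(dict.fromkeys(results))
print(deduped)  # ['a.pdf: alpha', 'b.pdf: beta', 'c.pdf: gamma']

# list(set(results)) also removes duplicates, but its ordering is arbitrary.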
@@ -356,6 +357,7 @@ def create_chat_completion(
         response_token_limit: int,
         should_stream: bool = False,
         tools: Optional[list[ChatCompletionToolParam]] = None,
+        tool_choice: Optional[ChatCompletionNamedToolChoiceParam] = None,
         temperature: Optional[float] = None,
         n: Optional[int] = None,
         reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
@@ -384,6 +386,7 @@ def create_chat_completion(
             params["stream_options"] = {"include_usage": True}

         params["tools"] = tools
+        params["tool_choice"] = tool_choice

         # Azure OpenAI takes the deployment name as the model name
         return self.openai_client.chat.completions.create(
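The new tool_choice parameter lets a caller force the model to call one specific function instead of letting it decide. A minimal sketch of the payload shape being forwarded (the tool name is hypothetical):

from openai.types.chat import ChatCompletionNamedToolChoiceParam

# Forces the model to call the named tool rather than answer free-form.
tool_choice: ChatCompletionNamedToolChoiceParam = {
    "type": "function",
    "function": {"name": "search_sources"},  # hypothetical tool name
}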
@@ -403,6 +406,7 @@ def format_thought_step_for_chatcompletion(
         deployment: Optional[str],
         usage: Optional[CompletionUsage] = None,
         reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
+        additional_properties: Optional[dict[str, Any]] = None,
     ) -> ThoughtStep:
         properties: dict[str, Any] = {"model": model}
         if deployment:
@@ -414,14 +418,16 @@ def format_thought_step_for_chatcompletion(
             )
         if usage:
             properties["token_usage"] = TokenUsageProps.from_completion_usage(usage)
+        if additional_properties:
+            properties.update(additional_properties)
         return ThoughtStep(title, messages, properties)

     async def run(
         self,
         messages: list[ChatCompletionMessageParam],
         session_state: Any = None,
         context: dict[str, Any] = {},
-    ) -> dict[str, Any]:
+    ) -> AsyncGenerator[dict[str, Any], None]:
         raise NotImplementedError

     async def run_stream(
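With run now annotated as returning an AsyncGenerator, the contract the routes rely on is: await run once, then iterate the generator it hands back. A self-contained sketch of a conforming subclass (names and payloads hypothetical):

import asyncio
from collections.abc import AsyncGenerator
from typing import Any

class SketchApproach:
    # Hypothetical stand-in for an Approach subclass.
    async def run(
        self,
        messages: list[dict[str, Any]],
        session_state: Any = None,
        context: dict[str, Any] = {},
    ) -> AsyncGenerator[dict[str, Any], None]:
        async def generate() -> AsyncGenerator[dict[str, Any], None]:
            # A reflection approach could yield the draft first
            # and the critiqued rewrite second.
            yield {"message": {"content": "draft answer"}}
            yield {"message": {"content": "answer after reflection"}}

        return generate()

async def main() -> None:
    approach = SketchApproach()
    result = await approach.run([{"role": "user", "content": "hello"}])
    print([event async for event in result])

asyncio.run(main())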