From 68d10b3cb2201e84ce0128e3ddb57b571e6b84bd Mon Sep 17 00:00:00 2001 From: james-tn Date: Tue, 12 Sep 2023 09:25:08 -0700 Subject: [PATCH 01/18] add prepdocs.py --- .../copilot/data/dataprep/prepdocs.py | 371 ++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100644 scenarios/incubations/copilot/data/dataprep/prepdocs.py diff --git a/scenarios/incubations/copilot/data/dataprep/prepdocs.py b/scenarios/incubations/copilot/data/dataprep/prepdocs.py new file mode 100644 index 000000000..9fb853235 --- /dev/null +++ b/scenarios/incubations/copilot/data/dataprep/prepdocs.py @@ -0,0 +1,371 @@ +import argparse +import base64 +import glob +import html +import io +import os +import re +import time + +import openai +from azure.ai.formrecognizer import DocumentAnalysisClient +from azure.core.credentials import AzureKeyCredential +from azure.identity import AzureDeveloperCliCredential +from azure.search.documents import SearchClient +from azure.search.documents.indexes import SearchIndexClient +from azure.search.documents.indexes.models import * +from azure.storage.blob import BlobServiceClient +from pypdf import PdfReader, PdfWriter +from tenacity import retry, stop_after_attempt, wait_random_exponential +MAX_SECTION_LENGTH = 1000 +SENTENCE_SEARCH_LIMIT = 100 +SECTION_OVERLAP = 100 + +def blob_name_from_file_page(filename, page = 0): + if os.path.splitext(filename)[1].lower() == ".pdf": + return os.path.splitext(os.path.basename(filename))[0] + f"-{page}" + ".pdf" + else: + return os.path.basename(filename) + +def upload_blobs(filename): + blob_service = BlobServiceClient(account_url=f"https://{args.storageaccount}.blob.core.windows.net", credential=storage_creds) + blob_container = blob_service.get_container_client(args.container) + if not blob_container.exists(): + blob_container.create_container() + + # if file is PDF split into pages and upload each page as a separate blob + if os.path.splitext(filename)[1].lower() == ".pdf": + reader = PdfReader(filename) + pages = reader.pages + for i in range(len(pages)): + blob_name = blob_name_from_file_page(filename, i) + if args.verbose: print(f"\tUploading blob for page {i} -> {blob_name}") + f = io.BytesIO() + writer = PdfWriter() + writer.add_page(pages[i]) + writer.write(f) + f.seek(0) + blob_container.upload_blob(blob_name, f, overwrite=True) + else: + blob_name = blob_name_from_file_page(filename) + with open(filename,"rb") as data: + blob_container.upload_blob(blob_name, data, overwrite=True) + +def remove_blobs(filename): + if args.verbose: print(f"Removing blobs for '{filename or ''}'") + blob_service = BlobServiceClient(account_url=f"https://{args.storageaccount}.blob.core.windows.net", credential=storage_creds) + blob_container = blob_service.get_container_client(args.container) + if blob_container.exists(): + if filename == None: + blobs = blob_container.list_blob_names() + else: + prefix = os.path.splitext(os.path.basename(filename))[0] + blobs = filter(lambda b: re.match(f"{prefix}-\d+\.pdf", b), blob_container.list_blob_names(name_starts_with=os.path.splitext(os.path.basename(prefix))[0])) + for b in blobs: + if args.verbose: print(f"\tRemoving blob {b}") + blob_container.delete_blob(b) + +def table_to_html(table): + table_html = "" + rows = [sorted([cell for cell in table.cells if cell.row_index == i], key=lambda cell: cell.column_index) for i in range(table.row_count)] + for row_cells in rows: + table_html += "" + for cell in row_cells: + tag = "th" if (cell.kind == "columnHeader" or cell.kind == "rowHeader") else "td" + 
cell_spans = "" + if cell.column_span > 1: cell_spans += f" colSpan={cell.column_span}" + if cell.row_span > 1: cell_spans += f" rowSpan={cell.row_span}" + table_html += f"<{tag}{cell_spans}>{html.escape(cell.content)}</{tag}>" + table_html +="</tr>" + table_html += "</table>
" + return table_html + +def get_document_text(filename): + offset = 0 + page_map = [] + if args.localpdfparser: + reader = PdfReader(filename) + pages = reader.pages + for page_num, p in enumerate(pages): + page_text = p.extract_text() + page_map.append((page_num, offset, page_text)) + offset += len(page_text) + else: + if args.verbose: print(f"Extracting text from '{filename}' using Azure Form Recognizer") + form_recognizer_client = DocumentAnalysisClient(endpoint=f"https://{args.formrecognizerservice}.cognitiveservices.azure.com/", credential=formrecognizer_creds, headers={"x-ms-useragent": "azure-search-chat-demo/1.0.0"}) + with open(filename, "rb") as f: + poller = form_recognizer_client.begin_analyze_document("prebuilt-layout", document = f) + form_recognizer_results = poller.result() + + for page_num, page in enumerate(form_recognizer_results.pages): + tables_on_page = [table for table in form_recognizer_results.tables if table.bounding_regions[0].page_number == page_num + 1] + + # mark all positions of the table spans in the page + page_offset = page.spans[0].offset + page_length = page.spans[0].length + table_chars = [-1]*page_length + for table_id, table in enumerate(tables_on_page): + for span in table.spans: + # replace all table spans with "table_id" in table_chars array + for i in range(span.length): + idx = span.offset - page_offset + i + if idx >=0 and idx < page_length: + table_chars[idx] = table_id + + # build page text by replacing charcters in table spans with table html + page_text = "" + added_tables = set() + for idx, table_id in enumerate(table_chars): + if table_id == -1: + page_text += form_recognizer_results.content[page_offset + idx] + elif not table_id in added_tables: + page_text += table_to_html(tables_on_page[table_id]) + added_tables.add(table_id) + + page_text += " " + page_map.append((page_num, offset, page_text)) + offset += len(page_text) + + return page_map + +def split_text(page_map): + SENTENCE_ENDINGS = [".", "!", "?"] + WORDS_BREAKS = [",", ";", ":", " ", "(", ")", "[", "]", "{", "}", "\t", "\n"] + if args.verbose: print(f"Splitting '{filename}' into sections") + + def find_page(offset): + l = len(page_map) + for i in range(l - 1): + if offset >= page_map[i][1] and offset < page_map[i + 1][1]: + return i + return l - 1 + + all_text = "".join(p[2] for p in page_map) + length = len(all_text) + start = 0 + end = length + while start + SECTION_OVERLAP < length: + last_word = -1 + end = start + MAX_SECTION_LENGTH + + if end > length: + end = length + else: + # Try to find the end of the sentence + while end < length and (end - start - MAX_SECTION_LENGTH) < SENTENCE_SEARCH_LIMIT and all_text[end] not in SENTENCE_ENDINGS: + if all_text[end] in WORDS_BREAKS: + last_word = end + end += 1 + if end < length and all_text[end] not in SENTENCE_ENDINGS and last_word > 0: + end = last_word # Fall back to at least keeping a whole word + if end < length: + end += 1 + + # Try to find the start of the sentence or at least a whole word boundary + last_word = -1 + while start > 0 and start > end - MAX_SECTION_LENGTH - 2 * SENTENCE_SEARCH_LIMIT and all_text[start] not in SENTENCE_ENDINGS: + if all_text[start] in WORDS_BREAKS: + last_word = start + start -= 1 + if all_text[start] not in SENTENCE_ENDINGS and last_word > 0: + start = last_word + if start > 0: + start += 1 + + section_text = all_text[start:end] + yield (section_text, find_page(start)) + + last_table_start = section_text.rfind(" 2 * SENTENCE_SEARCH_LIMIT and last_table_start > section_text.rfind(" 0: + 
results = search_client.upload_documents(documents=batch) + succeeded = sum([1 for r in results if r.succeeded]) + if args.verbose: print(f"\tIndexed {len(results)} sections, {succeeded} succeeded") + +def remove_from_index(filename): + if args.verbose: print(f"Removing sections from '{filename or ''}' from search index '{args.index}'") + search_client = SearchClient(endpoint=f"https://{args.searchservice}.search.windows.net/", + index_name=args.index, + credential=search_creds) + while True: + filter = None if filename == None else f"sourcefile eq '{os.path.basename(filename)}'" + r = search_client.search("", filter=filter, top=1000, include_total_count=True) + if r.get_count() == 0: + break + r = search_client.delete_documents(documents=[{ "id": d["id"] } for d in r]) + if args.verbose: print(f"\tRemoved {len(r)} sections from index") + # It can take a few seconds for search results to reflect changes, so wait a bit + time.sleep(2) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description="Prepare documents by extracting content from PDFs, splitting content into sections, uploading to blob storage, and indexing in a search index.", + epilog="Example: prepdocs.py '..\data\*' --storageaccount myaccount --container mycontainer --searchservice mysearch --index myindex -v" + ) + parser.add_argument("files", help="Files to be processed") + parser.add_argument("--category", help="Value for the category field in the search index for all sections indexed in this run") + parser.add_argument("--skipblobs", action="store_true", help="Skip uploading individual pages to Azure Blob Storage") + parser.add_argument("--storageaccount", help="Azure Blob Storage account name") + parser.add_argument("--container", help="Azure Blob Storage container name") + parser.add_argument("--storagekey", required=False, help="Optional. Use this Azure Blob Storage account key instead of the current user identity to login (use az login to set current user for Azure)") + parser.add_argument("--tenantid", required=False, help="Optional. Use this to define the Azure directory where to authenticate") + parser.add_argument("--searchservice", help="Name of the Azure Cognitive Search service where content should be indexed (must exist already)") + parser.add_argument("--index", help="Name of the Azure Cognitive Search index where content should be indexed (will be created if it doesn't exist)") + parser.add_argument("--searchkey", required=False, help="Optional. Use this Azure Cognitive Search account key instead of the current user identity to login (use az login to set current user for Azure)") + parser.add_argument("--openaiservice", help="Name of the Azure OpenAI service used to compute embeddings") + parser.add_argument("--openaideployment", help="Name of the Azure OpenAI model deployment for an embedding model ('text-embedding-ada-002' recommended)") + parser.add_argument("--novectors", action="store_true", help="Don't compute embeddings for the sections (e.g. don't call the OpenAI embeddings API during indexing)") + parser.add_argument("--openaikey", required=False, help="Optional.
Use this Azure OpenAI account key instead of the current user identity to login (use az login to set current user for Azure)") + parser.add_argument("--remove", action="store_true", help="Remove references to this document from blob storage and the search index") + parser.add_argument("--removeall", action="store_true", help="Remove all blobs from blob storage and documents from the search index") + parser.add_argument("--localpdfparser", action="store_true", help="Use PyPdf local PDF parser (supports only digital PDFs) instead of Azure Form Recognizer service to extract text, tables and layout from the documents") + parser.add_argument("--formrecognizerservice", required=False, help="Optional. Name of the Azure Form Recognizer service which will be used to extract text, tables and layout from the documents (must exist already)") + parser.add_argument("--formrecognizerkey", required=False, help="Optional. Use this Azure Form Recognizer account key instead of the current user identity to login (use az login to set current user for Azure)") + parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output") + args = parser.parse_args() + print("tenant id is ", args.tenantid) + # Use the current user identity to connect to Azure services unless a key is explicitly set for any of them + azd_credential = AzureDeveloperCliCredential() if args.tenantid == None else AzureDeveloperCliCredential(tenant_id=args.tenantid, process_timeout=60) + default_creds = azd_credential if args.searchkey == None or args.storagekey == None else None + search_creds = default_creds if args.searchkey == None else AzureKeyCredential(args.searchkey) + use_vectors = not args.novectors + + if not args.skipblobs: + storage_creds = default_creds if args.storagekey == None else args.storagekey + if not args.localpdfparser: + # check if Azure Form Recognizer credentials are provided + if args.formrecognizerservice == None: + print("Error: Azure Form Recognizer service is not provided. 
Please provide formrecognizerservice or use --localpdfparser for local pypdf parser.") + exit(1) + formrecognizer_creds = default_creds if args.formrecognizerkey == None else AzureKeyCredential(args.formrecognizerkey) + + if use_vectors: + if args.openaikey == None: + openai.api_key = azd_credential.get_token("https://cognitiveservices.azure.com/.default").token + openai.api_type = "azure_ad" + else: + openai.api_type = "azure" + openai.api_key = args.openaikey + + openai.api_base = f"https://{args.openaiservice}.openai.azure.com" + openai.api_version = "2022-12-01" + + if args.removeall: + remove_blobs(None) + remove_from_index(None) + else: + if not args.remove: + create_search_index() + + print(f"Processing files...") + for filename in glob.glob(args.files): + if args.verbose: print(f"Processing '{filename}'") + if args.remove: + remove_blobs(filename) + remove_from_index(filename) + elif args.removeall: + remove_blobs(None) + remove_from_index(None) + else: + if not args.skipblobs: + upload_blobs(filename) + page_map = get_document_text(filename) + sections = create_sections(os.path.basename(filename), page_map, use_vectors) + index_sections(os.path.basename(filename), sections) \ No newline at end of file From 0fc67164102ae836dcdea24ae94685c3e886936c Mon Sep 17 00:00:00 2001 From: james-tn Date: Tue, 12 Sep 2023 09:37:20 -0700 Subject: [PATCH 02/18] add requirement --- .../incubations/copilot/data/dataprep/requirements.txt | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 scenarios/incubations/copilot/data/dataprep/requirements.txt diff --git a/scenarios/incubations/copilot/data/dataprep/requirements.txt b/scenarios/incubations/copilot/data/dataprep/requirements.txt new file mode 100644 index 000000000..56b74b983 --- /dev/null +++ b/scenarios/incubations/copilot/data/dataprep/requirements.txt @@ -0,0 +1,7 @@ +pypdf==3.9.0 +azure-identity==1.13.0 +azure-search-documents==11.4.0b8 +azure-ai-formrecognizer==3.2.1 +azure-storage-blob==12.14.1 +openai[datalib]==0.27.8 +tenacity==8.2.2 \ No newline at end of file From d809a2c1d886a4030ce2323625cf50fabed1db6d Mon Sep 17 00:00:00 2001 From: james-tn Date: Tue, 12 Sep 2023 12:02:23 -0700 Subject: [PATCH 03/18] command run --- .../copilot/data/dataprep/command_run.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 scenarios/incubations/copilot/data/dataprep/command_run.txt diff --git a/scenarios/incubations/copilot/data/dataprep/command_run.txt b/scenarios/incubations/copilot/data/dataprep/command_run.txt new file mode 100644 index 000000000..49db2435b --- /dev/null +++ b/scenarios/incubations/copilot/data/dataprep/command_run.txt @@ -0,0 +1,13 @@ +SET AZURE_STORAGE_ACCOUNT="" +SET AZURE_STORAGE_CONTAINER="content" +SET AZURE_SEARCH_SERVICE="cogsearch001" +SET AZURE_OPENAI_SERVICE="openai002" +SET AZURE_OPENAI_EMB_DEPLOYMENT ="text-embedding-ada-002" +SET AZURE_FORMRECOGNIZER_RESOURCE_GROUP="rg-azure-search-demo-dev" +SET AZURE_FORMRECOGNIZER_SERVICE="" +SET AZURE_TENANT_ID="" +SET AZURE_SEARCH_ADMIN_KEY="" +SET AZURE_SEARCH_INDEX_NAME="" + +python prepdocs.py "./data/*" --storageaccount "%AZURE_STORAGE_ACCOUNT%" --container "%AZURE_STORAGE_CONTAINER%" --searchservice "%AZURE_SEARCH_SERVICE%" --openaiservice "%AZURE_OPENAI_SERVICE%" --openaideployment "%AZURE_OPENAI_EMB_DEPLOYMENT%" --searchkey "%AZURE_SEARCH_ADMIN_KEY%" --index "%AZURE_SEARCH_INDEX_NAME%" --formrecognizerservice "%AZURE_FORMRECOGNIZER_SERVICE%" --tenantid "%AZURE_TENANT_ID%" -v + From a06c598940c56228d80c761f79c681c44552742b Mon Sep 17 
00:00:00 2001 From: james-tn Date: Wed, 13 Sep 2023 10:33:14 -0700 Subject: [PATCH 04/18] add tech expert --- .../copilot/tech_expert/tech_copilot.py | 83 +++++++++++++++++++ .../copilot/tech_expert/tech_copilot_utils.py | 63 ++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 scenarios/incubations/copilot/tech_expert/tech_copilot.py create mode 100644 scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py diff --git a/scenarios/incubations/copilot/tech_expert/tech_copilot.py b/scenarios/incubations/copilot/tech_expert/tech_copilot.py new file mode 100644 index 000000000..0eb571dbb --- /dev/null +++ b/scenarios/incubations/copilot/tech_expert/tech_copilot.py @@ -0,0 +1,83 @@ +import streamlit as st +from streamlit_extras.add_vertical_space import add_vertical_space +from tech_copilot_utils import PERSONA, AVAILABLE_FUNCTIONS, FUNCTIONS_SPEC +import sys +sys.path.append("..") +from utils import Smart_Agent,add_to_cache +import time +import random +import os +from pathlib import Path +import json +print("AVAILABLE_FUNCTIONS", AVAILABLE_FUNCTIONS) +agent = Smart_Agent(persona=PERSONA,functions_list=AVAILABLE_FUNCTIONS, functions_spec=FUNCTIONS_SPEC, init_message="Hi there, this is Maya, Computer science specialist helping with answering technical questions about Computer Science and System, what can I do for you?") + +st.set_page_config(layout="wide",page_title="Enterprise Copilot- A demo of Copilot application using GPT") +styl = f""" + +""" +st.markdown(styl, unsafe_allow_html=True) + + +MAX_HIST= 5 +# Sidebar contents +with st.sidebar: + st.title('Tech Copilot') + st.markdown(''' + This is a demo of Copilot Concept for Computer Science. + + ''') + add_vertical_space(5) + st.write('Created by James N') + if st.button('Clear Chat'): + + if 'history' in st.session_state: + st.session_state['history'] = [] + + if 'history' not in st.session_state: + st.session_state['history'] = [] + if 'input' not in st.session_state: + st.session_state['input'] = "" + + +user_input= st.chat_input("You:") + +## Conditional display of AI generated responses as a function of user provided prompts +history = st.session_state['history'] + +if len(history) > 0: + for message in history: + if message.get("role") != "system" and message.get("name") is None: + with st.chat_message(message["role"]): + st.markdown(message["content"]) +else: + history, agent_response = agent.run(user_input=None) + with st.chat_message("assistant"): + st.markdown(agent_response) + user_history=[] +if user_input: + with st.chat_message("user"): + st.markdown(user_input) + stream_out, query_used, history, agent_response = agent.run(user_input=user_input, conversation=history, stream=True) + with st.chat_message("assistant"): + if stream_out: + message_placeholder = st.empty() + full_response = "" + for response in agent_response: + if len(response.choices)>0: + full_response += response.choices[0].delta.get("content", "") + message_placeholder.markdown(full_response + "▌") + message_placeholder.markdown(full_response) + if query_used: #add to cache + add_to_cache(query_used, full_response) + print(f"query {query_used} added to cache") + history.append({"role": "assistant", "content": full_response}) + else: + st.markdown(agent_response) + +st.session_state['history'] = history \ No newline at end of file diff --git a/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py b/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py new file mode 100644 index 000000000..bf895f916 --- /dev/null +++ 
b/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py @@ -0,0 +1,63 @@ +# Agent class +### responsbility definition: expertise, scope, conversation script, style +import openai +import os +from pathlib import Path +import json +import time +from azure.search.documents.models import Vector +import uuid +from tenacity import retry, wait_random_exponential, stop_after_attempt + +from dotenv import load_dotenv +from azure.core.credentials import AzureKeyCredential +from azure.search.documents import SearchClient +from openai.embeddings_utils import get_embedding, cosine_similarity +import inspect +env_path = Path('..') / 'secrets.env' +load_dotenv(dotenv_path=env_path) +openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY") +openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT") +openai.api_type = "azure" +import sys +import random +sys.path.append("..") +from utils import Agent, Smart_Agent, check_args, search_knowledgebase + + + + + + +PERSONA = """ +You are Maya, an technical support specialist responsible for answering questions about computer science and system. +When you are asked with a question, use the search tool to find relavent knowlege articles to create the answer. +Answer ONLY with the facts from the search tool. If there isn't enough information, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. +Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brakets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf]. +If the user is asking for information that is not related to computer science or computer system, say it's not your area of expertise. +""" + +AVAILABLE_FUNCTIONS = { + "search_knowledgebase": search_knowledgebase, + + } + +FUNCTIONS_SPEC= [ + { + "name": "search_knowledgebase", + "description": "Searches the knowledge base for an answer to the technical question", + "parameters": { + "type": "object", + "properties": { + "search_query": { + "type": "string", + "description": "The search query to use to search the knowledge base" + } + }, + "required": ["search_query"], + }, + }, + +] + + From c1a0e932661757156b93358900403601ce6ae111 Mon Sep 17 00:00:00 2001 From: james-tn Date: Wed, 13 Sep 2023 11:44:35 -0700 Subject: [PATCH 05/18] update citation --- scenarios/incubations/copilot/utils.py | 225 +++++++++++++------------ 1 file changed, 116 insertions(+), 109 deletions(-) diff --git a/scenarios/incubations/copilot/utils.py b/scenarios/incubations/copilot/utils.py index 50aa59224..e5b4925f2 100644 --- a/scenarios/incubations/copilot/utils.py +++ b/scenarios/incubations/copilot/utils.py @@ -19,6 +19,8 @@ openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY") openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT") openai.api_type = "azure" +emb_engine = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") +emb_engine = emb_engine.strip('"') class Search_Client(): def __init__(self,emb_map_file_path): with open(emb_map_file_path) as file: @@ -29,7 +31,7 @@ def find_article(self,question, topk=3): Given an input vector and a dictionary of label vectors, returns the label with the highest cosine similarity to the input vector. 
""" - input_vector = get_embedding(question, engine = 'text-embedding-ada-002') + input_vector = get_embedding(question, engine = emb_engine) # Compute cosine similarity between input vector and each label vector cosine_list=[] for chunk_id,chunk_content, vector in self.chunks_emb: @@ -52,31 +54,35 @@ def find_article(self,question, topk=3): if os.getenv("USE_AZCS") == "True": service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") index_name = os.getenv("AZURE_SEARCH_INDEX_NAME") + index_name = index_name.strip('"') key = os.getenv("AZURE_SEARCH_ADMIN_KEY") - @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) + key = key.strip('"') + # @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) # Function to generate embeddings for title and content fields, also used for query embeddings def generate_embeddings(text): + print("emb_engine", emb_engine) + openai.api_version = "2023-05-15" response = openai.Embedding.create( - input=text, engine="text-embedding-ada-002") + input=text, engine=emb_engine) embeddings = response['data'][0]['embedding'] return embeddings - credential = AzureKeyCredential(key) - azcs_search_client = SearchClient(service_endpoint, index_name, credential=credential) + azcs_search_client = SearchClient(service_endpoint, index_name =index_name , credential=credential) else: faiss_search_client = Search_Client("../data/chunk_emb_map.json") def search_knowledgebase_acs(search_query): - vector = Vector(value=generate_embeddings(search_query), k=3, fields="contentVector") + vector = Vector(value=generate_embeddings(search_query), k=3, fields="embedding") results = azcs_search_client.search( search_text=None, vectors= [vector], - select=["id", "content"], + select=["sourcepage","content"], ) text_content ="" for result in results: - text_content += f"{result['id']}\n{result['content']}\n" + text_content += f"{result['sourcepage']}\n{result['content']}\n" + print("text_content", text_content) return text_content def search_knowledgebase_faiss(search_query): @@ -95,6 +101,7 @@ def search_knowledgebase(search_query): ###Sematic caching implementation if os.getenv("USE_SEMANTIC_CACHE") == "True": cache_index_name = os.getenv("CACHE_INDEX_NAME") + cache_index_name= cache_index_name.strip('"') azcs_semantic_cache_search_client = SearchClient(service_endpoint, cache_index_name, credential=credential) def add_to_cache(search_query, gpt_response): @@ -205,7 +212,7 @@ class Smart_Agent(Agent): engine (str): The name of the GPT engine to use. """ - def __init__(self, persona,functions_spec, functions_list, name=None, init_message=None, engine ="gpt-4"): + def __init__(self, persona,functions_spec, functions_list, name=None, init_message=None, engine =os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT")): super().__init__(engine=engine,persona=persona, init_message=init_message, name=name) self.functions_spec = functions_spec self.functions_list= functions_list @@ -220,106 +227,106 @@ def run(self, user_input, conversation=None, stream = False, api_version = "2023 i=0 query_used = None - while True: - try: - i+=1 - response = openai.ChatCompletion.create( - deployment_id=self.engine, # The deployment name you chose when you deployed the GPT-35-turbo or GPT-4 model. 
- messages=conversation, - functions=self.functions_spec, - function_call="auto", - ) - response_message = response["choices"][0]["message"] - - - # Step 2: check if GPT wanted to call a function - if response_message.get("function_call"): - print("Recommended Function call:") - print(response_message.get("function_call")) - print() - - # Step 3: call the function - # Note: the JSON response may not always be valid; be sure to handle errors - - function_name = response_message["function_call"]["name"] - - # verify function exists - if function_name not in self.functions_list: - print("function list:", self.functions_list) - raise Exception("Function " + function_name + " does not exist") - function_to_call = self.functions_list[function_name] - - # verify function has correct number of arguments - function_args = json.loads(response_message["function_call"]["arguments"]) - - if check_args(function_to_call, function_args) is False: - raise Exception("Invalid number of arguments for function: " + function_name) - - - # check if there's an opprotunity to use semantic cache - if function_name =="search_knowledgebase": - if os.getenv("USE_SEMANTIC_CACHE") == "True": - search_query = function_args["search_query"] - cache_output = get_cache(search_query) - if cache_output is not None: - print("semantic cache hit") - conversation.append({"role": "assistant", "content": cache_output}) - return False, query_used,conversation, cache_output - else: - print("semantic cache missed") - query_used = search_query - - - function_response = function_to_call(**function_args) - print("Output of function call:") - print(function_response) - print() - - - # Step 4: send the info on the function call and function response to GPT - - # adding assistant response to messages - conversation.append( - { - "role": response_message["role"], - "name": response_message["function_call"]["name"], - "content": response_message["function_call"]["arguments"], - } - ) - - # adding function response to messages - conversation.append( - { - "role": "function", - "name": function_name, - "content": function_response, - } - ) # extend conversation with function response - openai.api_version = api_version - - second_response = openai.ChatCompletion.create( - messages=conversation, - deployment_id=self.engine, - stream=stream, - ) # get a new response from GPT where it can see the function response - - if not stream: - assistant_response = second_response["choices"][0]["message"]["content"] - conversation.append({"role": "assistant", "content": assistant_response}) - + # while True: + # try: + # i+=1 + response = openai.ChatCompletion.create( + deployment_id=self.engine, # The deployment name you chose when you deployed the GPT-35-turbo or GPT-4 model. 
+ messages=conversation, + functions=self.functions_spec, + function_call="auto", + ) + response_message = response["choices"][0]["message"] + + + # Step 2: check if GPT wanted to call a function + if response_message.get("function_call"): + print("Recommended Function call:") + print(response_message.get("function_call")) + print() + + # Step 3: call the function + # Note: the JSON response may not always be valid; be sure to handle errors + + function_name = response_message["function_call"]["name"] + + # verify function exists + if function_name not in self.functions_list: + print("function list:", self.functions_list) + raise Exception("Function " + function_name + " does not exist") + function_to_call = self.functions_list[function_name] + + # verify function has correct number of arguments + function_args = json.loads(response_message["function_call"]["arguments"]) + + if check_args(function_to_call, function_args) is False: + raise Exception("Invalid number of arguments for function: " + function_name) + + + # check if there's an opprotunity to use semantic cache + if function_name =="search_knowledgebase": + if os.getenv("USE_SEMANTIC_CACHE") == "True": + search_query = function_args["search_query"] + cache_output = get_cache(search_query) + if cache_output is not None: + print("semantic cache hit") + conversation.append({"role": "assistant", "content": cache_output}) + return False, query_used,conversation, cache_output else: - assistant_response = second_response - - return stream,query_used, conversation, assistant_response - else: - assistant_response = response_message["content"] - conversation.append({"role": "assistant", "content": assistant_response}) - break - except Exception as e: - if i>3: - assistant_response="Haizz, my memory is having some trouble, can you repeat what you just said?" - break - print("Exception as below, will retry\n", str(e)) - time.sleep(5) + print("semantic cache missed") + query_used = search_query + + + function_response = function_to_call(**function_args) + print("Output of function call:") + print(function_response) + print() + + + # Step 4: send the info on the function call and function response to GPT + + # adding assistant response to messages + conversation.append( + { + "role": response_message["role"], + "name": response_message["function_call"]["name"], + "content": response_message["function_call"]["arguments"], + } + ) + + # adding function response to messages + conversation.append( + { + "role": "function", + "name": function_name, + "content": function_response, + } + ) # extend conversation with function response + openai.api_version = api_version + + second_response = openai.ChatCompletion.create( + messages=conversation, + deployment_id=self.engine, + stream=stream, + ) # get a new response from GPT where it can see the function response + + if not stream: + assistant_response = second_response["choices"][0]["message"]["content"] + conversation.append({"role": "assistant", "content": assistant_response}) + + else: + assistant_response = second_response + + return stream,query_used, conversation, assistant_response + else: + assistant_response = response_message["content"] + conversation.append({"role": "assistant", "content": assistant_response}) + # break + # except Exception as e: + # if i>3: + # assistant_response="Haizz, my memory is having some trouble, can you repeat what you just said?" 
+ # break + # print("Exception as below, will retry\n", str(e)) + # time.sleep(5) return False,query_used, conversation, assistant_response \ No newline at end of file From 9644dfa3bfa422d431ef0f31b6f8bf804ba6d185 Mon Sep 17 00:00:00 2001 From: james-tn Date: Thu, 14 Sep 2023 11:18:58 -0700 Subject: [PATCH 06/18] update utils --- scenarios/incubations/copilot/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scenarios/incubations/copilot/utils.py b/scenarios/incubations/copilot/utils.py index e5b4925f2..bfbcc6e37 100644 --- a/scenarios/incubations/copilot/utils.py +++ b/scenarios/incubations/copilot/utils.py @@ -73,11 +73,12 @@ def generate_embeddings(text): def search_knowledgebase_acs(search_query): vector = Vector(value=generate_embeddings(search_query), k=3, fields="embedding") - + print("search query: ", search_query) results = azcs_search_client.search( - search_text=None, + search_text=search_query, vectors= [vector], select=["sourcepage","content"], + top=5 ) text_content ="" for result in results: From 84e7a5132c7d4cce38646e4822fbf3e47db3ef3b Mon Sep 17 00:00:00 2001 From: james-tn Date: Wed, 11 Oct 2023 12:57:33 -0700 Subject: [PATCH 07/18] update tech expert --- .../copilot/tech_expert/tech_copilot.py | 4 +- .../copilot/tech_expert/tech_copilot_utils.py | 44 +++++++++++++++---- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/scenarios/incubations/copilot/tech_expert/tech_copilot.py b/scenarios/incubations/copilot/tech_expert/tech_copilot.py index 0eb571dbb..e68a78059 100644 --- a/scenarios/incubations/copilot/tech_expert/tech_copilot.py +++ b/scenarios/incubations/copilot/tech_expert/tech_copilot.py @@ -10,7 +10,7 @@ from pathlib import Path import json print("AVAILABLE_FUNCTIONS", AVAILABLE_FUNCTIONS) -agent = Smart_Agent(persona=PERSONA,functions_list=AVAILABLE_FUNCTIONS, functions_spec=FUNCTIONS_SPEC, init_message="Hi there, this is Maya, Computer science specialist helping with answering technical questions about Computer Science and System, what can I do for you?") +agent = Smart_Agent(persona=PERSONA,functions_list=AVAILABLE_FUNCTIONS, functions_spec=FUNCTIONS_SPEC, init_message="Hi there, this is Maya, Computer science specialist helping with answering technical questions about enterprise networking and System, what can I do for you?") st.set_page_config(layout="wide",page_title="Enterprise Copilot- A demo of Copilot application using GPT") styl = f""" @@ -29,7 +29,7 @@ with st.sidebar: st.title('Tech Copilot') st.markdown(''' - This is a demo of Copilot Concept for Computer Science. + This is a demo of Copilot Concept for Enterprise Networking Technical Support. 
''') add_vertical_space(5) diff --git a/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py b/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py index bf895f916..f8b28168d 100644 --- a/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py +++ b/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py @@ -14,27 +14,55 @@ from azure.search.documents import SearchClient from openai.embeddings_utils import get_embedding, cosine_similarity import inspect -env_path = Path('..') / 'secrets.env' -load_dotenv(dotenv_path=env_path) -openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY") -openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT") -openai.api_type = "azure" +# env_path = Path('.') / 'secrets.env' +# load_dotenv(dotenv_path=env_path) +# openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY") +# openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT") +# openai.api_type = "azure" import sys import random sys.path.append("..") from utils import Agent, Smart_Agent, check_args, search_knowledgebase +# service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") +# index_name = os.getenv("AZURE_SEARCH_INDEX_NAME") +# index_name = index_name.strip('"') +# key = os.getenv("AZURE_SEARCH_ADMIN_KEY") +# key = key.strip('"') +# emb_engine = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") +# credential = AzureKeyCredential(key) +# azcs_search_client = SearchClient(service_endpoint, index_name =index_name , credential=credential) - +# def generate_embeddings(text): +# openai.api_version = "2023-05-15" +# response = openai.Embedding.create( +# input=text, engine=emb_engine) +# embeddings = response['data'][0]['embedding'] +# return embeddings +# def search_knowledgebase_acs(search_query): +# vector = Vector(value=generate_embeddings(search_query), k=3, fields="embedding") +# print("search query: ", search_query) +# results = azcs_search_client.search( +# search_text=search_query, +# vectors= [vector], +# select=["sourcepage","content"], +# top=5 +# ) +# text_content ="" +# for result in results: +# text_content += f"{result['sourcepage']}\n{result['content']}\n" +# print("text_content", text_content) +# return text_content + PERSONA = """ -You are Maya, an technical support specialist responsible for answering questions about computer science and system. +You are Maya, an technical support specialist responsible for answering questions about computer networking and system. When you are asked with a question, use the search tool to find relavent knowlege articles to create the answer. Answer ONLY with the facts from the search tool. If there isn't enough information, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brakets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf]. -If the user is asking for information that is not related to computer science or computer system, say it's not your area of expertise. +If the user is asking for information that is not related to computer networking, say it's not your area of expertise. 
""" AVAILABLE_FUNCTIONS = { From 2fe6126746b666179d730ea58fc94f90870be15b Mon Sep 17 00:00:00 2001 From: james-tn Date: Wed, 11 Oct 2023 13:00:35 -0700 Subject: [PATCH 08/18] update real time --- .../copilot/data/dataprep/command_run.txt | 14 +++++++------- .../incubations/copilot/data/dataprep/prepdocs.py | 2 ++ scenarios/incubations/copilot/requirements.txt | 4 +++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/scenarios/incubations/copilot/data/dataprep/command_run.txt b/scenarios/incubations/copilot/data/dataprep/command_run.txt index 49db2435b..f11340726 100644 --- a/scenarios/incubations/copilot/data/dataprep/command_run.txt +++ b/scenarios/incubations/copilot/data/dataprep/command_run.txt @@ -1,13 +1,13 @@ -SET AZURE_STORAGE_ACCOUNT="" +SET AZURE_STORAGE_ACCOUNT="sttyf3apvjit5gg" SET AZURE_STORAGE_CONTAINER="content" SET AZURE_SEARCH_SERVICE="cogsearch001" SET AZURE_OPENAI_SERVICE="openai002" -SET AZURE_OPENAI_EMB_DEPLOYMENT ="text-embedding-ada-002" +SET AZURE_OPENAI_EMB_DEPLOYMENT="text-embedding-ada-002" SET AZURE_FORMRECOGNIZER_RESOURCE_GROUP="rg-azure-search-demo-dev" -SET AZURE_FORMRECOGNIZER_SERVICE="" -SET AZURE_TENANT_ID="" -SET AZURE_SEARCH_ADMIN_KEY="" -SET AZURE_SEARCH_INDEX_NAME="" +SET AZURE_FORMRECOGNIZER_SERVICE="cog-fr-tyf3apvjit5gg" +SET AZURE_TENANT_ID="0fbe7234-45ea-498b-b7e4-1a8b2d3be4d9" +SET AZURE_SEARCH_ADMIN_KEY="2HwNw2JAmUjp8lYFfiiIMeIFTPye8rHHq9ZYu3hc9YAzSeA9Vl5L" +SET AZURE_SEARCH_INDEX_NAME="enetwork" -python prepdocs.py "./data/*" --storageaccount "%AZURE_STORAGE_ACCOUNT%" --container "%AZURE_STORAGE_CONTAINER%" --searchservice "%AZURE_SEARCH_SERVICE%" --openaiservice "%AZURE_OPENAI_SERVICE%" --openaideployment "%AZURE_OPENAI_EMB_DEPLOYMENT%" --searchkey "%AZURE_SEARCH_ADMIN_KEY%" --index "%AZURE_SEARCH_INDEX_NAME%" --formrecognizerservice "%AZURE_FORMRECOGNIZER_SERVICE%" --tenantid "%AZURE_TENANT_ID%" -v +python prepdocs.py "../EVA_documents/*/*.pdf" --storageaccount "%AZURE_STORAGE_ACCOUNT%" --container "%AZURE_STORAGE_CONTAINER%" --searchservice "%AZURE_SEARCH_SERVICE%" --openaiservice "%AZURE_OPENAI_SERVICE%" --openaideployment "%AZURE_OPENAI_EMB_DEPLOYMENT%" --searchkey "%AZURE_SEARCH_ADMIN_KEY%" --index "%AZURE_SEARCH_INDEX_NAME%" --formrecognizerservice "%AZURE_FORMRECOGNIZER_SERVICE%" --tenantid "%AZURE_TENANT_ID%" -v diff --git a/scenarios/incubations/copilot/data/dataprep/prepdocs.py b/scenarios/incubations/copilot/data/dataprep/prepdocs.py index 9fb853235..cb7d1c78d 100644 --- a/scenarios/incubations/copilot/data/dataprep/prepdocs.py +++ b/scenarios/incubations/copilot/data/dataprep/prepdocs.py @@ -212,6 +212,7 @@ def before_retry_sleep(retry_state): @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(15), before_sleep=before_retry_sleep) def compute_embedding(text): + # print("args.openaideployment", args.openaideployment) return openai.Embedding.create(engine=args.openaideployment, input=text)["data"][0]["embedding"] def create_search_index(): @@ -355,6 +356,7 @@ def remove_from_index(filename): create_search_index() print(f"Processing files...") + print(args.files) for filename in glob.glob(args.files): if args.verbose: print(f"Processing '{filename}'") if args.remove: diff --git a/scenarios/incubations/copilot/requirements.txt b/scenarios/incubations/copilot/requirements.txt index 457a4d27e..85545889a 100644 --- a/scenarios/incubations/copilot/requirements.txt +++ b/scenarios/incubations/copilot/requirements.txt @@ -4,4 +4,6 @@ streamlit-extras python-dotenv plotly scipy -scikit-learn \ No newline 
at end of file +scikit-learn +azure-search-documents==11.4.0b10 +faiss-cpu From 629d94a534492c85d6d2a60bf9da7dbf568b2cb7 Mon Sep 17 00:00:00 2001 From: james-tn Date: Wed, 11 Oct 2023 13:00:49 -0700 Subject: [PATCH 09/18] update real time --- scenarios/incubations/copilot/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scenarios/incubations/copilot/utils.py b/scenarios/incubations/copilot/utils.py index bfbcc6e37..af09a3572 100644 --- a/scenarios/incubations/copilot/utils.py +++ b/scenarios/incubations/copilot/utils.py @@ -95,6 +95,7 @@ def search_knowledgebase(search_query): return search_knowledgebase_acs(search_query) else: print("using faiss") + print(os.getenv("USE_AZCS")) return search_knowledgebase_faiss(search_query) @@ -217,7 +218,8 @@ def __init__(self, persona,functions_spec, functions_list, name=None, init_messa super().__init__(engine=engine,persona=persona, init_message=init_message, name=name) self.functions_spec = functions_spec self.functions_list= functions_list - + + @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) def run(self, user_input, conversation=None, stream = False, api_version = "2023-07-01-preview"): openai.api_version = api_version if user_input is None: #if no input return init message From 69dd398b52e62552be7f5b740ac8193e5623c49a Mon Sep 17 00:00:00 2001 From: james-tn Date: Thu, 12 Oct 2023 14:51:43 -0700 Subject: [PATCH 10/18] add tech expert --- .../copilot/tech_expert/requirements.txt | 9 + .../copilot/tech_expert/tech_copilot.py | 3 +- .../copilot/tech_expert/tech_copilot_utils.py | 3 +- .../incubations/copilot/tech_expert/utils.py | 335 ++++++++++++++++++ 4 files changed, 346 insertions(+), 4 deletions(-) create mode 100644 scenarios/incubations/copilot/tech_expert/requirements.txt create mode 100644 scenarios/incubations/copilot/tech_expert/utils.py diff --git a/scenarios/incubations/copilot/tech_expert/requirements.txt b/scenarios/incubations/copilot/tech_expert/requirements.txt new file mode 100644 index 000000000..0bfb3cf51 --- /dev/null +++ b/scenarios/incubations/copilot/tech_expert/requirements.txt @@ -0,0 +1,9 @@ +streamlit +openai +streamlit-extras +python-dotenv +plotly +scipy +scikit-learn +azure-search-documents==11.4.0b9 +faiss-cpu diff --git a/scenarios/incubations/copilot/tech_expert/tech_copilot.py b/scenarios/incubations/copilot/tech_expert/tech_copilot.py index e68a78059..86d507756 100644 --- a/scenarios/incubations/copilot/tech_expert/tech_copilot.py +++ b/scenarios/incubations/copilot/tech_expert/tech_copilot.py @@ -2,7 +2,6 @@ from streamlit_extras.add_vertical_space import add_vertical_space from tech_copilot_utils import PERSONA, AVAILABLE_FUNCTIONS, FUNCTIONS_SPEC import sys -sys.path.append("..") from utils import Smart_Agent,add_to_cache import time import random @@ -10,7 +9,7 @@ from pathlib import Path import json print("AVAILABLE_FUNCTIONS", AVAILABLE_FUNCTIONS) -agent = Smart_Agent(persona=PERSONA,functions_list=AVAILABLE_FUNCTIONS, functions_spec=FUNCTIONS_SPEC, init_message="Hi there, this is Maya, Computer science specialist helping with answering technical questions about enterprise networking and System, what can I do for you?") +agent = Smart_Agent(persona=PERSONA,functions_list=AVAILABLE_FUNCTIONS, functions_spec=FUNCTIONS_SPEC, init_message="Hi there, this is Maya, technical specialist helping with questions about networking and system, what can I do for you?") st.set_page_config(layout="wide",page_title="Enterprise Copilot- A demo of Copilot application 
using GPT") styl = f""" diff --git a/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py b/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py index f8b28168d..566587cba 100644 --- a/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py +++ b/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py @@ -21,7 +21,6 @@ # openai.api_type = "azure" import sys import random -sys.path.append("..") from utils import Agent, Smart_Agent, check_args, search_knowledgebase # service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") # index_name = os.getenv("AZURE_SEARCH_INDEX_NAME") @@ -58,7 +57,7 @@ PERSONA = """ -You are Maya, an technical support specialist responsible for answering questions about computer networking and system. +You are Maya, a technical support specialist responsible for answering questions about computer networking and system. When you are asked with a question, use the search tool to find relavent knowlege articles to create the answer. Answer ONLY with the facts from the search tool. If there isn't enough information, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brakets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf]. diff --git a/scenarios/incubations/copilot/tech_expert/utils.py b/scenarios/incubations/copilot/tech_expert/utils.py new file mode 100644 index 000000000..af09a3572 --- /dev/null +++ b/scenarios/incubations/copilot/tech_expert/utils.py @@ -0,0 +1,335 @@ +# Agent class +### responsbility definition: expertise, scope, conversation script, style +import openai +import os +from pathlib import Path +import json +import time +from azure.search.documents.models import Vector +import uuid +from tenacity import retry, wait_random_exponential, stop_after_attempt + +from dotenv import load_dotenv +from azure.core.credentials import AzureKeyCredential +from azure.search.documents import SearchClient +from openai.embeddings_utils import get_embedding, cosine_similarity +import inspect +env_path = Path('.') / 'secrets.env' +load_dotenv(dotenv_path=env_path) +openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY") +openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT") +openai.api_type = "azure" +emb_engine = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") +emb_engine = emb_engine.strip('"') +class Search_Client(): + def __init__(self,emb_map_file_path): + with open(emb_map_file_path) as file: + self.chunks_emb = json.load(file) + + def find_article(self,question, topk=3): + """ + Given an input vector and a dictionary of label vectors, + returns the label with the highest cosine similarity to the input vector. + """ + input_vector = get_embedding(question, engine = emb_engine) + # Compute cosine similarity between input vector and each label vector + cosine_list=[] + for chunk_id,chunk_content, vector in self.chunks_emb: + #by default, we use embedding for the entire content of the topic (plus topic descrition). 
+ # If you you want to use embedding on just topic name and description use this code cosine_sim = cosine_similarity(input_vector, vector[0]) + cosine_sim = cosine_similarity(input_vector, vector) + cosine_list.append((chunk_id,chunk_content,cosine_sim )) + cosine_list.sort(key=lambda x:x[2],reverse=True) + cosine_list= cosine_list[:topk] + best_chunks =[chunk[0] for chunk in cosine_list] + contents = [chunk[1] for chunk in cosine_list] + text_content = "" + for chunk_id, content in zip(best_chunks, contents): + text_content += f"{chunk_id}\n{content}\n" + + return text_content + + +#azcs implementation +if os.getenv("USE_AZCS") == "True": + service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") + index_name = os.getenv("AZURE_SEARCH_INDEX_NAME") + index_name = index_name.strip('"') + key = os.getenv("AZURE_SEARCH_ADMIN_KEY") + key = key.strip('"') + # @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) + # Function to generate embeddings for title and content fields, also used for query embeddings + def generate_embeddings(text): + print("emb_engine", emb_engine) + openai.api_version = "2023-05-15" + response = openai.Embedding.create( + input=text, engine=emb_engine) + embeddings = response['data'][0]['embedding'] + return embeddings + credential = AzureKeyCredential(key) + azcs_search_client = SearchClient(service_endpoint, index_name =index_name , credential=credential) +else: + faiss_search_client = Search_Client("../data/chunk_emb_map.json") + +def search_knowledgebase_acs(search_query): + vector = Vector(value=generate_embeddings(search_query), k=3, fields="embedding") + print("search query: ", search_query) + results = azcs_search_client.search( + search_text=search_query, + vectors= [vector], + select=["sourcepage","content"], + top=5 + ) + text_content ="" + for result in results: + text_content += f"{result['sourcepage']}\n{result['content']}\n" + print("text_content", text_content) + return text_content + +def search_knowledgebase_faiss(search_query): + return faiss_search_client.find_article(search_query) + +def search_knowledgebase(search_query): + if os.getenv("USE_AZCS") == "True": + print("using azcs") + return search_knowledgebase_acs(search_query) + else: + print("using faiss") + print(os.getenv("USE_AZCS")) + return search_knowledgebase_faiss(search_query) + + + +###Sematic caching implementation +if os.getenv("USE_SEMANTIC_CACHE") == "True": + cache_index_name = os.getenv("CACHE_INDEX_NAME") + cache_index_name= cache_index_name.strip('"') + azcs_semantic_cache_search_client = SearchClient(service_endpoint, cache_index_name, credential=credential) + +def add_to_cache(search_query, gpt_response): + search_doc = { + "id" : str(uuid.uuid4()), + "search_query" : search_query, + "search_query_vector" : generate_embeddings(search_query), + "gpt_response" : gpt_response + } + azcs_semantic_cache_search_client.upload_documents(documents = [search_doc]) + +def get_cache(search_query): + vector = Vector(value=generate_embeddings(search_query), k=3, fields="search_query_vector") + + results = azcs_semantic_cache_search_client.search( + search_text=None, + vectors= [vector], + select=["gpt_response"], + ) + try: + result =next(results) + print("threshold ", result['@search.score']) + if result['@search.score']>= float(os.getenv("SEMANTIC_HIT_THRESHOLD")): + return result['gpt_response'] + except StopIteration: + pass + + return None + + +def gpt_stream_wrapper(response): + for chunk in response: + chunk_msg= chunk['choices'][0]['delta'] + 
chunk_msg= chunk_msg.get('content',"") + yield chunk_msg +class Agent(): #Base class for Agent + def __init__(self, engine,persona, name=None, init_message=None): + if init_message is not None: + init_hist =[{"role":"system", "content":persona}, {"role":"assistant", "content":init_message}] + else: + init_hist =[{"role":"system", "content":persona}] + + self.init_history = init_hist + self.persona = persona + self.engine = engine + self.name= name + def generate_response(self, new_input,history=None, stream = False,request_timeout =20,api_version = "2023-05-15"): + openai.api_version = api_version + if new_input is None: # return init message + return self.init_history[1]["content"] + messages = self.init_history.copy() + if history is not None: + for user_question, bot_response in history: + messages.append({"role":"user", "content":user_question}) + messages.append({"role":"assistant", "content":bot_response}) + messages.append({"role":"user", "content":new_input}) + response = openai.ChatCompletion.create( + engine=self.engine, + messages=messages, + stream=stream, + request_timeout =request_timeout + ) + if not stream: + return response['choices'][0]['message']['content'] + else: + return gpt_stream_wrapper(response) + def run(self, **kwargs): + return self.generate_response(**kwargs) + + + +def check_args(function, args): + sig = inspect.signature(function) + params = sig.parameters + + # Check if there are extra arguments + for name in args: + if name not in params: + return False + # Check if the required arguments are provided + for name, param in params.items(): + if param.default is param.empty and name not in args: + return False + + return True + +class Smart_Agent(Agent): + """ + Agent that can use other agents and tools to answer questions. + + Args: + persona (str): The persona of the agent. + tools (list): A list of {"tool_name":tool} that the agent can use to answer questions. Tool must have a run method that takes a question and returns an answer. + stop (list): A list of strings that the agent will use to stop the conversation. + init_message (str): The initial message of the agent. Defaults to None. + engine (str): The name of the GPT engine to use. Defaults to "gpt-35-turbo". + + Methods: + llm(new_input, stop, history=None, stream=False): Generates a response to the input using the LLM model. + _run(new_input, stop, history=None, stream=False): Runs the agent and generates a response to the input. + run(new_input, history=None, stream=False): Runs the agent and generates a response to the input. + + Attributes: + persona (str): The persona of the agent. + tools (list): A list of {"tool_name":tool} that the agent can use to answer questions. Tool must have a run method that takes a question and returns an answer. + stop (list): A list of strings that the agent will use to stop the conversation. + init_message (str): The initial message of the agent. + engine (str): The name of the GPT engine to use. 
+ """ + + def __init__(self, persona,functions_spec, functions_list, name=None, init_message=None, engine =os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT")): + super().__init__(engine=engine,persona=persona, init_message=init_message, name=name) + self.functions_spec = functions_spec + self.functions_list= functions_list + + @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) + def run(self, user_input, conversation=None, stream = False, api_version = "2023-07-01-preview"): + openai.api_version = api_version + if user_input is None: #if no input return init message + return self.init_history, self.init_history[1]["content"] + if conversation is None: #if no history return init message + conversation = self.init_history.copy() + conversation.append({"role": "user", "content": user_input}) + i=0 + query_used = None + + # while True: + # try: + # i+=1 + response = openai.ChatCompletion.create( + deployment_id=self.engine, # The deployment name you chose when you deployed the GPT-35-turbo or GPT-4 model. + messages=conversation, + functions=self.functions_spec, + function_call="auto", + ) + response_message = response["choices"][0]["message"] + + + # Step 2: check if GPT wanted to call a function + if response_message.get("function_call"): + print("Recommended Function call:") + print(response_message.get("function_call")) + print() + + # Step 3: call the function + # Note: the JSON response may not always be valid; be sure to handle errors + + function_name = response_message["function_call"]["name"] + + # verify function exists + if function_name not in self.functions_list: + print("function list:", self.functions_list) + raise Exception("Function " + function_name + " does not exist") + function_to_call = self.functions_list[function_name] + + # verify function has correct number of arguments + function_args = json.loads(response_message["function_call"]["arguments"]) + + if check_args(function_to_call, function_args) is False: + raise Exception("Invalid number of arguments for function: " + function_name) + + + # check if there's an opprotunity to use semantic cache + if function_name =="search_knowledgebase": + if os.getenv("USE_SEMANTIC_CACHE") == "True": + search_query = function_args["search_query"] + cache_output = get_cache(search_query) + if cache_output is not None: + print("semantic cache hit") + conversation.append({"role": "assistant", "content": cache_output}) + return False, query_used,conversation, cache_output + else: + print("semantic cache missed") + query_used = search_query + + + function_response = function_to_call(**function_args) + print("Output of function call:") + print(function_response) + print() + + + # Step 4: send the info on the function call and function response to GPT + + # adding assistant response to messages + conversation.append( + { + "role": response_message["role"], + "name": response_message["function_call"]["name"], + "content": response_message["function_call"]["arguments"], + } + ) + + # adding function response to messages + conversation.append( + { + "role": "function", + "name": function_name, + "content": function_response, + } + ) # extend conversation with function response + openai.api_version = api_version + + second_response = openai.ChatCompletion.create( + messages=conversation, + deployment_id=self.engine, + stream=stream, + ) # get a new response from GPT where it can see the function response + + if not stream: + assistant_response = second_response["choices"][0]["message"]["content"] + 
conversation.append({"role": "assistant", "content": assistant_response}) + + else: + assistant_response = second_response + + return stream,query_used, conversation, assistant_response + else: + assistant_response = response_message["content"] + conversation.append({"role": "assistant", "content": assistant_response}) + # break + # except Exception as e: + # if i>3: + # assistant_response="Haizz, my memory is having some trouble, can you repeat what you just said?" + # break + # print("Exception as below, will retry\n", str(e)) + # time.sleep(5) + + return False,query_used, conversation, assistant_response \ No newline at end of file From 92778a548f91798365639d13361b1ae01f98170a Mon Sep 17 00:00:00 2001 From: james-tn Date: Thu, 12 Oct 2023 14:59:25 -0700 Subject: [PATCH 11/18] remove commands --- scenarios/incubations/copilot/data/dataprep/command_run.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scenarios/incubations/copilot/data/dataprep/command_run.txt b/scenarios/incubations/copilot/data/dataprep/command_run.txt index f11340726..e4baf8b4c 100644 --- a/scenarios/incubations/copilot/data/dataprep/command_run.txt +++ b/scenarios/incubations/copilot/data/dataprep/command_run.txt @@ -6,7 +6,7 @@ SET AZURE_OPENAI_EMB_DEPLOYMENT="text-embedding-ada-002" SET AZURE_FORMRECOGNIZER_RESOURCE_GROUP="rg-azure-search-demo-dev" SET AZURE_FORMRECOGNIZER_SERVICE="cog-fr-tyf3apvjit5gg" SET AZURE_TENANT_ID="0fbe7234-45ea-498b-b7e4-1a8b2d3be4d9" -SET AZURE_SEARCH_ADMIN_KEY="2HwNw2JAmUjp8lYFfiiIMeIFTPye8rHHq9ZYu3hc9YAzSeA9Vl5L" +SET AZURE_SEARCH_ADMIN_KEY="" SET AZURE_SEARCH_INDEX_NAME="enetwork" python prepdocs.py "../EVA_documents/*/*.pdf" --storageaccount "%AZURE_STORAGE_ACCOUNT%" --container "%AZURE_STORAGE_CONTAINER%" --searchservice "%AZURE_SEARCH_SERVICE%" --openaiservice "%AZURE_OPENAI_SERVICE%" --openaideployment "%AZURE_OPENAI_EMB_DEPLOYMENT%" --searchkey "%AZURE_SEARCH_ADMIN_KEY%" --index "%AZURE_SEARCH_INDEX_NAME%" --formrecognizerservice "%AZURE_FORMRECOGNIZER_SERVICE%" --tenantid "%AZURE_TENANT_ID%" -v From dc41a5dbb499613e41067543d7c56e70d9ddaee8 Mon Sep 17 00:00:00 2001 From: james-tn Date: Fri, 13 Oct 2023 19:12:36 -0700 Subject: [PATCH 12/18] add filtering --- .../copilot/tech_expert/tech_copilot.py | 11 +- .../copilot/tech_expert/tech_copilot_utils.py | 308 +++++++++++++--- .../copilot/tech_expert/user_profile.json | 1 + .../incubations/copilot/tech_expert/utils.py | 335 ------------------ 4 files changed, 274 insertions(+), 381 deletions(-) create mode 100644 scenarios/incubations/copilot/tech_expert/user_profile.json delete mode 100644 scenarios/incubations/copilot/tech_expert/utils.py diff --git a/scenarios/incubations/copilot/tech_expert/tech_copilot.py b/scenarios/incubations/copilot/tech_expert/tech_copilot.py index 86d507756..d732c1805 100644 --- a/scenarios/incubations/copilot/tech_expert/tech_copilot.py +++ b/scenarios/incubations/copilot/tech_expert/tech_copilot.py @@ -1,15 +1,18 @@ import streamlit as st from streamlit_extras.add_vertical_space import add_vertical_space -from tech_copilot_utils import PERSONA, AVAILABLE_FUNCTIONS, FUNCTIONS_SPEC +from tech_copilot_utils import PERSONA, AVAILABLE_FUNCTIONS, FUNCTIONS_SPEC, Smart_Agent, add_to_cache import sys -from utils import Smart_Agent,add_to_cache import time import random import os from pathlib import Path import json -print("AVAILABLE_FUNCTIONS", AVAILABLE_FUNCTIONS) -agent = Smart_Agent(persona=PERSONA,functions_list=AVAILABLE_FUNCTIONS, functions_spec=FUNCTIONS_SPEC, init_message="Hi there, 
this is Maya, technical specialist helping with questions about networking and system, what can I do for you?") +with open('./user_profile.json') as f: + user_profile = json.load(f) +functions = FUNCTIONS_SPEC.copy() +functions[0]["parameters"]["properties"]["products"]["description"] = functions[0]["parameters"]["properties"]["products"]["description"].format(products=user_profile['products']) + +agent = Smart_Agent(persona=PERSONA.format(username=user_profile['username'],products=user_profile['products'] ),functions_list=AVAILABLE_FUNCTIONS, functions_spec=functions, init_message=f"Hi {user_profile['username']}, this is Maya, technical specialist helping with questions about networking and system, what can I do for you?") st.set_page_config(layout="wide",page_title="Enterprise Copilot- A demo of Copilot application using GPT") styl = f""" diff --git a/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py b/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py index 566587cba..4cab5ffd5 100644 --- a/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py +++ b/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py @@ -1,5 +1,3 @@ -# Agent class -### responsbility definition: expertise, scope, conversation script, style import openai import os from pathlib import Path @@ -14,51 +12,272 @@ from azure.search.documents import SearchClient from openai.embeddings_utils import get_embedding, cosine_similarity import inspect -# env_path = Path('.') / 'secrets.env' -# load_dotenv(dotenv_path=env_path) -# openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY") -# openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT") -# openai.api_type = "azure" -import sys -import random -from utils import Agent, Smart_Agent, check_args, search_knowledgebase -# service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") -# index_name = os.getenv("AZURE_SEARCH_INDEX_NAME") -# index_name = index_name.strip('"') -# key = os.getenv("AZURE_SEARCH_ADMIN_KEY") -# key = key.strip('"') -# emb_engine = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") -# credential = AzureKeyCredential(key) -# azcs_search_client = SearchClient(service_endpoint, index_name =index_name , credential=credential) - - -# def generate_embeddings(text): -# openai.api_version = "2023-05-15" -# response = openai.Embedding.create( -# input=text, engine=emb_engine) -# embeddings = response['data'][0]['embedding'] -# return embeddings -# def search_knowledgebase_acs(search_query): -# vector = Vector(value=generate_embeddings(search_query), k=3, fields="embedding") -# print("search query: ", search_query) -# results = azcs_search_client.search( -# search_text=search_query, -# vectors= [vector], -# select=["sourcepage","content"], -# top=5 -# ) -# text_content ="" -# for result in results: -# text_content += f"{result['sourcepage']}\n{result['content']}\n" -# print("text_content", text_content) -# return text_content +env_path = Path('.') / 'secrets.env' +load_dotenv(dotenv_path=env_path) +openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY") +openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT") +openai.api_type = "azure" +emb_engine = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") +emb_engine = emb_engine.strip('"') +#azcs implementation +service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") +index_name = os.getenv("AZURE_SEARCH_INDEX_NAME") +index_name = index_name.strip('"') +key = os.getenv("AZURE_SEARCH_ADMIN_KEY") +key = key.strip('"') +# @retry(wait=wait_random_exponential(min=1, max=20), 
stop=stop_after_attempt(6)) +# Function to generate embeddings for title and content fields, also used for query embeddings +def generate_embeddings(text): + print("emb_engine", emb_engine) + openai.api_version = "2023-05-15" + response = openai.Embedding.create( + input=text, engine=emb_engine) + embeddings = response['data'][0]['embedding'] + return embeddings +credential = AzureKeyCredential(key) +azcs_search_client = SearchClient(service_endpoint, index_name =index_name , credential=credential) +def search_knowledgebase(search_query, products): + + vector = Vector(value=generate_embeddings(search_query), k=3, fields="embedding") + print("search query: ", search_query) + print("products: ", products.split(",")) + results = azcs_search_client.search( + search_text=search_query, + vectors= [vector], + select=["sourcepage","content"], + top=3 + ) + text_content ="" + for result in results: + text_content += f"{result['sourcepage']}\n{result['content']}\n" + print("text_content", text_content) + return text_content + + +###Sematic caching implementation +if os.getenv("USE_SEMANTIC_CACHE") == "True": + cache_index_name = os.getenv("CACHE_INDEX_NAME") + cache_index_name= cache_index_name.strip('"') + azcs_semantic_cache_search_client = SearchClient(service_endpoint, cache_index_name, credential=credential) + +def add_to_cache(search_query, gpt_response): + search_doc = { + "id" : str(uuid.uuid4()), + "search_query" : search_query, + "search_query_vector" : generate_embeddings(search_query), + "gpt_response" : gpt_response + } + azcs_semantic_cache_search_client.upload_documents(documents = [search_doc]) + +def gpt_stream_wrapper(response): + for chunk in response: + chunk_msg= chunk['choices'][0]['delta'] + chunk_msg= chunk_msg.get('content',"") + yield chunk_msg +class Agent(): #Base class for Agent + def __init__(self, engine,persona, name=None, init_message=None): + if init_message is not None: + init_hist =[{"role":"system", "content":persona}, {"role":"assistant", "content":init_message}] + else: + init_hist =[{"role":"system", "content":persona}] + + self.init_history = init_hist + self.persona = persona + self.engine = engine + self.name= name + def generate_response(self, new_input,history=None, stream = False,request_timeout =20,api_version = "2023-05-15"): + openai.api_version = api_version + if new_input is None: # return init message + return self.init_history[1]["content"] + messages = self.init_history.copy() + if history is not None: + for user_question, bot_response in history: + messages.append({"role":"user", "content":user_question}) + messages.append({"role":"assistant", "content":bot_response}) + messages.append({"role":"user", "content":new_input}) + response = openai.ChatCompletion.create( + engine=self.engine, + messages=messages, + stream=stream, + request_timeout =request_timeout + ) + if not stream: + return response['choices'][0]['message']['content'] + else: + return gpt_stream_wrapper(response) + def run(self, **kwargs): + return self.generate_response(**kwargs) + + + +def check_args(function, args): + sig = inspect.signature(function) + params = sig.parameters + + # Check if there are extra arguments + for name in args: + if name not in params: + return False + # Check if the required arguments are provided + for name, param in params.items(): + if param.default is param.empty and name not in args: + return False + + return True + +class Smart_Agent(Agent): + """ + Agent that can use other agents and tools to answer questions. 
+ + Args: + persona (str): The persona of the agent. + tools (list): A list of {"tool_name":tool} that the agent can use to answer questions. Tool must have a run method that takes a question and returns an answer. + stop (list): A list of strings that the agent will use to stop the conversation. + init_message (str): The initial message of the agent. Defaults to None. + engine (str): The name of the GPT engine to use. Defaults to "gpt-35-turbo". + + Methods: + llm(new_input, stop, history=None, stream=False): Generates a response to the input using the LLM model. + _run(new_input, stop, history=None, stream=False): Runs the agent and generates a response to the input. + run(new_input, history=None, stream=False): Runs the agent and generates a response to the input. + + Attributes: + persona (str): The persona of the agent. + tools (list): A list of {"tool_name":tool} that the agent can use to answer questions. Tool must have a run method that takes a question and returns an answer. + stop (list): A list of strings that the agent will use to stop the conversation. + init_message (str): The initial message of the agent. + engine (str): The name of the GPT engine to use. + """ + + def __init__(self, persona,functions_spec, functions_list, name=None, init_message=None, engine =os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT")): + super().__init__(engine=engine,persona=persona, init_message=init_message, name=name) + self.functions_spec = functions_spec + self.functions_list= functions_list + + @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) + def run(self, user_input, conversation=None, stream = False, api_version = "2023-07-01-preview"): + openai.api_version = api_version + if user_input is None: #if no input return init message + return self.init_history, self.init_history[1]["content"] + if conversation is None: #if no history return init message + conversation = self.init_history.copy() + conversation.append({"role": "user", "content": user_input}) + i=0 + query_used = None + + # while True: + # try: + # i+=1 + response = openai.ChatCompletion.create( + deployment_id=self.engine, # The deployment name you chose when you deployed the GPT-35-turbo or GPT-4 model. 
+ messages=conversation, + functions=self.functions_spec, + function_call="auto", + ) + response_message = response["choices"][0]["message"] + + + # Step 2: check if GPT wanted to call a function + if response_message.get("function_call"): + print("Recommended Function call:") + print(response_message.get("function_call")) + print() + + # Step 3: call the function + # Note: the JSON response may not always be valid; be sure to handle errors + + function_name = response_message["function_call"]["name"] + + # verify function exists + if function_name not in self.functions_list: + print("function list:", self.functions_list) + raise Exception("Function " + function_name + " does not exist") + function_to_call = self.functions_list[function_name] + + # verify function has correct number of arguments + function_args = json.loads(response_message["function_call"]["arguments"]) + + if check_args(function_to_call, function_args) is False: + raise Exception("Invalid number of arguments for function: " + function_name) + + + # check if there's an opprotunity to use semantic cache + if function_name =="search_knowledgebase": + if os.getenv("USE_SEMANTIC_CACHE") == "True": + search_query = function_args["search_query"] + cache_output = get_cache(search_query) + if cache_output is not None: + print("semantic cache hit") + conversation.append({"role": "assistant", "content": cache_output}) + return False, query_used,conversation, cache_output + else: + print("semantic cache missed") + query_used = search_query + + + function_response = function_to_call(**function_args) + print("Output of function call:") + print(function_response) + print() + + + # Step 4: send the info on the function call and function response to GPT + + # adding assistant response to messages + conversation.append( + { + "role": response_message["role"], + "name": response_message["function_call"]["name"], + "content": response_message["function_call"]["arguments"], + } + ) + + # adding function response to messages + conversation.append( + { + "role": "function", + "name": function_name, + "content": function_response, + } + ) # extend conversation with function response + openai.api_version = api_version + + second_response = openai.ChatCompletion.create( + messages=conversation, + deployment_id=self.engine, + stream=stream, + ) # get a new response from GPT where it can see the function response + + if not stream: + assistant_response = second_response["choices"][0]["message"]["content"] + conversation.append({"role": "assistant", "content": assistant_response}) + + else: + assistant_response = second_response + + return stream,query_used, conversation, assistant_response + else: + assistant_response = response_message["content"] + conversation.append({"role": "assistant", "content": assistant_response}) + # break + # except Exception as e: + # if i>3: + # assistant_response="Haizz, my memory is having some trouble, can you repeat what you just said?" + # break + # print("Exception as below, will retry\n", str(e)) + # time.sleep(5) + + return False,query_used, conversation, assistant_response + PERSONA = """ You are Maya, a technical support specialist responsible for answering questions about computer networking and system. -When you are asked with a question, use the search tool to find relavent knowlege articles to create the answer. +Upon checking the customer database, you know that {username} have support access to following products: {products}. 
+If {username} does not mention the product in the question, ask him to pick from the above list. +Then use the search tool to find relavent knowlege articles specific to the products in question to create the answer Answer ONLY with the facts from the search tool. If there isn't enough information, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brakets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf]. If the user is asking for information that is not related to computer networking, say it's not your area of expertise. @@ -79,9 +298,14 @@ "search_query": { "type": "string", "description": "The search query to use to search the knowledge base" + }, + "products": { + "type": "string", + "description": "The comma seperated list of products that the user is asking about. Must be a subset of {products}" } + }, - "required": ["search_query"], + "required": ["search_query","products"], }, }, diff --git a/scenarios/incubations/copilot/tech_expert/user_profile.json b/scenarios/incubations/copilot/tech_expert/user_profile.json new file mode 100644 index 000000000..bd75fbff1 --- /dev/null +++ b/scenarios/incubations/copilot/tech_expert/user_profile.json @@ -0,0 +1 @@ +{"products":"ExtremeCloud, AP3000, AP302W, AP305C, AP305CX, 7720", "username":"John Doe"} diff --git a/scenarios/incubations/copilot/tech_expert/utils.py b/scenarios/incubations/copilot/tech_expert/utils.py deleted file mode 100644 index af09a3572..000000000 --- a/scenarios/incubations/copilot/tech_expert/utils.py +++ /dev/null @@ -1,335 +0,0 @@ -# Agent class -### responsbility definition: expertise, scope, conversation script, style -import openai -import os -from pathlib import Path -import json -import time -from azure.search.documents.models import Vector -import uuid -from tenacity import retry, wait_random_exponential, stop_after_attempt - -from dotenv import load_dotenv -from azure.core.credentials import AzureKeyCredential -from azure.search.documents import SearchClient -from openai.embeddings_utils import get_embedding, cosine_similarity -import inspect -env_path = Path('.') / 'secrets.env' -load_dotenv(dotenv_path=env_path) -openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY") -openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT") -openai.api_type = "azure" -emb_engine = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") -emb_engine = emb_engine.strip('"') -class Search_Client(): - def __init__(self,emb_map_file_path): - with open(emb_map_file_path) as file: - self.chunks_emb = json.load(file) - - def find_article(self,question, topk=3): - """ - Given an input vector and a dictionary of label vectors, - returns the label with the highest cosine similarity to the input vector. - """ - input_vector = get_embedding(question, engine = emb_engine) - # Compute cosine similarity between input vector and each label vector - cosine_list=[] - for chunk_id,chunk_content, vector in self.chunks_emb: - #by default, we use embedding for the entire content of the topic (plus topic descrition). 
- # If you you want to use embedding on just topic name and description use this code cosine_sim = cosine_similarity(input_vector, vector[0]) - cosine_sim = cosine_similarity(input_vector, vector) - cosine_list.append((chunk_id,chunk_content,cosine_sim )) - cosine_list.sort(key=lambda x:x[2],reverse=True) - cosine_list= cosine_list[:topk] - best_chunks =[chunk[0] for chunk in cosine_list] - contents = [chunk[1] for chunk in cosine_list] - text_content = "" - for chunk_id, content in zip(best_chunks, contents): - text_content += f"{chunk_id}\n{content}\n" - - return text_content - - -#azcs implementation -if os.getenv("USE_AZCS") == "True": - service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") - index_name = os.getenv("AZURE_SEARCH_INDEX_NAME") - index_name = index_name.strip('"') - key = os.getenv("AZURE_SEARCH_ADMIN_KEY") - key = key.strip('"') - # @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) - # Function to generate embeddings for title and content fields, also used for query embeddings - def generate_embeddings(text): - print("emb_engine", emb_engine) - openai.api_version = "2023-05-15" - response = openai.Embedding.create( - input=text, engine=emb_engine) - embeddings = response['data'][0]['embedding'] - return embeddings - credential = AzureKeyCredential(key) - azcs_search_client = SearchClient(service_endpoint, index_name =index_name , credential=credential) -else: - faiss_search_client = Search_Client("../data/chunk_emb_map.json") - -def search_knowledgebase_acs(search_query): - vector = Vector(value=generate_embeddings(search_query), k=3, fields="embedding") - print("search query: ", search_query) - results = azcs_search_client.search( - search_text=search_query, - vectors= [vector], - select=["sourcepage","content"], - top=5 - ) - text_content ="" - for result in results: - text_content += f"{result['sourcepage']}\n{result['content']}\n" - print("text_content", text_content) - return text_content - -def search_knowledgebase_faiss(search_query): - return faiss_search_client.find_article(search_query) - -def search_knowledgebase(search_query): - if os.getenv("USE_AZCS") == "True": - print("using azcs") - return search_knowledgebase_acs(search_query) - else: - print("using faiss") - print(os.getenv("USE_AZCS")) - return search_knowledgebase_faiss(search_query) - - - -###Sematic caching implementation -if os.getenv("USE_SEMANTIC_CACHE") == "True": - cache_index_name = os.getenv("CACHE_INDEX_NAME") - cache_index_name= cache_index_name.strip('"') - azcs_semantic_cache_search_client = SearchClient(service_endpoint, cache_index_name, credential=credential) - -def add_to_cache(search_query, gpt_response): - search_doc = { - "id" : str(uuid.uuid4()), - "search_query" : search_query, - "search_query_vector" : generate_embeddings(search_query), - "gpt_response" : gpt_response - } - azcs_semantic_cache_search_client.upload_documents(documents = [search_doc]) - -def get_cache(search_query): - vector = Vector(value=generate_embeddings(search_query), k=3, fields="search_query_vector") - - results = azcs_semantic_cache_search_client.search( - search_text=None, - vectors= [vector], - select=["gpt_response"], - ) - try: - result =next(results) - print("threshold ", result['@search.score']) - if result['@search.score']>= float(os.getenv("SEMANTIC_HIT_THRESHOLD")): - return result['gpt_response'] - except StopIteration: - pass - - return None - - -def gpt_stream_wrapper(response): - for chunk in response: - chunk_msg= chunk['choices'][0]['delta'] - 
chunk_msg= chunk_msg.get('content',"") - yield chunk_msg -class Agent(): #Base class for Agent - def __init__(self, engine,persona, name=None, init_message=None): - if init_message is not None: - init_hist =[{"role":"system", "content":persona}, {"role":"assistant", "content":init_message}] - else: - init_hist =[{"role":"system", "content":persona}] - - self.init_history = init_hist - self.persona = persona - self.engine = engine - self.name= name - def generate_response(self, new_input,history=None, stream = False,request_timeout =20,api_version = "2023-05-15"): - openai.api_version = api_version - if new_input is None: # return init message - return self.init_history[1]["content"] - messages = self.init_history.copy() - if history is not None: - for user_question, bot_response in history: - messages.append({"role":"user", "content":user_question}) - messages.append({"role":"assistant", "content":bot_response}) - messages.append({"role":"user", "content":new_input}) - response = openai.ChatCompletion.create( - engine=self.engine, - messages=messages, - stream=stream, - request_timeout =request_timeout - ) - if not stream: - return response['choices'][0]['message']['content'] - else: - return gpt_stream_wrapper(response) - def run(self, **kwargs): - return self.generate_response(**kwargs) - - - -def check_args(function, args): - sig = inspect.signature(function) - params = sig.parameters - - # Check if there are extra arguments - for name in args: - if name not in params: - return False - # Check if the required arguments are provided - for name, param in params.items(): - if param.default is param.empty and name not in args: - return False - - return True - -class Smart_Agent(Agent): - """ - Agent that can use other agents and tools to answer questions. - - Args: - persona (str): The persona of the agent. - tools (list): A list of {"tool_name":tool} that the agent can use to answer questions. Tool must have a run method that takes a question and returns an answer. - stop (list): A list of strings that the agent will use to stop the conversation. - init_message (str): The initial message of the agent. Defaults to None. - engine (str): The name of the GPT engine to use. Defaults to "gpt-35-turbo". - - Methods: - llm(new_input, stop, history=None, stream=False): Generates a response to the input using the LLM model. - _run(new_input, stop, history=None, stream=False): Runs the agent and generates a response to the input. - run(new_input, history=None, stream=False): Runs the agent and generates a response to the input. - - Attributes: - persona (str): The persona of the agent. - tools (list): A list of {"tool_name":tool} that the agent can use to answer questions. Tool must have a run method that takes a question and returns an answer. - stop (list): A list of strings that the agent will use to stop the conversation. - init_message (str): The initial message of the agent. - engine (str): The name of the GPT engine to use. 
- """ - - def __init__(self, persona,functions_spec, functions_list, name=None, init_message=None, engine =os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT")): - super().__init__(engine=engine,persona=persona, init_message=init_message, name=name) - self.functions_spec = functions_spec - self.functions_list= functions_list - - @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) - def run(self, user_input, conversation=None, stream = False, api_version = "2023-07-01-preview"): - openai.api_version = api_version - if user_input is None: #if no input return init message - return self.init_history, self.init_history[1]["content"] - if conversation is None: #if no history return init message - conversation = self.init_history.copy() - conversation.append({"role": "user", "content": user_input}) - i=0 - query_used = None - - # while True: - # try: - # i+=1 - response = openai.ChatCompletion.create( - deployment_id=self.engine, # The deployment name you chose when you deployed the GPT-35-turbo or GPT-4 model. - messages=conversation, - functions=self.functions_spec, - function_call="auto", - ) - response_message = response["choices"][0]["message"] - - - # Step 2: check if GPT wanted to call a function - if response_message.get("function_call"): - print("Recommended Function call:") - print(response_message.get("function_call")) - print() - - # Step 3: call the function - # Note: the JSON response may not always be valid; be sure to handle errors - - function_name = response_message["function_call"]["name"] - - # verify function exists - if function_name not in self.functions_list: - print("function list:", self.functions_list) - raise Exception("Function " + function_name + " does not exist") - function_to_call = self.functions_list[function_name] - - # verify function has correct number of arguments - function_args = json.loads(response_message["function_call"]["arguments"]) - - if check_args(function_to_call, function_args) is False: - raise Exception("Invalid number of arguments for function: " + function_name) - - - # check if there's an opprotunity to use semantic cache - if function_name =="search_knowledgebase": - if os.getenv("USE_SEMANTIC_CACHE") == "True": - search_query = function_args["search_query"] - cache_output = get_cache(search_query) - if cache_output is not None: - print("semantic cache hit") - conversation.append({"role": "assistant", "content": cache_output}) - return False, query_used,conversation, cache_output - else: - print("semantic cache missed") - query_used = search_query - - - function_response = function_to_call(**function_args) - print("Output of function call:") - print(function_response) - print() - - - # Step 4: send the info on the function call and function response to GPT - - # adding assistant response to messages - conversation.append( - { - "role": response_message["role"], - "name": response_message["function_call"]["name"], - "content": response_message["function_call"]["arguments"], - } - ) - - # adding function response to messages - conversation.append( - { - "role": "function", - "name": function_name, - "content": function_response, - } - ) # extend conversation with function response - openai.api_version = api_version - - second_response = openai.ChatCompletion.create( - messages=conversation, - deployment_id=self.engine, - stream=stream, - ) # get a new response from GPT where it can see the function response - - if not stream: - assistant_response = second_response["choices"][0]["message"]["content"] - 
conversation.append({"role": "assistant", "content": assistant_response}) - - else: - assistant_response = second_response - - return stream,query_used, conversation, assistant_response - else: - assistant_response = response_message["content"] - conversation.append({"role": "assistant", "content": assistant_response}) - # break - # except Exception as e: - # if i>3: - # assistant_response="Haizz, my memory is having some trouble, can you repeat what you just said?" - # break - # print("Exception as below, will retry\n", str(e)) - # time.sleep(5) - - return False,query_used, conversation, assistant_response \ No newline at end of file From d8c702223d483e17f529463627780e4ecd02db6d Mon Sep 17 00:00:00 2001 From: james-tn Date: Sun, 15 Oct 2023 15:39:27 -0700 Subject: [PATCH 13/18] update prompt so that it clarify output and filtering query --- .../copilot/tech_expert/tech_copilot_utils.py | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py b/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py index 4cab5ffd5..2a103ca83 100644 --- a/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py +++ b/scenarios/incubations/copilot/tech_expert/tech_copilot_utils.py @@ -44,9 +44,12 @@ def search_knowledgebase(search_query, products): vector = Vector(value=generate_embeddings(search_query), k=3, fields="embedding") print("search query: ", search_query) print("products: ", products.split(",")) + product_filter = " or ".join([f"product eq '{product}'" for product in products.split(",")]) results = azcs_search_client.search( search_text=search_query, vectors= [vector], + filter= product_filter, + query_type="semantic", query_language="en-us", semantic_configuration_name='default', query_caption="extractive", query_answer="extractive", select=["sourcepage","content"], top=3 ) @@ -71,6 +74,24 @@ def add_to_cache(search_query, gpt_response): "gpt_response" : gpt_response } azcs_semantic_cache_search_client.upload_documents(documents = [search_doc]) +def get_cache(search_query): + vector = Vector(value=generate_embeddings(search_query), k=3, fields="search_query_vector") + + results = azcs_semantic_cache_search_client.search( + search_text=None, + vectors= [vector], + select=["gpt_response"], + ) + try: + result =next(results) + print("threshold ", result['@search.score']) + if result['@search.score']>= float(os.getenv("SEMANTIC_HIT_THRESHOLD")): + return result['gpt_response'] + except StopIteration: + pass + + return None + def gpt_stream_wrapper(response): for chunk in response: @@ -275,9 +296,10 @@ def run(self, user_input, conversation=None, stream = False, api_version = "2023 PERSONA = """ You are Maya, a technical support specialist responsible for answering questions about computer networking and system. -Upon checking the customer database, you know that {username} have support access to following products: {products}. -If {username} does not mention the product in the question, ask him to pick from the above list. -Then use the search tool to find relavent knowlege articles specific to the products in question to create the answer +You are helping {username} with a technical question. +You will use the search tool to find relavent knowlege articles to create the answer. +The search tool requires you to provide a list of products that the user is asking about. +If {username} does not mention the product in the question, ask him to pick from following products: {products}. 
Answer ONLY with the facts from the search tool. If there isn't enough information, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brakets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf]. If the user is asking for information that is not related to computer networking, say it's not your area of expertise. From c78f613251f1d483235129be292701028dd48d31 Mon Sep 17 00:00:00 2001 From: james-tn Date: Mon, 16 Oct 2023 07:34:08 -0700 Subject: [PATCH 14/18] update data prep --- .../copilot/data/dataprep/command_run.txt | 16 ++++++++-------- .../copilot/data/dataprep/prepdocs.py | 17 +++++++++++------ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/scenarios/incubations/copilot/data/dataprep/command_run.txt b/scenarios/incubations/copilot/data/dataprep/command_run.txt index e4baf8b4c..79434516f 100644 --- a/scenarios/incubations/copilot/data/dataprep/command_run.txt +++ b/scenarios/incubations/copilot/data/dataprep/command_run.txt @@ -1,13 +1,13 @@ -SET AZURE_STORAGE_ACCOUNT="sttyf3apvjit5gg" -SET AZURE_STORAGE_CONTAINER="content" -SET AZURE_SEARCH_SERVICE="cogsearch001" -SET AZURE_OPENAI_SERVICE="openai002" +SET AZURE_STORAGE_ACCOUNT="" +SET AZURE_STORAGE_CONTAINER="" +SET AZURE_SEARCH_SERVICE="" +SET AZURE_OPENAI_SERVICE="" SET AZURE_OPENAI_EMB_DEPLOYMENT="text-embedding-ada-002" -SET AZURE_FORMRECOGNIZER_RESOURCE_GROUP="rg-azure-search-demo-dev" -SET AZURE_FORMRECOGNIZER_SERVICE="cog-fr-tyf3apvjit5gg" -SET AZURE_TENANT_ID="0fbe7234-45ea-498b-b7e4-1a8b2d3be4d9" +SET AZURE_FORMRECOGNIZER_RESOURCE_GROUP="" +SET AZURE_FORMRECOGNIZER_SERVICE="" +SET AZURE_TENANT_ID="" SET AZURE_SEARCH_ADMIN_KEY="" -SET AZURE_SEARCH_INDEX_NAME="enetwork" +SET AZURE_SEARCH_INDEX_NAME="" python prepdocs.py "../EVA_documents/*/*.pdf" --storageaccount "%AZURE_STORAGE_ACCOUNT%" --container "%AZURE_STORAGE_CONTAINER%" --searchservice "%AZURE_SEARCH_SERVICE%" --openaiservice "%AZURE_OPENAI_SERVICE%" --openaideployment "%AZURE_OPENAI_EMB_DEPLOYMENT%" --searchkey "%AZURE_SEARCH_ADMIN_KEY%" --index "%AZURE_SEARCH_INDEX_NAME%" --formrecognizerservice "%AZURE_FORMRECOGNIZER_SERVICE%" --tenantid "%AZURE_TENANT_ID%" -v diff --git a/scenarios/incubations/copilot/data/dataprep/prepdocs.py b/scenarios/incubations/copilot/data/dataprep/prepdocs.py index cb7d1c78d..5ef5da693 100644 --- a/scenarios/incubations/copilot/data/dataprep/prepdocs.py +++ b/scenarios/incubations/copilot/data/dataprep/prepdocs.py @@ -17,10 +17,13 @@ from azure.storage.blob import BlobServiceClient from pypdf import PdfReader, PdfWriter from tenacity import retry, stop_after_attempt, wait_random_exponential -MAX_SECTION_LENGTH = 1000 +import json +MAX_SECTION_LENGTH = 3000 SENTENCE_SEARCH_LIMIT = 100 -SECTION_OVERLAP = 100 - +SECTION_OVERLAP = 300 +with open("../EVA_documents/file_name_product_map.json", "r") as f: + product_file_map = json.load(f) +product_file_map = eval(product_file_map) def blob_name_from_file_page(filename, page = 0): if os.path.splitext(filename)[1].lower() == ".pdf": return os.path.splitext(os.path.basename(filename))[0] + f"-{page}" + ".pdf" @@ -194,12 +197,14 @@ def filename_to_id(filename): return f"file-{filename_ascii}-{filename_hash}" def create_sections(filename, page_map, use_vectors): + 
product = product_file_map[filename] + file_id = filename_to_id(filename) for i, (content, pagenum) in enumerate(split_text(page_map)): section = { "id": f"{file_id}-page-{i}", "content": content, - "category": args.category, + "product": product, "sourcepage": blob_name_from_file_page(filename, pagenum), "sourcefile": filename } @@ -228,7 +233,7 @@ def create_search_index(): SearchField(name="embedding", type=SearchFieldDataType.Collection(SearchFieldDataType.Single), hidden=False, searchable=True, filterable=False, sortable=False, facetable=False, vector_search_dimensions=1536, vector_search_configuration="default"), - SimpleField(name="category", type="Edm.String", filterable=True, facetable=True), + SimpleField(name="product", type="Edm.String", filterable=True, facetable=True), SimpleField(name="sourcepage", type="Edm.String", filterable=True, facetable=True), SimpleField(name="sourcefile", type="Edm.String", filterable=True, facetable=True) ], @@ -301,7 +306,7 @@ def remove_from_index(filename): epilog="Example: prepdocs.py '..\data\*' --storageaccount myaccount --container mycontainer --searchservice mysearch --index myindex -v" ) parser.add_argument("files", help="Files to be processed") - parser.add_argument("--category", help="Value for the category field in the search index for all sections indexed in this run") + parser.add_argument("--product", help="Value for the product field in the search index for all sections indexed in this run") parser.add_argument("--skipblobs", action="store_true", help="Skip uploading individual pages to Azure Blob Storage") parser.add_argument("--storageaccount", help="Azure Blob Storage account name") parser.add_argument("--container", help="Azure Blob Storage container name") From 1974a21f28bf3a0cc29a92099e62fea6f26a2bb1 Mon Sep 17 00:00:00 2001 From: james-tn Date: Mon, 16 Oct 2023 07:35:18 -0700 Subject: [PATCH 15/18] update data prep --- .../copilot/data/dataprep/command_run.txt | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scenarios/incubations/copilot/data/dataprep/command_run.txt b/scenarios/incubations/copilot/data/dataprep/command_run.txt index 79434516f..d0e31c706 100644 --- a/scenarios/incubations/copilot/data/dataprep/command_run.txt +++ b/scenarios/incubations/copilot/data/dataprep/command_run.txt @@ -1,13 +1,12 @@ SET AZURE_STORAGE_ACCOUNT="" -SET AZURE_STORAGE_CONTAINER="" -SET AZURE_SEARCH_SERVICE="" -SET AZURE_OPENAI_SERVICE="" +SET AZURE_STORAGE_CONTAINER="content" +SET AZURE_SEARCH_SERVICE="cogsearch001" +SET AZURE_OPENAI_SERVICE="openai002" SET AZURE_OPENAI_EMB_DEPLOYMENT="text-embedding-ada-002" -SET AZURE_FORMRECOGNIZER_RESOURCE_GROUP="" -SET AZURE_FORMRECOGNIZER_SERVICE="" -SET AZURE_TENANT_ID="" +SET AZURE_FORMRECOGNIZER_RESOURCE_GROUP="rg-azure-search-demo-dev" +SET AZURE_FORMRECOGNIZER_SERVICE="cog-fr-tyf3apvjit5gg" +SET AZURE_TENANT_ID="0fbe7234-45ea-498b-b7e4-1a8b2d3be4d9" SET AZURE_SEARCH_ADMIN_KEY="" SET AZURE_SEARCH_INDEX_NAME="" python prepdocs.py "../EVA_documents/*/*.pdf" --storageaccount "%AZURE_STORAGE_ACCOUNT%" --container "%AZURE_STORAGE_CONTAINER%" --searchservice "%AZURE_SEARCH_SERVICE%" --openaiservice "%AZURE_OPENAI_SERVICE%" --openaideployment "%AZURE_OPENAI_EMB_DEPLOYMENT%" --searchkey "%AZURE_SEARCH_ADMIN_KEY%" --index "%AZURE_SEARCH_INDEX_NAME%" --formrecognizerservice "%AZURE_FORMRECOGNIZER_SERVICE%" --tenantid "%AZURE_TENANT_ID%" -v - From b7195a6476a027825e34a20b51702b96e6052181 Mon Sep 17 00:00:00 2001 From: james-tn Date: Mon, 16 Oct 2023 07:49:51 -0700 Subject: 
[PATCH 16/18] tech expert

---
 .../incubations/copilot/tech_expert/README.md | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 scenarios/incubations/copilot/tech_expert/README.md

diff --git a/scenarios/incubations/copilot/tech_expert/README.md b/scenarios/incubations/copilot/tech_expert/README.md
new file mode 100644
index 000000000..4ebc871d2
--- /dev/null
+++ b/scenarios/incubations/copilot/tech_expert/README.md
@@ -0,0 +1,48 @@
+# Scenario Overview
+This scenario demonstrates the use of Azure OpenAI function calling, hybrid search with the semantic reranker in Azure Cognitive Search, and semantic caching in a RAG pattern over complex technical documents.
+Check out ```../data/dataprep``` for the script that preprocesses PDF documents with Azure Form Recognizer, including logic to handle table data and to populate metadata in the Azure Cognitive Search index.
+
+The copilot in this example interacts with users to answer technical questions about products.
+Each user has a profile listing the products they have access to.
+A small example business flow is implemented: when it is not clear which product(s) a question is about, the copilot asks the user to clarify.
+Function calling is then used so that the copilot formulates both a search query for Azure Cognitive Search and a filter condition that narrows the scope of the search to those products.
+
+
+# Installation
+## OpenAI setup
+Create an Azure OpenAI resource in an Azure subscription with a GPT-4 (0613) deployment and a text-embedding-ada-002 deployment.
+## Run the application locally
+1. Clone the repo (e.g. ```git clone https://github.com/microsoft/OpenAIWorkshop.git``` or download). Then navigate to ```cd scenarios/incubations/copilot/tech_expert```
+2. Create a `secrets.env` file under the ``tech_expert`` folder:
+```
+AZURE_OPENAI_ENDPOINT=""
+AZURE_OPENAI_API_KEY=""
+AZURE_OPENAI_EMB_DEPLOYMENT=""
+AZURE_OPENAI_CHAT_DEPLOYMENT=""
+USE_AZCS="True"
+AZURE_SEARCH_SERVICE_ENDPOINT=""
+AZURE_SEARCH_INDEX_NAME=
+AZURE_SEARCH_ADMIN_KEY=
+AZURE_OPENAI_API_VERSION="2023-07-01-preview"
+USE_SEMANTIC_CACHE="False" #Set to True for semantic caching.
+SEMANTIC_HIT_THRESHOLD=0.9
+
+
+```
+3. Create a Python environment with a version between 3.7 and 3.10
+
+    - [Python 3+](https://www.python.org/downloads/)
+        - **Important**: Python and the pip package manager must be in the path in Windows for the setup scripts to work.
+        - **Important**: Ensure you can run `python --version` from console. On Ubuntu, you might need to run `sudo apt install python-is-python3` to link `python` to `python3`.
+4. Import the requirements.txt `pip install -r requirements.txt`
+5. To run the multi-agent copilot from the command line: `streamlit run tech_expert.py`
+
+## Deploy the application to Azure
+##To be added
+
+
+
+
+
+
+

From 6c9aa4e2715d593db4d9b12896a7acbbfe786a Mon Sep 17 00:00:00 2001
From: james-tn
Date: Mon, 16 Oct 2023 07:54:54 -0700
Subject: [PATCH 17/18] tech expert

---
 scenarios/incubations/copilot/tech_expert/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scenarios/incubations/copilot/tech_expert/README.md b/scenarios/incubations/copilot/tech_expert/README.md
index 4ebc871d2..20b0617d4 100644
--- a/scenarios/incubations/copilot/tech_expert/README.md
+++ b/scenarios/incubations/copilot/tech_expert/README.md
@@ -35,7 +35,7 @@ SEMANTIC_HIT_THRESHOLD=0.9
    - **Important**: Python and the pip package manager must be in the path in Windows for the setup scripts to work.
- **Important**: Ensure you can run `python --version` from console. On Ubuntu, you might need to run `sudo apt install python-is-python3` to link `python` to `python3`. 4. Import the requirements.txt `pip install -r requirements.txt` -5. To run the multi-agent copilot from the command line: `streamlit run tech_expert.py` +5. To run the multi-agent copilot from the command line: `streamlit run tech_copilot.py` ## Deploy the application to Azure ##To be added From a891a55222f117ba8852898f9354454753161b3c Mon Sep 17 00:00:00 2001 From: james-tn Date: Fri, 17 Nov 2023 15:30:36 -0800 Subject: [PATCH 18/18] add smart agent --- .../copilot/smart_agent/copilot.py | 85 +++++ .../copilot/smart_agent/requirements.txt | 9 + .../copilot/smart_agent/user_profile.json | 1 + .../incubations/copilot/smart_agent/utils.py | 317 ++++++++++++++++++ 4 files changed, 412 insertions(+) create mode 100644 scenarios/incubations/copilot/smart_agent/copilot.py create mode 100644 scenarios/incubations/copilot/smart_agent/requirements.txt create mode 100644 scenarios/incubations/copilot/smart_agent/user_profile.json create mode 100644 scenarios/incubations/copilot/smart_agent/utils.py diff --git a/scenarios/incubations/copilot/smart_agent/copilot.py b/scenarios/incubations/copilot/smart_agent/copilot.py new file mode 100644 index 000000000..c8b4f4558 --- /dev/null +++ b/scenarios/incubations/copilot/smart_agent/copilot.py @@ -0,0 +1,85 @@ +import streamlit as st +from streamlit_extras.add_vertical_space import add_vertical_space +from utils import PERSONA, AVAILABLE_FUNCTIONS, FUNCTIONS_SPEC, Smart_Agent, add_to_cache +import sys +import time +import random +import os +from pathlib import Path +import json +with open('./user_profile.json') as f: + user_profile = json.load(f) +functions = FUNCTIONS_SPEC.copy() +# functions[0]["parameters"]["properties"]["products"]["description"] = functions[0]["parameters"]["properties"]["products"]["description"].format(products=user_profile['products']) + +agent = Smart_Agent(persona=PERSONA.format(username=user_profile['username']),functions_list=AVAILABLE_FUNCTIONS, functions_spec=functions, init_message=f"Hi {user_profile['username']}, this is Maya, technical specialist helping with questions about networking and system, what can I do for you?") + +st.set_page_config(layout="wide",page_title="Enterprise Copilot- A demo of Copilot application using GPT") +styl = f""" + +""" +st.markdown(styl, unsafe_allow_html=True) + + +MAX_HIST= 5 +# Sidebar contents +with st.sidebar: + st.title('Tech Copilot') + st.markdown(''' + This is a demo of Copilot Concept for Enterprise Networking Technical Support. 
+ + ''') + add_vertical_space(5) + st.write('Created by James N') + if st.button('Clear Chat'): + + if 'history' in st.session_state: + st.session_state['history'] = [] + + if 'history' not in st.session_state: + st.session_state['history'] = [] + if 'input' not in st.session_state: + st.session_state['input'] = "" + + +user_input= st.chat_input("You:") + +## Conditional display of AI generated responses as a function of user provided prompts +history = st.session_state['history'] + +if len(history) > 0: + for message in history: + if message.get("role") != "system" and message.get("name") is None: + with st.chat_message(message["role"]): + st.markdown(message["content"]) +else: + history, agent_response = agent.run(user_input=None) + with st.chat_message("assistant"): + st.markdown(agent_response) + user_history=[] +if user_input: + with st.chat_message("user"): + st.markdown(user_input) + stream_out, query_used, history, agent_response = agent.run(user_input=user_input, conversation=history, stream=False) + with st.chat_message("assistant"): + if stream_out: + message_placeholder = st.empty() + full_response = "" + for response in agent_response: + if len(response.choices)>0: + full_response += response.choices[0].delta.get("content", "") + message_placeholder.markdown(full_response + "▌") + message_placeholder.markdown(full_response) + if query_used: #add to cache + add_to_cache(query_used, full_response) + print(f"query {query_used} added to cache") + history.append({"role": "assistant", "content": full_response}) + else: + st.markdown(agent_response) + +st.session_state['history'] = history \ No newline at end of file diff --git a/scenarios/incubations/copilot/smart_agent/requirements.txt b/scenarios/incubations/copilot/smart_agent/requirements.txt new file mode 100644 index 000000000..0bfb3cf51 --- /dev/null +++ b/scenarios/incubations/copilot/smart_agent/requirements.txt @@ -0,0 +1,9 @@ +streamlit +openai +streamlit-extras +python-dotenv +plotly +scipy +scikit-learn +azure-search-documents==11.4.0b9 +faiss-cpu diff --git a/scenarios/incubations/copilot/smart_agent/user_profile.json b/scenarios/incubations/copilot/smart_agent/user_profile.json new file mode 100644 index 000000000..bd75fbff1 --- /dev/null +++ b/scenarios/incubations/copilot/smart_agent/user_profile.json @@ -0,0 +1 @@ +{"products":"ExtremeCloud, AP3000, AP302W, AP305C, AP305CX, 7720", "username":"John Doe"} diff --git a/scenarios/incubations/copilot/smart_agent/utils.py b/scenarios/incubations/copilot/smart_agent/utils.py new file mode 100644 index 000000000..8fa371c51 --- /dev/null +++ b/scenarios/incubations/copilot/smart_agent/utils.py @@ -0,0 +1,317 @@ +import openai +import os +from pathlib import Path +import json +import time +from azure.search.documents.models import Vector +import uuid +from tenacity import retry, wait_random_exponential, stop_after_attempt + +from dotenv import load_dotenv +from azure.core.credentials import AzureKeyCredential +from azure.search.documents import SearchClient +from openai.embeddings_utils import get_embedding, cosine_similarity +import inspect +env_path = Path('.') / 'secrets.env' +load_dotenv(dotenv_path=env_path) +openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY") +openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT") +openai.api_type = "azure" +emb_engine = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") +emb_engine = emb_engine.strip('"') + +#azcs implementation +service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT") +index_name = 
os.getenv("AZURE_SEARCH_INDEX_NAME") +index_name = index_name.strip('"') +key = os.getenv("AZURE_SEARCH_ADMIN_KEY") +key = key.strip('"') +# @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) +# Function to generate embeddings for title and content fields, also used for query embeddings +def generate_embeddings(text): + print("emb_engine", emb_engine) + openai.api_version = "2023-05-15" + response = openai.Embedding.create( + input=text, engine=emb_engine) + embeddings = response['data'][0]['embedding'] + return embeddings +credential = AzureKeyCredential(key) +azcs_search_client = SearchClient(service_endpoint, index_name =index_name , credential=credential) + + +def search_knowledgebase(search_query): + + vector = Vector(value=generate_embeddings(search_query), k=3, fields="embedding") + print("search query: ", search_query) + # print("products: ", products.split(",")) + # product_filter = " or ".join([f"product eq '{product}'" for product in products.split(",")]) + results = azcs_search_client.search( + search_text=search_query, + vectors= [vector], + # filter= product_filter, + query_type="semantic", query_language="en-us", semantic_configuration_name='default', query_caption="extractive", query_answer="extractive", + select=["sourcepage","content"], + top=3 + ) + text_content ="" + for result in results: + text_content += f"{result['sourcepage']}\n{result['content']}\n" + # print("text_content", text_content) + return text_content + + +###Sematic caching implementation +if os.getenv("USE_SEMANTIC_CACHE") == "True": + cache_index_name = os.getenv("CACHE_INDEX_NAME") + cache_index_name= cache_index_name.strip('"') + azcs_semantic_cache_search_client = SearchClient(service_endpoint, cache_index_name, credential=credential) + +def add_to_cache(search_query, gpt_response): + search_doc = { + "id" : str(uuid.uuid4()), + "search_query" : search_query, + "search_query_vector" : generate_embeddings(search_query), + "gpt_response" : gpt_response + } + azcs_semantic_cache_search_client.upload_documents(documents = [search_doc]) +def get_cache(search_query): + vector = Vector(value=generate_embeddings(search_query), k=3, fields="search_query_vector") + + results = azcs_semantic_cache_search_client.search( + search_text=None, + vectors= [vector], + select=["gpt_response"], + ) + try: + result =next(results) + print("threshold ", result['@search.score']) + if result['@search.score']>= float(os.getenv("SEMANTIC_HIT_THRESHOLD")): + return result['gpt_response'] + except StopIteration: + pass + + return None + + +def gpt_stream_wrapper(response): + for chunk in response: + chunk_msg= chunk['choices'][0]['delta'] + chunk_msg= chunk_msg.get('content',"") + yield chunk_msg +class Agent(): #Base class for Agent + def __init__(self, engine,persona, name=None, init_message=None): + if init_message is not None: + init_hist =[{"role":"system", "content":persona}, {"role":"assistant", "content":init_message}] + else: + init_hist =[{"role":"system", "content":persona}] + + self.init_history = init_hist + self.persona = persona + self.engine = engine + self.name= name + def generate_response(self, new_input,history=None, stream = False,request_timeout =20,api_version = "2023-05-15"): + openai.api_version = api_version + if new_input is None: # return init message + return self.init_history[1]["content"] + messages = self.init_history.copy() + if history is not None: + for user_question, bot_response in history: + messages.append({"role":"user", "content":user_question}) + 
messages.append({"role":"assistant", "content":bot_response}) + messages.append({"role":"user", "content":new_input}) + response = openai.ChatCompletion.create( + engine=self.engine, + messages=messages, + stream=stream, + request_timeout =request_timeout + ) + if not stream: + return response['choices'][0]['message']['content'] + else: + return gpt_stream_wrapper(response) + def run(self, **kwargs): + return self.generate_response(**kwargs) + + + +def check_args(function, args): + sig = inspect.signature(function) + params = sig.parameters + + # Check if there are extra arguments + for name in args: + if name not in params: + return False + # Check if the required arguments are provided + for name, param in params.items(): + if param.default is param.empty and name not in args: + return False + + return True + +class Smart_Agent(Agent): + """ + Agent that can use other agents and tools to answer questions. + + Args: + persona (str): The persona of the agent. + tools (list): A list of {"tool_name":tool} that the agent can use to answer questions. Tool must have a run method that takes a question and returns an answer. + stop (list): A list of strings that the agent will use to stop the conversation. + init_message (str): The initial message of the agent. Defaults to None. + engine (str): The name of the GPT engine to use. Defaults to "gpt-35-turbo". + + Methods: + llm(new_input, stop, history=None, stream=False): Generates a response to the input using the LLM model. + _run(new_input, stop, history=None, stream=False): Runs the agent and generates a response to the input. + run(new_input, history=None, stream=False): Runs the agent and generates a response to the input. + + Attributes: + persona (str): The persona of the agent. + tools (list): A list of {"tool_name":tool} that the agent can use to answer questions. Tool must have a run method that takes a question and returns an answer. + stop (list): A list of strings that the agent will use to stop the conversation. + init_message (str): The initial message of the agent. + engine (str): The name of the GPT engine to use. + """ + + def __init__(self, persona,functions_spec, functions_list, name=None, init_message=None, engine =os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT")): + super().__init__(engine=engine,persona=persona, init_message=init_message, name=name) + self.functions_spec = functions_spec + self.functions_list= functions_list + + @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) + def run(self, user_input, conversation=None, stream = False, api_version = "2023-07-01-preview"): + openai.api_version = api_version + if user_input is None: #if no input return init message + return self.init_history, self.init_history[1]["content"] + if conversation is None: #if no history return init message + conversation = self.init_history.copy() + conversation.append({"role": "user", "content": user_input}) + i=0 + query_used = None + + while True: + + response = openai.ChatCompletion.create( + deployment_id=self.engine, # The deployment name you chose when you deployed the GPT-35-turbo or GPT-4 model. 
+                messages=conversation,
+                functions=self.functions_spec,
+                function_call="auto"
+
+            )
+            response_message = response["choices"][0]["message"]
+
+
+            # Step 2: check if GPT wanted to call a function
+            if response_message.get("function_call"):
+                print("Recommended Function call:")
+                print(response_message.get("function_call"))
+                print()
+
+                # Step 3: call the function
+                # Note: the JSON response may not always be valid; be sure to handle errors
+
+                function_name = response_message["function_call"]["name"]
+
+                # verify the function exists
+                if function_name not in self.functions_list:
+                    raise Exception("Function " + function_name + " does not exist")
+                function_to_call = self.functions_list[function_name]
+
+                # verify the function was given the correct arguments
+                function_args = json.loads(response_message["function_call"]["arguments"])
+
+                if check_args(function_to_call, function_args) is False:
+                    raise Exception("Invalid number of arguments for function: " + function_name)
+
+                # check if there's an opportunity to use the semantic cache
+                if function_name == "search_knowledgebase":
+                    search_query = function_args["search_query"]
+                    print("search_query", search_query)
+                    if os.getenv("USE_SEMANTIC_CACHE") == "True":
+                        cache_output = get_cache(search_query)
+                        if cache_output is not None:
+                            print("semantic cache hit")
+                            conversation.append({"role": "assistant", "content": cache_output})
+                            return False, query_used, conversation, cache_output
+                        else:
+                            print("semantic cache missed")
+                            query_used = search_query
+
+                function_response = function_to_call(**function_args)
+                print("Output of function call:")
+                print(function_response)
+                print()
+
+                # Step 4: send the info on the function call and function response to GPT
+
+                # adding the assistant response to messages
+                conversation.append(
+                    {
+                        "role": response_message["role"],
+                        "name": response_message["function_call"]["name"],
+                        "content": response_message["function_call"]["arguments"],
+                    }
+                )
+
+                # adding the function response to messages
+                conversation.append(
+                    {
+                        "role": "function",
+                        "name": function_name,
+                        "content": function_response,
+                    }
+                )  # extend conversation with function response
+                continue
+            else:
+                break  # no function call: the agent has finished its research and is ready to respond to the user
+
+        if not stream:
+            assistant_response = response_message["content"]
+            conversation.append({"role": "assistant", "content": assistant_response})
+
+        else:
+            assistant_response = response_message
+
+        return stream, query_used, conversation, assistant_response
+
+
+PERSONA = """
+You are Maya, a technical support specialist responsible for answering questions about computer networking and systems.
+You are helping {username} with a technical question.
+You will use the search tool to find relevant knowledge articles to create the answer.
+Be smart in your research. If the search does not come back with the answer, rephrase the question and try again.
+Review the result of the search and use it to guide your next search if needed.
+If the question is complex, break it down into smaller search steps and find the answer in multiple steps.
+Answer ONLY with the facts from the search tool. If there isn't enough information, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
+Each source has a name followed by a colon and the actual information; always include the source name for each fact you use in the response.
Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources; list each source separately, e.g. [info1.txt][info2.pdf].
+If the user is asking for information that is not related to computer networking, say it's not your area of expertise.
+"""
+
+AVAILABLE_FUNCTIONS = {
+    "search_knowledgebase": search_knowledgebase,
+}
+
+FUNCTIONS_SPEC = [
+    {
+        "name": "search_knowledgebase",
+        "description": "Searches the knowledge base for an answer to the technical question",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "search_query": {
+                    "type": "string",
+                    "description": "The search query to use to search the knowledge base"
+                },
+            },
+            "required": ["search_query"],
+        },
+    },
+]
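
As a closing illustration, here is a minimal console harness for the `smart_agent` module added in PATCH 18. It is a sketch under stated assumptions, not part of the patch series: it assumes that PATCH 18's `utils.py` and `user_profile.json` sit in the working directory with a populated `secrets.env` (the module reads its Azure OpenAI and Cognitive Search settings at import time), and the `sketch_cli.py` name, the `main()` wrapper, and its prompts are illustrative only.

```
# sketch_cli.py -- a minimal console harness for the PATCH 18 smart agent.
# Illustrative only: assumes utils.py, user_profile.json and secrets.env
# from scenarios/incubations/copilot/smart_agent/ are in the working directory.
import json
import os

from utils import (AVAILABLE_FUNCTIONS, FUNCTIONS_SPEC, PERSONA, Smart_Agent,
                   add_to_cache)


def main():
    # Load the demo user profile, mirroring what copilot.py does.
    with open("./user_profile.json") as f:
        user_profile = json.load(f)

    agent = Smart_Agent(
        persona=PERSONA.format(username=user_profile["username"]),
        functions_list=AVAILABLE_FUNCTIONS,
        functions_spec=FUNCTIONS_SPEC,
        init_message=f"Hi {user_profile['username']}, this is Maya, what can I do for you?",
    )

    # A first call with no input returns (init_history, greeting).
    conversation, greeting = agent.run(user_input=None)
    print("assistant:", greeting)

    while True:
        user_input = input("you: ").strip()
        if user_input.lower() in {"exit", "quit"}:
            break
        # With stream=False the answer comes back as a plain string.
        _, query_used, conversation, answer = agent.run(
            user_input=user_input, conversation=conversation, stream=False
        )
        print("assistant:", answer)
        # Persist the answer to the semantic cache when a search was performed.
        if query_used and os.getenv("USE_SEMANTIC_CACHE") == "True":
            add_to_cache(query_used, answer)


if __name__ == "__main__":
    main()
```

Note the design quirk the sketch has to work around: `Smart_Agent.run()` changes its return shape, yielding `(history, greeting)` when `user_input` is `None` and `(stream_flag, query_used, conversation, answer)` otherwise, which is why callers such as `copilot.py` (and this sketch) special-case the first call.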