From 8fd25234dd6888d16ba3aa8511f68c56767ebeb9 Mon Sep 17 00:00:00 2001 From: chetan-hirapara Date: Fri, 17 Jan 2025 05:48:53 +0000 Subject: [PATCH 01/35] updated description --- CSAE_Bot.ipynb | 464 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100644 CSAE_Bot.ipynb diff --git a/CSAE_Bot.ipynb b/CSAE_Bot.ipynb new file mode 100644 index 00000000..7cbc3eb8 --- /dev/null +++ b/CSAE_Bot.ipynb @@ -0,0 +1,464 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fc92e86f-c5a7-499d-89e2-49046aa01be7", + "metadata": {}, + "source": [ + "
\n", + "

\n", + " CSAE Bot: Quickly find your demos of interest by just typing\n", + "
\n", + " \"Teradata\"\n", + "

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "d6c42cf3-ab6f-480d-943e-75be813f912b", + "metadata": {}, + "source": [ + "
\n", + "\n", + "

1. Install required libraries

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d74eee36-e3e8-42ca-9253-4e5ce8178c7b", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "\n", + "!pip install openai langchain langchain-openai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98cfddce-a2b3-4d7d-aae4-1f2dbc463022", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import re\n", + "\n", + "from langchain.document_loaders import DirectoryLoader\n", + "from langchain_openai import OpenAIEmbeddings\n", + "from langchain.vectorstores import FAISS\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.schema import Document\n", + "\n", + "from langchain_community.document_loaders import NotebookLoader\n", + "from langchain_community.document_loaders import DirectoryLoader\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains import RetrievalQA\n", + "import openai" + ] + }, + { + "cell_type": "markdown", + "id": "b4fe3299-d15d-48c8-ae02-a80968c9ad1f", + "metadata": {}, + "source": [ + "
\n", + "\n", + "

2. Extract contents (code, text) from notebook

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b661c301-cfe3-4389-a05e-e996336684e8", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to extract content from a Jupyter notebook\n", + "def extract_notebook_content(file_path):\n", + " with open(file_path, 'r', encoding='utf-8') as f:\n", + " notebook_data = json.load(f)\n", + "\n", + " content = \"\"\n", + " for cell in notebook_data.get('cells', []):\n", + " if cell['cell_type'] == 'markdown':\n", + " # Clean markdown content by removing HTML tags\n", + " content += '\\n'.join(cell['source']) + '\\n\\n'\n", + " elif cell['cell_type'] == 'code':\n", + " # Format code properly\n", + " content += '```python\\n' + ''.join(cell['source']) + '\\n```\\n\\n'\n", + " return content\n", + "\n", + "# Function to remove HTML tags\n", + "def remove_html_tags(text):\n", + " \"\"\"Remove HTML tags from a string\"\"\"\n", + " clean = re.compile('<.*?>')\n", + " return re.sub(clean, '', text)\n", + "\n", + "# Function to split the notebook content into markdown and code\n", + "def split_ipynb_content(content):\n", + " # Regular expression to match code blocks\n", + " code_pattern = re.compile(r'```python(.*?)```', re.DOTALL)\n", + "\n", + " # Find all code blocks\n", + " code_blocks = code_pattern.findall(content)\n", + "\n", + " # Split the content by code blocks\n", + " parts = code_pattern.split(content)\n", + "\n", + " # Combine markdown and code blocks\n", + " result = []\n", + " for i, part in enumerate(parts):\n", + " if i % 2 == 0:\n", + " # This is a markdown part, remove HTML tags\n", + " clean_part = remove_html_tags(part)\n", + " result.append(('markdown', clean_part))\n", + " else:\n", + " # This is a code part\n", + " result.append(('code', part))\n", + "\n", + " return result\n", + "\n", + "# Function to clean and split notebook content\n", + "def clean_and_split_notebook_content(file_path):\n", + " \"\"\"Extract markdown content and clean up the notebook's 
information.\"\"\"\n", + " # Extract the content from the notebook file\n", + " content = extract_notebook_content(file_path)\n", + " \n", + " # Split content into markdown and code cells\n", + " split_content = split_ipynb_content(content)\n", + "\n", + " # Initialize a list to hold combined documents\n", + " combined_documents = []\n", + " current_markdown = \"\"\n", + " current_code = \"\"\n", + "\n", + " # Iterate through the split content to group markdown with code\n", + " for part_type, part in split_content:\n", + " if part_type == 'markdown':\n", + " # If we have code and markdown, combine them\n", + " if current_markdown or current_code:\n", + " combined_documents.append({\"markdown\": current_markdown, \"code\": current_code})\n", + " # Update current markdown to the new one\n", + " current_markdown = part\n", + " current_code = \"\" # Reset code, ready for next code block\n", + " elif part_type == 'code':\n", + " # Append the code to the current code block\n", + " current_code += part\n", + " \n", + " # Add the last document (markdown + code)\n", + " if current_markdown or current_code:\n", + " combined_documents.append({\"markdown\": current_markdown, \"code\": current_code})\n", + "\n", + " return combined_documents" + ] + }, + { + "cell_type": "markdown", + "id": "a27f7379-43bd-43af-9c4f-ad55f2b4035f", + "metadata": {}, + "source": [ + "
\n", + "\n", + "

3. Create FAISS vector database

\n", + "
\n", + "

Note: Please do not run below code multiple time, it is generating embeddings from all the notebooks, it will charge us $0.020 / 1M tokens

\n", + " \n", + " \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d11abb9-225e-4ba8-9041-9e29e738ed3b", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "if not os.getenv(\"OPENAI_API_KEY\"):\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c357a955-d49d-4fec-99a5-6c42f9d204bf", + "metadata": {}, + "outputs": [], + "source": [ + "# Load notebooks and clean them\n", + "path = '/home/jovyan/JupyterLabRoot/'\n", + "loader = DirectoryLoader(path, glob=\"**/*.ipynb\", loader_cls=NotebookLoader)\n", + "notebooks = loader.load()\n", + "\n", + "# Clean each notebook before processing it\n", + "cleaned_documents = []\n", + "for notebook in notebooks:\n", + " # Assuming notebook metadata contains file path\n", + " file_path = notebook.metadata.get(\"source\", \"Unknown\") # Adjust this as needed\n", + " cleaned_data = clean_and_split_notebook_content(file_path)\n", + "\n", + " # Convert cleaned data to documents, including the source file path\n", + " for data in cleaned_data:\n", + " if data['markdown'] or data['code']:\n", + " doc = Document(\n", + " page_content=f\"Markdown:\\n{data['markdown']} \\n\\nCode:\\n{data['code']}\",\n", + " metadata={\"source\": file_path} # Ensure the source file path is added\n", + " )\n", + " cleaned_documents.append(doc)\n", + " \n", + " \n", + "# Split text into manageable chunks\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)\n", + "docs = text_splitter.split_documents(cleaned_documents)\n", + "\n", + "\n", + "# for count of token\n", + "from tiktoken import encoding_for_model\n", + "\n", + "def count_document_tokens(document, model_name=\"gpt-4\"):\n", + " encoder = encoding_for_model(model_name)\n", + " return len(encoder.encode(document.page_content))\n", + "\n", + "tiktokn = 0\n", + "for doc in 
cleaned_documents:\n", + " tiktokn = tiktokn + count_document_tokens(doc)\n", + "\n", + "print(\"total token from all the notebooks: \", tiktokn)\n", + "\n", + "# Create vector store using embeddings\n", + "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", + "vector_store = FAISS.from_documents(docs, embeddings)\n", + "\n", + "# Save the index for reuse\n", + "vector_store.save_local(\"notebooks_index\")" + ] + }, + { + "cell_type": "markdown", + "id": "5d754281-77b6-4f68-a41d-2323cdc020ad", + "metadata": {}, + "source": [ + "
\n", + "

4. Load existing vector database and define RAG

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5565e54c-6303-4648-81cf-9f2fe44c1de0", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the FAISS index with dangerous deserialization enabled\n", + "vector_store = FAISS.load_local(\n", + " \"notebooks_index\", embeddings, allow_dangerous_deserialization=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dbb1de5-2966-474f-86a4-a1aff224e9e8", + "metadata": {}, + "outputs": [], + "source": [ + "# Custom Prompt Template\n", + "CUSTOM_PROMPT = \"\"\"\n", + "You are a helpful assistant. Use the following retrieved information from Jupyter notebooks to provide:\n", + "1. A **clean and concise textual explanation** based on the question and notebook markdown.\n", + "2. **Relevant Clean Python code** extracted from the notebooks' code cells that are related to the question. Please filter the code that is related to the query.\n", + "\n", + "If no relevant information is found, politely say so.\n", + "\n", + "Context:\n", + "{context}\n", + "\n", + "Question:\n", + "{question}\n", + "\n", + "Your response should be in below format:\n", + "- Answer:\n", + "- Relevant Code:\n", + "\"\"\"\n", + "\n", + "prompt = PromptTemplate(\n", + " input_variables=[\"context\", \"question\"],\n", + " template=CUSTOM_PROMPT\n", + ")\n", + "\n", + "# Make sure to use a Chat model like 'gpt-4' or 'gpt-3.5-turbo'\n", + "chat_model = ChatOpenAI(model=\"gpt-4\")\n", + "\n", + "# Retrieval QA Chain\n", + "qa_chain = RetrievalQA.from_chain_type(\n", + " llm=chat_model,\n", + " retriever=vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 10}), # Assuming vector_store is your vector database\n", + " return_source_documents=True,\n", + " chain_type_kwargs={\"prompt\": prompt}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d2efb7c-dfa6-4240-b3ae-e40337685705", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to 
Query Chatbot\n", + "def query_chatbot(question):\n", + " # Query the chatbot using the chain\n", + " result = qa_chain.invoke(question)\n", + " answer = result[\"result\"]\n", + "\n", + " # Extract and format relevant source paths from source documents\n", + " source_docs = result.get(\"source_documents\", [])\n", + " sources = \"\\n\".join(set([doc.metadata.get(\"source\", \"Unknown\") for doc in source_docs]))\n", + "\n", + " return f\"\"\"\n", + "{answer}\n", + "\n", + "Reference Notebook(s):\n", + "{sources if sources else \"No source notebooks found.\"}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8e60ece-6bd2-495a-8d11-ec545c11be87", + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import display, Markdown, HTML, Javascript\n", + "import re\n", + "\n", + "def extract_answer_code_references(input_string):\n", + " # Extract the answer section (after \"- Answer:\" and before \"- Relevant Code:\")\n", + " answer_match = re.search(r'- Answer:\\s*(.*?)\\s*- Relevant Code:', input_string, re.DOTALL)\n", + " answer = answer_match.group(1).strip() if answer_match else \"Answer not found.\"\n", + "\n", + " # Extract the relevant code section (after \"- Relevant Code:\" and before any other section)\n", + " code_match = re.search(r'- Relevant Code:\\s*```python\\s*(.*?)```', input_string, re.DOTALL)\n", + " relevant_code = code_match.group(1).strip() if code_match else \"Relevant code not found.\"\n", + "\n", + " # Extract references and create JupyterLab-compatible links\n", + " references = re.findall(r'(/home/[^\\s]+)', input_string)\n", + " \n", + " # Check and format paths to open in JupyterLab (ensure paths are relative to /notebooks/)\n", + " html_output = \"\\n\\n\".join([f' -> {ref} ' for ref in references]) if references else \"No references found.\"\n", + " \n", + " return answer, relevant_code, html_output\n", + "\n", + "def display_answer(question):\n", + " # Query the chatbot for the 
answer\n", + " result = query_chatbot(question) \n", + " \n", + " # Extract formatted outputs\n", + " answer, relevant_code, source_notebooks = extract_answer_code_references(result)\n", + " \n", + " # Display answer as markdown\n", + " display(Markdown(f\"**Answer:**\\n\\n{answer}\"))\n", + " \n", + " # Display relevant code as code\n", + " display(Markdown(\"**Relevant Code:**\\n\\n```python\\n\" + relevant_code.strip() + \"\\n```\"))\n", + " \n", + " # Display reference notebooks\n", + " display(Markdown(f\"**Reference Notebook(s):**\\n\\n{source_notebooks}\"))\n" + ] + }, + { + "cell_type": "markdown", + "id": "f443eeaf-b913-4dd4-839b-e950b228a3c5", + "metadata": {}, + "source": [ + "
\n", + "

5. You can try your own question

\n", + "\n", + "\n", + "

Here are some sample questions that you can try out:

\n", + "\n", + "
    \n", + "
  1. How does VectorDistance work?
  2. \n", + "
  3. What is Script table operator?
  4. \n", + "
  5. Give me demos that use AWS Bedrock.
  6. \n", + "
  7. What is GEOSEQUENCE? Show me some examples
  8. \n", + "
  9. Which notebooks are using OpenAI?
  10. \n", + "
  11. Which notebooks are about fraud detection?
  12. \n", + "
  13. How do I use TDApiClient to generate embeddings?
  14. \n", + "
  15. Show me a demo for Broken Digital Journey.
  16. \n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b88f09b4-128f-4680-9d84-e15a491f431a", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "query = input(\"Enter your query here. To stop, type 'exit'. Query:\")\n", + "while query.lower() != 'exit':\n", + " loading_spinner3 = widgets.HTML(\n", + " value=\" Please wait while we are extracing the answer for you...\",\n", + " )\n", + "\n", + " display(loading_spinner3)\n", + " \n", + " display_answer(query)\n", + " loading_spinner3.value = \"\"\n", + " display(Markdown(\"
\"))\n", + " query = input(\"Please enter your query here...\")\n", + " \n", + "display(Markdown(\"**Thank you for chatting with us. I hope I was able to assist you.**\"))" + ] + }, + { + "cell_type": "markdown", + "id": "2fc3a070-33cf-4787-8c66-cc221075fdd5", + "metadata": {}, + "source": [ + "
\n", + "
ClearScape Analytics™
\n", + "
\n", + "
\n", + " Copyright © Teradata Corporation - 2023. All Rights Reserved\n", + "
\n", + "
\n", + "
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 42dc4e97371adfb0bbcea9ccd1528d5ffeaa809f Mon Sep 17 00:00:00 2001 From: dallasbowden Date: Fri, 17 Jan 2025 23:19:22 +0000 Subject: [PATCH 02/35] Added Restart Kernel notice. Added yaml file. --- .CSAE_Bot.yaml | 7 +++++++ CSAE_Bot.ipynb | 19 ++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 .CSAE_Bot.yaml diff --git a/.CSAE_Bot.yaml b/.CSAE_Bot.yaml new file mode 100644 index 00000000..8803bd37 --- /dev/null +++ b/.CSAE_Bot.yaml @@ -0,0 +1,7 @@ +inputs: + - type: env + value: 'JUPYTER_NOTEBOOK_CI_OPEN_AI_KEY' + cell: 8 + - type: text + value: 'exit' + prompt: "Enter your query here. To stop, type 'exit'. Query:" diff --git a/CSAE_Bot.ipynb b/CSAE_Bot.ipynb index 7cbc3eb8..312bc7f7 100644 --- a/CSAE_Bot.ipynb +++ b/CSAE_Bot.ipynb @@ -36,6 +36,16 @@ "!pip install openai langchain langchain-openai" ] }, + { + "cell_type": "markdown", + "id": "87d04ebc-4e5c-4e11-8094-fef11cf0b017", + "metadata": {}, + "source": [ + "
\n", + "

Note: Please restart the kernel. The simplest way is to type 0 0 (zero zero) and then press Enter.

\n", + "
" + ] + }, { "cell_type": "code", "execution_count": null, @@ -167,8 +177,7 @@ "\n", "

3. Create FAISS vector database

\n", "
\n", - "

Note: Please do not run below code multiple time, it is generating embeddings from all the notebooks, it will charge us $0.020 / 1M tokens

\n", - " \n", + "

Note: You do not have to run the next cell multiple times. Each time it is executed, it will generate over 1M embeddings, and the charge is typically $0.02 USD / 1M tokens.

\n", " \n", "\n" ] @@ -239,7 +248,7 @@ "vector_store = FAISS.from_documents(docs, embeddings)\n", "\n", "# Save the index for reuse\n", - "vector_store.save_local(\"notebooks_index\")" + "vector_store.save_local(\".notebooks_index\")" ] }, { @@ -260,7 +269,7 @@ "source": [ "# Load the FAISS index with dangerous deserialization enabled\n", "vector_store = FAISS.load_local(\n", - " \"notebooks_index\", embeddings, allow_dangerous_deserialization=True\n", + " \".notebooks_index\", embeddings, allow_dangerous_deserialization=True\n", ")" ] }, @@ -433,7 +442,7 @@ "
ClearScape Analytics™
\n", "
\n", "
\n", - " Copyright © Teradata Corporation - 2023. All Rights Reserved\n", + " Copyright © Teradata Corporation - 2025. All Rights Reserved\n", "
\n", "
\n", "" From f9e02f3e00726b0d815c176ee586ec85b1897dd7 Mon Sep 17 00:00:00 2001 From: chetan-hirapara Date: Mon, 27 Jan 2025 17:24:29 +0000 Subject: [PATCH 03/35] Added Panel chatbot --- CSAE_Bot.ipynb | 1078 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 963 insertions(+), 115 deletions(-) diff --git a/CSAE_Bot.ipynb b/CSAE_Bot.ipynb index 312bc7f7..7c343bdc 100644 --- a/CSAE_Bot.ipynb +++ b/CSAE_Bot.ipynb @@ -26,14 +26,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "d74eee36-e3e8-42ca-9253-4e5ce8178c7b", "metadata": {}, "outputs": [], "source": [ "%%capture\n", "\n", - "!pip install openai langchain langchain-openai" + "!pip install openai langchain langchain-openai panel==1.3.4" ] }, { @@ -48,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "98cfddce-a2b3-4d7d-aae4-1f2dbc463022", "metadata": {}, "outputs": [], @@ -69,7 +69,69 @@ "from langchain_openai import ChatOpenAI\n", "from langchain.prompts import PromptTemplate\n", "from langchain.chains import RetrievalQA\n", - "import openai" + "import openai\n", + "\n", + "from teradataml import *" + ] + }, + { + "cell_type": "markdown", + "id": "1320f4b6-15ab-4d5b-8bcd-56fb9154d5ca", + "metadata": {}, + "source": [ + "
\n", + "\n", + "

1.1 Connect to Vantage

\n", + "

We will be prompted to provide the password. We will enter the password, press the Enter key, and then use the down arrow to go to the next cell.

" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "882f4318-bbb0-4214-ab97-316ded717794", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Performing setup ...\n", + "Setup complete\n" + ] + }, + { + "name": "stdin", + "output_type": "stream", + "text": [ + "\n", + "Enter password: ·········\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "... Logon successful\n", + "Connected as: teradatasql://demo_user:xxxxx@host.docker.internal/dbc\n", + "Engine(teradatasql://demo_user:***@host.docker.internal)\n" + ] + }, + { + "data": { + "text/plain": [ + "TeradataCursor uRowsHandle=37 bClosed=False" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%run -i ./UseCases/startup.ipynb\n", + "eng = create_context(host = 'host.docker.internal', username='demo_user', password = password)\n", + "print(eng)\n", + "execute_sql('''SET query_band='DEMO= CSAE_Bot.ipynb;' UPDATE FOR SESSION;''')" ] }, { @@ -84,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "b661c301-cfe3-4389-a05e-e996336684e8", "metadata": {}, "outputs": [], @@ -184,10 +246,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "4d11abb9-225e-4ba8-9041-9e29e738ed3b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Enter your OpenAI API key: ························································\n" + ] + } + ], "source": [ "import getpass\n", "import os\n", @@ -198,57 +268,114 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "c357a955-d49d-4fec-99a5-6c42f9d204bf", + "execution_count": 8, + "id": "fe6c268a-7004-48f2-8cf4-fb401b363235", "metadata": {}, "outputs": [], "source": [ - "# Load notebooks and clean them\n", - "path = '/home/jovyan/JupyterLabRoot/'\n", - "loader = DirectoryLoader(path, 
glob=\"**/*.ipynb\", loader_cls=NotebookLoader)\n", - "notebooks = loader.load()\n", - "\n", - "# Clean each notebook before processing it\n", - "cleaned_documents = []\n", - "for notebook in notebooks:\n", - " # Assuming notebook metadata contains file path\n", - " file_path = notebook.metadata.get(\"source\", \"Unknown\") # Adjust this as needed\n", - " cleaned_data = clean_and_split_notebook_content(file_path)\n", - "\n", - " # Convert cleaned data to documents, including the source file path\n", - " for data in cleaned_data:\n", - " if data['markdown'] or data['code']:\n", - " doc = Document(\n", - " page_content=f\"Markdown:\\n{data['markdown']} \\n\\nCode:\\n{data['code']}\",\n", - " metadata={\"source\": file_path} # Ensure the source file path is added\n", - " )\n", - " cleaned_documents.append(doc)\n", - " \n", - " \n", - "# Split text into manageable chunks\n", - "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)\n", - "docs = text_splitter.split_documents(cleaned_documents)\n", - "\n", - "\n", - "# for count of token\n", - "from tiktoken import encoding_for_model\n", - "\n", - "def count_document_tokens(document, model_name=\"gpt-4\"):\n", - " encoder = encoding_for_model(model_name)\n", - " return len(encoder.encode(document.page_content))\n", - "\n", - "tiktokn = 0\n", - "for doc in cleaned_documents:\n", - " tiktokn = tiktokn + count_document_tokens(doc)\n", - "\n", - "print(\"total token from all the notebooks: \", tiktokn)\n", - "\n", - "# Create vector store using embeddings\n", - "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", - "vector_store = FAISS.from_documents(docs, embeddings)\n", - "\n", - "# Save the index for reuse\n", - "vector_store.save_local(\".notebooks_index\")" + "def generate_emb():\n", + " # Load notebooks and clean them\n", + " path = '/home/jovyan/JupyterLabRoot/'\n", + " loader = DirectoryLoader(path, glob=\"**/*.ipynb\", loader_cls=NotebookLoader)\n", + " notebooks = 
loader.load()\n", + "\n", + " # Clean each notebook before processing it\n", + " cleaned_documents = []\n", + " for notebook in notebooks:\n", + " # Assuming notebook metadata contains file path\n", + " file_path = notebook.metadata.get(\"source\", \"Unknown\") # Adjust this as needed\n", + " cleaned_data = clean_and_split_notebook_content(file_path)\n", + "\n", + " # Convert cleaned data to documents, including the source file path\n", + " for data in cleaned_data:\n", + " if data['markdown'] or data['code']:\n", + " doc = Document(\n", + " page_content=f\"Markdown:\\n{data['markdown']} \\n\\nCode:\\n{data['code']}\",\n", + " metadata={\"source\": file_path} # Ensure the source file path is added\n", + " )\n", + " cleaned_documents.append(doc)\n", + "\n", + "\n", + " # Split text into manageable chunks\n", + " text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)\n", + " docs = text_splitter.split_documents(cleaned_documents)\n", + "\n", + "\n", + " # for count of token\n", + " from tiktoken import encoding_for_model\n", + "\n", + " def count_document_tokens(document, model_name=\"gpt-4\"):\n", + " encoder = encoding_for_model(model_name)\n", + " return len(encoder.encode(document.page_content))\n", + "\n", + " tiktokn = 0\n", + " for doc in cleaned_documents:\n", + " tiktokn = tiktokn + count_document_tokens(doc)\n", + "\n", + " print(\"total token from all the notebooks: \", tiktokn)\n", + "\n", + " # Create vector store using embeddings\n", + " embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", + " vector_store = FAISS.from_documents(docs, embeddings)\n", + "\n", + " # Save the index for reuse\n", + " vector_store.save_local(\".notebooks_index\")\n", + " return vector_store" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2f34de3e-b917-4b5d-9448-c9341a9980fa", + "metadata": {}, + "outputs": [], + "source": [ + "def load_emb():\n", + " # Load the FAISS index with dangerous deserialization 
enabled\n", + " # Create vector store using embeddings\n", + " embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", + "\n", + " return FAISS.load_local(\n", + " \".notebooks_index\", embeddings, allow_dangerous_deserialization=True\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c357a955-d49d-4fec-99a5-6c42f9d204bf", + "metadata": {}, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Do you want to generate embeddings? ('yes'/'no'): no\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading existing embeddings...\n" + ] + } + ], + "source": [ + "# Request user's input\n", + "generate = input(\"Do you want to generate embeddings? ('yes'/'no'): \")\n", + "\n", + "# Check the user's input\n", + "if generate.lower() == \"yes\":\n", + " vector_store = generate_emb()\n", + "elif generate.lower() == \"no\":\n", + " try:\n", + " print('Loading existing embeddings...')\n", + " vector_store = load_emb()\n", + " except:\n", + " print('Embeddings not found, generating now..')\n", + " generate_emb()\n", + " vector_store = load_emb()" ] }, { @@ -262,20 +389,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "5565e54c-6303-4648-81cf-9f2fe44c1de0", "metadata": {}, "outputs": [], "source": [ - "# Load the FAISS index with dangerous deserialization enabled\n", - "vector_store = FAISS.load_local(\n", - " \".notebooks_index\", embeddings, allow_dangerous_deserialization=True\n", - ")" + "# # Load the FAISS index with dangerous deserialization enabled\n", + "# # Create vector store using embeddings\n", + "# embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", + "\n", + "# vector_store = FAISS.load_local(\n", + "# \"notebooks_index\", embeddings, allow_dangerous_deserialization=True\n", + "# )" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "6dbb1de5-2966-474f-86a4-a1aff224e9e8", 
"metadata": {}, "outputs": [], @@ -285,7 +415,7 @@ "You are a helpful assistant. Use the following retrieved information from Jupyter notebooks to provide:\n", "1. A **clean and concise textual explanation** based on the question and notebook markdown.\n", "2. **Relevant Clean Python code** extracted from the notebooks' code cells that are related to the question. Please filter the code that is related to the query.\n", - "\n", + "3. Extract the source documents\n", "If no relevant information is found, politely say so.\n", "\n", "Context:\n", @@ -295,8 +425,9 @@ "{question}\n", "\n", "Your response should be in below format:\n", - "- Answer:\n", - "- Relevant Code:\n", + "##Answer:\n", + "##Relevant Code:\n", + "##Source documents:\n", "\"\"\"\n", "\n", "prompt = PromptTemplate(\n", @@ -305,7 +436,7 @@ ")\n", "\n", "# Make sure to use a Chat model like 'gpt-4' or 'gpt-3.5-turbo'\n", - "chat_model = ChatOpenAI(model=\"gpt-4\")\n", + "chat_model = ChatOpenAI(model=\"gpt-4o-mini\")\n", "\n", "# Retrieval QA Chain\n", "qa_chain = RetrievalQA.from_chain_type(\n", @@ -318,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "9d2efb7c-dfa6-4240-b3ae-e40337685705", "metadata": {}, "outputs": [], @@ -343,46 +474,788 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "b8e60ece-6bd2-495a-8d11-ec545c11be87", "metadata": {}, "outputs": [], "source": [ "from IPython.display import display, Markdown, HTML, Javascript\n", "import re\n", + "import textwrap\n", "\n", "def extract_answer_code_references(input_string):\n", - " # Extract the answer section (after \"- Answer:\" and before \"- Relevant Code:\")\n", - " answer_match = re.search(r'- Answer:\\s*(.*?)\\s*- Relevant Code:', input_string, re.DOTALL)\n", - " answer = answer_match.group(1).strip() if answer_match else \"Answer not found.\"\n", - "\n", - " # Extract the relevant code section (after \"- Relevant Code:\" and before any other 
section)\n", - " code_match = re.search(r'- Relevant Code:\\s*```python\\s*(.*?)```', input_string, re.DOTALL)\n", - " relevant_code = code_match.group(1).strip() if code_match else \"Relevant code not found.\"\n", "\n", " # Extract references and create JupyterLab-compatible links\n", " references = re.findall(r'(/home/[^\\s]+)', input_string)\n", " \n", " # Check and format paths to open in JupyterLab (ensure paths are relative to /notebooks/)\n", - " html_output = \"\\n\\n\".join([f' -> {ref} ' for ref in references]) if references else \"No references found.\"\n", - " \n", - " return answer, relevant_code, html_output\n", + " html_output = [f' -> {ref.split(\"/\")[-1]} ' for ref in references]\n", + " html_output2 = []\n", + " for t in html_output:\n", + " html_output2.append(textwrap.fill(t, width=100))\n", + " return \"\\n\\n\".join(html_output2)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "8b8e3755-2398-4e51-9479-3fe810f34beb", + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + " var py_version = '3.3.4'.replace('rc', '-rc.').replace('.dev', '-dev.');\n", + " var reloading = false;\n", + " var Bokeh = root.Bokeh;\n", + "\n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks;\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls 
= [];\n", + " if (js_modules == null) js_modules = [];\n", + " if (js_exports == null) js_exports = {};\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + "\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " if (!reloading) {\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " }\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + " window._bokeh_on_load = on_load\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " var skip = [];\n", + " if (window.requirejs) {\n", + " window.requirejs.config({'packages': {}, 'paths': {'jspanel': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 
'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n", + " require([\"jspanel\"], function(jsPanel) {\n", + "\twindow.jsPanel = jsPanel\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-modal\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-tooltip\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-hint\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-layout\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-contextmenu\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-dock\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"gridstack\"], function(GridStack) {\n", + "\twindow.GridStack = GridStack\n", + "\ton_load()\n", + " })\n", + " require([\"notyf\"], function() {\n", + "\ton_load()\n", + " })\n", + " root._bokeh_is_loading = css_urls.length + 9;\n", + " } else {\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n", + " }\n", + "\n", + " var existing_stylesheets = []\n", + " var links = document.getElementsByTagName('link')\n", + " for (var i = 0; i < links.length; i++) {\n", + " var link = links[i]\n", + " if (link.href != null) {\n", + "\texisting_stylesheets.push(link.href)\n", + " }\n", + " }\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " if (existing_stylesheets.indexOf(url) !== -1) {\n", + "\ton_load()\n", + "\tcontinue;\n", + " }\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof HTMLElement))) || window.requirejs) 
{\n", + " var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n", + " for (var i = 0; i < urls.length; i++) {\n", + " skip.push(urls[i])\n", + " }\n", + " } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n", + " var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n", + " for (var i = 0; i < urls.length; i++) {\n", + " skip.push(urls[i])\n", + " }\n", + " } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n", + " var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n", + " for (var i = 0; i < urls.length; i++) {\n", + " skip.push(urls[i])\n", + " }\n", + " } var existing_scripts = []\n", + " var scripts = document.getElementsByTagName('script')\n", + " for (var i = 0; i < scripts.length; i++) {\n", + " var script = scripts[i]\n", + " if (script.src != null) {\n", + "\texisting_scripts.push(script.src)\n", + " }\n", + " }\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) 
!== -1) {\n", + "\tif (!window.requirejs) {\n", + "\t on_load();\n", + "\t}\n", + "\tcontinue;\n", + " }\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " for (var i = 0; i < js_modules.length; i++) {\n", + " var url = js_modules[i];\n", + " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", + "\tif (!window.requirejs) {\n", + "\t on_load();\n", + "\t}\n", + "\tcontinue;\n", + " }\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " element.type = \"module\";\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " for (const name in js_exports) {\n", + " var url = js_exports[name];\n", + " if (skip.indexOf(url) >= 0 || root[name] != null) {\n", + "\tif (!window.requirejs) {\n", + "\t on_load();\n", + "\t}\n", + "\tcontinue;\n", + " }\n", + " var element = document.createElement('script');\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.type = \"module\";\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " element.textContent = `\n", + " import ${name} from \"${url}\"\n", + " window.${name} = ${name}\n", + " window._bokeh_on_load()\n", + " `\n", + " document.head.appendChild(element);\n", + " }\n", + " if (!js_urls.length && !js_modules.length) {\n", + " on_load()\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " 
document.body.appendChild(element);\n", + " }\n", + "\n", + " var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.3.4.min.js\", \"https://cdn.holoviz.org/panel/1.3.4/dist/bundled/material-components-web@7.0.0/dist/material-components-web.min.js\", \"https://cdn.holoviz.org/panel/1.3.4/dist/panel.min.js\"];\n", + " var js_modules = [];\n", + " var js_exports = {};\n", + " var css_urls = [\"https://fonts.googleapis.com/css?family=Roboto:300,400,500\", \"https://fonts.googleapis.com/css?family=Material+Icons&display=block\"];\n", + " var inline_js = [ function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + "function(Bokeh) {} // ensure no trailing comma for IE\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " if ((root.Bokeh !== undefined) || (force === true)) {\n", + " for (var i = 0; i < inline_js.length; i++) {\n", + "\ttry {\n", + " inline_js[i].call(root, root.Bokeh);\n", + "\t} catch(e) {\n", + "\t if (!reloading) {\n", + "\t throw e;\n", + "\t }\n", + "\t}\n", + " }\n", + " // Cache old bokeh versions\n", + " if (Bokeh != undefined && !reloading) {\n", + "\tvar NewBokeh = root.Bokeh;\n", + "\tif (Bokeh.versions === undefined) {\n", + "\t Bokeh.versions = new Map();\n", + "\t}\n", + "\tif (NewBokeh.version !== Bokeh.version) {\n", + "\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n", + "\t}\n", + "\troot.Bokeh = Bokeh;\n", + " }} else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " }\n", + " root._bokeh_is_initializing = false\n", + " }\n", + "\n", + " function load_or_wait() {\n", + " // Implement a backoff loop that 
tries to ensure we do not load multiple\n", + " // versions of Bokeh and its dependencies at the same time.\n", + " // In recent versions we use the root._bokeh_is_initializing flag\n", + " // to determine whether there is an ongoing attempt to initialize\n", + " // bokeh, however for backward compatibility we also try to ensure\n", + " // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n", + " // before older versions are fully initialized.\n", + " if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n", + " root._bokeh_is_initializing = false;\n", + " root._bokeh_onload_callbacks = undefined;\n", + " console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n", + " load_or_wait();\n", + " } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n", + " setTimeout(load_or_wait, 100);\n", + " } else {\n", + " root._bokeh_is_initializing = true\n", + " root._bokeh_onload_callbacks = []\n", + " var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n", + " if (!reloading && !bokeh_loaded) {\n", + "\troot.Bokeh = undefined;\n", + " }\n", + " load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n", + "\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + "\trun_inline_js();\n", + " });\n", + " }\n", + " }\n", + " // Give older versions of the autoload script a head-start to ensure\n", + " // they initialize before we start loading newer version.\n", + " setTimeout(load_or_wait, 100)\n", + "}(window));" + ], + "application/vnd.holoviews_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = '3.3.4'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var reloading = false;\n var Bokeh = root.Bokeh;\n\n if (typeof 
(root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {'jspanel': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 
'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n require([\"jspanel\"], function(jsPanel) {\n\twindow.jsPanel = jsPanel\n\ton_load()\n })\n require([\"jspanel-modal\"], function() {\n\ton_load()\n })\n require([\"jspanel-tooltip\"], function() {\n\ton_load()\n })\n require([\"jspanel-hint\"], function() {\n\ton_load()\n })\n require([\"jspanel-layout\"], function() {\n\ton_load()\n })\n require([\"jspanel-contextmenu\"], function() {\n\ton_load()\n })\n require([\"jspanel-dock\"], function() {\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 9;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof 
HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = 
document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.3.4.min.js\", \"https://cdn.holoviz.org/panel/1.3.4/dist/bundled/material-components-web@7.0.0/dist/material-components-web.min.js\", 
\"https://cdn.holoviz.org/panel/1.3.4/dist/panel.min.js\"];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [\"https://fonts.googleapis.com/css?family=Roboto:300,400,500\", \"https://fonts.googleapis.com/css?family=Material+Icons&display=block\"];\n var inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n\ttry {\n inline_js[i].call(root, root.Bokeh);\n\t} catch(e) {\n\t if (!reloading) {\n\t throw e;\n\t }\n\t}\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n 
load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + "if ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n", + " window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n", + "}\n", + "\n", + "\n", + " function JupyterCommManager() {\n", + " }\n", + "\n", + " JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n", + " if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", + " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", + " comm_manager.register_target(comm_id, function(comm) {\n", + " comm.on_msg(msg_handler);\n", + " });\n", + " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", + " window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n", + " comm.onMsg = msg_handler;\n", + " });\n", + " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", + " google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n", + " var 
messages = comm.messages[Symbol.asyncIterator]();\n", + " function processIteratorResult(result) {\n", + " var message = result.value;\n", + " console.log(message)\n", + " var content = {data: message.data, comm_id};\n", + " var buffers = []\n", + " for (var buffer of message.buffers || []) {\n", + " buffers.push(new DataView(buffer))\n", + " }\n", + " var metadata = message.metadata || {};\n", + " var msg = {content, buffers, metadata}\n", + " msg_handler(msg);\n", + " return messages.next().then(processIteratorResult);\n", + " }\n", + " return messages.next().then(processIteratorResult);\n", + " })\n", + " }\n", + " }\n", + "\n", + " JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n", + " if (comm_id in window.PyViz.comms) {\n", + " return window.PyViz.comms[comm_id];\n", + " } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", + " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", + " var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n", + " if (msg_handler) {\n", + " comm.on_msg(msg_handler);\n", + " }\n", + " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", + " var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n", + " comm.open();\n", + " if (msg_handler) {\n", + " comm.onMsg = msg_handler;\n", + " }\n", + " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", + " var comm_promise = google.colab.kernel.comms.open(comm_id)\n", + " comm_promise.then((comm) => {\n", + " window.PyViz.comms[comm_id] = comm;\n", + " if (msg_handler) {\n", + " var messages = comm.messages[Symbol.asyncIterator]();\n", + " function processIteratorResult(result) {\n", + " var message = result.value;\n", + " var content = {data: message.data};\n", + " var metadata = message.metadata || {comm_id};\n", + " var msg = {content, metadata}\n", + " msg_handler(msg);\n", + " 
return messages.next().then(processIteratorResult);\n", + " }\n", + " return messages.next().then(processIteratorResult);\n", + " }\n", + " })\n", + " var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n", + " return comm_promise.then((comm) => {\n", + " comm.send(data, metadata, buffers, disposeOnDone);\n", + " });\n", + " };\n", + " var comm = {\n", + " send: sendClosure\n", + " };\n", + " }\n", + " window.PyViz.comms[comm_id] = comm;\n", + " return comm;\n", + " }\n", + " window.PyViz.comm_manager = new JupyterCommManager();\n", + " \n", + "\n", + "\n", + "var JS_MIME_TYPE = 'application/javascript';\n", + "var HTML_MIME_TYPE = 'text/html';\n", + "var EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\n", + "var CLASS_NAME = 'output';\n", + "\n", + "/**\n", + " * Render data to the DOM node\n", + " */\n", + "function render(props, node) {\n", + " var div = document.createElement(\"div\");\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(div);\n", + " node.appendChild(script);\n", + "}\n", + "\n", + "/**\n", + " * Handle when a new output is added\n", + " */\n", + "function handle_add_output(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + " if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + " var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + " if (id !== undefined) {\n", + " var nchildren = toinsert.length;\n", + " var html_node = toinsert[nchildren-1].children[0];\n", + " html_node.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var scripts = [];\n", + " var nodelist = html_node.querySelectorAll(\"script\");\n", + " for (var i in nodelist) {\n", + " if (nodelist.hasOwnProperty(i)) {\n", + " scripts.push(nodelist[i])\n", + " }\n", + " }\n", + "\n", + " scripts.forEach( function (oldScript) {\n", + " var 
newScript = document.createElement(\"script\");\n", + " var attrs = [];\n", + " var nodemap = oldScript.attributes;\n", + " for (var j in nodemap) {\n", + " if (nodemap.hasOwnProperty(j)) {\n", + " attrs.push(nodemap[j])\n", + " }\n", + " }\n", + " attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n", + " newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n", + " oldScript.parentNode.replaceChild(newScript, oldScript);\n", + " });\n", + " if (JS_MIME_TYPE in output.data) {\n", + " toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n", + " }\n", + " output_area._hv_plot_id = id;\n", + " if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n", + " window.PyViz.plot_index[id] = Bokeh.index[id];\n", + " } else {\n", + " window.PyViz.plot_index[id] = null;\n", + " }\n", + " } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + "}\n", + "\n", + "/**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + "function handle_clear_output(event, handle) {\n", + " var id = handle.cell.output_area._hv_plot_id;\n", + " var server_id = handle.cell.output_area._bokeh_server_id;\n", + " if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n", + " var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n", + " if (server_id !== null) {\n", + " comm.send({event_type: 
'server_delete', 'id': server_id});\n", + " return;\n", + " } else if (comm !== null) {\n", + " comm.send({event_type: 'delete', 'id': id});\n", + " }\n", + " delete PyViz.plot_index[id];\n", + " if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n", + " var doc = window.Bokeh.index[id].model.document\n", + " doc.clear();\n", + " const i = window.Bokeh.documents.indexOf(doc);\n", + " if (i > -1) {\n", + " window.Bokeh.documents.splice(i, 1);\n", + " }\n", + " }\n", + "}\n", + "\n", + "/**\n", + " * Handle kernel restart event\n", + " */\n", + "function handle_kernel_cleanup(event, handle) {\n", + " delete PyViz.comms[\"hv-extension-comm\"];\n", + " window.PyViz.plot_index = {}\n", + "}\n", + "\n", + "/**\n", + " * Handle update_display_data messages\n", + " */\n", + "function handle_update_output(event, handle) {\n", + " handle_clear_output(event, {cell: {output_area: handle.output_area}})\n", + " handle_add_output(event, handle)\n", + "}\n", + "\n", + "function register_renderer(events, OutputArea) {\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[0]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " events.on('output_added.OutputArea', handle_add_output);\n", + " events.on('output_updated.OutputArea', handle_update_output);\n", + " events.on('clear_output.CodeCell', handle_clear_output);\n", + " events.on('delete.Cell', handle_clear_output);\n", + " events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n", + "\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " safe: true,\n", + " index: 0\n", + " });\n", + "}\n", + "\n", + "if 
(window.Jupyter !== undefined) {\n", + " try {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " } catch(err) {\n", + " }\n", + "}\n" + ], + "application/vnd.holoviews_load.v0+json": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return 
window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n })\n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, 
handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared 
or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n 
OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ] + }, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "cb02b6be-806d-46b4-b6e7-db9506cb4142" + } + }, + "output_type": "display_data" + }, + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + "ChatInterface(_button_data={'send': _ChatButtonData(i...}, _input_container=Row, _input_layout=Row, _placeholder=ChatMessage, _widgets={'TextInput': TextInput(cs...}, callback=" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "b88f09b4-128f-4680-9d84-e15a491f431a", - "metadata": {}, - "outputs": [], - "source": [ - "import ipywidgets as widgets\n", - "\n", - "query = input(\"Enter your query here. To stop, type 'exit'. Query:\")\n", - "while query.lower() != 'exit':\n", - " loading_spinner3 = widgets.HTML(\n", - " value=\" Please wait while we are extracing the answer for you...\",\n", - " )\n", - "\n", - " display(loading_spinner3)\n", - " \n", - " display_answer(query)\n", - " loading_spinner3.value = \"\"\n", - " display(Markdown(\"
\"))\n", - " query = input(\"Please enter your query here...\")\n", - " \n", - "display(Markdown(\"**Thank you for chatting with us. I hope I was able to assist you.**\"))" - ] - }, { "cell_type": "markdown", "id": "2fc3a070-33cf-4787-8c66-cc221075fdd5", From d6d7c8d2d7f6d213913cec81652d6b2c713d4675 Mon Sep 17 00:00:00 2001 From: Pratik Somwanshi Date: Tue, 28 Jan 2025 11:17:27 +0000 Subject: [PATCH 04/35] Fixed the notebook --- ...2_ModelOps_Model_Factory_REST_Python.ipynb | 41 +++++++++++-------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/ModelOps/12_ModelOps_Model_Factory_REST_Python.ipynb b/ModelOps/12_ModelOps_Model_Factory_REST_Python.ipynb index 93ebacbc..1249d7d4 100644 --- a/ModelOps/12_ModelOps_Model_Factory_REST_Python.ipynb +++ b/ModelOps/12_ModelOps_Model_Factory_REST_Python.ipynb @@ -96,7 +96,8 @@ "metadata": {}, "outputs": [], "source": [ - "#%pip install -q teradataml==17.20.0.6 teradatamodelops==7.0.3 matplotlib==3.8.2" + "# %%capture\n", + "# !pip install -q teradataml==20.0.0.2 teradatamodelops==7.0.6 matplotlib==3.8.2" ] }, { @@ -136,7 +137,7 @@ "from getpass import getpass\n", "\n", "logging.basicConfig(encoding=\"utf-8\", level=logging.INFO)\n", - "requests.packages.urllib3.disable_warnings()\n" + "requests.packages.urllib3.disable_warnings()" ] }, { @@ -187,7 +188,7 @@ "outputs": [], "source": [ "# base domain for ModelOps\n", - "url = \"https://\" + hostname + \"/modelops\"\n", + "url = \"https://\" + \"web-\"+ hostname + \"/modelops\"\n", "print(url)" ] }, @@ -221,21 +222,21 @@ "# or user's password\n", "password = getpass(\"Enter user's password: \")\n", "# project id\n", - "project = \"23e1df4b-b630-47a1-ab80-7ad5385fcd8d\"\n", + "project = \"70d4659b-92a2-4723-841a-9ba5629b5f27\"\n", "# model id\n", - "model = \"f937b5d8-02c6-5150-80c7-1e4ff07fea31\"\n", + "model = \"55b3662b-302c-5b5b-b668-84929a5c6f99\"\n", "# dataset connection id\n", "dataset_connection = \"151abf05-1914-4d38-a90d-272d850f212c\"\n", "# dataset 
training id\n", - "dataset_train = \"ba39e766-2fdf-426f-ba5c-4ca3e90955fc\"\n", + "dataset_train = \"2335c9ca-ce34-400d-820e-2a3cd7bb57bc\"\n", "# dataset evaluation id\n", - "dataset_eval = \"74489d62-2af5-4402-b264-715e151a420a\"\n", + "dataset_eval = \"3f2792b1-fcb6-4b58-851e-2b79382861c9\"\n", "# dataset template id, useful for batch deployment\n", - "dataset_template = \"d8a35d98-21ce-47d0-b9f2-00d355777de1\"\n", + "dataset_template = \"4053c309-2811-4f03-9fb4-e99c728ecdfb\"\n", "# Docker image for training\n", - "training_image = \"artifacts.td.teradata.com/tdproduct-docker-snapshot/avmo/aoa-python-base:3.9.13-1\"\n", + "training_image = \"artifacts.td.teradata.com/tdproduct-docker-snapshot/avmo/vmo-python-base:3.9.4\"\n", "# Docker image for deployment\n", - "deployment_image = \"artifacts.td.teradata.com/tdproduct-docker-snapshot/avmo/aoa-python-base:3.9.13-1\"\n", + "deployment_image = \"artifacts.td.teradata.com/tdproduct-docker-snapshot/avmo/vmo-python-base:3.9.4\"\n", "# hyperparameters for training, set to empty dictionary if none\n", "hyper_params = {\"eta\": 0.2, \"max_depth\": 6}\n", "# performance settings\n", @@ -450,7 +451,7 @@ "source": [ "def train(url, token, project, model, dataset_connection, dataset_train, train_memory, train_cpu, hyper_params, training_image):\n", " headers = {\n", - " \"AOA-Project-ID\": project,\n", + " \"VMO-Project-ID\": project,\n", " \"Authorization\": f\"Bearer {token}\",\n", " \"Content-Type\": \"application/json\",\n", " }\n", @@ -519,7 +520,7 @@ "source": [ "def evaluate(url, token, project, version_id, dataset_connection, dataset_eval, eval_memory, eval_cpu, hyper_params, deployment_image):\n", " headers = {\n", - " \"AOA-Project-ID\": project,\n", + " \"VMO-PROJECT-ID\": project,\n", " \"Authorization\": f\"Bearer {token}\",\n", " \"Content-Type\": \"application/json\",\n", " }\n", @@ -587,7 +588,7 @@ "\n", " comment = b6(b\"Approved\").decode().strip()\n", " headers = {\n", - " \"AOA-Project-ID\": project,\n", 
+ " \"VMO-PROJECT-ID\": project,\n", " \"Authorization\": f\"Bearer {token}\",\n", " \"Content-Type\": \"application/json\",\n", " }\n", @@ -686,7 +687,7 @@ "source": [ "def deploy(url, token, project, version_id, deploy_memory, deploy_cpu, dataset_connection, dataset_template, deployment_image):\n", " headers = {\n", - " \"AOA-Project-ID\": project,\n", + " \"VMO-PROJECT-ID\": project,\n", " \"Authorization\": f\"Bearer {token}\",\n", " \"Content-Type\": \"application/json\",\n", " }\n", @@ -773,7 +774,7 @@ "def retire_others(url, token, project, model, version_id):\n", " deployments = []\n", " headers = {\n", - " \"AOA-Project-ID\": project,\n", + " \"VMO-PROJECT-ID\": project,\n", " \"Authorization\": f\"Bearer {token}\",\n", " \"Content-Type\": \"application/json\",\n", " }\n", @@ -847,7 +848,7 @@ "def get_job_status(url, token, project, job_id):\n", " status = \"RUNNING\"\n", " headers = {\n", - " \"AOA-Project-ID\": project,\n", + " \"VMO-PROJECT-ID\": project,\n", " \"Authorization\": f\"Bearer {token}\",\n", " \"Content-Type\": \"application/json\",\n", " }\n", @@ -911,7 +912,10 @@ "id": "0c040a31-16be-4276-bc34-93cd8c193546", "metadata": {}, "source": [ - "[![image](images/launchModelOps.png)](/modelops)" + "\n", + " LAUNCH MODELOPS\n", + " \"External\n", + "" ] }, { @@ -927,6 +931,7 @@ "execution_count": null, "id": "f8ae5862-ae90-40fc-9e77-7d736ef7dfdd", "metadata": { + "scrolled": true, "tags": [] }, "outputs": [], @@ -983,7 +988,7 @@ "outputs": [], "source": [ "logging.info(\"Getting training status\")\n", - "version_id = get_job_status(url, token, project, train_job)\n" + "version_id = get_job_status(url, token, project, train_job)" ] }, { From 91606060b72d7f4e0513198b6ffa0810eb11716c Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Thu, 30 Jan 2025 15:51:52 +0530 Subject: [PATCH 05/35] snapshorts on failure --- .tests/playwright.config.ts | 4 ++-- .tests/tests/jupytertest.spec.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/.tests/playwright.config.ts b/.tests/playwright.config.ts index 5fdfb5f0..5b6c7e2d 100644 --- a/.tests/playwright.config.ts +++ b/.tests/playwright.config.ts @@ -41,6 +41,7 @@ export default defineConfig({ /* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */ trace: 'on-first-retry', + screenshot: 'only-on-failure' }, /* Configure projects for major browsers */ @@ -82,8 +83,7 @@ export default defineConfig({ ], /* Folder for test artifacts such as screenshots, videos, traces, etc. */ - // outputDir: 'test-results/', - + //outputDir: 'test-results/', /* Run your local dev server before starting the tests */ // webServer: { // command: 'npm run start', diff --git a/.tests/tests/jupytertest.spec.ts b/.tests/tests/jupytertest.spec.ts index 79a7ae6c..a8898ac0 100644 --- a/.tests/tests/jupytertest.spec.ts +++ b/.tests/tests/jupytertest.spec.ts @@ -151,7 +151,7 @@ for (let i = 0; i < testCount; i++) { if(strKernelType === "Python 3 (ipykernel) "){ await expect(page.locator(".jp-RenderedText[data-mime-type='application/vnd.jupyter.stderr']").filter({ hasText: 'Traceback (most recent call last):' })).toHaveCount(0); }else if(strKernelType === 'Teradata SQL '){ - await expect(page.locator(".jp-RenderedText[data-mime-type='application/vnd.jupyter.stderr']").filter({ hasNotText: '[Teradata Database] [Warning' })).toHaveCount(0); + await expect(page.locator(".jp-RenderedText[data-mime-type='application/vnd.jupyter.stderr']")).toHaveCount(0)//.filter({ hasNotText: '[Teradata Database] [Warning' })).toHaveCount(0); }else{ await expect(page.locator(".jp-RenderedText[data-mime-type='application/vnd.jupyter.stderr']")).toHaveCount(0); } From 27e53988eada60cc4ecac4cb793a4ea337a95d62 Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Thu, 30 Jan 2025 15:57:54 +0530 Subject: [PATCH 06/35] Merging the reports --- .github/workflows/nightly.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/nightly.yml 
b/.github/workflows/nightly.yml index 7e85d23c..53ee11cb 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -106,3 +106,16 @@ jobs: name: playwright-report-job-${{ matrix.idx }} path: .tests/playwright-report/ retention-days: 30 + merge_reports: + permissions: + contents: 'read' + id-token: 'write' + runs-on: ubuntu-latest + name: Job ${{ matrix.idx }} + timeout-minutes: 1440 + steps: + - uses: actions/download-artifact@v4 + with: + name: playwright-report-job-${{ matrix.idx }} + path: playwright-reports + - run: ls playwright-reports \ No newline at end of file From b27e00594b7145959ec9bd0ce5e851e1c21a58cf Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Thu, 30 Jan 2025 17:18:34 +0530 Subject: [PATCH 07/35] merge reports --- .github/workflows/ci.yml | 2 +- .github/workflows/nightly.yml | 53 ++++++++++++++++++++++++----------- .tests/.gitignore | 1 + .tests/playwright.config.ts | 2 +- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 542a6330..8f13d2e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ defaults: shell: bash jobs: - tests: + playwright-tests: permissions: contents: 'read' id-token: 'write' diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 53ee11cb..5b7df15e 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -27,7 +27,7 @@ defaults: shell: bash jobs: - tests: + playwright-tests: permissions: contents: 'read' id-token: 'write' @@ -43,6 +43,7 @@ jobs: TEST_ENV: ${{ github.event.inputs.test_env || 'PROD' }} CSAE_CI_JOB_COUNT: ${{ github.event.inputs.notebooks == '' && '10' || '1' }} IGNORE_BLACKLIST: ${{ github.event.inputs.ignore_blacklist || 'false' }} + CI_ENV: 'nightly' name: Job ${{ matrix.idx }} timeout-minutes: 1440 runs-on: ubuntu-latest @@ -100,22 +101,40 @@ jobs: - name: Delete test environments if: always() run: npx ts-node post_delete_env.ts 
- - uses: actions/upload-artifact@v4 - if: always() + - name: Upload blob report to GitHub Actions Artifacts + if: ${{ !cancelled() }} + uses: actions/upload-artifact@v4 with: - name: playwright-report-job-${{ matrix.idx }} - path: .tests/playwright-report/ - retention-days: 30 - merge_reports: - permissions: - contents: 'read' - id-token: 'write' + name: blob-report-${{ matrix.idx }} + path: blob-report + retention-days: 1 + merge-reports: + # Merge reports after playwright-tests, even if some shards have failed + if: ${{ !cancelled() }} + needs: [playwright-tests] + runs-on: ubuntu-latest - name: Job ${{ matrix.idx }} - timeout-minutes: 1440 steps: - - uses: actions/download-artifact@v4 - with: - name: playwright-report-job-${{ matrix.idx }} - path: playwright-reports - - run: ls playwright-reports \ No newline at end of file + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: lts/* + - name: Install dependencies + run: npm ci + + - name: Download blob reports from GitHub Actions Artifacts + uses: actions/download-artifact@v4 + with: + path: all-blob-reports + pattern: blob-report-* + merge-multiple: true + + - name: Merge into HTML Report + run: npx playwright merge-reports --reporter html ./all-blob-reports + + - name: Upload HTML report + uses: actions/upload-artifact@v4 + with: + name: html-report--attempt-${{ github.run_attempt }} + path: playwright-report + retention-days: 14 \ No newline at end of file diff --git a/.tests/.gitignore b/.tests/.gitignore index 93a132d9..a2cbadd6 100644 --- a/.tests/.gitignore +++ b/.tests/.gitignore @@ -2,5 +2,6 @@ .DS_Store node_modules/ playwright-report/ +blob-report/ files.txt .env \ No newline at end of file diff --git a/.tests/playwright.config.ts b/.tests/playwright.config.ts index 5b6c7e2d..8d9fbd96 100644 --- a/.tests/playwright.config.ts +++ b/.tests/playwright.config.ts @@ -31,7 +31,7 @@ export default defineConfig({ /* Opt out of parallel tests on CI. 
Dave-changed from 1 to 3 */ workers: process.env.CSAE_WORKERS_COUNT ? parseInt(process.env.CSAE_WORKERS_COUNT):undefined, /* Reporter to use. See https://playwright.dev/docs/test-reporters */ - reporter: 'html', + reporter: process.env.CI_ENV === 'nightly'? 'blob' : 'html', /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */ use: { /* Maximum time each action such as `click()` can take. Defaults to 0 (no limit). */ From e1f94b5ad5ad03e285ba0397a722c415d43449ee Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Thu, 30 Jan 2025 17:29:38 +0530 Subject: [PATCH 08/35] dep --- .tests/package-lock.json | 8 ++++---- .tests/package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.tests/package-lock.json b/.tests/package-lock.json index cea5100a..93275356 100644 --- a/.tests/package-lock.json +++ b/.tests/package-lock.json @@ -15,7 +15,7 @@ }, "devDependencies": { "@playwright/test": "^1.44.1", - "@types/node": "^20.17.6", + "@types/node": "^20.17.16", "dotenv-cli": "^7.4.2" } }, @@ -88,9 +88,9 @@ "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==" }, "node_modules/@types/node": { - "version": "20.17.10", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.10.tgz", - "integrity": "sha512-/jrvh5h6NXhEauFFexRin69nA0uHJ5gwk4iDivp/DeoEua3uwCUto6PC86IpRITBOs4+6i2I56K5x5b6WYGXHA==", + "version": "20.17.16", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.16.tgz", + "integrity": "sha512-vOTpLduLkZXePLxHiHsBLp98mHGnl8RptV4YAO3HfKO5UHjDvySGbxKtpYfy8Sx5+WKcgc45qNreJJRVM3L6mw==", "dependencies": { "undici-types": "~6.19.2" } diff --git a/.tests/package.json b/.tests/package.json index 2c18766e..23fa8f80 100644 --- a/.tests/package.json +++ b/.tests/package.json @@ -9,7 +9,7 @@ "license": "ISC", "devDependencies": { "@playwright/test": "^1.44.1", - "@types/node": "^20.17.6", + "@types/node": "^20.17.16", 
"dotenv-cli": "^7.4.2" }, "dependencies": { From 20ff48485a58b12f826ed644dd4798ef7746d816 Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Thu, 30 Jan 2025 18:32:04 +0530 Subject: [PATCH 09/35] Adding error message to the error message --- .tests/tests/jupytertest.spec.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.tests/tests/jupytertest.spec.ts b/.tests/tests/jupytertest.spec.ts index a8898ac0..9e558955 100644 --- a/.tests/tests/jupytertest.spec.ts +++ b/.tests/tests/jupytertest.spec.ts @@ -148,12 +148,13 @@ for (let i = 0; i < testCount; i++) { await page.locator('span[class="f1235lqo"] >> text="' + strKernelType + '| Idle"').waitFor({ timeout: 600000 }); //Check for any errors so far + const errorLocator = page.locator(".jp-RenderedText[data-mime-type='application/vnd.jupyter.stderr']"); if(strKernelType === "Python 3 (ipykernel) "){ - await expect(page.locator(".jp-RenderedText[data-mime-type='application/vnd.jupyter.stderr']").filter({ hasText: 'Traceback (most recent call last):' })).toHaveCount(0); + await expect(errorLocator.filter({ hasText: 'Traceback (most recent call last):' }),`Occured At: ${await errorLocator.textContent()}`).toHaveCount(0); }else if(strKernelType === 'Teradata SQL '){ - await expect(page.locator(".jp-RenderedText[data-mime-type='application/vnd.jupyter.stderr']")).toHaveCount(0)//.filter({ hasNotText: '[Teradata Database] [Warning' })).toHaveCount(0); + await expect(errorLocator.filter({ hasNotText: '[Teradata Database] [Warning' }),`Occured At: ${await errorLocator.textContent()}`).toHaveCount(0); }else{ - await expect(page.locator(".jp-RenderedText[data-mime-type='application/vnd.jupyter.stderr']")).toHaveCount(0); + await expect(errorLocator,`Occured At: ${await errorLocator.textContent()}`).toHaveCount(0); } await expect(page.locator(`div.jp-NotebookPanel:not(.p-mod-hidden)> div > div.jp-Cell:nth-child(${i})`)).toHaveClass(/jp-mod-active/); From 1da24be2c1def3016d75c8f7f71af91112b1ff3c Mon Sep 17 
00:00:00 2001 From: Nitin Sreeram Date: Thu, 30 Jan 2025 18:39:17 +0530 Subject: [PATCH 10/35] tsconfig --- .tests/tsconfig.json | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.tests/tsconfig.json b/.tests/tsconfig.json index 37e7f131..12b951d7 100644 --- a/.tests/tsconfig.json +++ b/.tests/tsconfig.json @@ -13,12 +13,9 @@ "esModuleInterop": true, }, "include": [ - "src/**/*.ts", - "test/**/*.ts", - "test-helpers/**/*.ts", + "tests/**/*.ts", "./package.json", "./package-lock.json", - "./templates/**/*.mustache" ], "exclude": ["node_modules"] } \ No newline at end of file From 596aff9e24240f79ac525feb4d2bfc692200f5b1 Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Thu, 30 Jan 2025 18:48:57 +0530 Subject: [PATCH 11/35] type fix --- .github/workflows/nightly.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 5b7df15e..a66544f9 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -106,8 +106,9 @@ jobs: uses: actions/upload-artifact@v4 with: name: blob-report-${{ matrix.idx }} - path: blob-report + path: .tests/blob-report retention-days: 1 + merge-reports: # Merge reports after playwright-tests, even if some shards have failed if: ${{ !cancelled() }} From 12b6a8aab9944e88dcb7beac19181af93bc79e2d Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Fri, 31 Jan 2025 01:33:46 +0530 Subject: [PATCH 12/35] fixing bug --- .tests/tests/jupytertest.spec.ts | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/.tests/tests/jupytertest.spec.ts b/.tests/tests/jupytertest.spec.ts index 9e558955..d7a83d58 100644 --- a/.tests/tests/jupytertest.spec.ts +++ b/.tests/tests/jupytertest.spec.ts @@ -13,8 +13,6 @@ const CSAE_CI_JOB_COUNT = parseInt(process.env.CSAE_CI_JOB_COUNT || '1'); const CI_BRANCH = process.env.CI_BRANCH || 'main'; const IGNORE_BLACKLIST = process.env.IGNORE_BLACKLIST || 'false'; 
-const sleep = (ms) => new Promise(r => setTimeout(r, ms)); - test.describe.configure({ mode: 'parallel' }); // files.txt will contain the list of files to be tested. // It is generated by the following command: @@ -146,18 +144,23 @@ for (let i = 0; i < testCount; i++) { for (let i = 1; i <= dm; i++) { // To continute the notebook the kernel should be in Idle state. i.e previous cell execution should be completed. await page.locator('span[class="f1235lqo"] >> text="' + strKernelType + '| Idle"').waitFor({ timeout: 600000 }); - - //Check for any errors so far + const errorLocator = page.locator(".jp-RenderedText[data-mime-type='application/vnd.jupyter.stderr']"); - if(strKernelType === "Python 3 (ipykernel) "){ - await expect(errorLocator.filter({ hasText: 'Traceback (most recent call last):' }),`Occured At: ${await errorLocator.textContent()}`).toHaveCount(0); - }else if(strKernelType === 'Teradata SQL '){ - await expect(errorLocator.filter({ hasNotText: '[Teradata Database] [Warning' }),`Occured At: ${await errorLocator.textContent()}`).toHaveCount(0); - }else{ - await expect(errorLocator,`Occured At: ${await errorLocator.textContent()}`).toHaveCount(0); + try { + //Check for any errors so far + if(strKernelType === "Python 3 (ipykernel) "){ + await expect(errorLocator.filter({ hasText: 'Traceback (most recent call last):' })).toHaveCount(0); + }else if(strKernelType === 'Teradata SQL '){ + await expect(errorLocator.filter({ hasNotText: '[Teradata Database] [Warning'})).toHaveCount(0); + }else{ + await expect(errorLocator).toHaveCount(0); + } + await expect(page.locator(`div.jp-NotebookPanel:not(.p-mod-hidden)> div > div.jp-Cell:nth-child(${i})`)).toHaveClass(/jp-mod-active/); + }catch(e){ + console.log('Error found in cell ' + i + ' with following error:'); + console.log(await errorLocator.textContent()) + throw e; } - await expect(page.locator(`div.jp-NotebookPanel:not(.p-mod-hidden)> div > div.jp-Cell:nth-child(${i})`)).toHaveClass(/jp-mod-active/); - //restart 
the kernel if the cell has 'zero zero' text const restartKernal = await page.locator(`div.jp-NotebookPanel:not(.p-mod-hidden)> div > div.jp-Cell:nth-child(${i})`) .filter({ hasText: 'The simplest way to restart the Kernel is by typing zero zero:' }); @@ -264,4 +267,4 @@ for (let i = 0; i < testCount; i++) { } }); -} \ No newline at end of file +} From 2475915f754ebf2da6230d292ea064681cc9a262 Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Fri, 31 Jan 2025 11:11:40 +0530 Subject: [PATCH 13/35] Path fixes and github pages --- .github/workflows/nightly.yml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index a66544f9..fdb069da 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -113,7 +113,6 @@ jobs: # Merge reports after playwright-tests, even if some shards have failed if: ${{ !cancelled() }} needs: [playwright-tests] - runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -122,20 +121,27 @@ jobs: node-version: lts/* - name: Install dependencies run: npm ci - - name: Download blob reports from GitHub Actions Artifacts uses: actions/download-artifact@v4 with: - path: all-blob-reports + path: .tests/all-blob-reports pattern: blob-report-* merge-multiple: true - - name: Merge into HTML Report run: npx playwright merge-reports --reporter html ./all-blob-reports - - name: Upload HTML report uses: actions/upload-artifact@v4 with: name: html-report--attempt-${{ github.run_attempt }} - path: playwright-report - retention-days: 14 \ No newline at end of file + path: .tests/playwright-report/ + retention-days: 14 + - name: Setup pages + uses: actions/configure-pages@v5 + - name: Upload report to GitHub Pages + uses: actions/upload-artifact@v4 + with: + path: .tests/playwright-report/ + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 + From 01034f1e5a12829fbfaf73a52af86b95372b8a19 Mon Sep 17 00:00:00 2001 
From: Nitin Sreeram Date: Fri, 31 Jan 2025 14:50:10 +0530 Subject: [PATCH 14/35] tail 10 for testing --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index fdb069da..d4b89c6b 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -91,7 +91,7 @@ jobs: run: | npx playwright install --with-deps - name: Find all Jupyter notebooks - run: find .. -name '*.ipynb' | sort > ./files.txt + run: find .. -name '*.ipynb' |tail -n 10| sort > ./files.txt - name: Start test environments id: start-test-environment run: npx ts-node pre_create_env.ts From bf5e755cbbe0617b3b269c912cd07a22bcab89ba Mon Sep 17 00:00:00 2001 From: chetan-hirapara Date: Fri, 31 Jan 2025 09:29:23 +0000 Subject: [PATCH 15/35] Added experience bot with no code --- CSAE_Bot.ipynb | 1321 ----------------- ExperienceBot/ExperienceBot.ipynb | 538 +++++++ ExperienceBot/ExperienceBot_NoCode.ipynb | 170 +++ .../__pycache__/chat_helper.cpython-39.pyc | Bin 0 -> 4538 bytes ExperienceBot/chat_helper.py | 171 +++ 5 files changed, 879 insertions(+), 1321 deletions(-) delete mode 100644 CSAE_Bot.ipynb create mode 100644 ExperienceBot/ExperienceBot.ipynb create mode 100644 ExperienceBot/ExperienceBot_NoCode.ipynb create mode 100644 ExperienceBot/__pycache__/chat_helper.cpython-39.pyc create mode 100644 ExperienceBot/chat_helper.py diff --git a/CSAE_Bot.ipynb b/CSAE_Bot.ipynb deleted file mode 100644 index 7c343bdc..00000000 --- a/CSAE_Bot.ipynb +++ /dev/null @@ -1,1321 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fc92e86f-c5a7-499d-89e2-49046aa01be7", - "metadata": {}, - "source": [ - "
\n", - "

\n", - " CSAE Bot: Quickly find your demos of interest by just typing\n", - "
\n", - " \"Teradata\"\n", - "

\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "d6c42cf3-ab6f-480d-943e-75be813f912b", - "metadata": {}, - "source": [ - "
\n", - "\n", - "

1. Install required libraries

" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d74eee36-e3e8-42ca-9253-4e5ce8178c7b", - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "\n", - "!pip install openai langchain langchain-openai panel==1.3.4" - ] - }, - { - "cell_type": "markdown", - "id": "87d04ebc-4e5c-4e11-8094-fef11cf0b017", - "metadata": {}, - "source": [ - "
\n", - "

Note: Please restart the kernel. The simplest way is by typing 0 0 (zero zero) and then pressing enter

\n", - "
" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "98cfddce-a2b3-4d7d-aae4-1f2dbc463022", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import json\n", - "import re\n", - "\n", - "from langchain.document_loaders import DirectoryLoader\n", - "from langchain_openai import OpenAIEmbeddings\n", - "from langchain.vectorstores import FAISS\n", - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "from langchain.schema import Document\n", - "\n", - "from langchain_community.document_loaders import NotebookLoader\n", - "from langchain_community.document_loaders import DirectoryLoader\n", - "\n", - "from langchain_openai import ChatOpenAI\n", - "from langchain.prompts import PromptTemplate\n", - "from langchain.chains import RetrievalQA\n", - "import openai\n", - "\n", - "from teradataml import *" - ] - }, - { - "cell_type": "markdown", - "id": "1320f4b6-15ab-4d5b-8bcd-56fb9154d5ca", - "metadata": {}, - "source": [ - "
\n", - "\n", - "

1.1 Connect to Vantage

\n", - "

We will be prompted to provide the password. We will enter the password, press the Enter key, and then use the down arrow to go to the next cell.

" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "id": "882f4318-bbb0-4214-ab97-316ded717794", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Performing setup ...\n", - "Setup complete\n" - ] - }, - { - "name": "stdin", - "output_type": "stream", - "text": [ - "\n", - "Enter password: ·········\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "... Logon successful\n", - "Connected as: teradatasql://demo_user:xxxxx@host.docker.internal/dbc\n", - "Engine(teradatasql://demo_user:***@host.docker.internal)\n" - ] - }, - { - "data": { - "text/plain": [ - "TeradataCursor uRowsHandle=37 bClosed=False" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%run -i ./UseCases/startup.ipynb\n", - "eng = create_context(host = 'host.docker.internal', username='demo_user', password = password)\n", - "print(eng)\n", - "execute_sql('''SET query_band='DEMO= CSAE_Bot.ipynb;' UPDATE FOR SESSION;''')" - ] - }, - { - "cell_type": "markdown", - "id": "b4fe3299-d15d-48c8-ae02-a80968c9ad1f", - "metadata": {}, - "source": [ - "
\n", - "\n", - "

2. Extract contents (code, text) from notebook

" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b661c301-cfe3-4389-a05e-e996336684e8", - "metadata": {}, - "outputs": [], - "source": [ - "# Function to extract content from a Jupyter notebook\n", - "def extract_notebook_content(file_path):\n", - " with open(file_path, 'r', encoding='utf-8') as f:\n", - " notebook_data = json.load(f)\n", - "\n", - " content = \"\"\n", - " for cell in notebook_data.get('cells', []):\n", - " if cell['cell_type'] == 'markdown':\n", - " # Clean markdown content by removing HTML tags\n", - " content += '\\n'.join(cell['source']) + '\\n\\n'\n", - " elif cell['cell_type'] == 'code':\n", - " # Format code properly\n", - " content += '```python\\n' + ''.join(cell['source']) + '\\n```\\n\\n'\n", - " return content\n", - "\n", - "# Function to remove HTML tags\n", - "def remove_html_tags(text):\n", - " \"\"\"Remove HTML tags from a string\"\"\"\n", - " clean = re.compile('<.*?>')\n", - " return re.sub(clean, '', text)\n", - "\n", - "# Function to split the notebook content into markdown and code\n", - "def split_ipynb_content(content):\n", - " # Regular expression to match code blocks\n", - " code_pattern = re.compile(r'```python(.*?)```', re.DOTALL)\n", - "\n", - " # Find all code blocks\n", - " code_blocks = code_pattern.findall(content)\n", - "\n", - " # Split the content by code blocks\n", - " parts = code_pattern.split(content)\n", - "\n", - " # Combine markdown and code blocks\n", - " result = []\n", - " for i, part in enumerate(parts):\n", - " if i % 2 == 0:\n", - " # This is a markdown part, remove HTML tags\n", - " clean_part = remove_html_tags(part)\n", - " result.append(('markdown', clean_part))\n", - " else:\n", - " # This is a code part\n", - " result.append(('code', part))\n", - "\n", - " return result\n", - "\n", - "# Function to clean and split notebook content\n", - "def clean_and_split_notebook_content(file_path):\n", - " \"\"\"Extract markdown content and clean up the notebook's information.\"\"\"\n", 
- " # Extract the content from the notebook file\n", - " content = extract_notebook_content(file_path)\n", - " \n", - " # Split content into markdown and code cells\n", - " split_content = split_ipynb_content(content)\n", - "\n", - " # Initialize a list to hold combined documents\n", - " combined_documents = []\n", - " current_markdown = \"\"\n", - " current_code = \"\"\n", - "\n", - " # Iterate through the split content to group markdown with code\n", - " for part_type, part in split_content:\n", - " if part_type == 'markdown':\n", - " # If we have code and markdown, combine them\n", - " if current_markdown or current_code:\n", - " combined_documents.append({\"markdown\": current_markdown, \"code\": current_code})\n", - " # Update current markdown to the new one\n", - " current_markdown = part\n", - " current_code = \"\" # Reset code, ready for next code block\n", - " elif part_type == 'code':\n", - " # Append the code to the current code block\n", - " current_code += part\n", - " \n", - " # Add the last document (markdown + code)\n", - " if current_markdown or current_code:\n", - " combined_documents.append({\"markdown\": current_markdown, \"code\": current_code})\n", - "\n", - " return combined_documents" - ] - }, - { - "cell_type": "markdown", - "id": "a27f7379-43bd-43af-9c4f-ad55f2b4035f", - "metadata": {}, - "source": [ - "
\n", - "\n", - "

3. Create FAISS vector database

\n", - "
\n", - "

Note: You do not have to run the next cell multiple times. Each time it is executed it will generate over 1M embeddings and the charge is typically $0.02USD / 1M tokens.

\n", - " \n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "4d11abb9-225e-4ba8-9041-9e29e738ed3b", - "metadata": {}, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - "Enter your OpenAI API key: ························································\n" - ] - } - ], - "source": [ - "import getpass\n", - "import os\n", - "\n", - "if not os.getenv(\"OPENAI_API_KEY\"):\n", - " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "fe6c268a-7004-48f2-8cf4-fb401b363235", - "metadata": {}, - "outputs": [], - "source": [ - "def generate_emb():\n", - " # Load notebooks and clean them\n", - " path = '/home/jovyan/JupyterLabRoot/'\n", - " loader = DirectoryLoader(path, glob=\"**/*.ipynb\", loader_cls=NotebookLoader)\n", - " notebooks = loader.load()\n", - "\n", - " # Clean each notebook before processing it\n", - " cleaned_documents = []\n", - " for notebook in notebooks:\n", - " # Assuming notebook metadata contains file path\n", - " file_path = notebook.metadata.get(\"source\", \"Unknown\") # Adjust this as needed\n", - " cleaned_data = clean_and_split_notebook_content(file_path)\n", - "\n", - " # Convert cleaned data to documents, including the source file path\n", - " for data in cleaned_data:\n", - " if data['markdown'] or data['code']:\n", - " doc = Document(\n", - " page_content=f\"Markdown:\\n{data['markdown']} \\n\\nCode:\\n{data['code']}\",\n", - " metadata={\"source\": file_path} # Ensure the source file path is added\n", - " )\n", - " cleaned_documents.append(doc)\n", - "\n", - "\n", - " # Split text into manageable chunks\n", - " text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)\n", - " docs = text_splitter.split_documents(cleaned_documents)\n", - "\n", - "\n", - " # for count of token\n", - " from tiktoken import encoding_for_model\n", - "\n", - " def 
count_document_tokens(document, model_name=\"gpt-4\"):\n", - " encoder = encoding_for_model(model_name)\n", - " return len(encoder.encode(document.page_content))\n", - "\n", - " tiktokn = 0\n", - " for doc in cleaned_documents:\n", - " tiktokn = tiktokn + count_document_tokens(doc)\n", - "\n", - " print(\"total token from all the notebooks: \", tiktokn)\n", - "\n", - " # Create vector store using embeddings\n", - " embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", - " vector_store = FAISS.from_documents(docs, embeddings)\n", - "\n", - " # Save the index for reuse\n", - " vector_store.save_local(\".notebooks_index\")\n", - " return vector_store" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "2f34de3e-b917-4b5d-9448-c9341a9980fa", - "metadata": {}, - "outputs": [], - "source": [ - "def load_emb():\n", - " # Load the FAISS index with dangerous deserialization enabled\n", - " # Create vector store using embeddings\n", - " embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", - "\n", - " return FAISS.load_local(\n", - " \".notebooks_index\", embeddings, allow_dangerous_deserialization=True\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "c357a955-d49d-4fec-99a5-6c42f9d204bf", - "metadata": {}, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - "Do you want to generate embeddings? ('yes'/'no'): no\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading existing embeddings...\n" - ] - } - ], - "source": [ - "# Request user's input\n", - "generate = input(\"Do you want to generate embeddings? 
('yes'/'no'): \")\n", - "\n", - "# Check the user's input\n", - "if generate.lower() == \"yes\":\n", - " vector_store = generate_emb()\n", - "elif generate.lower() == \"no\":\n", - " try:\n", - " print('Loading existing embeddings...')\n", - " vector_store = load_emb()\n", - " except:\n", - " print('Embeddings not found, generating now..')\n", - " generate_emb()\n", - " vector_store = load_emb()" - ] - }, - { - "cell_type": "markdown", - "id": "5d754281-77b6-4f68-a41d-2323cdc020ad", - "metadata": {}, - "source": [ - "
\n", - "

4. Load existing vector database and define RAG

" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "5565e54c-6303-4648-81cf-9f2fe44c1de0", - "metadata": {}, - "outputs": [], - "source": [ - "# # Load the FAISS index with dangerous deserialization enabled\n", - "# # Create vector store using embeddings\n", - "# embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", - "\n", - "# vector_store = FAISS.load_local(\n", - "# \"notebooks_index\", embeddings, allow_dangerous_deserialization=True\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "6dbb1de5-2966-474f-86a4-a1aff224e9e8", - "metadata": {}, - "outputs": [], - "source": [ - "# Custom Prompt Template\n", - "CUSTOM_PROMPT = \"\"\"\n", - "You are a helpful assistant. Use the following retrieved information from Jupyter notebooks to provide:\n", - "1. A **clean and concise textual explanation** based on the question and notebook markdown.\n", - "2. **Relevant Clean Python code** extracted from the notebooks' code cells that are related to the question. Please filter the code that is related to the query.\n", - "3. 
Extract the source documents\n", - "If no relevant information is found, politely say so.\n", - "\n", - "Context:\n", - "{context}\n", - "\n", - "Question:\n", - "{question}\n", - "\n", - "Your response should be in below format:\n", - "##Answer:\n", - "##Relevant Code:\n", - "##Source documents:\n", - "\"\"\"\n", - "\n", - "prompt = PromptTemplate(\n", - " input_variables=[\"context\", \"question\"],\n", - " template=CUSTOM_PROMPT\n", - ")\n", - "\n", - "# Make sure to use a Chat model like 'gpt-4' or 'gpt-3.5-turbo'\n", - "chat_model = ChatOpenAI(model=\"gpt-4o-mini\")\n", - "\n", - "# Retrieval QA Chain\n", - "qa_chain = RetrievalQA.from_chain_type(\n", - " llm=chat_model,\n", - " retriever=vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 10}), # Assuming vector_store is your vector database\n", - " return_source_documents=True,\n", - " chain_type_kwargs={\"prompt\": prompt}\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "9d2efb7c-dfa6-4240-b3ae-e40337685705", - "metadata": {}, - "outputs": [], - "source": [ - "# Function to Query Chatbot\n", - "def query_chatbot(question):\n", - " # Query the chatbot using the chain\n", - " result = qa_chain.invoke(question)\n", - " answer = result[\"result\"]\n", - "\n", - " # Extract and format relevant source paths from source documents\n", - " source_docs = result.get(\"source_documents\", [])\n", - " sources = \"\\n\".join(set([doc.metadata.get(\"source\", \"Unknown\") for doc in source_docs]))\n", - "\n", - " return f\"\"\"\n", - "{answer}\n", - "\n", - "Reference Notebook(s):\n", - "{sources if sources else \"No source notebooks found.\"}\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "b8e60ece-6bd2-495a-8d11-ec545c11be87", - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import display, Markdown, HTML, Javascript\n", - "import re\n", - "import textwrap\n", - "\n", - "def 
extract_answer_code_references(input_string):\n", - "\n", - " # Extract references and create JupyterLab-compatible links\n", - " references = re.findall(r'(/home/[^\\s]+)', input_string)\n", - " \n", - " # Check and format paths to open in JupyterLab (ensure paths are relative to /notebooks/)\n", - " html_output = [f' -> {ref.split(\"/\")[-1]} ' for ref in references]\n", - " html_output2 = []\n", - " for t in html_output:\n", - " html_output2.append(textwrap.fill(t, width=100))\n", - " return \"\\n\\n\".join(html_output2)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "8b8e3755-2398-4e51-9479-3fe810f34beb", - "metadata": {}, - "outputs": [ - { - "data": { - "application/javascript": [ - "(function(root) {\n", - " function now() {\n", - " return new Date();\n", - " }\n", - "\n", - " var force = true;\n", - " var py_version = '3.3.4'.replace('rc', '-rc.').replace('.dev', '-dev.');\n", - " var reloading = false;\n", - " var Bokeh = root.Bokeh;\n", - "\n", - " if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n", - " root._bokeh_timeout = Date.now() + 5000;\n", - " root._bokeh_failed_load = false;\n", - " }\n", - "\n", - " function run_callbacks() {\n", - " try {\n", - " root._bokeh_onload_callbacks.forEach(function(callback) {\n", - " if (callback != null)\n", - " callback();\n", - " });\n", - " } finally {\n", - " delete root._bokeh_onload_callbacks;\n", - " }\n", - " console.debug(\"Bokeh: all callbacks have finished\");\n", - " }\n", - "\n", - " function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n", - " if (css_urls == null) css_urls = [];\n", - " if (js_urls == null) js_urls = [];\n", - " if (js_modules == null) js_modules = [];\n", - " if (js_exports == null) js_exports = {};\n", - "\n", - " root._bokeh_onload_callbacks.push(callback);\n", - "\n", - " if (root._bokeh_is_loading > 0) {\n", - " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", - " return null;\n", - " 
}\n", - " if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n", - " run_callbacks();\n", - " return null;\n", - " }\n", - " if (!reloading) {\n", - " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", - " }\n", - "\n", - " function on_load() {\n", - " root._bokeh_is_loading--;\n", - " if (root._bokeh_is_loading === 0) {\n", - " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", - " run_callbacks()\n", - " }\n", - " }\n", - " window._bokeh_on_load = on_load\n", - "\n", - " function on_error() {\n", - " console.error(\"failed to load \" + url);\n", - " }\n", - "\n", - " var skip = [];\n", - " if (window.requirejs) {\n", - " window.requirejs.config({'packages': {}, 'paths': {'jspanel': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n", - " require([\"jspanel\"], function(jsPanel) {\n", - "\twindow.jsPanel = jsPanel\n", - "\ton_load()\n", - " })\n", - " require([\"jspanel-modal\"], function() {\n", - "\ton_load()\n", - " })\n", - " require([\"jspanel-tooltip\"], function() {\n", - "\ton_load()\n", - " })\n", - " 
require([\"jspanel-hint\"], function() {\n", - "\ton_load()\n", - " })\n", - " require([\"jspanel-layout\"], function() {\n", - "\ton_load()\n", - " })\n", - " require([\"jspanel-contextmenu\"], function() {\n", - "\ton_load()\n", - " })\n", - " require([\"jspanel-dock\"], function() {\n", - "\ton_load()\n", - " })\n", - " require([\"gridstack\"], function(GridStack) {\n", - "\twindow.GridStack = GridStack\n", - "\ton_load()\n", - " })\n", - " require([\"notyf\"], function() {\n", - "\ton_load()\n", - " })\n", - " root._bokeh_is_loading = css_urls.length + 9;\n", - " } else {\n", - " root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n", - " }\n", - "\n", - " var existing_stylesheets = []\n", - " var links = document.getElementsByTagName('link')\n", - " for (var i = 0; i < links.length; i++) {\n", - " var link = links[i]\n", - " if (link.href != null) {\n", - "\texisting_stylesheets.push(link.href)\n", - " }\n", - " }\n", - " for (var i = 0; i < css_urls.length; i++) {\n", - " var url = css_urls[i];\n", - " if (existing_stylesheets.indexOf(url) !== -1) {\n", - "\ton_load()\n", - "\tcontinue;\n", - " }\n", - " const element = document.createElement(\"link\");\n", - " element.onload = on_load;\n", - " element.onerror = on_error;\n", - " element.rel = \"stylesheet\";\n", - " element.type = \"text/css\";\n", - " element.href = url;\n", - " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", - " document.body.appendChild(element);\n", - " } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof HTMLElement))) || window.requirejs) {\n", - " var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 
'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n", - " for (var i = 0; i < urls.length; i++) {\n", - " skip.push(urls[i])\n", - " }\n", - " } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n", - " var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n", - " for (var i = 0; i < urls.length; i++) {\n", - " skip.push(urls[i])\n", - " }\n", - " } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n", - " var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n", - " for (var i = 0; i < urls.length; i++) {\n", - " skip.push(urls[i])\n", - " }\n", - " } var existing_scripts = []\n", - " var scripts = document.getElementsByTagName('script')\n", - " for (var i = 0; i < scripts.length; i++) {\n", - " var script = scripts[i]\n", - " if (script.src != null) {\n", - "\texisting_scripts.push(script.src)\n", - " }\n", - " }\n", - " for (var i = 0; i < js_urls.length; i++) {\n", - " var url = js_urls[i];\n", - " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", - "\tif (!window.requirejs) {\n", - "\t on_load();\n", - "\t}\n", - "\tcontinue;\n", - " }\n", - " var element = document.createElement('script');\n", - " element.onload = on_load;\n", - " element.onerror = on_error;\n", 
- " element.async = false;\n", - " element.src = url;\n", - " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", - " document.head.appendChild(element);\n", - " }\n", - " for (var i = 0; i < js_modules.length; i++) {\n", - " var url = js_modules[i];\n", - " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", - "\tif (!window.requirejs) {\n", - "\t on_load();\n", - "\t}\n", - "\tcontinue;\n", - " }\n", - " var element = document.createElement('script');\n", - " element.onload = on_load;\n", - " element.onerror = on_error;\n", - " element.async = false;\n", - " element.src = url;\n", - " element.type = \"module\";\n", - " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", - " document.head.appendChild(element);\n", - " }\n", - " for (const name in js_exports) {\n", - " var url = js_exports[name];\n", - " if (skip.indexOf(url) >= 0 || root[name] != null) {\n", - "\tif (!window.requirejs) {\n", - "\t on_load();\n", - "\t}\n", - "\tcontinue;\n", - " }\n", - " var element = document.createElement('script');\n", - " element.onerror = on_error;\n", - " element.async = false;\n", - " element.type = \"module\";\n", - " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", - " element.textContent = `\n", - " import ${name} from \"${url}\"\n", - " window.${name} = ${name}\n", - " window._bokeh_on_load()\n", - " `\n", - " document.head.appendChild(element);\n", - " }\n", - " if (!js_urls.length && !js_modules.length) {\n", - " on_load()\n", - " }\n", - " };\n", - "\n", - " function inject_raw_css(css) {\n", - " const element = document.createElement(\"style\");\n", - " element.appendChild(document.createTextNode(css));\n", - " document.body.appendChild(element);\n", - " }\n", - "\n", - " var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.3.4.min.js\", 
\"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.3.4.min.js\", \"https://cdn.holoviz.org/panel/1.3.4/dist/bundled/material-components-web@7.0.0/dist/material-components-web.min.js\", \"https://cdn.holoviz.org/panel/1.3.4/dist/panel.min.js\"];\n", - " var js_modules = [];\n", - " var js_exports = {};\n", - " var css_urls = [\"https://fonts.googleapis.com/css?family=Roboto:300,400,500\", \"https://fonts.googleapis.com/css?family=Material+Icons&display=block\"];\n", - " var inline_js = [ function(Bokeh) {\n", - " Bokeh.set_log_level(\"info\");\n", - " },\n", - "function(Bokeh) {} // ensure no trailing comma for IE\n", - " ];\n", - "\n", - " function run_inline_js() {\n", - " if ((root.Bokeh !== undefined) || (force === true)) {\n", - " for (var i = 0; i < inline_js.length; i++) {\n", - "\ttry {\n", - " inline_js[i].call(root, root.Bokeh);\n", - "\t} catch(e) {\n", - "\t if (!reloading) {\n", - "\t throw e;\n", - "\t }\n", - "\t}\n", - " }\n", - " // Cache old bokeh versions\n", - " if (Bokeh != undefined && !reloading) {\n", - "\tvar NewBokeh = root.Bokeh;\n", - "\tif (Bokeh.versions === undefined) {\n", - "\t Bokeh.versions = new Map();\n", - "\t}\n", - "\tif (NewBokeh.version !== Bokeh.version) {\n", - "\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n", - "\t}\n", - "\troot.Bokeh = Bokeh;\n", - " }} else if (Date.now() < root._bokeh_timeout) {\n", - " setTimeout(run_inline_js, 100);\n", - " } else if (!root._bokeh_failed_load) {\n", - " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", - " root._bokeh_failed_load = true;\n", - " }\n", - " root._bokeh_is_initializing = false\n", - " }\n", - "\n", - " function load_or_wait() {\n", - " // Implement a backoff loop that tries to ensure we do not load multiple\n", - " // versions of Bokeh and its dependencies at the same time.\n", - " // In recent versions we use the root._bokeh_is_initializing flag\n", - " // to 
determine whether there is an ongoing attempt to initialize\n", - " // bokeh, however for backward compatibility we also try to ensure\n", - " // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n", - " // before older versions are fully initialized.\n", - " if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n", - " root._bokeh_is_initializing = false;\n", - " root._bokeh_onload_callbacks = undefined;\n", - " console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n", - " load_or_wait();\n", - " } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n", - " setTimeout(load_or_wait, 100);\n", - " } else {\n", - " root._bokeh_is_initializing = true\n", - " root._bokeh_onload_callbacks = []\n", - " var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n", - " if (!reloading && !bokeh_loaded) {\n", - "\troot.Bokeh = undefined;\n", - " }\n", - " load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n", - "\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", - "\trun_inline_js();\n", - " });\n", - " }\n", - " }\n", - " // Give older versions of the autoload script a head-start to ensure\n", - " // they initialize before we start loading newer version.\n", - " setTimeout(load_or_wait, 100)\n", - "}(window));" - ], - "application/vnd.holoviews_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = '3.3.4'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var reloading = false;\n var Bokeh = root.Bokeh;\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n 
root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {'jspanel': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 
'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n require([\"jspanel\"], function(jsPanel) {\n\twindow.jsPanel = jsPanel\n\ton_load()\n })\n require([\"jspanel-modal\"], function() {\n\ton_load()\n })\n require([\"jspanel-tooltip\"], function() {\n\ton_load()\n })\n require([\"jspanel-hint\"], function() {\n\ton_load()\n })\n require([\"jspanel-layout\"], function() {\n\ton_load()\n })\n require([\"jspanel-contextmenu\"], function() {\n\ton_load()\n })\n require([\"jspanel-dock\"], function() {\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 9;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 
'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.3.4/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.4/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: 
injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.3.4.min.js\", \"https://cdn.holoviz.org/panel/1.3.4/dist/bundled/material-components-web@7.0.0/dist/material-components-web.min.js\", \"https://cdn.holoviz.org/panel/1.3.4/dist/panel.min.js\"];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [\"https://fonts.googleapis.com/css?family=Roboto:300,400,500\", 
\"https://fonts.googleapis.com/css?family=Material+Icons&display=block\"];\n var inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n\ttry {\n inline_js[i].call(root, root.Bokeh);\n\t} catch(e) {\n\t if (!reloading) {\n\t throw e;\n\t }\n\t}\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } 
else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/javascript": [ - "\n", - "if ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n", - " window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n", - "}\n", - "\n", - "\n", - " function JupyterCommManager() {\n", - " }\n", - "\n", - " JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n", - " if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", - " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", - " comm_manager.register_target(comm_id, function(comm) {\n", - " comm.on_msg(msg_handler);\n", - " });\n", - " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", - " window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n", - " comm.onMsg = msg_handler;\n", - " });\n", - " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", - " google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n", - " var messages = comm.messages[Symbol.asyncIterator]();\n", - " function processIteratorResult(result) {\n", - " var message = result.value;\n", - " console.log(message)\n", - " var content = {data: message.data, 
comm_id};\n", - " var buffers = []\n", - " for (var buffer of message.buffers || []) {\n", - " buffers.push(new DataView(buffer))\n", - " }\n", - " var metadata = message.metadata || {};\n", - " var msg = {content, buffers, metadata}\n", - " msg_handler(msg);\n", - " return messages.next().then(processIteratorResult);\n", - " }\n", - " return messages.next().then(processIteratorResult);\n", - " })\n", - " }\n", - " }\n", - "\n", - " JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n", - " if (comm_id in window.PyViz.comms) {\n", - " return window.PyViz.comms[comm_id];\n", - " } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", - " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", - " var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n", - " if (msg_handler) {\n", - " comm.on_msg(msg_handler);\n", - " }\n", - " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", - " var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n", - " comm.open();\n", - " if (msg_handler) {\n", - " comm.onMsg = msg_handler;\n", - " }\n", - " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", - " var comm_promise = google.colab.kernel.comms.open(comm_id)\n", - " comm_promise.then((comm) => {\n", - " window.PyViz.comms[comm_id] = comm;\n", - " if (msg_handler) {\n", - " var messages = comm.messages[Symbol.asyncIterator]();\n", - " function processIteratorResult(result) {\n", - " var message = result.value;\n", - " var content = {data: message.data};\n", - " var metadata = message.metadata || {comm_id};\n", - " var msg = {content, metadata}\n", - " msg_handler(msg);\n", - " return messages.next().then(processIteratorResult);\n", - " }\n", - " return messages.next().then(processIteratorResult);\n", - " }\n", - " })\n", - " var sendClosure = (data, metadata, buffers, 
disposeOnDone) => {\n", - " return comm_promise.then((comm) => {\n", - " comm.send(data, metadata, buffers, disposeOnDone);\n", - " });\n", - " };\n", - " var comm = {\n", - " send: sendClosure\n", - " };\n", - " }\n", - " window.PyViz.comms[comm_id] = comm;\n", - " return comm;\n", - " }\n", - " window.PyViz.comm_manager = new JupyterCommManager();\n", - " \n", - "\n", - "\n", - "var JS_MIME_TYPE = 'application/javascript';\n", - "var HTML_MIME_TYPE = 'text/html';\n", - "var EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\n", - "var CLASS_NAME = 'output';\n", - "\n", - "/**\n", - " * Render data to the DOM node\n", - " */\n", - "function render(props, node) {\n", - " var div = document.createElement(\"div\");\n", - " var script = document.createElement(\"script\");\n", - " node.appendChild(div);\n", - " node.appendChild(script);\n", - "}\n", - "\n", - "/**\n", - " * Handle when a new output is added\n", - " */\n", - "function handle_add_output(event, handle) {\n", - " var output_area = handle.output_area;\n", - " var output = handle.output;\n", - " if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", - " return\n", - " }\n", - " var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", - " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", - " if (id !== undefined) {\n", - " var nchildren = toinsert.length;\n", - " var html_node = toinsert[nchildren-1].children[0];\n", - " html_node.innerHTML = output.data[HTML_MIME_TYPE];\n", - " var scripts = [];\n", - " var nodelist = html_node.querySelectorAll(\"script\");\n", - " for (var i in nodelist) {\n", - " if (nodelist.hasOwnProperty(i)) {\n", - " scripts.push(nodelist[i])\n", - " }\n", - " }\n", - "\n", - " scripts.forEach( function (oldScript) {\n", - " var newScript = document.createElement(\"script\");\n", - " var attrs = [];\n", - " var nodemap = oldScript.attributes;\n", - " for (var j in nodemap) {\n", - " if (nodemap.hasOwnProperty(j)) {\n", - 
" attrs.push(nodemap[j])\n", - " }\n", - " }\n", - " attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n", - " newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n", - " oldScript.parentNode.replaceChild(newScript, oldScript);\n", - " });\n", - " if (JS_MIME_TYPE in output.data) {\n", - " toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n", - " }\n", - " output_area._hv_plot_id = id;\n", - " if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n", - " window.PyViz.plot_index[id] = Bokeh.index[id];\n", - " } else {\n", - " window.PyViz.plot_index[id] = null;\n", - " }\n", - " } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", - " var bk_div = document.createElement(\"div\");\n", - " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", - " var script_attrs = bk_div.children[0].attributes;\n", - " for (var i = 0; i < script_attrs.length; i++) {\n", - " toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n", - " }\n", - " // store reference to server id on output_area\n", - " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", - " }\n", - "}\n", - "\n", - "/**\n", - " * Handle when an output is cleared or removed\n", - " */\n", - "function handle_clear_output(event, handle) {\n", - " var id = handle.cell.output_area._hv_plot_id;\n", - " var server_id = handle.cell.output_area._bokeh_server_id;\n", - " if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n", - " var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n", - " if (server_id !== null) {\n", - " comm.send({event_type: 'server_delete', 'id': server_id});\n", - " return;\n", - " } else if (comm !== null) {\n", - " comm.send({event_type: 'delete', 'id': id});\n", - " }\n", - " delete PyViz.plot_index[id];\n", - " if 
((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n", - " var doc = window.Bokeh.index[id].model.document\n", - " doc.clear();\n", - " const i = window.Bokeh.documents.indexOf(doc);\n", - " if (i > -1) {\n", - " window.Bokeh.documents.splice(i, 1);\n", - " }\n", - " }\n", - "}\n", - "\n", - "/**\n", - " * Handle kernel restart event\n", - " */\n", - "function handle_kernel_cleanup(event, handle) {\n", - " delete PyViz.comms[\"hv-extension-comm\"];\n", - " window.PyViz.plot_index = {}\n", - "}\n", - "\n", - "/**\n", - " * Handle update_display_data messages\n", - " */\n", - "function handle_update_output(event, handle) {\n", - " handle_clear_output(event, {cell: {output_area: handle.output_area}})\n", - " handle_add_output(event, handle)\n", - "}\n", - "\n", - "function register_renderer(events, OutputArea) {\n", - " function append_mime(data, metadata, element) {\n", - " // create a DOM node to render to\n", - " var toinsert = this.create_output_subarea(\n", - " metadata,\n", - " CLASS_NAME,\n", - " EXEC_MIME_TYPE\n", - " );\n", - " this.keyboard_manager.register_events(toinsert);\n", - " // Render to node\n", - " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", - " render(props, toinsert[0]);\n", - " element.append(toinsert);\n", - " return toinsert\n", - " }\n", - "\n", - " events.on('output_added.OutputArea', handle_add_output);\n", - " events.on('output_updated.OutputArea', handle_update_output);\n", - " events.on('clear_output.CodeCell', handle_clear_output);\n", - " events.on('delete.Cell', handle_clear_output);\n", - " events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n", - "\n", - " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", - " safe: true,\n", - " index: 0\n", - " });\n", - "}\n", - "\n", - "if (window.Jupyter !== undefined) {\n", - " try {\n", - " var events = require('base/js/events');\n", - " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", - " if 
(OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", - " register_renderer(events, OutputArea);\n", - " }\n", - " } catch(err) {\n", - " }\n", - "}\n" - ], - "application/vnd.holoviews_load.v0+json": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || 
Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n })\n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id 
= output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === 
undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = 
require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.holoviews_exec.v0+json": "", - "text/html": [ - "
\n", - "
\n", - "
\n", - "" - ] - }, - "metadata": { - "application/vnd.holoviews_exec.v0+json": { - "id": "cb02b6be-806d-46b4-b6e7-db9506cb4142" - } - }, - "output_type": "display_data" - }, - { - "data": {}, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.holoviews_exec.v0+json": "", - "text/html": [ - "
\n", - "
\n", - "
\n", - "" - ], - "text/plain": [ - "ChatInterface(_button_data={'send': _ChatButtonData(i...}, _input_container=Row, _input_layout=Row, _placeholder=ChatMessage, _widgets={'TextInput': TextInput(cs...}, callback=\n", - "

5. You can try your own question

\n", - "\n", - "\n", - "

Here are some sample questions that you can try out:

\n", - "\n", - "
    \n", - "
  1. How VectorDistance works?
  2. \n", - "
  3. What is Script table operator?
  4. \n", - "
  5. Give me demos which have AWS Bedrock?
  6. \n", - "
  7. What is GEOSEQUENCE? Show me some examples
  8. \n", - "
  9. Which notebooks are using OpenAI?
  10. \n", - "
  11. Which notebooks are about fraud detection?
  12. \n", - "
  13. How to use TDApiClient to generate the embeddings?
  14. \n", - "
  15. Show me demo for Broken digital Journey?
  16. \n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "2fc3a070-33cf-4787-8c66-cc221075fdd5", - "metadata": {}, - "source": [ - "
\n", - "
ClearScape Analytics™
\n", - "
\n", - "
\n", - " Copyright © Teradata Corporation - 2025. All Rights Reserved\n", - "
\n", - "
\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/ExperienceBot/ExperienceBot.ipynb b/ExperienceBot/ExperienceBot.ipynb new file mode 100644 index 00000000..3952af8b --- /dev/null +++ b/ExperienceBot/ExperienceBot.ipynb @@ -0,0 +1,538 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fc92e86f-c5a7-499d-89e2-49046aa01be7", + "metadata": {}, + "source": [ + "
\n", + "

\n", + " Experience-Bot: Quickly find your demos of interest by just typing\n", + "
\n", + " \"Teradata\"\n", + "

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "d6c42cf3-ab6f-480d-943e-75be813f912b", + "metadata": {}, + "source": [ + "
\n", + "\n", + "

1. Install required libraries

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d74eee36-e3e8-42ca-9253-4e5ce8178c7b", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "\n", + "!pip install openai langchain langchain-openai langchain-community requests faiss-cpu panel==1.3.4" + ] + }, + { + "cell_type": "markdown", + "id": "87d04ebc-4e5c-4e11-8094-fef11cf0b017", + "metadata": {}, + "source": [ + "
\n", + "

Note: Please restart the kernel. The simplest way is to type 0 0 (zero zero) and then press Enter.

\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98cfddce-a2b3-4d7d-aae4-1f2dbc463022", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import re\n", + "\n", + "# genAI\n", + "import openai\n", + "from langchain.document_loaders import DirectoryLoader\n", + "from langchain_openai import OpenAIEmbeddings\n", + "from langchain.vectorstores import FAISS\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.schema import Document\n", + "\n", + "from langchain_community.document_loaders import NotebookLoader\n", + "from langchain_community.document_loaders import DirectoryLoader\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains import RetrievalQA\n", + "\n", + "\n", + "# display\n", + "from IPython.display import display, Markdown, HTML, Javascript\n", + "\n", + "#teradataml\n", + "from teradataml import *" + ] + }, + { + "cell_type": "markdown", + "id": "1320f4b6-15ab-4d5b-8bcd-56fb9154d5ca", + "metadata": {}, + "source": [ + "
\n", + "\n", + "

1.1 Connect to Vantage

\n", + "

We will be prompted to provide the password. We will enter the password, press the Enter key, and then use the down arrow to go to the next cell.

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "882f4318-bbb0-4214-ab97-316ded717794", + "metadata": {}, + "outputs": [], + "source": [ + "%run -i ../UseCases/startup.ipynb\n", + "eng = create_context(host = 'host.docker.internal', username='demo_user', password = password)\n", + "print(eng)\n", + "execute_sql('''SET query_band='DEMO= ExperienceBot.ipynb;' UPDATE FOR SESSION;''')" + ] + }, + { + "cell_type": "markdown", + "id": "fd6a96fb-38bb-4f7c-b01b-a6fff11dbbc6", + "metadata": {}, + "source": [ + "
\n", + "\n", + "

1.2 Set OpenAI key to environment

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63f65a6a-cb8e-4ce4-a8ba-c59c95e55db3", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "if not os.getenv(\"OPENAI_API_KEY\"):\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")" + ] + }, + { + "cell_type": "markdown", + "id": "b4fe3299-d15d-48c8-ae02-a80968c9ad1f", + "metadata": {}, + "source": [ + "
\n", + "\n", + "

2. Create the embeddings or load from existing

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b661c301-cfe3-4389-a05e-e996336684e8", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to extract content from a Jupyter notebook\n", + "def extract_notebook_content(file_path):\n", + " with open(file_path, 'r', encoding='utf-8') as f:\n", + " notebook_data = json.load(f)\n", + "\n", + " content = \"\"\n", + " for cell in notebook_data.get('cells', []):\n", + " if cell['cell_type'] == 'markdown':\n", + " # Clean markdown content by removing HTML tags\n", + " content += '\\n'.join(cell['source']) + '\\n\\n'\n", + " elif cell['cell_type'] == 'code':\n", + " # Format code properly\n", + " content += '```python\\n' + ''.join(cell['source']) + '\\n```\\n\\n'\n", + " return content\n", + "\n", + "# Function to remove HTML tags\n", + "def remove_html_tags(text):\n", + " \"\"\"Remove HTML tags from a string\"\"\"\n", + " clean = re.compile('<.*?>')\n", + " return re.sub(clean, '', text)\n", + "\n", + "# Function to split the notebook content into markdown and code\n", + "def split_ipynb_content(content):\n", + " # Regular expression to match code blocks\n", + " code_pattern = re.compile(r'```python(.*?)```', re.DOTALL)\n", + "\n", + " # Find all code blocks\n", + " code_blocks = code_pattern.findall(content)\n", + "\n", + " # Split the content by code blocks\n", + " parts = code_pattern.split(content)\n", + "\n", + " # Combine markdown and code blocks\n", + " result = []\n", + " for i, part in enumerate(parts):\n", + " if i % 2 == 0:\n", + " # This is a markdown part, remove HTML tags\n", + " clean_part = remove_html_tags(part)\n", + " result.append(('markdown', clean_part))\n", + " else:\n", + " # This is a code part\n", + " result.append(('code', part))\n", + "\n", + " return result\n", + "\n", + "# Function to clean and split notebook content\n", + "def clean_and_split_notebook_content(file_path):\n", + " \"\"\"Extract markdown content and clean up the notebook's 
information.\"\"\"\n", + " # Extract the content from the notebook file\n", + " content = extract_notebook_content(file_path)\n", + " \n", + " # Split content into markdown and code cells\n", + " split_content = split_ipynb_content(content)\n", + "\n", + " # Initialize a list to hold combined documents\n", + " combined_documents = []\n", + " current_markdown = \"\"\n", + " current_code = \"\"\n", + "\n", + " # Iterate through the split content to group markdown with code\n", + " for part_type, part in split_content:\n", + " if part_type == 'markdown':\n", + " # If we have code and markdown, combine them\n", + " if current_markdown or current_code:\n", + " combined_documents.append({\"markdown\": current_markdown, \"code\": current_code})\n", + " # Update current markdown to the new one\n", + " current_markdown = part\n", + " current_code = \"\" # Reset code, ready for next code block\n", + " elif part_type == 'code':\n", + " # Append the code to the current code block\n", + " current_code += part\n", + " \n", + " # Add the last document (markdown + code)\n", + " if current_markdown or current_code:\n", + " combined_documents.append({\"markdown\": current_markdown, \"code\": current_code})\n", + "\n", + " return combined_documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe6c268a-7004-48f2-8cf4-fb401b363235", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_emb():\n", + " # Load notebooks and clean them\n", + " path = '/home/jovyan/JupyterLabRoot/'\n", + " loader = DirectoryLoader(path, glob=\"**/*.ipynb\", loader_cls=NotebookLoader)\n", + " notebooks = loader.load()\n", + "\n", + " # Clean each notebook before processing it\n", + " cleaned_documents = []\n", + " for notebook in notebooks:\n", + " # Assuming notebook metadata contains file path\n", + " file_path = notebook.metadata.get(\"source\", \"Unknown\") # Adjust this as needed\n", + " cleaned_data = clean_and_split_notebook_content(file_path)\n", + "\n", + " # 
Convert cleaned data to documents, including the source file path\n", + " for data in cleaned_data:\n", + " if data['markdown'] or data['code']:\n", + " doc = Document(\n", + " page_content=f\"Markdown:\\n{data['markdown']} \\n\\nCode:\\n{data['code']}\",\n", + " metadata={\"source\": file_path} # Ensure the source file path is added\n", + " )\n", + " cleaned_documents.append(doc)\n", + "\n", + "\n", + " # Split text into manageable chunks\n", + " text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)\n", + " docs = text_splitter.split_documents(cleaned_documents)\n", + "\n", + "\n", + " # for count of token\n", + " from tiktoken import encoding_for_model\n", + "\n", + " def count_document_tokens(document, model_name=\"gpt-4o-mini\"):\n", + " encoder = encoding_for_model(model_name)\n", + " return len(encoder.encode(document.page_content))\n", + "\n", + " tiktokn = 0\n", + " for doc in cleaned_documents:\n", + " tiktokn = tiktokn + count_document_tokens(doc)\n", + "\n", + " print(\"total token from all the notebooks: \", tiktokn)\n", + "\n", + " # Create vector store using embeddings\n", + " embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", + " vector_store = FAISS.from_documents(docs, embeddings)\n", + "\n", + " # Save the index for reuse\n", + " vector_store.save_local(\"notebooks_index\")\n", + " return vector_store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f34de3e-b917-4b5d-9448-c9341a9980fa", + "metadata": {}, + "outputs": [], + "source": [ + "def load_emb():\n", + " # Load the FAISS index with dangerous deserialization enabled\n", + " # Create vector store using embeddings\n", + " embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", + "\n", + " return FAISS.load_local(\n", + " \"notebooks_index\", embeddings, allow_dangerous_deserialization=True\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "2f65df37-f51b-45cf-9aba-7058af4447b8", + "metadata": {}, + 
"source": [ + "
\n", + "

Note: You do not have to run the next cell multiple times. Each time it is executed, it sends over 1M tokens to the embedding model, and the charge is typically $0.02 USD per 1M tokens.

\n", + " \n", + "
\n", + "

Note: If you have previously run this notebook, please select No in response to the following question.

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c357a955-d49d-4fec-99a5-6c42f9d204bf", + "metadata": {}, + "outputs": [], + "source": [ + "# Request user's input\n", + "generate = input(\"Do you want to generate embeddings? ('yes'/'no'): \")\n", + "\n", + "# Check the user's input\n", + "if generate.lower() == \"yes\":\n", + " vector_store = generate_emb()\n", + "elif generate.lower() == \"no\":\n", + " try:\n", + " print('Loading existing embeddings...')\n", + " vector_store = load_emb()\n", + " print('Embeddings are loaded now...')\n", + " except:\n", + " print('Embeddings not found, generating now..')\n", + " generate_emb()\n", + " vector_store = load_emb()" + ] + }, + { + "cell_type": "markdown", + "id": "5d754281-77b6-4f68-a41d-2323cdc020ad", + "metadata": {}, + "source": [ + "
\n", + "

3. Define a RAG Pipeline

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dbb1de5-2966-474f-86a4-a1aff224e9e8", + "metadata": {}, + "outputs": [], + "source": [ + "# Custom Prompt Template\n", + "CUSTOM_PROMPT = \"\"\"\n", + "You are a helpful assistant. Use the following retrieved information from Jupyter notebooks to provide:\n", + "1. A **clean and concise textual explanation** based on the question and notebook markdown.\n", + "2. **Relevant Clean Python code** extracted from the notebooks' code cells that are related to the question. Please filter the code that is related to the query.\n", + "3. Extract the source documents\n", + "If no relevant information is found, politely say so.\n", + "\n", + "*Critical*: start by greeting only if user starts with greeting, just say, \"Hey there! 😊 Welcome to our chatbot!\"\n", + "\n", + "Context:\n", + "{context}\n", + "\n", + "Question:\n", + "{question}\n", + "\n", + "Your response should be in below format:\n", + "##Answer:\n", + "##Relevant Code:\n", + "##Source documents:\n", + "\"\"\"\n", + "\n", + "prompt = PromptTemplate(\n", + " input_variables=[\"context\", \"question\"],\n", + " template=CUSTOM_PROMPT\n", + ")\n", + "\n", + "# Make sure to use a Chat model like 'gpt-4' or 'gpt-3.5-turbo'\n", + "chat_model = ChatOpenAI(model=\"gpt-4o-mini\")\n", + "\n", + "# Retrieval QA Chain\n", + "qa_chain = RetrievalQA.from_chain_type(\n", + " llm=chat_model,\n", + " retriever=vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 10}), # Assuming vector_store is your vector database\n", + " return_source_documents=True,\n", + " chain_type_kwargs={\"prompt\": prompt}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d2efb7c-dfa6-4240-b3ae-e40337685705", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to Query Chatbot\n", + "def query_chatbot(question):\n", + " # Query the chatbot using the chain\n", + " result = qa_chain.invoke(question)\n", + " answer = 
result[\"result\"]\n", + "\n", + " # Extract and format relevant source paths from source documents\n", + " source_docs = result.get(\"source_documents\", [])\n", + " sources = \"\\n\".join(set([doc.metadata.get(\"source\", \"Unknown\") for doc in source_docs]))\n", + "\n", + " return f\"\"\"\n", + "{answer}\n", + "\n", + "Reference Notebook(s):\n", + "{sources if sources else \"No source notebooks found.\"}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8e60ece-6bd2-495a-8d11-ec545c11be87", + "metadata": {}, + "outputs": [], + "source": [ + "def extract_answer_code_references(input_string):\n", + "\n", + " # Extract references and create JupyterLab-compatible links\n", + " references = re.findall(r'(/home/[^\\s]+)', input_string)\n", + " html_output = []\n", + " for i, ref in enumerate(references):\n", + " html_output.append(f' {i+1}. {ref.split(\"/\")[-1]} ')\n", + " \n", + " # return html_output\n", + " return \"\\n\\n\".join(html_output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b8e3755-2398-4e51-9479-3fe810f34beb", + "metadata": {}, + "outputs": [], + "source": [ + "import panel as pn\n", + "pn.extension(design=\"material\")\n", + "\n", + "# panel callback function\n", + "def callback(contents, user, instance):\n", + " response = qa_chain.invoke(contents)\n", + " result = response['result']\n", + " \n", + " source_docs = response.get(\"source_documents\", [])\n", + " sources = \"\\n\".join(set([doc.metadata.get(\"source\", \"Unknown\") for doc in source_docs]))\n", + " html_output = extract_answer_code_references(sources)\n", + " result = result + \"\\n\\n\" + html_output\n", + " return result\n", + "\n", + "\n", + "pn.chat.ChatInterface(\n", + " callback=callback,\n", + " show_rerun=False,\n", + " show_undo=False,\n", + " show_clear=False,\n", + " width=1200,\n", + " height=400,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "28cdf87b-3b88-4acb-8a8d-4d9f2c6c3b97", + 
"metadata": {}, + "source": [ + "
\n", + "

Note: To ensure that the Chatbot interface reflects the latest changes, please reload the page by clicking the 'Reload' button or pressing F5 on your keyboard (first time only). This will update the notebook with the latest modifications, and you'll be able to interact with the Chatbot using the new libraries.

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "f443eeaf-b913-4dd4-839b-e950b228a3c5", + "metadata": {}, + "source": [ + "
\n", + "

4. You can try your own question

\n", + "\n", + "\n", + "

Here are some sample questions that you can try out:

\n", + "\n", + "
    \n", + "
  1. How does VectorDistance work?
  2. \n", + "
  3. What is Script table operator?
  4. \n", + "
  5. Give me demos which have AWS Bedrock?
  6. \n", + "
  7. What is GEOSEQUENCE? Show me some examples
  8. \n", + "
  9. Which notebooks are using OpenAI?
  10. \n", + "
  11. Which notebooks are about fraud detection?
  12. \n", + "
  13. How to use TDApiClient to generate the embeddings?
  14. \n", + "
  15. Show me a demo for Broken Digital Journey.
  16. \n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "2fc3a070-33cf-4787-8c66-cc221075fdd5", + "metadata": {}, + "source": [ + "
\n", + "
ClearScape Analytics™
\n", + "
\n", + "
\n", + " Copyright © Teradata Corporation - 2025. All Rights Reserved\n", + "
\n", + "
\n", + "
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ExperienceBot/ExperienceBot_NoCode.ipynb b/ExperienceBot/ExperienceBot_NoCode.ipynb new file mode 100644 index 00000000..799af1c6 --- /dev/null +++ b/ExperienceBot/ExperienceBot_NoCode.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fc92e86f-c5a7-499d-89e2-49046aa01be7", + "metadata": {}, + "source": [ + "
\n", + "

\n", + " Experience-Bot: Quickly find your demos of interest by just typing\n", + "
\n", + " \"Teradata\"\n", + "

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "d6c42cf3-ab6f-480d-943e-75be813f912b", + "metadata": {}, + "source": [ + "
\n", + "\n", + "

1. Install required libraries

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d74eee36-e3e8-42ca-9253-4e5ce8178c7b", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "\n", + "!pip install openai langchain langchain-openai langchain-community requests faiss-cpu panel==1.3.4" + ] + }, + { + "cell_type": "markdown", + "id": "87d04ebc-4e5c-4e11-8094-fef11cf0b017", + "metadata": {}, + "source": [ + "
\n", + "

Note: Please restart the kernel. The simplest way is to type 0 0 (zero zero) and then press Enter.

\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98cfddce-a2b3-4d7d-aae4-1f2dbc463022", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "print(\"Current directory:\", os.path.join(os.getcwd(), 'ExperienceBot'))\n", + "\n", + "import sys\n", + "sys.path.append(os.path.join(os.getcwd(), 'ExperienceBot'))" + ] + }, + { + "cell_type": "markdown", + "id": "04cfe436-e5f5-436e-bf24-abf3dc507cc9", + "metadata": {}, + "source": [ + "
\n", + "\n", + "

2. Enter the OpenAI key and start Chatbot

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3222e2a9-bef6-4173-8011-0ca78908d4c0", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "if not os.getenv(\"OPENAI_API_KEY\"):\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f3fa2bc-686e-453d-852c-e1fa6aa63efb", + "metadata": {}, + "outputs": [], + "source": [ + "# Import your module\n", + "from chat_helper import get_chat_ui\n", + "\n", + "get_chat_ui()" + ] + }, + { + "cell_type": "markdown", + "id": "1f135d10-d5bc-4710-b994-68ec7175e9d2", + "metadata": {}, + "source": [ + "
\n", + "

Note: To ensure that the Chatbot interface reflects the latest changes, please reload the page by clicking the 'Reload' button or pressing F5 on your keyboard (first time only). This will update the notebook with the latest modifications, and you'll be able to interact with the Chatbot using the new libraries.

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "f443eeaf-b913-4dd4-839b-e950b228a3c5", + "metadata": {}, + "source": [ + "
\n", + "

3. You can try your own question

\n", + "\n", + "\n", + "

Here are some sample questions that you can try out:

\n", + "\n", + "
    \n", + "
  1. How does VectorDistance work?
  2. \n", + "
  3. What is Script table operator?
  4. \n", + "
  5. Give me demos which have AWS Bedrock?
  6. \n", + "
  7. What is GEOSEQUENCE? Show me some examples
  8. \n", + "
  9. Which notebooks are using OpenAI?
  10. \n", + "
  11. Which notebooks are about fraud detection?
  12. \n", + "
  13. How to use TDApiClient to generate the embeddings?
  14. \n", + "
  15. Show me a demo for Broken Digital Journey.
  16. \n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "2fc3a070-33cf-4787-8c66-cc221075fdd5", + "metadata": {}, + "source": [ + "
\n", + "
ClearScape Analytics™
\n", + "
\n", + "
\n", + " Copyright © Teradata Corporation - 2025. All Rights Reserved\n", + "
\n", + "
\n", + "
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ExperienceBot/__pycache__/chat_helper.cpython-39.pyc b/ExperienceBot/__pycache__/chat_helper.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40bc36e60946f8d6961f5c61558da36f3b0142cd GIT binary patch literal 4538 zcmb7H&2QYs73Yv#F17l!?8J$iq;!m;S;b!2O_8RCYt)e))I}mYveO`J(gnpC?NZ{B z>zP@7Y=Jkx*tx@Em+S%0U-$;U?Hr})&@D7eL0YK7&Wh0L_=(8k|RosJv2jK-X_((yvi z)Lryf!>XxQQol15PMNxw);iPSH0o9Ee_@3)e2UlbpB1zF7J8@o3|e!1me1ioFXs2H zaDmVB1+>obb9@p1MSh-N!2djFkF5F⪻$JT4%+=gRaQ#+`rdZ7n~1*~_7V8j&Zc`<8L%U!mb7!|QEk*)9k79YWDi~K4_wan?ccsKtPCoLQv+wS zMzcPK>|DJ24zsd_8t`_#rdVg_EsBWGq5vXWe@IdE5|NNaS5K`tc$m6&t zrQkP%`dr~l@uVk|R)y0NI$V%ZqC|5emrbQ3-BX2~tHSL@x?Ol;H=&7sC)yG`k?7yd zldN!asHLdv$V7{x8t0i7nJy|_nc!CwyRqo%1l)uRYvo?|#$EMl;q+u$EOKy=WD!ln z-YFNd@HdiFWKl;Di{)vGDe7w)W?ZTkOnhs}??7kI-dS$v9kINbZ|_Cf^3QwSJuT$> z(fV4R>*ag9T>;%^v3M7)7>3epixhp0?p|>L=WdRq(2O$PB$+LcF~Dyui`lHkd}oH$ zoH;hbmh45g$mFXSiD`KJR8XPMSkYZnG<=FQ%IzUMXUMqAD=m9y5A3_vXRpJutbudU zg4I~u8@T&6EP)MN*pfqb^5hkx&+2Ob63=(CG>^#O1Kk#Zn3#Wmv#oVk-CSN)I+syP zG+KGyN=4L7R3pwi%Q5Ow#Zgz_U{SiKlUOym=;Tcv>1g@xy$>EV-_Kk5^0tUUfvAXP zl5w%SoaI`q=lPZzwTt;PJ^g7jv9SSzQT?g1(cMb>E;M?xZp%do!N{pzkr$}`0*azS zdQ*}_8yXciG2N)JEMB2Wi{mplCx^X)enncbn9p41+CKh1R3iiQol*ru$3zvE&>&TC z`+yB0;!PW_WRWrs95{@(+J8}tU43n`(Y3d(rIBojZ-FhHoQqVx1`?0!Ec|Yiruhy; zm$ihX1mOL zF6qptPDYeHd@J}A85yTD!Nj=JvguR@_K<1DE8N3rZEbHl16#ZM)@Fs^m`^2)*A8p~ z;Q>Q^_P~V<=2m;&nI~=h@=RjfE;9_mj;ni@cJmzjo>2y3RxgL6oMdolqIGD zVv-D)(J^ueHh{gs$OVIm*$@hKWxkzoantN|qY>N*mX^p!vjC1406JozPe2OkMQI>* 
z0p2AlEG-4=k-|QCW>_$o1+*~h##;oPNN$noGz|S8G(de#q`>@42PRWTF` zlGsN^u?AVgGM#NqOMTx=3SyC_3R_0n$VCbYCk9bz_Lz-E@DSv|I;b6)xQjphl^K^}V1ag#g~RAWh6qHi8}uxg1u(PNLhB8CQeNp3+3} zYVi8c#2)b|#pU21fBy0}!N(#6%HWWB0ImbV)%9Foe!bjzg#y8@E_MBD6Bc&$upIl4 zLJrh78XSvpGzue>IPExuwew!egLMHLLkY{niOJN;#aFJ}$<&UJH@-qA{bUGbbu&t+* zKw#~(0^r8&rrzs{aCX$#+5w)Z$At&$M@~|ALnlo;;ndi9WOxzPp3Iu1@|zRohvy*E zBs1iiC7HS>3EQMg1z|xrPqy1Uo@ihroQV{In#p*CtUKZC%14hLKlq^eaP7ed4;><03G6sJjmS&*TzDV?y5Xb1EP{ zMDHZ2Vgtt6%N9Z*JG8movXDGE+&Of#OSt?60^?n4AHfp|n2X?Oe}!an;L7&~Zf0{I zE$^EOaBFI{@L(mqR2K_pC0DT8DQ*auNm`HaDLP8a8d#q#4#CSNl08azkL=&uPk@Kt z+J#q=E2(JfZU5404*xbVF+r;m1RC`U zOnIcW!b`I4d`lRH_Z2+ zF-+s41*N+K_YmfB#JKk|wX5Htj!fw#?jtj|kLa#2wc4M1qs#<<`KwRWXK&W~?ycl@ z@RTgGZ#RNxZ29SPJpD>=YdN|7%&yhyUSUflt{X^U5aEj{kpX%oU>@S>BDLj3MRkF# zMuzoDI8y?If^%i9q9(_xQpI#zchY9w(*TfyC53~rv-a`YDCi&~BiL$En1H8_)UCb% zqsm;}ka9w#>e+lq7AnYZtkgoHNq+{bwvgu_D>$G8)c6#<&Dz8z zW?*yAye1zw@F29>swDAHUXR#?d-L@E~YBF!QPl%zcQ6Dsagahg+57_spu3@S>fExL@@#xj7szB|ME zul~;iyqEk1p(6PmGz^wM1ck7s+DK!iki9INGIit+`Ka4m8E~0PINkW#B5Ab&8DXUx zWg?|exT|+$)PTj zW_a#+Mj4WoiK!^G&dRl7Q$;U6}QR5 bRDi {i+1}. 
{ref.split("/")[-1]} ') + + # return html_output + return "\n\n".join(html_output) + + +# ---------------------------------------------------------------- +# Step4: Get the chat UI +# ---------------------------------------------------------------- +def get_chat_ui(): + import panel as pn + pn.extension(design="material") + + # panel callback function + def callback(contents, user, instance): + response = qa_chain.invoke(contents) + result = response["result"] + + source_docs = response.get("source_documents", []) + sources = "\n".join( + set([doc.metadata.get("source", "Unknown") for doc in source_docs]) + ) + html_output = extract_answer_code_references(sources) + result = result + "\n\n" + html_output + return result + + return pn.chat.ChatInterface( + callback=callback, + show_rerun=False, + show_undo=False, + show_clear=False, + width=1200, + height=400, + ) From 96c4dac3c8a9e927291a2ef2bf9042d3659bf504 Mon Sep 17 00:00:00 2001 From: Adam Tworkiewicz Date: Fri, 31 Jan 2025 06:37:25 -0600 Subject: [PATCH 16/35] Move CSAE tests to us-west as us-central is limited by quotas --- .tests/pre_create_env.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.tests/pre_create_env.ts b/.tests/pre_create_env.ts index 9b81b160..b6f5bf88 100644 --- a/.tests/pre_create_env.ts +++ b/.tests/pre_create_env.ts @@ -13,11 +13,11 @@ console.log('CSAE_PARALLEL_TESTS_COUNT:', CSAE_PARALLEL_TESTS_COUNT); console.log(`Need ${Math.floor(CSAE_WORKERS_COUNT/CSAE_PARALLEL_TESTS_COUNT)} environments`); for (let i = 0; i < Math.floor(CSAE_WORKERS_COUNT/CSAE_PARALLEL_TESTS_COUNT); i++) { - const env = new Environments(`jupyter-demos-${ENV_PREFIX}-${i}`, 'us-central', CSAE_ENV_PASSWORD) + const env = new Environments(`jupyter-demos-${ENV_PREFIX}-${i}`, 'us-west', CSAE_ENV_PASSWORD) envs.push(env.create()); } console.log(`Creating ${envs.length} environments`); Promise.all(envs).then(() => { console.log('Environments created'); -}) \ No newline at end of file +}) From 
7bc81178e135aec291b588bc71d9b933e91b990c Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sat, 1 Feb 2025 13:14:56 +0530 Subject: [PATCH 17/35] shard --- .github/workflows/ci.yml | 2 +- .github/workflows/nightly.yml | 8 ++++---- .tests/tests/jupytertest.spec.ts | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8f13d2e0..31898a41 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: CSAE_PARALLEL_TESTS_COUNT: ${{ vars.CSAE_PARALLEL_TESTS_COUNT }} CSAE_WORKERS_COUNT: ${{ vars.CSAE_WORKERS_COUNT }} TEST_ENV: ${{ github.event.inputs.test_env || 'PROD' }} - CSAE_CI_JOB_IDX: 0 + CSAE_CI_JOB_IDX: 1 CSAE_CI_JOB_COUNT: 1 name: Playwirght Test timeout-minutes: 1440 diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index d4b89c6b..87cf7b3a 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -33,7 +33,7 @@ jobs: id-token: 'write' strategy: fail-fast: false - matrix: ${{ github.event.inputs.notebooks == '' && fromJson('{"idx":[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}') || fromJson('{"idx":[0]}') }} + matrix: ${{ github.event.inputs.notebooks == '' && fromJson('{"idx":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}') || fromJson('{"idx":[1]}') }} env: CSAE_NOTEBOOKS: ${{ github.event.inputs.notebooks }} CSAE_ENV_PASSWORD: ${{ secrets.CSAE_ENV_PASSWORD }} @@ -91,13 +91,13 @@ jobs: run: | npx playwright install --with-deps - name: Find all Jupyter notebooks - run: find .. -name '*.ipynb' |tail -n 10| sort > ./files.txt + run: find .. 
-name '*.ipynb' |head -n 10| sort > ./files.txt - name: Start test environments id: start-test-environment run: npx ts-node pre_create_env.ts - name: Run Playwright tests run: | - npx playwright test + npx playwright test --shard ${{matrix.idx}}/10 - name: Delete test environments if: always() run: npx ts-node post_delete_env.ts @@ -132,7 +132,7 @@ jobs: - name: Upload HTML report uses: actions/upload-artifact@v4 with: - name: html-report--attempt-${{ github.run_attempt }} + name: html-report-attempt-${{ github.run_attempt }} path: .tests/playwright-report/ retention-days: 14 - name: Setup pages diff --git a/.tests/tests/jupytertest.spec.ts b/.tests/tests/jupytertest.spec.ts index d7a83d58..9ab3de27 100644 --- a/.tests/tests/jupytertest.spec.ts +++ b/.tests/tests/jupytertest.spec.ts @@ -8,7 +8,7 @@ const CSAE_WORKERS_COUNT = parseInt(process.env.CSAE_WORKERS_COUNT || '1'); const CSAE_PARALLEL_TESTS_COUNT = parseInt(process.env.CSAE_PARALLEL_TESTS_COUNT || '1'); const envPool = new EnvPool(Math.floor(CSAE_WORKERS_COUNT / CSAE_PARALLEL_TESTS_COUNT)); -const CSAE_CI_JOB_IDX = parseInt(process.env.CSAE_CI_JOB_IDX || '0'); +const CSAE_CI_JOB_IDX = parseInt(process.env.CSAE_CI_JOB_IDX || '1'); const CSAE_CI_JOB_COUNT = parseInt(process.env.CSAE_CI_JOB_COUNT || '1'); const CI_BRANCH = process.env.CI_BRANCH || 'main'; const IGNORE_BLACKLIST = process.env.IGNORE_BLACKLIST || 'false'; @@ -70,7 +70,7 @@ const testCount = Math.ceil(files.length / CSAE_CI_JOB_COUNT); for (let i = 0; i < testCount; i++) { - const idx = i * CSAE_CI_JOB_COUNT + CSAE_CI_JOB_IDX; + const idx = i * CSAE_CI_JOB_COUNT + (CSAE_CI_JOB_IDX-1); if (idx >= files.length) { break; } From 4833d58d1cf01e44b9a2b92731fc8af27af9e4f2 Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sat, 1 Feb 2025 16:50:21 +0530 Subject: [PATCH 18/35] pipe issue --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 
87cf7b3a..34a4c699 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -91,7 +91,7 @@ jobs: run: | npx playwright install --with-deps - name: Find all Jupyter notebooks - run: find .. -name '*.ipynb' |head -n 10| sort > ./files.txt + run: find .. -name '*.ipynb' | tail -n 10 | sort > ./files.txt - name: Start test environments id: start-test-environment run: npx ts-node pre_create_env.ts From 48d3a1a53062500aae91e348c864b0b7f54fbd6f Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sat, 1 Feb 2025 18:13:03 +0530 Subject: [PATCH 19/35] sharding will divide the tests --- .github/workflows/nightly.yml | 2 +- .tests/package-lock.json | 24 ++++++++++++------------ .tests/package.json | 2 +- .tests/tests/jupytertest.spec.ts | 13 ++----------- 4 files changed, 16 insertions(+), 25 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 34a4c699..19281f12 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -97,7 +97,7 @@ jobs: run: npx ts-node pre_create_env.ts - name: Run Playwright tests run: | - npx playwright test --shard ${{matrix.idx}}/10 + npx playwright test --shard=${{matrix.idx}}/10 - name: Delete test environments if: always() run: npx ts-node post_delete_env.ts diff --git a/.tests/package-lock.json b/.tests/package-lock.json index 93275356..6da84e49 100644 --- a/.tests/package-lock.json +++ b/.tests/package-lock.json @@ -14,7 +14,7 @@ "yaml": "^2.4.5" }, "devDependencies": { - "@playwright/test": "^1.44.1", + "@playwright/test": "^1.50.1", "@types/node": "^20.17.16", "dotenv-cli": "^7.4.2" } @@ -53,12 +53,12 @@ } }, "node_modules/@playwright/test": { - "version": "1.49.1", - "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.49.1.tgz", - "integrity": "sha512-Ky+BVzPz8pL6PQxHqNRW1k3mIyv933LML7HktS8uik0bUXNCdPhoS/kLihiO1tMf/egaJb4IutXd7UywvXEW+g==", + "version": "1.50.1", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.50.1.tgz", + 
"integrity": "sha512-Jii3aBg+CEDpgnuDxEp/h7BimHcUTDlpEtce89xEumlJ5ef2hqepZ+PWp1DDpYC/VO9fmWVI1IlEaoI5fK9FXQ==", "dev": true, "dependencies": { - "playwright": "1.49.1" + "playwright": "1.50.1" }, "bin": { "playwright": "cli.js" @@ -314,12 +314,12 @@ } }, "node_modules/playwright": { - "version": "1.49.1", - "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.49.1.tgz", - "integrity": "sha512-VYL8zLoNTBxVOrJBbDuRgDWa3i+mfQgDTrL8Ah9QXZ7ax4Dsj0MSq5bYgytRnDVVe+njoKnfsYkH3HzqVj5UZA==", + "version": "1.50.1", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.50.1.tgz", + "integrity": "sha512-G8rwsOQJ63XG6BbKj2w5rHeavFjy5zynBA9zsJMMtBoe/Uf757oG12NXz6e6OirF7RCrTVAKFXbLmn1RbL7Qaw==", "dev": true, "dependencies": { - "playwright-core": "1.49.1" + "playwright-core": "1.50.1" }, "bin": { "playwright": "cli.js" @@ -332,9 +332,9 @@ } }, "node_modules/playwright-core": { - "version": "1.49.1", - "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.49.1.tgz", - "integrity": "sha512-BzmpVcs4kE2CH15rWfzpjzVGhWERJfmnXmniSyKeRZUs9Ws65m+RGIi7mjJK/euCegfn3i7jvqWeWyHe9y3Vgg==", + "version": "1.50.1", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.50.1.tgz", + "integrity": "sha512-ra9fsNWayuYumt+NiM069M6OkcRb1FZSK8bgi66AtpFoWkg2+y0bJSNmkFrWhMbEBbVKC/EruAHH3g0zmtwGmQ==", "dev": true, "bin": { "playwright-core": "cli.js" diff --git a/.tests/package.json b/.tests/package.json index 23fa8f80..8d1a0111 100644 --- a/.tests/package.json +++ b/.tests/package.json @@ -8,7 +8,7 @@ "author": "", "license": "ISC", "devDependencies": { - "@playwright/test": "^1.44.1", + "@playwright/test": "^1.50.1", "@types/node": "^20.17.16", "dotenv-cli": "^7.4.2" }, diff --git a/.tests/tests/jupytertest.spec.ts b/.tests/tests/jupytertest.spec.ts index 9ab3de27..8b4dcb80 100644 --- a/.tests/tests/jupytertest.spec.ts +++ b/.tests/tests/jupytertest.spec.ts @@ -8,8 +8,6 @@ const CSAE_WORKERS_COUNT = 
parseInt(process.env.CSAE_WORKERS_COUNT || '1'); const CSAE_PARALLEL_TESTS_COUNT = parseInt(process.env.CSAE_PARALLEL_TESTS_COUNT || '1'); const envPool = new EnvPool(Math.floor(CSAE_WORKERS_COUNT / CSAE_PARALLEL_TESTS_COUNT)); -const CSAE_CI_JOB_IDX = parseInt(process.env.CSAE_CI_JOB_IDX || '1'); -const CSAE_CI_JOB_COUNT = parseInt(process.env.CSAE_CI_JOB_COUNT || '1'); const CI_BRANCH = process.env.CI_BRANCH || 'main'; const IGNORE_BLACKLIST = process.env.IGNORE_BLACKLIST || 'false'; @@ -66,20 +64,13 @@ if(process.env.CSAE_NOTEBOOKS){ } } -const testCount = Math.ceil(files.length / CSAE_CI_JOB_COUNT); -for (let i = 0; i < testCount; i++) { - - const idx = i * CSAE_CI_JOB_COUNT + (CSAE_CI_JOB_IDX-1); - if (idx >= files.length) { - break; - } - const name = files[idx]; +for (let i = 0; i < files.length ; i++) { + const name = files[i]; if (name === '') { continue; } - test(`test ${i}: ${name}`, async ({ page }, testInfo) => { test.setTimeout(10800000); From 0926dda1a56f88b8c86f76b42b8341b9be5e9b5f Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sat, 1 Feb 2025 19:20:51 +0530 Subject: [PATCH 20/35] pages permissions --- .github/workflows/nightly.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 19281f12..e5edc41f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -29,8 +29,9 @@ defaults: jobs: playwright-tests: permissions: - contents: 'read' - id-token: 'write' + pages: write + contents: 'read' + id-token: 'write' strategy: fail-fast: false matrix: ${{ github.event.inputs.notebooks == '' && fromJson('{"idx":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}') || fromJson('{"idx":[1]}') }} @@ -91,7 +92,7 @@ jobs: run: | npx playwright install --with-deps - name: Find all Jupyter notebooks - run: find .. -name '*.ipynb' | tail -n 10 | sort > ./files.txt + run: find .. 
-name '*.ipynb' | head -n 10 | sort > ./files.txt - name: Start test environments id: start-test-environment run: npx ts-node pre_create_env.ts From e64a69bca532eead75a1dc1a80966aa93d09654b Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sat, 1 Feb 2025 20:43:11 +0530 Subject: [PATCH 21/35] tail --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index e5edc41f..514511ae 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -92,7 +92,7 @@ jobs: run: | npx playwright install --with-deps - name: Find all Jupyter notebooks - run: find .. -name '*.ipynb' | head -n 10 | sort > ./files.txt + run: find .. -name '*.ipynb' | tail -n 10 | sort > ./files.txt - name: Start test environments id: start-test-environment run: npx ts-node pre_create_env.ts From a13e9ac61e86cef92490b2a5e02e6763143e23ed Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sat, 1 Feb 2025 21:49:07 +0530 Subject: [PATCH 22/35] permissions --- .github/workflows/nightly.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 514511ae..033a47cb 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -29,7 +29,6 @@ defaults: jobs: playwright-tests: permissions: - pages: write contents: 'read' id-token: 'write' strategy: @@ -113,6 +112,10 @@ jobs: merge-reports: # Merge reports after playwright-tests, even if some shards have failed if: ${{ !cancelled() }} + permissions: + pages: 'write' + contents: 'read' + id-token: 'write' needs: [playwright-tests] runs-on: ubuntu-latest steps: From 97de8b9cf8d60690a7363b4a5c68edd2bb04f92e Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sun, 2 Feb 2025 12:00:11 +0530 Subject: [PATCH 23/35] pages --- .github/workflows/nightly.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 033a47cb..ccc116fa 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -139,10 +139,11 @@ jobs: name: html-report-attempt-${{ github.run_attempt }} path: .tests/playwright-report/ retention-days: 14 + - name: Setup pages uses: actions/configure-pages@v5 - name: Upload report to GitHub Pages - uses: actions/upload-artifact@v4 + uses: actions/upload-pages-artifact@v3 with: path: .tests/playwright-report/ - name: Deploy to GitHub Pages From dc2dad1e5791dc986f62caa2d422810a7d8019c7 Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sun, 2 Feb 2025 21:42:47 +0530 Subject: [PATCH 24/35] testing the job description --- .github/workflows/nightly.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index ccc116fa..4040f95a 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -91,7 +91,7 @@ jobs: run: | npx playwright install --with-deps - name: Find all Jupyter notebooks - run: find .. -name '*.ipynb' | tail -n 10 | sort > ./files.txt + run: find .. 
-name '*.ipynb' | tail -n 2 | sort > ./files.txt - name: Start test environments id: start-test-environment run: npx ts-node pre_create_env.ts @@ -149,4 +149,9 @@ jobs: - name: Deploy to GitHub Pages id: deployment uses: actions/deploy-pages@v4 + - name: Job summary + run: | + echo "# Deployment result" >> $GITHUB_STEP_SUMMARY + View the [Playwright report](${{ steps.deployment.outputs.url }}) >> $GITHUB_STEP_SUMMARY + From 76aaabab6acc7fde1c400a6c0a2e990f799f30c6 Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sun, 2 Feb 2025 22:03:35 +0530 Subject: [PATCH 25/35] job summary --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 4040f95a..b84c4a9c 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -152,6 +152,6 @@ jobs: - name: Job summary run: | echo "# Deployment result" >> $GITHUB_STEP_SUMMARY - View the [Playwright report](${{ steps.deployment.outputs.url }}) >> $GITHUB_STEP_SUMMARY + echo "View the [Playwright report](${{ steps.deployment.outputs.url }})" >> $GITHUB_STEP_SUMMARY From b71292fcbd339a13fb9c292840b1e4a907845ff8 Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sun, 2 Feb 2025 22:19:03 +0530 Subject: [PATCH 26/35] job summary --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index b84c4a9c..38d684bf 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -152,6 +152,6 @@ jobs: - name: Job summary run: | echo "# Deployment result" >> $GITHUB_STEP_SUMMARY - echo "View the [Playwright report](${{ steps.deployment.outputs.url }})" >> $GITHUB_STEP_SUMMARY + echo "View the [Playwright report](https://teradata.github.io/jupyter-demos/)" >> $GITHUB_STEP_SUMMARY From aab8ea8d73ab944fb2e059895aa604736249a03e Mon Sep 17 00:00:00 2001 From: Nitin Sreeram Date: Sun, 2 Feb 2025 
22:30:50 +0530 Subject: [PATCH 27/35] removing test code --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 38d684bf..0e1ffa3a 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -91,7 +91,7 @@ jobs: run: | npx playwright install --with-deps - name: Find all Jupyter notebooks - run: find .. -name '*.ipynb' | tail -n 2 | sort > ./files.txt + run: find .. -name '*.ipynb' | sort > ./files.txt - name: Start test environments id: start-test-environment run: npx ts-node pre_create_env.ts From 1d71e3ae8874f04664956f9be13ec04da4993eec Mon Sep 17 00:00:00 2001 From: Nupur Lal Date: Tue, 4 Feb 2025 06:53:54 +0000 Subject: [PATCH 28/35] correction in numbering --- Recipes/ClearScape_Functions/Anova.ipynb | 2 +- Recipes/ClearScape_Functions/Antiselect.ipynb | 2 +- Recipes/ClearScape_Functions/CategoricalSummary.ipynb | 2 +- Recipes/ClearScape_Functions/ChiSq.ipynb | 2 +- Recipes/ClearScape_Functions/ColumnSummary.ipynb | 2 +- Recipes/ClearScape_Functions/FTest.ipynb | 2 +- Recipes/ClearScape_Functions/GetFutileColumns.ipynb | 2 +- Recipes/ClearScape_Functions/GetRowsWithMissingValues.ipynb | 2 +- Recipes/ClearScape_Functions/GetRowsWithoutMissingValues.ipynb | 2 +- Recipes/ClearScape_Functions/Histogram.ipynb | 2 +- Recipes/ClearScape_Functions/OutlierFilterFitandTransform.ipynb | 2 +- Recipes/ClearScape_Functions/QQNorm.ipynb | 2 +- Recipes/ClearScape_Functions/SimpleImputeFitandTransform.ipynb | 2 +- Recipes/ClearScape_Functions/UnivariateStatistics.ipynb | 2 +- Recipes/ClearScape_Functions/WhichMax.ipynb | 2 +- Recipes/ClearScape_Functions/WhichMin.ipynb | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Recipes/ClearScape_Functions/Anova.ipynb b/Recipes/ClearScape_Functions/Anova.ipynb index 573c5a59..d48c52c9 100644 --- a/Recipes/ClearScape_Functions/Anova.ipynb +++ 
b/Recipes/ClearScape_Functions/Anova.ipynb @@ -238,7 +238,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/Antiselect.ipynb b/Recipes/ClearScape_Functions/Antiselect.ipynb index 32df9caf..469b7fdf 100644 --- a/Recipes/ClearScape_Functions/Antiselect.ipynb +++ b/Recipes/ClearScape_Functions/Antiselect.ipynb @@ -206,7 +206,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/CategoricalSummary.ipynb b/Recipes/ClearScape_Functions/CategoricalSummary.ipynb index 58d00613..a1ff9bfd 100644 --- a/Recipes/ClearScape_Functions/CategoricalSummary.ipynb +++ b/Recipes/ClearScape_Functions/CategoricalSummary.ipynb @@ -254,7 +254,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/ChiSq.ipynb b/Recipes/ClearScape_Functions/ChiSq.ipynb index 3db4c882..5279fb36 100644 --- a/Recipes/ClearScape_Functions/ChiSq.ipynb +++ b/Recipes/ClearScape_Functions/ChiSq.ipynb @@ -270,7 +270,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/ColumnSummary.ipynb b/Recipes/ClearScape_Functions/ColumnSummary.ipynb index 9c99cc55..ec5697aa 100644 --- a/Recipes/ClearScape_Functions/ColumnSummary.ipynb +++ b/Recipes/ClearScape_Functions/ColumnSummary.ipynb @@ -233,7 +233,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/FTest.ipynb b/Recipes/ClearScape_Functions/FTest.ipynb index cac653a3..0e99c3ce 100644 --- a/Recipes/ClearScape_Functions/FTest.ipynb +++ b/Recipes/ClearScape_Functions/FTest.ipynb @@ -244,7 +244,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/GetFutileColumns.ipynb b/Recipes/ClearScape_Functions/GetFutileColumns.ipynb index 994bd7fa..e69554e9 100644 --- a/Recipes/ClearScape_Functions/GetFutileColumns.ipynb +++ b/Recipes/ClearScape_Functions/GetFutileColumns.ipynb @@ -235,7 +235,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/GetRowsWithMissingValues.ipynb b/Recipes/ClearScape_Functions/GetRowsWithMissingValues.ipynb index b9f80cf8..77c8270b 100644 --- a/Recipes/ClearScape_Functions/GetRowsWithMissingValues.ipynb +++ b/Recipes/ClearScape_Functions/GetRowsWithMissingValues.ipynb @@ -255,7 +255,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/GetRowsWithoutMissingValues.ipynb b/Recipes/ClearScape_Functions/GetRowsWithoutMissingValues.ipynb index 2b785b84..69138546 100644 --- a/Recipes/ClearScape_Functions/GetRowsWithoutMissingValues.ipynb +++ b/Recipes/ClearScape_Functions/GetRowsWithoutMissingValues.ipynb @@ -255,7 +255,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/Histogram.ipynb b/Recipes/ClearScape_Functions/Histogram.ipynb index f009ec26..213451b0 100644 --- a/Recipes/ClearScape_Functions/Histogram.ipynb +++ b/Recipes/ClearScape_Functions/Histogram.ipynb @@ -271,7 +271,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/OutlierFilterFitandTransform.ipynb b/Recipes/ClearScape_Functions/OutlierFilterFitandTransform.ipynb index edbe2373..4e02cdfd 100644 --- a/Recipes/ClearScape_Functions/OutlierFilterFitandTransform.ipynb +++ b/Recipes/ClearScape_Functions/OutlierFilterFitandTransform.ipynb @@ -269,7 +269,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/QQNorm.ipynb b/Recipes/ClearScape_Functions/QQNorm.ipynb index db5a4f93..033fbda5 100644 --- a/Recipes/ClearScape_Functions/QQNorm.ipynb +++ b/Recipes/ClearScape_Functions/QQNorm.ipynb @@ -321,7 +321,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/SimpleImputeFitandTransform.ipynb b/Recipes/ClearScape_Functions/SimpleImputeFitandTransform.ipynb index 4af81876..4f6f0f88 100644 --- a/Recipes/ClearScape_Functions/SimpleImputeFitandTransform.ipynb +++ b/Recipes/ClearScape_Functions/SimpleImputeFitandTransform.ipynb @@ -283,7 +283,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/UnivariateStatistics.ipynb b/Recipes/ClearScape_Functions/UnivariateStatistics.ipynb index baf8dda5..9233853a 100644 --- a/Recipes/ClearScape_Functions/UnivariateStatistics.ipynb +++ b/Recipes/ClearScape_Functions/UnivariateStatistics.ipynb @@ -246,7 +246,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/WhichMax.ipynb b/Recipes/ClearScape_Functions/WhichMax.ipynb index bb350ca3..2c1e2e65 100644 --- a/Recipes/ClearScape_Functions/WhichMax.ipynb +++ b/Recipes/ClearScape_Functions/WhichMax.ipynb @@ -200,7 +200,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { diff --git a/Recipes/ClearScape_Functions/WhichMin.ipynb b/Recipes/ClearScape_Functions/WhichMin.ipynb index 350c5303..b27e0253 100644 --- a/Recipes/ClearScape_Functions/WhichMin.ipynb +++ b/Recipes/ClearScape_Functions/WhichMin.ipynb @@ -200,7 +200,7 @@ "metadata": {}, "source": [ "
\n", - "2. Cleanup" + "3. Cleanup" ] }, { From 7c9cef2c9ca6b74b733cad39791b72956285ea48 Mon Sep 17 00:00:00 2001 From: dallasbowden Date: Tue, 4 Feb 2025 20:02:32 +0000 Subject: [PATCH 29/35] Moved functions to a second py file. --- ExperienceBot/ExperienceBot.ipynb | 181 +----------------------------- ExperienceBot/chat_helper_db.py | 161 ++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 179 deletions(-) create mode 100644 ExperienceBot/chat_helper_db.py diff --git a/ExperienceBot/ExperienceBot.ipynb b/ExperienceBot/ExperienceBot.ipynb index 3952af8b..462984cc 100644 --- a/ExperienceBot/ExperienceBot.ipynb +++ b/ExperienceBot/ExperienceBot.ipynb @@ -145,140 +145,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Function to extract content from a Jupyter notebook\n", - "def extract_notebook_content(file_path):\n", - " with open(file_path, 'r', encoding='utf-8') as f:\n", - " notebook_data = json.load(f)\n", - "\n", - " content = \"\"\n", - " for cell in notebook_data.get('cells', []):\n", - " if cell['cell_type'] == 'markdown':\n", - " # Clean markdown content by removing HTML tags\n", - " content += '\\n'.join(cell['source']) + '\\n\\n'\n", - " elif cell['cell_type'] == 'code':\n", - " # Format code properly\n", - " content += '```python\\n' + ''.join(cell['source']) + '\\n```\\n\\n'\n", - " return content\n", - "\n", - "# Function to remove HTML tags\n", - "def remove_html_tags(text):\n", - " \"\"\"Remove HTML tags from a string\"\"\"\n", - " clean = re.compile('<.*?>')\n", - " return re.sub(clean, '', text)\n", - "\n", - "# Function to split the notebook content into markdown and code\n", - "def split_ipynb_content(content):\n", - " # Regular expression to match code blocks\n", - " code_pattern = re.compile(r'```python(.*?)```', re.DOTALL)\n", - "\n", - " # Find all code blocks\n", - " code_blocks = code_pattern.findall(content)\n", - "\n", - " # Split the content by code blocks\n", - " parts = code_pattern.split(content)\n", - "\n", - " # 
Combine markdown and code blocks\n", - " result = []\n", - " for i, part in enumerate(parts):\n", - " if i % 2 == 0:\n", - " # This is a markdown part, remove HTML tags\n", - " clean_part = remove_html_tags(part)\n", - " result.append(('markdown', clean_part))\n", - " else:\n", - " # This is a code part\n", - " result.append(('code', part))\n", - "\n", - " return result\n", - "\n", - "# Function to clean and split notebook content\n", - "def clean_and_split_notebook_content(file_path):\n", - " \"\"\"Extract markdown content and clean up the notebook's information.\"\"\"\n", - " # Extract the content from the notebook file\n", - " content = extract_notebook_content(file_path)\n", - " \n", - " # Split content into markdown and code cells\n", - " split_content = split_ipynb_content(content)\n", - "\n", - " # Initialize a list to hold combined documents\n", - " combined_documents = []\n", - " current_markdown = \"\"\n", - " current_code = \"\"\n", - "\n", - " # Iterate through the split content to group markdown with code\n", - " for part_type, part in split_content:\n", - " if part_type == 'markdown':\n", - " # If we have code and markdown, combine them\n", - " if current_markdown or current_code:\n", - " combined_documents.append({\"markdown\": current_markdown, \"code\": current_code})\n", - " # Update current markdown to the new one\n", - " current_markdown = part\n", - " current_code = \"\" # Reset code, ready for next code block\n", - " elif part_type == 'code':\n", - " # Append the code to the current code block\n", - " current_code += part\n", - " \n", - " # Add the last document (markdown + code)\n", - " if current_markdown or current_code:\n", - " combined_documents.append({\"markdown\": current_markdown, \"code\": current_code})\n", - "\n", - " return combined_documents" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fe6c268a-7004-48f2-8cf4-fb401b363235", - "metadata": {}, - "outputs": [], - "source": [ - "def generate_emb():\n", - " 
# Load notebooks and clean them\n", - " path = '/home/jovyan/JupyterLabRoot/'\n", - " loader = DirectoryLoader(path, glob=\"**/*.ipynb\", loader_cls=NotebookLoader)\n", - " notebooks = loader.load()\n", - "\n", - " # Clean each notebook before processing it\n", - " cleaned_documents = []\n", - " for notebook in notebooks:\n", - " # Assuming notebook metadata contains file path\n", - " file_path = notebook.metadata.get(\"source\", \"Unknown\") # Adjust this as needed\n", - " cleaned_data = clean_and_split_notebook_content(file_path)\n", - "\n", - " # Convert cleaned data to documents, including the source file path\n", - " for data in cleaned_data:\n", - " if data['markdown'] or data['code']:\n", - " doc = Document(\n", - " page_content=f\"Markdown:\\n{data['markdown']} \\n\\nCode:\\n{data['code']}\",\n", - " metadata={\"source\": file_path} # Ensure the source file path is added\n", - " )\n", - " cleaned_documents.append(doc)\n", - "\n", - "\n", - " # Split text into manageable chunks\n", - " text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)\n", - " docs = text_splitter.split_documents(cleaned_documents)\n", - "\n", - "\n", - " # for count of token\n", - " from tiktoken import encoding_for_model\n", - "\n", - " def count_document_tokens(document, model_name=\"gpt-4o-mini\"):\n", - " encoder = encoding_for_model(model_name)\n", - " return len(encoder.encode(document.page_content))\n", - "\n", - " tiktokn = 0\n", - " for doc in cleaned_documents:\n", - " tiktokn = tiktokn + count_document_tokens(doc)\n", - "\n", - " print(\"total token from all the notebooks: \", tiktokn)\n", - "\n", - " # Create vector store using embeddings\n", - " embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\") \n", - " vector_store = FAISS.from_documents(docs, embeddings)\n", - "\n", - " # Save the index for reuse\n", - " vector_store.save_local(\"notebooks_index\")\n", - " return vector_store" + "from chat_helper_db import *" ] }, { @@ -355,7 
+222,7 @@ "You are a helpful assistant. Use the following retrieved information from Jupyter notebooks to provide:\n", "1. A **clean and concise textual explanation** based on the question and notebook markdown.\n", "2. **Relevant Clean Python code** extracted from the notebooks' code cells that are related to the question. Please filter the code that is related to the query.\n", - "3. Extract the source documents\n", + "3. Extract the source documents.\n", "If no relevant information is found, politely say so.\n", "\n", "*Critical*: start by greeting only if user starts with greeting, just say, \"Hey there! 😊 Welcome to our chatbot!\"\n", @@ -389,50 +256,6 @@ ")" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "9d2efb7c-dfa6-4240-b3ae-e40337685705", - "metadata": {}, - "outputs": [], - "source": [ - "# Function to Query Chatbot\n", - "def query_chatbot(question):\n", - " # Query the chatbot using the chain\n", - " result = qa_chain.invoke(question)\n", - " answer = result[\"result\"]\n", - "\n", - " # Extract and format relevant source paths from source documents\n", - " source_docs = result.get(\"source_documents\", [])\n", - " sources = \"\\n\".join(set([doc.metadata.get(\"source\", \"Unknown\") for doc in source_docs]))\n", - "\n", - " return f\"\"\"\n", - "{answer}\n", - "\n", - "Reference Notebook(s):\n", - "{sources if sources else \"No source notebooks found.\"}\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b8e60ece-6bd2-495a-8d11-ec545c11be87", - "metadata": {}, - "outputs": [], - "source": [ - "def extract_answer_code_references(input_string):\n", - "\n", - " # Extract references and create JupyterLab-compatible links\n", - " references = re.findall(r'(/home/[^\\s]+)', input_string)\n", - " html_output = []\n", - " for i, ref in enumerate(references):\n", - " html_output.append(f' {i+1}. 
{ref.split(\"/\")[-1]} ')\n", - " \n", - " # return html_output\n", - " return \"\\n\\n\".join(html_output)" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/ExperienceBot/chat_helper_db.py b/ExperienceBot/chat_helper_db.py new file mode 100644 index 00000000..af8bc6e3 --- /dev/null +++ b/ExperienceBot/chat_helper_db.py @@ -0,0 +1,161 @@ +# ---------------------------------------------------------------- +# Function to extract content from a Jupyter notebook +def extract_notebook_content(file_path): + with open(file_path, 'r', encoding='utf-8') as f: + notebook_data = json.load(f) + + content = "" + for cell in notebook_data.get('cells', []): + if cell['cell_type'] == 'markdown': + # Clean markdown content by removing HTML tags + content += '\n'.join(cell['source']) + '\n\n' + elif cell['cell_type'] == 'code': + # Format code properly + content += '```python\n' + ''.join(cell['source']) + '\n```\n\n' + return content + +# Function to remove HTML tags +def remove_html_tags(text): + """Remove HTML tags from a string""" + clean = re.compile('<.*?>') + return re.sub(clean, '', text) + +# Function to split the notebook content into markdown and code +def split_ipynb_content(content): + # Regular expression to match code blocks + code_pattern = re.compile(r'```python(.*?)```', re.DOTALL) + + # Find all code blocks + code_blocks = code_pattern.findall(content) + + # Split the content by code blocks + parts = code_pattern.split(content) + + # Combine markdown and code blocks + result = [] + for i, part in enumerate(parts): + if i % 2 == 0: + # This is a markdown part, remove HTML tags + clean_part = remove_html_tags(part) + result.append(('markdown', clean_part)) + else: + # This is a code part + result.append(('code', part)) + + return result + +# Function to clean and split notebook content +def clean_and_split_notebook_content(file_path): + """Extract markdown content and clean up the notebook's information.""" + # Extract the content from the 
notebook file + content = extract_notebook_content(file_path) + + # Split content into markdown and code cells + split_content = split_ipynb_content(content) + + # Initialize a list to hold combined documents + combined_documents = [] + current_markdown = "" + current_code = "" + + # Iterate through the split content to group markdown with code + for part_type, part in split_content: + if part_type == 'markdown': + # If we have code and markdown, combine them + if current_markdown or current_code: + combined_documents.append({"markdown": current_markdown, "code": current_code}) + # Update current markdown to the new one + current_markdown = part + current_code = "" # Reset code, ready for next code block + elif part_type == 'code': + # Append the code to the current code block + current_code += part + + # Add the last document (markdown + code) + if current_markdown or current_code: + combined_documents.append({"markdown": current_markdown, "code": current_code}) + + return combined_documents + + +def generate_emb(): + # Load notebooks and clean them + path = '/home/jovyan/JupyterLabRoot/' + loader = DirectoryLoader(path, glob="**/*.ipynb", loader_cls=NotebookLoader) + notebooks = loader.load() + + # Clean each notebook before processing it + cleaned_documents = [] + for notebook in notebooks: + # Assuming notebook metadata contains file path + file_path = notebook.metadata.get("source", "Unknown") # Adjust this as needed + cleaned_data = clean_and_split_notebook_content(file_path) + + # Convert cleaned data to documents, including the source file path + for data in cleaned_data: + if data['markdown'] or data['code']: + doc = Document( + page_content=f"Markdown:\n{data['markdown']} \n\nCode:\n{data['code']}", + metadata={"source": file_path} # Ensure the source file path is added + ) + cleaned_documents.append(doc) + + + # Split text into manageable chunks + text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) + docs = 
text_splitter.split_documents(cleaned_documents) + + + # for count of token + from tiktoken import encoding_for_model + + def count_document_tokens(document, model_name="gpt-4o-mini"): + encoder = encoding_for_model(model_name) + return len(encoder.encode(document.page_content)) + + tiktokn = 0 + for doc in cleaned_documents: + tiktokn = tiktokn + count_document_tokens(doc) + + print("total token from all the notebooks: ", tiktokn) + + # Create vector store using embeddings + embeddings = OpenAIEmbeddings(model="text-embedding-3-large") + vector_store = FAISS.from_documents(docs, embeddings) + + # Save the index for reuse + vector_store.save_local("notebooks_index") + return vector_store + +# ---------------------------------------------------------------- + +# Function to Query Chatbot +def query_chatbot(question): + # Query the chatbot using the chain + result = qa_chain.invoke(question) + answer = result["result"] + + # Extract and format relevant source paths from source documents + source_docs = result.get("source_documents", []) + sources = "\n".join(set([doc.metadata.get("source", "Unknown") for doc in source_docs])) + + return f""" +{answer} + +Reference Notebook(s): +{sources if sources else "No source notebooks found."} +""" + +def extract_answer_code_references(input_string): + + # Extract references and create JupyterLab-compatible links + references = re.findall(r'(/home/[^\s]+)', input_string) + html_output = [] + for i, ref in enumerate(references): + html_output.append(f' {i+1}. {ref.split("/")[-1]} ') + + # return html_output + return "\n\n".join(html_output) + + +# ---------------------------------------------------------------- From 4fe4f8f1526775e39f119fefa0b129b0a5e6672c Mon Sep 17 00:00:00 2001 From: dallasbowden Date: Tue, 4 Feb 2025 21:07:26 +0000 Subject: [PATCH 30/35] Added both yaml files. 
--- .CSAE_Bot.yaml | 7 ------- ExperienceBot/.ExperienceBot.yaml | 8 ++++++++ ExperienceBot/.ExperienceBot_NoCode.yaml | 5 +++++ 3 files changed, 13 insertions(+), 7 deletions(-) delete mode 100644 .CSAE_Bot.yaml create mode 100644 ExperienceBot/.ExperienceBot.yaml create mode 100644 ExperienceBot/.ExperienceBot_NoCode.yaml diff --git a/.CSAE_Bot.yaml b/.CSAE_Bot.yaml deleted file mode 100644 index 8803bd37..00000000 --- a/.CSAE_Bot.yaml +++ /dev/null @@ -1,7 +0,0 @@ -inputs: - - type: env - value: 'JUPYTER_NOTEBOOK_CI_OPEN_AI_KEY' - cell: 8 - - type: text - value: 'exit' - prompt: "Enter your query here. To stop, type 'exit'. Query:" diff --git a/ExperienceBot/.ExperienceBot.yaml b/ExperienceBot/.ExperienceBot.yaml new file mode 100644 index 00000000..4a73eee1 --- /dev/null +++ b/ExperienceBot/.ExperienceBot.yaml @@ -0,0 +1,8 @@ +inputs: + - type: env + value: 'JUPYTER_NOTEBOOK_CI_OPEN_AI_KEY' + cell: 10 + -type: text + value: 'no' + prompt: "Do you want to generate embeddings? ('yes'/'no'):" + \ No newline at end of file diff --git a/ExperienceBot/.ExperienceBot_NoCode.yaml b/ExperienceBot/.ExperienceBot_NoCode.yaml new file mode 100644 index 00000000..750796f3 --- /dev/null +++ b/ExperienceBot/.ExperienceBot_NoCode.yaml @@ -0,0 +1,5 @@ +inputs: + - type: env + value: 'JUPYTER_NOTEBOOK_CI_OPEN_AI_KEY' + cell: 8 + \ No newline at end of file From 18d3117cb96d7fe7894a8730ded265e492e8776e Mon Sep 17 00:00:00 2001 From: DallasBowden <149392066+DallasBowden@users.noreply.github.com> Date: Tue, 4 Feb 2025 16:20:40 -0500 Subject: [PATCH 31/35] Update .ExperienceBot.yaml Updated cell number for ai key. 
--- ExperienceBot/.ExperienceBot.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ExperienceBot/.ExperienceBot.yaml b/ExperienceBot/.ExperienceBot.yaml index 4a73eee1..c5f7b34a 100644 --- a/ExperienceBot/.ExperienceBot.yaml +++ b/ExperienceBot/.ExperienceBot.yaml @@ -1,8 +1,8 @@ inputs: - type: env value: 'JUPYTER_NOTEBOOK_CI_OPEN_AI_KEY' - cell: 10 + cell: 9 -type: text value: 'no' prompt: "Do you want to generate embeddings? ('yes'/'no'):" - \ No newline at end of file + From 7bb7ee0539d356aa7f89d642d786a5edb30095cb Mon Sep 17 00:00:00 2001 From: DallasBowden <149392066+DallasBowden@users.noreply.github.com> Date: Tue, 4 Feb 2025 16:35:27 -0500 Subject: [PATCH 32/35] Update .ExperienceBot.yaml Updated .yaml file. --- ExperienceBot/.ExperienceBot.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ExperienceBot/.ExperienceBot.yaml b/ExperienceBot/.ExperienceBot.yaml index c5f7b34a..df7ceaf5 100644 --- a/ExperienceBot/.ExperienceBot.yaml +++ b/ExperienceBot/.ExperienceBot.yaml @@ -1,7 +1,7 @@ inputs: - type: env value: 'JUPYTER_NOTEBOOK_CI_OPEN_AI_KEY' - cell: 9 + cell: 11 -type: text value: 'no' prompt: "Do you want to generate embeddings? ('yes'/'no'):" From 2b74a44c4e0b9ce80294408fb9a0e0bf050a0401 Mon Sep 17 00:00:00 2001 From: DallasBowden <149392066+DallasBowden@users.noreply.github.com> Date: Tue, 4 Feb 2025 16:46:20 -0500 Subject: [PATCH 33/35] Update .ExperienceBot.yaml There has to be a space between - and type. --- ExperienceBot/.ExperienceBot.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ExperienceBot/.ExperienceBot.yaml b/ExperienceBot/.ExperienceBot.yaml index df7ceaf5..1351047d 100644 --- a/ExperienceBot/.ExperienceBot.yaml +++ b/ExperienceBot/.ExperienceBot.yaml @@ -1,8 +1,8 @@ inputs: - type: env value: 'JUPYTER_NOTEBOOK_CI_OPEN_AI_KEY' - cell: 11 - -type: text + cell: 10 + - type: text value: 'no' prompt: "Do you want to generate embeddings? 
('yes'/'no'):" From 9db2b60a0fc18ac02e5a65d1002e690c0eb891c9 Mon Sep 17 00:00:00 2001 From: dallasbowden Date: Wed, 5 Feb 2025 21:23:35 +0000 Subject: [PATCH 34/35] Updated the pip install and the kernel restart instructions. --- ...2_ModelOps_Model_Factory_REST_Python.ipynb | 77 ++++++++++++------- 1 file changed, 50 insertions(+), 27 deletions(-) diff --git a/ModelOps/12_ModelOps_Model_Factory_REST_Python.ipynb b/ModelOps/12_ModelOps_Model_Factory_REST_Python.ipynb index 1249d7d4..6c913417 100644 --- a/ModelOps/12_ModelOps_Model_Factory_REST_Python.ipynb +++ b/ModelOps/12_ModelOps_Model_Factory_REST_Python.ipynb @@ -86,28 +86,27 @@ "source": [ "

1.1 Libraries installation

\n", "\n", - "

A restart of the Kernel is needed to confirm changes. We use -q parameter for a non-verbose log of the installation command, you may remove this parameter if you want to know all the steps of the pip installation.

" + "

We use %%capture to suppress the output of the installation command. You can remove this magic command if you want to see all the steps of the pip installation.

" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "f79b2e2f-a7d3-4cab-a447-8d61e4009b93", "metadata": {}, "outputs": [], "source": [ - "# %%capture\n", - "# !pip install -q teradataml==20.0.0.2 teradatamodelops==7.0.6 matplotlib==3.8.2" + "%%capture\n", + "!pip install -q teradataml==20.0.0.2 teradatamodelops==7.0.6 matplotlib==3.8.2" ] }, { "cell_type": "markdown", - "id": "d5149df7-a95f-43cc-846c-ec7b47e8d8e3", + "id": "07ce8839-3cd5-45e9-af6f-6920010e34fa", "metadata": {}, "source": [ - "
\n", - "

Note: The above statements may need to be uncommented if you run the notebooks on a platform other than ClearScape Analytics Experience that does not have the mentioned versions of the libraries installed. If you uncomment those installs, be sure to restart the kernel after executing those lines to bring the installed libraries into memory. The simplest way to restart the Kernel is by typing zero zero: 0 0

\n", - "
" + "

A restart of the Kernel is needed to bring these libraries into the session.

\n", + "

Hint: An easy way to restart the kernel is to type zero zero ( 0 0 and then press Enter).

" ] }, { @@ -121,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "5dbc3d39-4f2b-4765-9d49-abc5c26b1a22", "metadata": { "tags": [] @@ -163,16 +162,24 @@ "metadata": {}, "source": [ "
\n", - "

Important Note: Replace to use your ModelOps URL. You can copy the HOST address from the Connection details on your dashboard for the url

\n", + "

Important Note: Use the HOST string for this environment. You can copy the name from the Connection details on your dashboard.

\n", "
" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "90c06ba6-22b7-431a-9c48-89a38070123a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Host NAME of the machine for ModelOps: dallas47-88vgt0b5i55ikpx7.env.ci.clearscape.teradata.com\n" + ] + } + ], "source": [ "# Example modelops-apr24-ver20-63z6fpyuh8lhkz39.env.clearscape.teradata.com\n", "hostname = input(\"Host NAME of the machine for ModelOps:\")" @@ -180,12 +187,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "fc95baff-ccb9-4650-aa13-b15d919e6f78", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://web-dallas47-88vgt0b5i55ikpx7.env.ci.clearscape.teradata.com/modelops\n" + ] + } + ], "source": [ "# base domain for ModelOps\n", "url = \"https://\" + \"web-\"+ hostname + \"/modelops\"\n", @@ -210,12 +225,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "a65ffe4b-3be1-4ffb-815d-5c5abdf563c3", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Enter user's password: ········································\n" + ] + } + ], "source": [ "# secret for account \"service-account-modelops-cli\"\n", "#secret = \"\"\n", @@ -271,7 +294,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "08cfb70b-2b86-49c8-8c40-44558ac4ba2f", "metadata": { "tags": [] @@ -303,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "3ddb2589-b2e5-4a96-b8a4-3ce00f9d3ea2", "metadata": { "tags": [] @@ -345,12 +368,12 @@ "id": "1aa69d39-56e6-409c-b3c7-25a653a050a2", "metadata": {}, "source": [ - "

3.1 Get a token from Keycloak through the secret credential

" + "

3.1 Functions to get a token from Keycloak through the secret credential

" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "5fe18031-abe9-4ed1-94f8-b92d9b761f3f", "metadata": { "tags": [] @@ -442,7 +465,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "3eb059a8-f2ac-4999-9046-920b74134bbf", "metadata": { "tags": [] @@ -511,7 +534,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "ebedfc24-cb4b-467e-832a-1f6bf8d954ff", "metadata": { "tags": [] @@ -576,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "4fa390bf-c3c4-48ec-b088-667addbf26d2", "metadata": { "tags": [] @@ -633,7 +656,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "a7585e7b-88a1-42f9-aba5-e562a7857975", "metadata": { "tags": [] @@ -678,7 +701,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "c295c4c0-94e6-491c-b8dd-ff0c7f39da16", "metadata": { "tags": [] @@ -764,7 +787,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "4b6d9aa2-d178-423b-ae67-5f58f5b59ff7", "metadata": { "tags": [] @@ -838,7 +861,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "5a27e50d-c136-411d-9b61-2c20ca71ef24", "metadata": { "tags": [] From 9feffbfb85e815d4bd54925845adc8279ec8bb8c Mon Sep 17 00:00:00 2001 From: DallasBowden <149392066+DallasBowden@users.noreply.github.com> Date: Wed, 5 Feb 2025 16:28:27 -0500 Subject: [PATCH 35/35] Update skip_files.txt Add ../ModelOps/12_ModelOps_Model_Factory_REST_Python.ipynb --- .tests/skip_files.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.tests/skip_files.txt b/.tests/skip_files.txt index 84638500..e9116bd2 100644 --- a/.tests/skip_files.txt +++ b/.tests/skip_files.txt @@ -15,4 +15,5 @@ ../UseCases/Language_Models_InVantage/Initialization_and_Model_Load.ipynb 
../UseCases/Signal_Processing_and_Classification/Signal_Processing_and_Classification_Python.ipynb ../UseCases/AnomalyDetection_OutstandingAmount_FS/AnomalyDetection_OutstandingAmount_FeatureStore.ipynb -../UseCases/Recommendations_product_search/Recommendations_product_search_OpenSource_Python.ipynb \ No newline at end of file +../UseCases/Recommendations_product_search/Recommendations_product_search_OpenSource_Python.ipynb +../ModelOps/12_ModelOps_Model_Factory_REST_Python.ipynb