From 72c92b1a5461484eaa179ec800c535bac80ada8d Mon Sep 17 00:00:00 2001 From: Stan Girard Date: Thu, 1 Jun 2023 16:01:27 +0200 Subject: [PATCH] VertexAI Google Cloud Palm2 Support (#226) * feat(bard): added * docs(readme): update * chore(print): removed --- .backend_env.example | 4 +++- .gitignore | 1 + README.md | 7 +++++++ backend/Dockerfile | 7 +++++-- backend/api.py | 9 +++++---- backend/auth_handler.py | 1 - backend/llm/qa.py | 6 +++++- backend/parsers/audio.py | 10 +++++----- backend/requirements.txt | 5 +++-- docker-compose.yml | 1 + frontend/app/chat/page.tsx | 1 + 11 files changed, 36 insertions(+), 16 deletions(-) diff --git a/.backend_env.example b/.backend_env.example index b0e13b1e78ee..8f74cc00d9e1 100644 --- a/.backend_env.example +++ b/.backend_env.example @@ -3,4 +3,6 @@ SUPABASE_SERVICE_KEY=eyXXXXX OPENAI_API_KEY=sk-XXXXXX ANTHROPIC_API_KEY=XXXXXX JWT_SECRET_KEY=Found in Supabase settings in the API tab -AUTHENTICATE="true" \ No newline at end of file +AUTHENTICATE=true +GOOGLE_APPLICATION_CREDENTIALS=/code/application_default_credentials.json +GOOGLE_CLOUD_PROJECT=XXXXX to be changed with your GCP id \ No newline at end of file diff --git a/.gitignore b/.gitignore index d00146441572..1b80652eb5bd 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,4 @@ streamlit-demo/.streamlit/secrets.toml .frontend_env backend/pandoc-* **/.pandoc-* +backend/application_default_credentials.json diff --git a/README.md b/README.md index 21ab6aacd44b..24ad5cdb078a 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,15 @@ cp .frontend_env.example frontend/.env - **Step 3**: Update the `backend/.env` and `frontend/.env` file > _Your `supabase_service_key` can be found in your Supabase dashboard under Project Settings -> API. 
Use the `anon` `public` key found in the `Project API keys` section._ + + > _Your `JWT_SECRET_KEY` can be found in your Supabase settings under Project Settings -> JWT Settings -> JWT Secret_ +> _To activate VertexAI with PaLM from GCP follow the instructions [here](https://python.langchain.com/en/latest/modules/models/llms/integrations/google_vertex_ai_palm.html) and update `backend/.env` - It is an advanced feature, please be an expert in GCP before trying to use it_ + +- [ ] Change variables in `backend/.env` +- [ ] Change variables in `frontend/.env` + - **Step 4**: Run the following migration scripts on the Supabase database via the web interface (SQL Editor -> `New query`) [Migration Script 1](scripts/supabase_new_store_documents.sql) diff --git a/backend/Dockerfile b/backend/Dockerfile index 0a3e2906d40f..f47a37363606 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,4 +1,7 @@ -FROM python:3.11 +FROM python:3.11-buster + +# Install GEOS library +RUN apt-get update && apt-get install -y libgeos-dev WORKDIR /code @@ -8,4 +11,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100 COPY . 
/code/ -CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5050"] +CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5050"] \ No newline at end of file diff --git a/backend/api.py b/backend/api.py index 21927a532e27..1b178e52179f 100644 --- a/backend/api.py +++ b/backend/api.py @@ -1,12 +1,14 @@ import os import shutil +import time from tempfile import SpooledTemporaryFile from typing import Annotated, List, Tuple import pypandoc from auth_bearer import JWTBearer from crawl.crawler import CrawlWebsite -from fastapi import Depends, FastAPI, File, Header, HTTPException, UploadFile +from fastapi import (Depends, FastAPI, File, Header, HTTPException, Request, + UploadFile) from fastapi.middleware.cors import CORSMiddleware from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from llm.qa import get_qa_llm @@ -25,13 +27,11 @@ from parsers.powerpoint import process_powerpoint from parsers.txt import process_txt from pydantic import BaseModel -from utils import ChatMessage, CommonsDep, similarity_search - from supabase import Client +from utils import ChatMessage, CommonsDep, similarity_search logger = get_logger(__name__) - app = FastAPI() origins = [ @@ -49,6 +49,7 @@ ) + @app.on_event("startup") async def startup_event(): pypandoc.download_pandoc() diff --git a/backend/auth_handler.py b/backend/auth_handler.py index a9817563e7b4..7f0b58b1557c 100644 --- a/backend/auth_handler.py +++ b/backend/auth_handler.py @@ -23,7 +23,6 @@ def decode_access_token(token: str): payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False}) return payload except JWTError as e: - print(f"JWTError: {str(e)}") return None def get_user_email_from_token(token: str): diff --git a/backend/llm/qa.py b/backend/llm/qa.py index 9fd27855dfdf..9575f356021a 100644 --- a/backend/llm/qa.py +++ b/backend/llm/qa.py @@ -2,10 +2,11 @@ from typing import Any, List from langchain.chains import 
ConversationalRetrievalChain -from langchain.chat_models import ChatOpenAI +from langchain.chat_models import ChatOpenAI, ChatVertexAI from langchain.chat_models.anthropic import ChatAnthropic from langchain.docstore.document import Document from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.llms import VertexAI from langchain.memory import ConversationBufferMemory from langchain.vectorstores import SupabaseVectorStore from llm import LANGUAGE_PROMPT @@ -94,6 +95,9 @@ def get_qa_llm(chat_message: ChatMessage, user_id: str): temperature=chat_message.temperature, max_tokens=chat_message.max_tokens), vector_store.as_retriever(), memory=memory, verbose=True, max_tokens_limit=1024) + elif chat_message.model.startswith("vertex"): + qa = ConversationalRetrievalChain.from_llm( + ChatVertexAI(), vector_store.as_retriever(), memory=memory, verbose=False, max_tokens_limit=1024) elif anthropic_api_key and chat_message.model.startswith("claude"): qa = ConversationalRetrievalChain.from_llm( ChatAnthropic( diff --git a/backend/parsers/audio.py b/backend/parsers/audio.py index efc819942bde..8200d266d743 100644 --- a/backend/parsers/audio.py +++ b/backend/parsers/audio.py @@ -1,15 +1,16 @@ import os -from tempfile import NamedTemporaryFile import tempfile -from io import BytesIO import time +from io import BytesIO +from tempfile import NamedTemporaryFile + import openai +from fastapi import UploadFile from langchain.document_loaders import TextLoader from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.schema import Document from langchain.text_splitter import RecursiveCharacterTextSplitter from utils import compute_sha1_from_content, documents_vector_store -from langchain.schema import Document -from fastapi import UploadFile # # Create a function to transcribe audio using Whisper # def _transcribe_audio(api_key, audio_file, stats_db): @@ -52,7 +53,6 @@ async def process_audio(upload_file: UploadFile, stats_db): file_sha = 
compute_sha1_from_content(transcript.text.encode("utf-8")) file_size = len(transcript.text.encode("utf-8")) - print(file_size) # Load chunk size and overlap from sidebar chunk_size = 500 diff --git a/backend/requirements.txt b/backend/requirements.txt index 588662b0d1bf..19fb0f577fec 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,4 +1,4 @@ -langchain==0.0.166 +langchain==0.0.187 Markdown==3.4.3 openai==0.27.6 pdf2image==1.16.3 @@ -15,4 +15,5 @@ uvicorn==0.22.0 pypandoc==1.11 docx2txt==0.8 guidance==0.0.53 -python-jose==3.3.0 \ No newline at end of file +python-jose==3.3.0 +google_cloud_aiplatform==1.25.0 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 6d4d1b1454f4..c565f51055f2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,5 +25,6 @@ services: restart: always volumes: - ./backend/:/code/ + - ~/.config/gcloud:/root/.config/gcloud ports: - 5050:5050 \ No newline at end of file diff --git a/frontend/app/chat/page.tsx b/frontend/app/chat/page.tsx index d9e42ea7db8e..4856175ab661 100644 --- a/frontend/app/chat/page.tsx +++ b/frontend/app/chat/page.tsx @@ -159,6 +159,7 @@ export default function ChatPage() { > +