From e9c2b8d5ce116a52ea8dcf34f5bede4977528dca Mon Sep 17 00:00:00 2001 From: yingying Date: Mon, 15 Apr 2024 12:44:29 +0800 Subject: [PATCH 01/16] feat: init git loader --- .gitignore | 3 ++- app/api/chat/retrieval/route.ts | 5 +---- server/data_class.py | 8 +++++++- server/main.py | 16 ++++------------ server/rag/retrieval.py | 33 ++++++++++++++++++++++++++------- server/routers/github.py | 2 +- server/routers/rag.py | 20 ++++++++++++++++++++ server/tools/issue.py | 1 - 8 files changed, 61 insertions(+), 27 deletions(-) create mode 100644 server/routers/rag.py diff --git a/.gitignore b/.gitignore index 40e0ea72..7d24465c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. __pycache__/ -.pyc +*.pyc # dependencies /node_modules /.pnp @@ -39,3 +39,4 @@ next-env.d.ts .yarn /server/.aws-sam/* .aws-sam/* + diff --git a/app/api/chat/retrieval/route.ts b/app/api/chat/retrieval/route.ts index 33094a3d..b0d5d4f1 100644 --- a/app/api/chat/retrieval/route.ts +++ b/app/api/chat/retrieval/route.ts @@ -7,10 +7,7 @@ import { ChatOpenAI } from 'langchain/chat_models/openai'; import { PromptTemplate } from 'langchain/prompts'; import { SupabaseVectorStore } from 'langchain/vectorstores/supabase'; import { Document } from 'langchain/document'; -import { - RunnableSequence, - RunnablePassthrough, -} from 'langchain/schema/runnable'; +import { RunnableSequence } from 'langchain/schema/runnable'; import { BytesOutputParser, StringOutputParser, diff --git a/server/data_class.py b/server/data_class.py index 8ea7fd8f..a5389b69 100644 --- a/server/data_class.py +++ b/server/data_class.py @@ -1,3 +1,4 @@ +from typing import Optional from pydantic import BaseModel @@ -17,4 +18,9 @@ class ChatData(BaseModel): class ExecuteMessage(BaseModel): type: str repo: str - path: str \ No newline at end of file + path: str + +class GitRepo(BaseModel): + repo_name: str + path: Optional[str] = None + branch: 
Optional[str] = None diff --git a/server/main.py b/server/main.py index 1b4e6785..f68220d9 100644 --- a/server/main.py +++ b/server/main.py @@ -1,5 +1,5 @@ import os -from rag import retrieval + import uvicorn from fastapi import FastAPI from fastapi.responses import StreamingResponse @@ -11,7 +11,7 @@ from data_class import ChatData # Import fastapi routers -from routers import health_checker, github +from routers import health_checker, github, rag open_api_key = get_env_variable("OPENAI_API_KEY") is_dev = bool(get_env_variable("IS_DEV")) @@ -33,25 +33,17 @@ app.include_router(health_checker.router) app.include_router(github.router) - +app.include_router(rag.router) @app.post("/api/chat/stream", response_class=StreamingResponse) def run_agent_chat(input_data: ChatData): result = stream.agent_chat(input_data, open_api_key) return StreamingResponse(result, media_type="text/event-stream") -@app.post("/api/rag/add_knowledge") -def add_knowledge(): - data=retrieval.add_knowledge() - return data -@app.post("/api/rag/search_knowledge") -def search_knowledge(query: str): - data=retrieval.search_knowledge(query) - return data if __name__ == "__main__": if is_dev: uvicorn.run("main:app", host="0.0.0.0", port=int(os.environ.get("PORT", "8080")), reload=True) else: - uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "8080"))) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "8080"))) diff --git a/server/rag/retrieval.py b/server/rag/retrieval.py index 2c48f163..1f0d486f 100644 --- a/server/rag/retrieval.py +++ b/server/rag/retrieval.py @@ -5,13 +5,18 @@ from langchain_text_splitters import CharacterTextSplitter from langchain_community.vectorstores import SupabaseVectorStore from db.supabase.client import get_client +from data_class import GitRepo from uilts.env import get_env_variable +from langchain_community.document_loaders import GitLoader +from github import Github, Repository supabase_url = 
get_env_variable("SUPABASE_URL") supabase_key = get_env_variable("SUPABASE_SERVICE_KEY") +github_token = get_env_variable("GITHUB_TOKEN") table_name="antd_knowledge" query_name="match_antd_knowledge" chunk_size=500 +g = Github() def convert_document_to_dict(document): return { @@ -32,10 +37,26 @@ def init_retriever(): return db.as_retriever() -def add_knowledge(): - current_dir = os.path.dirname(os.path.abspath(__file__)) - target_file_path = os.path.join(current_dir, "../docs/test.md") - loader = TextLoader(target_file_path) +def get_repo_info(repo_name: str): + try: + repo = g.get_repo(repo_name) + return repo + except Exception as e: + print(f"An error occurred: {e}") + return None + +def add_knowledge(repo: GitRepo): + repo_info: Repository = get_repo_info(repo.repo_name) + if not repo_info: + return json.dumps({ + "success": False, + "message": "Invalid repository name!" + }) + loader=GitLoader( + clone_url=repo_info.html_url, + repo_path=repo.path, + branch=repo.branch or repo_info.default_branch, + ) documents = loader.load() text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) docs = text_splitter.split_documents(documents) @@ -45,7 +66,7 @@ def add_knowledge(): SupabaseVectorStore.from_documents( docs, embeddings, - client=supabase, + client=get_client(), table_name=table_name, query_name=query_name, chunk_size=chunk_size, @@ -60,8 +81,6 @@ def add_knowledge(): "message": str(e) }) - - def search_knowledge(query: str): retriever = init_retriever() docs = retriever.get_relevant_documents(query) diff --git a/server/routers/github.py b/server/routers/github.py index 474ddd26..647852e2 100644 --- a/server/routers/github.py +++ b/server/routers/github.py @@ -18,7 +18,7 @@ router = APIRouter( prefix="/api/github", - tags=["health_checkers"], + tags=["github"], responses={404: {"description": "Not found"}}, ) diff --git a/server/routers/rag.py b/server/routers/rag.py new file mode 100644 index 00000000..a0789c81 --- /dev/null +++ 
b/server/routers/rag.py @@ -0,0 +1,20 @@ +from fastapi import APIRouter +from rag import retrieval +from data_class import GitRepo + +router = APIRouter( + prefix="/api", + tags=["rag"], + responses={404: {"description": "Not found"}}, +) + + +@router.post("/rag/add_knowledge") +def add_knowledge(repo: GitRepo): + data=retrieval.add_knowledge(repo) + return data + +@router.post("/rag/search_knowledge") +def search_knowledge(query: str): + data=retrieval.search_knowledge(query) + return data diff --git a/server/tools/issue.py b/server/tools/issue.py index 0a4342fc..bc13e660 100644 --- a/server/tools/issue.py +++ b/server/tools/issue.py @@ -2,7 +2,6 @@ from typing import Optional from github import Github from langchain.tools import tool -from uilts.env import get_env_variable DEFAULT_REPO_NAME = "ant-design/ant-design" From de7f9f4549705c705cec32a02617db7d021e6314 Mon Sep 17 00:00:00 2001 From: yingying Date: Mon, 15 Apr 2024 19:31:29 +0800 Subject: [PATCH 02/16] feat: add knowledge from s3 --- .gitignore | 2 + server/data_class.py | 7 ++-- server/rag/retrieval.py | 86 +++++++++++++++++--------------------- server/requirements.txt | 3 +- server/routers/rag.py | 6 +-- server/tools/issue.py | 6 +-- server/tools/sourcecode.py | 5 +-- 7 files changed, 53 insertions(+), 62 deletions(-) diff --git a/.gitignore b/.gitignore index 7d24465c..8ad9c20e 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ __pycache__/ /.pnp .pnp.js +server/temp/ + # testing /coverage diff --git a/server/data_class.py b/server/data_class.py index a5389b69..28c04f6a 100644 --- a/server/data_class.py +++ b/server/data_class.py @@ -20,7 +20,6 @@ class ExecuteMessage(BaseModel): repo: str path: str -class GitRepo(BaseModel): - repo_name: str - path: Optional[str] = None - branch: Optional[str] = None +class S3Config(BaseModel): + s3_bucket: str + file_path: Optional[str] = None diff --git a/server/rag/retrieval.py b/server/rag/retrieval.py index 1f0d486f..c9f5f3d4 100644 ---
a/server/rag/retrieval.py +++ b/server/rag/retrieval.py @@ -1,22 +1,23 @@ -import os import json -from langchain_community.document_loaders import TextLoader +import boto3 from langchain_openai import OpenAIEmbeddings from langchain_text_splitters import CharacterTextSplitter from langchain_community.vectorstores import SupabaseVectorStore from db.supabase.client import get_client -from data_class import GitRepo +from data_class import S3Config from uilts.env import get_env_variable -from langchain_community.document_loaders import GitLoader -from github import Github, Repository +from langchain_community.document_loaders import S3DirectoryLoader + supabase_url = get_env_variable("SUPABASE_URL") supabase_key = get_env_variable("SUPABASE_SERVICE_KEY") -github_token = get_env_variable("GITHUB_TOKEN") +aws_access_key_id=get_env_variable("AWS_ACCESS_KEY_ID") +aws_secret_access_key=get_env_variable("AWS_SECRET_ACCESS_KEY") + table_name="antd_knowledge" query_name="match_antd_knowledge" -chunk_size=500 -g = Github() +chunk_size=2000 + def convert_document_to_dict(document): return { @@ -37,49 +38,38 @@ def init_retriever(): return db.as_retriever() -def get_repo_info(repo_name: str): - try: - repo = g.get_repo(repo_name) - return repo - except Exception as e: - print(f"An error occurred: {e}") - return None - -def add_knowledge(repo: GitRepo): - repo_info: Repository = get_repo_info(repo.repo_name) - if not repo_info: - return json.dumps({ - "success": False, - "message": "Invalid repository name!" 
- }) - loader=GitLoader( - clone_url=repo_info.html_url, - repo_path=repo.path, - branch=repo.branch or repo_info.default_branch, - ) - documents = loader.load() - text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) - docs = text_splitter.split_documents(documents) - embeddings = OpenAIEmbeddings() +def add_knowledge(config: S3Config): try: - SupabaseVectorStore.from_documents( - docs, - embeddings, - client=get_client(), - table_name=table_name, - query_name=query_name, - chunk_size=chunk_size, - ) - return json.dumps({ - "success": True, - "message": "Knowledge added successfully!" - }) + region_name = "ap-northeast-1" + session = boto3.session.Session() + session.client( + service_name='secretsmanager', + region_name=region_name + ) + loader = S3DirectoryLoader(config.s3_bucket, prefix=config.file_path) + documents = loader.load() + text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=0) + docs = text_splitter.split_documents(documents) + embeddings = OpenAIEmbeddings() + SupabaseVectorStore.from_documents( + docs, + embeddings, + client=get_client(), + table_name=table_name, + query_name=query_name, + chunk_size=chunk_size, + ) + return json.dumps({ + "success": True, + "message": "Knowledge added successfully!", + "docs_len": len(documents) + }) except Exception as e: - return json.dumps({ - "success": False, - "message": str(e) - }) + return json.dumps({ + "success": False, + "message": str(e) + }) def search_knowledge(query: str): retriever = init_retriever() diff --git a/server/requirements.txt b/server/requirements.txt index 854dfc4c..8b851929 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -15,4 +15,5 @@ load_dotenv supabase boto3>=1.34.84 pyjwt>=2.4.0 -pydantic>=2.7.0 \ No newline at end of file +pydantic>=2.7.0 +unstructured[md] diff --git a/server/routers/rag.py b/server/routers/rag.py index a0789c81..fded286c 100644 --- a/server/routers/rag.py +++ b/server/routers/rag.py @@ -1,6 +1,6 @@ from 
fastapi import APIRouter from rag import retrieval -from data_class import GitRepo +from data_class import S3Config router = APIRouter( prefix="/api", @@ -10,8 +10,8 @@ @router.post("/rag/add_knowledge") -def add_knowledge(repo: GitRepo): - data=retrieval.add_knowledge(repo) +def add_knowledge(config: S3Config): + data=retrieval.add_knowledge(config) return data @router.post("/rag/search_knowledge") diff --git a/server/tools/issue.py b/server/tools/issue.py index bc13e660..525a55b4 100644 --- a/server/tools/issue.py +++ b/server/tools/issue.py @@ -83,15 +83,15 @@ def search_issues( :param state: The state of the issue, e.g: open, closed, all """ try: - search_query = f'{keyword} in:title,body,comments repo:{repo_name}' + search_query = f"{keyword} in:title,body,comments repo:{repo_name}" # Retrieve a list of open issues from the repository issues = g.search_issues(query=search_query, sort=sort, order=order)[:max_num] print(f"issues: {issues}") issues_list = [ { - 'issue_name': f'Issue #{issue.number} - {issue.title}', - 'issue_url': issue.html_url + "issue_name": f"Issue #{issue.number} - {issue.title}", + "issue_url": issue.html_url } for issue in issues ] diff --git a/server/tools/sourcecode.py b/server/tools/sourcecode.py index c0bf304f..e0a1cdd4 100644 --- a/server/tools/sourcecode.py +++ b/server/tools/sourcecode.py @@ -2,8 +2,6 @@ from github import Github from github.ContentFile import ContentFile from langchain.tools import tool -from uilts.env import get_env_variable - DEFAULT_REPO_NAME = "ant-design/ant-design" @@ -29,7 +27,8 @@ def search_code( # Perform the search for code files containing the keyword code_files = g.search_code(query=query)[:max_num] - return code_files + + return code_files except Exception as e: print(f"An error occurred: {e}") return None From 29b22162dc89f29e80ce1d1d7db424b3a23f9f48 Mon Sep 17 00:00:00 2001 From: yingying Date: Mon, 15 Apr 2024 19:44:46 +0800 Subject: [PATCH 03/16] chore: update session --- server/rag/retrieval.py | 
10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/server/rag/retrieval.py b/server/rag/retrieval.py index c9f5f3d4..a8d31fba 100644 --- a/server/rag/retrieval.py +++ b/server/rag/retrieval.py @@ -11,8 +11,7 @@ supabase_url = get_env_variable("SUPABASE_URL") supabase_key = get_env_variable("SUPABASE_SERVICE_KEY") -aws_access_key_id=get_env_variable("AWS_ACCESS_KEY_ID") -aws_secret_access_key=get_env_variable("AWS_SECRET_ACCESS_KEY") + table_name="antd_knowledge" query_name="match_antd_knowledge" @@ -41,13 +40,8 @@ def init_retriever(): def add_knowledge(config: S3Config): try: - region_name = "ap-northeast-1" session = boto3.session.Session() - session.client( - service_name='secretsmanager', - region_name=region_name - ) - loader = S3DirectoryLoader(config.s3_bucket, prefix=config.file_path) + loader = S3DirectoryLoader(config.s3_bucket, prefix=config.file_path, aws_session_token=session.get_token()) documents = loader.load() text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=0) docs = text_splitter.split_documents(documents) From 8f6faaa689edee235ff02c9ddda00c8c150c460d Mon Sep 17 00:00:00 2001 From: yingying Date: Mon, 15 Apr 2024 19:50:08 +0800 Subject: [PATCH 04/16] chore: remove useless config --- server/rag/retrieval.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/rag/retrieval.py b/server/rag/retrieval.py index a8d31fba..ed9a8ee3 100644 --- a/server/rag/retrieval.py +++ b/server/rag/retrieval.py @@ -40,8 +40,7 @@ def init_retriever(): def add_knowledge(config: S3Config): try: - session = boto3.session.Session() - loader = S3DirectoryLoader(config.s3_bucket, prefix=config.file_path, aws_session_token=session.get_token()) + loader = S3DirectoryLoader(config.s3_bucket, prefix=config.file_path) documents = loader.load() text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=0) docs = text_splitter.split_documents(documents) From ec9983d8481324181029be5b1bb4da42296c8b08 Mon Sep
17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 20:22:37 +0800 Subject: [PATCH 05/16] feat: add nltk layer --- server/Dockerfile.aws.lambda | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/Dockerfile.aws.lambda b/server/Dockerfile.aws.lambda index 8e5f0560..2adeebd0 100644 --- a/server/Dockerfile.aws.lambda +++ b/server/Dockerfile.aws.lambda @@ -8,4 +8,9 @@ COPY . ${LAMBDA_TASK_ROOT} COPY requirements.txt . RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" -U --no-cache-dir +RUN pip install nltk + +RUN python -W ignore -m nltk.downloader punkt -d /build/nltk_data +RUN python -W ignore -m nltk.downloader stopwords -d /build/nltk_data + CMD ["python", "main.py"] \ No newline at end of file From 966095ea8396d58be4f45cff65e37b9bdeb32b7e Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 20:30:52 +0800 Subject: [PATCH 06/16] feat: add nltk layer --- server/Dockerfile.aws.lambda | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/Dockerfile.aws.lambda b/server/Dockerfile.aws.lambda index 2adeebd0..9af1eb8c 100644 --- a/server/Dockerfile.aws.lambda +++ b/server/Dockerfile.aws.lambda @@ -10,7 +10,7 @@ RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" -U --no-cach RUN pip install nltk -RUN python -W ignore -m nltk.downloader punkt -d /build/nltk_data -RUN python -W ignore -m nltk.downloader stopwords -d /build/nltk_data +RUN python -W ignore -m nltk.downloader punkt -d /tmp/nltk_data +RUN python -W ignore -m nltk.downloader stopwords -d /tmp/nltk_data CMD ["python", "main.py"] \ No newline at end of file From 266e26e2b86e2c36b7bdb934a1f1547934f063d4 Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 20:50:51 +0800 Subject: [PATCH 07/16] feat: store into ./nltk_data --- server/Dockerfile.aws.lambda | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/server/Dockerfile.aws.lambda b/server/Dockerfile.aws.lambda index 9af1eb8c..a25adde4 100644 --- 
a/server/Dockerfile.aws.lambda +++ b/server/Dockerfile.aws.lambda @@ -8,9 +8,13 @@ COPY . ${LAMBDA_TASK_ROOT} COPY requirements.txt . RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" -U --no-cache-dir +# Setup NLTK again in system path to execute nltk.downloader RUN pip install nltk +# Setup directory for NLTK_DATA +RUN mkdir -p ./nltk_data -RUN python -W ignore -m nltk.downloader punkt -d /tmp/nltk_data -RUN python -W ignore -m nltk.downloader stopwords -d /tmp/nltk_data +# Download NLTK_DATA to build directory +RUN python -W ignore -m nltk.downloader punkt -d ./nltk_data +RUN python -W ignore -m nltk.downloader stopwords -d ./nltk_data CMD ["python", "main.py"] \ No newline at end of file From 2b9c437cbaf7da983bc2fa44bc8787edef4a97c5 Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 20:56:39 +0800 Subject: [PATCH 08/16] feat: add custom layer --- server/Dockerfile.aws.lambda | 9 --------- template.yml | 2 ++ 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/server/Dockerfile.aws.lambda b/server/Dockerfile.aws.lambda index a25adde4..8e5f0560 100644 --- a/server/Dockerfile.aws.lambda +++ b/server/Dockerfile.aws.lambda @@ -8,13 +8,4 @@ COPY . ${LAMBDA_TASK_ROOT} COPY requirements.txt . 
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" -U --no-cache-dir -# Setup NLTK again in system path to execute nltk.downloader -RUN pip install nltk -# Setup directory for NLTK_DATA -RUN mkdir -p ./nltk_data - -# Download NLTK_DATA to build directory -RUN python -W ignore -m nltk.downloader punkt -d ./nltk_data -RUN python -W ignore -m nltk.downloader stopwords -d ./nltk_data - CMD ["python", "main.py"] \ No newline at end of file diff --git a/template.yml b/template.yml index 717d7b57..25a601ef 100644 --- a/template.yml +++ b/template.yml @@ -17,6 +17,8 @@ Resources: Environment: Variables: AWS_LWA_INVOKE_MODE: RESPONSE_STREAM + Layers: + - arn:aws:lambda:ap-northeast-1:654654285942:layer:nltk-layer:1 FunctionUrlConfig: AuthType: NONE InvokeMode: RESPONSE_STREAM From 0adc42bc6a185dca842321c50dd0846697a6d144 Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 21:03:22 +0800 Subject: [PATCH 09/16] feat: add custom layer --- server/Dockerfile.aws.lambda | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/server/Dockerfile.aws.lambda b/server/Dockerfile.aws.lambda index 8e5f0560..a25adde4 100644 --- a/server/Dockerfile.aws.lambda +++ b/server/Dockerfile.aws.lambda @@ -8,4 +8,13 @@ COPY . ${LAMBDA_TASK_ROOT} COPY requirements.txt . 
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" -U --no-cache-dir +# Setup NLTK again in system path to execute nltk.downloader +RUN pip install nltk +# Setup directory for NLTK_DATA +RUN mkdir -p ./nltk_data + +# Download NLTK_DATA to build directory +RUN python -W ignore -m nltk.downloader punkt -d ./nltk_data +RUN python -W ignore -m nltk.downloader stopwords -d ./nltk_data + CMD ["python", "main.py"] \ No newline at end of file From fe528f01dca97b65afa917d534aaa417c7380500 Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 21:11:07 +0800 Subject: [PATCH 10/16] feat: add custom layer --- server/routers/health_checker.py | 8 +++++++- template.yml | 2 -- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/server/routers/health_checker.py b/server/routers/health_checker.py index e4c078f3..b262075b 100644 --- a/server/routers/health_checker.py +++ b/server/routers/health_checker.py @@ -1,4 +1,7 @@ from fastapi import APIRouter, Depends, HTTPException +from uilts.env import get_env_variable + +task_root = get_env_variable("LAMBDA_TASK_ROOT") router = APIRouter( prefix="/api", @@ -8,4 +11,7 @@ @router.get("/health_checker") def health_checker(): - return {"Hello": "World"} \ No newline at end of file + return { + "Hello": "World", + "task_root": task_root, + } \ No newline at end of file diff --git a/template.yml b/template.yml index 25a601ef..717d7b57 100644 --- a/template.yml +++ b/template.yml @@ -17,8 +17,6 @@ Resources: Environment: Variables: AWS_LWA_INVOKE_MODE: RESPONSE_STREAM - Layers: - - arn:aws:lambda:ap-northeast-1:654654285942:layer:nltk-layer:1 FunctionUrlConfig: AuthType: NONE InvokeMode: RESPONSE_STREAM From e5fc349e01106d0e5678d9c609a53a3ff78e7246 Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 21:18:22 +0800 Subject: [PATCH 11/16] feat: add custom layer --- server/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/main.py b/server/main.py index f68220d9..cc2cb646 100644 
--- a/server/main.py +++ b/server/main.py @@ -15,6 +15,10 @@ open_api_key = get_env_variable("OPENAI_API_KEY") is_dev = bool(get_env_variable("IS_DEV")) +task_root = get_env_variable("LAMBDA_TASK_ROOT") + +if task_root: + os.environ["NLTK_DATA"] = os.path.join(task_root, "nltk_data") app = FastAPI( title="Bo-meta Server", From 8d9c62e7d94f10714f6ff050b4757b2578432de2 Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 21:19:16 +0800 Subject: [PATCH 12/16] feat: add custom layer --- server/Dockerfile.aws.lambda | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/Dockerfile.aws.lambda b/server/Dockerfile.aws.lambda index a25adde4..846acb8f 100644 --- a/server/Dockerfile.aws.lambda +++ b/server/Dockerfile.aws.lambda @@ -11,10 +11,10 @@ RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" -U --no-cach # Setup NLTK again in system path to execute nltk.downloader RUN pip install nltk # Setup directory for NLTK_DATA -RUN mkdir -p ./nltk_data +RUN mkdir -p /opt/nltk_data # Download NLTK_DATA to build directory -RUN python -W ignore -m nltk.downloader punkt -d ./nltk_data -RUN python -W ignore -m nltk.downloader stopwords -d ./nltk_data +RUN python -W ignore -m nltk.downloader punkt -d /opt/nltk_data +RUN python -W ignore -m nltk.downloader stopwords -d /opt/nltk_data CMD ["python", "main.py"] \ No newline at end of file From fe59b8b99c779be2d1c767a3b5019d1e63c48407 Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 21:19:39 +0800 Subject: [PATCH 13/16] feat: add custom layer --- server/main.py | 4 ---- server/routers/health_checker.py | 8 +------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/server/main.py b/server/main.py index cc2cb646..f68220d9 100644 --- a/server/main.py +++ b/server/main.py @@ -15,10 +15,6 @@ open_api_key = get_env_variable("OPENAI_API_KEY") is_dev = bool(get_env_variable("IS_DEV")) -task_root = get_env_variable("LAMBDA_TASK_ROOT") - -if task_root: - 
os.environ["NLTK_DATA"] = os.path.join(task_root, "nltk_data") app = FastAPI( title="Bo-meta Server", diff --git a/server/routers/health_checker.py b/server/routers/health_checker.py index b262075b..243b8b26 100644 --- a/server/routers/health_checker.py +++ b/server/routers/health_checker.py @@ -1,7 +1,4 @@ from fastapi import APIRouter, Depends, HTTPException -from uilts.env import get_env_variable - -task_root = get_env_variable("LAMBDA_TASK_ROOT") router = APIRouter( prefix="/api", @@ -11,7 +8,4 @@ @router.get("/health_checker") def health_checker(): - return { - "Hello": "World", - "task_root": task_root, - } \ No newline at end of file + return { "Hello": "World" } \ No newline at end of file From 418457f334b8dd3de23245f85a9baa77421f32f6 Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 21:33:24 +0800 Subject: [PATCH 14/16] feat: add custom layer --- server/Dockerfile.aws.lambda | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/server/Dockerfile.aws.lambda b/server/Dockerfile.aws.lambda index 846acb8f..2b2355b9 100644 --- a/server/Dockerfile.aws.lambda +++ b/server/Dockerfile.aws.lambda @@ -11,10 +11,12 @@ RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" -U --no-cach # Setup NLTK again in system path to execute nltk.downloader RUN pip install nltk # Setup directory for NLTK_DATA -RUN mkdir -p /opt/nltk_data +RUN mkdir -p ./nltk_data # Download NLTK_DATA to build directory -RUN python -W ignore -m nltk.downloader punkt -d /opt/nltk_data -RUN python -W ignore -m nltk.downloader stopwords -d /opt/nltk_data +RUN python -W ignore -m nltk.downloader punkt -d ./nltk_data +RUN python -W ignore -m nltk.downloader stopwords -d ./nltk_data +RUN python -W ignore -m nltk.downloader averaged_perceptron_tagger -d ./nltk_data +COPY ./nltk_data /opt/nltk_data CMD ["python", "main.py"] \ No newline at end of file From 8c7dbfce79b6175cdd24b6d98d3bbd87a8d39d02 Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 
2024 21:37:25 +0800 Subject: [PATCH 15/16] feat: add averaged_perceptron_tagger --- server/Dockerfile.aws.lambda | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/Dockerfile.aws.lambda b/server/Dockerfile.aws.lambda index 2b2355b9..426695ae 100644 --- a/server/Dockerfile.aws.lambda +++ b/server/Dockerfile.aws.lambda @@ -11,12 +11,12 @@ RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" -U --no-cach # Setup NLTK again in system path to execute nltk.downloader RUN pip install nltk # Setup directory for NLTK_DATA -RUN mkdir -p ./nltk_data +RUN mkdir -p /opt/nltk_data # Download NLTK_DATA to build directory -RUN python -W ignore -m nltk.downloader punkt -d ./nltk_data -RUN python -W ignore -m nltk.downloader stopwords -d ./nltk_data -RUN python -W ignore -m nltk.downloader averaged_perceptron_tagger -d ./nltk_data +RUN python -W ignore -m nltk.downloader punkt -d /opt/nltk_data +RUN python -W ignore -m nltk.downloader stopwords -d /opt/nltk_data +RUN python -W ignore -m nltk.downloader averaged_perceptron_tagger -d /opt/nltk_data COPY ./nltk_data /opt/nltk_data CMD ["python", "main.py"] \ No newline at end of file From 747af832c7fb07f7be3432f67036ba415f658ef3 Mon Sep 17 00:00:00 2001 From: "raoha.rh" Date: Mon, 15 Apr 2024 21:39:50 +0800 Subject: [PATCH 16/16] feat: add averaged_perceptron_tagger --- server/Dockerfile.aws.lambda | 1 - 1 file changed, 1 deletion(-) diff --git a/server/Dockerfile.aws.lambda b/server/Dockerfile.aws.lambda index 426695ae..77fc2e0b 100644 --- a/server/Dockerfile.aws.lambda +++ b/server/Dockerfile.aws.lambda @@ -17,6 +17,5 @@ RUN mkdir -p /opt/nltk_data RUN python -W ignore -m nltk.downloader punkt -d /opt/nltk_data RUN python -W ignore -m nltk.downloader stopwords -d /opt/nltk_data RUN python -W ignore -m nltk.downloader averaged_perceptron_tagger -d /opt/nltk_data -COPY ./nltk_data /opt/nltk_data CMD ["python", "main.py"] \ No newline at end of file