Skip to content

Commit

Permalink
Merge pull request #126 from codebanesr/feature/django
Browse files Browse the repository at this point in the history
App rewrite in django
  • Loading branch information
codebanesr authored Aug 13, 2023
2 parents 397e053 + a4d4876 commit 4c1e578
Show file tree
Hide file tree
Showing 349 changed files with 18,303 additions and 9 deletions.
43 changes: 43 additions & 0 deletions .env.docker
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# OPENAI_API_TYPE can be either `azure` or `openai`; for `openai` only OPENAI_API_KEY is needed
OPENAI_API_TYPE=openai
AZURE_OPENAI_API_BASE=
AZURE_OPENAI_API_KEY=
AZURE_OPENAI_API_VERSION=2023-03-15-preview
AZURE_OPENAI_EMBEDDING_MODEL_NAME=
AZURE_OPENAI_DEPLOYMENT_NAME=
AZURE_OPENAI_COMPLETION_MODEL=gpt-35-turbo


# For openai
OPENAI_API_KEY=



# azure | openai
EMBEDDING_PROVIDER=azure

# Vector Store, PINECONE|QDRANT
STORE=QDRANT


# if using pinecone
PINECONE_API_KEY=
PINECONE_ENV=
VECTOR_STORE_INDEX_NAME=


# if using qdrant
QDRANT_URL=http://qdrant:6333


# optional, defaults to 15
MAX_PAGES_CRAWL=1

# --- these will change if you decide to start testing the software
CELERY_BROKER_URL=redis://redis:6379/0
CELERY_RESULT_BACKEND=redis://redis:6379/0
DATABASE_NAME=openchat
DATABASE_USER=dbuser
DATABASE_PASSWORD=dbpass
DATABASE_HOST=mysql
DATABASE_PORT=3306
13 changes: 13 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,16 @@ down:
exec-backend-server:
$(DOCKER_COMPOSE) exec backend-server bash
.PHONY: install down


# Makefile to Run docker-compose for Django App

# Guard target: only runs when the .env.docker file does not exist, and then
# aborts the build with an explanatory message.
.env.docker:
	@echo "Error: The .env.docker file is missing. Please create it before proceeding. Refer readme"
	@exit 1

# These targets are commands, not files; declare them phony so a stray file
# named install_django / uninstall_django cannot make them a no-op.
.PHONY: install_django uninstall_django

# Start the Django stack in the background (requires .env.docker to exist).
install_django: .env.docker
	@docker-compose -f docker-compose.django.yaml up -d

# Stop and remove the Django stack's containers.
uninstall_django:
	@docker-compose -f docker-compose.django.yaml down
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,15 @@ make install
make.bat
```

Once the installation is complete, you can access the OpenChat console at: http://localhost:8000
## Getting Started with the OpenChat Django App

Start your adventure of contributing to and using OpenChat, now remade using the Python programming language. You can begin by following the instructions in the guide available here: [OpenChat Python Guide](docs/django_release.md).

**Kindly be aware that the transition to the Python backend includes a significant alteration related to the Qdrant vector store, constituting a breaking change.**

Once the installation is complete, you can access the OpenChat console at: http://localhost:8000

Documentation [available here](https://docs.openchat.so/introduction)

Expand Down
16 changes: 8 additions & 8 deletions common.env
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
# Required for openai azure ------------------------------------------------------------------
# USE_AZURE_OPENAI=true
# AZURE_OPENAI_API_KEY=1ddb532d770a414581c0ed9987c9ac2d
# AZURE_OPENAI_API_INSTANCE_NAME=shanurrahman
# AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME=emailclassifier
# AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME=emailclassifierembedding
# AZURE_OPENAI_API_VERSION=2023-03-15-preview
USE_AZURE_OPENAI=true
AZURE_OPENAI_API_KEY=
AZURE_OPENAI_API_INSTANCE_NAME=
AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME=
AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME=
AZURE_OPENAI_API_VERSION=2023-03-15-preview
# --------------------------------------------------------------------------------------------

OPENAI_API_KEY=

STORE=qdrant
STORE=pinecone

PINECONE_API_KEY=
PINECONE_ENVIRONMENT=
PINECONE_INDEX_NAME=
VECTOR_STORE_INDEX_NAME=

# QDRANT_URL
QDRANT_URL=http://qdrant:6333
38 changes: 38 additions & 0 deletions dj_backend_server/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Ignore general OS and text editor files
*.pyc
__pycache__/
*.pyo
*.pyd
*.swp
.DS_Store
Thumbs.db
desktop.ini

# Ignore Django-specific files
*.log
*.pot
*.pyc
local_settings.py
db.sqlite3
media
staticfiles

# Ignore development and environment files
.env
.env.*
*.envrc
.env.local
.env.*.local
*.sqlite3
*.db
*.sqlite
*.pdb
*.env
*.pyc
*.orig
*.egg-info/
*.egg-info
pip-log.txt
pip-delete-this-directory.txt
website_data_sources/*
venv
39 changes: 39 additions & 0 deletions dj_backend_server/.vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Django",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/manage.py",
"args": [
"runserver"
],
"django": true,
"justMyCode": true
},
{
"name": "Python: Celery Workers",
"type": "python",
"request": "launch",
// "program": "${workspaceFolder}/path_to_celery_executable",
"module": "celery",
"args": [
"-A",
"dj_backend_server",
"worker",
"-l",
"debug"
],
"env": {
"OBJC_DISABLE_INITIALIZE_FORK_SAFETY": "YES",
"DISABLE_SPRING": "true"
},
"console": "integratedTerminal",
"envFile": "${workspaceFolder}/.env"
}
]
}
6 changes: 6 additions & 0 deletions dj_backend_server/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.python"
},
"python.formatting.provider": "none"
}
18 changes: 18 additions & 0 deletions dj_backend_server/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Use an official Python runtime as a parent image
FROM python:3.9

# Set environment variables for the project
# (key=value form; the space-separated `ENV key value` form is legacy syntax)
ENV PYTHONUNBUFFERED=1
ENV DJANGO_SETTINGS_MODULE=dj_backend_server.settings

# Set the working directory to /app
WORKDIR /app

# Copy only the dependency manifest first so the pip layer below stays cached
# until requirements.txt itself changes, not on every source edit.
COPY requirements.txt /app/

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the current directory contents into the container at /app
COPY . /app/

# Run migrations on startup, then start the Django server on port 8000
CMD ["sh", "-c", "python manage.py migrate && python manage.py runserver 0.0.0.0:8000"]
Empty file.
3 changes: 3 additions & 0 deletions dj_backend_server/api/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
6 changes: 6 additions & 0 deletions dj_backend_server/api/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class ApiConfig(AppConfig):
    """Django application configuration for the ``api`` app."""

    # App label / import path of the application this config belongs to.
    name = "api"
    # Primary-key field type used for models that do not declare one.
    default_auto_field = "django.db.models.BigAutoField"
1 change: 1 addition & 0 deletions dj_backend_server/api/configs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .base import PINECONE_NAMESPACE, PINECONE_TEXT_KEY, VECTOR_STORE_INDEX_NAME
11 changes: 11 additions & 0 deletions dj_backend_server/api/configs/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import os

# Name of the vector-store index, read from the environment.
# `or` (rather than a .get() default) is deliberate: an env file line such as
# `VECTOR_STORE_INDEX_NAME=` sets the variable to an empty string, which a
# plain .get() default would pass through unchanged.
VECTOR_STORE_INDEX_NAME = os.environ.get('VECTOR_STORE_INDEX_NAME') or 'dummy'
# Fixed namespace and metadata text key exported for the Pinecone store.
PINECONE_NAMESPACE = 'bot-test'
PINECONE_TEXT_KEY = 'text'

# Public names re-exported by the package's __init__.
__all__ = [
    'VECTOR_STORE_INDEX_NAME',
    'PINECONE_NAMESPACE',
    'PINECONE_TEXT_KEY',
]
2 changes: 2 additions & 0 deletions dj_backend_server/api/data_sources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .codebase_handler import codebase_handler
from .website_handler import website_handler
30 changes: 30 additions & 0 deletions dj_backend_server/api/data_sources/codebase_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# codebase_handler.py: indexes a git repository into the vector store.
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
from langchain.text_splitter import RecursiveCharacterTextSplitter
from api.utils import get_embeddings
from langchain.document_loaders import GitLoader
from api.utils import init_vector_store

# https://python.langchain.com/docs/integrations/document_loaders/git
# NOTE(review): csrf_exempt is a view decorator, but this function takes
# (repo_path, namespace) rather than a request. Confirm it is still needed.
@csrf_exempt
def codebase_handler(repo_path: str, namespace: str):
    """Index the git repository at ``repo_path`` into the vector store.

    The ``main`` branch is loaded with ``GitLoader``, split on newlines into
    chunks of at most 1000 characters with a 200-character overlap, and handed
    to ``init_vector_store`` together with the configured embeddings.

    Args:
        repo_path: Path of the local git repository to load.
        namespace: Vector-store namespace the documents are stored under.

    Any exception is printed (with traceback) and swallowed; nothing is
    returned or re-raised.
    """
    import traceback

    try:
        # Imported here because this module's import header does not bring
        # StoreOptions into scope.
        from api.interfaces import StoreOptions

        # GitLoader accepts repo_path / clone_url / branch / file_filter only;
        # extra keyword arguments raise TypeError before anything is loaded.
        loader = GitLoader(repo_path=repo_path, branch="main")

        raw_docs = loader.load()

        print('Loaded documents')

        text_splitter = RecursiveCharacterTextSplitter(
            separators=["\n"],
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        docs = text_splitter.split_documents(raw_docs)

        print('Split documents')

        embeddings = get_embeddings()

        # Pass the namespace wrapped in StoreOptions, matching how the PDF
        # and website handlers call init_vector_store.
        init_vector_store(docs, embeddings, StoreOptions(namespace=namespace))

        print('Indexed documents. all done!')
    except Exception as e:
        print(e)
        traceback.print_exc()
34 changes: 34 additions & 0 deletions dj_backend_server/api/data_sources/pdf_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# pdf_handler.py: indexes PDF files from a shared folder into the vector store.
import json
from django.views.decorators.csrf import csrf_exempt
from langchain.text_splitter import RecursiveCharacterTextSplitter
from api.utils import get_embeddings
from langchain.document_loaders.directory import DirectoryLoader
from api.utils import init_vector_store
from langchain.document_loaders import PyPDFium2Loader
import os
from web.utils.delete_foler import delete_folder
from api.interfaces import StoreOptions
@csrf_exempt
def pdf_handler(shared_folder: str, namespace: str):
    """Embed every PDF under ``website_data_sources/<shared_folder>``.

    The PDFs are loaded and split by the directory loader, re-split into
    1000-character chunks with a 200-character overlap, stored through
    ``init_vector_store`` under ``namespace``, and the folder is then deleted.
    Any exception is printed together with its traceback and swallowed.
    """
    try:
        pdf_dir = os.path.join("website_data_sources", shared_folder)

        pages = DirectoryLoader(
            path=pdf_dir,
            glob="**/*.pdf",
            loader_cls=PyPDFium2Loader,
            use_multithreading=True,
        ).load_and_split()

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        chunks = splitter.split_documents(pages)

        init_vector_store(chunks, get_embeddings(), StoreOptions(namespace))

        # The folder is removed only after indexing succeeded.
        delete_folder(folder_path=pdf_dir)
        print('All is done, folder deleted')

    except Exception as err:
        import traceback
        print(err)
        traceback.print_exc()
39 changes: 39 additions & 0 deletions dj_backend_server/api/data_sources/website_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os
from django.http import JsonResponse

from langchain.document_loaders.directory import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from api.utils import init_vector_store
from api.utils.get_embeddings import get_embeddings
from api.interfaces import StoreOptions
# from import delete_folder
from web.models.website_data_sources import WebsiteDataSource
from web.enums.website_data_source_status_enum import WebsiteDataSourceStatusType
def website_handler(shared_folder, namespace):
    """Embed the crawled ``.txt`` files of a website data source.

    ``shared_folder`` is used both as the ``WebsiteDataSource`` id and as the
    sub-folder of ``website_data_sources`` that holds the text files. The
    files are split into 1000-character chunks with a 200-character overlap
    and stored through ``init_vector_store`` under ``namespace``.

    The data source's ``crawling_status`` is set to COMPLETED on success and
    FAILED on any exception; the exception is printed with its traceback and
    not re-raised. A failing lookup of the data source itself propagates,
    because it happens before the ``try``.
    """
    import traceback

    website_data_source = WebsiteDataSource.objects.get(id=shared_folder)
    try:
        directory_path = os.path.join("website_data_sources", shared_folder)
        directory_loader = DirectoryLoader(
            directory_path,
            glob="**/*.txt",
            loader_cls=TextLoader,
            use_multithreading=True,
        )

        raw_docs = directory_loader.load()

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )

        docs = text_splitter.split_documents(raw_docs)

        # Log the chunk count only; printing the documents themselves dumps
        # the whole crawled site to stdout.
        print("docs -->", len(docs))
        embeddings = get_embeddings()

        init_vector_store(docs, embeddings, StoreOptions(namespace=namespace))

        website_data_source.crawling_status = WebsiteDataSourceStatusType.COMPLETED.value
        website_data_source.save()
        # Folder clean-up is currently disabled, so the message must not
        # claim that the folder was deleted.
        # delete_folder(folder_path=directory_path)
        print('All is done, folder kept (deletion disabled)...')
    except Exception as e:
        website_data_source.crawling_status = WebsiteDataSourceStatusType.FAILED.value
        website_data_source.save()
        print(e)
        traceback.print_exc()
2 changes: 2 additions & 0 deletions dj_backend_server/api/enums/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .store_type import StoreType
from .embedding_type import EmbeddingProvider
8 changes: 8 additions & 0 deletions dj_backend_server/api/enums/embedding_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from enum import Enum


class EmbeddingProvider(Enum):
    """Identifiers of the supported embedding providers.

    Each member's value is the lowercase provider name.
    """

    OPENAI = "openai"
    BARD = "bard"
    # Canonical member keeps its original lowercase name so existing
    # ``EmbeddingProvider.azure`` references and ``.name`` stay unchanged.
    azure = "azure"
    # Upper-case alias matching the naming of the other members. Because it
    # shares the value, Enum treats it as an alias of ``azure`` (it does not
    # appear when iterating the enum).
    AZURE = "azure"

5 changes: 5 additions & 0 deletions dj_backend_server/api/enums/store_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from enum import Enum

class StoreType(Enum):
    """Supported vector-store backends; each value equals its member name."""

    PINECONE = "PINECONE"
    QDRANT = "QDRANT"
5 changes: 5 additions & 0 deletions dj_backend_server/api/interfaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from typing import Optional

class StoreOptions:
    """Options passed along when documents are written to a vector store."""

    def __init__(self, namespace: Optional[str] = None) -> None:
        """Remember the optional ``namespace`` (``None`` when not given)."""
        # Stored as-is; no validation or normalisation is applied.
        self.namespace: Optional[str] = namespace
3 changes: 3 additions & 0 deletions dj_backend_server/api/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.db import models

# Create your models here.
Loading

0 comments on commit 4c1e578

Please sign in to comment.