Skip to content
This repository has been archived by the owner on Apr 29, 2024. It is now read-only.

Commit

Permalink
Add PSQL
Browse files Browse the repository at this point in the history
  • Loading branch information
sasha370 committed Apr 12, 2024
1 parent 1decee6 commit fc0c540
Show file tree
Hide file tree
Showing 19 changed files with 324 additions and 45 deletions.
7 changes: 7 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,10 @@ SLACK_CHANNEL_ID_KNOWLEDGE_GAP_DISCUSSIONS="???"
# Both values are optional and applied independently.
# SLACK_REQUIRE_ENTERPRISE_ID="???"
# SLACK_REQUIRE_TEAM_ID="???"

# DB configuration
# Keep assignments free of spaces around '=': .envrc runs `source .env`, and a
# shell parses `NAME = value` as a command invocation, not an assignment.
DB_USER='postgres'
DB_PASSWORD='postgres'
DB_HOST='localhost'
DB_PORT='5432'
DB_NAME='top_assist'
3 changes: 2 additions & 1 deletion .envrc
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
dotenv
source .env
dotenv
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,7 @@ Bear in mind that the Dockerized version uses a shared volume named `nur_shared_


```warp-runnable-command
# Move to the poetry package
cd code/nur/nurai
alembic revision -m "create account table"
# Import the model in alembic/env.py
# Create a new migration
poetry run alembic revision --autogenerate -m "Initialize the db"
Expand All @@ -175,6 +174,7 @@ poetry run alembic upgrade head
```



## Network traffic

1. Outgoing to Open AI API (for Embeds and completion)
Expand Down
117 changes: 117 additions & 0 deletions alembic.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts
script_location = alembic

# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires Python >= 3.9 or the backports.zoneinfo library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =

# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to alembic/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions

# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.

# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

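# Custom: DB URL
# NOTE: this file is plain INI, so the value below is read as a literal string
# and is NOT evaluated as Python. The real connection URL has to be supplied
# at runtime from alembic/env.py (e.g. via config.set_main_option).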
sqlalchemy.url = configuration.DB_URL


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = --fix REVISION_SCRIPT_FILENAME

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
1 change: 1 addition & 0 deletions alembic/README
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Generic single-database configuration.
77 changes: 77 additions & 0 deletions alembic/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from logging.config import fileConfig

from sqlalchemy import engine_from_config
from sqlalchemy import pool

from alembic import context

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# alembic.ini is plain INI: whatever is written under ``sqlalchemy.url`` there
# is passed on as a literal string and never evaluated as Python. Inject the
# real connection URL from the application configuration instead.
# '%' is doubled because set_main_option() stores the value through
# ConfigParser, which treats a single '%' as interpolation syntax.
from configuration import DB_URL

config.set_main_option("sqlalchemy.url", DB_URL.replace("%", "%%"))

# add your model's MetaData object here
# for 'autogenerate' support
from database.database import Base

# Tables only register on Base.metadata once their model module has been
# imported; without these imports autogenerate compares the database against
# empty metadata. The imports are needed purely for that side effect.
import models.bookmarked_conversation  # noqa: E402,F401
import models.page_data  # noqa: E402,F401
import models.qa_interaction  # noqa: E402,F401
import models.quiz_question  # noqa: E402,F401
import models.space_info  # noqa: E402,F401
import models.user_score  # noqa: E402,F401

target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline() -> None:
    """Emit migrations as SQL without connecting to a database.

    The context is configured from the ``sqlalchemy.url`` main option only;
    no Engine is created, so no DBAPI driver has to be importable. Every
    ``context.execute()`` call made by a migration is written to the script
    output instead of being run.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        # Render bound parameters inline so the emitted SQL is self-contained.
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations against a live database connection.

    Builds an Engine from the ``sqlalchemy.``-prefixed keys of the active ini
    section, opens a connection, binds it to the migration context and runs
    the migrations inside a transaction.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as db_connection:
        context.configure(
            connection=db_connection,
            target_metadata=target_metadata,
        )
        with context.begin_transaction():
            context.run_migrations()


# Alembic executes this module on every command; choose the runner that
# matches the mode the CLI was invoked in and call it.
_run_migrations = (
    run_migrations_offline if context.is_offline_mode() else run_migrations_online
)
_run_migrations()
26 changes: 26 additions & 0 deletions alembic/script.py.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
${upgrades if upgrades else "pass"}


def downgrade() -> None:
${downgrades if downgrades else "pass"}
29 changes: 18 additions & 11 deletions configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,23 @@ def get_project_root() -> str:
return str(project_root)


# DB configuration
# Every value falls back to the local-development default from .env.example,
# so importing this module does not crash when the environment is not
# populated (int(None) would raise TypeError for the port).
DB_USER = os.environ.get("DB_USER", "postgres")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "postgres")
DB_HOST = os.environ.get("DB_HOST", "localhost")
DB_PORT = int(os.environ.get("DB_PORT", "5432"))
DB_NAME = os.environ.get("DB_NAME", "top_assist")
DB_URL = f'postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'

# API configuration
# Use the environment values when available, otherwise the defaults the
# Nur Services API has always used (localhost:8000).
api_host = os.environ.get("NUR_API_HOST", "localhost")
api_port = int(os.environ.get("NUR_API_PORT", "8000"))
embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/embeds'
feedback_endpoint = f'http://{api_host}:{api_port}/api/v1/feedback'
questions_endpoint = f'http://{api_host}:{api_port}/api/v1/questions'
interaction_embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/interaction_embeds'


logging.basicConfig(level=logging.INFO)

project_path = get_project_root()
Expand All @@ -23,6 +40,7 @@ def get_project_root() -> str:
chart_folder_path = os.path.join(project_path, "content", "charts")
sql_file_path = os.path.join(project_path, "content", "database", "confluence_pages_sql.db")
db_url = 'sqlite:///' + sql_file_path

vector_folder_path = os.path.join(project_path, "content", "vectors", "confluence_pages")
interactions_folder_path = os.path.join(project_path, "content", "vectors", "confluence_interactions")

Expand Down Expand Up @@ -50,17 +68,6 @@ def get_project_root() -> str:
# 3 is minimum cost and 10 is maximum comprehensive list of questions
interaction_retrieval_count = 5

# Configuration for the Nur Services API
# get the values from the environment variables if available or use the default values
api_host = os.environ.get("NUR_API_HOST", "localhost")
api_port = int(os.environ.get("NUR_API_PORT", "8000"))

# Endpoints
embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/embeds'
feedback_endpoint = f'http://{api_host}:{api_port}/api/v1/feedback'
questions_endpoint = f'http://{api_host}:{api_port}/api/v1/questions'
interaction_embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/interaction_embeds'

# Name of the vector collection
pages_collection_name = "pages"
interactions_collection_name = "interactions"
Expand Down
24 changes: 12 additions & 12 deletions database/database.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from configuration import db_url
from models.bookmarked_conversation import BookmarkedConversation
from models.page_data import PageData
from models.qa_interaction import QAInteraction
from models.quiz_question import QuizQuestion
from models.space_info import SpaceInfo
from models.user_score import UserScore
from configuration import DB_URL
# from models.bookmarked_conversation import BookmarkedConversation
# from models.page_data import PageData
# from models.qa_interaction import QAInteraction
# from models.quiz_question import QuizQuestion
# from models.space_info import SpaceInfo
# from models.user_score import UserScore
from contextlib import contextmanager

engine = create_engine(db_url)
engine = create_engine(DB_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=True, bind=engine)

# TODO: Extract and uncomment
for model in [QAInteraction, SpaceInfo, PageData, BookmarkedConversation, QuizQuestion, UserScore]:
model.metadata.create_all(engine)
# from sqlalchemy.ext.declarative import declarative_base
# Base = declarative_base()
# for model in [QAInteraction, SpaceInfo, PageData, BookmarkedConversation, QuizQuestion, UserScore]:
# model.metadata.create_all(engine)
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

@contextmanager
def get_db_session():
Expand Down
9 changes: 4 additions & 5 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ services:
image: postgres:12
restart: always
environment:
POSTGRES_DB: 'top-assist'
POSTGRES_USER: 'toptal'
POSTGRES_PASSWORD: 'toptal'
POSTGRES_DB: 'top_assist'
POSTGRES_USER: 'postgres'
POSTGRES_PASSWORD: 'postgres'
ports:
- '5432:5432'
expose:
Expand All @@ -18,14 +18,13 @@ services:
volumes:
- chroma-data:/chroma/.chroma/index
ports:
- 8001:8000
- 8000:8000
networks:
- net

volumes:
postgres-data:
chroma-data:
driver: local

networks:
net:
Expand Down
4 changes: 2 additions & 2 deletions models/bookmarked_conversation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# from database.database import Base
from database.database import Base
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
# Base = declarative_base()
from datetime import datetime, timezone
from sqlalchemy import Column, Integer, String, Text, DateTime

Expand Down
4 changes: 2 additions & 2 deletions models/page_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# from database.database import Base
from database.database import Base
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
# Base = declarative_base()
from sqlalchemy import Column, Integer, String, Text, DateTime
import json # TODO Can we switch to sqlalchemy JSON type?

Expand Down
Loading

0 comments on commit fc0c540

Please sign in to comment.