Add PSQL

toptal · Apr 12, 2024 · fc0c540 · fc0c540
1 parent 1decee6
commit fc0c540
Show file tree

Hide file tree

Showing 19 changed files with 324 additions and 45 deletions.
diff --git a/.env.example b/.env.example
@@ -19,3 +19,10 @@ SLACK_CHANNEL_ID_KNOWLEDGE_GAP_DISCUSSIONS="???"
 # Both values are optional and applied independently.
 # SLACK_REQUIRE_ENTERPRISE_ID="???"
 # SLACK_REQUIRE_TEAM_ID="???"
+
+# DB configuration (no spaces around "=": .envrc sources the .env file as shell code)
+DB_USER='postgres'
+DB_PASSWORD='postgres'
+DB_HOST='localhost'
+DB_PORT='5432'
+DB_NAME='top_assist'
diff --git a/.envrc b/.envrc
@@ -1 +1,2 @@
-dotenv
+source .env
+dotenv
diff --git a/README.md b/README.md
@@ -165,8 +165,7 @@ Bear in mind that the Dockerized version uses a shared volume named `nur_shared_
 
 
 ```warp-runnable-command
-# Move to the poetry package
-cd code/nur/nurai
+# Run the commands below from the project root (the directory containing alembic.ini)
 # Import the model in /nurai/migrations/env.py
 # Create a new migration
 poetry run alembic revision --autogenerate -m "Initialize the db"
@@ -175,6 +174,7 @@ poetry run alembic upgrade head
 ```
 
 
+
 ## Network traffic
 
 1. Outgoing to Open AI API (for Embeds and completion)

diff --git a/alembic.ini b/alembic.ini
@@ -0,0 +1,117 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts
+script_location = alembic
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.
+prepend_sys_path = .
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the python>=3.9 or backports.zoneinfo library.
+# Any required deps can installed by adding `alembic[tz]` to the pip requirements
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the
+# "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to alembic/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "version_path_separator" below.
+# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
+
+# version path separator; As mentioned above, this is the character used to split
+# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
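+# Custom: placeholder only. INI values are literal strings (never evaluated as Python), so alembic/env.py must override sqlalchemy.url with the real DB_URL.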
+sqlalchemy.url = configuration.DB_URL
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts.  See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = %(here)s/.venv/bin/ruff
+# ruff.options = --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/alembic/README b/alembic/README
@@ -0,0 +1 @@
+Generic single-database configuration.
diff --git a/alembic/env.py b/alembic/env.py
@@ -0,0 +1,93 @@
+"""Alembic migration environment.
+
+Wires the project's database URL and model metadata into the Alembic
+migration context, then runs migrations in offline or online mode.
+"""
+from logging.config import fileConfig
+
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
+
+from alembic import context
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Interpret the config file for Python logging.
+# This line sets up loggers basically.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# Project imports are kept below fileConfig() so the logging setup order is
+# unchanged: importing `configuration` calls logging.basicConfig().
+from configuration import DB_URL
+from database.database import Base
+
+# alembic.ini values are literal strings and are never evaluated as Python,
+# so the real connection URL is injected here. "%" is doubled because the
+# value passes through ConfigParser interpolation.
+config.set_main_option("sqlalchemy.url", DB_URL.replace("%", "%%"))
+
+# MetaData object that 'autogenerate' compares against the database.
+# NOTE(review): presumably the model modules must be imported somewhere for
+# their tables to be registered on Base.metadata - confirm before relying on
+# autogenerate.
+target_metadata = Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well.  By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection, target_metadata=target_metadata
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/alembic/script.py.mako b/alembic/script.py.mako
@@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
diff --git a/configuration.py b/configuration.py
@@ -15,6 +15,23 @@ def get_project_root() -> str:
     return str(project_root)
 
 
+# DB configuration; host and port fall back to local defaults when the variables are unset
+DB_USER = os.environ.get("DB_USER")
+DB_PASSWORD = os.environ.get("DB_PASSWORD")
+DB_HOST = os.environ.get("DB_HOST", "localhost")
+DB_PORT = int(os.environ.get("DB_PORT", "5432"))
+DB_NAME = os.environ.get("DB_NAME")
+DB_URL = f'postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
+
+# API configuration; keeps the previous localhost:8000 defaults so int() never receives None
+api_host = os.environ.get("NUR_API_HOST", "localhost")
+api_port = int(os.environ.get("NUR_API_PORT", "8000"))
+embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/embeds'
+feedback_endpoint = f'http://{api_host}:{api_port}/api/v1/feedback'
+questions_endpoint = f'http://{api_host}:{api_port}/api/v1/questions'
+interaction_embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/interaction_embeds'
+
+
 logging.basicConfig(level=logging.INFO)
 
 project_path = get_project_root()
@@ -23,6 +40,7 @@ def get_project_root() -> str:
 chart_folder_path = os.path.join(project_path, "content", "charts")
 sql_file_path = os.path.join(project_path, "content", "database", "confluence_pages_sql.db")
 db_url = 'sqlite:///' + sql_file_path
+
 vector_folder_path = os.path.join(project_path, "content", "vectors", "confluence_pages")
 interactions_folder_path = os.path.join(project_path, "content", "vectors", "confluence_interactions")
 
@@ -50,17 +68,6 @@ def get_project_root() -> str:
 # 3 is minimum cost and 10 is maximum comprehensive list of questions
 interaction_retrieval_count = 5
 
-# Configuration for the Nur Services API
-# get the values from the environment variables if available or use the default values
-api_host = os.environ.get("NUR_API_HOST", "localhost")
-api_port = int(os.environ.get("NUR_API_PORT", "8000"))
-
-# Endpoints
-embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/embeds'
-feedback_endpoint = f'http://{api_host}:{api_port}/api/v1/feedback'
-questions_endpoint = f'http://{api_host}:{api_port}/api/v1/questions'
-interaction_embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/interaction_embeds'
-
 # Name of the vector collection
 pages_collection_name = "pages"
 interactions_collection_name = "interactions"

diff --git a/database/database.py b/database/database.py
@@ -1,23 +1,23 @@
 from sqlalchemy import create_engine
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker
-from configuration import db_url
-from models.bookmarked_conversation import BookmarkedConversation
-from models.page_data import PageData
-from models.qa_interaction import QAInteraction
-from models.quiz_question import QuizQuestion
-from models.space_info import SpaceInfo
-from models.user_score import UserScore
+from configuration import DB_URL
+# from models.bookmarked_conversation import BookmarkedConversation
+# from models.page_data import PageData
+# from models.qa_interaction import QAInteraction
+# from models.quiz_question import QuizQuestion
+# from models.space_info import SpaceInfo
+# from models.user_score import UserScore
 from contextlib import contextmanager
 
-engine = create_engine(db_url)
+engine = create_engine(DB_URL)
 SessionLocal = sessionmaker(autocommit=False, autoflush=True, bind=engine)
 
 # TODO: Extract and uncomment
-for model in [QAInteraction, SpaceInfo, PageData, BookmarkedConversation, QuizQuestion, UserScore]:
-    model.metadata.create_all(engine)
-# from sqlalchemy.ext.declarative import declarative_base
-# Base = declarative_base()
+# for model in [QAInteraction, SpaceInfo, PageData, BookmarkedConversation, QuizQuestion, UserScore]:
+#     model.metadata.create_all(engine)
+# Declarative base that model modules import (declarative_base is already imported at the top of this module).
+Base = declarative_base()
 
 @contextmanager
 def get_db_session():

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -4,9 +4,9 @@ services:
     image: postgres:12
     restart: always
     environment:
-      POSTGRES_DB: 'top-assist'
-      POSTGRES_USER: 'toptal'
-      POSTGRES_PASSWORD: 'toptal'
+      POSTGRES_DB: 'top_assist'
+      POSTGRES_USER: 'postgres'
+      POSTGRES_PASSWORD: 'postgres'
     ports:
       - '5432:5432'
     expose:
@@ -18,14 +18,13 @@ services:
     volumes:
       - chroma-data:/chroma/.chroma/index
     ports:
-      - 8001:8000
+      - 8000:8000
     networks:
       - net
 
 volumes:
   postgres-data:
   chroma-data:
-    driver: local
 
 networks:
   net:

diff --git a/models/bookmarked_conversation.py b/models/bookmarked_conversation.py
@@ -1,6 +1,6 @@
-# from database.database import Base
+from database.database import Base
 from sqlalchemy.ext.declarative import declarative_base
-Base = declarative_base()
+# Base is imported from database.database above instead of a per-module declarative_base().
 from datetime import datetime, timezone
 from sqlalchemy import Column, Integer, String, Text, DateTime
 

diff --git a/models/page_data.py b/models/page_data.py
@@ -1,6 +1,6 @@
-# from database.database import Base
+from database.database import Base
 from sqlalchemy.ext.declarative import declarative_base
-Base = declarative_base()
+# Base is imported from database.database above instead of a per-module declarative_base().
 from sqlalchemy import Column, Integer, String, Text, DateTime
 import json  # TODO Can we switch to sqlalchemy JSON type?