From fc0c5408eeec08d1171c2e6c4851840a87efb28a Mon Sep 17 00:00:00 2001 From: Aleksandr Kariakin Date: Fri, 12 Apr 2024 12:21:21 +0800 Subject: [PATCH] Add PSQL --- .env.example | 7 ++ .envrc | 3 +- README.md | 4 +- alembic.ini | 117 ++++++++++++++++++++++++++++++ alembic/README | 1 + alembic/env.py | 77 ++++++++++++++++++++ alembic/script.py.mako | 26 +++++++ configuration.py | 29 +++++--- database/database.py | 24 +++--- docker-compose.yml | 9 +-- models/bookmarked_conversation.py | 4 +- models/page_data.py | 4 +- models/qa_interaction.py | 4 +- models/quiz_question.py | 4 +- models/space_info.py | 4 +- models/user_score.py | 4 +- poetry.lock | 43 ++++++++++- pyproject.toml | 2 + slack/event_consumer.py | 3 +- 19 files changed, 324 insertions(+), 45 deletions(-) create mode 100644 alembic.ini create mode 100644 alembic/README create mode 100644 alembic/env.py create mode 100644 alembic/script.py.mako diff --git a/.env.example b/.env.example index b749b8ab..529a9ffd 100644 --- a/.env.example +++ b/.env.example @@ -19,3 +19,10 @@ SLACK_CHANNEL_ID_KNOWLEDGE_GAP_DISCUSSIONS="???" # Both values are optional and applied independently. # SLACK_REQUIRE_ENTERPRISE_ID="???" # SLACK_REQUIRE_TEAM_ID="???" 
+ +# DB configuration +DB_USER = 'postgres' +DB_PASSWORD = 'postgres' +DB_HOST = 'localhost' +DB_PORT = '5432' +DB_NAME = 'top_assist' diff --git a/.envrc b/.envrc index 40448e65..bb4bedc0 100644 --- a/.envrc +++ b/.envrc @@ -1 +1,2 @@ -dotenv \ No newline at end of file +source .env +dotenv diff --git a/README.md b/README.md index db139eba..66b1c6ea 100644 --- a/README.md +++ b/README.md @@ -165,8 +165,7 @@ Bear in mind that the Dockerized version uses a shared volume named `nur_shared_ ```warp-runnable-command -# Move to the poetry package -cd code/nur/nurai +alembic revision -m "create account table" # Import the model in /nurai/migrations/env.py # Create a new migration poetry run alembic revision --autogenerate -m "Initialize the db" @@ -175,6 +174,7 @@ poetry run alembic upgrade head ``` + ## Network traffic 1. Outgoing to Open AI API (for Embeds and completion) diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 00000000..3a229aa4 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,117 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. 
+# Any required deps can be installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# Custom: Add DB URL +sqlalchemy.url = configuration.DB_URL + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. 
See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/README b/alembic/README new file mode 100644 index 00000000..98e4f9c4 --- /dev/null +++ b/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 00000000..e1e144cf --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,77 @@ +from logging.config import fileConfig + +from sqlalchemy import engine_from_config +from sqlalchemy import pool + +from alembic import context + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. 
+if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +from database.database import Base +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. 
+ + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 00000000..fbc4b07d --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/configuration.py b/configuration.py index c895d3e3..1286a0c7 100644 --- a/configuration.py +++ b/configuration.py @@ -15,6 +15,23 @@ def get_project_root() -> str: return str(project_root) +# DB configuration +DB_USER = os.environ.get("DB_USER") +DB_PASSWORD = os.environ.get("DB_PASSWORD") +DB_HOST = os.environ.get("DB_HOST") +DB_PORT = int(os.environ.get("DB_PORT")) +DB_NAME = os.environ.get("DB_NAME") +DB_URL = f'postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}' + +# API configuration +api_host = os.environ.get("NUR_API_HOST") +api_port = int(os.environ.get("NUR_API_PORT")) +embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/embeds' +feedback_endpoint = 
f'http://{api_host}:{api_port}/api/v1/feedback' +questions_endpoint = f'http://{api_host}:{api_port}/api/v1/questions' +interaction_embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/interaction_embeds' + + logging.basicConfig(level=logging.INFO) project_path = get_project_root() @@ -23,6 +40,7 @@ def get_project_root() -> str: chart_folder_path = os.path.join(project_path, "content", "charts") sql_file_path = os.path.join(project_path, "content", "database", "confluence_pages_sql.db") db_url = 'sqlite:///' + sql_file_path + vector_folder_path = os.path.join(project_path, "content", "vectors", "confluence_pages") interactions_folder_path = os.path.join(project_path, "content", "vectors", "confluence_interactions") @@ -50,17 +68,6 @@ def get_project_root() -> str: # 3 is minimum cost and 10 is maximum comprehensive list of questions interaction_retrieval_count = 5 -# Configuration for the Nur Services API -# get the values from the environment variables if available or use the default values -api_host = os.environ.get("NUR_API_HOST", "localhost") -api_port = int(os.environ.get("NUR_API_PORT", "8000")) - -# Endpoints -embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/embeds' -feedback_endpoint = f'http://{api_host}:{api_port}/api/v1/feedback' -questions_endpoint = f'http://{api_host}:{api_port}/api/v1/questions' -interaction_embeds_endpoint = f'http://{api_host}:{api_port}/api/v1/interaction_embeds' - # Name of the vector collection pages_collection_name = "pages" interactions_collection_name = "interactions" diff --git a/database/database.py b/database/database.py index e9d47e7e..eba43456 100644 --- a/database/database.py +++ b/database/database.py @@ -1,23 +1,23 @@ from sqlalchemy import create_engine from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker -from configuration import db_url -from models.bookmarked_conversation import BookmarkedConversation -from models.page_data import PageData -from 
models.qa_interaction import QAInteraction -from models.quiz_question import QuizQuestion -from models.space_info import SpaceInfo -from models.user_score import UserScore +from configuration import DB_URL +# from models.bookmarked_conversation import BookmarkedConversation +# from models.page_data import PageData +# from models.qa_interaction import QAInteraction +# from models.quiz_question import QuizQuestion +# from models.space_info import SpaceInfo +# from models.user_score import UserScore from contextlib import contextmanager -engine = create_engine(db_url) +engine = create_engine(DB_URL) SessionLocal = sessionmaker(autocommit=False, autoflush=True, bind=engine) # TODO: Extract and uncomment -for model in [QAInteraction, SpaceInfo, PageData, BookmarkedConversation, QuizQuestion, UserScore]: - model.metadata.create_all(engine) -# from sqlalchemy.ext.declarative import declarative_base -# Base = declarative_base() +# for model in [QAInteraction, SpaceInfo, PageData, BookmarkedConversation, QuizQuestion, UserScore]: +# model.metadata.create_all(engine) +from sqlalchemy.ext.declarative import declarative_base +Base = declarative_base() @contextmanager def get_db_session(): diff --git a/docker-compose.yml b/docker-compose.yml index 4e2ee8bb..dc42ce85 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,9 +4,9 @@ services: image: postgres:12 restart: always environment: - POSTGRES_DB: 'top-assist' - POSTGRES_USER: 'toptal' - POSTGRES_PASSWORD: 'toptal' + POSTGRES_DB: 'top_assist' + POSTGRES_USER: 'postgres' + POSTGRES_PASSWORD: 'postgres' ports: - '5432:5432' expose: @@ -18,14 +18,13 @@ services: volumes: - chroma-data:/chroma/.chroma/index ports: - - 8001:8000 + - 8000:8000 networks: - net volumes: postgres-data: chroma-data: - driver: local networks: net: diff --git a/models/bookmarked_conversation.py b/models/bookmarked_conversation.py index 8f76286b..37d452d1 100644 --- a/models/bookmarked_conversation.py +++ b/models/bookmarked_conversation.py @@ 
-1,6 +1,6 @@ -# from database.database import Base +from database.database import Base from sqlalchemy.ext.declarative import declarative_base -Base = declarative_base() +# Base = declarative_base() from datetime import datetime, timezone from sqlalchemy import Column, Integer, String, Text, DateTime diff --git a/models/page_data.py b/models/page_data.py index 89f7d17f..8b2412fd 100644 --- a/models/page_data.py +++ b/models/page_data.py @@ -1,6 +1,6 @@ -# from database.database import Base +from database.database import Base from sqlalchemy.ext.declarative import declarative_base -Base = declarative_base() +# Base = declarative_base() from sqlalchemy import Column, Integer, String, Text, DateTime import json # TODO Can we switch to sqlalchemy JSON type? diff --git a/models/qa_interaction.py b/models/qa_interaction.py index fee39809..977c324d 100644 --- a/models/qa_interaction.py +++ b/models/qa_interaction.py @@ -1,6 +1,6 @@ -# from database.database import Base +from database.database import Base from sqlalchemy.ext.declarative import declarative_base -Base = declarative_base() +# Base = declarative_base() from sqlalchemy import Column, Integer, String, Text, DateTime import json # TODO Can we switch to sqlalchemy JSON type? 
diff --git a/models/quiz_question.py b/models/quiz_question.py index 23ef76d4..c44f8519 100644 --- a/models/quiz_question.py +++ b/models/quiz_question.py @@ -1,6 +1,6 @@ -# from database.database import Base +from database.database import Base from sqlalchemy.ext.declarative import declarative_base -Base = declarative_base() +# Base = declarative_base() from sqlalchemy import Column, Integer, String, Text, DateTime diff --git a/models/space_info.py b/models/space_info.py index 5603947f..ee41701a 100644 --- a/models/space_info.py +++ b/models/space_info.py @@ -1,6 +1,6 @@ -# from database.database import Base +from database.database import Base from sqlalchemy.ext.declarative import declarative_base -Base = declarative_base() +# Base = declarative_base() from sqlalchemy import Column, Integer, String, DateTime diff --git a/models/user_score.py b/models/user_score.py index 87fa84fc..e39cef91 100644 --- a/models/user_score.py +++ b/models/user_score.py @@ -1,6 +1,6 @@ -# from database.database import Base +from database.database import Base from sqlalchemy.ext.declarative import declarative_base -Base = declarative_base() +# Base = declarative_base() from sqlalchemy import Column, Integer, String diff --git a/poetry.lock b/poetry.lock index eebf2eaf..fb1a6945 100644 --- a/poetry.lock +++ b/poetry.lock @@ -109,6 +109,25 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "alembic" +version = "1.13.1" +description = "A database migration tool for SQLAlchemy." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "alembic-1.13.1-py3-none-any.whl", hash = "sha256:2edcc97bed0bd3272611ce3a98d98279e9c209e7186e43e75bbb1b2bdfdbcc43"}, + {file = "alembic-1.13.1.tar.gz", hash = "sha256:4932c8558bf68f2ee92b9bbcb8218671c627064d5b08939437af6d77dc05e595"}, +] + +[package.dependencies] +Mako = "*" +SQLAlchemy = ">=1.3.0" +typing-extensions = ">=4" + +[package.extras] +tz = ["backports.zoneinfo"] + [[package]] name = "annotated-types" version = "0.6.0" @@ -3829,6 +3848,28 @@ files = [ [package.extras] test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] +[[package]] +name = "psycopg2" +version = "2.9.9" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "psycopg2-2.9.9-cp310-cp310-win32.whl", hash = "sha256:38a8dcc6856f569068b47de286b472b7c473ac7977243593a288ebce0dc89516"}, + {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, + {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, + {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, + {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, + {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, + {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, + {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, + {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, + {file = 
"psycopg2-2.9.9-cp38-cp38-win_amd64.whl", hash = "sha256:bac58c024c9922c23550af2a581998624d6e02350f4ae9c5f0bc642c633a2d5e"}, + {file = "psycopg2-2.9.9-cp39-cp39-win32.whl", hash = "sha256:c92811b2d4c9b6ea0285942b2e7cac98a59e166d59c588fe5cfe1eda58e72d59"}, + {file = "psycopg2-2.9.9-cp39-cp39-win_amd64.whl", hash = "sha256:de80739447af31525feddeb8effd640782cf5998e1a4e9192ebdf829717e3913"}, + {file = "psycopg2-2.9.9.tar.gz", hash = "sha256:d1454bde93fb1e224166811694d600e746430c006fbb031ea06ecc2ea41bf156"}, +] + [[package]] name = "ptyprocess" version = "0.7.0" @@ -5907,4 +5948,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "5abf1cda92e3adcd457017417c84d8399a852dea69e27682ffca63eb29b14ec3" +content-hash = "dcc3ffc9827d9b49490371936a8c9240b4e45ffd8924dcfdc40b5e882e02ed48" diff --git a/pyproject.toml b/pyproject.toml index a53dde95..783988e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,8 @@ seaborn = "^0.13.2" plotly = "^5.20.0" umap-learn = "^0.5.5" jupyter = "^1.0.0" +psycopg2 = "^2.9.9" +alembic = "^1.13.1" [tool.poetry.group.dev.dependencies] ipykernel = "^6.29.4" diff --git a/slack/event_consumer.py b/slack/event_consumer.py index 0d74f471..5481a062 100644 --- a/slack/event_consumer.py +++ b/slack/event_consumer.py @@ -130,12 +130,13 @@ def process_feedback(self, feedback_event: FeedbackEvent): # TODO: Refactor thi print(f"No response generated for feedback: {feedback_event.dict()}\n") +# TODO: Move session call to method level def process_question(question_event: QuestionEvent): """Directly processes a question event without using the queue.""" with get_db_session() as session: EventConsumer(session).process_question(question_event) - +# TODO: Move session call to method level def process_feedback(feedback_event: FeedbackEvent): """Directly processes a feedback event without using the queue.""" with get_db_session() as session: