From a808d347240f38d500e8e0e9fabb460c93975b40 Mon Sep 17 00:00:00 2001
From: "K.Filippopolitis" <56073635+KFilippopolitis@users.noreply.github.com>
Date: Wed, 26 Jun 2024 13:43:16 +0300
Subject: [PATCH] Refactor (#47)

* Added an SQLite database that holds all tables except primary_data.
* Completely removed the actions table.
* Added tests for the no-database validation path.
* Moved the database-specific modules into a folder named databases.
* Sessions on the SQLite database autocommit.
---
 .github/workflows/run-tests.yaml  |   14 +-
 mipdb/commands.py                 |  256 +++++--
 mipdb/database.py                 |  648 -----------------
 mipdb/databases/__init__.py       |   22 +
 mipdb/databases/monetdb.py        |  154 ++++
 mipdb/databases/monetdb_tables.py |  229 ++++++
 mipdb/databases/sqlite.py         |  367 ++++++++++
 mipdb/databases/sqlite_tables.py  |  194 +++++
 mipdb/dataelements.py             |    4 +-
 mipdb/properties.py               |   26 +-
 mipdb/schema.py                   |    5 +-
 mipdb/tables.py                   |  441 -----------
 mipdb/usecases.py                 | 1134 ++++++++++------------------
 tests/conftest.py                 |   52 +-
 tests/mocks.py                    |   90 ---
 tests/test_commands.py            |  594 +++++++--------
 tests/test_database.py            |  225 +++---
 tests/test_dataelements.py        |   19 +-
 tests/test_properties.py          |   42 +-
 tests/test_schema.py              |    4 +-
 tests/test_tables.py              |  206 +----
 tests/test_usecases.py            |  812 ++++++------------
 22 files changed, 2317 insertions(+), 3221 deletions(-)
 delete mode 100644 mipdb/database.py
 create mode 100644 mipdb/databases/__init__.py
 create mode 100644 mipdb/databases/monetdb.py
 create mode 100644 mipdb/databases/monetdb_tables.py
 create mode 100644 mipdb/databases/sqlite.py
 create mode 100644 mipdb/databases/sqlite_tables.py
 delete mode 100644 mipdb/tables.py
 delete mode 100644 tests/mocks.py

diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml
index f5469eb..003cefb 100644
--- a/.github/workflows/run-tests.yaml
+++ b/.github/workflows/run-tests.yaml
@@ -33,12 +33,10 @@ jobs:
         run: poetry install --no-interaction
 
       - name: Run tests
-        run: |
-          poetry run coverage run -m pytest
-          poetry run coverage xml
+        run: poetry run pytest --cov=mipdb --cov-report=html
 
-      - name: Upload Coverage to Codecov
-        uses: codecov/codecov-action@v1.0.5
-        with:
-          fail_ci_if_error: true
-          verbose: true
+      - name: Upload coverage to Codecov
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+        run: |
+          bash <(curl -s https://codecov.io/bash) -t $CODECOV_TOKEN
diff --git a/mipdb/commands.py b/mipdb/commands.py
index c74e83e..5e23088 100644
--- a/mipdb/commands.py
+++ b/mipdb/commands.py
@@ -4,9 +4,10 @@
 import os
 import glob
 
-
-from mipdb.database import MonetDB, credentials_from_config
+from mipdb.databases import credentials_from_config
+from mipdb.databases.monetdb import MonetDB
 from mipdb.reader import JsonFileReader
+from mipdb.databases.sqlite import SQLiteDB
 from mipdb.usecases import (
     AddDataModel,
     Cleanup,
@@ -44,6 +45,7 @@ def __init__(self, *args, **kwargs):
             "--username": credentials["MONETDB_ADMIN_USERNAME"],
             "--password": credentials["MONETDB_LOCAL_PASSWORD"],
             "--db_name": credentials["DB_NAME"],
+            "--sqlite_db_path": credentials["SQLITE_DB_PATH"],
         }
         option = args[0][0]
         if option_to_env_var[option]:
@@ -88,10 +90,17 @@ def __init__(self, *args, **kwargs):
         help="The name of the database",
         cls=NotRequiredIf,
     ),
+    cl.option(
+        "--sqlite_db_path",
+        "sqlite_db_path",
+        required=True,
+        help="The path for the sqlite database",
+        cls=NotRequiredIf,
+    ),
 ]
 
 
-def get_db_config(ip, port, username, password, db_name):
+def get_monetdb_config(ip, port, username, password, db_name):
     try:
         ipaddress.ip_address(ip)
     except ValueError:
@@ -129,11 +138,14 @@ def entry(): ) @db_configs_options @handle_errors -def load_folder(file, copy_from_file, ip, port, username, password, db_name): - dbconfig = get_db_config(ip, port, username, password, db_name) - db = MonetDB.from_config(dbconfig) +def load_folder( + file, copy_from_file, ip, port, username, password, db_name, sqlite_db_path +): + dbconfig = get_monetdb_config(ip, port, username, password, db_name) + monetdb = MonetDB.from_config(dbconfig) + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) - Cleanup(db).execute() + Cleanup(sqlite_db, monetdb).execute() if not os.path.exists(file): print(f"The path {file} does not exist.") return @@ -150,13 +162,17 @@ def load_folder(file, copy_from_file, ip, port, username, password, db_name): data_model_metadata = reader.read() code = data_model_metadata["code"] version = data_model_metadata["version"] - AddDataModel(db).execute(data_model_metadata) + AddDataModel(sqlite_db=sqlite_db, monetdb=monetdb).execute(data_model_metadata) print(f"Data model '{code}' was successfully added.") for csv_path in glob.glob(subdir + "/*.csv"): print(f"CSV '{csv_path}' is being loaded...") - ValidateDataset(db).execute(csv_path, copy_from_file, code, version) - ImportCSV(db).execute(csv_path, copy_from_file, code, version) + ValidateDataset(sqlite_db=sqlite_db, monetdb=monetdb).execute( + csv_path, copy_from_file, code, version + ) + ImportCSV(sqlite_db=sqlite_db, monetdb=monetdb).execute( + csv_path, copy_from_file, code, version + ) print(f"CSV '{csv_path}' was successfully added.") @@ -189,12 +205,17 @@ def validate_folder(file): @entry.command() -@db_configs_options +@cl.option( + "--sqlite_db_path", + "sqlite_db_path", + required=True, + help="The path for the sqlite database", + cls=NotRequiredIf, +) @handle_errors -def init(ip, port, username, password, db_name): - dbconfig = get_db_config(ip, port, username, password, db_name) - db = MonetDB.from_config(dbconfig) - InitDB(db).execute() +def init(sqlite_db_path): + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + InitDB(db=sqlite_db).execute() print("Database initialized") @@ -202,14 +223,15 @@ def init(ip, port, username, password, db_name): @cl.argument("file", required=True) @db_configs_options @handle_errors -def add_data_model(file, ip, port, username, password, db_name): +def add_data_model(file, ip, port, username, password, db_name, sqlite_db_path): print(f"Data model '{file}' is being loaded...") - dbconfig = get_db_config(ip, port, username, password, db_name) + dbconfig = get_monetdb_config(ip, port, username, password, db_name) reader = JsonFileReader(file) - db = MonetDB.from_config(dbconfig) + monetdb = MonetDB.from_config(dbconfig) + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) data_model_metadata = reader.read() ValidateDataModel().execute(data_model_metadata) - AddDataModel(db).execute(data_model_metadata) + AddDataModel(sqlite_db=sqlite_db, monetdb=monetdb).execute(data_model_metadata) print(f"Data model '{file}' was successfully added.") @@ -232,13 +254,27 @@ def add_data_model(file, ip, port, username, password, db_name): @db_configs_options @handle_errors def add_dataset( - csv_path, data_model, version, copy_from_file, ip, port, username, password, db_name + csv_path, + data_model, + version, + copy_from_file, + ip, + port, + username, + password, + db_name, + sqlite_db_path, ): print(f"CSV '{csv_path}' is being loaded...") - dbconfig = get_db_config(ip, port, username, password, db_name) - db = MonetDB.from_config(dbconfig) - 
ValidateDataset(db).execute(csv_path, copy_from_file, data_model, version) - ImportCSV(db).execute(csv_path, copy_from_file, data_model, version) + dbconfig = get_monetdb_config(ip, port, username, password, db_name) + monetdb = MonetDB.from_config(dbconfig) + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + ValidateDataset(sqlite_db=sqlite_db, monetdb=monetdb).execute( + csv_path, copy_from_file, data_model, version + ) + ImportCSV(sqlite_db=sqlite_db, monetdb=monetdb).execute( + csv_path, copy_from_file, data_model, version + ) print(f"CSV '{csv_path}' was successfully added.") @@ -261,12 +297,25 @@ def add_dataset( @db_configs_options @handle_errors def validate_dataset( - csv_path, data_model, version, copy_from_file, ip, port, username, password, db_name + csv_path, + data_model, + version, + copy_from_file, + ip, + port, + username, + password, + db_name, + sqlite_db_path, ): print(f"Dataset '{csv_path}' is being validated...") - dbconfig = get_db_config(ip, port, username, password, db_name) - db = MonetDB.from_config(dbconfig) - ValidateDataset(db).execute(csv_path, copy_from_file, data_model, version) + dbconfig = get_monetdb_config(ip, port, username, password, db_name) + monetdb = MonetDB.from_config(dbconfig) + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + + ValidateDataset(sqlite_db=sqlite_db, monetdb=monetdb).execute( + csv_path, copy_from_file, data_model, version + ) print(f"Dataset '{csv_path}' has a valid structure.") @@ -281,9 +330,13 @@ def validate_dataset( ) @db_configs_options @handle_errors -def delete_data_model(name, version, force, ip, port, username, password, db_name): - db = MonetDB.from_config(get_db_config(ip, port, username, password, db_name)) - DeleteDataModel(db).execute(name, version, force) +def delete_data_model( + name, version, force, ip, port, username, password, db_name, sqlite_db_path +): + dbconfig = get_monetdb_config(ip, port, username, password, db_name) + monetdb = MonetDB.from_config(dbconfig) + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + DeleteDataModel(sqlite_db=sqlite_db, monetdb=monetdb).execute(name, version, force) print(f"Data model '{name}' was successfully removed.") @@ -298,31 +351,49 @@ def delete_data_model(name, version, force, ip, port, username, password, db_nam @cl.option("-v", "--version", required=True, help="The data model version") @db_configs_options @handle_errors -def delete_dataset(dataset, data_model, version, ip, port, username, password, db_name): - db = MonetDB.from_config(get_db_config(ip, port, username, password, db_name)) - DeleteDataset(db).execute(dataset, data_model, version) +def delete_dataset( + dataset, data_model, version, ip, port, username, password, db_name, sqlite_db_path +): + dbconfig = get_monetdb_config(ip, port, username, password, db_name) + monetdb = MonetDB.from_config(dbconfig) + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + DeleteDataset(sqlite_db=sqlite_db, monetdb=monetdb).execute( + dataset, data_model, version + ) print(f"Dataset {dataset} was successfully removed.") @entry.command() @cl.argument("name", required=True) @cl.option("-v", "--version", required=True, help="The data model version") -@db_configs_options +@cl.option( + "--sqlite_db_path", + "sqlite_db_path", + required=True, + help="The path for the sqlite database", + cls=NotRequiredIf, +) @handle_errors -def enable_data_model(name, version, ip, port, username, password, db_name): - db = MonetDB.from_config(get_db_config(ip, port, username, password, 
db_name)) - EnableDataModel(db).execute(name, version) +def enable_data_model(name, version, sqlite_db_path): + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + EnableDataModel(db=sqlite_db).execute(name, version) print(f"Data model {name} was successfully enabled.") @entry.command() @cl.argument("name", required=True) @cl.option("-v", "--version", required=True, help="The data model version") -@db_configs_options +@cl.option( + "--sqlite_db_path", + "sqlite_db_path", + required=True, + help="The path for the sqlite database", + cls=NotRequiredIf, +) @handle_errors -def disable_data_model(name, version, ip, port, username, password, db_name): - db = MonetDB.from_config(get_db_config(ip, port, username, password, db_name)) - DisableDataModel(db).execute(name, version) +def disable_data_model(name, version, sqlite_db_path): + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + DisableDataModel(db=sqlite_db).execute(name, version) print(f"Data model {name} was successfully disabled.") @@ -335,11 +406,17 @@ def disable_data_model(name, version, ip, port, username, password, db_name): help="The data model to which the dataset is added", ) @cl.option("-v", "--version", required=True, help="The data model version") -@db_configs_options +@cl.option( + "--sqlite_db_path", + "sqlite_db_path", + required=True, + help="The path for the sqlite database", + cls=NotRequiredIf, +) @handle_errors -def enable_dataset(dataset, data_model, version, ip, port, username, password, db_name): - db = MonetDB.from_config(get_db_config(ip, port, username, password, db_name)) - EnableDataset(db).execute(dataset, data_model, version) +def enable_dataset(dataset, data_model, version, sqlite_db_path): + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + EnableDataset(db=sqlite_db).execute(dataset, data_model, version) print(f"Dataset {dataset} was successfully enabled.") @@ -352,13 +429,17 @@ def enable_dataset(dataset, data_model, version, ip, port, username, password, d help="The data model to which the dataset is added", ) @cl.option("-v", "--version", required=True, help="The data model version") -@db_configs_options +@cl.option( + "--sqlite_db_path", + "sqlite_db_path", + required=True, + help="The path for the sqlite database", + cls=NotRequiredIf, +) @handle_errors -def disable_dataset( - dataset, data_model, version, ip, port, username, password, db_name -): - db = MonetDB.from_config(get_db_config(ip, port, username, password, db_name)) - DisableDataset(db).execute(dataset, data_model, version) +def disable_dataset(dataset, data_model, version, sqlite_db_path): + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + DisableDataset(sqlite_db).execute(dataset, data_model, version) print(f"Dataset {dataset} was successfully disabled.") @@ -385,26 +466,32 @@ def disable_dataset( is_flag=True, help="Force overwrite on property", ) -@db_configs_options +@cl.option( + "--sqlite_db_path", + "sqlite_db_path", + required=True, + help="The path for the sqlite database", + cls=NotRequiredIf, +) @handle_errors -def tag_data_model( - name, version, tag, remove, force, ip, port, username, password, db_name -): - db = MonetDB.from_config(get_db_config(ip, port, username, password, db_name)) +def tag_data_model(name, version, tag, remove, force, sqlite_db_path): + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) if "=" in tag: key, value = tag.split("=") if remove: - RemovePropertyFromDataModel(db).execute(name, version, key, value) + 
RemovePropertyFromDataModel(db=sqlite_db).execute(name, version, key, value) print(f"Property was successfully removed from data model {name}.") else: - AddPropertyToDataModel(db).execute(name, version, key, value, force) + AddPropertyToDataModel(db=sqlite_db).execute( + name, version, key, value, force + ) print(f"Property was successfully added to data model {name}.") else: if remove: - UntagDataModel(db).execute(name, version, tag) + UntagDataModel(db=sqlite_db).execute(name, version, tag) print(f"Data model {name} was successfully untagged.") else: - TagDataModel(db).execute(name, version, tag) + TagDataModel(db=sqlite_db).execute(name, version, tag) print(f"Data model {name} was successfully tagged.") @@ -437,7 +524,13 @@ def tag_data_model( is_flag=True, help="Force overwrite on property", ) -@db_configs_options +@cl.option( + "--sqlite_db_path", + "sqlite_db_path", + required=True, + help="The path for the sqlite database", + cls=NotRequiredIf, +) @handle_errors def tag_dataset( dataset, @@ -446,45 +539,50 @@ def tag_dataset( tag, remove, force, - ip, - port, - username, - password, - db_name, + sqlite_db_path, ): - db = MonetDB.from_config(get_db_config(ip, port, username, password, db_name)) + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + if "=" in tag: key, value = tag.split("=") if remove: - RemovePropertyFromDataset(db).execute( + RemovePropertyFromDataset(db=sqlite_db).execute( dataset, data_model, version, key, value ) print(f"Property was successfully removed from dataset {dataset}.") else: - AddPropertyToDataset(db).execute( + AddPropertyToDataset(db=sqlite_db).execute( dataset, data_model, version, key, value, force ) print(f"Property was successfully added to dataset {dataset}.") else: if remove: - UntagDataset(db).execute(dataset, data_model, version, tag) + UntagDataset(db=sqlite_db).execute(dataset, data_model, version, tag) print(f"Dataset {dataset} was successfully untagged.") else: - TagDataset(db).execute(dataset, data_model, version, tag) + TagDataset(db=sqlite_db).execute(dataset, data_model, version, tag) print(f"Dataset {dataset} was successfully tagged.") @entry.command() -@db_configs_options +@cl.option( + "--sqlite_db_path", + "sqlite_db_path", + required=True, + help="The path for the sqlite database", + cls=NotRequiredIf, +) @handle_errors -def list_data_models(ip, port, username, password, db_name): - db = MonetDB.from_config(get_db_config(ip, port, username, password, db_name)) - ListDataModels(db).execute() +def list_data_models(sqlite_db_path): + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + ListDataModels(db=sqlite_db).execute() @entry.command() @db_configs_options @handle_errors -def list_datasets(ip, port, username, password, db_name): - db = MonetDB.from_config(get_db_config(ip, port, username, password, db_name)) - ListDatasets(db).execute() +def list_datasets(ip, port, username, password, db_name, sqlite_db_path): + dbconfig = get_monetdb_config(ip, port, username, password, db_name) + monetdb = MonetDB.from_config(dbconfig) + sqlite_db = SQLiteDB.from_config({"db_path": sqlite_db_path}) + ListDatasets(sqlite_db=sqlite_db, monetdb=monetdb).execute() diff --git a/mipdb/database.py b/mipdb/database.py deleted file mode 100644 index 341390c..0000000 --- a/mipdb/database.py +++ /dev/null @@ -1,648 +0,0 @@ -import ipaddress -from abc import abstractmethod, ABC -from contextlib import contextmanager -from typing import Union - -import sqlalchemy as sql -import toml -from pymonetdb.sql import monetize - -from 
mipdb.exceptions import DataBaseError - -METADATA_SCHEMA = "mipdb_metadata" -METADATA_TABLE = "variables_metadata" - -CONFIG = "/home/config.toml" - -class Status: - ENABLED = "ENABLED" - DISABLED = "DISABLED" - - -class Connection(ABC): - """Abstract class representing a database connection interface.""" - - @abstractmethod - def create_schema(self, schema_name): - pass - - @abstractmethod - def drop_schema(self, schema_name): - pass - - @abstractmethod - def get_data_model_status(self, data_model_id): - pass - - @abstractmethod - def update_data_model_status(self, status, data_model_id): - pass - - @abstractmethod - def get_dataset_status(self, dataset_id): - pass - - @abstractmethod - def get_metadata(self, schema): - pass - - @abstractmethod - def update_dataset_status(self, status, dataset_id): - pass - - @abstractmethod - def get_dataset(self, dataset_id, columns): - pass - - @abstractmethod - def get_data_model(self, data_model_id, columns): - pass - - @abstractmethod - def get_values(self, data_model_id, columns): - pass - - @abstractmethod - def get_data_models(self, columns): - pass - - @abstractmethod - def get_dataset_count_by_data_model_id(self): - pass - - @abstractmethod - def get_data_count_by_dataset(self, schema_fullname): - pass - - @abstractmethod - def get_row_count(self, table): - pass - - @abstractmethod - def drop_table(self, table): - pass - - @abstractmethod - def delete_table_values(self, table): - pass - - @abstractmethod - def get_dataset_properties(self, dataset_id): - pass - - @abstractmethod - def get_data_model_properties(self, data_model_id): - pass - - @abstractmethod - def set_data_model_properties(self, properties, data_model_id): - pass - - @abstractmethod - def set_dataset_properties(self, properties, dataset_id): - pass - - @abstractmethod - def get_schemas(self): - pass - - @abstractmethod - def get_data_model_id(self, code, version): - pass - - @abstractmethod - def create_table(self, table): - pass - - @abstractmethod - def grant_select_access_rights(self, table, user): - pass - - @abstractmethod - def table_exists(self, table): - pass - - @abstractmethod - def insert_values_to_table(self, values, table): - pass - - @abstractmethod - def get_dataset_id(self, code, data_model_id): - pass - - @abstractmethod - def execute(self, *args, **kwargs): - pass - - @abstractmethod - def get_current_user(self): - pass - - @abstractmethod - def copy_csv_in_table(self, offset, table_name): - pass - - -class DataBase(ABC): - """Abstract class representing a database interface.""" - - @abstractmethod - def create_schema(self, schema_name): - pass - - @abstractmethod - def drop_schema(self, schema_name): - pass - - @abstractmethod - def get_data_model_status(self, data_model_id): - pass - - @abstractmethod - def update_data_model_status(self, status, data_model_id): - pass - - @abstractmethod - def get_dataset_status(self, dataset_id): - pass - - @abstractmethod - def get_metadata(self, schema): - pass - - @abstractmethod - def update_dataset_status(self, status, dataset_id): - pass - - @abstractmethod - def get_row_count(self, table): - pass - - @abstractmethod - def drop_table(self, table): - pass - - @abstractmethod - def delete_table_values(self, table): - pass - - @abstractmethod - def get_dataset_properties(self, dataset_id): - pass - - @abstractmethod - def get_data_model_properties(self, data_model_id): - pass - - @abstractmethod - def set_data_model_properties(self, properties, data_model_id): - pass - - @abstractmethod - def set_dataset_properties(self, 
properties, dataset_id): - pass - - @abstractmethod - def get_dataset_id(self, code, data_model_id): - pass - - @abstractmethod - def get_schemas(self): - pass - - @abstractmethod - def get_dataset(self, dataset_id, columns): - pass - - @abstractmethod - def get_data_model(self, data_model_id, columns): - pass - - @abstractmethod - def get_values(self, data_model_id, columns): - pass - - @abstractmethod - def get_data_models(self, columns): - pass - - @abstractmethod - def get_dataset_count_by_data_model_id(self): - pass - - @abstractmethod - def get_data_count_by_dataset(self, schema_fullname): - pass - - @abstractmethod - def table_exists(self, table): - pass - - @abstractmethod - def create_table(self, table): - pass - - @abstractmethod - def grant_select_access_rights(self, table, user): - pass - - @abstractmethod - def insert_values_to_table(self, values, table): - pass - - @abstractmethod - def begin(self): - pass - - @abstractmethod - def execute(self, *args, **kwargs): - pass - - @abstractmethod - def get_current_user(self): - pass - - @abstractmethod - def get_executor(self): - pass - - @abstractmethod - def copy_csv_in_table(self, offset, table_name): - pass - - -def handle_errors(func): - """Decorator for any function susceptible to raise a DB related exception. - Wraps function with an exception handling contextmanager which catches DB - exceptions and reraises them using a DataBaseError instance.""" - - @contextmanager - def _handle_errors(): - try: - yield - except sql.exc.OperationalError as exc: - _, msg = exc.orig.args[0].split("!") - raise DataBaseError(msg) - except sql.exc.IntegrityError as exc: - _, msg = exc.orig.args[0].split("!") - raise DataBaseError(msg) - - def wrapper(*args, **kwargs): - with _handle_errors(): - return func(*args, **kwargs) - - return wrapper - - -class DBExecutorMixin(ABC): - """Since SQLAlchemy's Engine and Connection object interfaces have a - significant overlap, we can avoid code duplication by defining the current - mixin abstract class. Subclasses are required to implement the execute - method. In practice this is done by delegating to the execute method of - either an Engine or a Connection. - - Subclasses are required to have an _executor attribute of type Engine or - Connection. - - Remark: creating tables using the execute method doesn't seem to work - because tables need to be already bound to a connectable, which in our case - they aren't. 
Hence, a small hack is needed to implement create_table.""" - - _executor: Union[sql.engine.Engine, sql.engine.Connection] - - @abstractmethod - def execute(self, *args, **kwargs) -> list: - pass - - def create_schema(self, schema_name): - self.execute(sql.schema.CreateSchema(schema_name)) - - def drop_schema(self, schema_name): - self.execute(f'DROP SCHEMA "{schema_name}" CASCADE') - - def get_data_model_status(self, data_model_id): - select = sql.text( - f"SELECT status FROM {METADATA_SCHEMA}.data_models " - "WHERE data_model_id = :data_model_id " - ) - (status, *_), *_ = self.execute(select, data_model_id=data_model_id) - return status - - def update_data_model_status(self, status, data_model_id): - update = sql.text( - f"UPDATE {METADATA_SCHEMA}.data_models " - "SET status = :status " - "WHERE data_model_id = :data_model_id " - "AND status <> :status" - ) - self.execute(update, status=status, data_model_id=data_model_id) - - def get_dataset_status(self, dataset_id): - select = sql.text( - f"SELECT status FROM {METADATA_SCHEMA}.datasets " - "WHERE dataset_id = :dataset_id " - ) - (status, *_), *_ = self.execute(select, dataset_id=dataset_id) - return status - - def get_metadata(self, schema): - select = sql.text( - "SELECT code, json.filter(metadata, '$') " - f'FROM "{schema.name}".{METADATA_TABLE}' - ) - res = self.execute(select) - return {code: metadata for code, metadata in res} - - def update_dataset_status(self, status, dataset_id): - update = sql.text( - f"UPDATE {METADATA_SCHEMA}.datasets " - "SET status = :status " - "WHERE dataset_id = :dataset_id " - "AND status <> :status" - ) - self.execute(update, status=status, dataset_id=dataset_id) - - @handle_errors - def get_data_model_id(self, code, version): - # I am forced to use textual SQL instead of SQLAlchemy objects because - # of two bugs. The first one is in sqlalchemy_monetdb which translates - # the 'not equal' operator as != instead of the correct <>. The second - # bug is in Monet DB where column names of level >= 3 are not yet - # implemented. - select = sql.text( - "SELECT data_model_id " - f"FROM {METADATA_SCHEMA}.data_models " - "WHERE code = :code " - "AND version = :version " - ) - res = list(self.execute(select, code=code, version=version)) - if len(res) > 1: - raise DataBaseError( - f"Got more than one data_model ids for {code=} and {version=}." - ) - if len(res) == 0: - raise DataBaseError( - f"Data_models table doesn't have a record with {code=}, {version=}" - ) - return res[0][0] - - @handle_errors - def get_dataset_id(self, code, data_model_id): - select = sql.text( - "SELECT dataset_id " - f"FROM {METADATA_SCHEMA}.datasets " - "WHERE code = :code " - "AND data_model_id = :data_model_id " - ) - res = list(self.execute(select, code=code, data_model_id=data_model_id)) - if len(res) > 1: - raise DataBaseError( - f"Got more than one dataset ids for {code=} and {data_model_id=}." 
- ) - if len(res) == 0: - raise DataBaseError( - f"Datasets table doesn't have a record with {code=}, {data_model_id=}" - ) - return res[0][0] - - def get_schemas(self): - res = self.execute("SELECT name FROM sys.schemas WHERE system=FALSE") - return [schema for schema, *_ in res] - - def get_dataset_count_by_data_model_id(self): - res = self.execute( - f""" - SELECT data_model_id, COUNT(data_model_id) as count - FROM {METADATA_SCHEMA}.datasets - GROUP BY data_model_id - """ - ) - return list(res) - - def get_data_count_by_dataset(self, schema_fullname): - res = self.execute( - f""" - SELECT dataset, COUNT(dataset) as count - FROM "{schema_fullname}"."primary_data" - GROUP BY dataset - """ - ) - return list(res) - - def get_row_count(self, table): - res = self.execute(f"select COUNT(*) from {table}").fetchone() - return res[0] - - def get_column_distinct(self, column, table): - datasets = list(self.execute( - f"SELECT DISTINCT({column}) FROM {table};" - )) - datasets = [dataset[0] for dataset in datasets] - return datasets - - def get_dataset(self, dataset_id, columns): - columns_query = ", ".join(columns) if columns else "*" - - dataset = self.execute( - f""" - SELECT {columns_query} - FROM {METADATA_SCHEMA}.datasets - WHERE dataset_id = {dataset_id} - LIMIT 1 - """ - ).fetchone() - - return dataset - - def get_data_model(self, data_model_id, columns): - columns_query = ", ".join(columns) if columns else "*" - data_model = self.execute( - f""" - SELECT {columns_query} - FROM {METADATA_SCHEMA}.data_models - WHERE data_model_id = {data_model_id} - LIMIT 1 - """ - ).fetchone() - return data_model - - def get_data_models(self, columns=None): - columns_query = ", ".join(columns) if columns else "*" - data_models = self.execute( - f""" - SELECT {columns_query} - FROM {METADATA_SCHEMA}.data_models - """ - ) - - return list(data_models) - - def get_values(self, data_model_id=None, columns=None): - columns_query = ", ".join(columns) if columns else "*" - data_model_id_clause = ( - f"WHERE data_model_id={data_model_id}" if data_model_id else "" - ) - datasets = self.execute( - f""" - SELECT {columns_query} - FROM {METADATA_SCHEMA}.datasets {data_model_id_clause} - """ - ) - return list(datasets) - - def table_exists(self, table): - return table.exists(bind=self._executor) - - @handle_errors - def create_table(self, table): - table.create(bind=self._executor) - - @handle_errors - def grant_select_access_rights(self, table, user): - fullname = ( - f'"{table.schema}"."{table.name}"' if table.schema else f'"{table.name}"' - ) - query = f"GRANT SELECT ON TABLE {fullname} TO {user} WITH GRANT OPTION;" - self.execute(query) - - def get_dataset_properties(self, dataset_id): - (properties, *_), *_ = self.execute( - f"SELECT properties FROM {METADATA_SCHEMA}.datasets WHERE dataset_id = {dataset_id}" - ) - return properties - - def get_data_model_properties(self, data_model_id): - (properties, *_), *_ = self.execute( - f"SELECT properties FROM {METADATA_SCHEMA}.data_models WHERE data_model_id = {data_model_id}" - ) - return properties - - def set_data_model_properties(self, properties, data_model_id): - properties_monetized = monetize.convert(properties) - query = f"""UPDATE {METADATA_SCHEMA}.data_models SET properties = {properties_monetized} - WHERE data_model_id = {data_model_id}""" - self.execute(query) - - def set_dataset_properties(self, properties, dataset_id): - properties_monetized = monetize.convert(properties) - query = f"""UPDATE {METADATA_SCHEMA}.datasets SET properties = {properties_monetized} - 
WHERE dataset_id = {dataset_id}""" - self.execute(query) - - def copy_csv_in_table(self, file_location, records, offset, table_name): - records_query = "" - if records: - records_query = f"{records} RECORDS" - - copy_into_query = f""" - COPY {records_query} OFFSET {offset} INTO {table_name} - FROM '{file_location}' - USING DELIMITERS ',', E'\n', '\"' - NULL AS ''; - """ - self.execute(copy_into_query) - - def copy_data_table_to_another_table(self, copy_into_table, copy_from_table): - # row_id is autoincrement, so we do not need to provide values. - table_column_names_without_row_id = [ - column.name - for column in list(copy_into_table.table.columns) - if column.name != "row_id" - ] - csv_column_names = [ - column.name for column in list(copy_from_table.table.columns) - ] - select_query_columns = [ - f'"{column}"' if column in csv_column_names else "NULL" - for column in table_column_names_without_row_id - ] - self.execute( - f""" - INSERT INTO "{copy_into_table.table.schema}".{copy_into_table.table.name} ({','.join([f'"{column}"' for column in table_column_names_without_row_id])}) - SELECT {', '.join(select_query_columns)} - FROM {copy_from_table.table.name}; - """ - ) - - @handle_errors - def drop_table(self, table): - table.drop(bind=self._executor) - - def delete_table_values(self, table): - self.execute(table.delete()) - - def insert_values_to_table(self, table, values): - self.execute(table.insert(), values) - - def get_executor(self): - return self._executor - - def get_current_user(self): - (user, *_), *_ = self.execute("SELECT CURRENT_USER") - return user - - -class MonetDBConnection(DBExecutorMixin, Connection): - """Concrete connection object returned by MonetDB's begin contextmanager. - This object offers the same interface as MonetDB, with respect to query - execution, and is used to execute multiple queries within transaction - boundaries. Gets all its query executing methods from DBExecutorMixin.""" - - def __init__(self, conn: sql.engine.Connection) -> None: - self._executor = conn - - @handle_errors - def execute(self, query, *args, **kwargs) -> list: - """Wrapper around SQLAlchemy's execute. Required because pymonetdb - returns None when the result is empty, instead of [] which make more - sense and agrees with sqlite behaviour.""" - return self._executor.execute(query, *args, **kwargs) or [] - - -def credentials_from_config(): - try: - return toml.load(CONFIG) - except FileNotFoundError: - return { - "DB_IP":"", - "DB_PORT": "", - "MONETDB_ADMIN_USERNAME":"", - "MONETDB_LOCAL_USERNAME":"", - "MONETDB_LOCAL_PASSWORD":"", - "MONETDB_PUBLIC_USERNAME":"", - "MONETDB_PUBLIC_PASSWORD":"", - "DB_NAME": "", - } - - -class MonetDB(DBExecutorMixin, DataBase): - """Concrete DataBase object connecting to a MonetDB instance. Gets all its - query executing methods from DBExecutorMixin.""" - - def __init__(self, url: str, echo=False) -> None: - self._executor = sql.create_engine(url, echo=echo) - - @classmethod - def from_config(self, dbconfig) -> "MonetDB": - username = dbconfig["username"] - password = dbconfig["password"] - ip = dbconfig["ip"] - port = dbconfig["port"] - dbfarm = dbconfig["dbfarm"] - - url = f"monetdb://{username}:{password}@{ip}:{port}/{dbfarm}" - return MonetDB(url) - - @handle_errors - def execute(self, query, *args, **kwargs) -> list: - """Wrapper around SQLAlchemy's execute. 
Required because pymonetdb - returns None when the result is empty, instead of [] which make more - sense and agrees with sqlite behaviour.""" - return self._executor.execute(query, *args, **kwargs) or [] - - @contextmanager - def begin(self) -> MonetDBConnection: - """Context manager returning a connection object. Used to execute - multiple queries within transaction boundaries.""" - with self._executor.begin() as conn: - yield MonetDBConnection(conn) diff --git a/mipdb/databases/__init__.py b/mipdb/databases/__init__.py new file mode 100644 index 0000000..233e890 --- /dev/null +++ b/mipdb/databases/__init__.py @@ -0,0 +1,22 @@ +import os + +import toml + +CONFIG_PATH = "/home/config.toml" + + +def credentials_from_config(): + try: + return toml.load(os.getenv("CONFIG_PATH", CONFIG_PATH)) + except FileNotFoundError: + return { + "DB_IP": "", + "DB_PORT": "", + "MONETDB_ADMIN_USERNAME": "", + "MONETDB_LOCAL_USERNAME": "", + "MONETDB_LOCAL_PASSWORD": "", + "MONETDB_PUBLIC_USERNAME": "", + "MONETDB_PUBLIC_PASSWORD": "", + "DB_NAME": "", + "SQLITE_DB_PATH": "", + } diff --git a/mipdb/databases/monetdb.py b/mipdb/databases/monetdb.py new file mode 100644 index 0000000..4113ca7 --- /dev/null +++ b/mipdb/databases/monetdb.py @@ -0,0 +1,154 @@ +from contextlib import contextmanager +from typing import Union + +import sqlalchemy as sql + +from mipdb.exceptions import DataBaseError + +PRIMARYDATA_TABLE = "primary_data" + + +def handle_errors(func): + """Decorator to handle DB exceptions and raise DataBaseError.""" + + @contextmanager + def _handle_errors(): + try: + yield + except (sql.exc.OperationalError, sql.exc.IntegrityError) as exc: + _, msg = exc.orig.args[0].split("!") + raise DataBaseError(msg) + + def wrapper(*args, **kwargs): + with _handle_errors(): + return func(*args, **kwargs) + + return wrapper + + +class DBExecutor: + """Class to handle SQL execution using SQLAlchemy's Engine and Connection.""" + + def __init__(self, executor: Union[sql.engine.Engine, sql.engine.Connection]): + self._executor = executor + + @handle_errors + def execute(self, query, *args, **kwargs) -> list: + return self._executor.execute(query, *args, **kwargs) or [] + + def create_schema(self, schema_name): + self.execute(sql.schema.CreateSchema(schema_name)) + + def drop_schema(self, schema_name): + self.execute(f'DROP SCHEMA "{schema_name}" CASCADE') + + def get_schemas(self): + res = self.execute("SELECT name FROM sys.schemas WHERE system=FALSE") + return [schema for schema, *_ in res] + + def get_data_count_by_dataset(self, schema_fullname): + res = self.execute( + f""" + SELECT dataset, COUNT(dataset) as count + FROM "{schema_fullname}"."primary_data" + GROUP BY dataset + """ + ) + return list(res) + + def get_row_count(self, table): + res = self.execute(f"SELECT COUNT(*) FROM {table}").fetchone() + return res[0] + + def get_column_distinct(self, column, table): + datasets = self.execute(f"SELECT DISTINCT({column}) FROM {table};") + return [dataset[0] for dataset in datasets] + + def table_exists(self, table): + return table.exists(bind=self._executor) + + @handle_errors + def create_table(self, table): + table.create(bind=self._executor) + + @handle_errors + def grant_select_access_rights(self, table, user): + fullname = ( + f'"{table.schema}"."{table.name}"' if table.schema else f'"{table.name}"' + ) + self.execute(f"GRANT SELECT ON TABLE {fullname} TO {user} WITH GRANT OPTION;") + + def copy_csv_in_table(self, file_location, records, offset, table_name): + records_query = f"{records} RECORDS" if records 
else "" + self.execute( + f""" + COPY {records_query} OFFSET {offset} INTO {table_name} + FROM '{file_location}' + USING DELIMITERS ',', E'\n', '\"' + NULL AS ''; + """ + ) + + def copy_data_table_to_another_table(self, copy_into_table, copy_from_table): + table_columns = [ + col.name for col in copy_into_table.table.columns if col.name != "row_id" + ] + csv_columns = [col.name for col in copy_from_table.table.columns] + select_columns = [ + f'"{col}"' if col in csv_columns else "NULL" for col in table_columns + ] + self.execute( + f""" + INSERT INTO "{copy_into_table.table.schema}".{copy_into_table.table.name} ({','.join(f'"{col}"' for col in table_columns)}) + SELECT {', '.join(select_columns)} + FROM {copy_from_table.table.name}; + """ + ) + + @handle_errors + def drop_table(self, table): + table.drop(bind=self._executor) + + def delete_table_values(self, table): + self.execute(table.delete()) + + def insert_values_to_table(self, table, values): + self.execute(table.insert(), values) + + def get_current_user(self): + (user, *_), *_ = self.execute("SELECT CURRENT_USER") + return user + + def get_executor(self): + return self._executor + + +class MonetDBConnection(DBExecutor): + """Concrete connection object for MonetDB within transaction boundaries.""" + + def __init__(self, conn: sql.engine.Connection) -> None: + super().__init__(conn) + + +class MonetDB(DBExecutor): + """Concrete DataBase object for connecting to a MonetDB instance.""" + + def __init__(self, url: str, echo=False) -> None: + super().__init__(sql.create_engine(url, echo=echo)) + + @classmethod + def from_config(cls, dbconfig) -> "MonetDB": + username, password, ip, port, dbfarm = ( + dbconfig["username"], + dbconfig["password"], + dbconfig["ip"], + dbconfig["port"], + dbconfig["dbfarm"], + ) + url = f"monetdb://{username}:{password}@{ip}:{port}/{dbfarm}" + return cls(url) + + @contextmanager + def begin(self) -> MonetDBConnection: + with self._executor.begin() as conn: + yield MonetDBConnection(conn) diff --git a/mipdb/databases/monetdb_tables.py b/mipdb/databases/monetdb_tables.py new file mode 100644 index 0000000..dff0b2f --- /dev/null +++ b/mipdb/databases/monetdb_tables.py @@ -0,0 +1,229 @@ +from abc import ABC, abstractmethod +import json +from enum import Enum +from typing import List + +import sqlalchemy as sql +from sqlalchemy import MetaData + +from mipdb.data_frame import DATASET_COLUMN_NAME +from mipdb.databases import credentials_from_config +from mipdb.dataelements import CommonDataElement +from mipdb.exceptions import UserInputError +from mipdb.schema import Schema + +RECORDS_PER_COPY = 100000 + + +class User(Enum): + credentials = credentials_from_config() + executor = ( + credentials["MONETDB_LOCAL_USERNAME"] + if credentials["MONETDB_LOCAL_USERNAME"] + else "executor" + ) + admin = ( + credentials["MONETDB_ADMIN_USERNAME"] + if credentials["MONETDB_ADMIN_USERNAME"] + else "admin" + ) + guest = ( + credentials["MONETDB_PUBLIC_USERNAME"] + if credentials["MONETDB_PUBLIC_USERNAME"] + else "guest" + ) + + +class SQLTYPES: + INTEGER = sql.Integer + STRING = sql.String(255) + FLOAT = sql.Float + + +STR2SQLTYPE = {"int": SQLTYPES.INTEGER, "text": SQLTYPES.STRING, "real": SQLTYPES.FLOAT} +TEMPORARY_TABLE_NAME = "temp" + + +class Table(ABC): + _table: sql.Table + + @abstractmethod + def __init__(self, schema): + pass + + @property + def table(self): + return self._table + + def create(self, db): + db.create_table(self._table) + db.grant_select_access_rights(self._table, User.executor.value) + + def exists(self, 
db): + return db.table_exists(self._table) + + def insert_values(self, values, db): + db.insert_values_to_table(self._table, values) + + def delete(self, db): + db.delete_table_values(self._table) + + def get_row_count(self, db): + return db.get_row_count(self.table.fullname) + + def get_column_distinct(self, column, db): + return db.get_column_distinct(column, self.table.fullname) + + def drop(self, db): + db.drop_table(self._table) + + +metadata = MetaData() + + +class PrimaryDataTable(Table): + def __init__(self): + self._table = None + + def set_table(self, table): + self._table = table + + @classmethod + def from_cdes( + cls, schema: Schema, cdes: List[CommonDataElement] + ) -> "PrimaryDataTable": + columns = [ + sql.Column( + cde.code, STR2SQLTYPE[json.loads(cde.metadata)["sql_type"]], quote=True + ) + for cde in cdes + ] + columns.insert( + 0, + sql.Column( + "row_id", + SQLTYPES.INTEGER, + primary_key=True, + quote=True, + ), + ) + table = sql.Table( + "primary_data", + schema.schema, + *columns, + ) + new_table = cls() + new_table.set_table(table) + return new_table + + def get_data_count_by_dataset(self, data_model_fullname, db): + return db.get_data_count_by_dataset(data_model_fullname) + + @classmethod + def from_db(cls, schema: Schema, db) -> "PrimaryDataTable": + table = sql.Table( + "primary_data", schema.schema, autoload_with=db.get_executor() + ) + new_table = cls() + table.columns = [ + sql.Column(column.name, quote=True) for column in list(table.columns) + ] + new_table.set_table(table) + return new_table + + def remove_dataset(self, dataset_name, schema_full_name, db): + delete = sql.text( + f'DELETE FROM "{schema_full_name}".primary_data ' + "WHERE dataset = :dataset_name " + ) + db.execute(delete, dataset_name=dataset_name) + + +class TemporaryTable(Table): + def __init__(self, dataframe_sql_type_per_column, db): + columns = [ + sql.Column(name, STR2SQLTYPE[sql_type], quote=True) + for name, sql_type in dataframe_sql_type_per_column.items() + ] + + self._table = sql.Table( + TEMPORARY_TABLE_NAME, + MetaData(bind=db.get_executor()), + *columns, + prefixes=["TEMPORARY"], + ) + + def validate_csv(self, csv_path, cdes_with_min_max, cdes_with_enumerations, db): + validated_datasets = [] + offset = 2 + + while True: + self.load_csv( + csv_path=csv_path, offset=offset, records=RECORDS_PER_COPY, db=db + ) + offset += RECORDS_PER_COPY + + table_count = self.get_row_count(db=db) + if not table_count: + break + + validated_datasets = set(validated_datasets) | set( + self.get_column_distinct(DATASET_COLUMN_NAME, db) + ) + self._validate_enumerations_restriction(cdes_with_enumerations, db) + self._validate_min_max_restriction(cdes_with_min_max, db) + self.delete(db) + + # If the temp contains fewer rows than RECORDS_PER_COPY + # that means we have read all the records in the csv and we need to stop the iteration. 
+ if table_count < RECORDS_PER_COPY: + break + + return validated_datasets + + def _validate_min_max_restriction(self, cdes_with_min_max, db): + for cde, min_max in cdes_with_min_max.items(): + min_value, max_value = min_max + cde_invalid_values = db.execute( + f"SELECT \"{cde}\" FROM {self.table.fullname} WHERE \"{cde}\" NOT BETWEEN '{min_value}' AND '{max_value}' " + ).fetchone() + if cde_invalid_values: + raise Exception( + f"In the column: '{cde}' the following values are invalid: '{cde_invalid_values}'" + ) + + def load_csv( + self, + csv_path, + db, + records=None, + offset=2, + ): + self._validate_csv_contains_eof(csv_path=csv_path) + db.copy_csv_in_table( + file_location=csv_path, + records=records, + offset=offset, + table_name=self.table.name, + ) + + def _validate_csv_contains_eof(self, csv_path): + with open(csv_path, "rb") as f: + last_line = f.readlines()[-1] + if not last_line.endswith(b"\n"): + raise UserInputError( + f"CSV:'{csv_path}' does not end with a valid EOF delimiter." + ) + + def _validate_enumerations_restriction(self, cdes_with_enumerations, db): + for cde, enumerations in cdes_with_enumerations.items(): + cde_invalid_values = db.execute( + f'SELECT "{cde}" from {self.table.fullname} where "{cde}" not in ({str(enumerations)[1:-1]})' + ).fetchone() + if cde_invalid_values: + raise Exception( + f"In the column: '{cde}' the following values are invalid: '{cde_invalid_values}'" + ) + + def set_table(self, table): + self._table = table diff --git a/mipdb/databases/sqlite.py b/mipdb/databases/sqlite.py new file mode 100644 index 0000000..0bed12f --- /dev/null +++ b/mipdb/databases/sqlite.py @@ -0,0 +1,367 @@ +from typing import List, Any, Dict +from enum import Enum + +import sqlalchemy as sql +from sqlalchemy import MetaData, ForeignKey, inspect +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm.exc import MultipleResultsFound + +from mipdb.exceptions import DataBaseError + +METADATA_TABLE = "variables_metadata" +PRIMARYDATA_TABLE = "primary_data" +metadata = MetaData() +Base = declarative_base(metadata=metadata) + + +class Status: + ENABLED = "ENABLED" + DISABLED = "DISABLED" + + +class DBType(Enum): + monetdb = "monetdb" + sqlite = "sqlite" + + +class SQLTYPES: + INTEGER = sql.Integer + STRING = sql.String(255) + FLOAT = sql.Float + JSON = sql.types.JSON + + +def handle_errors(func): + """Decorator for any function susceptible to raise a DB related exception.""" + + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as exc: # Use generic Exception to capture all errors + raise DataBaseError(f"Database error: {exc}") + + return wrapper + + +class DataModel(Base): + __tablename__ = "data_models" + data_model_id = sql.Column(sql.Integer, primary_key=True, autoincrement=True) + code = sql.Column(sql.String, nullable=False) + version = sql.Column(sql.String, nullable=False) + label = sql.Column(sql.String, nullable=False) + status = sql.Column(sql.String, nullable=False) + properties = sql.Column(sql.JSON, nullable=True) + + +class Dataset(Base): + __tablename__ = "datasets" + dataset_id = sql.Column(sql.Integer, primary_key=True, autoincrement=True) + data_model_id = sql.Column( + sql.Integer, ForeignKey("data_models.data_model_id"), nullable=False + ) + code = sql.Column(sql.String, nullable=False) + label = sql.Column(sql.String, nullable=False) + status = sql.Column(sql.String, nullable=False) + csv_path = sql.Column(sql.String, nullable=False) + properties = 
sql.Column(sql.JSON, nullable=True) + + +class SQLiteDB: + """Class representing a SQLite database interface.""" + + def __init__(self, url: str, echo=False) -> None: + self._executor = sql.create_engine(url, echo=echo) + self.Session = sessionmaker(bind=self._executor, autocommit=True) + + @classmethod + def from_config(cls, dbconfig: dict) -> "SQLiteDB": + db_path = dbconfig["db_path"] + url = f"sqlite:///{db_path}" + return SQLiteDB(url) + + @handle_errors + def execute(self, query: str, *args, **kwargs) -> None: + with self._executor.connect() as conn: + conn = conn.execution_options(autocommit=True) + conn.execute(sql.text(query), *args, **kwargs) + + @handle_errors + def execute_fetchall(self, query: str, *args, **kwargs) -> List[dict]: + with self._executor.connect() as conn: + result = conn.execute(sql.text(query), *args, **kwargs) + return result.fetchall() if result else [] + + def insert_values_to_table(self, table: sql.Table, values: List[dict]) -> None: + session = self.Session() + try: + session.execute(table.insert(), values) + finally: + session.close() + + def get_data_model_status(self, data_model_id: int) -> Any: + session = self.Session() + try: + query = session.query(DataModel.status).filter( + DataModel.data_model_id == data_model_id + ) + result = query.one_or_none() + finally: + session.close() + if result: + return result[0] + return None + + def update_data_model_status(self, status: str, data_model_id: int) -> None: + session = self.Session() + try: + session.query(DataModel).filter( + DataModel.data_model_id == data_model_id + ).update({"status": status}) + + finally: + session.close() + + def get_dataset_status(self, dataset_id: int) -> Any: + session = self.Session() + try: + query = session.query(Dataset.status).filter( + Dataset.dataset_id == dataset_id + ) + result = query.one_or_none() + finally: + session.close() + if result: + return result[0] + return None + + def get_metadata(self, data_model: str) -> dict: + session = self.Session() + try: + table = sql.Table( + f"{data_model}_{METADATA_TABLE}", + Base.metadata, + sql.Column("code", SQLTYPES.STRING, primary_key=True), + sql.Column("metadata", SQLTYPES.JSON), + extend_existing=True, + ) + query = session.query(table.c.code, table.c.metadata) + res = query.all() + finally: + session.close() + return {row.code: row.metadata for row in res} + + def update_dataset_status(self, status: str, dataset_id: int) -> None: + session = self.Session() + try: + session.query(Dataset).filter(Dataset.dataset_id == dataset_id).update( + {"status": status} + ) + + finally: + session.close() + + def get_dataset(self, dataset_id: int, columns: List[str]) -> Any: + session = self.Session() + try: + query = session.query(*[getattr(Dataset, col) for col in columns]).filter( + Dataset.dataset_id == dataset_id + ) + result = query.one_or_none() + finally: + session.close() + return result + + def get_data_model(self, data_model_id: int, columns: List[str]) -> Any: + session = self.Session() + try: + query = session.query(*[getattr(DataModel, col) for col in columns]).filter( + DataModel.data_model_id == data_model_id + ) + result = query.one_or_none() + finally: + session.close() + return result + + def get_values( + self, table, columns: List[str] = None, where_conditions: Dict[str, Any] = None + ) -> List[dict]: + session = self.Session() + try: + if columns is None: + columns = [ + col.name for col in table.columns + ] # Get all columns if none are specified + query = session.query(*[getattr(table.c, col) for col in 
columns]) + + if where_conditions: + for col, value in where_conditions.items(): + query = query.filter(getattr(table.c, col) == value) + + result = query.all() + finally: + session.close() + return result + + def get_data_models(self, columns: List[str]) -> List[dict]: + session = self.Session() + try: + query = session.query(*[getattr(DataModel, col) for col in columns]) + result = query.all() + finally: + session.close() + return result + + def get_dataset_count_by_data_model_id(self) -> List[dict]: + session = self.Session() + try: + query = session.query( + Dataset.data_model_id, + sql.func.count(Dataset.data_model_id).label("count"), + ).group_by(Dataset.data_model_id) + result = query.all() + finally: + session.close() + return result + + def get_row_count(self, table: str) -> int: + session = self.Session() + try: + query = session.query(sql.func.count()).select_from(sql.text(table)) + result = query.scalar() + finally: + session.close() + return result + + def drop_table(self, table: str) -> None: + session = self.Session() + try: + table = sql.Table(table, metadata, autoload_with=self._executor) + table.drop(bind=self._executor) + + finally: + session.close() + + def delete_from(self, table, where_conditions: Dict[str, Any]) -> None: + session = self.Session() + try: + query = session.query(table) + if where_conditions: + for col, value in where_conditions.items(): + query = query.filter(getattr(table.c, col) == value) + + query.delete(synchronize_session=False) + finally: + session.close() + + def get_dataset_properties(self, dataset_id: int) -> Any: + session = self.Session() + try: + query = session.query(Dataset.properties).filter( + Dataset.dataset_id == dataset_id + ) + result = query.one_or_none() + finally: + session.close() + + return result[0] if result else {} + + def get_data_model_properties(self, data_model_id: int) -> Any: + session = self.Session() + try: + query = session.query(DataModel.properties).filter( + DataModel.data_model_id == data_model_id + ) + result = query.one_or_none() + finally: + session.close() + + return result[0] if result else {} + + def set_data_model_properties(self, properties: dict, data_model_id: int) -> None: + session = self.Session() + try: + session.query(DataModel).filter( + DataModel.data_model_id == data_model_id + ).update({"properties": properties}) + + finally: + session.close() + + def set_dataset_properties(self, properties: dict, dataset_id: int) -> None: + session = self.Session() + try: + session.query(Dataset).filter(Dataset.dataset_id == dataset_id).update( + {"properties": properties} + ) + + finally: + session.close() + + def get_data_model_id(self, code: str, version: str) -> int: + session = self.Session() + try: + query = session.query(DataModel.data_model_id).filter( + DataModel.code == code, DataModel.version == version + ) + data_model_id = query.scalar() + except MultipleResultsFound: + raise DataBaseError( + f"Got more than one data_model ids for {code=} and {version=}." 
+ ) + finally: + session.close() + + if not data_model_id: + raise DataBaseError( + f"Data_models table doesn't have a record with {code=}, {version=}" + ) + + return data_model_id + + def get_max_data_model_id(self) -> int: + session = self.Session() + try: + result = session.query(sql.func.max(DataModel.data_model_id)).scalar() + finally: + session.close() + return result + + def get_max_dataset_id(self) -> int: + session = self.Session() + try: + result = session.query(sql.func.max(Dataset.dataset_id)).scalar() + finally: + session.close() + return result + + def get_dataset_id(self, code, data_model_id) -> int: + session = self.Session() + try: + query = session.query(Dataset.dataset_id).filter( + Dataset.code == code, Dataset.data_model_id == data_model_id + ) + dataset_id = query.scalar() + except MultipleResultsFound: + raise DataBaseError( + f"Got more than one dataset ids for {code=} and {data_model_id=}." + ) + finally: + session.close() + + if not dataset_id: + raise DataBaseError( + f"Datasets table doesn't have a record with {code=}, {data_model_id=}" + ) + + return dataset_id + + def table_exists(self, table) -> bool: + return table.exists(bind=self._executor) + + def create_table(self, table: sql.Table) -> None: + table.create(bind=self._executor) + + def get_all_tables(self) -> List[str]: + inspector = inspect(self._executor) + return inspector.get_table_names() diff --git a/mipdb/databases/sqlite_tables.py b/mipdb/databases/sqlite_tables.py new file mode 100644 index 0000000..8aa51bb --- /dev/null +++ b/mipdb/databases/sqlite_tables.py @@ -0,0 +1,194 @@ +from abc import ABC, abstractmethod + +import sqlalchemy as sql +from sqlalchemy import Integer, String, JSON +from sqlalchemy.ext.declarative import declarative_base +from mipdb.dataelements import CommonDataElement +from mipdb.exceptions import DataBaseError +from mipdb.databases.sqlite import DataModel, Dataset + +METADATA_TABLE = "variables_metadata" +PRIMARYDATA_TABLE = "primary_data" + +Base = declarative_base() + + +class Status: + ENABLED = "ENABLED" + DISABLED = "DISABLED" + + +class SQLTYPES: + INTEGER = Integer + STRING = String(255) + FLOAT = sql.Float + JSON = JSON + + +def get_metadata_table_name(data_model): + return f"{data_model}_{METADATA_TABLE}" + + +def handle_errors(func): + """Decorator for any function susceptible to raise a DB related exception.""" + + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as exc: # Use generic Exception to capture all errors + raise DataBaseError(f"Database error: {exc}") + + return wrapper + + +class Table(ABC): + _table: Base + + @abstractmethod + def __init__(self): + pass + + @property + def table(self): + return self._table + + def create(self, db): + db.create_table(self._table) + + def exists(self, db): + return db.table_exists(self._table) + + def insert_values(self, values, db): + db.insert_values_to_table(self._table, values) + + def delete(self, db): + db.delete_from(self._table, where_conditions={}) + + def get_row_count(self, db): + return db.get_row_count(self.table.fullname) + + def get_column_distinct(self, column, db): + return db.get_column_distinct(column, self.table.fullname) + + def drop(self, db): + db.drop_table(self._table) + + +class DataModelTable(Table): + def __init__(self): + self._table = DataModel.__table__ + + def get_data_models(self, db, columns: list = None): + return db.get_values(table=self._table, columns=columns) + + def get_data_model(self, data_model_id, db, columns: list = None): + return 
db.get_data_models_values( + columns=columns, where_conditions={"data_model_id": data_model_id} + ) + + def get_dataset_count_by_data_model_id(self, db): + return db.get_dataset_count_by_data_model_id() + + def get_data_model_id(self, code, version, db): + return db.get_data_model_id(code, version) + + def get_data_model_properties(self, data_model_id, db): + return db.get_data_model_properties(data_model_id) + + def set_data_model_properties(self, properties, data_model_id, db): + db.set_data_model_properties(properties, data_model_id) + + def get_data_model_status(self, data_model_id, db): + return db.get_data_model_status(data_model_id) + + def set_data_model_status(self, status, data_model_id, db): + db.update_data_model_status(status, data_model_id) + + def delete_data_model(self, code, version, db): + db.delete_from(self._table, where_conditions={"code": code, "version": version}) + + def get_next_data_model_id(self, db): + result = db.get_max_data_model_id() + return result + 1 if result else 1 + + +class DatasetsTable(Table): + def __init__(self): + self._table = Dataset.__table__ + + def get_datasets(self, db, columns: list = None): + return db.get_values(table=self._table, columns=columns, where_conditions={}) + + def get_dataset_codes(self, db, data_model_id=None, columns=None): + result = db.get_values( + table=self._table, + columns=columns, + where_conditions={"data_model_id": data_model_id}, + ) + return [dataset[0] for dataset in result] + + def get_dataset(self, db, dataset_id=None, columns=None): + return db.get_values( + table=self._table, + columns=columns, + where_conditions={"dataset_id": dataset_id}, + ) + + def get_dataset_properties(self, dataset_id, db): + return db.get_dataset_properties(dataset_id) + + def set_dataset_properties(self, properties, dataset_id, db): + db.set_dataset_properties(properties, dataset_id) + + def delete_dataset(self, dataset_id, data_model_id, db): + db.delete_from( + self._table, + where_conditions={"dataset_id": dataset_id, "data_model_id": data_model_id}, + ) + + def get_next_dataset_id(self, db): + result = db.get_max_dataset_id() + return result + 1 if result else 1 + + def get_dataset_status(self, data_model_id, db): + return db.get_dataset_status(data_model_id) + + def set_dataset_status(self, status, dataset_id, db): + db.update_dataset_status(status, dataset_id) + + def get_dataset_id(self, code, data_model_id, db): + return db.get_dataset_id(code, data_model_id) + + +class MetadataTable(Table): + def __init__(self, data_model): + self.name = get_metadata_table_name(data_model) + self._table = sql.Table( + self.name, + Base.metadata, + sql.Column("code", SQLTYPES.STRING, primary_key=True), + sql.Column("metadata", SQLTYPES.JSON), + extend_existing=True, + ) + + def set_table(self, table): + self._table = table + + @classmethod + def from_db(cls, data_model, db): + res = db.get_metadata(data_model) + new_table = cls(data_model) + new_table.set_table( + { + code: CommonDataElement.from_metadata(metadata) + for code, metadata in res.items() + } + ) + return new_table + + @staticmethod + def get_values_from_cdes(cdes): + return [{"code": cde.code, "metadata": cde.metadata} for cde in cdes] + + def insert_values(self, values, db): + db.execute(f'INSERT INTO "{self.name}" VALUES(:code, :metadata)', values) diff --git a/mipdb/dataelements.py b/mipdb/dataelements.py index d3e8372..1936226 100644 --- a/mipdb/dataelements.py +++ b/mipdb/dataelements.py @@ -164,4 +164,6 @@ def validate_metadata(code, metadata): valid_metadata_types = 
["nominal", "real", "integer", "text"] if metadata["type"] not in valid_metadata_types: - raise InvalidDataModelError(f"The CDE {code} has an 'type' the only valid types are:{valid_metadata_types} ") + raise InvalidDataModelError( + f"The CDE {code} has an 'type' the only valid types are:{valid_metadata_types} " + ) diff --git a/mipdb/properties.py b/mipdb/properties.py index 8f1d433..3cc289b 100644 --- a/mipdb/properties.py +++ b/mipdb/properties.py @@ -7,39 +7,31 @@ class Properties: def __init__(self, properties) -> None: self.properties = properties if not self.properties: - self.properties = json.dumps({"tags": [], "properties": {}}) + self.properties = {"tags": [], "properties": {}} def remove_tag(self, tag): - properties_dict = json.loads(self.properties) - if tag in properties_dict["tags"]: - properties_dict["tags"].remove(tag) - self.properties = json.dumps(properties_dict) + if tag in self.properties["tags"]: + self.properties["tags"].remove(tag) else: raise UserInputError("Tag does not exist") def add_tag(self, tag): - properties_dict = json.loads(self.properties) - if tag not in properties_dict["tags"]: - properties_dict["tags"].append(tag) - self.properties = json.dumps(properties_dict) + if tag not in self.properties["tags"]: + self.properties["tags"].append(tag) else: raise UserInputError("Tag already exists") def remove_property(self, key, value): - properties_dict = json.loads(self.properties) - if (key, value) in properties_dict["properties"].items(): - properties_dict["properties"].pop(key) - self.properties = json.dumps(properties_dict) + if (key, value) in self.properties["properties"].items(): + self.properties["properties"].pop(key) else: raise UserInputError("Property does not exist") def add_property(self, key, value, force): - properties_dict = json.loads(self.properties) - if key in properties_dict["properties"] and not force: + if key in self.properties["properties"] and not force: raise UserInputError( "Property already exists.\n" "If you want to force override the property, please use the '--force' flag" ) else: - properties_dict["properties"][key] = value - self.properties = json.dumps(properties_dict) + self.properties["properties"][key] = value diff --git a/mipdb/schema.py b/mipdb/schema.py index 1fa8aeb..a86b916 100644 --- a/mipdb/schema.py +++ b/mipdb/schema.py @@ -1,6 +1,5 @@ import sqlalchemy as sql -from mipdb.database import DataBase from mipdb.exceptions import UserInputError @@ -16,10 +15,10 @@ def __init__(self, name) -> None: def __repr__(self) -> str: return f"Schema(name={self.name})" - def create(self, db: DataBase): + def create(self, db): db.create_schema(self.name) - def drop(self, db: DataBase): + def drop(self, db): db.drop_schema(self.name) def _validate_schema_name(self): diff --git a/mipdb/tables.py b/mipdb/tables.py deleted file mode 100644 index c9052d2..0000000 --- a/mipdb/tables.py +++ /dev/null @@ -1,441 +0,0 @@ -from abc import ABC, abstractmethod -import json -from enum import Enum -from typing import Union, List - -import sqlalchemy as sql -from sqlalchemy import ForeignKey, Integer, MetaData -from sqlalchemy.ext.compiler import compiles - -from mipdb.database import DataBase, Connection, credentials_from_config -from mipdb.data_frame import DATASET_COLUMN_NAME -from mipdb.database import DataBase, Connection -from mipdb.database import METADATA_SCHEMA -from mipdb.database import METADATA_TABLE -from mipdb.dataelements import CommonDataElement -from mipdb.exceptions import UserInputError -from mipdb.schema import Schema - 
-RECORDS_PER_COPY = 100000 - - -class User(Enum): - credentials = credentials_from_config() - executor = credentials['MONETDB_LOCAL_USERNAME'] if credentials['MONETDB_LOCAL_USERNAME'] else "executor" - admin = credentials['MONETDB_ADMIN_USERNAME'] if credentials['MONETDB_ADMIN_USERNAME'] else "admin" - guest = credentials['MONETDB_PUBLIC_USERNAME'] if credentials['MONETDB_PUBLIC_USERNAME'] else "guest" - - -@compiles(sql.types.JSON, "monetdb") -def compile_binary_monetdb(type_, compiler, **kw): - # The monetdb plugin for sqlalchemy doesn't seem to implement the JSON - # datatype hence we need to teach sqlalchemy how to compile it - return "JSON" - - -class SQLTYPES: - INTEGER = sql.Integer - STRING = sql.String(255) - FLOAT = sql.Float - JSON = sql.types.JSON - - -STR2SQLTYPE = {"int": SQLTYPES.INTEGER, "text": SQLTYPES.STRING, "real": SQLTYPES.FLOAT} -TEMPORARY_TABLE_NAME = "temp" - - -class Table(ABC): - _table: sql.Table - - @abstractmethod - def __init__(self, schema): - pass - - @property - def table(self): - return self._table - - def create(self, db: Union[DataBase, Connection]): - db.create_table(self._table) - db.grant_select_access_rights(self._table, User.executor.value) - - def exists(self, db: Union[DataBase, Connection]): - return db.table_exists(self._table) - - def insert_values(self, values, db: Union[DataBase, Connection]): - db.insert_values_to_table(self._table, values) - - def delete(self, db: Union[DataBase, Connection]): - db.delete_table_values(self._table) - - def get_row_count(self, db): - return db.get_row_count(self.table.fullname) - - def get_column_distinct(self, column, db): - return db.get_column_distinct(column, self.table.fullname) - - def drop(self, db: Union[DataBase, Connection]): - db.drop_table(self._table) - - -class DataModelTable(Table): - def __init__(self, schema): - self.data_model_id_seq = sql.Sequence( - "data_model_id_seq", metadata=schema.schema - ) - self._table = sql.Table( - "data_models", - schema.schema, - sql.Column( - "data_model_id", - SQLTYPES.INTEGER, - self.data_model_id_seq, - primary_key=True, - ), - sql.Column("code", SQLTYPES.STRING, nullable=False), - sql.Column("version", SQLTYPES.STRING, nullable=False), - sql.Column("label", SQLTYPES.STRING), - sql.Column("status", SQLTYPES.STRING, nullable=False), - sql.Column("properties", SQLTYPES.JSON), - ) - - def drop_sequence(self, db: Union[DataBase, Connection]): - if db.get_executor(): - self.data_model_id_seq.drop(db.get_executor()) - - def get_data_models(self, db, columns: list = None): - if columns and not set(columns).issubset(self.table.columns.keys()): - non_existing_columns = list(set(columns) - set(self.table.columns.keys())) - raise ValueError( - f"The columns: {non_existing_columns} do not exist in the data models schema" - ) - return db.get_data_models(columns) - - def get_data_model(self, data_model_id, db, columns: list = None): - if columns and not set(columns).issubset(self.table.columns.keys()): - non_existing_columns = list(set(columns) - set(self.table.columns.keys())) - raise ValueError( - f"The columns: {non_existing_columns} do not exist in the data model's schema" - ) - return db.get_data_model(data_model_id, columns) - - def get_dataset_count_by_data_model_id(self, db): - return db.get_dataset_count_by_data_model_id() - - def get_data_model_id(self, code, version, db): - return db.get_data_model_id(code, version) - - def get_data_model_properties(self, data_model_id, db): - return db.get_data_model_properties(data_model_id) - - def 
set_data_model_properties(self, properties, data_model_id, db): - db.set_data_model_properties(properties, data_model_id) - - def get_data_model_status(self, data_model_id, db): - return db.get_data_model_status(data_model_id) - - def set_data_model_status(self, status, data_model_id, db): - db.update_data_model_status(status, data_model_id) - - def delete_data_model(self, code, version, db): - delete = sql.text( - f"DELETE FROM {METADATA_SCHEMA}.data_models " - "WHERE code = :code " - "AND version = :version " - ) - db.execute(delete, code=code, version=version) - - def get_next_data_model_id(self, db): - return db.execute(self.data_model_id_seq) - - -class DatasetsTable(Table): - def __init__(self, schema): - self.dataset_id_seq = sql.Sequence("dataset_id_seq", metadata=schema.schema) - self._table = sql.Table( - "datasets", - schema.schema, - sql.Column( - "dataset_id", - SQLTYPES.INTEGER, - self.dataset_id_seq, - primary_key=True, - ), - sql.Column( - "data_model_id", - SQLTYPES.INTEGER, - ForeignKey("data_models.data_model_id"), - nullable=False, - ), - sql.Column("code", SQLTYPES.STRING, nullable=False), - sql.Column("label", SQLTYPES.STRING), - sql.Column("status", SQLTYPES.STRING, nullable=False), - sql.Column("csv_path", SQLTYPES.STRING, nullable=False), - sql.Column("properties", SQLTYPES.JSON), - ) - - def drop_sequence(self, db: Union[DataBase, Connection]): - if db.get_executor(): - self.dataset_id_seq.drop(db.get_executor()) - - def get_values(self, db, data_model_id=None, columns=None): - if columns and not set(columns).issubset(self.table.columns.keys()): - non_existing_columns = list(set(columns) - set(self.table.columns.keys())) - raise ValueError( - f"The columns: {non_existing_columns} do not exist in the datasets schema" - ) - datasets = db.get_values(data_model_id, columns) - if columns and len(columns) == 1: - return [attribute for attribute, *_ in datasets] - return db.get_values(data_model_id, columns) - - def get_dataset(self, db, dataset_id=None, columns=None): - if columns and not set(columns).issubset(self.table.columns.keys()): - non_existing_columns = list(set(columns) - set(self.table.columns.keys())) - raise ValueError( - f"The columns: {non_existing_columns} do not exist in the datasets schema" - ) - return db.get_dataset(dataset_id, columns) - - def get_data_count_by_dataset(self, data_model_fullname, db): - return db.get_data_count_by_dataset(data_model_fullname) - - def get_dataset_properties(self, dataset_id, db): - return db.get_dataset_properties(dataset_id) - - def set_dataset_properties(self, properties, dataset_id, db): - db.set_dataset_properties(properties, dataset_id) - - def delete_dataset(self, dataset_id, data_model_id, db): - delete = sql.text( - f"DELETE FROM {METADATA_SCHEMA}.datasets " - "WHERE dataset_id = :dataset_id " - "AND data_model_id = :data_model_id " - ) - db.execute(delete, dataset_id=dataset_id, data_model_id=data_model_id) - - def get_next_dataset_id(self, db): - return db.execute(self.dataset_id_seq) - - def get_dataset_status(self, data_model_id, db): - return db.get_dataset_status(data_model_id) - - def set_dataset_status(self, status, dataset_id, db): - db.update_dataset_status(status, dataset_id) - - def get_dataset_id(self, code, data_model_id, db): - return db.get_dataset_id(code, data_model_id) - - -class ActionsTable(Table): - def __init__(self, schema): - self.action_id_seq = sql.Sequence("action_id_seq", metadata=schema.schema) - self._table = sql.Table( - "actions", - schema.schema, - sql.Column( - "action_id", - 
SQLTYPES.INTEGER, - self.action_id_seq, - primary_key=True, - ), - sql.Column("action", SQLTYPES.JSON), - ) - - def drop_sequence(self, db: Union[DataBase, Connection]): - if db.get_executor(): - self.action_id_seq.drop(db.get_executor()) - - def insert_values(self, values, db: Union[DataBase, Connection]): - # Needs to be overridden because sqlalchemy and monetdb are not cooperating - # well when inserting values to JSON columns - query = sql.text( - f'INSERT INTO "{METADATA_SCHEMA}".actions VALUES(:action_id, :action)' - ) - db.execute(query, values) - - def get_next_id(self, db): - return db.execute(self.action_id_seq) - - -class PrimaryDataTable(Table): - def __init__(self): - self._table = None - - def set_table(self, table): - self._table = table - - @classmethod - def from_cdes( - cls, schema: Schema, cdes: List[CommonDataElement] - ) -> "PrimaryDataTable": - columns = [ - sql.Column( - cde.code, STR2SQLTYPE[json.loads(cde.metadata)["sql_type"]], quote=True - ) - for cde in cdes - ] - columns.insert( - 0, - sql.Column( - "row_id", - SQLTYPES.INTEGER, - primary_key=True, - quote=True, - ), - ) - table = sql.Table( - "primary_data", - schema.schema, - *columns, - ) - new_table = cls() - new_table.set_table(table) - return new_table - - @classmethod - def from_db(cls, schema: Schema, db: DataBase) -> "PrimaryDataTable": - table = sql.Table( - "primary_data", schema.schema, autoload_with=db.get_executor() - ) - new_table = cls() - table.columns = [ - sql.Column(column.name, quote=True) for column in list(table.columns) - ] - new_table.set_table(table) - return new_table - - def remove_dataset(self, dataset_name, schema_full_name, db): - delete = sql.text( - f'DELETE FROM "{schema_full_name}"."primary_data" ' - "WHERE dataset = :dataset_name " - ) - db.execute(delete, dataset_name=dataset_name) - - -class MetadataTable(Table): - def __init__(self, schema: Schema) -> None: - self.schema = schema.name - self._table = sql.Table( - METADATA_TABLE, - schema.schema, - sql.Column("code", SQLTYPES.STRING, primary_key=True), - sql.Column("metadata", SQLTYPES.JSON), - ) - - def set_table(self, table): - self._table = table - - @classmethod - def from_db(cls, schema, db): - res = db.get_metadata(schema) - new_table = cls(schema) - new_table.set_table( - { - code: CommonDataElement.from_metadata(json.loads(metadata)) - for code, metadata in res.items() - } - ) - return new_table - - @staticmethod - def get_values_from_cdes(cdes): - return [{"code": cde.code, "metadata": cde.metadata} for cde in cdes] - - def insert_values(self, values, db: Union[DataBase, Connection]): - # Needs to be overridden because sqlalchemy and monetdb are not cooperating - # well when inserting values to JSON columns - query = sql.text( - f'INSERT INTO "{self.schema}".{METADATA_TABLE} VALUES(:code, :metadata)' - ) - db.execute(query, values) - - -class TemporaryTable(Table): - def __init__(self, dataframe_sql_type_per_column, db): - columns = [ - sql.Column(name, STR2SQLTYPE[sql_type], quote=True) - for name, sql_type in dataframe_sql_type_per_column.items() - ] - - self._table = sql.Table( - TEMPORARY_TABLE_NAME, - MetaData(bind=db.get_executor()), - *columns, - prefixes=["TEMPORARY"], - ) - - def validate_csv(self, csv_path, cdes_with_min_max, cdes_with_enumerations, db): - validated_datasets = [] - offset = 2 - - while True: - self.load_csv( - csv_path=csv_path, offset=offset, records=RECORDS_PER_COPY, db=db - ) - offset += RECORDS_PER_COPY - - table_count = self.get_row_count(db=db) - if not table_count: - break - - 
validated_datasets = set(validated_datasets) | set( - self.get_column_distinct(DATASET_COLUMN_NAME, db) - ) - self._validate_enumerations_restriction(cdes_with_enumerations, db) - self._validate_min_max_restriction(cdes_with_min_max, db) - self.delete(db) - - # If the temp contains fewer rows than RECORDS_PER_COPY - # that means we have read all the records in the csv and we need to stop the iteration. - if table_count < RECORDS_PER_COPY: - break - - return validated_datasets - - def _validate_min_max_restriction(self, cdes_with_min_max, db): - for cde, min_max in cdes_with_min_max.items(): - min_value, max_value = min_max - cde_invalid_values = db.execute( - f"SELECT \"{cde}\" FROM {self.table.fullname} WHERE \"{cde}\" NOT BETWEEN '{min_value}' AND '{max_value}' " - ).fetchone() - if cde_invalid_values: - raise Exception( - f"In the column: '{cde}' the following values are invalid: '{cde_invalid_values}'" - ) - - def load_csv( - self, - csv_path, - db, - records=None, - offset=2, - ): - self._validate_csv_contains_eof(csv_path=csv_path) - db.copy_csv_in_table( - file_location=csv_path, - records=records, - offset=offset, - table_name=self.table.name, - ) - - def _validate_csv_contains_eof(self, csv_path): - with open(csv_path, "rb") as f: - last_line = f.readlines()[-1] - if not last_line.endswith(b"\n"): - raise UserInputError( - f"CSV:'{csv_path}' does not end with a valid EOF delimiter." - ) - - def _validate_enumerations_restriction(self, cdes_with_enumerations, db): - for cde, enumerations in cdes_with_enumerations.items(): - cde_invalid_values = db.execute( - f'SELECT "{cde}" from {self.table.fullname} where "{cde}" not in ({str(enumerations)[1:-1]})' - ).fetchone() - if cde_invalid_values: - raise Exception( - f"In the column: '{cde}' the following values are invalid: '{cde_invalid_values}'" - ) - - def set_table(self, table): - self._table = table diff --git a/mipdb/usecases.py b/mipdb/usecases.py index ba7a398..9a78fc7 100644 --- a/mipdb/usecases.py +++ b/mipdb/usecases.py @@ -1,19 +1,19 @@ import copy -import datetime -import json import os from abc import ABC, abstractmethod import pandas as pd -from mipdb.database import DataBase -from mipdb.database import METADATA_SCHEMA +from mipdb.databases.monetdb import MonetDB from mipdb.data_frame_schema import DataFrameSchema -from mipdb.exceptions import ForeignKeyError, InvalidDatasetError -from mipdb.exceptions import UserInputError +from mipdb.exceptions import ForeignKeyError, InvalidDatasetError, UserInputError +from mipdb.databases.monetdb_tables import ( + PrimaryDataTable, + TemporaryTable, + RECORDS_PER_COPY, +) from mipdb.properties import Properties from mipdb.reader import CSVDataFrameReader -from mipdb.schema import Schema from mipdb.dataelements import ( flatten_cdes, validate_dataset_present_on_cdes_with_proper_format, @@ -23,15 +23,9 @@ get_cdes_with_enumerations, get_dataset_enums, ) -from mipdb.tables import ( - DataModelTable, - DatasetsTable, - ActionsTable, - MetadataTable, - PrimaryDataTable, - TemporaryTable, - RECORDS_PER_COPY, -) +from mipdb.schema import Schema +from mipdb.databases.sqlite import SQLiteDB +from mipdb.databases.sqlite_tables import DataModelTable, DatasetsTable, MetadataTable from mipdb.data_frame import DataFrame, DATASET_COLUMN_NAME LONGITUDINAL = "longitudinal" @@ -42,181 +36,135 @@ class UseCase(ABC): @abstractmethod def execute(self, *args, **kwargs) -> None: - """Executes use case logic with arguments from cli command. 
Has side - effects but no return values.""" - - -def is_db_initialized(db): - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - datasets_table = DatasetsTable(schema=metadata) - actions_table = ActionsTable(schema=metadata) - - with db.begin() as conn: - if ( - "mipdb_metadata" in db.get_schemas() - and data_model_table.exists(conn) - and datasets_table.exists(conn) - and actions_table.exists(conn) - ): - return True - else: - raise UserInputError( - "You need to initialize the database!\n " - "Try mipdb init --port " - ) + """Executes use case logic with arguments from CLI command.""" + + +def is_db_initialized(db: SQLiteDB): + if not (DataModelTable().exists(db) and DatasetsTable().exists(db)): + raise UserInputError("You need to initialize the database!\nTry mipdb init") class InitDB(UseCase): - def __init__(self, db: DataBase) -> None: + def __init__(self, db: SQLiteDB) -> None: self.db = db def execute(self) -> None: - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - datasets_table = DatasetsTable(schema=metadata) - actions_table = ActionsTable(schema=metadata) - - with self.db.begin() as conn: - if "mipdb_metadata" not in self.db.get_schemas(): - metadata.create(conn) - if not data_model_table.exists(conn): - data_model_table.drop_sequence(conn) - data_model_table.create(conn) - if not datasets_table.exists(conn): - datasets_table.drop_sequence(conn) - datasets_table.create(conn) - if not actions_table.exists(conn): - actions_table.drop_sequence(conn) - actions_table.create(conn) + data_model_table, datasets_table = DataModelTable(), DatasetsTable() + if not data_model_table.exists(self.db): + data_model_table.create(self.db) + if not datasets_table.exists(self.db): + datasets_table.create(self.db) class AddDataModel(UseCase): - def __init__(self, db: DataBase) -> None: - self.db = db - is_db_initialized(db) + def __init__(self, sqlite_db: SQLiteDB, monetdb: MonetDB) -> None: + self.sqlite_db = sqlite_db + self.monetdb = monetdb + is_db_initialized(sqlite_db) def execute(self, data_model_metadata) -> None: - code = data_model_metadata["code"] - version = data_model_metadata["version"] - name = get_data_model_fullname(code, version) + code, version = data_model_metadata["code"], data_model_metadata["version"] + data_model = get_data_model_fullname(code, version) cdes = flatten_cdes(copy.deepcopy(data_model_metadata)) - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - - with self.db.begin() as conn: - data_model_id = data_model_table.get_next_data_model_id(conn) - schema = self._create_schema(name, conn) - self._create_primary_data_table(schema, cdes, conn) - self._create_metadata_table(schema, conn, cdes) - values = dict( - data_model_id=data_model_id, - code=code, - version=version, - label=data_model_metadata["label"], - status="ENABLED", - ) - data_model_table.insert_values(values, conn) - - data_model_details = _get_data_model_details(data_model_id, conn) - update_actions( - conn=conn, - action="ADD DATA MODEL", - data_model_details=data_model_details, - ) - AddPropertyToDataModel(self.db).execute( - code=code, - version=version, - key="cdes", - value=data_model_metadata, - force=True, - ) - if LONGITUDINAL in data_model_metadata: - longitudinal = data_model_metadata[LONGITUDINAL] - if not isinstance(longitudinal, bool): - raise UserInputError(f"Longitudinal flag should be boolean, value given: {longitudinal}") - if longitudinal: - 
TagDataModel(self.db).execute( - code=code, version=version, tag=LONGITUDINAL - ) - - def _create_schema(self, name, conn): - schema = Schema(name) - schema.create(conn) - return schema - - def _create_primary_data_table(self, schema, cdes, conn): - primary_data_table = PrimaryDataTable.from_cdes(schema, cdes) - primary_data_table.create(conn) - - def _create_metadata_table(self, schema, conn, cdes): - metadata_table = MetadataTable(schema) - metadata_table.create(conn) + data_model_table = DataModelTable() + data_model_id = data_model_table.get_next_data_model_id(self.sqlite_db) + self._create_primary_data_table(data_model, cdes) + self._create_metadata_table(data_model, cdes) + properties = Properties( + data_model_table.get_data_model_properties(data_model_id, self.sqlite_db) + ) + properties.add_property("cdes", data_model_metadata, True) + values = dict( + data_model_id=data_model_id, + code=code, + version=version, + label=data_model_metadata["label"], + status="ENABLED", + properties=properties.properties, + ) + data_model_table.insert_values(values, self.sqlite_db) + self._tag_longitudinal_if_needed(data_model_metadata, code, version) + + def _create_primary_data_table(self, data_model, cdes): + with self.monetdb.begin() as conn: + schema = Schema(data_model) + schema.create(conn) + PrimaryDataTable.from_cdes(schema, cdes).create(conn) + + def _create_metadata_table(self, data_model, cdes): + metadata_table = MetadataTable(data_model) + metadata_table.create(self.sqlite_db) values = metadata_table.get_values_from_cdes(cdes) - metadata_table.insert_values(values, conn) + metadata_table.insert_values(values, self.sqlite_db) + + def _tag_longitudinal_if_needed(self, data_model_metadata, code, version): + if LONGITUDINAL in data_model_metadata: + longitudinal = data_model_metadata[LONGITUDINAL] + if not isinstance(longitudinal, bool): + raise UserInputError( + f"Longitudinal flag should be boolean, value given: {longitudinal}" + ) + if longitudinal: + TagDataModel(self.sqlite_db).execute( + code=code, version=version, tag=LONGITUDINAL + ) class ValidateDataModel(UseCase): def execute(self, data_model_metadata) -> None: if "version" not in data_model_metadata: - raise UserInputError("You need to include a version on the CDEsMetadata.json") - + raise UserInputError( + "You need to include a version on the CDEsMetadata.json" + ) cdes = flatten_cdes(copy.deepcopy(data_model_metadata)) validate_dataset_present_on_cdes_with_proper_format(cdes) if LONGITUDINAL in data_model_metadata: longitudinal = data_model_metadata[LONGITUDINAL] if not isinstance(longitudinal, bool): - raise UserInputError(f"Longitudinal flag should be boolean, value given: {longitudinal}") + raise UserInputError( + f"Longitudinal flag should be boolean, value given: {longitudinal}" + ) if longitudinal: validate_longitudinal_data_model(cdes) class DeleteDataModel(UseCase): - def __init__(self, db: DataBase) -> None: - self.db = db - is_db_initialized(db) + def __init__(self, sqlite_db: SQLiteDB, monetdb: MonetDB) -> None: + self.sqlite_db = sqlite_db + self.monetdb = monetdb + is_db_initialized(sqlite_db) def execute(self, code, version, force) -> None: name = get_data_model_fullname(code, version) schema = Schema(name) - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - - with self.db.begin() as conn: - data_model_id = data_model_table.get_data_model_id(code, version, conn) - if not force: - self._validate_data_model_deletion(name, data_model_id, conn) - - data_model_details = 
_get_data_model_details(data_model_id, conn) - self._delete_datasets(data_model_id, code, version) + data_model_table = DataModelTable() + data_model_id = data_model_table.get_data_model_id( + code, version, self.sqlite_db + ) + if not force: + self._validate_data_model_deletion(name, data_model_id) + MetadataTable(data_model=name).drop(self.sqlite_db) + self._delete_datasets(data_model_id, code, version) + with self.monetdb.begin() as conn: schema.drop(conn) - data_model_table.delete_data_model(code, version, conn) - update_actions( - conn=conn, - action="DELETE DATA MODEL", - data_model_details=data_model_details, - ) + data_model_table.delete_data_model(code, version, self.sqlite_db) - def _validate_data_model_deletion(self, data_model_name, data_model_id, conn): - metadata = Schema(METADATA_SCHEMA) - datasets_table = DatasetsTable(schema=metadata) - datasets = datasets_table.get_values(conn, data_model_id) - if not len(datasets) == 0: + def _validate_data_model_deletion(self, data_model_name, data_model_id): + datasets = DatasetsTable().get_dataset_codes( + db=self.sqlite_db, columns=["code"], data_model_id=data_model_id + ) + if datasets: raise ForeignKeyError( - f"The Data Model:{data_model_name} cannot be deleted because it contains Datasets: {datasets}" - f"\nIf you want to force delete everything, please use the '--force' flag" + f"The Data Model:{data_model_name} cannot be deleted because it contains Datasets: {datasets}\nIf you want to force delete everything, please use the '--force' flag" ) def _delete_datasets(self, data_model_id, data_model_code, data_model_version): - metadata = Schema(METADATA_SCHEMA) - datasets_table = DatasetsTable(schema=metadata) - with self.db.begin() as conn: - dataset_codes = datasets_table.get_values( - data_model_id=data_model_id, columns=["code"], db=conn - ) - + datasets_table = DatasetsTable() + dataset_codes = datasets_table.get_dataset_codes( + data_model_id=data_model_id, columns=["code"], db=self.sqlite_db + ) for dataset_code in dataset_codes: - DeleteDataset(self.db).execute( + DeleteDataset(sqlite_db=self.sqlite_db, monetdb=self.monetdb).execute( dataset_code, data_model_code=data_model_code, data_model_version=data_model_version, @@ -224,9 +172,10 @@ def _delete_datasets(self, data_model_id, data_model_code, data_model_version): class ImportCSV(UseCase): - def __init__(self, db: DataBase) -> None: - self.db = db - is_db_initialized(db) + def __init__(self, sqlite_db: SQLiteDB, monetdb: MonetDB) -> None: + self.sqlite_db = sqlite_db + self.monetdb = monetdb + is_db_initialized(sqlite_db) def execute( self, csv_path, copy_from_file, data_model_code, data_model_version @@ -235,65 +184,48 @@ def execute( code=data_model_code, version=data_model_version ) data_model = Schema(data_model_name) - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - datasets_table = DatasetsTable(schema=metadata) + data_model_id = DataModelTable().get_data_model_id( + data_model_code, data_model_version, self.sqlite_db + ) + metadata_table = MetadataTable.from_db(data_model_name, self.sqlite_db) + cdes = metadata_table.table + dataset_enumerations = get_dataset_enums(cdes) + sql_type_per_column = get_sql_type_per_column(cdes) + # In case the DATA_PATH is empty it will return the whole path. 
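+        # Illustrative note (assumed values, not part of the original patch):
+        # with DATA_PATH="/data/", "/data/models/dementia.csv" is stored as
+        # "models/dementia.csv". When DATA_PATH is unset, os.getenv returns None
+        # and str.split(None) splits on whitespace, so a space-free path is kept whole.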
+ relative_csv_path = csv_path.split(os.getenv("DATA_PATH"))[-1] - with self.db.begin() as conn: - data_model_id = data_model_table.get_data_model_id( - data_model_code, data_model_version, conn - ) - metadata_table = MetadataTable.from_db(data_model, conn) - cdes = metadata_table.table - dataset_enumerations = get_dataset_enums(cdes) - sql_type_per_column = get_sql_type_per_column(cdes) - # In case the DATA_PATH is empty it will return the whole path. - relative_csv_path = csv_path.split(os.getenv("DATA_PATH"))[-1] - - if copy_from_file: - imported_datasets = self.import_csv_with_volume( - csv_path=csv_path, - sql_type_per_column=sql_type_per_column, - data_model=data_model, - conn=conn, - ) - else: - imported_datasets = self._import_csv( - csv_path=relative_csv_path, data_model=data_model, conn=conn + with self.monetdb.begin() as monetdb_conn: + imported_datasets = ( + self.import_csv_with_volume( + csv_path, sql_type_per_column, data_model, monetdb_conn ) + if copy_from_file + else self._import_csv(relative_csv_path, data_model, monetdb_conn) + ) - existing_datasets = datasets_table.get_values( - columns=["code"], data_model_id=data_model_id, db=conn + existing_datasets = DatasetsTable().get_dataset_codes( + columns=["code"], data_model_id=data_model_id, db=self.sqlite_db + ) + dataset_id = self._get_next_dataset_id() + for dataset in set(imported_datasets) - set(existing_datasets): + values = dict( + data_model_id=data_model_id, + dataset_id=dataset_id, + code=dataset, + label=dataset_enumerations[dataset], + csv_path=relative_csv_path, + status="ENABLED", + properties=None, ) - for dataset in set(imported_datasets) - set(existing_datasets): - dataset_id = self._get_next_dataset_id(conn) - values = dict( - data_model_id=data_model_id, - dataset_id=dataset_id, - code=dataset, - label=dataset_enumerations[dataset], - csv_path=relative_csv_path, - status="ENABLED", - ) - datasets_table.insert_values(values, conn) - data_model_details = _get_data_model_details(data_model_id, conn) - dataset_details = _get_dataset_details(dataset_id, conn) - update_actions( - conn=conn, - action="ADD DATASET", - data_model_details=data_model_details, - dataset_details=dataset_details, - ) + DatasetsTable().insert_values(values, self.sqlite_db) + dataset_id += 1 - def _get_next_dataset_id(self, conn): - metadata = Schema(METADATA_SCHEMA) - datasets_table = DatasetsTable(schema=metadata) - dataset_id = datasets_table.get_next_dataset_id(conn) - return dataset_id + def _get_next_dataset_id(self): + return DatasetsTable().get_next_dataset_id(self.sqlite_db) - def _create_temporary_table(self, dataframe_sql_type_per_column, conn): - temporary_table = TemporaryTable(dataframe_sql_type_per_column, conn) - temporary_table.create(conn) + def _create_temporary_table(self, dataframe_sql_type_per_column, db): + temporary_table = TemporaryTable(dataframe_sql_type_per_column, db) + temporary_table.create(db) return temporary_table def import_csv_with_volume(self, csv_path, sql_type_per_column, data_model, conn): @@ -337,43 +269,33 @@ def import_csv_with_volume(self, csv_path, sql_type_per_column, data_model, conn def insert_csv_to_db(self, csv_path, temporary_table, data_model, db): primary_data_table = PrimaryDataTable.from_db(data_model, db) - offset = 2 - imported_datasets = [] - # If we load a csv to 'temp' and then insert them to the 'primary_data' in the case of a big file (3gb), - # for a sort period of time will have a spike of memory usage because the data will be stored in both tables. 
- # The workaround for that is to load the csv in batches. + offset, imported_datasets = 2, [] while True: temporary_table.load_csv( csv_path=csv_path, offset=offset, records=RECORDS_PER_COPY, db=db ) offset += RECORDS_PER_COPY - table_count = temporary_table.get_row_count(db=db) if not table_count: break - imported_datasets = set(imported_datasets) | set( temporary_table.get_column_distinct(DATASET_COLUMN_NAME, db) ) db.copy_data_table_to_another_table(primary_data_table, temporary_table) temporary_table.delete(db) - - # If the temp contains fewer rows than RECORDS_PER_COPY - # that means we have read all the records in the csv and we need to stop the iteration. if table_count < RECORDS_PER_COPY: break - return imported_datasets def _import_csv(self, csv_path, data_model, conn): - imported_datasets = [] - primary_data_table = PrimaryDataTable.from_db(data_model, conn) + imported_datasets, primary_data_table = [], PrimaryDataTable.from_db( + data_model, conn + ) with CSVDataFrameReader(csv_path).get_reader() as reader: for dataset_data in reader: dataframe = DataFrame(dataset_data) imported_datasets = set(imported_datasets) | set(dataframe.datasets) - values = dataframe.to_dict() - primary_data_table.insert_values(values, conn) + primary_data_table.insert_values(dataframe.to_dict(), conn) return imported_datasets @@ -409,69 +331,61 @@ class ValidateDataset(UseCase): Database constraints must NOT be used as part of the validation process since that could result in partially imported csvs. """ - def __init__(self, db: DataBase) -> None: - self.db = db - is_db_initialized(db) + def __init__(self, sqlite_db: SQLiteDB, monetdb: MonetDB) -> None: + self.sqlite_db = sqlite_db + self.monetdb = monetdb + is_db_initialized(sqlite_db) def execute( self, csv_path, copy_from_file, data_model_code, data_model_version ) -> None: - data_model_name = get_data_model_fullname( + data_model = get_data_model_fullname( code=data_model_code, version=data_model_version ) - data_model = Schema(data_model_name) - - with self.db.begin() as conn: - csv_columns = pd.read_csv(csv_path, nrows=0).columns.tolist() - if DATASET_COLUMN_NAME not in csv_columns: - raise InvalidDatasetError( - "The 'dataset' column is required to exist in the csv." - ) - metadata_table = MetadataTable.from_db(data_model, conn) - cdes = metadata_table.table - sql_type_per_column = get_sql_type_per_column(cdes) - cdes_with_min_max = get_cdes_with_min_max(cdes, csv_columns) - cdes_with_enumerations = get_cdes_with_enumerations(cdes, csv_columns) - dataset_enumerations = get_dataset_enums(cdes) - if self.is_data_model_longitudinal( - data_model_code, data_model_version, conn - ): - are_data_valid_longitudinal(csv_path) + csv_columns = pd.read_csv(csv_path, nrows=0).columns.tolist() + if DATASET_COLUMN_NAME not in csv_columns: + raise InvalidDatasetError( + "The 'dataset' column is required to exist in the csv." 
+ ) + metadata_table = MetadataTable.from_db(data_model, self.sqlite_db) + cdes = metadata_table.table + sql_type_per_column = get_sql_type_per_column(cdes) + cdes_with_min_max = get_cdes_with_min_max(cdes, csv_columns) + cdes_with_enumerations = get_cdes_with_enumerations(cdes, csv_columns) + dataset_enumerations = get_dataset_enums(cdes) + if self.is_data_model_longitudinal(data_model_code, data_model_version): + are_data_valid_longitudinal(csv_path) - if copy_from_file: + if copy_from_file: + with self.monetdb.begin() as monetdb_conn: validated_datasets = self.validate_csv_with_volume( csv_path, sql_type_per_column, cdes_with_min_max, cdes_with_enumerations, - conn, - ) - else: - validated_datasets = self.validate_csv( - csv_path, - sql_type_per_column, - cdes_with_min_max, - cdes_with_enumerations, + monetdb_conn, ) - self.verify_datasets_exist_in_enumerations( - datasets=validated_datasets, - dataset_enumerations=dataset_enumerations, + else: + validated_datasets = self.validate_csv( + csv_path, sql_type_per_column, cdes_with_min_max, cdes_with_enumerations ) + self.verify_datasets_exist_in_enumerations( + validated_datasets, dataset_enumerations + ) - def is_data_model_longitudinal(self, data_model_code, data_model_version, conn): - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - data_model_id = data_model_table.get_data_model_id( - data_model_code, data_model_version, conn + def is_data_model_longitudinal(self, data_model_code, data_model_version): + data_model_id = DataModelTable().get_data_model_id( + data_model_code, data_model_version, self.sqlite_db + ) + properties = DataModelTable().get_data_model_properties( + data_model_id, self.sqlite_db ) - properties = data_model_table.get_data_model_properties(data_model_id, conn) - return "longitudinal" in json.loads(properties)["tags"] + return LONGITUDINAL in properties["tags"] def validate_csv( self, csv_path, sql_type_per_column, cdes_with_min_max, cdes_with_enumerations ): imported_datasets = [] - csv_columns = pd.read_csv(csv_path, nrows=0).columns.tolist() dataframe_schema = DataFrameSchema( sql_type_per_column, cdes_with_min_max, cdes_with_enumerations, csv_columns @@ -552,7 +466,9 @@ def execute(self, csv_path, data_model_metadata) -> None: if LONGITUDINAL in data_model_metadata: longitudinal = data_model_metadata[LONGITUDINAL] if not isinstance(longitudinal, bool): - raise UserInputError(f"Longitudinal flag should be boolean, value given: {longitudinal}") + raise UserInputError( + f"Longitudinal flag should be boolean, value given: {longitudinal}" + ) if longitudinal: are_data_valid_longitudinal(csv_path) validated_datasets = self.validate_csv( @@ -593,584 +509,324 @@ def verify_datasets_exist_in_enumerations(self, datasets, dataset_enumerations): class DeleteDataset(UseCase): - def __init__(self, db: DataBase) -> None: - self.db = db - is_db_initialized(db) + def __init__(self, sqlite_db: SQLiteDB, monetdb: MonetDB) -> None: + self.sqlite_db = sqlite_db + self.monetdb = monetdb + is_db_initialized(sqlite_db) def execute(self, dataset_code, data_model_code, data_model_version) -> None: data_model_fullname = get_data_model_fullname( code=data_model_code, version=data_model_version ) - data_model = Schema(data_model_fullname) - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - datasets_table = DatasetsTable(schema=metadata) - - with self.db.begin() as conn: - primary_data_table = PrimaryDataTable.from_db(data_model, conn) - 
primary_data_table.remove_dataset(dataset_code, data_model_fullname, conn) - data_model_id = data_model_table.get_data_model_id( - data_model_code, data_model_version, conn - ) - dataset_id = datasets_table.get_dataset_id( - dataset_code, data_model_id, conn - ) - - data_model_details = _get_data_model_details(data_model_id, conn) - dataset_details = _get_dataset_details(dataset_id, conn) - datasets_table.delete_dataset(dataset_id, data_model_id, conn) - update_actions( - conn=conn, - action="DELETE DATASET", - data_model_details=data_model_details, - dataset_details=dataset_details, + with self.monetdb.begin() as conn: + primary_data_table = PrimaryDataTable.from_db( + Schema(data_model_fullname), conn ) + primary_data_table.remove_dataset(dataset_code, data_model_fullname, conn) + data_model_id = DataModelTable().get_data_model_id( + data_model_code, data_model_version, self.sqlite_db + ) + dataset_id = DatasetsTable().get_dataset_id( + dataset_code, data_model_id, self.sqlite_db + ) + DatasetsTable().delete_dataset(dataset_id, data_model_id, self.sqlite_db) class EnableDataModel(UseCase): - def __init__(self, db: DataBase) -> None: + def __init__(self, db: SQLiteDB) -> None: self.db = db - is_db_initialized(db) def execute(self, code, version) -> None: - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - - with self.db.begin() as conn: - data_model_id = data_model_table.get_data_model_id(code, version, conn) - current_status = data_model_table.get_data_model_status(data_model_id, conn) - if current_status != "ENABLED": - data_model_table.set_data_model_status("ENABLED", data_model_id, conn) - data_model_details = _get_data_model_details(data_model_id, conn) - update_actions( - conn=conn, - action="ENABLE DATA MODEL", - data_model_details=data_model_details, - ) - - else: - raise UserInputError("The data model was already enabled") + data_model_table = DataModelTable() + data_model_id = data_model_table.get_data_model_id(code, version, self.db) + current_status = data_model_table.get_data_model_status(data_model_id, self.db) + if current_status != "ENABLED": + data_model_table.set_data_model_status("ENABLED", data_model_id, self.db) + else: + raise UserInputError("The data model was already enabled") class DisableDataModel(UseCase): - def __init__(self, db: DataBase) -> None: + def __init__(self, db: SQLiteDB) -> None: self.db = db - is_db_initialized(db) def execute(self, code, version) -> None: - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - - with self.db.begin() as conn: - data_model_id = data_model_table.get_data_model_id(code, version, conn) - current_status = data_model_table.get_data_model_status(data_model_id, conn) - - if current_status != "DISABLED": - data_model_table.set_data_model_status("DISABLED", data_model_id, conn) - data_model_details = _get_data_model_details(data_model_id, conn) - update_actions( - conn=conn, - action="DISABLE DATA MODEL", - data_model_details=data_model_details, - ) - else: - raise UserInputError("The data model was already disabled") + data_model_table = DataModelTable() + data_model_id = data_model_table.get_data_model_id(code, version, self.db) + current_status = data_model_table.get_data_model_status(data_model_id, self.db) + if current_status != "DISABLED": + data_model_table.set_data_model_status("DISABLED", data_model_id, self.db) + else: + raise UserInputError("The data model was already disabled") class EnableDataset(UseCase): - def __init__(self, db: 
DataBase) -> None: + def __init__(self, db: SQLiteDB) -> None: self.db = db - is_db_initialized(db) def execute(self, dataset_code, data_model_code, data_model_version) -> None: - metadata = Schema(METADATA_SCHEMA) - datasets_table = DatasetsTable(schema=metadata) - data_model_table = DataModelTable(schema=metadata) - - with self.db.begin() as conn: - - data_model_id = data_model_table.get_data_model_id( - data_model_code, data_model_version, conn - ) - dataset_id = datasets_table.get_dataset_id( - dataset_code, data_model_id, conn - ) - current_status = datasets_table.get_dataset_status(dataset_id, conn) - if current_status != "ENABLED": - datasets_table.set_dataset_status("ENABLED", dataset_id, conn) - - data_model_details = _get_data_model_details(data_model_id, conn) - dataset_details = _get_dataset_details(dataset_id, conn) - update_actions( - conn=conn, - action="ENABLE DATASET", - data_model_details=data_model_details, - dataset_details=dataset_details, - ) - else: - raise UserInputError("The dataset was already enabled") + datasets_table = DatasetsTable() + data_model_table = DataModelTable() + data_model_id = data_model_table.get_data_model_id( + data_model_code, data_model_version, self.db + ) + dataset_id = datasets_table.get_dataset_id(dataset_code, data_model_id, self.db) + current_status = datasets_table.get_dataset_status(dataset_id, self.db) + if current_status != "ENABLED": + datasets_table.set_dataset_status("ENABLED", dataset_id, self.db) + else: + raise UserInputError("The dataset was already enabled") class DisableDataset(UseCase): - def __init__(self, db: DataBase) -> None: + def __init__(self, db: SQLiteDB) -> None: self.db = db - is_db_initialized(db) def execute(self, dataset_code, data_model_code, data_model_version) -> None: - metadata = Schema(METADATA_SCHEMA) - datasets_table = DatasetsTable(schema=metadata) - data_model_table = DataModelTable(schema=metadata) - with self.db.begin() as conn: - - data_model_id = data_model_table.get_data_model_id( - data_model_code, data_model_version, conn - ) - dataset_id = datasets_table.get_dataset_id( - dataset_code, data_model_id, conn - ) - current_status = datasets_table.get_dataset_status(dataset_id, conn) - if current_status != "DISABLED": - datasets_table.set_dataset_status("DISABLED", dataset_id, conn) - - data_model_details = _get_data_model_details(data_model_id, conn) - dataset_details = _get_dataset_details(dataset_id, conn) - update_actions( - conn=conn, - action="DISABLE DATASET", - data_model_details=data_model_details, - dataset_details=dataset_details, - ) - - else: - raise UserInputError("The dataset was already disabled") + datasets_table = DatasetsTable() + data_model_table = DataModelTable() + data_model_id = data_model_table.get_data_model_id( + data_model_code, data_model_version, self.db + ) + dataset_id = datasets_table.get_dataset_id(dataset_code, data_model_id, self.db) + current_status = datasets_table.get_dataset_status(dataset_id, self.db) + if current_status != "DISABLED": + datasets_table.set_dataset_status("DISABLED", dataset_id, self.db) + else: + raise UserInputError("The dataset was already disabled") class TagDataModel(UseCase): - def __init__(self, db: DataBase) -> None: + def __init__(self, db: SQLiteDB) -> None: self.db = db - is_db_initialized(db) def execute(self, code, version, tag) -> None: - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - - with self.db.begin() as conn: - data_model_id = data_model_table.get_data_model_id(code, version, 
conn) - properties = Properties( - data_model_table.get_data_model_properties(data_model_id, conn) - ) - properties.add_tag(tag) - data_model_table.set_data_model_properties( - properties.properties, data_model_id, conn - ) - - data_model_details = _get_data_model_details(data_model_id, conn) - update_actions( - conn=conn, - action="ADD DATA MODEL TAG", - data_model_details=data_model_details, - ) + data_model_table = DataModelTable() + data_model_id = data_model_table.get_data_model_id(code, version, self.db) + properties = Properties( + data_model_table.get_data_model_properties(data_model_id, self.db) + ) + properties.add_tag(tag) + data_model_table.set_data_model_properties( + properties.properties, data_model_id, self.db + ) class UntagDataModel(UseCase): - def __init__(self, db: DataBase) -> None: + def __init__(self, db: SQLiteDB) -> None: self.db = db - is_db_initialized(db) def execute(self, code, version, tag) -> None: - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - - with self.db.begin() as conn: - data_model_id = data_model_table.get_data_model_id(code, version, conn) - properties = Properties( - data_model_table.get_data_model_properties(data_model_id, conn) - ) - properties.remove_tag(tag) - data_model_table.set_data_model_properties( - properties.properties, data_model_id, conn - ) - - data_model_details = _get_data_model_details(data_model_id, conn) - update_actions( - conn=conn, - action="REMOVE DATA MODEL TAG", - data_model_details=data_model_details, - ) + data_model_table = DataModelTable() + data_model_id = data_model_table.get_data_model_id(code, version, self.db) + properties = Properties( + data_model_table.get_data_model_properties(data_model_id, self.db) + ) + properties.remove_tag(tag) + data_model_table.set_data_model_properties( + properties.properties, data_model_id, self.db + ) class AddPropertyToDataModel(UseCase): - def __init__(self, db: DataBase) -> None: + def __init__(self, db: SQLiteDB) -> None: self.db = db - is_db_initialized(db) def execute(self, code, version, key, value, force) -> None: - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - - with self.db.begin() as conn: - data_model_id = data_model_table.get_data_model_id(code, version, conn) - properties = Properties( - data_model_table.get_data_model_properties(data_model_id, conn) - ) - properties.add_property(key, value, force) - data_model_table.set_data_model_properties( - properties.properties, data_model_id, conn - ) - - data_model_details = _get_data_model_details(data_model_id, conn) - update_actions( - conn=conn, - action="ADD DATA MODEL TAG", - data_model_details=data_model_details, - ) + data_model_table = DataModelTable() + data_model_id = data_model_table.get_data_model_id(code, version, self.db) + properties = Properties( + data_model_table.get_data_model_properties(data_model_id, self.db) + ) + properties.add_property(key, value, force) + data_model_table.set_data_model_properties( + properties.properties, data_model_id, self.db + ) class RemovePropertyFromDataModel(UseCase): - def __init__(self, db: DataBase) -> None: + def __init__(self, db: SQLiteDB) -> None: self.db = db - is_db_initialized(db) def execute(self, code, version, key, value) -> None: - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - - with self.db.begin() as conn: - data_model_id = data_model_table.get_data_model_id(code, version, conn) - - properties = Properties( - 
data_model_table.get_data_model_properties(data_model_id, conn)
-            )
-            properties.remove_property(key, value)
-            data_model_table.set_data_model_properties(
-                properties.properties, data_model_id, conn
-            )
-
-            data_model_details = _get_data_model_details(data_model_id, conn)
-            update_actions(
-                conn=conn,
-                action="REMOVE DATA MODEL TAG",
-                data_model_details=data_model_details,
-            )
+        data_model_table = DataModelTable()
+        data_model_id = data_model_table.get_data_model_id(code, version, self.db)
+        properties = Properties(
+            data_model_table.get_data_model_properties(data_model_id, self.db)
+        )
+        properties.remove_property(key, value)
+        data_model_table.set_data_model_properties(
+            properties.properties, data_model_id, self.db
+        )
 
 
 class TagDataset(UseCase):
-    def __init__(self, db: DataBase) -> None:
+    def __init__(self, db: SQLiteDB) -> None:
         self.db = db
-        is_db_initialized(db)
 
     def execute(self, dataset_code, data_model_code, data_model_version, tag) -> None:
-        metadata = Schema(METADATA_SCHEMA)
-        dataset_table = DatasetsTable(schema=metadata)
-        data_model_table = DataModelTable(schema=metadata)
-
-        with self.db.begin() as conn:
-            data_model_id = data_model_table.get_data_model_id(
-                data_model_code, data_model_version, conn
-            )
-            dataset_id = dataset_table.get_dataset_id(dataset_code, data_model_id, conn)
-            properties = Properties(
-                dataset_table.get_dataset_properties(data_model_id, conn)
-            )
-            properties.add_tag(tag)
-            dataset_table.set_dataset_properties(
-                properties.properties, dataset_id, conn
-            )
-
-            data_model_details = _get_data_model_details(data_model_id, conn)
-            dataset_details = _get_dataset_details(dataset_id, conn)
-            update_actions(
-                conn=conn,
-                action="ADD DATASET TAG",
-                data_model_details=data_model_details,
-                dataset_details=dataset_details,
-            )
+        datasets_table = DatasetsTable()
+        data_model_table = DataModelTable()
+        data_model_id = data_model_table.get_data_model_id(
+            data_model_code, data_model_version, self.db
+        )
+        dataset_id = datasets_table.get_dataset_id(dataset_code, data_model_id, self.db)
+        properties = Properties(
+            datasets_table.get_dataset_properties(dataset_id, self.db)
+        )
+        properties.add_tag(tag)
+        datasets_table.set_dataset_properties(
+            properties.properties, dataset_id, self.db
+        )
 
 
 class UntagDataset(UseCase):
-    def __init__(self, db: DataBase) -> None:
+    def __init__(self, db: SQLiteDB) -> None:
         self.db = db
-        is_db_initialized(db)
 
     def execute(self, dataset, data_model_code, version, tag) -> None:
-        metadata = Schema(METADATA_SCHEMA)
-        dataset_table = DatasetsTable(schema=metadata)
-        data_model_table = DataModelTable(schema=metadata)
-
-        with self.db.begin() as conn:
-            data_model_id = data_model_table.get_data_model_id(
-                data_model_code, version, conn
-            )
-            dataset_id = dataset_table.get_dataset_id(dataset, data_model_id, conn)
-            properties = Properties(
-                dataset_table.get_dataset_properties(data_model_id, conn)
-            )
-            properties.remove_tag(tag)
-            dataset_table.set_dataset_properties(
-                properties.properties, dataset_id, conn
-            )
-
-            data_model_details = _get_data_model_details(data_model_id, conn)
-            dataset_details = _get_dataset_details(dataset_id, conn)
-            update_actions(
-                conn=conn,
-                action="REMOVE DATASET TAG",
-                data_model_details=data_model_details,
-                dataset_details=dataset_details,
-            )
+        datasets_table = DatasetsTable()
+        data_model_table = DataModelTable()
+        data_model_id = data_model_table.get_data_model_id(
+            data_model_code, version, self.db
+        )
+        dataset_id = datasets_table.get_dataset_id(dataset, data_model_id, self.db)
+        properties = Properties(
+            datasets_table.get_dataset_properties(dataset_id, self.db)
+        )
+        properties.remove_tag(tag)
+        datasets_table.set_dataset_properties(
+            properties.properties, dataset_id, self.db
+        )
 
 
 class AddPropertyToDataset(UseCase):
-    def __init__(self, db: DataBase) -> None:
+    def __init__(self, db: SQLiteDB) -> None:
         self.db = db
-        is_db_initialized(db)
 
     def execute(self, dataset, data_model_code, version, key, value, force) -> None:
-        metadata = Schema(METADATA_SCHEMA)
-        dataset_table = DatasetsTable(schema=metadata)
-        data_model_table = DataModelTable(schema=metadata)
-        with self.db.begin() as conn:
-            data_model_id = data_model_table.get_data_model_id(
-                data_model_code, version, conn
-            )
-            dataset_id = dataset_table.get_dataset_id(dataset, data_model_id, conn)
-            properties = Properties(
-                dataset_table.get_dataset_properties(data_model_id, conn)
-            )
-            properties.add_property(key, value, force)
-            dataset_table.set_dataset_properties(
-                properties.properties, dataset_id, conn
-            )
-            data_model_details = _get_data_model_details(data_model_id, conn)
-            dataset_details = _get_dataset_details(dataset_id, conn)
-            update_actions(
-                conn=conn,
-                action="ADD DATASET TAG",
-                data_model_details=data_model_details,
-                dataset_details=dataset_details,
-            )
+        datasets_table = DatasetsTable()
+        data_model_table = DataModelTable()
+        data_model_id = data_model_table.get_data_model_id(
+            data_model_code, version, self.db
+        )
+        dataset_id = datasets_table.get_dataset_id(dataset, data_model_id, self.db)
+        properties = Properties(
+            datasets_table.get_dataset_properties(dataset_id, self.db)
+        )
+        properties.add_property(key, value, force)
+        datasets_table.set_dataset_properties(
+            properties.properties, dataset_id, self.db
+        )
 
 
 class RemovePropertyFromDataset(UseCase):
-    def __init__(self, db: DataBase) -> None:
+    def __init__(self, db: SQLiteDB) -> None:
         self.db = db
-        is_db_initialized(db)
 
     def execute(self, dataset, data_model_code, version, key, value) -> None:
-        metadata = Schema(METADATA_SCHEMA)
-        dataset_table = DatasetsTable(schema=metadata)
-        data_model_table = DataModelTable(schema=metadata)
-        with self.db.begin() as conn:
-            data_model_id = data_model_table.get_data_model_id(
-                data_model_code, version, conn
-            )
-            dataset_id = dataset_table.get_dataset_id(dataset, data_model_id, conn)
-            properties = Properties(
-                dataset_table.get_dataset_properties(data_model_id, conn)
-            )
-            properties.remove_property(key, value)
-            dataset_table.set_dataset_properties(
-                properties.properties, dataset_id, conn
-            )
-            data_model_details = _get_data_model_details(data_model_id, conn)
-            dataset_details = _get_dataset_details(dataset_id, conn)
-            update_actions(
-                conn=conn,
-                action="REMOVE DATASET TAG",
-                data_model_details=data_model_details,
-                dataset_details=dataset_details,
-            )
+        datasets_table = DatasetsTable()
+        data_model_table = DataModelTable()
+        data_model_id = data_model_table.get_data_model_id(
+            data_model_code, version, self.db
+        )
+        dataset_id = datasets_table.get_dataset_id(dataset, data_model_id, self.db)
+        properties = Properties(
+            datasets_table.get_dataset_properties(dataset_id, self.db)
+        )
+        properties.remove_property(key, value)
+        datasets_table.set_dataset_properties(
+            properties.properties, dataset_id, self.db
+        )
 
 
 class ListDataModels(UseCase):
-    def __init__(self, db: DataBase) -> None:
+    def __init__(self, db: SQLiteDB) -> None:
         self.db = db
-        is_db_initialized(db)
 
     def execute(self) -> None:
-        metadata = Schema(METADATA_SCHEMA)
-        data_model_table = 
DataModelTable(schema=metadata) - - with self.db.begin() as conn: - - data_model_row_columns = [ - "data_model_id", - "code", - "version", - "label", - "status", - ] - - data_model_rows = data_model_table.get_data_models( - db=conn, columns=data_model_row_columns + data_model_table = DataModelTable() + data_model_row_columns = ["data_model_id", "code", "version", "label", "status"] + data_model_rows = data_model_table.get_data_models( + db=self.db, columns=data_model_row_columns + ) + dataset_count_by_data_model_id = { + data_model_id: dataset_count + for data_model_id, dataset_count in data_model_table.get_dataset_count_by_data_model_id( + self.db ) + } + data_models_info = [ + list(row) + [dataset_count_by_data_model_id.get(row[0], 0)] + for row in data_model_rows + ] - dataset_count_by_data_model_id = { - data_model_id: dataset_count - for data_model_id, dataset_count in data_model_table.get_dataset_count_by_data_model_id( - conn - ) - } - - data_models_info = [] - - for row in data_model_rows: - data_model_id, *_ = row - dataset_count = ( - dataset_count_by_data_model_id[data_model_id] - if data_model_id in dataset_count_by_data_model_id - else 0 - ) - data_model_info = list(row) + [dataset_count] - data_models_info.append(data_model_info) - - if not data_models_info: - print("There are no data models.") - return + if not data_models_info: + print("There are no data models.") + return - data_model_info_columns = data_model_row_columns + ["count"] - df = pd.DataFrame(data_models_info, columns=data_model_info_columns) - print(df) + df = pd.DataFrame(data_models_info, columns=data_model_row_columns + ["count"]) + print(df) class ListDatasets(UseCase): - def __init__(self, db: DataBase) -> None: - self.db = db - is_db_initialized(db) + def __init__(self, sqlite_db: SQLiteDB, monetdb: MonetDB) -> None: + self.sqlite_db = sqlite_db + self.monetdb = monetdb def execute(self) -> None: - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - dataset_table = DatasetsTable(schema=metadata) - - with self.db.begin() as conn: - dataset_row_columns = [ - "dataset_id", - "data_model_id", - "code", - "label", - "status", - ] - dataset_rows = dataset_table.get_values(conn, columns=dataset_row_columns) - - data_model_fullname_by_data_model_id = { - data_model_id: get_data_model_fullname(code, version) - for data_model_id, code, version in data_model_table.get_data_models( - conn, ["data_model_id", "code", "version"] - ) - } - - datasets_info = [] - for row in dataset_rows: - _, data_model_id, dataset_code, *_ = row - data_model_fullname = data_model_fullname_by_data_model_id[ - data_model_id - ] + data_model_table = DataModelTable() + dataset_table = DatasetsTable() + dataset_row_columns = ["dataset_id", "data_model_id", "code", "label", "status"] + dataset_rows = dataset_table.get_datasets( + self.sqlite_db, columns=dataset_row_columns + ) + data_model_fullname_by_data_model_id = { + data_model_id: get_data_model_fullname(code, version) + for data_model_id, code, version in data_model_table.get_data_models( + self.sqlite_db, ["data_model_id", "code", "version"] + ) + } + datasets_info = [] + for row in dataset_rows: + data_model_fullname = data_model_fullname_by_data_model_id[row[1]] + with self.monetdb.begin() as conn: + primary_data_table = PrimaryDataTable.from_db( + Schema(data_model_fullname), conn + ) dataset_count = { - dataset: dataset_count - for dataset, dataset_count in dataset_table.get_data_count_by_dataset( + dataset: count + for dataset, count in 
primary_data_table.get_data_count_by_dataset( data_model_fullname, conn ) - }[dataset_code] + }.get(row[2], 0) + datasets_info.append(list(row) + [dataset_count]) - dataset_info = list(row) + [dataset_count] - datasets_info.append(dataset_info) + if not datasets_info: + print("There are no datasets.") + return - if not datasets_info: - print("There are no datasets.") - return - - dataset_info_columns = dataset_row_columns + ["count"] - df = pd.DataFrame(datasets_info, columns=dataset_info_columns) - print(df) + df = pd.DataFrame(datasets_info, columns=dataset_row_columns + ["count"]) + print(df) class Cleanup(UseCase): - def __init__(self, db: DataBase) -> None: - self.db = db - is_db_initialized(db) + def __init__(self, sqlite_db: SQLiteDB, monetdb: MonetDB) -> None: + self.sqlite_db = sqlite_db + self.monetdb = monetdb def execute(self) -> None: - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - data_model_rows = [] - - with self.db.begin() as conn: - data_model_row_columns = [ - "code", - "version", - ] - data_model_rows = data_model_table.get_data_models( - conn, columns=data_model_row_columns - ) + data_model_table = DataModelTable() + data_model_rows = data_model_table.get_data_models( + self.sqlite_db, columns=["code", "version"] + ) - for data_model_row in data_model_rows: - code, version = data_model_row - DeleteDataModel(self.db).execute(code=code, version=version, force=True) + for code, version in data_model_rows: + DeleteDataModel(sqlite_db=self.sqlite_db, monetdb=self.monetdb).execute( + code=code, version=version, force=True + ) def get_data_model_fullname(code, version): return f"{code}:{version}" - - -class DatasetDetails: - def __init__(self, dataset_id, code, label): - self.dataset_id = dataset_id - self.code = code - self.label = label - - -class DataModelDetails: - def __init__(self, data_model_id, code, version, label): - self.data_model_id = data_model_id - self.code = code - self.version = version - self.label = label - - -def update_actions( - conn, - action, - data_model_details: DataModelDetails, - dataset_details: DatasetDetails = None, -): - metadata = Schema(METADATA_SCHEMA) - actions_table = ActionsTable(schema=metadata) - - record = dict( - data_model_id=data_model_details.data_model_id, - data_model_code=data_model_details.code, - data_model_label=data_model_details.label, - data_model_version=data_model_details.version, - ) - - if dataset_details: - record["dataset_code"] = dataset_details.code - record["dataset_id"] = dataset_details.dataset_id - record["dataset_label"] = dataset_details.label - - record["action"] = action - record["user"] = conn.get_current_user() - record["date"] = datetime.datetime.now().isoformat() - - action_record = dict() - action_record["action_id"] = actions_table.get_next_id(conn) - action_record["action"] = json.dumps(record) - actions_table.insert_values(action_record, conn) - - -def _get_data_model_details(data_model_id, conn): - metadata = Schema(METADATA_SCHEMA) - data_model_table = DataModelTable(schema=metadata) - code, version, label = data_model_table.get_data_model( - data_model_id=data_model_id, db=conn, columns=["code", "version", "label"] - ) - return DataModelDetails(data_model_id, code, version, label) - - -def _get_dataset_details(dataset_id, conn): - metadata = Schema(METADATA_SCHEMA) - dataset_table = DatasetsTable(schema=metadata) - code, label = dataset_table.get_dataset(conn, dataset_id, ["code", "label"]) - return DatasetDetails(dataset_id, code, label) diff --git 
a/tests/conftest.py b/tests/conftest.py index af2c20b..4231b15 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,19 +4,24 @@ import pytest import docker -from mipdb.commands import get_db_config -from mipdb.database import MonetDB +from mipdb.commands import get_monetdb_config +from mipdb.databases.monetdb import MonetDB +from mipdb.databases.monetdb_tables import User from mipdb.reader import JsonFileReader -from mipdb.tables import User +from mipdb.databases.sqlite import SQLiteDB +TEST_DIR = os.path.dirname(os.path.realpath(__file__)) +SQLiteDB_PATH = f"{TEST_DIR}/sqlite.db" DATA_MODEL_FILE = "tests/data/success/data_model_v_1_0/CDEsMetadata.json" DATASET_FILE = "tests/data/success/data_model_v_1_0/dataset.csv" DATA_FOLDER = "tests/data/" SUCCESS_DATA_FOLDER = DATA_FOLDER + "success" FAIL_DATA_FOLDER = DATA_FOLDER + "fail" -ABSOLUTE_PATH_DATA_FOLDER = f"{os.path.dirname(os.path.realpath(__file__))}/data/" -ABSOLUTE_PATH_DATASET_FILE = f"{os.path.dirname(os.path.realpath(__file__))}/data/success/data_model_v_1_0/dataset.csv" -ABSOLUTE_PATH_DATASET_FILE_MULTIPLE_DATASET = f"{os.path.dirname(os.path.realpath(__file__))}/data/success/data_model_v_1_0/dataset123.csv" +ABSOLUTE_PATH_DATA_FOLDER = f"{TEST_DIR}/data/" +ABSOLUTE_PATH_DATASET_FILE = f"{TEST_DIR}/data/success/data_model_v_1_0/dataset.csv" +ABSOLUTE_PATH_DATASET_FILE_MULTIPLE_DATASET = ( + f"{TEST_DIR}/data/success/data_model_v_1_0/dataset123.csv" +) ABSOLUTE_PATH_SUCCESS_DATA_FOLDER = ABSOLUTE_PATH_DATA_FOLDER + "success" ABSOLUTE_PATH_FAIL_DATA_FOLDER = ABSOLUTE_PATH_DATA_FOLDER + "fail" IP = "127.0.0.1" @@ -24,8 +29,8 @@ USERNAME = "admin" PASSWORD = "executor" DB_NAME = "db" - -DEFAULT_OPTIONS = [ +SQLiteDB_OPTION = ["--sqlite_db_path", SQLiteDB_PATH] +MONETDB_OPTIONS = [ "--ip", IP, "--port", @@ -87,15 +92,32 @@ def monetdb_container(): @pytest.fixture(scope="function") -def db(): - dbconfig = get_db_config(IP, PORT, USERNAME, PASSWORD, DB_NAME) - return MonetDB.from_config(dbconfig) +def sqlite_db(): + return SQLiteDB.from_config({"db_path": SQLiteDB_PATH}) @pytest.fixture(scope="function") -def cleanup_db(db): - yield - schemas = db.get_schemas() +def monetdb(): + dbconfig = get_monetdb_config(IP, PORT, USERNAME, PASSWORD, DB_NAME) + return MonetDB.from_config(dbconfig) + + +def cleanup_monetdb(monetdb): + schemas = monetdb.get_schemas() for schema in schemas: if schema not in [user.value for user in User]: - db.drop_schema(schema) + monetdb.drop_schema(schema) + + +def cleanup_sqlite(sqlite_db): + sqlite_tables = sqlite_db.get_all_tables() + if sqlite_tables: + for table in sqlite_tables: + sqlite_db.execute(f'DROP TABLE "{table}";') + + +@pytest.fixture(scope="function") +def cleanup_db(sqlite_db, monetdb): + yield + cleanup_sqlite(sqlite_db) + cleanup_monetdb(monetdb) diff --git a/tests/mocks.py b/tests/mocks.py deleted file mode 100644 index e0b9135..0000000 --- a/tests/mocks.py +++ /dev/null @@ -1,90 +0,0 @@ -from contextlib import contextmanager -import sqlalchemy as sql - -from mipdb.database import MonetDB - - -class MonetDBMock(MonetDB): - """Mock version of MonetDB used for testing. Objects of this class have - exactly the same functionality as objects of the MonetDB class except that - queries are not actually executed against an external Monet DB service but - are stored in the captured_queries instance attribute. 
All parameters for - prepared SQL queries are stored in captured_multiparams and - captured_params.""" - - def __init__(self) -> None: - self.captured_queries = [] - self.captured_multiparams = [] - self.captured_params = [] - - def mock_executor(sql, *multiparams, **params): - self.captured_queries.append(str(sql)) - self.captured_multiparams.append(multiparams) - self.captured_params.append(params) - - url = "monetdb://mock:mock@mock:0/mock" - self._executor = sql.create_engine(url, strategy="mock", executor=mock_executor) - - @contextmanager - def begin(self): - # Mock engine in SQLAlchemy doesn't have a begin method probably - # because it makes no sense beginning a transaction on a mock engine. - # However, in order to have unit tests without having to use an - # external database, I do the following trick and it works! - yield self - - def get_schemas(self): - return ["mipdb_metadata"] - - def table_exists(self, table): - return True - - def get_current_user(self): - return "test_user" - - def get_data_model_id(self, code, version): - return 1 - - def get_dataset_id(self, code, data_model_id): - return 1 - - def get_executor(self): - return None - - def get_values(self, data_model_id=None, columns=None): - if columns: - return [list(range(1, len(columns) + 1))] - return [[1, 2]] - - def get_data_models(self, columns=None): - if columns: - return [list(range(1, len(columns) + 1))] - return [[1, 2]] - - def get_dataset_status(self, dataset_id): - return "WrongStatus" - - def get_metadata(self, schema): - return { - "var1": '{"code": "var1", "sql_type": "text", "description": "", "label": "Variable 1", "methodology": "", "is_categorical": false, "type": "nominal"}', - "subjectcode": '{"label": "subjectcode", "code": "subjectcode", "sql_type": "text", "description": "", "methodology": "", "is_categorical": false, "type": "nominal"}', - "var2": '{"code": "var2", "sql_type": "text", "description": "", "enumerations": {"1": "Level1", "2.0": "Level2"}, "label": "Variable 2", "methodology": "", "is_categorical": true, "type": "nominal"}', - "dataset": '{"code": "dataset", "sql_type": "text", "description": "", "enumerations": {"dataset": "Dataset", "dataset1": "Dataset 1", "dataset2": "Dataset 2"}, "label": "Dataset", "methodology": "", "is_categorical": true, "type": "nominal"}', - "var3": '{"code": "var3", "sql_type": "real", "description": "", "label": "Variable 3", "methodology": "", "is_categorical": false, "min": 0, "max": 100, "type": "nominal"}', - "var4": '{"code": "var4", "sql_type": "int", "units": "years", "description": "", "label": "Variable 4", "methodology": "", "is_categorical": false, "type": "nominal"}', - } - - def get_data_model_status(self, data_model_id): - return "WrongStatus" - - def get_dataset_properties(self, dataset_id): - return '{"tags":["tag1"], "properties": {"key1": "value1"}}' - - def get_data_model_properties(self, dataset_id): - return '{"tags":["tag1"], "properties": {"key1": "value1"}}' - - def get_data_model(self, data_model_id, columns): - return "code", "version", "label" - - def get_dataset(self, dataset_id, columns): - return "code", "label" diff --git a/tests/test_commands.py b/tests/test_commands.py index 07dd135..019cd8f 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -18,46 +18,50 @@ from mipdb import list_data_models from mipdb import list_datasets from mipdb import validate_dataset +from mipdb.commands import validate_folder from mipdb.exceptions import ExitCode +from mipdb.databases.sqlite import Dataset, DataModel +from 
mipdb.databases.sqlite_tables import DataModelTable from tests.conftest import ( DATASET_FILE, ABSOLUTE_PATH_DATASET_FILE, ABSOLUTE_PATH_SUCCESS_DATA_FOLDER, SUCCESS_DATA_FOLDER, ABSOLUTE_PATH_FAIL_DATA_FOLDER, - DEFAULT_OPTIONS, ABSOLUTE_PATH_DATASET_FILE_MULTIPLE_DATASET, + MONETDB_OPTIONS, + SQLiteDB_OPTION, + ABSOLUTE_PATH_DATASET_FILE_MULTIPLE_DATASET, ) from tests.conftest import DATA_MODEL_FILE @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_init(db): +def test_init(sqlite_db): # Setup runner = CliRunner() - # Check data_model not present already - assert "mipdb_metadata" not in db.get_schemas() - # Test - result = runner.invoke(init, DEFAULT_OPTIONS) + data_model_table = DataModelTable() + assert not data_model_table.exists(sqlite_db) + result = runner.invoke(init, SQLiteDB_OPTION) assert result.exit_code == ExitCode.OK - assert "mipdb_metadata" in db.get_schemas() - assert db.execute(f"select * from mipdb_metadata.data_models").fetchall() == [] - assert db.execute(f"select * from mipdb_metadata.actions").fetchall() == [] + assert sqlite_db.execute_fetchall(f"select * from data_models") == [] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_add_data_model(db): +def test_add_data_model(sqlite_db): # Setup runner = CliRunner() # Check data_model not present already - assert "data_model:1.0" not in db.get_schemas() - runner.invoke(init, DEFAULT_OPTIONS) + + runner.invoke(init, SQLiteDB_OPTION) # Test - result = runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + result = runner.invoke( + add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS + ) assert result.exit_code == ExitCode.OK - assert "data_model:1.0" in db.get_schemas() - data_models = db.execute(f"select * from mipdb_metadata.data_models").fetchall() + + data_models = sqlite_db.execute_fetchall(f"select * from data_models") data_model_id, code, version, desc, status, properties = data_models[0] assert ( data_model_id == 1 @@ -74,52 +78,41 @@ def test_add_data_model(db): cdes = properties["properties"]["cdes"] assert "groups" in cdes or "variables" in cdes assert "code" in cdes and "label" in cdes and "version" in cdes - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[0] - assert action_id == 1 - assert action != "" - assert json.loads(action)["action"] == "ADD DATA MODEL" - metadata = db.execute( - f'select * from "data_model:1.0".variables_metadata' - ).fetchall() - # TODO better test + metadata = sqlite_db.execute_fetchall( + f'select * from "data_model:1.0_variables_metadata"' + ) assert len(metadata) == 6 @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_delete_data_model(db): +def test_delete_data_model(sqlite_db): # Setup runner = CliRunner() # Check data_model not present already - assert "data_model:1.0" not in db.get_schemas() - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) + assert sqlite_db.get_data_models(["data_model_id"])[0][0] == 1 # Test result = runner.invoke( delete_data_model, - ["data_model", "-v", "1.0", "-f"] + DEFAULT_OPTIONS, + ["data_model", "-v", "1.0", "-f"] + SQLiteDB_OPTION + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK - assert "data_model:1.0" not in db.get_schemas() - 
action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[2] - assert action_id == 3 - assert action != "" - assert json.loads(action)["action"] == "DELETE DATA MODEL" @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_add_dataset_with_volume(db): +def test_add_dataset_with_volume(sqlite_db, monetdb): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) - assert not db.get_values(columns=["code"]) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) + + assert not sqlite_db.get_values(table=Dataset.__table__, columns=["code"]) # Test result = runner.invoke( @@ -131,32 +124,31 @@ def test_add_dataset_with_volume(db): "-v", "1.0", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK - assert "dataset" == db.get_values(columns=["code"])[0][0] + assert ( + "dataset" + == sqlite_db.get_values(table=Dataset.__table__, columns=["code"])[0][0] + ) assert result.exit_code == ExitCode.OK - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[2] - assert action_id == 3 - assert action != "" - assert json.loads(action)["action"] == "ADD DATASET" - data = db.execute(f'select * from "data_model:1.0".primary_data').fetchall() + data = monetdb.execute(f'select * from "data_model:1.0".primary_data').fetchall() assert len(data) == 5 @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_add_dataset(db): +def test_add_dataset(sqlite_db, monetdb): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) - assert not db.get_values(columns=["code"]) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) + assert not sqlite_db.get_values(table=Dataset.__table__, columns=["code"]) # Test result = runner.invoke( @@ -170,34 +162,35 @@ def test_add_dataset(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK - assert "dataset" == db.get_values(columns=["code"])[0][0] + assert ( + "dataset" + == sqlite_db.get_values(table=Dataset.__table__, columns=["code"])[0][0] + ) assert result.exit_code == ExitCode.OK - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[2] - assert action_id == 3 - assert action != "" - assert json.loads(action)["action"] == "ADD DATASET" - data = db.execute(f'select * from "data_model:1.0".primary_data').fetchall() + data = monetdb.execute(f'select * from "data_model:1.0".primary_data').fetchall() assert len(data) == 5 @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_add_two_datasets_with_same_name_different_data_model(db): +def test_add_two_datasets_with_same_name_different_data_model(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) 
runner.invoke( add_data_model, - ["tests/data/success/data_model1_v_1_0/CDEsMetadata.json"] + DEFAULT_OPTIONS, + ["tests/data/success/data_model1_v_1_0/CDEsMetadata.json"] + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) # Test @@ -210,7 +203,8 @@ def test_add_two_datasets_with_same_name_different_data_model(db): "-v", "1.0", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) result = runner.invoke( add_dataset, @@ -221,24 +215,25 @@ def test_add_two_datasets_with_same_name_different_data_model(db): "-v", "1.0", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK - assert [(1, "dataset10"), (2, "dataset10")] == db.get_values( - columns=["data_model_id", "code"] + assert [(1, "dataset10"), (2, "dataset10")] == sqlite_db.get_values( + Dataset.__table__, columns=["data_model_id", "code"] ) @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_validate_dataset_with_volume(db): +def test_validate_dataset_with_volume(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) - assert not db.get_values(columns=["code"]) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) + assert not sqlite_db.get_values(table=Dataset.__table__, columns=["code"]) # Test result = runner.invoke( @@ -250,7 +245,8 @@ def test_validate_dataset_with_volume(db): "-v", "1.0", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK @@ -299,10 +295,10 @@ def test_validate_dataset_with_volume(db): @pytest.mark.usefixtures("monetdb_container", "cleanup_db") @pytest.mark.parametrize("data_model,dataset,exception_message", dataset_files) -def test_invalid_dataset_error_cases(data_model, dataset, exception_message, db): +def test_invalid_dataset_error_cases(data_model, dataset, exception_message): runner = CliRunner() - runner.invoke(init, DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) result = runner.invoke( add_data_model, [ @@ -311,7 +307,8 @@ def test_invalid_dataset_error_cases(data_model, dataset, exception_message, db) + data_model + "_v_1_0/CDEsMetadata.json", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK @@ -324,7 +321,8 @@ def test_invalid_dataset_error_cases(data_model, dataset, exception_message, db) "-v", "1.0", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert ( @@ -333,16 +331,23 @@ def test_invalid_dataset_error_cases(data_model, dataset, exception_message, db) ) +def test_validate_no_db(): + runner = CliRunner() + + validation_result = runner.invoke(validate_folder, [ABSOLUTE_PATH_FAIL_DATA_FOLDER]) + assert validation_result.exit_code != ExitCode.OK + + @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_validate_dataset(db): +def test_validate_dataset(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) - assert not db.get_values(columns=["code"]) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) + assert not sqlite_db.get_values(table=Dataset.__table__, columns=["code"]) # Test result = runner.invoke( @@ -356,20 +361,21 @@ def 
test_validate_dataset(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_delete_dataset_with_volume(db): +def test_delete_dataset_with_volume(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) runner.invoke( add_dataset, [ @@ -379,46 +385,44 @@ def test_delete_dataset_with_volume(db): "-v", "1.0", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, + ) + assert ( + "dataset" + == sqlite_db.get_values(table=Dataset.__table__, columns=["code"])[0][0] ) - assert "dataset" == db.get_values(columns=["code"])[0][0] # Test result = runner.invoke( delete_dataset, - ["dataset", "-d", "data_model", "-v", "1.0"] + DEFAULT_OPTIONS, + ["dataset", "-d", "data_model", "-v", "1.0"] + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK - assert not db.get_values(columns=["code"]) - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[3] - assert action_id == 4 - assert action != "" - assert json.loads(action)["action"] == "DELETE DATASET" + assert not sqlite_db.get_values(table=Dataset.__table__, columns=["code"]) @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_load_folder_with_volume(db): +def test_load_folder_with_volume(sqlite_db, monetdb): # Setup runner = CliRunner() # Check dataset not present already - result = runner.invoke(init, DEFAULT_OPTIONS) - assert not db.get_values(columns=["code"]) + result = runner.invoke(init, SQLiteDB_OPTION) + assert not sqlite_db.get_values(table=Dataset.__table__, columns=["code"]) # Test result = runner.invoke( - load_folder, [ABSOLUTE_PATH_SUCCESS_DATA_FOLDER] + DEFAULT_OPTIONS + load_folder, + [ABSOLUTE_PATH_SUCCESS_DATA_FOLDER] + SQLiteDB_OPTION + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK - assert "mipdb_metadata" in db.get_schemas() - assert "data_model:1.0" in db.get_schemas() - assert "data_model1:1.0" in db.get_schemas() - - datasets = db.get_values(columns=["code"]) + datasets = sqlite_db.get_values(table=Dataset.__table__, columns=["code"]) dataset_codes = [code for code, *_ in datasets] expected = [ "dataset", @@ -429,35 +433,32 @@ def test_load_folder_with_volume(db): "dataset_longitudinal", ] assert set(expected) == set(dataset_codes) - ((count, *_), *_) = db.execute( - f'select count(*) from "data_model:1.0".primary_data' + row_ids = monetdb.execute( + f'select row_id from "data_model:1.0".primary_data' ).fetchall() - row_ids = db.execute(f'select row_id from "data_model:1.0".primary_data').fetchall() - assert list(range(1, count + 1)) == [row_id for row_id, *_ in row_ids] + assert list(range(1, len(row_ids) + 1)) == [row[0] for row in row_ids] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_load_folder(db): +def test_load_folder(sqlite_db, monetdb): # Setup runner = CliRunner() # Check dataset not present already - result = runner.invoke(init, DEFAULT_OPTIONS) - assert not db.get_values(columns=["code"]) + result = runner.invoke(init, SQLiteDB_OPTION) + assert not sqlite_db.get_values(table=Dataset.__table__, 
columns=["code"]) # Test result = runner.invoke( load_folder, - [SUCCESS_DATA_FOLDER, "--copy_from_file", False] + DEFAULT_OPTIONS, + [SUCCESS_DATA_FOLDER, "--copy_from_file", False] + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK - assert "mipdb_metadata" in db.get_schemas() - assert "data_model:1.0" in db.get_schemas() - assert "data_model1:1.0" in db.get_schemas() - - datasets = db.get_values(columns=["code"]) + datasets = sqlite_db.get_values(table=Dataset.__table__, columns=["code"]) dataset_codes = [code for code, *_ in datasets] expected = [ "dataset", @@ -468,39 +469,36 @@ def test_load_folder(db): "dataset_longitudinal", ] assert set(expected) == set(dataset_codes) - ((count, *_), *_) = db.execute( - f'select count(*) from "data_model:1.0".primary_data' + row_ids = monetdb.execute( + f'select row_id from "data_model:1.0".primary_data' ).fetchall() - row_ids = db.execute(f'select row_id from "data_model:1.0".primary_data').fetchall() - assert list(range(1, count + 1)) == [row_id for row_id, *_ in row_ids] + assert list(range(1, len(row_ids) + 1)) == [row[0] for row in row_ids] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_load_folder_twice_with_volume(db): +def test_load_folder_twice_with_volume(sqlite_db, monetdb): # Setup runner = CliRunner() # Check dataset not present already - result = runner.invoke(init, DEFAULT_OPTIONS) - assert not db.get_values(columns=["code"]) + result = runner.invoke(init, SQLiteDB_OPTION) + assert not sqlite_db.get_values(table=Dataset.__table__, columns=["code"]) result = runner.invoke( - load_folder, [ABSOLUTE_PATH_SUCCESS_DATA_FOLDER] + DEFAULT_OPTIONS + load_folder, + [ABSOLUTE_PATH_SUCCESS_DATA_FOLDER] + SQLiteDB_OPTION + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK # Test result = runner.invoke( - load_folder, [ABSOLUTE_PATH_SUCCESS_DATA_FOLDER] + DEFAULT_OPTIONS + load_folder, + [ABSOLUTE_PATH_SUCCESS_DATA_FOLDER] + SQLiteDB_OPTION + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK - assert "mipdb_metadata" in db.get_schemas() - assert "data_model:1.0" in db.get_schemas() - assert "data_model1:1.0" in db.get_schemas() - - datasets = db.get_values() - dataset_codes = [code for _, _, code, *_ in datasets] + datasets = sqlite_db.get_values(table=Dataset.__table__, columns=["code"]) + dataset_codes = [dataset[0] for dataset in datasets] expected = [ "dataset", "dataset1", @@ -510,112 +508,91 @@ def test_load_folder_twice_with_volume(db): "dataset_longitudinal", ] assert set(expected) == set(dataset_codes) - ((count, *_), *_) = db.execute( - f'select count(*) from "data_model:1.0".primary_data' + row_ids = monetdb.execute( + f'select row_id from "data_model:1.0".primary_data' ).fetchall() - row_ids = db.execute(f'select row_id from "data_model:1.0".primary_data').fetchall() - assert list(range(1, count + 1)) == [row_id for row_id, *_ in row_ids] + assert list(range(1, len(row_ids) + 1)) == [row[0] for row in row_ids] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_tag_data_model(db): +def test_tag_data_model(sqlite_db): # Setup runner = CliRunner() - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) # Test result = runner.invoke( tag_data_model, - ["data_model", "-t", "tag", "-v", "1.0"] + DEFAULT_OPTIONS, + ["data_model", "-t", 
"tag", "-v", "1.0"] + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK - (properties, *_), *_ = db.execute( - f"select properties from mipdb_metadata.data_models" - ).fetchall() - assert json.loads(properties)["tags"] == ["tag"] - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[1] - assert action_id == 2 - assert action != "" - assert json.loads(action)["action"] == "ADD DATA MODEL TAG" + result = sqlite_db.get_values(table=DataModel.__table__, columns=["properties"]) + properties = result[0][0] + assert properties["tags"] == ["tag"] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_untag_data_model(db): +def test_untag_data_model(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) runner.invoke( tag_data_model, - ["data_model", "-t", "tag", "-v", "1.0"] + DEFAULT_OPTIONS, + ["data_model", "-t", "tag", "-v", "1.0"] + SQLiteDB_OPTION, ) # Test result = runner.invoke( tag_data_model, - ["data_model", "-t", "tag", "-v", "1.0", "-r"] + DEFAULT_OPTIONS, + ["data_model", "-t", "tag", "-v", "1.0", "-r"] + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK - (properties, *_), *_ = db.execute( - f"select properties from mipdb_metadata.data_models" - ).fetchall() - assert json.loads(properties)["tags"] == [] - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - - action_id, action = action_record[3] - assert action_id == 4 - assert action != "" - assert json.loads(action)["action"] == "REMOVE DATA MODEL TAG" + result = sqlite_db.get_values(table=DataModel.__table__, columns=["properties"]) + properties = result[0][0] + assert properties["tags"] == [] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_property_data_model_addition(db): +def test_property_data_model_addition(sqlite_db): # Setup runner = CliRunner() - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) # Test result = runner.invoke( tag_data_model, - ["data_model", "-t", "key=value", "-v", "1.0"] + DEFAULT_OPTIONS, + ["data_model", "-t", "key=value", "-v", "1.0"] + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK - (properties, *_), *_ = db.execute( - f"select properties from mipdb_metadata.data_models" - ).fetchall() + result = sqlite_db.get_values(table=DataModel.__table__, columns=["properties"]) + properties = result[0][0] assert ( - "key" in json.loads(properties)["properties"] - and json.loads(properties)["properties"]["key"] == "value" + "key" in properties["properties"] and properties["properties"]["key"] == "value" ) - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[1] - assert action_id == 2 - assert action != "" - assert json.loads(action)["action"] == "ADD DATA MODEL TAG" @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_property_data_model_deletion(db): +def test_property_data_model_deletion(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - 
runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) runner.invoke( tag_data_model, - ["data_model", "-t", "key=value", "-v", "1.0"] + DEFAULT_OPTIONS, + ["data_model", "-t", "key=value", "-v", "1.0"] + SQLiteDB_OPTION, ) # Test @@ -629,30 +606,23 @@ def test_property_data_model_deletion(db): "1.0", "-r", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK - (properties, *_), *_ = db.execute( - f"select properties from mipdb_metadata.data_models" - ).fetchall() - assert "key" not in json.loads(properties)["properties"] - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - - action_id, action = action_record[3] - assert action_id == 4 - assert action != "" - assert json.loads(action)["action"] == "REMOVE DATA MODEL TAG" + result = sqlite_db.get_values(table=DataModel.__table__, columns=["properties"]) + properties = result[0][0] + assert "key" not in properties["properties"] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_tag_dataset(db): +def test_tag_dataset(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) runner.invoke( add_dataset, [ @@ -664,7 +634,8 @@ def test_tag_dataset(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) # Test @@ -679,29 +650,25 @@ def test_tag_dataset(db): "-v", "1.0", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK - (properties, *_), *_ = db.execute( - f"select properties from mipdb_metadata.datasets" - ).fetchall() - assert '{"tags":["tag"],"properties":{}}' == properties - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[3] - assert action_id == 4 - assert action != "" - assert json.loads(action)["action"] == "ADD DATASET TAG" + properties = sqlite_db.get_values(table=Dataset.__table__, columns=["properties"]) + + assert {"tags": ["tag"], "properties": {}} == properties[0][0] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_untag_dataset(db): +def test_untag_dataset(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - result = runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + result = runner.invoke( + add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS + ) assert result.exit_code == ExitCode.OK result = runner.invoke( @@ -715,7 +682,8 @@ def test_untag_dataset(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK result = runner.invoke( @@ -729,7 +697,7 @@ def test_untag_dataset(db): "-v", "1.0", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK @@ -746,29 +714,23 @@ def test_untag_dataset(db): "1.0", "-r", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK - (properties, *_), *_ = db.execute( - f"select properties from mipdb_metadata.datasets" - 
).fetchall() - assert '{"tags":[],"properties":{}}' == properties - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[3] - assert action_id == 4 - assert action != "" - assert json.loads(action)["action"] == "ADD DATASET TAG" + properties = sqlite_db.get_values(table=Dataset.__table__, columns=["properties"]) + + assert {"tags": [], "properties": {}} == properties[0][0] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_property_dataset_addition(db): +def test_property_dataset_addition(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) runner.invoke( add_dataset, [ @@ -780,7 +742,8 @@ def test_property_dataset_addition(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) # Test @@ -795,29 +758,25 @@ def test_property_dataset_addition(db): "-v", "1.0", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK - (properties, *_), *_ = db.execute( - f"select properties from mipdb_metadata.datasets" - ).fetchall() - assert '{"tags":[],"properties":{"key":"value"}}' == properties - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[3] - assert action_id == 4 - assert action != "" - assert json.loads(action)["action"] == "ADD DATASET TAG" + properties = sqlite_db.get_values(table=Dataset.__table__, columns=["properties"]) + + assert {"tags": [], "properties": {"key": "value"}} == properties[0][0] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_property_dataset_deletion(db): +def test_property_dataset_deletion(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - result = runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + result = runner.invoke( + add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS + ) assert result.exit_code == ExitCode.OK result = runner.invoke( @@ -831,7 +790,8 @@ def test_property_dataset_deletion(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) assert result.exit_code == ExitCode.OK result = runner.invoke( @@ -845,7 +805,7 @@ def test_property_dataset_deletion(db): "-v", "1.0", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK @@ -862,80 +822,64 @@ def test_property_dataset_deletion(db): "1.0", "-r", ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK - (properties, *_), *_ = db.execute( - f"select properties from mipdb_metadata.datasets" - ).fetchall() - assert '{"tags":[],"properties":{}}' == properties - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[3] - assert action_id == 4 - assert action != "" - assert json.loads(action)["action"] == "ADD DATASET TAG" + properties = sqlite_db.get_values(table=Dataset.__table__, columns=["properties"]) + + assert {"tags": [], "properties": {}} == properties[0][0] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_enable_data_model(db): +def 
test_enable_data_model(sqlite_db): # Setup runner = CliRunner() # Check status is disabled - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) result = runner.invoke( - disable_data_model, ["data_model", "-v", "1.0"] + DEFAULT_OPTIONS + disable_data_model, ["data_model", "-v", "1.0"] + SQLiteDB_OPTION ) - assert _get_status(db, "data_models") == "DISABLED" + assert _get_status(sqlite_db, "data_models") == "DISABLED" # Test result = runner.invoke( - enable_data_model, ["data_model", "-v", "1.0"] + DEFAULT_OPTIONS + enable_data_model, ["data_model", "-v", "1.0"] + SQLiteDB_OPTION ) assert result.exit_code == ExitCode.OK - assert _get_status(db, "data_models") == "ENABLED" - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[2] - assert action_id == 3 - assert action != "" - assert json.loads(action)["action"] == "DISABLE DATA MODEL" + assert _get_status(sqlite_db, "data_models") == "ENABLED" @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_disable_data_model(db): +def test_disable_data_model(sqlite_db): # Setup runner = CliRunner() # Check status is enabled - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) - assert _get_status(db, "data_models") == "ENABLED" + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) + assert _get_status(sqlite_db, "data_models") == "ENABLED" # Test result = runner.invoke( - disable_data_model, ["data_model", "-v", "1.0"] + DEFAULT_OPTIONS + disable_data_model, ["data_model", "-v", "1.0"] + SQLiteDB_OPTION ) assert result.exit_code == ExitCode.OK - assert _get_status(db, "data_models") == "DISABLED" - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[2] - assert action_id == 3 - assert action != "" - assert json.loads(action)["action"] == "DISABLE DATA MODEL" + assert _get_status(sqlite_db, "data_models") == "DISABLED" @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_enable_dataset(db): +def test_enable_dataset(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) runner.invoke( add_dataset, [ @@ -947,37 +891,33 @@ def test_enable_dataset(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) result = runner.invoke( disable_dataset, - ["dataset", "-d", "data_model", "-v", "1.0"] + DEFAULT_OPTIONS, + ["dataset", "-d", "data_model", "-v", "1.0"] + SQLiteDB_OPTION, ) - assert _get_status(db, "datasets") == "DISABLED" + assert _get_status(sqlite_db, "datasets") == "DISABLED" # Test result = runner.invoke( enable_dataset, - ["dataset", "-d", "data_model", "-v", "1.0"] + DEFAULT_OPTIONS, + ["dataset", "-d", "data_model", "-v", "1.0"] + SQLiteDB_OPTION, ) assert result.exit_code == ExitCode.OK - assert _get_status(db, "datasets") == "ENABLED" - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[3] 
- assert action_id == 4 - assert action != "" - assert json.loads(action)["action"] == "DISABLE DATASET" + assert _get_status(sqlite_db, "datasets") == "ENABLED" @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_disable_dataset(db): +def test_disable_dataset(sqlite_db): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) runner.invoke( add_dataset, [ @@ -989,36 +929,32 @@ def test_disable_dataset(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) - assert _get_status(db, "datasets") == "ENABLED" + assert _get_status(sqlite_db, "datasets") == "ENABLED" # Test result = runner.invoke( disable_dataset, - ["dataset", "-d", "data_model", "-v", "1.0"] + DEFAULT_OPTIONS, + ["dataset", "-d", "data_model", "-v", "1.0"] + SQLiteDB_OPTION, ) - assert _get_status(db, "datasets") == "DISABLED" + assert _get_status(sqlite_db, "datasets") == "DISABLED" assert result.exit_code == ExitCode.OK - action_record = db.execute(f"select * from mipdb_metadata.actions").fetchall() - action_id, action = action_record[3] - assert action_id == 4 - assert action != "" - assert json.loads(action)["action"] == "DISABLE DATASET" @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_list_data_models(db): +def test_list_data_models(): # Setup runner = CliRunner() # Check data_model not present already - assert "data_model:1.0" not in db.get_schemas() - runner.invoke(init, DEFAULT_OPTIONS) - result = runner.invoke(list_data_models, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) - result_with_data_model = runner.invoke(list_data_models, DEFAULT_OPTIONS) + + runner.invoke(init, SQLiteDB_OPTION) + result = runner.invoke(list_data_models, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) + result_with_data_model = runner.invoke(list_data_models, SQLiteDB_OPTION) runner.invoke( add_dataset, [ @@ -1030,10 +966,11 @@ def test_list_data_models(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) result_with_data_model_and_dataset = runner.invoke( - list_data_models, DEFAULT_OPTIONS + list_data_models, SQLiteDB_OPTION ) # Test @@ -1061,14 +998,14 @@ def test_list_data_models(db): @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_list_datasets(db): +def test_list_datasets(): # Setup runner = CliRunner() # Check dataset not present already - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) - result = runner.invoke(list_datasets, DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) + result = runner.invoke(list_datasets, SQLiteDB_OPTION + MONETDB_OPTIONS) runner.invoke( add_dataset, [ @@ -1080,34 +1017,31 @@ def test_list_datasets(db): "--copy_from_file", True, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, + ) + result_with_dataset = runner.invoke( + list_datasets, SQLiteDB_OPTION + MONETDB_OPTIONS ) - result_with_dataset = runner.invoke(list_datasets, DEFAULT_OPTIONS) # Test assert result.exit_code == ExitCode.OK assert 
result.stdout == "There are no datasets.\n" assert result_with_dataset.exit_code == ExitCode.OK - assert ( - "dataset_id data_model_id code label status count".strip(" ") - in result_with_dataset.stdout.strip(" ") - ) - assert ( - "dataset2 Dataset 2 ENABLED 2".strip(" ") - in result_with_dataset.stdout.strip(" ") - ) - assert ( - "dataset1 Dataset 1 ENABLED 2".strip(" ") - in result_with_dataset.stdout.strip(" ") - ) - assert ( - "dataset Dataset ENABLED 1".strip(" ") - in result_with_dataset.stdout.strip(" ") - ) + assert "dataset_id data_model_id code label status count".strip( + " " + ) in result_with_dataset.stdout.strip(" ") + assert "dataset2 Dataset 2 ENABLED 2".strip( + " " + ) in result_with_dataset.stdout.strip(" ") + assert "dataset1 Dataset 1 ENABLED 2".strip( + " " + ) in result_with_dataset.stdout.strip(" ") + assert "dataset Dataset ENABLED 1".strip( + " " + ) in result_with_dataset.stdout.strip(" ") def _get_status(db, schema_name): - (status, *_), *_ = db.execute( - f'SELECT status FROM "mipdb_metadata".{schema_name}' - ).fetchall() + (status, *_), *_ = db.execute_fetchall(f"SELECT status FROM {schema_name}") return status diff --git a/tests/test_database.py b/tests/test_database.py index a9222f8..f17a242 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -5,55 +5,44 @@ from mipdb import init import pytest -import sqlalchemy as sql - from mipdb.exceptions import DataBaseError -from mipdb.tables import TemporaryTable -from tests.conftest import DATASET_FILE, DEFAULT_OPTIONS +from mipdb.databases.sqlite import DataModel, Dataset +from tests.conftest import DATASET_FILE, MONETDB_OPTIONS, SQLiteDB_OPTION from tests.conftest import DATA_MODEL_FILE -from tests.mocks import MonetDBMock - - -def test_create_schema(): - db = MonetDBMock() - db.create_schema("a_schema") - assert "CREATE SCHEMA a_schema" in db.captured_queries[0] - - -def test_get_schemas(): - db = MonetDBMock() - schemas = db.get_schemas() - assert schemas == ["mipdb_metadata"] @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_update_data_model_status(db): +def test_update_data_model_status(sqlite_db): # Setup runner = CliRunner() - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) # Check the status of data model is disabled - res = db.execute( - 'SELECT status from "mipdb_metadata".data_models where data_model_id = 1' + result = sqlite_db.get_values( + table=DataModel.__table__, + columns=["status"], + where_conditions={"data_model_id": 1}, ) - assert list(res)[0][0] == "ENABLED" + assert result[0][0] == "ENABLED" # Test - db.update_data_model_status("DISABLED", 1) - res = db.execute( - 'SELECT status from "mipdb_metadata".data_models where data_model_id = 1' + sqlite_db.update_data_model_status("DISABLED", 1) + result = sqlite_db.get_values( + table=DataModel.__table__, + columns=["status"], + where_conditions={"data_model_id": 1}, ) - assert list(res)[0][0] == "DISABLED" + assert result[0][0] == "DISABLED" @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def update_dataset_status(db): +def update_dataset_status(sqlite_db): # Setup runner = CliRunner() - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, 
[DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS)
     runner.invoke(
         add_dataset,
         [
@@ -65,54 +54,38 @@
             "--copy_from_file",
             False,
         ]
-        + DEFAULT_OPTIONS,
+        + SQLiteDB_OPTION
+        + MONETDB_OPTIONS,
     )

     # Check the status of dataset is disabled
-    res = db.execute(
-        sql.text('SELECT status from "mipdb_metadata".datasets where dataset_id = 1')
+
+    result = sqlite_db.get_values(
+        table=Dataset.__table__,
+        columns=["status"],
+        where_conditions={"dataset_id": 1},
     )
-    assert list(res)[0][0] == "DISABLED"
+    assert result[0][0] == "DISABLED"

     # Test
-    db.update_dataset_status("ENABLED", 1)
-    res = db.execute(
-        sql.text('SELECT status from "mipdb_metadata".datasets where dataset_id = 1')
+    sqlite_db.update_dataset_status("ENABLED", 1)
+    result = sqlite_db.get_values(
+        table=Dataset.__table__,
+        columns=["status"],
+        where_conditions={"dataset_id": 1},
     )
-    assert list(res)[0][0] == "ENABLED"
-
-
-@pytest.mark.database
-@pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_get_schemas_with_db(db):
-    # Setup
-    runner = CliRunner()
-    # Check schema not present already
-    assert "data_model:1.0" not in db.get_schemas()
-
-    runner.invoke(init, DEFAULT_OPTIONS)
-    runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS)
-
-    # Check schema present
-    schemas = db.get_schemas()
-    assert "data_model:1.0" in db.get_schemas()
-
-
-def test_get_datasets():
-    db = MonetDBMock()
-    datasets = db.get_values()
-    assert datasets == [[1, 2]]
+    assert result[0][0] == "ENABLED"


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_get_datasets_with_db(db):
+def test_get_datasets_with_db(sqlite_db):
     # Setup
     runner = CliRunner()
     # Check dataset not present already
-    runner.invoke(init, DEFAULT_OPTIONS)
-    runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS)
+    runner.invoke(init, SQLiteDB_OPTION)
+    runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS)
     runner.invoke(
         add_dataset,
         [
@@ -124,66 +97,70 @@
             "--copy_from_file",
             False,
         ]
-        + DEFAULT_OPTIONS,
+        + SQLiteDB_OPTION
+        + MONETDB_OPTIONS,
     )

     # Check dataset present
-    datasets = db.get_values(columns=["code"])
-    assert ("dataset",) in datasets
-    assert len(datasets) == 1
+    datasets = sqlite_db.get_values(Dataset.__table__, columns=["code"])
+    assert ("dataset",) == datasets[0]


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_get_data_model_id_with_db(db):
+def test_get_data_model_id_with_db(sqlite_db):
     # Setup
     runner = CliRunner()
-    runner.invoke(init, DEFAULT_OPTIONS)
-    runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS)
+    runner.invoke(init, SQLiteDB_OPTION)
+    runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS)

     # Test success
-    data_model_id = db.get_data_model_id("data_model", "1.0")
+    data_model_id = sqlite_db.get_data_model_id("data_model", "1.0")
     assert data_model_id == 1


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_get_data_model_id_not_found_error(db):
+def test_get_data_model_id_not_found_error(sqlite_db):
     # Setup
     runner = CliRunner()
-    runner.invoke(init, DEFAULT_OPTIONS)
+    runner.invoke(init, SQLiteDB_OPTION)

     # Test when there is no schema in the database with the specific code and version
     with pytest.raises(DataBaseError):
-        data_model_id = db.get_data_model_id("schema", "1.0")
+        data_model_id = sqlite_db.get_data_model_id("schema", "1.0")

@pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_get_data_model_id_duplication_error(db): +def test_get_data_model_id_duplication_error(sqlite_db): # Setup runner = CliRunner() - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) - db.execute( - sql.text( - 'INSERT INTO "mipdb_metadata".data_models (data_model_id, code, version, status)' - "VALUES (2, 'data_model', '1.0', 'DISABLED')" - ) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) + sqlite_db.insert_values_to_table( + DataModel.__table__, + { + "data_model_id": 2, + "label": "data_model", + "code": "data_model", + "version": "1.0", + "status": "DISABLED", + }, ) # Test when there more than one schema ids with the specific code and version with pytest.raises(DataBaseError): - db.get_data_model_id("data_model", "1.0") + sqlite_db.get_data_model_id("data_model", "1.0") @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_get_dataset_id_with_db(db): +def test_get_dataset_id_with_db(sqlite_db): # Setup runner = CliRunner() - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) runner.invoke( add_dataset, [ @@ -195,21 +172,22 @@ def test_get_dataset_id_with_db(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) # Test - dataset_id = db.get_dataset_id("dataset", 1) + dataset_id = sqlite_db.get_dataset_id("dataset", 1) assert dataset_id == 1 @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_get_dataset_id_duplication_error(db): +def test_get_dataset_id_duplication_error(sqlite_db): # Setup runner = CliRunner() - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) runner.invoke( add_dataset, [ @@ -221,66 +199,35 @@ def test_get_dataset_id_duplication_error(db): "--copy_from_file", False, ] - + DEFAULT_OPTIONS, + + SQLiteDB_OPTION + + MONETDB_OPTIONS, ) - db.execute( - sql.text( - 'INSERT INTO "mipdb_metadata".datasets (dataset_id, data_model_id, code, csv_path, status)' - "VALUES (2, 1, 'dataset', '/opt/data/data_model/dataset.csv', 'DISABLED')" - ) + sqlite_db.insert_values_to_table( + Dataset.__table__, + { + "dataset_id": 2, + "data_model_id": 1, + "label": "dataset", + "code": "dataset", + "csv_path": "/opt/data/data_model/dataset.csv", + "status": "DISABLED", + }, ) # Test when there more than one dataset ids with the specific code and data_model_id with pytest.raises(DataBaseError): - dataset_id = db.get_dataset_id("dataset", 1) + dataset_id = sqlite_db.get_dataset_id("dataset", 1) @pytest.mark.database @pytest.mark.usefixtures("monetdb_container", "cleanup_db") -def test_get_dataset_id_not_found_error(db): +def test_get_dataset_id_not_found_error(sqlite_db): # Setup runner = CliRunner() - runner.invoke(init, DEFAULT_OPTIONS) - runner.invoke(add_data_model, [DATA_MODEL_FILE] + DEFAULT_OPTIONS) + runner.invoke(init, SQLiteDB_OPTION) + runner.invoke(add_data_model, [DATA_MODEL_FILE] + SQLiteDB_OPTION + MONETDB_OPTIONS) # Test when there is no dataset in the database with the specific code 
     with pytest.raises(DataBaseError):
-        dataset_id = db.get_dataset_id("dataset", 1)
-
-
-def test_drop_schema():
-    db = MonetDBMock()
-    db.drop_schema("a_schema")
-    assert 'DROP SCHEMA "a_schema" CASCADE' in db.captured_queries[0]
-
-
-def test_create_table():
-    db = MonetDBMock()
-    table = sql.Table("a_table", sql.MetaData(), sql.Column("a_column", sql.Integer))
-    db.create_table(table)
-    assert "CREATE TABLE a_table" in db.captured_queries[0]
-
-
-def test_drop_table():
-    db = MonetDBMock()
-    table = sql.Table("a_table", sql.MetaData(), sql.Column("a_column", sql.Integer))
-    db.drop_table(table)
-    assert "DROP TABLE a_table" in db.captured_queries[0]
-
-
-def test_insert_values_to_table():
-    db = MonetDBMock()
-    table = sql.Table("a_table", sql.MetaData(), sql.Column("a_column", sql.Integer))
-    values = [1, 2, 3]
-    db.insert_values_to_table(table, values)
-    assert "INSERT INTO a_table" in db.captured_queries[0]
-    assert values == db.captured_multiparams[0][0]
-
-
-def test_grant_select_access_rights():
-    db = MonetDBMock()
-    table = TemporaryTable({"col1": "int", "col2": "int"}, db)
-    table.create(db)
-    assert "CREATE TEMPORARY " in db.captured_queries[0]
-    assert "GRANT SELECT" in db.captured_queries[1]
+        dataset_id = sqlite_db.get_dataset_id("dataset", 1)
diff --git a/tests/test_dataelements.py b/tests/test_dataelements.py
index 696440a..7764434 100644
--- a/tests/test_dataelements.py
+++ b/tests/test_dataelements.py
@@ -4,14 +4,27 @@
     flatten_cdes,
     CommonDataElement,
     validate_dataset_present_on_cdes_with_proper_format,
-    validate_longitudinal_data_model, validate_metadata,
+    validate_longitudinal_data_model,
+    validate_metadata,
 )
 from mipdb.exceptions import InvalidDataModelError


 def test_dataelements_type_is_not_valid():
     with pytest.raises(InvalidDataModelError):
-        metadata = {"code": "dataset", "sql_type": "int", "description": "", "enumerations": {"dataset": "Dataset", "dataset1": "Dataset 1", "dataset2": "Dataset 2"}, "label": "Dataset", "methodology": "", "is_categorical": True}
+        metadata = {
+            "code": "dataset",
+            "sql_type": "int",
+            "description": "",
+            "enumerations": {
+                "dataset": "Dataset",
+                "dataset1": "Dataset 1",
+                "dataset2": "Dataset 2",
+            },
+            "label": "Dataset",
+            "methodology": "",
+            "is_categorical": True,
+        }
         validate_metadata("dataset", metadata)


@@ -76,7 +89,7 @@ def test_make_cde():
         "description": "",
         "label": "",
         "methodology": "",
-        "type": "nominal"
+        "type": "nominal",
     }
     cde = CommonDataElement.from_metadata(metadata)
     assert hasattr(cde, "code")
diff --git a/tests/test_properties.py b/tests/test_properties.py
index 499b5ad..a6b28e6 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -7,48 +7,48 @@
 @pytest.fixture
 def properties():
     return Properties(
-        '{"tags": ["tag1", "tag2"], "properties": {"key1": "value1", "key2": "value2"}}'
+        {"tags": ["tag1", "tag2"], "properties": {"key1": "value1", "key2": "value2"}}
     )


 def test_add_property(properties):
     properties.add_property(key="key", value="value", force=False)
-    assert (
-        properties.properties
-        == '{"tags": ["tag1", "tag2"], "properties": {"key1": "value1", "key2": "value2", "key": "value"}}'
-    )
+    assert properties.properties == {
+        "tags": ["tag1", "tag2"],
+        "properties": {"key1": "value1", "key2": "value2", "key": "value"},
+    }


 def test_add_property_with_existing_key_with_force(properties):
     properties.add_property(key="key2", value="value1", force=True)
-    assert (
-        properties.properties
-        == '{"tags": ["tag1", "tag2"], "properties": {"key1": "value1", "key2": "value1"}}'
-    )
+    assert properties.properties == {
+        "tags": ["tag1", "tag2"],
+        "properties": {"key1": "value1", "key2": "value1"},
+    }


 def test_add_tag(properties):
     properties.add_tag(tag="tag")
-    assert (
-        properties.properties
-        == '{"tags": ["tag1", "tag2", "tag"], "properties": {"key1": "value1", "key2": "value2"}}'
-    )
+    assert properties.properties == {
+        "tags": ["tag1", "tag2", "tag"],
+        "properties": {"key1": "value1", "key2": "value2"},
+    }


 def test_remove_property(properties):
     properties.remove_property(key="key1", value="value1")
-    assert (
-        properties.properties
-        == '{"tags": ["tag1", "tag2"], "properties": {"key2": "value2"}}'
-    )
+    assert properties.properties == {
+        "tags": ["tag1", "tag2"],
+        "properties": {"key2": "value2"},
+    }


 def test_remove_tag(properties):
     properties.remove_tag(tag="tag1")
-    assert (
-        properties.properties
-        == '{"tags": ["tag2"], "properties": {"key1": "value1", "key2": "value2"}}'
-    )
+    assert properties.properties == {
+        "tags": ["tag2"],
+        "properties": {"key1": "value1", "key2": "value2"},
+    }


 def test_tag_already_exists(properties):
diff --git a/tests/test_schema.py b/tests/test_schema.py
index a021b90..5e2993e 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -3,13 +3,13 @@
 import pytest

 from mipdb.exceptions import UserInputError
+from mipdb.databases.monetdb import MonetDB
 from mipdb.schema import Schema
-from mipdb.database import DataBase


 @pytest.fixture
 def mockdb():
-    return Mock(spec_set=DataBase)
+    return Mock(spec_set=MonetDB)


 def test_create_schema(mockdb):
diff --git a/tests/test_tables.py b/tests/test_tables.py
index c7d116c..858f5be 100644
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -1,24 +1,14 @@
-import json
-
 from mipdb.exceptions import DataBaseError
 import pytest

+from mipdb.databases.monetdb_tables import PrimaryDataTable
 from mipdb.schema import Schema
-from mipdb.tables import (
-    ActionsTable,
+from mipdb.databases.sqlite_tables import (
     DataModelTable,
-    DatasetsTable,
     MetadataTable,
-    PrimaryDataTable,
 )
 from mipdb.dataelements import CommonDataElement, flatten_cdes
-from tests.mocks import MonetDBMock
-
-
-@pytest.fixture
-def metadata():
-    return Schema("mipdb_metadata")


 @pytest.fixture
@@ -26,134 +16,38 @@
 def cdes(data_model_metadata):
     return flatten_cdes(data_model_metadata)


-def test_get_data_models(metadata):
-    # Setup
-    db = MonetDBMock()
-    # Test
-    data_models = DataModelTable(schema=metadata)
-    data_models.get_data_models(db=db, columns=["data_model_id", "code"])
-
-
-def test_get_data_models_without_valid_columns(metadata):
-    # Setup
-    db = MonetDBMock()
-    # Test
-    data_models = DataModelTable(schema=metadata)
-    with pytest.raises(ValueError):
-        data_models.get_data_models(
-            db=db, columns=["data_model_id", "non-existing column"]
-        )
-
-
-def test_get_datasets(metadata):
-    # Setup
-    db = MonetDBMock()
-    # Test
-    datasets = DatasetsTable(schema=metadata)
-    datasets.get_values(db=db, columns=["dataset_id", "data_model_id"])
-
-
-def test_get_datasets_without_valid_columns(metadata):
-    # Setup
-    db = MonetDBMock()
-    # Test
-    datasets = DatasetsTable(schema=metadata)
-    with pytest.raises(ValueError):
-        datasets.get_values(db=db, columns=["dataset_id", "non-existing column"])
-
-
-def test_data_models_table_mockdb(metadata):
-    # Setup
-    db = MonetDBMock()
-    # Test
-    DataModelTable(schema=metadata).create(db)
-    assert f"CREATE SEQUENCE mipdb_metadata.data_model_id_seq" == db.captured_queries[0]
-    expected_create = (
-        f"\nCREATE TABLE mipdb_metadata.data_models ("
-        "\n\tdata_model_id INTEGER NOT NULL, "
-        "\n\tcode VARCHAR(255) NOT NULL, "
-        "\n\tversion VARCHAR(255) NOT NULL, "
-        "\n\tlabel VARCHAR(255), "
-        "\n\tstatus VARCHAR(255) NOT NULL, "
-        "\n\tproperties JSON, "
-        "\n\tPRIMARY KEY (data_model_id)"
-        "\n)\n\n"
-    )
-    assert expected_create == db.captured_queries[1]
-
-
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_data_models_table_realdb(db):
-    # Setup
-    schema = Schema("schema")
-    schema.create(db)
+def test_data_models_table_realdb(sqlite_db):
     # Test
-    DataModelTable(schema=schema).create(db)
-    res = db.execute(
-        "SELECT name, type FROM sys.columns WHERE "
-        "table_id=(SELECT id FROM sys.tables "
-        "WHERE name='data_models' AND system=FALSE)"
-    )
-    assert res.fetchall() != []
-
-
-def test_actions_table(metadata):
-    # Setup
-    db = MonetDBMock()
-    # Test
-    ActionsTable(schema=metadata).create(db)
-    assert f"CREATE SEQUENCE mipdb_metadata.action_id_seq" in db.captured_queries[0]
-    assert f"CREATE TABLE mipdb_metadata.actions" in db.captured_queries[1]
-
-
-def test_delete_schema(metadata):
-    # Setup
-    db = MonetDBMock()
-    data_models_table = DataModelTable(schema=metadata)
-    # Test
-    data_models_table.delete_data_model(code="schema", version="1.0", db=db)
-    expected = f"DELETE FROM mipdb_metadata.data_models WHERE code = :code AND version = :version "
-    assert expected in db.captured_queries[0]
+    DataModelTable().create(sqlite_db)
+    assert sqlite_db.get_all_tables() != []


 class TestVariablesMetadataTable:
-    def test_create_table_mockdb(self):
-        # Setup
-        db = MonetDBMock()
-        metadata_table = MetadataTable(Schema("schema:1.0"))
-        # Test
-        metadata_table.create(db)
-        assert f'CREATE TABLE "schema:1.0".variables_metadata' in db.captured_queries[0]
-
     @pytest.mark.database
     @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-    def test_create_table_with_db(self, db):
+    def test_create_table_with_db(self, sqlite_db):
         # Setup
-        schema = Schema("schema:1.0")
-        schema.create(db)
-        metadata_table = MetadataTable(schema)
+
+        metadata_table = MetadataTable("data_model:1.0")
         # Test
-        metadata_table.create(db)
-        res = db.execute(f'SELECT * FROM "schema:1.0".variables_metadata').fetchall()
-        assert res == []
+        metadata_table.create(sqlite_db)
+        res = sqlite_db.get_metadata("data_model:1.0")
+        assert res == {}

     @pytest.mark.database
     @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-    def test_insert_values_with_db(self, db, data_model_metadata):
+    def test_insert_values_with_db(self, sqlite_db, data_model_metadata):
         # Setup
-        schema = Schema("schema:1.0")
-        schema.create(db)
-        metadata_table = MetadataTable(schema)
-        metadata_table.create(db)
+
+        metadata_table = MetadataTable("data_model:1.0")
+        metadata_table.create(sqlite_db)

         # Test
         values = metadata_table.get_values_from_cdes(flatten_cdes(data_model_metadata))
-        metadata_table.insert_values(values, db)
-        res = db.execute(
-            "SELECT code, json.filter(metadata, '$.is_categorical') "
-            f'FROM "schema:1.0".variables_metadata'
-        )
-        result = [(name, json.loads(val)) for name, val in res.fetchall()]
+        metadata_table.insert_values(values, sqlite_db)
+        res = sqlite_db.get_metadata("data_model:1.0")
+        result = [(code, metadata["is_categorical"]) for code, metadata in res.items()]
         assert result == [
             ("var1", False),
             ("subjectcode", False),
@@ -165,7 +59,7 @@

     def test_get_values_from_cdes_full_schema_data(self, data_model_metadata):
         # Setup
-        metadata_table = MetadataTable(Schema("schema:1.0"))
+        metadata_table = MetadataTable("data_model:1.0")
         cdes = flatten_cdes(data_model_metadata)
         # Test
         result = metadata_table.get_values_from_cdes(cdes)
@@ -173,17 +67,17 @@

     @pytest.mark.database
     @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-    def test_load_from_db(self, data_model_metadata, db):
+    def test_load_from_db(self, data_model_metadata, sqlite_db):
         # Setup
-        schema = Schema("schema:1.0")
-        schema.create(db)
-        metadata_table = MetadataTable(schema)
-        metadata_table.create(db)
+
+        data_model = "data_model:1.0"
+        metadata_table = MetadataTable(data_model)
+        metadata_table.create(sqlite_db)
         values = metadata_table.get_values_from_cdes(flatten_cdes(data_model_metadata))
-        metadata_table.insert_values(values, db)
+        metadata_table.insert_values(values, sqlite_db)
         # Test
-        schema = Schema("schema:1.0")
-        metadata_table = MetadataTable.from_db(schema, db)
+
+        metadata_table = MetadataTable.from_db(data_model, sqlite_db)
         assert all(isinstance(cde, str) for cde in metadata_table.table.keys())
         assert all(
             isinstance(cde, CommonDataElement) for cde in metadata_table.table.values()
@@ -191,60 +85,40 @@


 class TestPrimaryDataTable:
-    def test_create_table_mockdb(self, cdes):
-        # Setup
-        db = MonetDBMock()
-        schema = Schema("schema:1.0")
-        # Test
-        primary_data_table = PrimaryDataTable.from_cdes(schema, cdes)
-        primary_data_table.create(db)
-        expected = (
-            '\nCREATE TABLE "schema:1.0".primary_data ('
-            '\n\t"row_id" INTEGER NOT NULL, '
-            '\n\t"var1" VARCHAR(255), '
-            '\n\t"subjectcode" VARCHAR(255), '
-            '\n\t"var2" VARCHAR(255), '
-            '\n\t"dataset" VARCHAR(255), '
-            '\n\t"var3" FLOAT, '
-            '\n\t"var4" INTEGER, '
-            '\n\tPRIMARY KEY ("row_id")\n)\n\n'
-        )
-        assert db.captured_queries[0] == expected
-
     @pytest.mark.database
     @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-    def test_create_table_with_db(self, cdes, db):
+    def test_create_table_with_db(self, cdes, monetdb):
         # Setup
         schema = Schema("schema:1.0")
-        schema.create(db)
+        schema.create(monetdb)
         # Test
         primary_data_table = PrimaryDataTable.from_cdes(schema, cdes)
-        primary_data_table.create(db)
-        res = db.execute('SELECT * FROM "schema:1.0".primary_data').fetchall()
+        primary_data_table.create(monetdb)
+        res = monetdb.execute('SELECT * FROM "schema:1.0".primary_data').fetchall()
         assert res == []

     @pytest.mark.database
     @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-    def test_drop_table_with_db(self, cdes, db):
+    def test_drop_table_with_db(self, cdes, monetdb):
         # Setup
         schema = Schema("schema:1.0")
-        schema.create(db)
+        schema.create(monetdb)
         primary_data_table = PrimaryDataTable.from_cdes(schema, cdes)
-        primary_data_table.create(db)
+        primary_data_table.create(monetdb)
         # Test
-        primary_data_table.drop(db)
+        primary_data_table.drop(monetdb)
         with pytest.raises(DataBaseError):
-            db.execute('SELECT * FROM "schema:1.0".primary_data').fetchall()
+            monetdb.execute('SELECT * FROM "schema:1.0".primary_data').fetchall()

     @pytest.mark.database
     @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-    def test_reflect_table_from_db(self, cdes, db):
+    def test_reflect_table_from_db(self, cdes, monetdb):
         # Setup
         schema = Schema("schema:1.0")
-        schema.create(db)
-        PrimaryDataTable.from_cdes(schema, cdes).create(db)
+        schema.create(monetdb)
+        PrimaryDataTable.from_cdes(schema, cdes).create(monetdb)
         # Test
-        primary_data_table = PrimaryDataTable.from_db(schema, db)
+        primary_data_table = PrimaryDataTable.from_db(schema, monetdb)
         column_names = [c.name for c in list(primary_data_table.table.columns)]
         assert column_names != []
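test_load_from_db above pins down the round trip: metadata rows persisted per data model come back as a {code: CommonDataElement} mapping. A hedged sketch of that reconstruction using only helpers the tests call (get_metadata, CommonDataElement.from_metadata) — the real MetadataTable.from_db may differ:

from mipdb.dataelements import CommonDataElement

def metadata_table_from_db(data_model, sqlite_db):
    # get_metadata returns {variable_code: metadata_dict} for one data model.
    rows = sqlite_db.get_metadata(data_model)
    return {code: CommonDataElement.from_metadata(meta) for code, meta in rows.items()}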
diff --git a/tests/test_usecases.py b/tests/test_usecases.py
index 20b1696..f02bc9c 100644
--- a/tests/test_usecases.py
+++ b/tests/test_usecases.py
@@ -1,14 +1,13 @@
-import json
 from unittest.mock import patch

 import pandas as pd
 import pytest

-from mipdb.database import METADATA_SCHEMA, MonetDB
+from mipdb.databases.monetdb import MonetDB
 from mipdb.exceptions import ForeignKeyError, DataBaseError, InvalidDatasetError
 from mipdb.exceptions import UserInputError
-from mipdb.schema import Schema
-from mipdb.tables import DataModelTable, DatasetsTable, ActionsTable
+from mipdb.databases.sqlite import Dataset
+from mipdb.databases.sqlite_tables import DataModelTable, DatasetsTable
 from mipdb.usecases import (
     AddPropertyToDataset,
     check_unique_longitudinal_dataset_primary_keys,
@@ -34,7 +33,6 @@
 from mipdb.usecases import ValidateDataset
 from mipdb.usecases import is_db_initialized
 from tests.conftest import DATASET_FILE, ABSOLUTE_PATH_DATASET_FILE
-from tests.mocks import MonetDBMock


 # NOTE Some use cases have a main responsibility (e.g. add a new data_model) which
@@ -49,17 +47,14 @@
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_init_with_db(db):
+def test_init_with_db(sqlite_db):
     # Setup
-    InitDB(db).execute()
-    metadata = Schema(METADATA_SCHEMA)
-    data_model_table = DataModelTable(schema=metadata)
-    datasets_table = DatasetsTable(schema=metadata)
-    actions_table = ActionsTable(schema=metadata)
+    InitDB(sqlite_db).execute()
+    data_model_table = DataModelTable()
+    datasets_table = DatasetsTable()

     # Test
-    assert "mipdb_metadata" in db.get_schemas()
-    assert data_model_table.exists(db)
-    assert datasets_table.exists(db)
-    assert actions_table.exists(db)
+
+    assert data_model_table.exists(sqlite_db)
+    assert datasets_table.exists(sqlite_db)


@@ -71,216 +66,145 @@ def test_is_db_initialized_with_db_fail(db):

 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_is_db_initialized_with_db_fail(db):
-    InitDB(db).execute()
-    assert is_db_initialized(db=db)
+def test_is_db_initialized_with_db_success(sqlite_db):
+    InitDB(sqlite_db).execute()
+    assert is_db_initialized(db=sqlite_db)


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_init_with_db(db):
+def test_re_init_with_db(sqlite_db):
     # Setup
-    InitDB(db).execute()
-    metadata = Schema(METADATA_SCHEMA)
-    data_model_table = DataModelTable(schema=metadata)
-    datasets_table = DatasetsTable(schema=metadata)
-    actions_table = ActionsTable(schema=metadata)
-    InitDB(db).execute()
-
+    InitDB(sqlite_db).execute()
+    InitDB(sqlite_db).execute()
+    data_model_table = DataModelTable()
+    datasets_table = DatasetsTable()
     # Test
-    assert "mipdb_metadata" in db.get_schemas()
-    assert data_model_table.exists(db)
-    assert datasets_table.exists(db)
-    assert actions_table.exists(db)
+
+    assert data_model_table.exists(sqlite_db)
+    assert datasets_table.exists(sqlite_db)


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_re_init_with_missing_schema_with_db(db):
+def test_re_init_with_missing_schema_with_db(sqlite_db):
     # Setup
-    InitDB(db).execute()
-    metadata = Schema(METADATA_SCHEMA)
-    data_model_table = DataModelTable(schema=metadata)
-    datasets_table = DatasetsTable(schema=metadata)
-    actions_table = ActionsTable(schema=metadata)
-    db.execute(f'DROP SCHEMA "mipdb_metadata" CASCADE')
-    assert "mipdb_metadata" not in db.get_schemas()
-    InitDB(db).execute()
+    InitDB(sqlite_db).execute()
+    data_model_table = DataModelTable()
+    datasets_table = DatasetsTable()
+    InitDB(sqlite_db).execute()

-    # Test
-    assert "mipdb_metadata" in db.get_schemas()
-    assert data_model_table.exists(db)
-    assert datasets_table.exists(db)
-    assert actions_table.exists(db)
+    assert data_model_table.exists(sqlite_db)
+    assert datasets_table.exists(sqlite_db)
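The re-init tests assert that InitDB is safe to run repeatedly, including after one of the metadata tables has gone missing. A minimal sketch of that idempotent behaviour, using only the exists/create operations the tests exercise (illustrative only; the actual InitDB use case lives in mipdb/usecases.py):

def init_db(sqlite_db, tables):
    # Create only what is missing, so a second run is a no-op.
    for table in tables:
        if not table.exists(sqlite_db):
            table.create(sqlite_db)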
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_re_init_with_missing_actions_table_with_db(db):
+def test_re_init_with_missing_actions_table_with_db(sqlite_db):
     # Setup
-    InitDB(db).execute()
-    metadata = Schema(METADATA_SCHEMA)
-    data_model_table = DataModelTable(schema=metadata)
-    datasets_table = DatasetsTable(schema=metadata)
-    actions_table = ActionsTable(schema=metadata)
-    db.execute(f'DROP TABLE "mipdb_metadata".actions')
-    assert "mipdb_metadata" in db.get_schemas()
-    assert data_model_table.exists(db)
-    assert datasets_table.exists(db)
-    assert not actions_table.exists(db)
-    InitDB(db).execute()
+    InitDB(sqlite_db).execute()
+    data_model_table = DataModelTable()
+    datasets_table = DatasetsTable()
+
+    assert data_model_table.exists(sqlite_db)
+    assert datasets_table.exists(sqlite_db)
+    InitDB(sqlite_db).execute()

     # Test
-    assert "mipdb_metadata" in db.get_schemas()
-    assert data_model_table.exists(db)
-    assert datasets_table.exists(db)
-    assert actions_table.exists(db)
+
+    assert data_model_table.exists(sqlite_db)
+    assert datasets_table.exists(sqlite_db)


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_re_init_with_missing_data_models_table_with_db(db):
+def test_re_init_with_missing_data_models_table_with_db(sqlite_db):
     # Setup
-    InitDB(db).execute()
-    metadata = Schema(METADATA_SCHEMA)
-    data_model_table = DataModelTable(schema=metadata)
-    datasets_table = DatasetsTable(schema=metadata)
-    actions_table = ActionsTable(schema=metadata)
-    db.execute(f'DROP TABLE "mipdb_metadata".data_models CASCADE')
-    assert "mipdb_metadata" in db.get_schemas()
-    assert not data_model_table.exists(db)
-    assert datasets_table.exists(db)
-    assert actions_table.exists(db)
-    InitDB(db).execute()
+    InitDB(sqlite_db).execute()
+    data_model_table = DataModelTable()
+    datasets_table = DatasetsTable()
+    sqlite_db.execute(f"DROP TABLE data_models")
+
+    assert not data_model_table.exists(sqlite_db)
+    assert datasets_table.exists(sqlite_db)
+    InitDB(sqlite_db).execute()

     # Test
-    assert "mipdb_metadata" in db.get_schemas()
-    assert data_model_table.exists(db)
-    assert datasets_table.exists(db)
-    assert actions_table.exists(db)
+
+    assert data_model_table.exists(sqlite_db)
+    assert datasets_table.exists(sqlite_db)


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_re_init_with_missing_datasets_table_with_db(db):
+def test_re_init_with_missing_datasets_table_with_db(sqlite_db):
     # Setup
-    InitDB(db).execute()
-    metadata = Schema(METADATA_SCHEMA)
-    data_model_table = DataModelTable(schema=metadata)
-    datasets_table = DatasetsTable(schema=metadata)
-    actions_table = ActionsTable(schema=metadata)
-    db.execute(f'DROP TABLE "mipdb_metadata".datasets')
-    assert "mipdb_metadata" in db.get_schemas()
-    assert data_model_table.exists(db)
-    assert not datasets_table.exists(db)
-    assert actions_table.exists(db)
-    InitDB(db).execute()
+    InitDB(sqlite_db).execute()

-    # Test
-    assert "mipdb_metadata" in db.get_schemas()
-    assert data_model_table.exists(db)
-    assert datasets_table.exists(db)
-    assert actions_table.exists(db)
+    data_model_table = DataModelTable()
+    datasets_table = DatasetsTable()
+    sqlite_db.execute(f"DROP TABLE datasets")
+    assert data_model_table.exists(sqlite_db)
+    assert not datasets_table.exists(sqlite_db)
+    InitDB(sqlite_db).execute()


-def test_add_data_model_mock(data_model_metadata):
-    db = MonetDBMock()
-    AddDataModel(db).execute(data_model_metadata=data_model_metadata)
-    assert 'CREATE SCHEMA "data_model:1.0"' in db.captured_queries[1]
-    assert 'CREATE TABLE "data_model:1.0".primary_data' in db.captured_queries[2]
-    assert (
-        f'GRANT SELECT ON TABLE "data_model:1.0"."primary_data" TO executor WITH GRANT OPTION;'
-        in db.captured_queries[3]
-    )
-    assert f'CREATE TABLE "data_model:1.0".variables_metadata' in db.captured_queries[4]
-    assert (
-        f'GRANT SELECT ON TABLE "data_model:1.0"."variables_metadata" TO executor WITH GRANT OPTION;'
-        in db.captured_queries[5]
-    )
-    assert f'INSERT INTO "data_model:1.0".variables_metadata' in db.captured_queries[6]
-    assert len(db.captured_queries) > 5  # verify that handlers issued more queries
+    # Test
+
+    assert data_model_table.exists(sqlite_db)
+    assert datasets_table.exists(sqlite_db)


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_add_data_model_with_db(db, data_model_metadata):
+def test_add_data_model_with_db(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata=data_model_metadata)
-
-    # Test
-    schemas = db.get_schemas()
-    assert "mipdb_metadata" in schemas
-    assert "data_model:1.0" in schemas
-
-
-def test_delete_data_model():
-    db = MonetDBMock()
-    code = "data_model"
-    version = "1.0"
-    force = True
-    DeleteDataModel(db).execute(code=code, version=version, force=force)
-
-    assert 'DELETE FROM "data_model:1.0"."primary_data"' in db.captured_queries[0]
-    assert "DELETE FROM mipdb_metadata.datasets" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions' in db.captured_queries[3]
-    assert 'DROP SCHEMA "data_model:1.0" CASCADE' in db.captured_queries[4]
-    assert "DELETE FROM mipdb_metadata.data_models" in db.captured_queries[5]
-    assert 'INSERT INTO "mipdb_metadata".actions' in db.captured_queries[7]
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db=sqlite_db, monetdb=monetdb).execute(
+        data_model_metadata=data_model_metadata
+    )


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_delete_data_model_with_db(db, data_model_metadata):
+def test_delete_data_model_with_db(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata=data_model_metadata)
-    schemas = db.get_schemas()
-    assert "mipdb_metadata" in schemas
-    assert "data_model:1.0" in schemas
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata=data_model_metadata)

     # Test with force False
-    DeleteDataModel(db).execute(
+    DeleteDataModel(sqlite_db, monetdb).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         force=False,
     )
-    schemas = db.get_schemas()
-    assert "mipdb_metadata" in schemas
-    assert "data_model:1.0" not in schemas


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_delete_data_model_with_db_with_force(db, data_model_metadata):
+def test_delete_data_model_with_db_with_force(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata=data_model_metadata)
-    schemas = db.get_schemas()
-    assert "mipdb_metadata" in schemas
-    assert "data_model:1.0" in schemas
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata=data_model_metadata)

     # Test with force True
-    DeleteDataModel(db).execute(
+    DeleteDataModel(sqlite_db, monetdb).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         force=True,
     )
-    schemas = db.get_schemas()
-    assert "mipdb_metadata" in schemas
-    assert "data_model:1.0" not in schemas


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_delete_data_model_with_datasets_with_db(db, data_model_metadata):
+def test_delete_data_model_with_datasets_with_db(
+    sqlite_db, monetdb, data_model_metadata
+):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    schemas = db.get_schemas()
-    assert "mipdb_metadata" in schemas
-    assert "data_model:1.0" in schemas
-    ImportCSV(db).execute(
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+
+    ImportCSV(sqlite_db, monetdb).execute(
         csv_path=DATASET_FILE,
         copy_from_file=False,
         data_model_code="data_model",
@@ -289,7 +213,7 @@

     # Test with force False
     with pytest.raises(ForeignKeyError):
-        DeleteDataModel(db).execute(
+        DeleteDataModel(sqlite_db, monetdb).execute(
             code=data_model_metadata["code"],
             version=data_model_metadata["version"],
             force=False,
@@ -298,14 +222,14 @@

 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_delete_data_model_with_datasets_with_db_with_force(db, data_model_metadata):
+def test_delete_data_model_with_datasets_with_db_with_force(
+    sqlite_db, monetdb, data_model_metadata
+):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    schemas = db.get_schemas()
-    assert "mipdb_metadata" in schemas
-    assert "data_model:1.0" in schemas
-    ImportCSV(db).execute(
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+
+    ImportCSV(sqlite_db, monetdb).execute(
         csv_path=DATASET_FILE,
         copy_from_file=False,
         data_model_code="data_model",
@@ -313,80 +237,71 @@ def test_delete_data_model_with_datasets_with_db_with_force(db, data_model_metad
     )

     # Test with force True
-    DeleteDataModel(db).execute(
+    DeleteDataModel(sqlite_db, monetdb).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         force=True,
     )
-    schemas = db.get_schemas()
-    assert "mipdb_metadata" in schemas
-    assert "data_model:1.0" not in schemas


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_add_dataset(db, data_model_metadata):
+def test_add_dataset(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)

     # Test success
-    ImportCSV(db).execute(
+    ImportCSV(sqlite_db, monetdb).execute(
         csv_path=DATASET_FILE,
         copy_from_file=False,
         data_model_code="data_model",
         data_model_version="1.0",
     )
-    (code, csv_path), *_ = db.execute(f"SELECT code, csv_path FROM mipdb_metadata.datasets").fetchall()
+    (code, csv_path), *_ = sqlite_db.execute_fetchall(
+        f"SELECT code, csv_path FROM datasets"
+    )
     assert "dataset.csv" in csv_path
-    res = db.execute('SELECT * FROM "data_model:1.0".primary_data').fetchall()
+    res = monetdb.execute('SELECT * FROM "data_model:1.0".primary_data').fetchall()
     assert res != []


-def test_insert_dataset_mock(data_model_metadata):
-    db = MonetDBMock()
-    ImportCSV(db).execute(DATASET_FILE, False, "data_model", "1.0")
-    assert 'INSERT INTO "data_model:1.0".primary_data' in db.captured_queries[0]
-    assert "Sequence('dataset_id_seq'" in db.captured_queries[1]
-    assert "INSERT INTO mipdb_metadata.datasets" in db.captured_queries[2]
-    assert "Sequence('action_id_seq'" in db.captured_queries[3]
-    assert 'INSERT INTO "mipdb_metadata".actions' in db.captured_queries[4]
-    assert len(db.captured_queries) > 3  # verify that handlers issued more queries
-
-
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_add_dataset_with_db_with_multiple_datasets(db, data_model_metadata):
+def test_add_dataset_with_db_with_multiple_datasets(
+    sqlite_db, monetdb, data_model_metadata
+):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
     # Test
-    ImportCSV(db).execute(
+    ImportCSV(sqlite_db, monetdb).execute(
        csv_path="tests/data/success/data_model_v_1_0/dataset123.csv",
        copy_from_file=False,
        data_model_code="data_model",
        data_model_version="1.0",
     )
-    datasets = db.get_values(columns=["data_model_id", "code"])
+    datasets = sqlite_db.get_values(
+        table=Dataset.__table__, columns=["data_model_id", "code"]
+    )
     assert len(datasets) == 3
     assert all(code in ["dataset", "dataset1", "dataset2"] for dmi, code in datasets)


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_add_dataset_with_small_record_copy(db, data_model_metadata):
+def test_add_dataset_with_small_record_copy(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    with patch("mipdb.tables.RECORDS_PER_COPY", 1):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    with patch("mipdb.databases.monetdb_tables.RECORDS_PER_COPY", 1):
         # Test
-        ImportCSV(db).execute(
+        ImportCSV(sqlite_db, monetdb).execute(
             csv_path=DATASET_FILE,
             copy_from_file=False,
             data_model_code="data_model",
             data_model_version="1.0",
         )
-        records = db.execute(
+        records = monetdb.execute(
             f'SELECT count(*) FROM "data_model:1.0".primary_data'
         ).fetchall()
         assert 5 == records[0][0]

@@ -394,19 +309,21 @@
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_add_dataset_with_small_record_copy_with_volume(db, data_model_metadata):
+def test_add_dataset_with_small_record_copy_with_volume(
+    sqlite_db, monetdb, data_model_metadata
+):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    with patch("mipdb.tables.RECORDS_PER_COPY", 1):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    with patch("mipdb.databases.monetdb_tables.RECORDS_PER_COPY", 1):
         # Test
-        ImportCSV(db).execute(
+        ImportCSV(sqlite_db, monetdb).execute(
             csv_path=ABSOLUTE_PATH_DATASET_FILE,
             copy_from_file=True,
             data_model_code="data_model",
             data_model_version="1.0",
         )
-        records = db.execute(
+        records = monetdb.execute(
             f'SELECT count(*) FROM "data_model:1.0".primary_data'
         ).fetchall()
         assert 5 == records[0][0]
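The copy-path tests patch RECORDS_PER_COPY down so a 5-row CSV must be imported in several batches (or exactly one, in the test that follows). A sketch of the chunking contract being pinned down — the helper names are illustrative, not the monetdb_tables internals:

import pandas as pd

RECORDS_PER_COPY = 100_000  # batch size; the real default lives in mipdb/databases/monetdb_tables.py

def import_csv_in_batches(csv_path, insert_batch):
    # Stream the CSV so each batch triggers one COPY/INSERT round trip.
    for chunk in pd.read_csv(csv_path, chunksize=RECORDS_PER_COPY):
        insert_batch(chunk)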
@@ -414,19 +331,19 @@ def test_add_dataset_with_small_record_copy_with_volume(db, data_model_metadata)
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_csv_legnth_equals_records_per_copy(db, data_model_metadata):
+def test_csv_length_equals_records_per_copy(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    with patch("mipdb.tables.RECORDS_PER_COPY", 5):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    with patch("mipdb.databases.monetdb_tables.RECORDS_PER_COPY", 5):
         # Test
-        ImportCSV(db).execute(
+        ImportCSV(sqlite_db, monetdb).execute(
             csv_path=ABSOLUTE_PATH_DATASET_FILE,
             copy_from_file=True,
             data_model_code="data_model",
             data_model_version="1.0",
         )
-        records = db.execute(
+        records = monetdb.execute(
             f'SELECT count(*) FROM "data_model:1.0".primary_data'
         ).fetchall()
         assert 5 == records[0][0]

@@ -449,12 +366,12 @@ def test_check_duplicate_pairs_fail():

 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_validate_dataset(db, data_model_metadata):
+def test_validate_dataset(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
     # Test success
-    ValidateDataset(db).execute(
+    ValidateDataset(sqlite_db, monetdb).execute(
         csv_path=DATASET_FILE,
         copy_from_file=False,
         data_model_code="data_model",
         data_model_version="1.0",
     )


-def test_delete_dataset():
-    db = MonetDBMock()
-    dataset = "dataset"
-    code = "data_model"
-    version = "1.0"
-    DeleteDataset(db).execute(
-        dataset_code=dataset, data_model_code=code, data_model_version=version
-    )
-
-    assert (
-        'DELETE FROM "data_model:1.0"."primary_data" WHERE dataset = :dataset_name '
-        in db.captured_queries[0]
-    )
-    assert (
-        "DELETE FROM mipdb_metadata.datasets WHERE dataset_id = :dataset_id AND data_model_id = :data_model_id "
-        in db.captured_queries[1]
-    )
-    assert (
-        'INSERT INTO "mipdb_metadata".actions VALUES(:action_id, :action)'
-        in db.captured_queries[3]
-    )
-
-
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_delete_dataset_with_db(db, data_model_metadata):
+def test_delete_dataset_with_db(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    ImportCSV(db).execute(
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    ImportCSV(sqlite_db, monetdb).execute(
         csv_path=DATASET_FILE,
         copy_from_file=False,
         data_model_code="data_model",
         data_model_version="1.0",
     )
-    datasets = db.get_values(columns=["code"])
+    datasets = sqlite_db.get_values(table=Dataset.__table__, columns=["code"])
     assert len(datasets) == 1
     assert ("dataset",) in datasets

     # Test
-    DeleteDataset(db).execute(
+    DeleteDataset(sqlite_db, monetdb).execute(
         dataset_code="dataset",
         data_model_code=data_model_metadata["code"],
         data_model_version=data_model_metadata["version"],
     )
-    datasets = db.get_values(columns=["code"])
+    datasets = sqlite_db.get_values(table=Dataset.__table__, columns=["code"])
     assert len(datasets) == 0
     assert ("dataset",) not in datasets

-def test_enable_data_model():
-    db = MonetDBMock()
-    code = "data_model"
-    version = "1.0"
-    EnableDataModel(db).execute(code=code, version=version)
-    assert "UPDATE mipdb_metadata.data_models" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions' in db.captured_queries[2]
-
-
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_enable_data_model_with_db(db, data_model_metadata):
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata=data_model_metadata)
-    DisableDataModel(db).execute(
+def test_enable_data_model_with_db(sqlite_db, monetdb, data_model_metadata):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata=data_model_metadata)
+    DisableDataModel(sqlite_db).execute(
         code=data_model_metadata["code"], version=data_model_metadata["version"]
     )
-    status = db.execute(f"SELECT status FROM mipdb_metadata.data_models").fetchone()
-    assert status[0] == "DISABLED"
-    EnableDataModel(db).execute(
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM data_models")
+    assert status[0][0] == "DISABLED"
+    EnableDataModel(sqlite_db).execute(
         code=data_model_metadata["code"], version=data_model_metadata["version"]
     )
-    status = db.execute(f"SELECT status FROM mipdb_metadata.data_models").fetchone()
-    assert status[0] == "ENABLED"
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM data_models")
+    assert status[0][0] == "ENABLED"


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_enable_data_model_already_enabled_with_db(db, data_model_metadata):
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    status = db.execute(f"SELECT status FROM mipdb_metadata.data_models").fetchone()
-    assert status[0] == "ENABLED"
+def test_enable_data_model_already_enabled_with_db(
+    sqlite_db, monetdb, data_model_metadata
+):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM data_models")
+    assert status[0][0] == "ENABLED"
     with pytest.raises(UserInputError):
-        EnableDataModel(db).execute(
+        EnableDataModel(sqlite_db).execute(
             code=data_model_metadata["code"], version=data_model_metadata["version"]
         )


-def test_disable_data_model():
-    db = MonetDBMock()
-    code = "data_model"
-    version = "1.0"
-    DisableDataModel(db).execute(code, version)
-    assert "UPDATE mipdb_metadata.data_models" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions' in db.captured_queries[2]
-
-
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_disable_data_model_with_db(db, data_model_metadata):
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    status = db.execute(f"SELECT status FROM mipdb_metadata.data_models").fetchone()
-    assert status[0] == "ENABLED"
-    DisableDataModel(db).execute(
+def test_disable_data_model_with_db(sqlite_db, monetdb, data_model_metadata):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM data_models")
+    assert status[0][0] == "ENABLED"
+    DisableDataModel(sqlite_db).execute(
         code=data_model_metadata["code"], version=data_model_metadata["version"]
     )
-    status = db.execute(f"SELECT status FROM mipdb_metadata.data_models").fetchone()
-    assert status[0] == "DISABLED"
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM data_models")
+    assert status[0][0] == "DISABLED"


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_disable_data_model_already_disabled_with_db(db, data_model_metadata):
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    DisableDataModel(db).execute(
+def test_disable_data_model_already_disabled_with_db(
+    sqlite_db, monetdb, data_model_metadata
+):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    DisableDataModel(sqlite_db).execute(
         code=data_model_metadata["code"], version=data_model_metadata["version"]
     )
-    status = db.execute(f"SELECT status FROM mipdb_metadata.data_models").fetchone()
-    assert status[0] == "DISABLED"
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM data_models")
+    assert status[0][0] == "DISABLED"
     with pytest.raises(UserInputError):
-        DisableDataModel(db).execute(
+        DisableDataModel(sqlite_db).execute(
             code=data_model_metadata["code"], version=data_model_metadata["version"]
         )


-def test_enable_dataset():
-    db = MonetDBMock()
-    dataset = "dataset"
-    code = "data_model"
-    version = "1.0"
-    EnableDataset(db).execute(dataset, code, version)
-    assert "UPDATE mipdb_metadata.datasets" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions' in db.captured_queries[2]
-
-
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_enable_dataset_with_db(db, data_model_metadata):
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    ImportCSV(db).execute(
+def test_enable_dataset_with_db(sqlite_db, monetdb, data_model_metadata):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    ImportCSV(sqlite_db, monetdb).execute(
         csv_path=DATASET_FILE,
         copy_from_file=False,
         data_model_code="data_model",
         data_model_version="1.0",
     )
-    DisableDataset(db).execute(
+    DisableDataset(sqlite_db).execute(
         dataset_code="dataset",
         data_model_code=data_model_metadata["code"],
         data_model_version=data_model_metadata["version"],
     )
-    status = db.execute(f"SELECT status FROM mipdb_metadata.datasets").fetchone()
-    assert status[0] == "DISABLED"
-    EnableDataset(db).execute(
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM datasets")
+    assert status[0][0] == "DISABLED"
+    EnableDataset(sqlite_db).execute(
         dataset_code="dataset",
         data_model_code=data_model_metadata["code"],
         data_model_version=data_model_metadata["version"],
     )
-    status = db.execute(f"SELECT status FROM mipdb_metadata.datasets").fetchone()
-    assert status[0] == "ENABLED"
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM datasets")
+    assert status[0][0] == "ENABLED"


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_enable_dataset_already_enabled_with_db(db, data_model_metadata):
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    ImportCSV(db).execute(
+def test_enable_dataset_already_enabled_with_db(
+    sqlite_db, monetdb, data_model_metadata
+):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    ImportCSV(sqlite_db, monetdb).execute(
         csv_path=DATASET_FILE,
         copy_from_file=False,
         data_model_code="data_model",
         data_model_version="1.0",
     )
-    status = db.execute(f"SELECT status FROM mipdb_metadata.datasets").fetchone()
-    assert status[0] == "ENABLED"
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM datasets")
+    assert status[0][0] == "ENABLED"
     with pytest.raises(UserInputError):
-        EnableDataset(db).execute(
+        EnableDataset(sqlite_db).execute(
             dataset_code="dataset",
             data_model_code=data_model_metadata["code"],
             data_model_version=data_model_metadata["version"],
         )


-def test_disable_dataset():
-    db = MonetDBMock()
-    dataset = "dataset"
-    code = "data_model"
-    version = "1.0"
-    DisableDataset(db).execute(dataset, code, version)
-    assert "UPDATE mipdb_metadata.datasets" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions' in db.captured_queries[2]
-
-
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_disable_dataset_with_db(db, data_model_metadata):
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    ImportCSV(db).execute(
+def test_disable_dataset_with_db(sqlite_db, monetdb, data_model_metadata):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    ImportCSV(sqlite_db, monetdb).execute(
         csv_path=DATASET_FILE,
         copy_from_file=False,
         data_model_code="data_model",
         data_model_version="1.0",
     )
-    status = db.execute(f"SELECT status FROM mipdb_metadata.datasets").fetchone()
-    assert status[0] == "ENABLED"
-    DisableDataset(db).execute(
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM datasets")
+    assert status[0][0] == "ENABLED"
+    DisableDataset(sqlite_db).execute(
         dataset_code="dataset",
         data_model_code=data_model_metadata["code"],
         data_model_version=data_model_metadata["version"],
     )
-    status = db.execute(f"SELECT status FROM mipdb_metadata.datasets").fetchone()
-    assert status[0] == "DISABLED"
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM datasets")
+    assert status[0][0] == "DISABLED"


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_disable_dataset_already_disabled_with_db(db, data_model_metadata):
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    ImportCSV(db).execute(
+def test_disable_dataset_already_disabled_with_db(
+    sqlite_db, monetdb, data_model_metadata
+):
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    ImportCSV(sqlite_db, monetdb).execute(
         csv_path=DATASET_FILE,
         copy_from_file=False,
         data_model_code="data_model",
         data_model_version="1.0",
     )
-    DisableDataset(db).execute(
+    DisableDataset(sqlite_db).execute(
         dataset_code="dataset",
         data_model_code=data_model_metadata["code"],
         data_model_version=data_model_metadata["version"],
     )
-    status = db.execute(f"SELECT status FROM mipdb_metadata.datasets").fetchone()
-    assert status[0] == "DISABLED"
+    status = sqlite_db.execute_fetchall(f"SELECT status FROM datasets")
+    assert status[0][0] == "DISABLED"
     with pytest.raises(UserInputError):
-        DisableDataset(db).execute(
+        DisableDataset(sqlite_db).execute(
             dataset_code="dataset",
             data_model_code=data_model_metadata["code"],
             data_model_version=data_model_metadata["version"],
         )


-def test_tag_data_model():
-    db = MonetDBMock()
-    TagDataModel(db).execute(code="data_model", version="1.0", tag="tag")
-    assert "UPDATE mipdb_metadata.data_models SET properties" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions ' in db.captured_queries[2]
-
-
-def test_untag_data_model():
-    db = MonetDBMock()
-    UntagDataModel(db).execute(code="data_model", version="1.0", tag="tag1")
-    assert "UPDATE mipdb_metadata.data_models SET properties" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions ' in db.captured_queries[2]
-
-
-def test_add_property2data_model():
-    db = MonetDBMock()
-    AddPropertyToDataModel(db).execute(
-        code="data_model", version="1.0", key="key", value="value", force=False
-    )
-    assert "UPDATE mipdb_metadata.data_models SET properties" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions ' in db.captured_queries[2]
-
-
-def test_remove_property_from_data_model():
-    db = MonetDBMock()
-    RemovePropertyFromDataModel(db).execute(
-        code="data_model", version="1.0", key="key1", value="value1"
-    )
-    assert "UPDATE mipdb_metadata.data_models SET properties" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions ' in db.captured_queries[2]
-
-
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_tag_data_model_with_db(db, data_model_metadata):
+def test_tag_data_model_with_db(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)

     # Test
-    TagDataModel(db).execute(
+    TagDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         tag="tag",
     )

-    properties = db.get_data_model_properties(1)
-    assert json.loads(properties)["tags"] == ["tag"]
+    properties = sqlite_db.get_data_model_properties(1)
+    assert properties["tags"] == ["tag"]


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_untag_data_model_with_db(db, data_model_metadata):
+def test_untag_data_model_with_db(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    TagDataModel(db).execute(
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    TagDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         tag="tag1",
     )
-    TagDataModel(db).execute(
+    TagDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         tag="tag2",
     )
-    TagDataModel(db).execute(
+    TagDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         tag="tag3",
     )

     # Test
-    UntagDataModel(db).execute(
+    UntagDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         tag="tag1",
     )

-    properties = db.get_data_model_properties(1)
-    assert json.loads(properties)["tags"] == ["tag2", "tag3"]
+    properties = sqlite_db.get_data_model_properties(1)
+    assert properties["tags"] == ["tag2", "tag3"]


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_add_property2data_model_with_db(db, data_model_metadata):
+def test_add_property2data_model_with_db(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)

     # Test
-    AddPropertyToDataModel(db).execute(
+    AddPropertyToDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         key="key",
@@ -816,20 +640,21 @@
         force=False,
     )

-    properties = db.get_data_model_properties(1)
+    properties = sqlite_db.get_data_model_properties(1)
     assert (
-        "key" in json.loads(properties)["properties"]
-        and json.loads(properties)["properties"]["key"] == "value"
+        "key" in properties["properties"] and properties["properties"]["key"] == "value"
     )


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_add_property2data_model_with_force_and_db(db, data_model_metadata):
+def test_add_property2data_model_with_force_and_db(
+    sqlite_db, monetdb, data_model_metadata
+):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    AddPropertyToDataModel(db).execute(
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    AddPropertyToDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         key="key",
@@ -838,7 +663,7 @@
     )

     # Test
-    AddPropertyToDataModel(db).execute(
+    AddPropertyToDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         key="key",
@@ -846,27 +671,29 @@
         force=True,
     )

-    properties = db.get_data_model_properties(1)
+    properties = sqlite_db.get_data_model_properties(1)
     assert (
-        "key" in json.loads(properties)["properties"]
-        and json.loads(properties)["properties"]["key"] == "value1"
+        "key" in properties["properties"]
+        and properties["properties"]["key"] == "value1"
     )


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_remove_property_from_data_model_with_db(db, data_model_metadata):
+def test_remove_property_from_data_model_with_db(
+    sqlite_db, monetdb, data_model_metadata
+):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    AddPropertyToDataModel(db).execute(
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    AddPropertyToDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         key="key1",
         value="value1",
         force=False,
     )
-    AddPropertyToDataModel(db).execute(
+    AddPropertyToDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         key="key2",
@@ -875,111 +702,59 @@
     )

     # Test
-    RemovePropertyFromDataModel(db).execute(
+    RemovePropertyFromDataModel(sqlite_db).execute(
         code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         key="key1",
         value="value1",
     )

-    properties = db.get_data_model_properties(1)
+    properties = sqlite_db.get_data_model_properties(1)
     assert (
-        "key2" in json.loads(properties)["properties"]
-        and json.loads(properties)["properties"]["key2"] == "value2"
+        "key2" in properties["properties"]
+        and properties["properties"]["key2"] == "value2"
     )


-def test_tag_dataset():
-    db = MonetDBMock()
-    TagDataset(db).execute(
-        dataset_code="dataset",
-        data_model_code="data_model",
-        data_model_version="1.0",
-        tag="tag",
-    )
-    assert "UPDATE mipdb_metadata.datasets SET properties" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions ' in db.captured_queries[2]
-
-
-def test_untag_dataset():
-    db = MonetDBMock()
-    UntagDataset(db).execute(
-        dataset="dataset", data_model_code="data_model", version="1.0", tag="tag1"
-    )
-    assert "UPDATE mipdb_metadata.datasets SET properties" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions ' in db.captured_queries[2]
-
-
-def test_add_property2dataset():
-    db = MonetDBMock()
-    AddPropertyToDataset(db).execute(
-        dataset="dataset",
-        data_model_code="data_model",
-        version="1.0",
-        key="key",
-        value="value",
-        force=False,
-    )
-    assert "UPDATE mipdb_metadata.datasets SET properties" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions ' in db.captured_queries[2]
-
-
-def test_remove_property_from_dataset():
-    db = MonetDBMock()
-    RemovePropertyFromDataset(db).execute(
-        dataset="dataset",
-        data_model_code="data_model",
-        version="1.0",
-        key="key1",
-        value="value1",
-    )
-    assert "UPDATE mipdb_metadata.datasets SET properties" in db.captured_queries[0]
-    assert "Sequence('action_id_seq'" in db.captured_queries[1]
-    assert 'INSERT INTO "mipdb_metadata".actions ' in db.captured_queries[2]
-
-
 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_tag_dataset_with_db(db, data_model_metadata):
+def test_tag_dataset_with_db(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    ImportCSV(db).execute(DATASET_FILE, False, "data_model", "1.0")
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    ImportCSV(sqlite_db, monetdb).execute(DATASET_FILE, False, "data_model", "1.0")

     # Test
-    TagDataset(db).execute(
+    TagDataset(sqlite_db).execute(
         dataset_code="dataset",
         data_model_code=data_model_metadata["code"],
         data_model_version=data_model_metadata["version"],
         tag="tag",
     )

-    properties = db.get_dataset_properties(1)
-    assert properties == '{"tags":["tag"],"properties":{}}'
+    properties = sqlite_db.get_dataset_properties(1)
+    assert properties == {"tags": ["tag"], "properties": {}}


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_untag_dataset_with_db(db, data_model_metadata):
+def test_untag_dataset_with_db(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    ImportCSV(db).execute(DATASET_FILE, False, "data_model", "1.0")
-    TagDataset(db).execute(
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    ImportCSV(sqlite_db, monetdb).execute(DATASET_FILE, False, "data_model", "1.0")
+    TagDataset(sqlite_db).execute(
         dataset_code="dataset",
         data_model_code=data_model_metadata["code"],
         data_model_version=data_model_metadata["version"],
         tag="tag1",
     )
-    TagDataset(db).execute(
+    TagDataset(sqlite_db).execute(
         dataset_code="dataset",
         data_model_code=data_model_metadata["code"],
         data_model_version=data_model_metadata["version"],
         tag="tag2",
     )
-    TagDataset(db).execute(
+    TagDataset(sqlite_db).execute(
         dataset_code="dataset",
         data_model_code=data_model_metadata["code"],
@@ -987,27 +762,27 @@
     )

     # Test
-    UntagDataset(db).execute(
+    UntagDataset(sqlite_db).execute(
         dataset="dataset",
         data_model_code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         tag="tag1",
     )

-    properties = db.get_dataset_properties(1)
-    assert properties == '{"tags":["tag2","tag3"],"properties":{}}'
+    properties = sqlite_db.get_dataset_properties(1)
+    assert properties == {"tags": ["tag2", "tag3"], "properties": {}}


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_add_property2dataset_with_db(db, data_model_metadata):
+def test_add_property2dataset_with_db(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    ImportCSV(db).execute(DATASET_FILE, False, "data_model", "1.0")
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    ImportCSV(sqlite_db, monetdb).execute(DATASET_FILE, False, "data_model", "1.0")

     # Test
-    AddPropertyToDataset(db).execute(
+    AddPropertyToDataset(sqlite_db).execute(
         dataset="dataset",
         data_model_code=data_model_metadata["code"],
         version=data_model_metadata["version"],
@@ -1016,18 +791,18 @@
         force=False,
     )

-    properties = db.get_dataset_properties(1)
-    assert properties == '{"tags":[],"properties":{"key":"value"}}'
+    properties = sqlite_db.get_dataset_properties(1)
+    assert properties == {"tags": [], "properties": {"key": "value"}}


 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_remove_property_from_dataset_with_db(db, data_model_metadata):
+def test_remove_property_from_dataset_with_db(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
-    AddDataModel(db).execute(data_model_metadata)
-    ImportCSV(db).execute(DATASET_FILE, False, "data_model", "1.0")
-    AddPropertyToDataset(db).execute(
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    ImportCSV(sqlite_db, monetdb).execute(DATASET_FILE, False, "data_model", "1.0")
+    AddPropertyToDataset(sqlite_db).execute(
         dataset="dataset",
         data_model_code=data_model_metadata["code"],
         version=data_model_metadata["version"],
@@ -1035,7 +810,7 @@
         value="value",
         force=False,
     )
-    AddPropertyToDataset(db).execute(
+    AddPropertyToDataset(sqlite_db).execute(
         dataset="dataset",
         data_model_code=data_model_metadata["code"],
         version=data_model_metadata["version"],
@@ -1043,7 +818,7 @@
         value="value1",
         force=False,
     )
-    AddPropertyToDataset(db).execute(
+    AddPropertyToDataset(sqlite_db).execute(
         dataset="dataset",
         data_model_code=data_model_metadata["code"],
         version=data_model_metadata["version"],
@@ -1053,24 +828,24 @@
     )

     # Test
-    RemovePropertyFromDataset(db).execute(
+    RemovePropertyFromDataset(sqlite_db).execute(
         dataset="dataset",
         data_model_code=data_model_metadata["code"],
         version=data_model_metadata["version"],
         key="key2",
         value="value2",
     )
-    properties = db.get_dataset_properties(1)
-    assert (
-        properties == '{"tags":[],"properties":{"key":"value","key1":"value1"}}'
-    )
+    properties = sqlite_db.get_dataset_properties(1)
+    assert properties == {"tags": [], "properties": {"key": "value", "key1": "value1"}}

 @pytest.mark.database
 @pytest.mark.usefixtures("monetdb_container", "cleanup_db")
-def test_grant_select_access_rights(db):
+def test_grant_select_access_rights(sqlite_db, monetdb, data_model_metadata):
     # Setup
-    InitDB(db).execute()
+    InitDB(sqlite_db).execute()
+    AddDataModel(sqlite_db, monetdb).execute(data_model_metadata)
+    ImportCSV(sqlite_db, monetdb).execute(DATASET_FILE, False, "data_model", "1.0")

-    # Validation that the user 'executor' can only access data but not drop the data models table
+    # Validate that the user 'executor' can read the data but cannot drop the primary data table
     executor_config = {
@@ -1080,12 +855,11 @@
         "username": "executor",
         "password": "executor",
     }
-    metadata = Schema(METADATA_SCHEMA)
-    data_model_table = DataModelTable(schema=metadata)
+
     db_connected_by_executor = MonetDB.from_config(executor_config)
     result = db_connected_by_executor.execute(
-        f"select * from {METADATA_SCHEMA}.data_models"
+        f'select * from "data_model:1.0"."primary_data"'
     )
-    assert result.fetchall() == []
+    assert result.fetchall() != []
     with pytest.raises(DataBaseError):
-        data_model_table.drop(db_connected_by_executor)
+        db_connected_by_executor.execute('DROP TABLE "data_model:1.0"."primary_data"')
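The final test encodes the access-rights split: the admin account creates and drops tables, while 'executor' only ever receives SELECT, so its reads succeed and its DDL raises DataBaseError. The GRANT statement below mirrors the one asserted in the removed mock test; where exactly it is issued inside the refactored code is an assumption:

GRANT_SELECT = 'GRANT SELECT ON TABLE "{schema}"."{table}" TO executor WITH GRANT OPTION;'

def grant_select_access(monetdb, schema, table):
    # Run with admin credentials right after creating the table.
    monetdb.execute(GRANT_SELECT.format(schema=schema, table=table))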