Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MINOR: fix mssql integration test #17923

Merged
merged 7 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@
VERSIONS["grpc-tools"],
VERSIONS["neo4j"],
"testcontainers==3.7.1;python_version<'3.9'",
"testcontainers==4.8.0;python_version>='3.9'",
"testcontainers==4.8.1;python_version>='3.9'",
TeddyCr marked this conversation as resolved.
Show resolved Hide resolved
"minio==7.2.5",
*plugins["mlflow"],
*plugins["datalake-s3"],
Expand Down
94 changes: 68 additions & 26 deletions ingestion/tests/integration/sql_server/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import shutil
import tempfile

import pytest
from sqlalchemy import create_engine, text
Expand All @@ -22,24 +23,55 @@
from ..conftest import ingestion_config as base_ingestion_config


@pytest.fixture(scope="module")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More of a question on my end: wouldn't scope=module mean the fixture is destroyed at the end of the last test in, for example, sql_server/test_lineage.py?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed. With containers, a best practice is to have session-level fixtures for the containers (since they are the most expensive to set up) and module-level fixtures for data entities like databases or tables.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But in this case the data stays static throughout the tests, so it's fine like this.

def mssql_container(tmp_path_factory):
container = SqlServerContainer(
"mcr.microsoft.com/mssql/server:2017-latest", dbname="AdventureWorks"
@pytest.fixture(scope="session")
def db_name():
    """Return the name of the sample database used by the SQL Server tests.

    The same name is used for the ``.bak`` backup file, the restored
    database, and the database filter patterns in the test configs.
    """
    database_name = "AdventureWorksLT2022"
    return database_name


class CustomSqlServerContainer(SqlServerContainer):
    """SQL Server test container whose image provides a writable ``/data`` dir.

    Before the container is started, a small image is built on top of the
    configured base image. The extra layers create ``/data`` as root, hand it
    over to the ``mssql`` user, and switch back to that user.
    """

    def start(self) -> "DbContainer":
        """Build the derived image and then start the container.

        The Dockerfile is written to a private temporary directory that is
        removed once the build call returns. A fixed path under the system
        temp dir would be shared between concurrent runs, would never be
        cleaned up, and would send any stray files in it to the Docker daemon
        as build context.

        The build is tagged with ``self.image``, i.e. the same name as the
        base image.
        NOTE(review): presumably this is what makes ``super().start()`` run
        the rebuilt image -- confirm against the testcontainers version used.

        Returns:
            Whatever the parent ``start()`` returns.
        """
        # The Dockerfile text is kept exactly as before; only its location
        # on disk changes.
        dockerfile = f"""
FROM {self.image}
USER root
RUN mkdir -p /data
RUN chown mssql /data
USER mssql
"""
        with tempfile.TemporaryDirectory(prefix="mssql") as build_dir:
            dockerfile_path = os.path.join(build_dir, "Dockerfile")
            with open(dockerfile_path, "w", encoding="utf-8") as dockerfile_file:
                dockerfile_file.write(dockerfile)
            # The directory must still exist while the build context is read,
            # so the build call stays inside the ``with`` block.
            self.get_docker_client().build(build_dir, tag=self.image)
        return super().start()

    def _configure(self) -> None:
        """Apply the parent configuration and also set ``SQL_SA_PASSWORD``.

        The variable is set to ``self.password`` on top of whatever the
        parent ``_configure()`` already sets up.
        """
        super()._configure()
        self.with_env("SQL_SA_PASSWORD", self.password)


@pytest.fixture(scope="session")
def mssql_container(tmp_path_factory, db_name):
container = CustomSqlServerContainer(
"mcr.microsoft.com/mssql/server:2022-latest", dbname="master"
)
data_dir = tmp_path_factory.mktemp("data")
shutil.copy(
os.path.join(os.path.dirname(__file__), "data", "AdventureWorks2017.bak"),
os.path.join(os.path.dirname(__file__), "data", f"{db_name}.bak"),
str(data_dir),
)
with open(data_dir / "install.sql", "w") as f:
f.write(
"""
f"""
USE [master]
RESTORE DATABASE [AdventureWorks]
FROM DISK = '/data/AdventureWorks2017.bak'
WITH MOVE 'AdventureWorks2017' TO '/var/opt/mssql/data/AdventureWorks.mdf',
MOVE 'AdventureWorks2017_log' TO '/var/opt/mssql/data/AdventureWorks_log.ldf'
RESTORE FILELISTONLY
FROM DISK = '/data/{db_name}.bak';
GO

RESTORE DATABASE [{db_name}]
FROM DISK = '/data/{db_name}.bak'
WITH MOVE '{db_name}_Data' TO '/var/opt/mssql/data/{db_name}.mdf',
MOVE '{db_name}_Log' TO '/var/opt/mssql/data/{db_name}.ldf';
GO
"""
)
Expand All @@ -49,17 +81,22 @@ def mssql_container(tmp_path_factory):
copy_dir_to_container(str(data_dir), docker_container, "/data")
res = docker_container.exec_run(
[
"/opt/mssql-tools/bin/sqlcmd",
"-S",
"localhost",
"-U",
container.username,
"-P",
container.password,
"-d",
"master",
"-i",
"/data/install.sql",
"bash",
"-c",
" ".join(
[
"/opt/mssql-tools*/bin/sqlcmd",
"-U",
container.username,
"-P",
f"'{container.password}'",
"-d",
"master",
"-i",
"/data/install.sql",
"-C",
]
),
]
)
if res[0] != 0:
Expand All @@ -72,7 +109,7 @@ def mssql_container(tmp_path_factory):
transaciton = conn.begin()
conn.execute(
text(
"SELECT * INTO AdventureWorks.HumanResources.DepartmenCopy FROM AdventureWorks.HumanResources.Department;"
f"SELECT * INTO {db_name}.SalesLT.CustomerCopy FROM {db_name}.SalesLT.Customer;"
)
)
transaciton.commit()
Expand All @@ -91,7 +128,7 @@ def scheme(request):


@pytest.fixture(scope="module")
def create_service_request(mssql_container, scheme, tmp_path_factory):
def create_service_request(mssql_container, scheme, tmp_path_factory, db_name):
return CreateDatabaseServiceRequest(
name="docker_test_" + tmp_path_factory.mktemp("mssql").name + "_" + scheme.name,
serviceType=DatabaseServiceType.Mssql,
Expand All @@ -101,7 +138,7 @@ def create_service_request(mssql_container, scheme, tmp_path_factory):
password=mssql_container.password,
hostPort="localhost:"
+ mssql_container.get_exposed_port(mssql_container.port),
database="AdventureWorks",
database=db_name,
scheme=scheme,
ingestAllDatabases=True,
connectionOptions={
Expand All @@ -115,12 +152,17 @@ def create_service_request(mssql_container, scheme, tmp_path_factory):

@pytest.fixture(scope="module")
def ingestion_config(
db_service, tmp_path_factory, workflow_config, sink_config, base_ingestion_config
db_service,
tmp_path_factory,
workflow_config,
sink_config,
base_ingestion_config,
db_name,
):
base_ingestion_config["source"]["sourceConfig"]["config"][
"databaseFilterPattern"
] = {
"includes": ["TestDB", "AdventureWorks"],
"includes": ["TestDB", db_name],
}
return base_ingestion_config

Expand Down
Binary file not shown.
Binary file not shown.
7 changes: 4 additions & 3 deletions ingestion/tests/integration/sql_server/test_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ def language_config(mssql_container, request):


@pytest.fixture()
def lineage_config(language_config, db_service, workflow_config, sink_config):
def lineage_config(language_config, db_service, workflow_config, sink_config, db_name):
return {
"source": {
"type": "mssql-lineage",
"serviceName": db_service.fullyQualifiedName.root,
"sourceConfig": {
"config": {
"type": "DatabaseLineage",
"databaseFilterPattern": {"includes": ["TestDB", "AdventureWorks"]},
"databaseFilterPattern": {"includes": ["TestDB", db_name]},
},
},
},
Expand All @@ -52,13 +52,14 @@ def test_lineage(
lineage_config,
db_service,
metadata,
db_name,
):
search_cache.clear()
run_workflow(MetadataWorkflow, ingestion_config)
run_workflow(MetadataWorkflow, lineage_config)
department_table = metadata.get_by_name(
Table,
f"{db_service.fullyQualifiedName.root}.AdventureWorks.HumanResources.Department",
f"{db_service.fullyQualifiedName.root}.{db_name}.SalesLT.Customer",
nullable=False,
)
lineage = metadata.get_lineage_by_id(Table, department_table.id.root)
Expand Down
24 changes: 19 additions & 5 deletions ingestion/tests/integration/sql_server/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,29 @@ def test_ingest_metadata(
ingestion_config,
db_service,
metadata,
db_name,
):
run_workflow(MetadataWorkflow, ingestion_config)
table: Table = metadata.get_by_name(
Table,
f"{db_service.fullyQualifiedName.root}.AdventureWorks.HumanResources.Department",
f"{db_service.fullyQualifiedName.root}.{db_name}.SalesLT.Customer",
)
assert table is not None
assert table.columns[0].name.root == "DepartmentID"
assert [c.name.root for c in table.columns] == [
"CustomerID",
"NameStyle",
"Title",
"FirstName",
"MiddleName",
"LastName",
"Suffix",
"CompanyName",
"SalesPerson",
"EmailAddress",
"Phone",
"PasswordHash",
"PasswordSalt",
"rowguid",
"ModifiedDate",
]
assert table.columns[0].constraint == Constraint.PRIMARY_KEY
assert table.columns[1].name.root == "Name"
assert table.columns[2].name.root == "GroupName"
assert table.columns[3].name.root == "ModifiedDate"
4 changes: 2 additions & 2 deletions ingestion/tests/integration/sql_server/test_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


@pytest.fixture()
def usage_config(db_service, workflow_config):
def usage_config(db_service, workflow_config, db_name):
return {
"source": {
"type": "mssql-usage",
Expand All @@ -19,7 +19,7 @@ def usage_config(db_service, workflow_config):
"config": {
"queryLogDuration": 2,
"resultLimit": 1000,
"databaseFilterPattern": {"includes": ["TestDB", "AdventureWorks"]},
"databaseFilterPattern": {"includes": ["TestDB", db_name]},
},
},
},
Expand Down
Loading