Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

edit snippet to make more runnable (#2066) #2079

Merged
merged 1 commit into from
Nov 20, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import os

import modal

from tests.pipeline.utils import assert_load_info


def test_modal_snippet() -> None:
# @@@DLT_SNIPPET_START modal_image
import modal

# Define the Modal Image
image = modal.Image.debian_slim().pip_install(
"dlt>=1.1.0",
"dlt[duckdb]", # destination
"dlt[sql_database]", # source (MySQL)
"dlt[parquet]", # file format dependency
"pymysql", # database driver for MySQL source
)

Expand All @@ -25,19 +24,19 @@ def test_modal_snippet() -> None:
@app.function(
volumes={"/data/": vol},
schedule=modal.Period(days=1),
secrets=[modal.Secret.from_name("sql-secret")],
serialized=True,
serialized=True
)
def load_tables() -> None:
import dlt
import os
from dlt.sources.sql_database import sql_database

# Define the source database credentials; in production, you would save this as a Modal Secret which can be referenced here as an environment variable
os.environ["SOURCES__SQL_DATABASE__CREDENTIALS"] = (
"mysql+pymysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam"
)
# Load tables "family" and "genome"
source = sql_database().with_resources("family", "genome")
# Load tables "family" and "genome" with minimal reflection to avoid column constraint error
source = sql_database(reflection_level="minimal").with_resources("family", "genome")

# Create dlt pipeline object
pipeline = dlt.pipeline(
Expand All @@ -50,7 +49,7 @@ def load_tables() -> None:
)

# Run the pipeline
load_info = pipeline.run(source)
load_info = pipeline.run(source, write_disposition="replace")

# Print run statistics
print(load_info)
Expand Down
Loading