Skip to content

Add sql2dbx: LLM-powered SQL to Databricks notebook converter #399

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 173 additions & 0 deletions sql2dbx/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Databricks
.databricks/

# Project specific
.clinerules/
.vscode/
scratch/**
!scratch/README.md
test_output/
databricks.yml
24 changes: 24 additions & 0 deletions sql2dbx/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
title: "sql2dbx"
language: python
author: "Hiroyuki Nakazato"
date: 2025-04-25

tags:
- sql-migration-tool
- multi-dialect-sql
- llm
- automation
---

# sql2dbx
**sql2dbx** is an automation tool designed to convert SQL files into Databricks notebooks. It leverages Large Language Models (LLMs) to perform the conversion based on system prompts tailored for various SQL dialects. sql2dbx consists of a series of Databricks notebooks.

## How to Execute
1. Clone the [databrickslabs/sandbox](https://github.com/databrickslabs/sandbox) repository.
2. Import the `sql2dbx` folder into your Databricks workspace.
3. Run either notebook as your entry point:
- `notebooks/00_main` (English)
- `notebooks/00_main_ja` (Japanese)

These notebooks contain all instructions and documentation needed to use sql2dbx.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
-- ==========================================
-- MySQL EXAMPLE #1: Multi-Statement Data Transformation
-- ==========================================
-- Sample input fixture for sql2dbx. Deliberately exercises MySQL-specific
-- constructs (NOW(), TEMPORARY TABLE, UPDATE ... JOIN, DELETE with table
-- alias, DATE_SUB) so the converter has dialect features to translate.

-- Create a table for orders
CREATE TABLE Orders (
OrderID INT,
CustomerName VARCHAR(100),
OrderDate DATETIME DEFAULT NOW(),
OrderTotal DECIMAL(10,2)
);

-- Insert some sample orders
-- (OrderDate is omitted so each row picks up the NOW() column default)
INSERT INTO Orders (OrderID, CustomerName, OrderTotal)
VALUES
(101, 'Alice', 200.00),
(102, 'Bob', 350.75),
(103, 'Charlie', 99.99);

-- Create a temporary table for order statuses
-- (session-scoped in MySQL; dropped automatically when the session ends)
CREATE TEMPORARY TABLE TempOrderStatus (
OrderID INT,
Status VARCHAR(50)
);

-- Insert statuses
-- (OrderID 104 intentionally has no matching order, and order 103 has no
-- status row, so the UPDATE/DELETE below exercise partial matches)
INSERT INTO TempOrderStatus (OrderID, Status)
VALUES
(101, 'PROCESSING'),
(102, 'SHIPPED'),
(104, 'CANCELLED');

-- Update orders with a discount if they appear in the temporary status table
-- Demonstrates MySQL's UPDATE with JOIN syntax
UPDATE Orders AS o
JOIN TempOrderStatus AS t ON o.OrderID = t.OrderID
SET o.OrderTotal = o.OrderTotal * 0.90 -- 10% discount
WHERE t.Status = 'SHIPPED';

-- Delete any order older than 90 days if not referenced in TempOrderStatus
-- NOTE(review): NOT IN matches no rows if the subquery ever yields a NULL
-- OrderID; the sample data contains none, but NOT EXISTS is safer in general.
DELETE o
FROM Orders AS o
WHERE o.OrderDate < DATE_SUB(NOW(), INTERVAL 90 DAY)
AND o.OrderID NOT IN (SELECT OrderID FROM TempOrderStatus);

-- Final check
SELECT * FROM Orders;

-- Clean up
-- DROP TABLE IF EXISTS TempOrderStatus; -- Temp tables are automatically dropped at the end of the session
DROP TABLE IF EXISTS Orders;
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
-- ==========================================
-- MySQL EXAMPLE #2: Stored Procedure with Threshold Checking
-- ==========================================

DELIMITER $$

-- DemoThresholdCheck: caps the `metric` column of an arbitrary table at
-- p_threshold, reporting the number of capped rows via p_rows_updated
-- (-1 on any SQL error). Demonstrates dynamic SQL (PREPARE/EXECUTE), an
-- EXIT handler, and an explicit transaction — constructs sql2dbx must
-- translate to Databricks notebook code.
CREATE PROCEDURE DemoThresholdCheck(
IN p_table_name VARCHAR(64),
IN p_threshold DECIMAL(10,2),
OUT p_rows_updated INT
)
BEGIN
-- Declare a handler to catch any SQL errors, then roll back
-- (the handler exits the procedure; -1 signals failure to the caller)
DECLARE EXIT HANDLER FOR SQLEXCEPTION
BEGIN
ROLLBACK;
SET p_rows_updated = -1;
END;

-- Start a transaction
START TRANSACTION;

-- 1) Create a temporary table that captures rows above the threshold
-- We'll build this query dynamically based on p_table_name
-- SECURITY NOTE(review): p_table_name is concatenated into the SQL text;
-- identifiers cannot be bound as parameters, so callers must pass only
-- trusted/whitelisted table names to avoid SQL injection.
SET @sql = CONCAT(
'CREATE TEMPORARY TABLE TempData AS ',
'SELECT id, metric ',
'FROM ', p_table_name, ' ',
'WHERE metric > ', p_threshold
);

PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;

-- 2) Update the original table to cap values at the threshold
SET @sql = CONCAT(
'UPDATE ', p_table_name, ' ',
'SET metric = ', p_threshold, ' ',
'WHERE metric > ', p_threshold
);

PREPARE stmt FROM @sql;
EXECUTE stmt;
SET p_rows_updated = ROW_COUNT(); -- track how many rows changed
DEALLOCATE PREPARE stmt;

-- DROP TEMPORARY TABLE IF EXISTS TempData; -- Temp tables are automatically dropped at the end of the session

-- Commit the transaction
COMMIT;
END $$

DELIMITER ;
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Databricks notebook source
# MAGIC %md
# MAGIC # mysql_example1_multi_statement_transformation
# MAGIC This notebook was automatically converted from the script below. It may contain errors, so use it as a starting point and make necessary corrections.
# MAGIC
# MAGIC Source script: `/Workspace/Users/[email protected]/.bundle/sql2dbx/dev/files/examples/mysql/input/mysql_example1_multi_statement_transformation.sql`

# COMMAND ----------

# Create the Orders table
# NOTE(review): DEFAULT column values on Delta tables require the
# 'allowColumnDefaults' table feature — confirm the target runtime/workspace
# has it enabled, otherwise this CREATE fails.
spark.sql("""
CREATE TABLE Orders (
OrderID INT,
CustomerName STRING,
OrderDate TIMESTAMP DEFAULT current_timestamp(),
OrderTotal DECIMAL(10,2)
)
""")

# COMMAND ----------

# Insert data into Orders table
# (OrderDate is omitted so each row picks up the column default)
spark.sql("""
INSERT INTO Orders (OrderID, CustomerName, OrderTotal)
VALUES
(101, 'Alice', 200.00),
(102, 'Bob', 350.75),
(103, 'Charlie', 99.99)
""")

# COMMAND ----------

# Create "temporary" table as Delta table
# (Databricks SQL has no session-scoped TEMPORARY tables like MySQL;
# this regular table must be dropped explicitly at the end)
spark.sql("""
CREATE OR REPLACE TABLE TempOrderStatus (
OrderID INT,
Status STRING
)
""")

# COMMAND ----------

# Insert data into temporary status table
spark.sql("""
INSERT INTO TempOrderStatus (OrderID, Status)
VALUES
(101, 'PROCESSING'),
(102, 'SHIPPED'),
(104, 'CANCELLED')
""")

# COMMAND ----------

# Update Orders using MERGE pattern since Databricks doesn't support JOIN in UPDATE
# (filtering to SHIPPED in the USING subquery mirrors the MySQL WHERE clause)
spark.sql("""
MERGE INTO Orders o
USING (SELECT * FROM TempOrderStatus WHERE Status = 'SHIPPED') t
ON o.OrderID = t.OrderID
WHEN MATCHED THEN
UPDATE SET o.OrderTotal = o.OrderTotal * 0.90
""")

# COMMAND ----------

# Delete old orders not in the status table.
# Fix vs. auto-converted output: Spark's date_sub() returns a DATE (time of
# day truncated to midnight), whereas MySQL's DATE_SUB(NOW(), INTERVAL 90 DAY)
# is a full timestamp. Subtracting an INTERVAL keeps timestamp precision and
# matches the source semantics exactly.
# NOTE(review): NOT IN matches no rows if the subquery yields a NULL OrderID;
# the sample data has none, but NOT EXISTS would be safer in general.
spark.sql("""
DELETE FROM Orders
WHERE OrderDate < current_timestamp() - INTERVAL 90 DAYS
AND OrderID NOT IN (SELECT OrderID FROM TempOrderStatus)
""")

# COMMAND ----------

# Query the results
orders_df = spark.sql("SELECT * FROM Orders")
display(orders_df)  # display() is a Databricks notebook built-in

# COMMAND ----------

# Clean up tables
# (TempOrderStatus is a regular Delta table here — unlike a MySQL TEMPORARY
# table it is NOT dropped automatically, so drop it explicitly)
spark.sql("DROP TABLE IF EXISTS Orders")
spark.sql("DROP TABLE IF EXISTS TempOrderStatus")

# COMMAND ----------

# MAGIC %md
# MAGIC ## Static Syntax Check Results
# MAGIC No syntax errors were detected during the static check.
# MAGIC However, please review the code carefully as some issues may only be detected during runtime.
Loading