Skip to content

Commit

Permalink
Integrate sqlfluff as dbt cli command
Browse files Browse the repository at this point in the history
  • Loading branch information
ismailsimsek committed Oct 16, 2024
1 parent 0da7344 commit 1fd040f
Show file tree
Hide file tree
Showing 7 changed files with 251 additions and 1 deletion.
4 changes: 4 additions & 0 deletions opendbt/dbt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,7 @@ def patch_dbt():
f"Unsupported dbt version {dbt_version}, please make sure dbt version is supported/integrated by opendbt")

# shared code patches
import opendbt.dbt.shared.cli.main
dbt.cli.main.sqlfluff = opendbt.dbt.shared.cli.main.sqlfluff
dbt.cli.main.sqlfluff_lint = opendbt.dbt.shared.cli.main.sqlfluff_lint
dbt.cli.main.sqlfluff_fix = opendbt.dbt.shared.cli.main.sqlfluff_fix
Empty file.
105 changes: 105 additions & 0 deletions opendbt/dbt/shared/cli/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import click
from dbt.cli import requires, params as p
from dbt.cli.main import global_flags, cli

from opendbt.dbt.v17.task.sqlfluff import SqlFluffTasks


# dbt sqlfluff
@cli.group()
@click.pass_context
@global_flags
def sqlfluff(ctx, **kwargs):
    """Lint or fix the SQL files of your project with SQLFluff"""
    # Intentionally empty: this function only registers the `sqlfluff` command
    # group on the dbt CLI. The work is done by the subcommands attached to it
    # via `@sqlfluff.command(...)`. The docstring above is what click shows as
    # the group's `--help` text, so it must describe this group.


# dbt sqlfluff lint
@sqlfluff.command("lint")
@click.pass_context
@global_flags
@p.defer
@p.deprecated_defer
@p.exclude
@p.favor_state
@p.deprecated_favor_state
@p.full_refresh
@p.include_saved_query
@p.indirect_selection
@p.profile
@p.profiles_dir
@p.project_dir
@p.resource_type
@p.select
@p.selector
@p.show
@p.state
@p.defer_state
@p.deprecated_state
@p.store_failures
@p.target
@p.target_path
@p.threads
@p.vars
@requires.postflight
@requires.preflight
@requires.profile
@requires.project
@requires.runtime_config
@requires.manifest(write=False)
def sqlfluff_lint(ctx, **kwargs):
    """Lint the SQL files of your project with SQLFluff"""
    # The `requires.*` decorators above populate `ctx.obj` with the parsed
    # flags, the runtime config and the manifest before this body runs.
    task = SqlFluffTasks(
        ctx.obj["flags"],
        ctx.obj["runtime_config"],
        ctx.obj["manifest"],
    )

    results = task.lint()
    # `success` is False when the task recorded any linting violations.
    success = task.interpret_results(results)
    return results, success


# dbt sqlfluff fix
@sqlfluff.command("fix")
@click.pass_context
@global_flags
@p.defer
@p.deprecated_defer
@p.exclude
@p.favor_state
@p.deprecated_favor_state
@p.full_refresh
@p.include_saved_query
@p.indirect_selection
@p.profile
@p.profiles_dir
@p.project_dir
@p.resource_type
@p.select
@p.selector
@p.show
@p.state
@p.defer_state
@p.deprecated_state
@p.store_failures
@p.target
@p.target_path
@p.threads
@p.vars
@requires.postflight
@requires.preflight
@requires.profile
@requires.project
@requires.runtime_config
@requires.manifest(write=False)
def sqlfluff_fix(ctx, **kwargs):
    """Fix SQLFluff linting violations in the SQL files of your project"""
    # The `requires.*` decorators above populate `ctx.obj` with the parsed
    # flags, the runtime config and the manifest before this body runs.
    task = SqlFluffTasks(
        ctx.obj["flags"],
        ctx.obj["runtime_config"],
        ctx.obj["manifest"],
    )

    results = task.fix()
    # `success` is False when the task still recorded violations.
    success = task.interpret_results(results)
    return results, success
85 changes: 85 additions & 0 deletions opendbt/dbt/v17/task/sqlfluff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import os
from datetime import datetime
from typing import Optional

from dbt.contracts.results import (
CatalogResults,
CatalogArtifact, RunExecutionResult,
)
from dbt.task.compile import CompileTask
from sqlfluff.cli import commands
from sqlfluff.core import Linter, FluffConfig
from sqlfluff.core.linter.linted_dir import LintedDir


class SqlFluffTasks(CompileTask):
    """dbt task that runs SQLFluff over the dbt project root.

    The SQLFluff configuration is loaded from the project root directory.
    Violations found by ``lint``/``fix`` are stored on the ``errors`` attribute
    of a placeholder ``CatalogArtifact`` which both methods return, so that
    ``interpret_results`` can turn them into a success flag.
    """

    def __init__(self, args, config, manifest):
        """Build the SQLFluff config/linter and an empty result artifact.

        Args:
            args: dbt flags, forwarded to ``CompileTask``.
            config: dbt runtime config, forwarded to ``CompileTask``.
            manifest: dbt manifest, forwarded to ``CompileTask``.
        """
        super().__init__(args, config, manifest)
        self.sqlfluff_config = FluffConfig.from_path(path=self.config.project_root)
        self.linter = Linter(self.sqlfluff_config)

        # Placeholder artifact: nothing is compiled or catalogued here, the
        # artifact only serves as the carrier for the linting violations.
        generated_at = datetime.utcnow()
        run_result = RunExecutionResult(
            results=[],
            elapsed_time=0.0,
            generated_at=generated_at,
            args={},
        )
        self.results = CatalogArtifact.from_results(
            nodes={},
            sources={},
            generated_at=generated_at,
            errors=None,
            compile_results=run_result,
        )

    def lint(self) -> CatalogArtifact:
        """Lint the project root and return the artifact carrying the violations."""
        return self.return_result(lint_result=self._lint_project(fix=False))

    def fix(self) -> CatalogArtifact:
        """Lint the project root with fixing enabled and return the artifact.

        NOTE(review): only ``fix=True`` is passed to the linter; whether the
        computed fixes are also written back to disk depends on the installed
        SQLFluff version - confirm against its ``Linter`` API.
        """
        return self.return_result(lint_result=self._lint_project(fix=True))

    def _lint_project(self, fix: bool = False) -> LintedDir:
        """Run SQLFluff on the project root from within the project root.

        The working directory is switched to the project root for the duration
        of the run and restored afterwards, so the process-wide cwd does not
        leak into whatever the caller does next.
        """
        project_root = self.config.project_root
        previous_cwd = os.getcwd()
        os.chdir(project_root)
        try:
            if fix:
                # Fix mode uses a linter created together with a CLI formatter,
                # as the original implementation did; the formatter itself is
                # not needed here.
                linter, _ = commands.get_linter_and_formatter(cfg=self.sqlfluff_config)
            else:
                linter = self.linter
            return linter.lint_path(path=project_root, fix=fix)
        finally:
            os.chdir(previous_cwd)

    def return_result(self, lint_result):
        """Record the violations of ``lint_result`` and print them if any.

        Args:
            lint_result: object returned by the linter's ``lint_path``.

        Returns:
            The task's artifact with ``errors`` set to the list of violations.
        """
        violations: list = lint_result.get_violations()
        self.results.errors = violations
        if violations:
            print("SqlFluff Linting Errors")
            print("\n".join(str(item) for item in lint_result.as_records()))
        return self.results

    @classmethod
    def interpret_results(cls, results: Optional[CatalogResults]) -> bool:
        """Return True when ``results`` exists and carries no errors."""
        if results is None:
            return False
        # A missing or empty ``errors`` attribute both count as success.
        return not getattr(results, "errors", None)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
include_package_data=True,
license="Apache License 2.0",
test_suite='tests',
install_requires=["dbt-duckdb>=1.6"],
install_requires=["dbt-duckdb>=1.6", "sqlfluff", "sqlfluff-templater-dbt"],
extras_require={
"airflow": ["apache-airflow"],
"test": ["testcontainers>=3.7,<4.9"],
Expand Down
39 changes: 39 additions & 0 deletions tests/resources/dbttest/.sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
[sqlfluff]
templater = dbt
dialect = duckdb
# This change (from jinja to dbt templater) will make linting slower
# because linting will first compile dbt code into data warehouse code.
runaway_limit = 1000
max_line_length = 120
indent_unit = space

[sqlfluff:indentation]
tab_space_size = 4

[sqlfluff:layout:type:comma]
spacing_before = touch
line_position = trailing

[sqlfluff:rules:capitalisation.keywords]
capitalisation_policy = upper

[sqlfluff:rules:aliasing.table]
aliasing = explicit

[sqlfluff:rules:aliasing.column]
aliasing = explicit

[sqlfluff:rules:aliasing.expression]
allow_scalar = False

[sqlfluff:rules:capitalisation.identifiers]
extended_capitalisation_policy = upper

[sqlfluff:rules:capitalisation.functions]
capitalisation_policy = lower

[sqlfluff:rules:capitalisation.literals]
capitalisation_policy = lower

[sqlfluff:rules:ambiguous.column_references] # Number in group by
group_by_and_order_by_style = implicit
17 changes: 17 additions & 0 deletions tests/test_dbt_sqlfluff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from pathlib import Path
from unittest import TestCase

from opendbt import OpenDbtProject


class TestDbtDocs(TestCase):
    """Smoke tests running the `sqlfluff` dbt command on the `dbttest` project."""

    RESOURCES_DIR = Path(__file__).parent / "resources"
    DBTTEST_DIR = RESOURCES_DIR / "dbttest"

    def _run_sqlfluff(self, subcommand):
        # The test project directory doubles as the profiles directory.
        project = OpenDbtProject(project_dir=self.DBTTEST_DIR, profiles_dir=self.DBTTEST_DIR)
        project.run(command="sqlfluff", args=[subcommand])

    def test_run_sqlfluff_lint(self):
        """Run `sqlfluff lint` through OpenDbtProject."""
        self._run_sqlfluff('lint')

    def test_run_sqlfluff_fix(self):
        """Run `sqlfluff fix` through OpenDbtProject."""
        self._run_sqlfluff('fix')

0 comments on commit 1fd040f

Please sign in to comment.