Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 1.6.latest] Redact values from logs due to 'duplicate key' error #782

Merged
merged 1 commit into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230915-091507.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Redact cases where raw data can be leaked in logs
time: 2023-09-15T09:15:07.430443+10:00
custom:
Author: jaypeedevlin
Issue: "772"
11 changes: 8 additions & 3 deletions dbt/adapters/snowflake/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,11 @@

# Module-level logger created with the name "Snowflake".
logger = AdapterLogger("Snowflake")
# URL template for OAuth token requests; the `{}` placeholder sits in the host-name position.
_TOKEN_REQUEST_URL = "https://{}.snowflakecomputing.com/oauth/token-request"
# Matches a "Row Values: [...]" fragment, including contents that span several lines.
# NOTE(review): this is the same pattern as the first ERROR_REDACTION_PATTERNS key below --
# confirm whether the standalone constant is still needed.
ROW_VALUE_REGEX = re.compile(r"Row Values: \[(.|\n)*\]")

# Maps each compiled pattern that can expose raw user data in an error message to the
# replacement text for the matched span. Both patterns use a greedy `(.|\n)*`, so a match
# extends across newlines up to the LAST closing delimiter (`]` or `'`) in the message.
ERROR_REDACTION_PATTERNS = {
# Row contents echoed back in the error text.
re.compile(r"Row Values: \[(.|\n)*\]"): "Row Values: [redacted]",
# Key value echoed back in a "Duplicate field key" error.
re.compile(r"Duplicate field key '(.|\n)*'"): "Duplicate field key '[redacted]'",
}


@dataclass
Expand Down Expand Up @@ -271,13 +275,14 @@ def exception_handler(self, sql):
try:
yield
except snowflake.connector.errors.ProgrammingError as e:
unscrubbed_msg = str(e)
msg = str(e)

# A class of Snowflake errors -- such as a failure from attempting to merge
# duplicate rows -- includes row values in the error message, i.e.
# [12345, "col_a_value", "col_b_value", etc...]. We don't want to log potentially
# sensitive user data.
msg = re.sub(ROW_VALUE_REGEX, "Row Values: [redacted]", unscrubbed_msg)
for regex_pattern, replacement_message in ERROR_REDACTION_PATTERNS.items():
msg = re.sub(regex_pattern, replacement_message, msg)

logger.debug("Snowflake query id: {}".format(e.sfqid))
logger.debug("Snowflake error: {}".format(msg))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pytest

from dbt.tests.util import (
run_dbt,
)

# Model SQL that builds two rows sharing the key 'foo' and passes them to object_agg.
# TestDuplicateKeyNotInExceptions runs this model expecting a failure whose message
# contains a redacted "Duplicate field key" error and no literal 'foo'.
# NOTE(review): the name says "view" but the config materializes a table -- confirm intent.
_MODELS__view = """
{{ config(
materialized='table',
) }}

with dupes as (
select 'foo' as key, 1 as value
union all
select 'foo' as key, 2 as value
)

select
object_agg(key, value) as agg
from dupes
"""


class TestDuplicateKeyNotInExceptions:
    """Checks that a duplicated object_agg key is redacted from the failure message."""

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the single model file used by this test class."""
        model_files = {}
        model_files["model.sql"] = _MODELS__view
        return model_files

    def test_row_values_were_scrubbed_from_duplicate_merge_exception(self, project):
        """Run the model expecting failure, then inspect the one result's message."""
        run_results = run_dbt(["run", "-s", "model"], expect_pass=False)
        assert len(run_results) == 1

        # The redaction marker must be present in the reported error.
        assert "Duplicate field key '[redacted]'" in run_results[0].message
        # The literal key from the model SQL must not appear in the message.
        assert "'foo'" not in run_results[0].message
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ def test_row_values_were_scrubbed_from_duplicate_merge_exception(self, project):
result = run_dbt(["run", "-s", "model"], expect_pass=False)
assert len(result) == 1
assert "Row Values: [redacted]" in result[0].message
assert "'one'" not in result[0].message