Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PoC] - prevent leaking of secrets when running validate_dict #2027

Draft
wants to merge 1 commit into
base: devel
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion dlt/common/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Any, AnyStr, Dict, List, Sequence, Optional, Iterable, Type, TypedDict

from dlt.common.typing import StrAny


class ExceptionTrace(TypedDict, total=False):
"""Exception trace. NOTE: we intend to change it with an extended line by line trace with code snippets"""
Expand Down Expand Up @@ -97,18 +99,22 @@ class DictValidationException(DltException):
def __init__(
self,
msg: str,
doc: StrAny,
path: str,
expected_type: Type[Any] = None,
field: str = None,
value: Any = None,
nested_exceptions: List["DictValidationException"] = None,
) -> None:
from dlt.common.utils import obfuscate_values_in_string

self.doc = doc
self.path = path
self.expected_type = expected_type
self.field = field
self.value = value
self.nested_exceptions = nested_exceptions
self.msg = msg
self.msg = obfuscate_values_in_string(doc, msg)
super().__init__(msg)

def __str__(self) -> str:
Expand Down
8 changes: 6 additions & 2 deletions dlt/common/schema/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool:
if not isinstance(pv, str):
raise DictValidationException(
f"field {pk} value {pv} has invalid type {type(pv).__name__} while str is expected",
{},
path,
t,
pk,
Expand All @@ -255,6 +256,7 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool:
except Exception as e:
raise DictValidationException(
f"field {pk} value {pv[3:]} does not compile as regex: {str(e)}",
{},
path,
t,
pk,
Expand All @@ -264,6 +266,7 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool:
if RE_NON_ALPHANUMERIC_UNDERSCORE.match(pv):
raise DictValidationException(
f"field {pk} value {pv} looks like a regex, please prefix with re:",
{},
path,
t,
pk,
Expand All @@ -283,6 +286,7 @@ def validator(path: str, pk: str, pv: Any, t: Any) -> bool:
raise DictValidationException(
f"field {pk} value {pv} has invalid type {type(pv).__name__} while"
" str is expected",
{},
path,
t,
pk,
Expand All @@ -291,11 +295,11 @@ def validator(path: str, pk: str, pv: Any, t: Any) -> bool:
try:
if naming.normalize_path(pv) != pv:
raise DictValidationException(
f"field {pk}: {pv} is not a valid column name", path, t, pk, pv
f"field {pk}: {pv} is not a valid column name", {}, path, t, pk, pv
)
except ValueError:
raise DictValidationException(
f"field {pk}: {pv} is not a valid column name", path, t, pk, pv
f"field {pk}: {pv} is not a valid column name", {}, path, t, pk, pv
)
return True
else:
Expand Down
39 changes: 39 additions & 0 deletions dlt/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,3 +635,42 @@ def __getattribute__(self, name: str) -> Any:
raise RuntimeError("This instance has been dropped and cannot be used anymore.")

return DefunctClass


def obfuscate_string(s: str) -> str:
"""Obfuscates string by replacing some or all characters with asterisks"""
if len(s) < 6:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd consider replacing this magic number with a constant and maybe use a single constant for both this place as well as here:

def _mask_secret(secret: Optional[str]) -> str:

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually I think we can get rid of this mask_secrets code entirely, no?

return "*" * len(s)
return s[0] + "*" * (len(s) - 2) + s[-1]


def all_strings(d: StrAny) -> List[str]:
"""Returns all string values found in the given object"""
strings = []

if isinstance(d, str):
return [d]
if isinstance(d, dict):
for v in d.values():
if isinstance(v, str):
strings.append(v)
elif isinstance(v, dict):
strings.extend(all_strings(v))
elif isinstance(d, list):
for v in d:
strings.extend(all_strings(v))
else:
return []
return strings


def obfuscate_values_in_string(d: StrAny, msg: str) -> str:
"""Obfuscates all string values found in the dictionary and its nested dictionaries in the message"""

# create mapping of obfuscated strings
obfuscated_strings = {s: obfuscate_string(s) for s in all_strings(d)}

for s in obfuscated_strings:
msg = msg.replace(s, obfuscated_strings[s])

return msg
16 changes: 12 additions & 4 deletions dlt/common/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ def validate_dict(
# check missing props
missing = set(required_props.keys()).difference(props.keys())
if len(missing):
raise DictValidationException(f"following required fields are missing {missing}", path)
raise DictValidationException(f"following required fields are missing {missing}", doc, path)
# check unknown props
unexpected = set(props.keys()).difference(allowed_props.keys())
if len(unexpected):
raise DictValidationException(f"following fields are unexpected {unexpected}", path)
raise DictValidationException(f"following fields are unexpected {unexpected}", doc, path)

def verify_prop(pk: str, pv: Any, t: Any) -> None:
# covers none in optional and union types
Expand Down Expand Up @@ -108,6 +108,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
msg += f"For {get_type_name(failed.expected_type)}: " + str(failed) + "\n"
raise DictValidationException(
msg,
doc,
path,
t,
pk,
Expand All @@ -118,13 +119,14 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
a_l = get_literal_args(t)
if pv not in a_l:
raise DictValidationException(
f"field '{pk}' with value {pv} is not one of: {a_l}", path, t, pk, pv
f"field '{pk}' with value {pv} is not one of: {a_l}", doc, path, t, pk, pv
)
elif t in [int, bool, str, float]:
if not isinstance(pv, t):
raise DictValidationException(
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
f" '{t.__name__}' is expected",
doc,
path,
t,
pk,
Expand All @@ -135,6 +137,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
raise DictValidationException(
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
f" '{get_type_name(t)}' is expected",
doc,
path,
t,
pk,
Expand All @@ -146,6 +149,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
raise DictValidationException(
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
" 'list' is expected",
doc,
path,
t,
pk,
Expand All @@ -160,6 +164,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
raise DictValidationException(
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
" 'dict' is expected",
doc,
path,
t,
pk,
Expand All @@ -170,7 +175,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
for d_k, d_v in pv.items():
if not isinstance(d_k, str):
raise DictValidationException(
f"field '{pk}' with key {d_k} must be a string", path, t, pk, d_k
f"field '{pk}' with key {d_k} must be a string", doc, path, t, pk, d_k
)
verify_prop(f"{pk}[{d_k}]", d_v, d_v_t)
elif t is Any:
Expand All @@ -188,6 +193,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
raise DictValidationException(
f"field '{pk}' expects callable (function or class instance) but got "
f" '{pv}'. Mind that signatures are not validated",
doc,
path,
t,
pk,
Expand All @@ -203,6 +209,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
raise DictValidationException(
f"field '{pk}' expects class '{type_name}' but got instance of"
f" '{pv_type_name}'",
doc,
path,
t,
pk,
Expand All @@ -212,6 +219,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
type_name = get_type_name(t)
raise DictValidationException(
f"field '{pk}' has expected type '{type_name}' which lacks validator",
doc,
path,
t,
pk,
Expand Down
16 changes: 16 additions & 0 deletions tests/common/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Optional,
Union,
)
from dlt.common.configuration.specs import BaseConfiguration

from dlt.common import Decimal, jsonpath
from dlt.common.exceptions import DictValidationException
Expand Down Expand Up @@ -406,6 +407,21 @@ class TTestRecordClassUnion(TypedDict):
validate_dict(TTestRecordClassUnion, test_item_2, path=".")


def test_secrets_obfuscation() -> None:
class Config(TypedDict):
a: str
b: int

with pytest.raises(DictValidationException) as e:
validate_dict(Config, {"a": "123456", "b": {"c": "inner_value"}}, ".")
assert "inner_value" not in e.value.msg
assert "123456" not in e.value.msg
# NOTE: message before this change was:
# "field 'b' with value {'c': 'inner_value'} has invalid type 'dict' while 'int' is expected"
# "inner_value" is now obfuscated
assert "{'c': 'i*********e'}" in e.value.msg


# def test_union_merge() -> None:
# """Overriding fields is simply illegal in TypedDict"""
# class EndpointResource(TypedDict, total=False):
Expand Down
Loading