Skip to content

Commit

Permalink
add test; move function to util
Browse files Browse the repository at this point in the history
  • Loading branch information
linglp committed Feb 13, 2025
1 parent 9a24156 commit c7872de
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 57 deletions.
63 changes: 6 additions & 57 deletions schematic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
from schematic.loader import LOADER
from schematic.version import __version__
from dotenv import load_dotenv
from schematic.utils.remove_sensitive_data_utils import (
redact_string,
redacted_sensitive_data_in_exception,
)

# NOTE(review): presumably turns off the Synapse client's caching for every
# client created after this package is imported — confirm against synapseclient.
Synapse.allow_client_caching(False)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
Expand All @@ -58,59 +62,6 @@
USER_AGENT |= USER_AGENT_LIBRARY


class FilterSensitiveData:
    """Helper that scrubs sensitive data out of strings and exception attributes.

    Each known sensitive pattern is replaced with a ``[REDACTED_<NAME>]``
    placeholder, where ``<NAME>`` is the upper-cased name of the pattern.
    """

    def __init__(self) -> None:
        # pattern name -> raw regular expression describing the sensitive text
        self.sensitive_patterns = {
            "google_sheets": r"https://sheets\.googleapis\.com/v4/spreadsheets/[\w-]+"
        }

        # same names, with every expression compiled up front
        self._compiled_patterns = {}
        for label, expression in self.sensitive_patterns.items():
            self._compiled_patterns[label] = re.compile(expression)

    def _redact_string(self, value: str) -> str:
        """Replace every match of a sensitive pattern with its placeholder.

        Args:
            value (str): a string that may contain sensitive data
        Returns:
            str: the string with all matches redacted
        """
        cleaned = value
        for label in self._compiled_patterns:
            placeholder = f"[REDACTED_{label.upper()}]"
            cleaned = self._compiled_patterns[label].sub(placeholder, cleaned)
        return cleaned

    def redacted_sensitive_data_in_exception(
        self, exception_attributes: Dict[str, str]
    ) -> Dict[str, str]:
        """Return a copy of the attributes with message and stacktrace redacted.

        Args:
            exception_attributes (dict): a dictionary of exception attributes
        Returns:
            dict: a new dictionary; only the "exception.message" and
                "exception.stacktrace" values are redacted, the rest are
                carried over unchanged
        """
        keys_to_scrub = ("exception.message", "exception.stacktrace")
        return {
            key: self._redact_string(value) if key in keys_to_scrub else value
            for key, value in exception_attributes.items()
        }


class AttributePropagatingSpanProcessor(SpanProcessor):
"""A custom span processor that propagates specific attributes from the parent span
to the child span when the child span is started.
Expand Down Expand Up @@ -277,11 +228,9 @@ def _readable_span_alternate(self: SpanSdk) -> ReadableSpan:
redacted_events = []
for event in self._events:
attributes = event.attributes
redacted_event_attributes = (
FilterSensitiveData().redacted_sensitive_data_in_exception(attributes)
)
redacted_event_attributes = redacted_sensitive_data_in_exception(attributes)
redacted_event = Event(
name=FilterSensitiveData()._redact_string(event.name),
name=redact_string(event.name),
attributes=redacted_event_attributes,
timestamp=event.timestamp,
)
Expand Down
44 changes: 44 additions & 0 deletions schematic/utils/remove_sensitive_data_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from typing import Dict
import re


# Pattern name -> compiled regular expression matching sensitive text.
# Compiled once at import time so that redact_string, which runs for every
# span event, does not rebuild the table on each call.
_SENSITIVE_PATTERNS = {
    "google_sheets": re.compile(
        r"https://sheets\.googleapis\.com/v4/spreadsheets/[\w-]+"
    ),
}


def redact_string(value: str) -> str:
    """Remove sensitive data from a string.

    Every match of a known sensitive pattern is replaced with
    ``[REDACTED_<NAME>]``, where ``<NAME>`` is the upper-cased pattern name.
    Text following the match (for example ``:batchUpdate?...`` after a
    spreadsheet id) is left in place.

    Args:
        value (str): a string that may contain sensitive data
    Returns:
        str: the string with sensitive data redacted
    """
    redacted = value
    for pattern_name, pattern in _SENSITIVE_PATTERNS.items():
        redacted = pattern.sub(f"[REDACTED_{pattern_name.upper()}]", redacted)
    return redacted


def redacted_sensitive_data_in_exception(
    exception_attributes: Dict[str, str]
) -> Dict[str, str]:
    """Return a copy of the exception attributes with sensitive data redacted.

    Args:
        exception_attributes (dict): a dictionary of exception attributes
    Returns:
        dict: a new dictionary in which the "exception.message" and
            "exception.stacktrace" values have been passed through
            redact_string; all other entries are carried over unchanged
    """
    # only these attributes carry free text that may embed sensitive URLs
    keys_to_scrub = ("exception.message", "exception.stacktrace")
    return {
        name: redact_string(text) if name in keys_to_scrub else text
        for name, text in exception_attributes.items()
    }
33 changes: 33 additions & 0 deletions tests/unit/test_filter_sensitive_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from schematic.utils.remove_sensitive_data_utils import (
redact_string,
redacted_sensitive_data_in_exception,
)


class TestFilterSensitiveData:
    """Unit tests for the sensitive-data redaction helpers."""

    def test_redact_string(self) -> None:
        """A Google Sheets API URL inside an error message is replaced by its placeholder."""
        raw_message = "googleapiclient.errors.HttpError: <HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets/11234budyhf:batchUpdate?fields=%2A&alt=json returned abc>"
        expected_message = "googleapiclient.errors.HttpError: <HttpError 400 when requesting [REDACTED_GOOGLE_SHEETS]:batchUpdate?fields=%2A&alt=json returned abc>"

        assert redact_string(raw_message) == expected_message

    def test_redacted_sensitive_data_in_exception(self) -> None:
        """Both the exception message and the stacktrace attribute are redacted."""
        raw_attributes = {
            "exception.message": "googleapiclient.errors.HttpError: <HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets/11234budyhf:batchUpdate?fields=%2A&alt=json returned>",
            "exception.stacktrace": 'Traceback (most recent call last):\n File "<stdin>", line 1, in <module>\n File "<string>", line 1, in <module>\n File "/usr/local/lib/python3.7/dist-packages/googleapiclient/_helpers.py", line 134, in positional_wrapper\n return wrapped(*args, **kwargs)\n File "/usr/local/lib/python3.7/dist-packages/googleapiclient/http.py", line 905, in execute\n raise HttpError(resp, content, uri=self.uri)\ngoogleapiclient.errors.HttpError: <HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets/11234budyhf:batchUpdate?fields=%2A&alt=json returned>',
        }
        expected_message = "googleapiclient.errors.HttpError: <HttpError 400 when requesting [REDACTED_GOOGLE_SHEETS]:batchUpdate?fields=%2A&alt=json returned>"
        expected_stacktrace = 'Traceback (most recent call last):\n File "<stdin>", line 1, in <module>\n File "<string>", line 1, in <module>\n File "/usr/local/lib/python3.7/dist-packages/googleapiclient/_helpers.py", line 134, in positional_wrapper\n return wrapped(*args, **kwargs)\n File "/usr/local/lib/python3.7/dist-packages/googleapiclient/http.py", line 905, in execute\n raise HttpError(resp, content, uri=self.uri)\ngoogleapiclient.errors.HttpError: <HttpError 400 when requesting [REDACTED_GOOGLE_SHEETS]:batchUpdate?fields=%2A&alt=json returned>'

        scrubbed = redacted_sensitive_data_in_exception(raw_attributes)

        assert scrubbed["exception.message"] == expected_message
        assert scrubbed["exception.stacktrace"] == expected_stacktrace

0 comments on commit c7872de

Please sign in to comment.