diff --git a/schematic/__init__.py b/schematic/__init__.py
index 12449e686..317f2a0ab 100644
--- a/schematic/__init__.py
+++ b/schematic/__init__.py
@@ -40,6 +40,10 @@
 from schematic.loader import LOADER
 from schematic.version import __version__
 from dotenv import load_dotenv
+from schematic.utils.remove_sensitive_data_utils import (
+    redact_string,
+    redacted_sensitive_data_in_exception,
+)
 
 Synapse.allow_client_caching(False)
 logger = logging.getLogger(__name__)
@@ -58,59 +62,6 @@
 USER_AGENT |= USER_AGENT_LIBRARY
 
 
-class FilterSensitiveData:
-    """A custom span processor that filters out sensitive data from the spans.
-    It filters out the data from the attributes and events of the spans.
-
-    Args:
-        SpanProcessor (opentelemetry.sdk.trace.SpanProcessor): The base class that provides hooks for processing spans during their lifecycle
-    """
-
-    def __init__(self) -> None:
-        self.sensitive_patterns = {
-            "google_sheets": r"https://sheets\.googleapis\.com/v4/spreadsheets/[\w-]+"
-        }
-
-        self._compiled_patterns = {
-            name: re.compile(pattern)
-            for name, pattern in self.sensitive_patterns.items()
-        }
-
-    def _redact_string(self, value: str) -> str:
-        """remove sensitive data from a string
-
-        Args:
-            value (str): a string that may contain sensitive data
-
-        Returns:
-            str: remove sensitive data from string
-        """
-        redacted = value
-        for pattern_name, pattern in self._compiled_patterns.items():
-            redacted = pattern.sub(f"[REDACTED_{pattern_name.upper()}]", redacted)
-        return redacted
-
-    def redacted_sensitive_data_in_exception(
-        self, exception_attributes: Dict[str, str]
-    ) -> Dict[str, str]:
-        """remove sensitive data in exception
-
-        Args:
-            exception_attributes (dict):a dictionary of exception attributes
-
-        Returns:
-            dict: a dictionary of exception attributes with sensitive data redacted
-        """
-        redacted_exception_attributes = {}
-        for key, value in exception_attributes.items():
-            # remove sensitive information from exception message and stacktrace
-            if key == "exception.message" or key == "exception.stacktrace":
-                redacted_exception_attributes[key] = self._redact_string(value)
-            else:
-                redacted_exception_attributes[key] = value
-        return redacted_exception_attributes
-
-
 class AttributePropagatingSpanProcessor(SpanProcessor):
     """A custom span processor that propagates specific attributes from the parent span
     to the child span when the child span is started.
@@ -277,11 +228,9 @@ def _readable_span_alternate(self: SpanSdk) -> ReadableSpan:
     redacted_events = []
     for event in self._events:
         attributes = event.attributes
-        redacted_event_attributes = (
-            FilterSensitiveData().redacted_sensitive_data_in_exception(attributes)
-        )
+        redacted_event_attributes = redacted_sensitive_data_in_exception(attributes)
         redacted_event = Event(
-            name=FilterSensitiveData()._redact_string(event.name),
+            name=redact_string(event.name),
             attributes=redacted_event_attributes,
             timestamp=event.timestamp,
         )
diff --git a/schematic/utils/remove_sensitive_data_utils.py b/schematic/utils/remove_sensitive_data_utils.py
new file mode 100644
index 000000000..2dbe59363
--- /dev/null
+++ b/schematic/utils/remove_sensitive_data_utils.py
@@ -0,0 +1,44 @@
+from typing import Dict
+import re
+
+
+def redact_string(value: str) -> str:
+    """remove sensitive data from a string
+
+    Args:
+        value (str): a string that may contain sensitive data
+
+    Returns:
+        str: remove sensitive data from string
+    """
+    sensitive_patterns = {
+        "google_sheets": r"https://sheets\.googleapis\.com/v4/spreadsheets/[\w-]+"
+    }
+    _compiled_patterns = {
+        name: re.compile(pattern) for name, pattern in sensitive_patterns.items()
+    }
+    redacted = value
+    for pattern_name, pattern in _compiled_patterns.items():
+        redacted = pattern.sub(f"[REDACTED_{pattern_name.upper()}]", redacted)
+    return redacted
+
+
+def redacted_sensitive_data_in_exception(
+    exception_attributes: Dict[str, str]
+) -> Dict[str, str]:
+    """remove sensitive data in exception
+
+    Args:
+        exception_attributes (dict):a dictionary of exception attributes
+
+    Returns:
+        dict: a dictionary of exception attributes with sensitive data redacted
+    """
+    redacted_exception_attributes = {}
+    for key, value in exception_attributes.items():
+        # remove sensitive information from exception message and stacktrace
+        if key == "exception.message" or key == "exception.stacktrace":
+            redacted_exception_attributes[key] = redact_string(value)
+        else:
+            redacted_exception_attributes[key] = value
+    return redacted_exception_attributes
diff --git a/tests/unit/test_filter_sensitive_data.py b/tests/unit/test_filter_sensitive_data.py
new file mode 100644
index 000000000..6f7ec36e6
--- /dev/null
+++ b/tests/unit/test_filter_sensitive_data.py
@@ -0,0 +1,33 @@
+from schematic.utils.remove_sensitive_data_utils import (
+    redact_string,
+    redacted_sensitive_data_in_exception,
+)
+
+
+class TestFilterSensitiveData:
+    def test_redact_string(self) -> None:
+        # given a string with sensitive data, make sure that they are redacted
+        sensitive_data = "googleapiclient.errors.HttpError: "
+        redacted_data = redact_string(sensitive_data)
+        assert (
+            redacted_data
+            == "googleapiclient.errors.HttpError: "
+        )
+
+    def test_redacted_sensitive_data_in_exception(self) -> None:
+        # given a dictionary of exception attributes, make sure that sensitive data is redacted
+        exception_attributes = {
+            "exception.message": "googleapiclient.errors.HttpError: ",
+            "exception.stacktrace": 'Traceback (most recent call last):\n File "", line 1, in \n File "", line 1, in \n File "/usr/local/lib/python3.7/dist-packages/googleapiclient/_helpers.py", line 134, in positional_wrapper\n return wrapped(*args, **kwargs)\n File "/usr/local/lib/python3.7/dist-packages/googleapiclient/http.py", line 905, in execute\n raise HttpError(resp, content, uri=self.uri)\ngoogleapiclient.errors.HttpError: ',
+        }
+        redacted_exception_attributes = redacted_sensitive_data_in_exception(
+            exception_attributes
+        )
+        assert (
+            redacted_exception_attributes["exception.message"]
+            == "googleapiclient.errors.HttpError: "
+        )
+        assert (
+            redacted_exception_attributes["exception.stacktrace"]
+            == 'Traceback (most recent call last):\n File "", line 1, in \n File "", line 1, in \n File "/usr/local/lib/python3.7/dist-packages/googleapiclient/_helpers.py", line 134, in positional_wrapper\n return wrapped(*args, **kwargs)\n File "/usr/local/lib/python3.7/dist-packages/googleapiclient/http.py", line 905, in execute\n raise HttpError(resp, content, uri=self.uri)\ngoogleapiclient.errors.HttpError: '
+        )