Skip to content

Commit

Permalink
logging fixes
Browse files Browse the repository at this point in the history
Signed-off-by: Andre Furlan <[email protected]>
  • Loading branch information
andrefurlan-db committed Feb 16, 2024
1 parent f4e1d30 commit 98d46de
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 5 deletions.
2 changes: 1 addition & 1 deletion examples/custom_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
logger = logging.getLogger("databricks.sql")
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler("pysqllogs.log")
fh.setFormatter(logging.Formatter("%(asctime)s %(process)d %(thread)d %(message)s"))
fh.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(process)d %(thread)d %(message)s"))
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

Expand Down
32 changes: 32 additions & 0 deletions src/databricks/sql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,38 @@
threadsafety = 1 # Threads may share the module, but not connections.
paramstyle = "pyformat" # Python extended format codes, e.g. ...WHERE name=%(name)s

import re


class RedactUrlQueryParamsFilter(logging.Filter):
    """Logging filter that masks URL query-parameter values in log records.

    Every ``?key=value`` / ``&key=value`` occurrence found in the record's
    message, or in any *string* argument of the record, is rewritten to
    ``key=<REDACTED>``. Non-string arguments are passed through untouched.
    The filter never drops a record: ``filter`` always returns ``True``.
    """

    # Matches a query-string pair: the delimiter ("?" or "&"), the parameter
    # name, and the value (everything up to the next "&" or whitespace).
    pattern = re.compile(r"(\?|&)([\w-]+)=([^&\s]+)")
    # Keeps the delimiter and parameter name, replaces only the value.
    mask = r"\1\2=<REDACTED>"

    def __init__(self):
        super().__init__()

    def redact(self, string):
        """Return ``str(string)`` with every query-parameter value masked."""
        return self.pattern.sub(self.mask, str(string))

    def _redact_if_str(self, value):
        """Redact ``value`` when it is a string; return anything else as-is."""
        return self.redact(value) if isinstance(value, str) else value

    def filter(self, record):
        """Redact the record's message and string arguments in place.

        Args:
            record: The ``logging.LogRecord`` about to be emitted.

        Returns:
            Always ``True`` so the (redacted) record is still logged.
        """
        record.msg = self.redact(str(record.msg))

        args = record.args
        if isinstance(args, dict):
            # Build a new dict instead of mutating the mapping the caller
            # handed to the logging call.
            record.args = {
                key: self._redact_if_str(value) for key, value in args.items()
            }
        elif args is not None:
            record.args = tuple(self._redact_if_str(arg) for arg in args)
        # args is None: nothing to redact, leave the record untouched.

        return True


# Attach a RedactUrlQueryParamsFilter to the "urllib3.connectionpool" logger so
# that query-parameter values in records logged through it are masked.
logging.getLogger("urllib3.connectionpool").addFilter(RedactUrlQueryParamsFilter())

class DBAPITypeObject(object):
def __init__(self, *values):
Expand Down
12 changes: 8 additions & 4 deletions src/databricks/sql/cloudfetch/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import threading
import time
import os
from threading import get_ident
import re
from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -69,7 +69,7 @@ def is_file_download_successful(self) -> bool:

if not self.is_download_finished.wait(timeout=timeout):
self.is_download_timedout = True
logger.debug(
logger.error(
f"cloud fetch download timed out after {self.settings.download_timeout} seconds for link representing rows {self.result_link.startRowOffset} to {self.result_link.startRowOffset + self.result_link.rowCount}"
)
# there are some weird cases when the is_download_finished is not set, but the file is downloaded successfully
Expand Down Expand Up @@ -138,7 +138,7 @@ def run(self):
)
self.is_file_downloaded_successfully = success
except Exception as e:
logger.debug(
logger.error(
f"exception downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
)
logger.error(e)
Expand Down Expand Up @@ -206,7 +206,10 @@ def decompress_data(compressed_data: bytes) -> bytes:

def http_get_with_retry(url, max_retries=5, backoff_factor=2, download_timeout=60):
attempts = 0
pattern = re.compile(r"(\?|&)([\w-]+)=([^&\s]+)")
mask = r"\1\2=<REDACTED>"

# TODO: introduce connection pooling. I am seeing weird errors without it.
while attempts < max_retries:
try:
session = requests.Session()
Expand All @@ -219,7 +222,8 @@ def http_get_with_retry(url, max_retries=5, backoff_factor=2, download_timeout=6
else:
logger.error(response)
except requests.RequestException as e:
print(f"request failed with exception: {e}")
# if this is not redacted, it will print the pre-signed URL
logger.error(f"request failed with exception: {re.sub(pattern, mask, str(e))}")
finally:
session.close()
# Exponential backoff before the next attempt
Expand Down

0 comments on commit 98d46de

Please sign in to comment.