Skip to content

Commit 98d46de

Browse files
logging fixes
Signed-off-by: Andre Furlan <[email protected]>
1 parent f4e1d30 commit 98d46de

File tree

3 files changed

+41
-5
lines changed

3 files changed

+41
-5
lines changed

examples/custom_logger.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
logger = logging.getLogger("databricks.sql")
77
logger.setLevel(logging.DEBUG)
88
fh = logging.FileHandler("pysqllogs.log")
9-
fh.setFormatter(logging.Formatter("%(asctime)s %(process)d %(thread)d %(message)s"))
9+
fh.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(process)d %(thread)d %(message)s"))
1010
fh.setLevel(logging.DEBUG)
1111
logger.addHandler(fh)
1212

src/databricks/sql/__init__.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,38 @@
77
threadsafety = 1 # Threads may share the module, but not connections.
88
paramstyle = "pyformat" # Python extended format codes, e.g. ...WHERE name=%(name)s
99

10+
import re
11+
12+
13+
class RedactUrlQueryParamsFilter(logging.Filter):
    """Logging filter that masks the values of URL query parameters.

    Every ``?name=value`` or ``&name=value`` occurrence in the record's
    message and in its string arguments is rewritten to
    ``name=<REDACTED>``, so values carried in a query string are not
    written to the log. The filter never drops a record.
    """

    # Group 1: the "?" or "&" separator; group 2: the parameter name;
    # group 3: the value, which runs until the next "&" or whitespace.
    pattern = re.compile(r"(\?|&)([\w-]+)=([^&\s]+)")
    # Keep the separator and the parameter name, replace only the value.
    mask = r"\1\2=<REDACTED>"

    def __init__(self):
        super().__init__()

    def redact(self, string):
        """Return ``str(string)`` with every query-parameter value masked."""
        return self.pattern.sub(self.mask, str(string))

    def _redact_if_str(self, value):
        """Redact *value* when it is a string; return anything else as is.

        Non-string arguments must keep their type so that format
        specifiers such as ``%d`` still work when the record is formatted.
        """
        return self.redact(value) if isinstance(value, str) else value

    def filter(self, record):
        """Redact the record's message and arguments in place.

        Args:
            record: The ``logging.LogRecord`` being processed. Its ``msg``
                is replaced by a redacted string and its ``args`` by a
                redacted copy.

        Returns:
            Always ``True``: the record is kept, only its content changes.
        """
        record.msg = self.redact(str(record.msg))

        args = record.args
        if isinstance(args, dict):
            # Build a new dict rather than rewriting the caller's mapping.
            record.args = {
                key: self._redact_if_str(value) for key, value in args.items()
            }
        elif args is not None:
            # ``args`` may be None (e.g. a record built without arguments);
            # in that case there is nothing to iterate over.
            record.args = tuple(self._redact_if_str(arg) for arg in args)

        return True
39+
40+
41+
logging.getLogger("urllib3.connectionpool").addFilter(RedactUrlQueryParamsFilter())
1042

1143
class DBAPITypeObject(object):
1244
def __init__(self, *values):

src/databricks/sql/cloudfetch/downloader.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import threading
66
import time
77
import os
8-
from threading import get_ident
8+
import re
99
from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
1010

1111
logger = logging.getLogger(__name__)
@@ -69,7 +69,7 @@ def is_file_download_successful(self) -> bool:
6969

7070
if not self.is_download_finished.wait(timeout=timeout):
7171
self.is_download_timedout = True
72-
logger.debug(
72+
logger.error(
7373
f"cloud fetch download timed out after {self.settings.download_timeout} seconds for link representing rows {self.result_link.startRowOffset} to {self.result_link.startRowOffset + self.result_link.rowCount}"
7474
)
7575
# there are some weird cases when the is_download_finished is not set, but the file is downloaded successfully
@@ -138,7 +138,7 @@ def run(self):
138138
)
139139
self.is_file_downloaded_successfully = success
140140
except Exception as e:
141-
logger.debug(
141+
logger.error(
142142
f"exception downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
143143
)
144144
logger.error(e)
@@ -206,7 +206,10 @@ def decompress_data(compressed_data: bytes) -> bytes:
206206

207207
def http_get_with_retry(url, max_retries=5, backoff_factor=2, download_timeout=60):
208208
attempts = 0
209+
pattern = re.compile(r"(\?|&)([\w-]+)=([^&\s]+)")
210+
mask = r"\1\2=<REDACTED>"
209211

212+
# TODO: introduce connection pooling. I am seeing weird errors without it.
210213
while attempts < max_retries:
211214
try:
212215
session = requests.Session()
@@ -219,7 +222,8 @@ def http_get_with_retry(url, max_retries=5, backoff_factor=2, download_timeout=6
219222
else:
220223
logger.error(response)
221224
except requests.RequestException as e:
222-
print(f"request failed with exception: {e}")
225+
# if this is not redacted, it will print the pre-signed URL
226+
logger.error(f"request failed with exception: {re.sub(pattern, mask, str(e))}")
223227
finally:
224228
session.close()
225229
# Exponential backoff before the next attempt

0 commit comments

Comments
 (0)