Added log_util.py for creating structured logs for log-based metrics.

datacommonsorg · Jan 29, 2025 · 74a4611 · 74a4611
1 parent d7d7302
commit 74a4611
Show file tree

Hide file tree

Showing 2 changed files with 60 additions and 14 deletions.
diff --git a/import-automation/executor/main.py b/import-automation/executor/main.py
@@ -17,6 +17,7 @@
 import logging
 import json
 import os
+import sys
 import time
 
 from absl import flags
@@ -28,14 +29,20 @@
 from app.service import github_api
 from app.service import email_notifier
 
+# This file is <repo>/import-automation/executor/main.py, so the repo root
+# (which holds util/log_util.py) is three directory levels above it.
+REPO_DIR = os.path.dirname(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.append(os.path.join(REPO_DIR, 'util'))
+
+from log_util import log_metric
+
 FLAGS = flags.FLAGS
 flags.DEFINE_string('import_name', '', 'Absoluate import name.')
 flags.DEFINE_string('import_config', '', 'Import executor configuration.')
 
 CLOUD_RUN_JOB_NAME = os.getenv("CLOUD_RUN_JOB")
-# The `log_type` label helps filter log lines, which is useful for creating
-# log-based metrics.  Each log type has a similar set of fields for easier parsing.
-LOG_TYPE_LABEL = "log_type"
+
 # log_type for capturing status of auto import cloud run jobs.
 # Required fields - log_type, message, status, latency_secs.
 AUTO_IMPORT_JOB_STATUS_LOG_TYPE = "auto-import-job-status"
@@ -67,17 +74,12 @@ def scheduled_updates(absolute_import_name: str, import_config: str):
     elapsed_time_secs = int(time.time() - start_time)
     message = (f"Cloud Run Job [{CLOUD_RUN_JOB_NAME}] completed with status= "
                f"[{result.status}] in [{elapsed_time_secs}] seconds.)")
-    # With Python logging lib, json is interpreted as text (populates textPayload field).
-    # Using print to populate json as structured logs (populate jsonPayload field).
-    # Ref: https://cloud.google.com/functions/docs/monitoring/logging#writing_structured_logs
-    print(
-        json.dumps({
-            LOG_TYPE_LABEL: AUTO_IMPORT_JOB_STATUS_LOG_TYPE,
-            "message": message,
-            "severity": "INFO" if result.status == 'succeeded' else "ERROR",
-            "status": result.status,
-            "latency_secs": elapsed_time_secs,
-        }))
+
+    log_metric(AUTO_IMPORT_JOB_STATUS_LOG_TYPE,
+               "INFO" if result.status == 'succeeded' else "ERROR", message, {
+                   "status": result.status,
+                   "latency_secs": elapsed_time_secs,
+               })
     if result.status == 'failed':
         return 1
     return 0

diff --git a/util/log_util.py b/util/log_util.py
@@ -0,0 +1,44 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utility functions for logging."""
+
+import json
+
+
+def log_struct(level: str, message: str, labels: dict):
+    """Prints one JSON log line; in GCP Log Explorer its fields appear under `jsonPayload`.
+
+    Args:
+        level: Log level (e.g., "INFO", "WARNING", "ERROR"); emitted as `severity`.
+        message: Log message.
+        labels: Additional fields for the entry; may be None or empty. Keys named
+            `message` or `severity` are overridden by the explicit arguments.
+    """
+    # print (not the logging lib) so the JSON lands in jsonPayload, not textPayload.
+    # Ref: https://cloud.google.com/functions/docs/monitoring/logging#writing_structured_logs
+    print(json.dumps({**(labels or {}), "message": message, "severity": level}))
+
+
+def log_metric(log_type: str, level: str, message: str, metric_labels: dict):
+    """Logs a structured message which can be used to create a log-based metric.
+
+    In GCP Log Explorer, the labels will appear under `jsonPayload`.
+
+    Args:
+        log_type: Filter key for log-based metrics; always emitted as `log_type`.
+        level: Log level ("INFO", "WARNING", "ERROR").
+        message: Log message.
+        metric_labels: Labels for the metric; None/empty allowed, cannot override `log_type`.
+    """
+    log_struct(level, message, {**(metric_labels or {}), "log_type": log_type})