From 27564b9c00232e3e12635fe5ea533f6bbf4550de Mon Sep 17 00:00:00 2001
From: knikure <knikure@amazon.com>
Date: Tue, 18 Jun 2024 14:41:24 +0000
Subject: [PATCH 1/2] Enable telemetry for estimator, predictor, and processing
 functions

---
 src/sagemaker/base_predictor.py               |   3 +
 src/sagemaker/estimator.py                    |   4 +
 src/sagemaker/processing.py                   |   4 +
 src/sagemaker/telemetry/constants.py          |   3 +
 src/sagemaker/telemetry/telemetry_logging.py  | 158 ++++++++++--------
 .../sagemaker/huggingface/test_estimator.py   |   2 +-
 .../sagemaker/huggingface/test_processing.py  |  14 +-
 .../sagemaker/local/test_local_pipeline.py    |   1 +
 .../test_huggingface_pytorch_compiler.py      |   8 +-
 .../test_huggingface_tensorflow_compiler.py   |   6 +-
 .../test_pytorch_compiler.py                  |   8 +-
 .../test_tensorflow_compiler.py               |   8 +-
 .../workflow/test_processing_step.py          |  49 ++++--
 tests/unit/test_chainer.py                    |   2 +-
 tests/unit/test_estimator.py                  |   1 +
 tests/unit/test_mxnet.py                      |   2 +-
 tests/unit/test_predictor.py                  |  32 +++-
 tests/unit/test_processing.py                 |  62 +++++--
 tests/unit/test_pytorch.py                    |   2 +-
 tests/unit/test_rl.py                         |   2 +-
 tests/unit/test_sklearn.py                    |   2 +-
 tests/unit/test_xgboost.py                    |   6 +-
 22 files changed, 242 insertions(+), 137 deletions(-)

diff --git a/src/sagemaker/base_predictor.py b/src/sagemaker/base_predictor.py
index 1a7eea9cd7..d4dcc269ba 100644
--- a/src/sagemaker/base_predictor.py
+++ b/src/sagemaker/base_predictor.py
@@ -62,6 +62,8 @@
 from sagemaker.compute_resource_requirements.resource_requirements import (
     ResourceRequirements,
 )
+from sagemaker.telemetry.telemetry_logging import _telemetry_emitter
+from sagemaker.telemetry.constants import Feature
 
 LOGGER = logging.getLogger("sagemaker")
 
@@ -145,6 +147,7 @@ def __init__(
         self._content_type = None
         self._accept = None
 
+    @_telemetry_emitter(Feature.PREDICTOR, "sagemaker.predictor.predict")
     def predict(
         self,
         data,
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index b6af6cf5de..b37c8d563f 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -105,6 +105,8 @@
 from sagemaker.workflow.entities import PipelineVariable
 from sagemaker.workflow.parameters import ParameterString
 from sagemaker.workflow.pipeline_context import PipelineSession, runnable_by_pipeline
+from sagemaker.telemetry.telemetry_logging import _telemetry_emitter
+from sagemaker.telemetry.constants import Feature
 
 logger = logging.getLogger(__name__)
 
@@ -1272,6 +1274,7 @@ def latest_job_profiler_artifacts_path(self):
             )
         return None
 
+    @_telemetry_emitter(Feature.ESTIMATOR, "sagemaker.estimator.fit")
     @runnable_by_pipeline
     def fit(
         self,
@@ -1527,6 +1530,7 @@ def logs(self):
         """
         self.sagemaker_session.logs_for_job(self.latest_training_job.name, wait=True)
 
+    @_telemetry_emitter(Feature.ESTIMATOR, "sagemaker.estimator.deploy")
     def deploy(
         self,
         initial_instance_count=None,
diff --git a/src/sagemaker/processing.py b/src/sagemaker/processing.py
index 7b16e3cba3..ee759e8bc5 100644
--- a/src/sagemaker/processing.py
+++ b/src/sagemaker/processing.py
@@ -62,6 +62,8 @@
 from sagemaker.dataset_definition.inputs import S3Input, DatasetDefinition
 from sagemaker.apiutils._base_types import ApiObject
 from sagemaker.s3 import S3Uploader
+from sagemaker.telemetry.telemetry_logging import _telemetry_emitter
+from sagemaker.telemetry.constants import Feature
 
 logger = logging.getLogger(__name__)
 
@@ -201,6 +203,7 @@ def __init__(
             env, PROCESSING_JOB_ENVIRONMENT_PATH, sagemaker_session=self.sagemaker_session
         )
 
+    @_telemetry_emitter(Feature.PROCESSING, "processing.run")
     @runnable_by_pipeline
     def run(
         self,
@@ -616,6 +619,7 @@ def get_run_args(
         )
         return RunArgs(code=code, inputs=inputs, outputs=outputs, arguments=arguments)
 
+    @_telemetry_emitter(Feature.PROCESSING, "processing.run")
     @runnable_by_pipeline
     def run(
         self,
diff --git a/src/sagemaker/telemetry/constants.py b/src/sagemaker/telemetry/constants.py
index 332d706351..e5c994a491 100644
--- a/src/sagemaker/telemetry/constants.py
+++ b/src/sagemaker/telemetry/constants.py
@@ -25,6 +25,9 @@ class Feature(Enum):
     SDK_DEFAULTS = 1
     LOCAL_MODE = 2
     REMOTE_FUNCTION = 3
+    ESTIMATOR = 4
+    PREDICTOR = 5
+    PROCESSING = 6
 
     def __str__(self):  # pylint: disable=E0307
         """Return the feature name."""
diff --git a/src/sagemaker/telemetry/telemetry_logging.py b/src/sagemaker/telemetry/telemetry_logging.py
index d2b91a321c..d29f15530c 100644
--- a/src/sagemaker/telemetry/telemetry_logging.py
+++ b/src/sagemaker/telemetry/telemetry_logging.py
@@ -52,6 +52,9 @@
     str(Feature.SDK_DEFAULTS): 1,
     str(Feature.LOCAL_MODE): 2,
     str(Feature.REMOTE_FUNCTION): 3,
+    str(Feature.ESTIMATOR): 4,
+    str(Feature.PREDICTOR): 5,
+    str(Feature.PROCESSING): 6,
 }
 
 STATUS_TO_CODE = {
@@ -66,89 +69,26 @@ def _telemetry_emitter(feature: str, func_name: str):
     def decorator(func):
         @functools.wraps(func)
         def wrapper(*args, **kwargs):
+            self_instance = None
             sagemaker_session = None
+            func_name_derived = None
             if len(args) > 0 and hasattr(args[0], "sagemaker_session"):
                 # Get the sagemaker_session from the instance method args
+                self_instance = args[0]
                 sagemaker_session = args[0].sagemaker_session
+                if feature in (
+                    Feature.ESTIMATOR,
+                    Feature.PREDICTOR,
+                    Feature.PROCESSING,
+                ):
+                    func_name_derived = f"{self_instance.__class__.__name__}.{func.__name__}"
             elif feature == Feature.REMOTE_FUNCTION:
                 # Get the sagemaker_session from the function keyword arguments for remote function
                 sagemaker_session = kwargs.get(
                     "sagemaker_session", _get_default_sagemaker_session()
                 )
 
-            if sagemaker_session:
-                logger.debug("sagemaker_session found, preparing to emit telemetry...")
-                logger.info(TELEMETRY_OPT_OUT_MESSAGING)
-                response = None
-                caught_ex = None
-                studio_app_type = process_studio_metadata_file()
-
-                # Check if telemetry is opted out
-                telemetry_opt_out_flag = resolve_value_from_config(
-                    direct_input=None,
-                    config_path=TELEMETRY_OPT_OUT_PATH,
-                    default_value=False,
-                    sagemaker_session=sagemaker_session,
-                )
-                logger.debug("TelemetryOptOut flag is set to: %s", telemetry_opt_out_flag)
-
-                # Construct the feature list to track feature combinations
-                feature_list: List[int] = [FEATURE_TO_CODE[str(feature)]]
-
-                if sagemaker_session.sagemaker_config and feature != Feature.SDK_DEFAULTS:
-                    feature_list.append(FEATURE_TO_CODE[str(Feature.SDK_DEFAULTS)])
-
-                if sagemaker_session.local_mode and feature != Feature.LOCAL_MODE:
-                    feature_list.append(FEATURE_TO_CODE[str(Feature.LOCAL_MODE)])
-
-                # Construct the extra info to track platform and environment usage metadata
-                extra = (
-                    f"{func_name}"
-                    f"&x-sdkVersion={SDK_VERSION}"
-                    f"&x-env={PYTHON_VERSION}"
-                    f"&x-sys={OS_NAME_VERSION}"
-                    f"&x-platform={studio_app_type}"
-                )
-
-                # Add endpoint ARN to the extra info if available
-                if sagemaker_session.endpoint_arn:
-                    extra += f"&x-endpointArn={sagemaker_session.endpoint_arn}"
-
-                start_timer = perf_counter()
-                try:
-                    # Call the original function
-                    response = func(*args, **kwargs)
-                    stop_timer = perf_counter()
-                    elapsed = stop_timer - start_timer
-                    extra += f"&x-latency={round(elapsed, 2)}"
-                    if not telemetry_opt_out_flag:
-                        _send_telemetry_request(
-                            STATUS_TO_CODE[str(Status.SUCCESS)],
-                            feature_list,
-                            sagemaker_session,
-                            None,
-                            None,
-                            extra,
-                        )
-                except Exception as e:  # pylint: disable=W0703
-                    stop_timer = perf_counter()
-                    elapsed = stop_timer - start_timer
-                    extra += f"&x-latency={round(elapsed, 2)}"
-                    if not telemetry_opt_out_flag:
-                        _send_telemetry_request(
-                            STATUS_TO_CODE[str(Status.FAILURE)],
-                            feature_list,
-                            sagemaker_session,
-                            str(e),
-                            e.__class__.__name__,
-                            extra,
-                        )
-                    caught_ex = e
-                finally:
-                    if caught_ex:
-                        raise caught_ex
-                    return response  # pylint: disable=W0150
-            else:
+            if not sagemaker_session:
                 logger.debug(
                     "Unable to send telemetry for function %s. "
                     "sagemaker_session is not provided or not valid.",
@@ -156,6 +96,78 @@ def wrapper(*args, **kwargs):
                 )
                 return func(*args, **kwargs)
 
+            logger.debug("sagemaker_session found, preparing to emit telemetry...")
+            logger.info(TELEMETRY_OPT_OUT_MESSAGING)
+            response = None
+            caught_ex = None
+            studio_app_type = process_studio_metadata_file()
+
+            # Check if telemetry is opted out
+            telemetry_opt_out_flag = resolve_value_from_config(
+                direct_input=None,
+                config_path=TELEMETRY_OPT_OUT_PATH,
+                default_value=False,
+                sagemaker_session=sagemaker_session,
+            )
+            logger.debug("TelemetryOptOut flag is set to: %s", telemetry_opt_out_flag)
+
+            # Construct the feature list to track feature combinations
+            feature_list: List[int] = [FEATURE_TO_CODE[str(feature)]]
+
+            if sagemaker_session.sagemaker_config and feature != Feature.SDK_DEFAULTS:
+                feature_list.append(FEATURE_TO_CODE[str(Feature.SDK_DEFAULTS)])
+
+            if sagemaker_session.local_mode and feature != Feature.LOCAL_MODE:
+                feature_list.append(FEATURE_TO_CODE[str(Feature.LOCAL_MODE)])
+
+            # Construct the extra info to track platform and environment usage metadata
+            extra = (
+                f"{func_name_derived if func_name_derived else func_name}"
+                f"&x-sdkVersion={SDK_VERSION}"
+                f"&x-env={PYTHON_VERSION}"
+                f"&x-sys={OS_NAME_VERSION}"
+                f"&x-platform={studio_app_type}"
+            )
+
+            # Add endpoint ARN to the extra info if available
+            if sagemaker_session.endpoint_arn:
+                extra += f"&x-endpointArn={sagemaker_session.endpoint_arn}"
+
+            start_timer = perf_counter()
+            try:
+                # Call the original function
+                response = func(*args, **kwargs)
+                stop_timer = perf_counter()
+                elapsed = stop_timer - start_timer
+                extra += f"&x-latency={round(elapsed, 2)}"
+                if not telemetry_opt_out_flag:
+                    _send_telemetry_request(
+                        STATUS_TO_CODE[str(Status.SUCCESS)],
+                        feature_list,
+                        sagemaker_session,
+                        None,
+                        None,
+                        extra,
+                    )
+            except Exception as e:  # pylint: disable=W0703
+                stop_timer = perf_counter()
+                elapsed = stop_timer - start_timer
+                extra += f"&x-latency={round(elapsed, 2)}"
+                if not telemetry_opt_out_flag:
+                    _send_telemetry_request(
+                        STATUS_TO_CODE[str(Status.FAILURE)],
+                        feature_list,
+                        sagemaker_session,
+                        str(e),
+                        e.__class__.__name__,
+                        extra,
+                    )
+                caught_ex = e
+            finally:
+                if caught_ex:
+                    raise caught_ex
+                return response  # pylint: disable=W0150
+
         return wrapper
 
     return decorator
diff --git a/tests/unit/sagemaker/huggingface/test_estimator.py b/tests/unit/sagemaker/huggingface/test_estimator.py
index 3ad641a321..d6cf88d811 100644
--- a/tests/unit/sagemaker/huggingface/test_estimator.py
+++ b/tests/unit/sagemaker/huggingface/test_estimator.py
@@ -241,7 +241,7 @@ def test_huggingface(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         huggingface_training_version, f"pytorch{huggingface_pytorch_training_version}"
diff --git a/tests/unit/sagemaker/huggingface/test_processing.py b/tests/unit/sagemaker/huggingface/test_processing.py
index 491f4ab5df..c5e51e50e0 100644
--- a/tests/unit/sagemaker/huggingface/test_processing.py
+++ b/tests/unit/sagemaker/huggingface/test_processing.py
@@ -13,11 +13,13 @@
 from __future__ import absolute_import
 
 import pytest
-from mock import Mock, patch, MagicMock
+from mock import Mock, patch, MagicMock, mock_open
+import json
 
 from sagemaker.huggingface.processing import HuggingFaceProcessor
 from sagemaker.fw_utils import UploadedCode
 from sagemaker.session_settings import SessionSettings
+from sagemaker.user_agent import process_studio_metadata_file
 
 from .huggingface_utils import get_full_gpu_image_uri, GPU_INSTANCE_TYPE, REGION
 
@@ -64,6 +66,16 @@ def uploaded_code(
     return UploadedCode(s3_prefix=s3_prefix, script_name=script_name)
 
 
+@pytest.fixture(autouse=True)
+def mock_process_studio_metadata_file(tmp_path):
+    studio_file = tmp_path / "resource-metadata.json"
+    studio_file.write_text(json.dumps({"AppType": "TestAppType"}))
+
+    with patch("os.path.exists", return_value=True):
+        with patch("sagemaker.user_agent.open", mock_open(read_data=studio_file.read_text())):
+            yield process_studio_metadata_file
+
+
 @patch("sagemaker.utils._botocore_resolver")
 @patch("os.path.exists", return_value=True)
 @patch("os.path.isfile", return_value=True)
diff --git a/tests/unit/sagemaker/local/test_local_pipeline.py b/tests/unit/sagemaker/local/test_local_pipeline.py
index 548a70027b..c999322b18 100644
--- a/tests/unit/sagemaker/local/test_local_pipeline.py
+++ b/tests/unit/sagemaker/local/test_local_pipeline.py
@@ -171,6 +171,7 @@ def local_sagemaker_session(boto_session):
     # For tests which doesn't verify config file injection, operate with empty config
 
     local_session_mock.sagemaker_config = {}
+    local_session_mock.endpoint_arn = None
     return local_session_mock
 
 
diff --git a/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py b/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py
index e1c21cf662..fee26668d2 100644
--- a/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py
+++ b/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py
@@ -402,7 +402,7 @@ def test_pytorchxla_distribution(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         huggingface_training_compiler_version,
@@ -463,7 +463,7 @@ def test_default_compiler_config(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         huggingface_training_compiler_version,
@@ -519,7 +519,7 @@ def test_debug_compiler_config(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         huggingface_training_compiler_version,
@@ -575,7 +575,7 @@ def test_disable_compiler_config(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         huggingface_training_compiler_version,
diff --git a/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py b/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py
index e0d172f6e0..8bfb694040 100644
--- a/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py
+++ b/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py
@@ -349,7 +349,7 @@ def test_default_compiler_config(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         huggingface_training_compiler_version,
@@ -407,7 +407,7 @@ def test_debug_compiler_config(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         huggingface_training_compiler_version,
@@ -465,7 +465,7 @@ def test_disable_compiler_config(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         huggingface_training_compiler_version,
diff --git a/tests/unit/sagemaker/training_compiler/test_pytorch_compiler.py b/tests/unit/sagemaker/training_compiler/test_pytorch_compiler.py
index 34a1236a7f..cac31313cd 100644
--- a/tests/unit/sagemaker/training_compiler/test_pytorch_compiler.py
+++ b/tests/unit/sagemaker/training_compiler/test_pytorch_compiler.py
@@ -344,7 +344,7 @@ def test_pytorchxla_distribution(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         pytorch_training_compiler_version,
@@ -403,7 +403,7 @@ def test_default_compiler_config(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         pytorch_training_compiler_version,
@@ -458,7 +458,7 @@ def test_debug_compiler_config(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         pytorch_training_compiler_version,
@@ -513,7 +513,7 @@ def test_disable_compiler_config(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         pytorch_training_compiler_version,
diff --git a/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py b/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py
index ac42bb53ab..6c76558568 100644
--- a/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py
+++ b/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py
@@ -289,7 +289,7 @@ def test_default(
         sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
         assert sagemaker_call_names == ["train", "logs_for_job"]
         boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-        assert boto_call_names == ["resource"]
+        assert boto_call_names == ["resource", "client"]
 
         expected_train_args = _create_train_job(
             tensorflow_training_version,
@@ -348,7 +348,7 @@ def test_byoc(
         sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
         assert sagemaker_call_names == ["train", "logs_for_job"]
         boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-        assert boto_call_names == ["resource"]
+        assert boto_call_names == ["resource", "client"]
 
         expected_train_args = _create_train_job(
             tensorflow_training_version,
@@ -399,7 +399,7 @@ def test_debug_compiler_config(
         sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
         assert sagemaker_call_names == ["train", "logs_for_job"]
         boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-        assert boto_call_names == ["resource"]
+        assert boto_call_names == ["resource", "client"]
 
         expected_train_args = _create_train_job(
             tensorflow_training_version,
@@ -450,7 +450,7 @@ def test_disable_compiler_config(
         sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
         assert sagemaker_call_names == ["train", "logs_for_job"]
         boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-        assert boto_call_names == ["resource"]
+        assert boto_call_names == ["resource", "client"]
 
         expected_train_args = _create_train_job(
             tensorflow_training_version,
diff --git a/tests/unit/sagemaker/workflow/test_processing_step.py b/tests/unit/sagemaker/workflow/test_processing_step.py
index 0dcd7c2495..785d532bdd 100644
--- a/tests/unit/sagemaker/workflow/test_processing_step.py
+++ b/tests/unit/sagemaker/workflow/test_processing_step.py
@@ -14,7 +14,7 @@
 
 import json
 import os
-from mock import patch
+from mock import patch, mock_open
 
 import pytest
 import warnings
@@ -56,6 +56,7 @@
 from sagemaker.network import NetworkConfig
 from sagemaker.pytorch.estimator import PyTorch
 from sagemaker import utils, Model
+from sagemaker.user_agent import process_studio_metadata_file
 
 from sagemaker.clarify import (
     SageMakerClarifyProcessor,
@@ -247,6 +248,15 @@ def network_config():
     )
 
 
+def mock_process_studio_metadata_file(tmp_path):
+    studio_file = tmp_path / "resource-metadata.json"
+    studio_file.write_text(json.dumps({"AppType": "TestAppType"}))
+
+    with patch("os.path.exists", return_value=True):
+        with patch("sagemaker.user_agent.open", mock_open(read_data=studio_file.read_text())):
+            yield process_studio_metadata_file
+
+
 @pytest.mark.parametrize(
     "experiment_config, expected_experiment_config",
     [
@@ -1127,25 +1137,28 @@ def test_spark_processor_local_code(spark_processor, processing_input, pipeline_
 @patch("os.path.exists", return_value=True)
 @patch("os.path.isfile", return_value=True)
 def test_processor_with_role_as_pipeline_parameter(
-    exists_mock, isfile_mock, processor_args, pipeline_session
+    exists_mock, isfile_mock, tmp_path, processor_args, pipeline_session
 ):
-    processor, run_inputs = processor_args
-    processor.sagemaker_session = pipeline_session
-    processor.run(**run_inputs)
-
-    step_args = processor.run(**run_inputs)
-    step = ProcessingStep(
-        name="MyProcessingStep",
-        step_args=step_args,
-    )
-    pipeline = Pipeline(
-        name="MyPipeline",
-        steps=[step],
-        sagemaker_session=pipeline_session,
-    )
+    studio_file = tmp_path / "resource-metadata.json"
+    studio_file.write_text(json.dumps({"AppType": "TestApp"}))
+    with patch("builtins.open", mock_open(read_data=studio_file.read_text())):
+        processor, run_inputs = processor_args
+        processor.sagemaker_session = pipeline_session
+        processor.run(**run_inputs)
+
+        step_args = processor.run(**run_inputs)
+        step = ProcessingStep(
+            name="MyProcessingStep",
+            step_args=step_args,
+        )
+        pipeline = Pipeline(
+            name="MyPipeline",
+            steps=[step],
+            sagemaker_session=pipeline_session,
+        )
 
-    step_def = json.loads(pipeline.definition())["Steps"][0]
-    assert step_def["Arguments"]["RoleArn"] == {"Get": f"Parameters.{_PARAM_ROLE_NAME}"}
+        step_def = json.loads(pipeline.definition())["Steps"][0]
+        assert step_def["Arguments"]["RoleArn"] == {"Get": f"Parameters.{_PARAM_ROLE_NAME}"}
 
 
 @patch("sagemaker.workflow.utilities._pipeline_config", MOCKED_PIPELINE_CONFIG_WITH_CUSTOM_PREFIX)
diff --git a/tests/unit/test_chainer.py b/tests/unit/test_chainer.py
index 8ad2ae0bab..dfaf13812e 100644
--- a/tests/unit/test_chainer.py
+++ b/tests/unit/test_chainer.py
@@ -354,7 +354,7 @@ def test_chainer(strftime, time, sagemaker_session, chainer_version, chainer_py_
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(chainer_version, chainer_py_version)
     expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index b557a9c9f0..d00667cd60 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -2354,6 +2354,7 @@ def test_local_code_location():
         local_mode=True,
         spec=sagemaker.local.LocalSession,
         settings=SessionSettings(),
+        endpoint_arn=None,
     )
 
     sms.sagemaker_config = {}
diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py
index 4a584dfae4..dbb11cd63e 100644
--- a/tests/unit/test_mxnet.py
+++ b/tests/unit/test_mxnet.py
@@ -360,7 +360,7 @@ def test_mxnet(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     actual_train_args = sagemaker_session.method_calls[0][2]
     job_name = actual_train_args["job_name"]
diff --git a/tests/unit/test_predictor.py b/tests/unit/test_predictor.py
index 1e4f6d0f0a..7400ea50a3 100644
--- a/tests/unit/test_predictor.py
+++ b/tests/unit/test_predictor.py
@@ -65,7 +65,9 @@ def empty_sagemaker_session():
     return ims
 
 
-def test_predict_call_pass_through():
+@patch("sagemaker.telemetry.telemetry_logging.resolve_value_from_config")
+def test_predict_call_pass_through(mock_resolve_value_from_config):
+    mock_resolve_value_from_config.return_value = True
     sagemaker_session = empty_sagemaker_session()
     predictor = Predictor(ENDPOINT, sagemaker_session)
 
@@ -89,7 +91,9 @@ def test_predict_call_pass_through():
     assert result == RETURN_VALUE
 
 
-def test_predict_call_with_target_variant():
+@patch("sagemaker.telemetry.telemetry_logging.resolve_value_from_config")
+def test_predict_call_with_target_variant(mock_resolve_value_from_config):
+    mock_resolve_value_from_config.return_value = True
     sagemaker_session = empty_sagemaker_session()
     predictor = Predictor(ENDPOINT, sagemaker_session)
 
@@ -112,7 +116,9 @@ def test_predict_call_with_target_variant():
     assert result == RETURN_VALUE
 
 
-def test_predict_call_with_inference_id():
+@patch("sagemaker.telemetry.telemetry_logging.resolve_value_from_config")
+def test_predict_call_with_inference_id(mock_resolve_value_from_config):
+    mock_resolve_value_from_config.return_value = True
     sagemaker_session = empty_sagemaker_session()
     predictor = Predictor(ENDPOINT, sagemaker_session)
 
@@ -135,7 +141,9 @@ def test_predict_call_with_inference_id():
     assert result == RETURN_VALUE
 
 
-def test_multi_model_predict_call():
+@patch("sagemaker.telemetry.telemetry_logging.resolve_value_from_config")
+def test_multi_model_predict_call(mock_resolve_value_from_config):
+    mock_resolve_value_from_config.return_value = True
     sagemaker_session = empty_sagemaker_session()
     predictor = Predictor(ENDPOINT, sagemaker_session)
 
@@ -180,7 +188,9 @@ def json_sagemaker_session():
     return ims
 
 
-def test_predict_call_with_json():
+@patch("sagemaker.telemetry.telemetry_logging.resolve_value_from_config")
+def test_predict_call_with_json(mock_resolve_value_from_config):
+    mock_resolve_value_from_config.return_value = True
     sagemaker_session = json_sagemaker_session()
     predictor = Predictor(ENDPOINT, sagemaker_session, serializer=JSONSerializer())
 
@@ -222,8 +232,10 @@ def ret_csv_sagemaker_session():
     return ims
 
 
-def test_predict_call_with_csv():
+@patch("sagemaker.telemetry.telemetry_logging.resolve_value_from_config")
+def test_predict_call_with_csv(mock_resolve_value_from_config):
     sagemaker_session = ret_csv_sagemaker_session()
+    mock_resolve_value_from_config.return_value = True
     predictor = Predictor(
         ENDPOINT, sagemaker_session, serializer=CSVSerializer(), deserializer=CSVDeserializer()
     )
@@ -245,8 +257,10 @@ def test_predict_call_with_csv():
     assert result == [["1", "2", "3"]]
 
 
-def test_predict_call_with_multiple_accept_types():
+@patch("sagemaker.telemetry.telemetry_logging.resolve_value_from_config")
+def test_predict_call_with_multiple_accept_types(mock_resolve_value_from_config):
     sagemaker_session = ret_csv_sagemaker_session()
+    mock_resolve_value_from_config.return_value = True
     predictor = Predictor(
         ENDPOINT, sagemaker_session, serializer=CSVSerializer(), deserializer=PandasDeserializer()
     )
@@ -696,8 +710,10 @@ def test_setting_serializer_deserializer_atts_changes_content_accept_types():
     assert predictor.content_type == "text/csv"
 
 
-def test_custom_attributes():
+@patch("sagemaker.telemetry.telemetry_logging.resolve_value_from_config")
+def test_custom_attributes(mock_resolve_value_from_config):
     sagemaker_session = empty_sagemaker_session()
+    mock_resolve_value_from_config.return_value = False
     predictor = Predictor(ENDPOINT, sagemaker_session=sagemaker_session)
 
     sagemaker_session.sagemaker_runtime_client.invoke_endpoint = Mock(
diff --git a/tests/unit/test_processing.py b/tests/unit/test_processing.py
index 93e3d91f87..e7467d8b02 100644
--- a/tests/unit/test_processing.py
+++ b/tests/unit/test_processing.py
@@ -15,8 +15,9 @@
 import copy
 
 import pytest
-from mock import Mock, patch, MagicMock
+from mock import Mock, patch, MagicMock, mock_open
 from packaging import version
+import json
 
 from sagemaker import LocalSession
 from sagemaker.dataset_definition.inputs import (
@@ -46,6 +47,7 @@
 from sagemaker.workflow.pipeline_context import PipelineSession, _PipelineConfig
 from sagemaker.workflow.functions import Join
 from sagemaker.workflow.execution_variables import ExecutionVariables
+from sagemaker.user_agent import process_studio_metadata_file
 from tests.unit import SAGEMAKER_CONFIG_PROCESSING_JOB
 
 BUCKET_NAME = "mybucket"
@@ -133,6 +135,26 @@ def uploaded_code(
     return UploadedCode(s3_prefix=s3_prefix, script_name=script_name)
 
 
+@pytest.fixture(autouse=True)
+def mock_process_studio_metadata_file(tmp_path):
+    studio_file = tmp_path / "resource-metadata.json"
+    studio_file.write_text(json.dumps({"AppType": "TestAppType"}))
+
+    with patch("os.path.exists", return_value=True):
+        with patch("sagemaker.user_agent.open", mock_open(read_data=studio_file.read_text())):
+            yield process_studio_metadata_file
+
+
+@pytest.fixture()
+def base_config_with_schema():
+    return {"SchemaVersion": "1.0"}
+
+
+@pytest.fixture()
+def base_local_config():
+    return {"region_name": "us-west-2"}
+
+
 @patch("sagemaker.utils._botocore_resolver")
 @patch("os.path.exists", return_value=True)
 @patch("os.path.isfile", return_value=True)
@@ -208,18 +230,28 @@ def test_sklearn_with_all_parameters(
     sagemaker_session.process.assert_called_with(**expected_args)
 
 
-def test_local_mode_disables_local_code_by_default():
-    processor = Processor(
-        image_uri="",
-        role=ROLE,
-        instance_count=1,
-        instance_type="local",
-    )
+@patch("sagemaker.config.config._load_config_from_file")
+@patch("sagemaker.config.config.load_sagemaker_config")
+def test_local_mode_disables_local_code_by_default(
+    mock_load_config, mock_load_config_from_file, base_config_with_schema, base_local_config
+):
+    local_config = base_config_with_schema
+    local_config["local"] = base_local_config
+    mock_load_config.return_value = local_config
+    mock_load_config_from_file.return_value = local_config
+
+    with patch("sagemaker.config.config._load_config_from_file.open", mock_open(read_data="")):
+        processor = Processor(
+            image_uri="",
+            role=ROLE,
+            instance_count=1,
+            instance_type="local",
+        )
 
-    # Most tests use a fixture for sagemaker_session for consistent behaviour, so this unit test
-    # checks that the default initialization disables unsupported 'local_code' mode:
-    assert processor.sagemaker_session._disable_local_code
-    assert isinstance(processor.sagemaker_session, LocalSession)
+        # Most tests use a fixture for sagemaker_session for consistent behaviour, so this unit test
+        # checks that the default initialization disables unsupported 'local_code' mode:
+        assert processor.sagemaker_session._disable_local_code
+        assert isinstance(processor.sagemaker_session, LocalSession)
 
 
 @patch("sagemaker.utils._botocore_resolver")
@@ -281,7 +313,11 @@ def test_sklearn_with_all_parameters_via_run_args(
 @patch("os.path.exists", return_value=True)
 @patch("os.path.isfile", return_value=True)
 def test_sklearn_with_all_parameters_via_run_args_called_twice(
-    exists_mock, isfile_mock, botocore_resolver, sklearn_version, sagemaker_session
+    exists_mock,
+    isfile_mock,
+    botocore_resolver,
+    sklearn_version,
+    sagemaker_session,
 ):
     botocore_resolver.return_value.construct_endpoint.return_value = {"hostname": ECR_HOSTNAME}
 
diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py
index 618d0d7ea8..e35f17331c 100644
--- a/tests/unit/test_pytorch.py
+++ b/tests/unit/test_pytorch.py
@@ -337,7 +337,7 @@ def test_pytorch(
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(pytorch_inference_version, pytorch_inference_py_version)
     expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
diff --git a/tests/unit/test_rl.py b/tests/unit/test_rl.py
index d9c4129cf6..fb337f0063 100644
--- a/tests/unit/test_rl.py
+++ b/tests/unit/test_rl.py
@@ -335,7 +335,7 @@ def test_rl(time, strftime, sagemaker_session, coach_mxnet_version):
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         RLToolkit.COACH.value, coach_mxnet_version, RLFramework.MXNET.value
diff --git a/tests/unit/test_sklearn.py b/tests/unit/test_sklearn.py
index b0df31fee1..68302ee572 100644
--- a/tests/unit/test_sklearn.py
+++ b/tests/unit/test_sklearn.py
@@ -332,7 +332,7 @@ def test_sklearn(time, strftime, sagemaker_session, sklearn_version):
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(sklearn_version)
     expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
diff --git a/tests/unit/test_xgboost.py b/tests/unit/test_xgboost.py
index 18eab98149..12d2354dad 100644
--- a/tests/unit/test_xgboost.py
+++ b/tests/unit/test_xgboost.py
@@ -330,7 +330,7 @@ def test_xgboost_cpu(time, strftime, sagemaker_session, xgboost_framework_versio
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(xgboost_framework_version)
     expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
@@ -377,7 +377,7 @@ def test_xgboost_gpu(time, strftime, sagemaker_session, xgboost_gpu_framework_ve
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(
         xgboost_gpu_framework_version, instance_type=GPU_INSTANCE_TYPE
@@ -427,7 +427,7 @@ def test_distributed_training(time, strftime, sagemaker_session, xgboost_framewo
     sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
     assert sagemaker_call_names == ["train", "logs_for_job"]
     boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
-    assert boto_call_names == ["resource"]
+    assert boto_call_names == ["resource", "client"]
 
     expected_train_args = _create_train_job(xgboost_framework_version, DIST_INSTANCE_COUNT)
     expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs

From c0e4496fa0b48ad6c003f6cd656b1888f8b49ca2 Mon Sep 17 00:00:00 2001
From: knikure <knikure@amazon.com>
Date: Thu, 20 Jun 2024 15:40:27 +0000
Subject: [PATCH 2/2] Fix processing tests

---
 tests/unit/test_processing.py | 58 +++++++++++++----------------------
 1 file changed, 21 insertions(+), 37 deletions(-)

diff --git a/tests/unit/test_processing.py b/tests/unit/test_processing.py
index e7467d8b02..fcf941fdc5 100644
--- a/tests/unit/test_processing.py
+++ b/tests/unit/test_processing.py
@@ -136,23 +136,16 @@ def uploaded_code(
 
 
 @pytest.fixture(autouse=True)
-def mock_process_studio_metadata_file(tmp_path):
-    studio_file = tmp_path / "resource-metadata.json"
-    studio_file.write_text(json.dumps({"AppType": "TestAppType"}))
-
-    with patch("os.path.exists", return_value=True):
-        with patch("sagemaker.user_agent.open", mock_open(read_data=studio_file.read_text())):
-            yield process_studio_metadata_file
-
-
-@pytest.fixture()
-def base_config_with_schema():
-    return {"SchemaVersion": "1.0"}
-
+def mock_process_studio_metadata_file(request, tmp_path):
+    if "no_mock_studio_metadata" in request.keywords:
+        yield process_studio_metadata_file
+    else:
+        studio_file = tmp_path / "resource-metadata.json"
+        studio_file.write_text(json.dumps({"AppType": "TestAppType"}))
 
-@pytest.fixture()
-def base_local_config():
-    return {"region_name": "us-west-2"}
+        with patch("os.path.exists", return_value=True):
+            with patch("sagemaker.user_agent.open", mock_open(read_data=studio_file.read_text())):
+                yield process_studio_metadata_file
 
 
 @patch("sagemaker.utils._botocore_resolver")
@@ -230,28 +223,19 @@ def test_sklearn_with_all_parameters(
     sagemaker_session.process.assert_called_with(**expected_args)
 
 
-@patch("sagemaker.config.config._load_config_from_file")
-@patch("sagemaker.config.config.load_sagemaker_config")
-def test_local_mode_disables_local_code_by_default(
-    mock_load_config, mock_load_config_from_file, base_config_with_schema, base_local_config
-):
-    local_config = base_config_with_schema
-    local_config["local"] = base_local_config
-    mock_load_config.return_value = local_config
-    mock_load_config_from_file.return_value = local_config
-
-    with patch("sagemaker.config.config._load_config_from_file.open", mock_open(read_data="")):
-        processor = Processor(
-            image_uri="",
-            role=ROLE,
-            instance_count=1,
-            instance_type="local",
-        )
+@pytest.mark.no_mock_studio_metadata
+def test_local_mode_disables_local_code_by_default():
+    processor = Processor(
+        image_uri="",
+        role=ROLE,
+        instance_count=1,
+        instance_type="local",
+    )
 
-        # Most tests use a fixture for sagemaker_session for consistent behaviour, so this unit test
-        # checks that the default initialization disables unsupported 'local_code' mode:
-        assert processor.sagemaker_session._disable_local_code
-        assert isinstance(processor.sagemaker_session, LocalSession)
+    # Most tests use a fixture for sagemaker_session for consistent behaviour, so this unit test
+    # checks that the default initialization disables unsupported 'local_code' mode:
+    assert processor.sagemaker_session._disable_local_code
+    assert isinstance(processor.sagemaker_session, LocalSession)
 
 
 @patch("sagemaker.utils._botocore_resolver")