From c578e6fbba270306b83ea4112fa511a9ed209505 Mon Sep 17 00:00:00 2001
From: Qing Lan
Date: Wed, 26 Jun 2024 09:58:06 -0700
Subject: [PATCH] [CI] fix bugs (#2111)

---
 .github/workflows/llm_integration.yml              |  2 +-
 .../rolling_batch/lmi_dist_rolling_batch.py        | 27 ++++++++++---------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/llm_integration.yml b/.github/workflows/llm_integration.yml
index d31503595..baa749a50 100644
--- a/.github/workflows/llm_integration.yml
+++ b/.github/workflows/llm_integration.yml
@@ -137,7 +137,7 @@ jobs:
         working-directory: tests/integration
         run: |
           for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
-          rm -rf outputs && rm -rf models
+          sudo rm -rf outputs && sudo rm -rf models
           rm awscurl
           docker rm -f $(docker ps -aq) || true
       - name: Upload test logs
diff --git a/engines/python/setup/djl_python/rolling_batch/lmi_dist_rolling_batch.py b/engines/python/setup/djl_python/rolling_batch/lmi_dist_rolling_batch.py
index 051508215..1383abb9f 100644
--- a/engines/python/setup/djl_python/rolling_batch/lmi_dist_rolling_batch.py
+++ b/engines/python/setup/djl_python/rolling_batch/lmi_dist_rolling_batch.py
@@ -189,20 +189,21 @@ def inference(self,
                 self.request_cache, request_output, self.get_tokenizer())
             # Record SD metrics
             completion_output = request_output.outputs[0]
-            if (self.lmi_dist_config.record_acceptance_rate
+            if (
+                    self.lmi_dist_config.record_acceptance_rate
                     or self.lmi_dist_config.speculative_telemetry
-                ) and request_output.finished:
-                if self.supports_speculative_decoding and completion_output.acceptance_history:
-                    record = get_speculative_decoding_metrics_record(
-                        completion_output, request_output)
-                    if self.lmi_dist_config.record_acceptance_rate:
-                        logging.info(f"Speculative Decoding {record}")
-                    if self.lmi_dist_config.speculative_telemetry and os.environ.get(
-                            "SAGEMAKER_SECURE_MODE") == "true":
-                        telemetry_manager.record_speculative(record)
-                else:
-                    logging.warning(
-                        f"Ignoring logging speculative decoding metrics")
+            ) and self.lmi_dist_config.speculative_draft_model and request_output.finished:
+                try:
+                    if self.supports_speculative_decoding and completion_output.acceptance_history:
+                        record = get_speculative_decoding_metrics_record(
+                            completion_output, request_output)
+                        if self.lmi_dist_config.record_acceptance_rate:
+                            logging.info(f"Speculative Decoding {record}")
+                        if self.lmi_dist_config.speculative_telemetry and os.environ.get(
+                                "SAGEMAKER_SECURE_MODE") == "true":
+                            telemetry_manager.record_speculative(record)
+                except:
+                    logging.debug("SD telemetry collection failed, ignore")
         for request in self.active_requests:
             request_output = request.request_output
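
Note: the second hunk guards speculative-decoding (SD) metrics so that telemetry can never fail a request: metrics are attempted only when a draft model is configured and the request has finished, and collection is wrapped in try/except with a debug-level log on failure. The standalone sketch below illustrates that guard pattern outside the repo; SDConfig, maybe_record_sd_metrics, and the acceptance-rate computation are hypothetical stand-ins, not part of lmi_dist_rolling_batch.py.

    # Minimal sketch (hypothetical names) of the guard pattern from the patch.
    import logging
    from dataclasses import dataclass


    @dataclass
    class SDConfig:
        record_acceptance_rate: bool = True
        speculative_telemetry: bool = True
        speculative_draft_model: str = ""  # empty string means SD is disabled


    def maybe_record_sd_metrics(config: SDConfig, finished: bool,
                                acceptance_history: list) -> None:
        """Record SD metrics only when a draft model is set and the request is done."""
        if not ((config.record_acceptance_rate or config.speculative_telemetry)
                and config.speculative_draft_model and finished):
            return
        try:
            if acceptance_history:
                record = {
                    "acceptance_rate":
                    sum(acceptance_history) / len(acceptance_history)
                }
                if config.record_acceptance_rate:
                    logging.info(f"Speculative Decoding {record}")
        except Exception:
            # Telemetry must never break the inference loop; log and move on.
            logging.debug("SD telemetry collection failed, ignore")


    if __name__ == "__main__":
        maybe_record_sd_metrics(SDConfig(speculative_draft_model="draft"),
                                finished=True,
                                acceptance_history=[0.7, 0.8])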