From bbe34e8f61847128ba1b89a54f8c931894871e05 Mon Sep 17 00:00:00 2001
From: bstrzele
Date: Mon, 23 Dec 2024 10:30:52 +0000
Subject: [PATCH 1/5] process time

---
 .../mediapipegraphexecutor.hpp | 19 ++++++++
 src/metric_config.cpp          |  1 +
 src/metric_config.hpp          |  2 +
 src/model_metric_reporter.cpp  | 47 +++++++++++++++++++
 src/model_metric_reporter.hpp  | 30 ++++++++++++
 src/test/metrics_flow_test.cpp | 12 +++++
 6 files changed, 111 insertions(+)

diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp
index b52bc07037..def898599e 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.hpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp
@@ -27,6 +27,7 @@
 #include "../model_metric_reporter.hpp"
 #include "../profiler.hpp"
 #include "../status.hpp"
+#include "../timer.hpp"
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 #include "mediapipe/framework/calculator_graph.h"
@@ -104,6 +105,12 @@
         MetricGaugeGuard currentGraphsGuard(this->mediapipeServableMetricReporter->currentGraphs.get());
         ::mediapipe::CalculatorGraph graph;
         MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
+        enum : unsigned int {
+            PROCESS,
+            TIMER_END2
+        };
+        Timer<TIMER_END2> timer;
+        timer.start(PROCESS);
         std::unordered_map<std::string, ::mediapipe::OutputStreamPoller> outputPollers;
         for (auto& name : this->outputNames) {
             if (name.empty()) {
@@ -196,6 +203,9 @@ class MediapipeGraphExecutor {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
         }
         MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
+        timer.stop(PROCESS);
+        double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
+        OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
         if (outputPollers.size() != outputPollersWithReceivedPacket.size()) {
             SPDLOG_DEBUG("Mediapipe failed to execute. Failed to receive all output packets");
             return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution");
@@ -218,6 +228,12 @@ class MediapipeGraphExecutor {
             // Init
             MP_RETURN_ON_FAIL(graph.Initialize(this->config), "graph initialization", StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
         }
+        enum : unsigned int {
+            PROCESS,
+            TIMER_END2
+        };
+        Timer<TIMER_END2> timer;
+        timer.start(PROCESS);
         {
             OVMS_PROFILE_SCOPE("Mediapipe graph installing packet observers");
             // Installing observers
@@ -334,6 +350,9 @@ class MediapipeGraphExecutor {
             MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
             SPDLOG_DEBUG("Graph {}: Done execution", this->name);
         }
+        timer.stop(PROCESS);
+        double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
+        OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
         return StatusCode::OK;
     } catch (...) {
         SPDLOG_DEBUG("Graph {}: Exception while processing MediaPipe graph", this->name);
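
The measurement above follows the enum-indexed timer pattern used across OVMS (src/timer.hpp). A minimal self-contained sketch of such a timer, assuming (not copying) the real class's interface, with start/stop slots indexed by an enum and an elapsed<Duration>() accessor:

    #include <array>
    #include <chrono>

    // Illustrative stand-in for ovms::Timer from src/timer.hpp; names and
    // internals here are assumptions, only the usage pattern mirrors the patch.
    template <unsigned int SIZE>
    class SketchTimer {
        std::array<std::chrono::high_resolution_clock::time_point, SIZE> startPoints;
        std::array<std::chrono::high_resolution_clock::time_point, SIZE> stopPoints;

    public:
        void start(unsigned int idx) { startPoints[idx] = std::chrono::high_resolution_clock::now(); }
        void stop(unsigned int idx) { stopPoints[idx] = std::chrono::high_resolution_clock::now(); }
        template <typename DurationType>
        double elapsed(unsigned int idx) const {
            // DurationType = std::chrono::microseconds matches the _us metric unit
            return std::chrono::duration_cast<DurationType>(stopPoints[idx] - startPoints[idx]).count();
        }
    };

    // Usage mirroring the hunks above:
    //   enum : unsigned int { PROCESS, TIMER_END2 };
    //   SketchTimer<TIMER_END2> timer;
    //   timer.start(PROCESS);  /* ...run the graph... */  timer.stop(PROCESS);
    //   double processTime = timer.elapsed<std::chrono::microseconds>(PROCESS);
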
{ SPDLOG_DEBUG("Graph {}: Exception while processing MediaPipe graph", this->name); diff --git a/src/metric_config.cpp b/src/metric_config.cpp index b40f2db5ab..7fe8830e48 100644 --- a/src/metric_config.cpp +++ b/src/metric_config.cpp @@ -59,6 +59,7 @@ const std::string METRIC_NAME_REQUESTS_ACCEPTED = "ovms_requests_accepted"; const std::string METRIC_NAME_REQUESTS_REJECTED = "ovms_requests_rejected"; const std::string METRIC_NAME_GRAPH_ERROR = "ovms_graph_error"; +const std::string METRIC_NAME_PROCESSING_TIME = "ovms_processing_time"; bool MetricConfig::validateEndpointPath(const std::string& endpoint) { std::regex valid_endpoint_regex("^/[a-zA-Z0-9]*$"); diff --git a/src/metric_config.hpp b/src/metric_config.hpp index b31c369c85..9c589300ec 100644 --- a/src/metric_config.hpp +++ b/src/metric_config.hpp @@ -45,6 +45,7 @@ extern const std::string METRIC_NAME_REQUESTS_ACCEPTED; extern const std::string METRIC_NAME_REQUESTS_REJECTED; extern const std::string METRIC_NAME_GRAPH_ERROR; +extern const std::string METRIC_NAME_PROCESSING_TIME; class Status; /** @@ -98,6 +99,7 @@ class MetricConfig { {METRIC_NAME_REQUESTS_ACCEPTED}, {METRIC_NAME_REQUESTS_REJECTED}, {METRIC_NAME_GRAPH_ERROR}, + {METRIC_NAME_PROCESSING_TIME}, {METRIC_NAME_RESPONSES}}; }; } // namespace ovms diff --git a/src/model_metric_reporter.cpp b/src/model_metric_reporter.cpp index 8792c0ec3c..34bf93f63d 100644 --- a/src/model_metric_reporter.cpp +++ b/src/model_metric_reporter.cpp @@ -333,6 +333,10 @@ MediapipeServableMetricReporter::MediapipeServableMetricReporter(const MetricCon return; } + for (int i = 0; i < NUMBER_OF_BUCKETS; i++) { + this->buckets.emplace_back(floor(BUCKET_MULTIPLIER * pow(BUCKET_POWER_BASE, i))); + } + auto familyName = METRIC_NAME_CURRENT_GRAPHS; if (metricConfig->isFamilyEnabled(familyName)) { auto family = registry->createFamily(familyName, @@ -552,6 +556,49 @@ MediapipeServableMetricReporter::MediapipeServableMetricReporter(const MetricCon {"interface", "REST"}}); THROW_IF_NULL(this->requestSuccessRestModelReady, "cannot create metric"); } + familyName = METRIC_NAME_PROCESSING_TIME; + if (metricConfig->isFamilyEnabled(familyName)) { + auto family = registry->createFamily(familyName, + "Time packet spent in the MediaPipe graph."); + THROW_IF_NULL(family, "cannot create family"); + + // KFS + this->processingTimeGrpcModelInfer = family->addMetric({{"name", graphName}, + {"api", "KServe"}, + {"method", "ModelInfer"}, + {"interface", "gRPC"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeGrpcModelInfer, "cannot create metric"); + + this->processingTimeGrpcModelInferStream = family->addMetric({{"name", graphName}, + {"api", "KServe"}, + {"method", "ModelInferStream"}, + {"interface", "gRPC"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeGrpcModelInfer, "cannot create metric"); + + this->processingTimeRestModelInfer = family->addMetric({{"name", graphName}, + {"api", "KServe"}, + {"method", "ModelInfer"}, + {"interface", "REST"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeRestModelInfer, "cannot create metric"); + + // V3 + this->processingTimeRestV3Unary = family->addMetric({{"name", graphName}, + {"api", "V3"}, + {"method", "Unary"}, + {"interface", "REST"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeRestV3Unary, "cannot create metric"); + + this->processingTimeRestV3Stream = family->addMetric({{"name", graphName}, + {"api", "V3"}, + {"method", "Stream"}, + {"interface", "REST"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeRestV3Stream, "cannot create 
metric"); + } } } // namespace ovms diff --git a/src/model_metric_reporter.hpp b/src/model_metric_reporter.hpp index 1a2ffb3461..d820af7078 100644 --- a/src/model_metric_reporter.hpp +++ b/src/model_metric_reporter.hpp @@ -156,6 +156,9 @@ class ModelMetricReporter : public ServableMetricReporter { class MediapipeServableMetricReporter { MetricRegistry* registry; +protected: + std::vector buckets; + public: std::unique_ptr currentGraphs; @@ -205,6 +208,33 @@ class MediapipeServableMetricReporter { std::unique_ptr requestErrorRestV3Unary; std::unique_ptr requestErrorRestV3Stream; + std::unique_ptr processingTimeGrpcModelInfer; + std::unique_ptr processingTimeGrpcModelInferStream; + std::unique_ptr processingTimeRestModelInfer; + std::unique_ptr processingTimeRestV3Unary; + std::unique_ptr processingTimeRestV3Stream; + + inline MetricHistogram* getProcessingTimeMetric(const ExecutionContext& context) { + if (context.interface == ExecutionContext::Interface::GRPC) { + if (context.method == ExecutionContext::Method::ModelInfer) + return this->processingTimeGrpcModelInfer.get(); + if (context.method == ExecutionContext::Method::ModelInferStream) + return this->processingTimeGrpcModelInferStream.get(); + return nullptr; + } else if (context.interface == ExecutionContext::Interface::REST) { + if (context.method == ExecutionContext::Method::ModelInfer) + return this->processingTimeRestModelInfer.get(); + if (context.method == ExecutionContext::Method::V3Unary) + return this->processingTimeRestV3Unary.get(); + if (context.method == ExecutionContext::Method::V3Stream) + return this->processingTimeRestV3Stream.get(); + return nullptr; + } else { + return nullptr; + } + return nullptr; + } + inline MetricCounter* getRequestsMetric(const ExecutionContext& context, bool success = true) { if (context.interface == ExecutionContext::Interface::GRPC) { if (context.method == ExecutionContext::Method::ModelInfer) diff --git a/src/test/metrics_flow_test.cpp b/src/test/metrics_flow_test.cpp index d358a73b28..9ba74dd26a 100644 --- a/src/test/metrics_flow_test.cpp +++ b/src/test/metrics_flow_test.cpp @@ -395,6 +395,9 @@ TEST_F(MetricFlowTest, GrpcModelInfer) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, mpName, "gRPC", "ModelInfer", "KServe", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, mpName, "gRPC", "ModelInfer", "KServe", numberOfAcceptedRequests); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(numberOfSuccessRequests))); @@ -714,6 +717,9 @@ TEST_F(MetricFlowTest, RestModelInfer) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, mpName, "REST", "ModelInfer", "KServe", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, mpName, "REST", "ModelInfer", "KServe", numberOfAcceptedRequests); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + 
std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(0))); @@ -835,6 +841,8 @@ TEST_F(MetricFlowTest, RestV3Unary) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_ACCEPTED, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2); // checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, "dummy_gpt", "REST", "Unary", "V3", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Unary\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); } #endif @@ -887,6 +895,9 @@ TEST_F(MetricFlowTest, RestV3Stream) { // checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, "dummy_gpt", "REST", "Stream", "V3", numberOfRejectedRequests); const int numberOfMockedChunksPerRequest = 9; // Defined in openai_chat_completions_mock_calculator.cpp checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Stream", "V3", numberOfAcceptedRequests * numberOfMockedChunksPerRequest * 2); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Stream\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); + SPDLOG_ERROR(server.collect()); } #endif @@ -993,6 +1004,7 @@ std::string MetricFlowTest::prepareConfigContent() { R"(",")" + METRIC_NAME_REQUESTS_REJECTED + R"(",")" + METRIC_NAME_RESPONSES + R"(",")" + METRIC_NAME_GRAPH_ERROR + + R"(",")" + METRIC_NAME_PROCESSING_TIME + R"("] } }, From 4811c7de8d5ebf41059f9db312cd76ad3b96717e Mon Sep 17 00:00:00 2001 From: bstrzele Date: Mon, 23 Dec 2024 11:20:14 +0000 Subject: [PATCH 2/5] review fix --- src/metric_config.cpp | 2 +- src/model_metric_reporter.cpp | 23 ++++------------------- src/model_metric_reporter.hpp | 25 ++++++++----------------- src/test/metrics_flow_test.cpp | 10 ++++------ 4 files changed, 17 insertions(+), 43 deletions(-) diff --git a/src/metric_config.cpp b/src/metric_config.cpp index 7fe8830e48..200fded329 100644 --- a/src/metric_config.cpp +++ b/src/metric_config.cpp @@ -59,7 +59,7 @@ const std::string METRIC_NAME_REQUESTS_ACCEPTED = "ovms_requests_accepted"; const std::string METRIC_NAME_REQUESTS_REJECTED = "ovms_requests_rejected"; const std::string METRIC_NAME_GRAPH_ERROR = "ovms_graph_error"; -const std::string METRIC_NAME_PROCESSING_TIME = "ovms_processing_time"; +const std::string METRIC_NAME_PROCESSING_TIME = "ovms_graph_processing_time_us"; bool MetricConfig::validateEndpointPath(const std::string& endpoint) { std::regex valid_endpoint_regex("^/[a-zA-Z0-9]*$"); diff --git a/src/model_metric_reporter.cpp b/src/model_metric_reporter.cpp index 34bf93f63d..2b6c34c55a 100644 --- a/src/model_metric_reporter.cpp +++ 
diff --git a/src/model_metric_reporter.cpp b/src/model_metric_reporter.cpp
index 34bf93f63d..2b6c34c55a 100644
--- a/src/model_metric_reporter.cpp
+++ b/src/model_metric_reporter.cpp
@@ -564,38 +564,23 @@ MediapipeServableMetricReporter::MediapipeServableMetricReporter(const MetricCon
 
         // KFS
         this->processingTimeGrpcModelInfer = family->addMetric({{"name", graphName},
-                                                                   {"api", "KServe"},
-                                                                   {"method", "ModelInfer"},
-                                                                   {"interface", "gRPC"}},
+                                                                   {"method", "ModelInfer"}},
             this->buckets);
         THROW_IF_NULL(this->processingTimeGrpcModelInfer, "cannot create metric");
 
         this->processingTimeGrpcModelInferStream = family->addMetric({{"name", graphName},
-                                                                         {"api", "KServe"},
-                                                                         {"method", "ModelInferStream"},
-                                                                         {"interface", "gRPC"}},
+                                                                         {"method", "ModelInferStream"}},
             this->buckets);
         THROW_IF_NULL(this->processingTimeGrpcModelInferStream, "cannot create metric");
 
-        this->processingTimeRestModelInfer = family->addMetric({{"name", graphName},
-                                                                   {"api", "KServe"},
-                                                                   {"method", "ModelInfer"},
-                                                                   {"interface", "REST"}},
-            this->buckets);
-        THROW_IF_NULL(this->processingTimeRestModelInfer, "cannot create metric");
-
         // V3
         this->processingTimeRestV3Unary = family->addMetric({{"name", graphName},
-                                                                {"api", "V3"},
-                                                                {"method", "Unary"},
-                                                                {"interface", "REST"}},
+                                                                {"method", "Unary"}},
             this->buckets);
         THROW_IF_NULL(this->processingTimeRestV3Unary, "cannot create metric");
 
         this->processingTimeRestV3Stream = family->addMetric({{"name", graphName},
-                                                                 {"api", "V3"},
-                                                                 {"method", "Stream"},
-                                                                 {"interface", "REST"}},
+                                                                 {"method", "Stream"}},
             this->buckets);
         THROW_IF_NULL(this->processingTimeRestV3Stream, "cannot create metric");
     }
diff --git a/src/model_metric_reporter.hpp b/src/model_metric_reporter.hpp
index d820af7078..4146431f32 100644
--- a/src/model_metric_reporter.hpp
+++ b/src/model_metric_reporter.hpp
@@ -215,23 +215,14 @@ class MediapipeServableMetricReporter {
     std::unique_ptr<MetricHistogram> processingTimeRestV3Stream;
 
     inline MetricHistogram* getProcessingTimeMetric(const ExecutionContext& context) {
-        if (context.interface == ExecutionContext::Interface::GRPC) {
-            if (context.method == ExecutionContext::Method::ModelInfer)
-                return this->processingTimeGrpcModelInfer.get();
-            if (context.method == ExecutionContext::Method::ModelInferStream)
-                return this->processingTimeGrpcModelInferStream.get();
-            return nullptr;
-        } else if (context.interface == ExecutionContext::Interface::REST) {
-            if (context.method == ExecutionContext::Method::ModelInfer)
-                return this->processingTimeRestModelInfer.get();
-            if (context.method == ExecutionContext::Method::V3Unary)
-                return this->processingTimeRestV3Unary.get();
-            if (context.method == ExecutionContext::Method::V3Stream)
-                return this->processingTimeRestV3Stream.get();
-            return nullptr;
-        } else {
-            return nullptr;
-        }
+        if (context.method == ExecutionContext::Method::ModelInfer)
+            return this->processingTimeGrpcModelInfer.get();
+        if (context.method == ExecutionContext::Method::ModelInferStream)
+            return this->processingTimeGrpcModelInferStream.get();
+        if (context.method == ExecutionContext::Method::V3Unary)
+            return this->processingTimeRestV3Unary.get();
+        if (context.method == ExecutionContext::Method::V3Stream)
+            return this->processingTimeRestV3Stream.get();
         return nullptr;
     }
 
std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(numberOfSuccessRequests))); @@ -718,8 +717,7 @@ TEST_F(MetricFlowTest, RestModelInfer) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, mpName, "REST", "ModelInfer", "KServe", numberOfAcceptedRequests); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(0))); @@ -842,7 +840,7 @@ TEST_F(MetricFlowTest, RestV3Unary) { // checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, "dummy_gpt", "REST", "Unary", "V3", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Unary\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"Unary\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); } #endif @@ -896,7 +894,7 @@ TEST_F(MetricFlowTest, RestV3Stream) { const int numberOfMockedChunksPerRequest = 9; // Defined in openai_chat_completions_mock_calculator.cpp checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Stream", "V3", numberOfAcceptedRequests * numberOfMockedChunksPerRequest * 2); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Stream\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"Stream\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); SPDLOG_ERROR(server.collect()); } From e80f8d0ca40da85953063a9e4b4ad34857ed90f4 Mon Sep 17 00:00:00 2001 From: bstrzele Date: Thu, 2 Jan 2025 13:38:55 
From e80f8d0ca40da85953063a9e4b4ad34857ed90f4 Mon Sep 17 00:00:00 2001
From: bstrzele
Date: Thu, 2 Jan 2025 13:38:55 +0000
Subject: [PATCH 3/5] documentation

---
 docs/metrics.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/metrics.md b/docs/metrics.md
index 99bcc10c5c..8a6e2b3b04 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -241,6 +241,7 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics whi
 | counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. |
 | gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. |
 | counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. |
+| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened. |
 
 Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet.

From 2455f222a03596d590cbf0827b5d100fd3a7a2d3 Mon Sep 17 00:00:00 2001
From: bstrzele
Date: Fri, 10 Jan 2025 10:27:47 +0000
Subject: [PATCH 4/5] fix

---
 docs/metrics.md                                   | 2 +-
 src/mediapipe_internal/mediapipegraphexecutor.hpp | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/metrics.md b/docs/metrics.md
index 8a6e2b3b04..162501be8d 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -241,7 +241,7 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics whi
 | counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. |
 | gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. |
 | counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. |
-| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened. |
+| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and been successfully closed. |
 
 Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet.
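
Once enabled, the histogram documented above can be consumed with standard Prometheus queries; for example (illustrative PromQL, not part of this change):

    # mean graph processing time in microseconds over the last 5 minutes, per graph
    sum by (name) (rate(ovms_graph_processing_time_us_sum[5m]))
      / sum by (name) (rate(ovms_graph_processing_time_us_count[5m]))

    # 95th percentile per graph
    histogram_quantile(0.95, sum by (name, le) (rate(ovms_graph_processing_time_us_bucket[5m])))
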
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp
index def898599e..e3466dc380 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.hpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp
@@ -203,13 +203,13 @@ class MediapipeGraphExecutor {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
         }
         MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
-        timer.stop(PROCESS);
-        double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
-        OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
         if (outputPollers.size() != outputPollersWithReceivedPacket.size()) {
             SPDLOG_DEBUG("Mediapipe failed to execute. Failed to receive all output packets");
             return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution");
         }
+        timer.stop(PROCESS);
+        double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
+        OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
         INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext));
         SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name);
         return StatusCode::OK;

From f6c43b3bd0f8b43cc196181b22b18ee788aa6468 Mon Sep 17 00:00:00 2001
From: bstrzele
Date: Fri, 10 Jan 2025 10:33:17 +0000
Subject: [PATCH 5/5] fix wording

---
 docs/metrics.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/metrics.md b/docs/metrics.md
index 162501be8d..143604c6b8 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -241,7 +241,7 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics whi
 | counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. |
 | gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. |
 | counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. |
-| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and been successfully closed. |
+| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and has been successfully closed. |
 
 Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet.
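
Like the other optional families, the new histogram has to be listed in metric_list to be collected, as prepareConfigContent() does in the tests of patch 1. A minimal server configuration sketch assuming the standard OVMS monitoring section, with the model list abbreviated:

    {
        "model_config_list": [],
        "monitoring": {
            "metrics": {
                "enable": true,
                "metric_list": ["ovms_graph_processing_time_us"]
            }
        }
    }
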