From bbe34e8f61847128ba1b89a54f8c931894871e05 Mon Sep 17 00:00:00 2001
From: bstrzele
Date: Mon, 23 Dec 2024 10:30:52 +0000
Subject: [PATCH 1/5] process time

---
 .../mediapipegraphexecutor.hpp | 19 ++++++++
 src/metric_config.cpp          |  1 +
 src/metric_config.hpp          |  2 +
 src/model_metric_reporter.cpp  | 47 +++++++++++++++++++
 src/model_metric_reporter.hpp  | 30 ++++++++++++
 src/test/metrics_flow_test.cpp | 12 +++++
 6 files changed, 111 insertions(+)

diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp
index b52bc07037..def898599e 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.hpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp
@@ -27,6 +27,7 @@
 #include "../model_metric_reporter.hpp"
 #include "../profiler.hpp"
 #include "../status.hpp"
+#include "../timer.hpp"
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 #include "mediapipe/framework/calculator_graph.h"
@@ -104,6 +105,12 @@
         MetricGaugeGuard currentGraphsGuard(this->mediapipeServableMetricReporter->currentGraphs.get());
         ::mediapipe::CalculatorGraph graph;
         MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
+        enum : unsigned int {
+            PROCESS,
+            TIMER_END2
+        };
+        Timer<TIMER_END2> timer;
+        timer.start(PROCESS);
         std::unordered_map<std::string, ::mediapipe::OutputStreamPoller> outputPollers;
         for (auto& name : this->outputNames) {
             if (name.empty()) {
@@ -196,6 +203,9 @@ class MediapipeGraphExecutor {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
         }
         MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
+        timer.stop(PROCESS);
+        double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
+        OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
         if (outputPollers.size() != outputPollersWithReceivedPacket.size()) {
             SPDLOG_DEBUG("Mediapipe failed to execute. Failed to receive all output packets");
             return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution");
@@ -218,6 +228,12 @@ class MediapipeGraphExecutor {
             // Init
             MP_RETURN_ON_FAIL(graph.Initialize(this->config), "graph initialization", StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
         }
+        enum : unsigned int {
+            PROCESS,
+            TIMER_END2
+        };
+        Timer<TIMER_END2> timer;
+        timer.start(PROCESS);
         {
             OVMS_PROFILE_SCOPE("Mediapipe graph installing packet observers");
             // Installing observers
@@ -334,6 +350,9 @@ class MediapipeGraphExecutor {
             MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
             SPDLOG_DEBUG("Graph {}: Done execution", this->name);
         }
+        timer.stop(PROCESS);
+        double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
+        OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
         return StatusCode::OK;
     } catch (...) {
         SPDLOG_DEBUG("Graph {}: Exception while processing MediaPipe graph", this->name);
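
The measurement above follows the enum-indexed timer pattern used across OVMS (src/timer.hpp). A minimal self-contained sketch of such a timer, assuming (not copying) the real class's interface, with start/stop slots indexed by an enum and an elapsed<Duration>() accessor:

    #include <array>
    #include <chrono>

    // Illustrative stand-in for ovms::Timer from src/timer.hpp; names and
    // internals here are assumptions, only the usage pattern mirrors the patch.
    template <unsigned int SIZE>
    class SketchTimer {
        std::array<std::chrono::high_resolution_clock::time_point, SIZE> startPoints;
        std::array<std::chrono::high_resolution_clock::time_point, SIZE> stopPoints;

    public:
        void start(unsigned int idx) { startPoints[idx] = std::chrono::high_resolution_clock::now(); }
        void stop(unsigned int idx) { stopPoints[idx] = std::chrono::high_resolution_clock::now(); }
        template <typename DurationType>
        double elapsed(unsigned int idx) const {
            // DurationType = std::chrono::microseconds matches the _us metric unit
            return std::chrono::duration_cast<DurationType>(stopPoints[idx] - startPoints[idx]).count();
        }
    };

    // Usage mirroring the hunks above:
    //   enum : unsigned int { PROCESS, TIMER_END2 };
    //   SketchTimer<TIMER_END2> timer;
    //   timer.start(PROCESS);  /* ...run the graph... */  timer.stop(PROCESS);
    //   double processTime = timer.elapsed<std::chrono::microseconds>(PROCESS);
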
{ SPDLOG_DEBUG("Graph {}: Exception while processing MediaPipe graph", this->name); diff --git a/src/metric_config.cpp b/src/metric_config.cpp index b40f2db5ab..7fe8830e48 100644 --- a/src/metric_config.cpp +++ b/src/metric_config.cpp @@ -59,6 +59,7 @@ const std::string METRIC_NAME_REQUESTS_ACCEPTED = "ovms_requests_accepted"; const std::string METRIC_NAME_REQUESTS_REJECTED = "ovms_requests_rejected"; const std::string METRIC_NAME_GRAPH_ERROR = "ovms_graph_error"; +const std::string METRIC_NAME_PROCESSING_TIME = "ovms_processing_time"; bool MetricConfig::validateEndpointPath(const std::string& endpoint) { std::regex valid_endpoint_regex("^/[a-zA-Z0-9]*$"); diff --git a/src/metric_config.hpp b/src/metric_config.hpp index b31c369c85..9c589300ec 100644 --- a/src/metric_config.hpp +++ b/src/metric_config.hpp @@ -45,6 +45,7 @@ extern const std::string METRIC_NAME_REQUESTS_ACCEPTED; extern const std::string METRIC_NAME_REQUESTS_REJECTED; extern const std::string METRIC_NAME_GRAPH_ERROR; +extern const std::string METRIC_NAME_PROCESSING_TIME; class Status; /** @@ -98,6 +99,7 @@ class MetricConfig { {METRIC_NAME_REQUESTS_ACCEPTED}, {METRIC_NAME_REQUESTS_REJECTED}, {METRIC_NAME_GRAPH_ERROR}, + {METRIC_NAME_PROCESSING_TIME}, {METRIC_NAME_RESPONSES}}; }; } // namespace ovms diff --git a/src/model_metric_reporter.cpp b/src/model_metric_reporter.cpp index 8792c0ec3c..34bf93f63d 100644 --- a/src/model_metric_reporter.cpp +++ b/src/model_metric_reporter.cpp @@ -333,6 +333,10 @@ MediapipeServableMetricReporter::MediapipeServableMetricReporter(const MetricCon return; } + for (int i = 0; i < NUMBER_OF_BUCKETS; i++) { + this->buckets.emplace_back(floor(BUCKET_MULTIPLIER * pow(BUCKET_POWER_BASE, i))); + } + auto familyName = METRIC_NAME_CURRENT_GRAPHS; if (metricConfig->isFamilyEnabled(familyName)) { auto family = registry->createFamily(familyName, @@ -552,6 +556,49 @@ MediapipeServableMetricReporter::MediapipeServableMetricReporter(const MetricCon {"interface", "REST"}}); THROW_IF_NULL(this->requestSuccessRestModelReady, "cannot create metric"); } + familyName = METRIC_NAME_PROCESSING_TIME; + if (metricConfig->isFamilyEnabled(familyName)) { + auto family = registry->createFamily(familyName, + "Time packet spent in the MediaPipe graph."); + THROW_IF_NULL(family, "cannot create family"); + + // KFS + this->processingTimeGrpcModelInfer = family->addMetric({{"name", graphName}, + {"api", "KServe"}, + {"method", "ModelInfer"}, + {"interface", "gRPC"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeGrpcModelInfer, "cannot create metric"); + + this->processingTimeGrpcModelInferStream = family->addMetric({{"name", graphName}, + {"api", "KServe"}, + {"method", "ModelInferStream"}, + {"interface", "gRPC"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeGrpcModelInfer, "cannot create metric"); + + this->processingTimeRestModelInfer = family->addMetric({{"name", graphName}, + {"api", "KServe"}, + {"method", "ModelInfer"}, + {"interface", "REST"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeRestModelInfer, "cannot create metric"); + + // V3 + this->processingTimeRestV3Unary = family->addMetric({{"name", graphName}, + {"api", "V3"}, + {"method", "Unary"}, + {"interface", "REST"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeRestV3Unary, "cannot create metric"); + + this->processingTimeRestV3Stream = family->addMetric({{"name", graphName}, + {"api", "V3"}, + {"method", "Stream"}, + {"interface", "REST"}}, + this->buckets); + THROW_IF_NULL(this->processingTimeRestV3Stream, "cannot create 
metric"); + } } } // namespace ovms diff --git a/src/model_metric_reporter.hpp b/src/model_metric_reporter.hpp index 1a2ffb3461..d820af7078 100644 --- a/src/model_metric_reporter.hpp +++ b/src/model_metric_reporter.hpp @@ -156,6 +156,9 @@ class ModelMetricReporter : public ServableMetricReporter { class MediapipeServableMetricReporter { MetricRegistry* registry; +protected: + std::vector buckets; + public: std::unique_ptr currentGraphs; @@ -205,6 +208,33 @@ class MediapipeServableMetricReporter { std::unique_ptr requestErrorRestV3Unary; std::unique_ptr requestErrorRestV3Stream; + std::unique_ptr processingTimeGrpcModelInfer; + std::unique_ptr processingTimeGrpcModelInferStream; + std::unique_ptr processingTimeRestModelInfer; + std::unique_ptr processingTimeRestV3Unary; + std::unique_ptr processingTimeRestV3Stream; + + inline MetricHistogram* getProcessingTimeMetric(const ExecutionContext& context) { + if (context.interface == ExecutionContext::Interface::GRPC) { + if (context.method == ExecutionContext::Method::ModelInfer) + return this->processingTimeGrpcModelInfer.get(); + if (context.method == ExecutionContext::Method::ModelInferStream) + return this->processingTimeGrpcModelInferStream.get(); + return nullptr; + } else if (context.interface == ExecutionContext::Interface::REST) { + if (context.method == ExecutionContext::Method::ModelInfer) + return this->processingTimeRestModelInfer.get(); + if (context.method == ExecutionContext::Method::V3Unary) + return this->processingTimeRestV3Unary.get(); + if (context.method == ExecutionContext::Method::V3Stream) + return this->processingTimeRestV3Stream.get(); + return nullptr; + } else { + return nullptr; + } + return nullptr; + } + inline MetricCounter* getRequestsMetric(const ExecutionContext& context, bool success = true) { if (context.interface == ExecutionContext::Interface::GRPC) { if (context.method == ExecutionContext::Method::ModelInfer) diff --git a/src/test/metrics_flow_test.cpp b/src/test/metrics_flow_test.cpp index d358a73b28..9ba74dd26a 100644 --- a/src/test/metrics_flow_test.cpp +++ b/src/test/metrics_flow_test.cpp @@ -395,6 +395,9 @@ TEST_F(MetricFlowTest, GrpcModelInfer) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, mpName, "gRPC", "ModelInfer", "KServe", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, mpName, "gRPC", "ModelInfer", "KServe", numberOfAcceptedRequests); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(numberOfSuccessRequests))); @@ -714,6 +717,9 @@ TEST_F(MetricFlowTest, RestModelInfer) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, mpName, "REST", "ModelInfer", "KServe", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, mpName, "REST", "ModelInfer", "KServe", numberOfAcceptedRequests); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + 
std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(0))); @@ -835,6 +841,8 @@ TEST_F(MetricFlowTest, RestV3Unary) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_ACCEPTED, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2); // checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, "dummy_gpt", "REST", "Unary", "V3", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Unary\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); } #endif @@ -887,6 +895,9 @@ TEST_F(MetricFlowTest, RestV3Stream) { // checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, "dummy_gpt", "REST", "Stream", "V3", numberOfRejectedRequests); const int numberOfMockedChunksPerRequest = 9; // Defined in openai_chat_completions_mock_calculator.cpp checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Stream", "V3", numberOfAcceptedRequests * numberOfMockedChunksPerRequest * 2); + + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Stream\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); + SPDLOG_ERROR(server.collect()); } #endif @@ -993,6 +1004,7 @@ std::string MetricFlowTest::prepareConfigContent() { R"(",")" + METRIC_NAME_REQUESTS_REJECTED + R"(",")" + METRIC_NAME_RESPONSES + R"(",")" + METRIC_NAME_GRAPH_ERROR + + R"(",")" + METRIC_NAME_PROCESSING_TIME + R"("] } }, From 4811c7de8d5ebf41059f9db312cd76ad3b96717e Mon Sep 17 00:00:00 2001 From: bstrzele Date: Mon, 23 Dec 2024 11:20:14 +0000 Subject: [PATCH 2/5] review fix --- src/metric_config.cpp | 2 +- src/model_metric_reporter.cpp | 23 ++++------------------- src/model_metric_reporter.hpp | 25 ++++++++----------------- src/test/metrics_flow_test.cpp | 10 ++++------ 4 files changed, 17 insertions(+), 43 deletions(-) diff --git a/src/metric_config.cpp b/src/metric_config.cpp index 7fe8830e48..200fded329 100644 --- a/src/metric_config.cpp +++ b/src/metric_config.cpp @@ -59,7 +59,7 @@ const std::string METRIC_NAME_REQUESTS_ACCEPTED = "ovms_requests_accepted"; const std::string METRIC_NAME_REQUESTS_REJECTED = "ovms_requests_rejected"; const std::string METRIC_NAME_GRAPH_ERROR = "ovms_graph_error"; -const std::string METRIC_NAME_PROCESSING_TIME = "ovms_processing_time"; +const std::string METRIC_NAME_PROCESSING_TIME = "ovms_graph_processing_time_us"; bool MetricConfig::validateEndpointPath(const std::string& endpoint) { std::regex valid_endpoint_regex("^/[a-zA-Z0-9]*$"); diff --git a/src/model_metric_reporter.cpp b/src/model_metric_reporter.cpp index 34bf93f63d..2b6c34c55a 100644 --- a/src/model_metric_reporter.cpp +++ 
diff --git a/src/model_metric_reporter.cpp b/src/model_metric_reporter.cpp
index 34bf93f63d..2b6c34c55a 100644
--- a/src/model_metric_reporter.cpp
+++ b/src/model_metric_reporter.cpp
@@ -564,38 +564,23 @@ MediapipeServableMetricReporter::MediapipeServableMetricReporter(const MetricCon
 
         // KFS
         this->processingTimeGrpcModelInfer = family->addMetric({{"name", graphName},
-                                                                   {"api", "KServe"},
-                                                                   {"method", "ModelInfer"},
-                                                                   {"interface", "gRPC"}},
+                                                                   {"method", "ModelInfer"}},
             this->buckets);
         THROW_IF_NULL(this->processingTimeGrpcModelInfer, "cannot create metric");
 
         this->processingTimeGrpcModelInferStream = family->addMetric({{"name", graphName},
-                                                                         {"api", "KServe"},
-                                                                         {"method", "ModelInferStream"},
-                                                                         {"interface", "gRPC"}},
+                                                                         {"method", "ModelInferStream"}},
             this->buckets);
         THROW_IF_NULL(this->processingTimeGrpcModelInferStream, "cannot create metric");
 
-        this->processingTimeRestModelInfer = family->addMetric({{"name", graphName},
-                                                                   {"api", "KServe"},
-                                                                   {"method", "ModelInfer"},
-                                                                   {"interface", "REST"}},
-            this->buckets);
-        THROW_IF_NULL(this->processingTimeRestModelInfer, "cannot create metric");
-
         // V3
         this->processingTimeRestV3Unary = family->addMetric({{"name", graphName},
-                                                                {"api", "V3"},
-                                                                {"method", "Unary"},
-                                                                {"interface", "REST"}},
+                                                                {"method", "Unary"}},
             this->buckets);
         THROW_IF_NULL(this->processingTimeRestV3Unary, "cannot create metric");
 
         this->processingTimeRestV3Stream = family->addMetric({{"name", graphName},
-                                                                 {"api", "V3"},
-                                                                 {"method", "Stream"},
-                                                                 {"interface", "REST"}},
+                                                                 {"method", "Stream"}},
             this->buckets);
         THROW_IF_NULL(this->processingTimeRestV3Stream, "cannot create metric");
     }
diff --git a/src/model_metric_reporter.hpp b/src/model_metric_reporter.hpp
index d820af7078..4146431f32 100644
--- a/src/model_metric_reporter.hpp
+++ b/src/model_metric_reporter.hpp
@@ -215,23 +215,14 @@ class MediapipeServableMetricReporter {
     std::unique_ptr<MetricHistogram> processingTimeRestV3Stream;
 
     inline MetricHistogram* getProcessingTimeMetric(const ExecutionContext& context) {
-        if (context.interface == ExecutionContext::Interface::GRPC) {
-            if (context.method == ExecutionContext::Method::ModelInfer)
-                return this->processingTimeGrpcModelInfer.get();
-            if (context.method == ExecutionContext::Method::ModelInferStream)
-                return this->processingTimeGrpcModelInferStream.get();
-            return nullptr;
-        } else if (context.interface == ExecutionContext::Interface::REST) {
-            if (context.method == ExecutionContext::Method::ModelInfer)
-                return this->processingTimeRestModelInfer.get();
-            if (context.method == ExecutionContext::Method::V3Unary)
-                return this->processingTimeRestV3Unary.get();
-            if (context.method == ExecutionContext::Method::V3Stream)
-                return this->processingTimeRestV3Stream.get();
-            return nullptr;
-        } else {
-            return nullptr;
-        }
+        if (context.method == ExecutionContext::Method::ModelInfer)
+            return this->processingTimeGrpcModelInfer.get();
+        if (context.method == ExecutionContext::Method::ModelInferStream)
+            return this->processingTimeGrpcModelInferStream.get();
+        if (context.method == ExecutionContext::Method::V3Unary)
+            return this->processingTimeRestV3Unary.get();
+        if (context.method == ExecutionContext::Method::V3Stream)
+            return this->processingTimeRestV3Stream.get();
         return nullptr;
     }
 
std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(numberOfSuccessRequests))); @@ -718,8 +717,7 @@ TEST_F(MetricFlowTest, RestModelInfer) { checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, mpName, "REST", "ModelInfer", "KServe", numberOfAcceptedRequests); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"gRPC\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(0))); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"KServe\",interface=\"REST\",method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"ModelInfer\",name=\""} + mpName + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests))); #endif EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_REQUEST_TIME + std::string{"_count{interface=\"gRPC\",name=\""} + modelName + std::string{"\",version=\"1\"} "} + std::to_string(0))); @@ -842,7 +840,7 @@ TEST_F(MetricFlowTest, RestV3Unary) { // checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, "dummy_gpt", "REST", "Unary", "V3", numberOfRejectedRequests); checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Unary\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"Unary\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); } #endif @@ -896,7 +894,7 @@ TEST_F(MetricFlowTest, RestV3Stream) { const int numberOfMockedChunksPerRequest = 9; // Defined in openai_chat_completions_mock_calculator.cpp checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Stream", "V3", numberOfAcceptedRequests * numberOfMockedChunksPerRequest * 2); - EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{api=\"V3\",interface=\"REST\",method=\"Stream\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); + EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"Stream\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2))); SPDLOG_ERROR(server.collect()); } From e80f8d0ca40da85953063a9e4b4ad34857ed90f4 Mon Sep 17 00:00:00 2001 From: bstrzele Date: Thu, 2 Jan 2025 13:38:55 
From e80f8d0ca40da85953063a9e4b4ad34857ed90f4 Mon Sep 17 00:00:00 2001
From: bstrzele
Date: Thu, 2 Jan 2025 13:38:55 +0000
Subject: [PATCH 3/5] documentation

---
 docs/metrics.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/metrics.md b/docs/metrics.md
index 99bcc10c5c..8a6e2b3b04 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -241,6 +241,7 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics whi
 | counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. |
 | gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. |
 | counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. |
+| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened. |
 
 Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet.

From 2455f222a03596d590cbf0827b5d100fd3a7a2d3 Mon Sep 17 00:00:00 2001
From: bstrzele
Date: Fri, 10 Jan 2025 10:27:47 +0000
Subject: [PATCH 4/5] fix

---
 docs/metrics.md                                   | 2 +-
 src/mediapipe_internal/mediapipegraphexecutor.hpp | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/metrics.md b/docs/metrics.md
index 8a6e2b3b04..162501be8d 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -241,7 +241,7 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics whi
 | counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. |
 | gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. |
 | counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. |
-| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened. |
+| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and been successfully closed. |
 
 Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet.
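
Once enabled, the histogram documented above can be consumed with standard Prometheus queries; for example (illustrative PromQL, not part of this change):

    # mean graph processing time in microseconds over the last 5 minutes, per graph
    sum by (name) (rate(ovms_graph_processing_time_us_sum[5m]))
      / sum by (name) (rate(ovms_graph_processing_time_us_count[5m]))

    # 95th percentile per graph
    histogram_quantile(0.95, sum by (name, le) (rate(ovms_graph_processing_time_us_bucket[5m])))
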
diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp
index def898599e..e3466dc380 100644
--- a/src/mediapipe_internal/mediapipegraphexecutor.hpp
+++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp
@@ -203,13 +203,13 @@ class MediapipeGraphExecutor {
             INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext));
         }
         MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code()));
-        timer.stop(PROCESS);
-        double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
-        OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
         if (outputPollers.size() != outputPollersWithReceivedPacket.size()) {
             SPDLOG_DEBUG("Mediapipe failed to execute. Failed to receive all output packets");
             return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution");
         }
+        timer.stop(PROCESS);
+        double processTime = timer.template elapsed<std::chrono::microseconds>(PROCESS);
+        OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime);
         INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext));
         SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name);
         return StatusCode::OK;

From f6c43b3bd0f8b43cc196181b22b18ee788aa6468 Mon Sep 17 00:00:00 2001
From: bstrzele
Date: Fri, 10 Jan 2025 10:33:17 +0000
Subject: [PATCH 5/5] fix wording

---
 docs/metrics.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/metrics.md b/docs/metrics.md
index 162501be8d..143604c6b8 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -241,7 +241,7 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 4 generic metrics whi
 | counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. |
 | gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. |
 | counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. |
-| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and been successfully closed. |
+| histogram | ovms_graph_processing_time_us | Time for which mediapipe graph was opened and has been successfully closed. |
 
 Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet.
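
Like the other optional families, the new histogram has to be listed in metric_list to be collected, as prepareConfigContent() does in the tests of patch 1. A minimal server configuration sketch assuming the standard OVMS monitoring section, with the model list abbreviated:

    {
        "model_config_list": [],
        "monitoring": {
            "metrics": {
                "enable": true,
                "metric_list": ["ovms_graph_processing_time_us"]
            }
        }
    }
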