[V1][Metrics] Hook up IterationStats for Prometheus metrics (vllm-project#12478)

Signed-off-by: Mark McLoughlin <[email protected]>
markmc authored and NickLucche committed Feb 7, 2025
1 parent aa176e8 commit 3f70c88
Showing 3 changed files with 66 additions and 12 deletions.
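For context, the loggers changed below consume an IterationStats object imported from vllm/v1/metrics/stats.py, which is not part of this diff. A minimal sketch of the shape they rely on, assuming only the two per-iteration token counts the new code actually reads:

# Sketch only: not the actual vllm/v1/metrics/stats.py definition, just the
# two fields the loggers in this commit read from each engine iteration.
from dataclasses import dataclass


@dataclass
class IterationStats:
    num_prompt_tokens: int = 0
    num_generation_tokens: int = 0
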
tests/entrypoints/openai/test_metrics.py: 7 changes (5 additions & 2 deletions)
@@ -105,8 +105,6 @@ async def client(server):
 @pytest.mark.asyncio
 async def test_metrics_counts(server: RemoteOpenAIServer,
                                client: openai.AsyncClient, use_v1: bool):
-    if use_v1:
-        pytest.skip("Skipping test on vllm V1")
     for _ in range(_NUM_REQUESTS):
         # sending a request triggers the metrics to be logged.
         await client.completions.create(
@@ -120,6 +118,9 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
 
     # Loop over all expected metric_families
     for metric_family, suffix_values_list in EXPECTED_VALUES.items():
+        if use_v1 and metric_family not in EXPECTED_METRICS_V1:
+            continue
+
         found_metric = False
 
         # Check to see if the metric_family is found in the prom endpoint.
@@ -199,6 +200,8 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
 EXPECTED_METRICS_V1 = [
     "vllm:num_requests_running",
     "vllm:num_requests_waiting",
+    "vllm:prompt_tokens_total",
+    "vllm:generation_tokens_total",
 ]


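With the skip removed, V1 runs now exercise the same loop but only for the metric families listed in EXPECTED_METRICS_V1. A rough sketch of that kind of presence check, assuming a stand-in base_url for the running server and using prometheus_client's text parser rather than the test's full value comparison:

# Simplified sketch; base_url is a placeholder for the RemoteOpenAIServer
# address, and the real test also compares expected metric values.
import requests
from prometheus_client.parser import text_string_to_metric_families

EXPECTED_METRICS_V1 = [
    "vllm:num_requests_running",
    "vllm:num_requests_waiting",
    "vllm:prompt_tokens_total",
    "vllm:generation_tokens_total",
]


def assert_v1_metrics_present(base_url: str) -> None:
    response = requests.get(f"{base_url}/metrics")
    response.raise_for_status()
    # Collect every sample name exposed on the Prometheus endpoint.
    sample_names = {
        sample.name
        for family in text_string_to_metric_families(response.text)
        for sample in family.samples
    }
    for metric in EXPECTED_METRICS_V1:
        assert metric in sample_names, f"{metric} not found in /metrics"
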
vllm/v1/engine/async_llm.py: 3 changes (2 additions & 1 deletion)
@@ -305,7 +305,8 @@ def _log_stats(
             return
 
         for logger in self.stat_loggers:
-            logger.log(scheduler_stats=scheduler_stats)
+            logger.log(scheduler_stats=scheduler_stats,
+                       iteration_stats=iteration_stats)
 
     def encode(
         self,
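Since _log_stats now forwards both objects as keyword arguments, any stat logger registered on the engine has to accept the new parameter. A hypothetical minimal implementation against the updated interface (vLLM itself only ships LoggingStatLogger and PrometheusStatLogger in this commit):

# Hypothetical example of a custom logger satisfying the updated interface.
from vllm.v1.metrics.loggers import StatLoggerBase
from vllm.v1.metrics.stats import IterationStats, SchedulerStats


class PrintingStatLogger(StatLoggerBase):
    """Toy logger: log() must now take both stats objects."""

    def log(self, scheduler_stats: SchedulerStats,
            iteration_stats: IterationStats):
        print(scheduler_stats.num_waiting_reqs,
              iteration_stats.num_generation_tokens)
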
vllm/v1/metrics/loggers.py: 68 changes (59 additions & 9 deletions)
@@ -1,11 +1,12 @@
 import time
 from abc import ABC, abstractmethod
-from typing import Dict
+from typing import Dict, List
 
+import numpy as np
 import prometheus_client
 
 from vllm.logger import init_logger
-from vllm.v1.metrics.stats import SchedulerStats
+from vllm.v1.metrics.stats import IterationStats, SchedulerStats
 
 logger = init_logger(__name__)
 
@@ -15,27 +16,61 @@
 class StatLoggerBase(ABC):
 
     @abstractmethod
-    def log(self, scheduler_stats: SchedulerStats):
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
         ...
 
 
 class LoggingStatLogger(StatLoggerBase):
 
     def __init__(self):
-        self.last_log_time = time.monotonic()
+        self._reset(time.monotonic())
 
-    def log(self, scheduler_stats: SchedulerStats):
-        """Log Stats to standard output."""
+    def _reset(self, now):
+        self.last_log_time = now
+
+        # Tracked stats over current local logging interval.
+        self.num_prompt_tokens: List[int] = []
+        self.num_generation_tokens: List[int] = []
+
+    def _local_interval_elapsed(self, now: float) -> bool:
+        # Log every _LOCAL_LOGGING_INTERVAL_SEC.
+        elapsed_time = now - self.last_log_time
+        return elapsed_time > _LOCAL_LOGGING_INTERVAL_SEC
+
+    def _track_iteration_stats(self, iteration_stats: IterationStats):
+        # Save tracked stats for token counters.
+        self.num_prompt_tokens.append(iteration_stats.num_prompt_tokens)
+        self.num_generation_tokens.append(
+            iteration_stats.num_generation_tokens)
+
+    def _get_throughput(self, tracked_stats: List[int], now: float) -> float:
+        # Compute summary metrics for tracked stats
+        return float(np.sum(tracked_stats) / (now - self.last_log_time))
+
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
+        """Log Stats to standard output."""
 
+        self._track_iteration_stats(iteration_stats)
+
         now = time.monotonic()
-        if now - self.last_log_time < _LOCAL_LOGGING_INTERVAL_SEC:
+        if not self._local_interval_elapsed(now):
             return
-        self.last_log_time = now
 
+        prompt_throughput = self._get_throughput(self.num_prompt_tokens, now)
+        generation_throughput = self._get_throughput(
+            self.num_generation_tokens, now)
+
+        self._reset(now)
+
         # Format and print output.
         logger.info(
+            "Avg prompt throughput: %.1f tokens/s, "
+            "Avg generation throughput: %.1f tokens/s, "
             "Running: %d reqs, Waiting: %d reqs ",
+            prompt_throughput,
+            generation_throughput,
             scheduler_stats.num_running_reqs,
             scheduler_stats.num_waiting_reqs,
         )
@@ -61,11 +96,26 @@ def __init__(self, labels: Dict[str, str]):
             documentation="Number of requests waiting to be processed.",
             labelnames=labelnames).labels(*labelvalues)
 
-    def log(self, scheduler_stats: SchedulerStats):
+        self.counter_prompt_tokens = prometheus_client.Counter(
+            name="vllm:prompt_tokens_total",
+            documentation="Number of prefill tokens processed.",
+            labelnames=labelnames).labels(*labelvalues)
+
+        self.counter_generation_tokens = prometheus_client.Counter(
+            name="vllm:generation_tokens_total",
+            documentation="Number of generation tokens processed.",
+            labelnames=labelnames).labels(*labelvalues)
+
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
         """Log to prometheus."""
         self.gauge_scheduler_running.set(scheduler_stats.num_running_reqs)
         self.gauge_scheduler_waiting.set(scheduler_stats.num_waiting_reqs)
 
+        self.counter_prompt_tokens.inc(iteration_stats.num_prompt_tokens)
+        self.counter_generation_tokens.inc(
+            iteration_stats.num_generation_tokens)
+
     @staticmethod
     def _unregister_vllm_metrics():
         # Unregister any existing vLLM collectors (for CI/CD
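Taken together, the Prometheus side follows the usual prometheus_client pattern: each Counter is created once with the engine's labels, then inc()'d with the per-iteration token counts while the existing Gauges keep tracking instantaneous queue depth. A standalone sketch of that pattern, with a made-up metric prefix, label value, port, and numbers rather than vLLM's actual wiring:

# Illustration of the Gauge/Counter pattern used above; the "demo:" names,
# label value, port, and numbers are invented for this sketch.
import prometheus_client

labelnames = ["model_name"]
labelvalues = ["example-model"]

gauge_running = prometheus_client.Gauge(
    name="demo:num_requests_running",
    documentation="Number of requests currently running.",
    labelnames=labelnames).labels(*labelvalues)

counter_prompt_tokens = prometheus_client.Counter(
    name="demo:prompt_tokens_total",
    documentation="Number of prefill tokens processed.",
    labelnames=labelnames).labels(*labelvalues)

# Gauges are set to the current value each iteration; counters only ever
# accumulate.
gauge_running.set(3)
counter_prompt_tokens.inc(128)

# Expose the default registry on http://localhost:8000/metrics.
prometheus_client.start_http_server(8000)

Because the counters are monotonic, throughput can be derived on the Prometheus side with rate() at query time, whereas LoggingStatLogger averages tokens per second in-process over its local logging interval.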
