Skip to content

Commit

Permalink
minor
Browse files Browse the repository at this point in the history
Signed-off-by: Cody Yu <[email protected]>
  • Loading branch information
comaniac committed Feb 7, 2025
1 parent 7e5d784 commit 56a73c8
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 9 deletions.
4 changes: 2 additions & 2 deletions vllm/v1/core/kv_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ def usage(self) -> float:
self.num_gpu_blocks)

def make_prefix_cache_stats(self) -> PrefixCacheStats:
"""Get (and reset) the prefix cache query and hit counts.
"""Get (and reset) the prefix cache stats.
Returns:
The prefix caching stats - query count, and hit count.
The current prefix caching stats.
"""
stats = self.prefix_cache_stats
self.prefix_cache_stats = PrefixCacheStats()
Expand Down
13 changes: 7 additions & 6 deletions vllm/v1/core/kv_cache_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class PrefixCachingMetrics:

def __init__(self, interval: int = 1000):
self.interval = interval
# The current aggregated query total and hit.
# The current aggregated values.
self.aggregated_requests = 0
self.aggregated_query_total = 0
self.aggregated_query_hit = 0
Expand All @@ -56,20 +56,21 @@ def observe(self, stats: PrefixCacheStats):
When there are more than `interval` requests, the oldest set of
requests are removed from the metrics.
Stats:
reset: Whether reset_prefix_cache was invoked.
requests: The number of requests in this update.
queries: The number of queries in these requests.
hits: The number of hits in these requests.
Args:
stats: The prefix cache stats.
"""
# reset_prefix_cache was invoked before the current update.
# Reset the metrics before aggregating the current stats.
if stats.reset:
self.reset()

# Update the metrics.
self.query_queue.append((stats.requests, stats.queries, stats.hits))
self.aggregated_requests += stats.requests
self.aggregated_query_total += stats.queries
self.aggregated_query_hit += stats.hits

# Remove the oldest stats if the number of requests exceeds the interval.
if self.aggregated_requests > self.interval:
old_requests, old_queries, old_hits = self.query_queue.popleft()
self.aggregated_requests -= old_requests
Expand Down
6 changes: 5 additions & 1 deletion vllm/v1/metrics/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@
@dataclass
class PrefixCacheStats:
"""Stores prefix cache hit statistics."""
# Whether reset_prefix_cache was invoked.
reset: bool = False
# The number of requests in this update.
requests: int = 0
hits: int = 0
# The number of queries in these requests.
queries: int = 0
# The number of hits in these requests.
hits: int = 0


@dataclass
Expand Down

0 comments on commit 56a73c8

Please sign in to comment.