We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent a8d59cc, commit 1c08e34 (Copy full SHA for 1c08e34)
shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py
@@ -53,7 +53,8 @@ def acquire_pages_for_tokens(
53
54
No token at idx < n_cached_tokens should be written to. TODO: consider enforcing this.
55
"""
56
- pages_needed = math.ceil(len(tokens) + extra_token_slots / self.tokens_per_page)
+ token_count = len(tokens)
57
+ pages_needed = math.ceil(token_count / self.tokens_per_page)
58
pages = self.page_pool.acquire_free_pages(pages_needed)
59
60
n_cached_tokens = 0
0 commit comments