From c59abb5305b21db96a4ed26a472d2b07b54382de Mon Sep 17 00:00:00 2001
From: Engel Nyst
Date: Sat, 22 Feb 2025 19:04:12 +0100
Subject: [PATCH] test accumulation

---
 tests/unit/test_llm.py | 48 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index f94e98676ff4..0b35e45ee576 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -435,7 +435,7 @@ def test_get_token_count_error_handling(
 def test_llm_token_usage(mock_litellm_completion, default_config):
     # This mock response includes usage details with prompt_tokens,
     # completion_tokens, prompt_tokens_details.cached_tokens, and model_extra.cache_creation_input_tokens
-    mock_response = {
+    mock_response_1 = {
         'id': 'test-response-usage',
         'choices': [{'message': {'content': 'Usage test response'}}],
         'usage': {
@@ -445,12 +445,28 @@ def test_llm_token_usage(mock_litellm_completion, default_config):
             'model_extra': {'cache_creation_input_tokens': 5},
         },
     }
-    mock_litellm_completion.return_value = mock_response
+
+    # Create a second usage scenario to test accumulation and a different response_id
+    mock_response_2 = {
+        'id': 'test-response-usage-2',
+        'choices': [{'message': {'content': 'Second usage test response'}}],
+        'usage': {
+            'prompt_tokens': 7,
+            'completion_tokens': 2,
+            'prompt_tokens_details': {'cached_tokens': 1},
+            'model_extra': {'cache_creation_input_tokens': 3},
+        },
+    }
+
+    # We'll make mock_litellm_completion return these responses in sequence
+    mock_litellm_completion.side_effect = [mock_response_1, mock_response_2]
 
     llm = LLM(config=default_config)
+
+    # First call: usage_1
     _ = llm.completion(messages=[{'role': 'user', 'content': 'Hello usage!'}])
 
-    # Check that the metrics tracked these tokens
+    # Check that the metrics tracked these tokens for the first response
     assert llm.metrics.get()['accumulated_prompt_tokens'] == 12
     assert llm.metrics.get()['accumulated_completion_tokens'] == 3
     assert llm.metrics.get()['accumulated_cache_read_tokens'] == 2
@@ -464,3 +480,29 @@ def test_llm_token_usage(mock_litellm_completion, default_config):
     assert usage_entry['completion_tokens'] == 3
     assert usage_entry['cache_read_tokens'] == 2
     assert usage_entry['cache_write_tokens'] == 5
+    # Check the response_id
+    assert usage_entry['response_id'] == 'test-response-usage'
+
+    # Second call: usage_2
+    _ = llm.completion(messages=[{'role': 'user', 'content': 'Hello again!'}])
+
+    # Now check accumulated totals
+    metrics_dict = llm.metrics.get()
+    # Prompt tokens = 12 + 7 = 19
+    assert metrics_dict['accumulated_prompt_tokens'] == 19
+    # Completion tokens = 3 + 2 = 5
+    assert metrics_dict['accumulated_completion_tokens'] == 5
+    # Cache read = 2 + 1 = 3
+    assert metrics_dict['accumulated_cache_read_tokens'] == 3
+    # Cache write = 5 + 3 = 8
+    assert metrics_dict['accumulated_cache_write_tokens'] == 8
+
+    # Also verify we have two usage records now
+    tokens_usage_list = metrics_dict['tokens_usages']
+    assert len(tokens_usage_list) == 2
+    latest_entry = tokens_usage_list[-1]
+    assert latest_entry['prompt_tokens'] == 7
+    assert latest_entry['completion_tokens'] == 2
+    assert latest_entry['cache_read_tokens'] == 1
+    assert latest_entry['cache_write_tokens'] == 3
+    assert latest_entry['response_id'] == 'test-response-usage-2'
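
Note (not part of the patch): the assertions above assume llm.metrics accumulates per-call token usage and keeps one record per response. The following is a minimal, hypothetical Python sketch of that accumulation behavior, written only to illustrate the totals the test checks (19 = 12 + 7 prompt tokens, 8 = 5 + 3 cache-write tokens, two usage records). The names MetricsSketch, TokenUsageRecord, and add_token_usage are invented for this sketch and are not the repository's actual Metrics API.

    # Illustrative only: a hypothetical accumulator mirroring what the test expects
    # from llm.metrics -- not the repository's actual Metrics/LLM classes.
    from dataclasses import dataclass, field, asdict


    @dataclass
    class TokenUsageRecord:
        prompt_tokens: int
        completion_tokens: int
        cache_read_tokens: int
        cache_write_tokens: int
        response_id: str


    @dataclass
    class MetricsSketch:
        accumulated_prompt_tokens: int = 0
        accumulated_completion_tokens: int = 0
        accumulated_cache_read_tokens: int = 0
        accumulated_cache_write_tokens: int = 0
        tokens_usages: list = field(default_factory=list)

        def add_token_usage(self, prompt, completion, cache_read, cache_write, response_id):
            # Each response adds to the running totals and appends one usage record.
            self.accumulated_prompt_tokens += prompt
            self.accumulated_completion_tokens += completion
            self.accumulated_cache_read_tokens += cache_read
            self.accumulated_cache_write_tokens += cache_write
            self.tokens_usages.append(
                asdict(TokenUsageRecord(prompt, completion, cache_read, cache_write, response_id))
            )

        def get(self):
            return asdict(self)


    # Replaying the two mocked responses from the test reproduces its expected totals.
    metrics = MetricsSketch()
    metrics.add_token_usage(12, 3, 2, 5, 'test-response-usage')
    metrics.add_token_usage(7, 2, 1, 3, 'test-response-usage-2')
    snapshot = metrics.get()
    assert snapshot['accumulated_prompt_tokens'] == 19        # 12 + 7
    assert snapshot['accumulated_cache_write_tokens'] == 8    # 5 + 3
    assert len(snapshot['tokens_usages']) == 2
    assert snapshot['tokens_usages'][-1]['response_id'] == 'test-response-usage-2'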