diff --git a/council/contexts/_budget.py b/council/contexts/_budget.py index 49285a42..0b9b9104 100644 --- a/council/contexts/_budget.py +++ b/council/contexts/_budget.py @@ -13,16 +13,6 @@ class BudgetExpiredException(Exception): class Consumption: """ A class representing a consumption measurement with value, unit, and kind information. - - Attributes: - _value (float): The numeric value of the consumption measurement. - _unit (str): The unit of measurement for the consumption (e.g., tokens, api_calls, etc.). - _kind (str): The kind or category of the consumption. - - Methods: - __init__(value: float, unit: str, kind: str): - Initializes a Consumption instance with the provided value, unit, and kind. - """ def __init__(self, value: float, unit: str, kind: str) -> None: @@ -41,14 +31,17 @@ def __init__(self, value: float, unit: str, kind: str) -> None: @property def value(self) -> float: + """The numeric value of the consumption measurement.""" return self._value @property def unit(self) -> str: + """The unit of measurement for the consumption (e.g., tokens, api_calls, etc.).""" return self._unit @property def kind(self) -> str: + """The kind or category of the consumption.""" return self._kind def __str__(self) -> str: @@ -83,14 +76,22 @@ def to_dict(self) -> Dict[str, Any]: @staticmethod def call(value: int, kind: str) -> Consumption: + """Returns a Consumption instance with "call" unit.""" return Consumption(value, "call", kind) + @staticmethod + def duration(value: float, kind: str) -> Consumption: + """Returns a Consumption instance with "second" unit.""" + return Consumption(value, "second", kind) + @staticmethod def token(value: int, kind: str) -> Consumption: + """Returns a Consumption instance with "token" unit.""" return Consumption(value, "token", kind) @staticmethod def cost(value: float, kind: str) -> Consumption: + """Returns a Consumption instance with "USD" unit.""" return Consumption(value, "USD", kind) diff --git a/council/llm/__init__.py b/council/llm/__init__.py index 9709460a..a213c12d 100644 --- a/council/llm/__init__.py +++ b/council/llm/__init__.py @@ -9,7 +9,7 @@ from .llm_exception import LLMException, LLMCallException, LLMCallTimeoutException, LLMTokenLimitException from .llm_message import LLMMessageRole, LLMMessage, LLMMessageTokenCounterBase from .llm_base import LLMBase, LLMResult, LLMConfigurationBase -from .llm_cost import LLMCostCard, LLMConsumptionCalculatorBase +from .llm_cost import LLMCostCard, LLMConsumptionCalculatorBase, TokenKind, LLMCostManagerSpec, LLMCostManagerObject from .llm_fallback import LLMFallback from .llm_middleware import ( LLMRequest, diff --git a/council/llm/anthropic.py b/council/llm/anthropic.py index 726311a4..572eccf7 100644 --- a/council/llm/anthropic.py +++ b/council/llm/anthropic.py @@ -8,15 +8,49 @@ from council.llm import LLMMessage +class Usage: + """Represents token usage statistics for an Anthropic API request.""" + + def __init__( + self, + prompt_tokens: int, + completion_tokens: int, + cache_creation_prompt_tokens: int, + cache_read_prompt_tokens: int, + ): + self.prompt_tokens = prompt_tokens + self.completion_tokens = completion_tokens + self.cache_creation_prompt_tokens = cache_creation_prompt_tokens + self.cache_read_prompt_tokens = cache_read_prompt_tokens + self.total_tokens = cache_creation_prompt_tokens + cache_read_prompt_tokens + prompt_tokens + completion_tokens + + @staticmethod + def from_dict(values: Dict[str, int]) -> Usage: + prompt_tokens = values["input_tokens"] + completion_tokens = 
values["output_tokens"] + cache_creation_prompt_tokens = values.get("cache_creation_input_tokens", 0) + cache_read_prompt_tokens = values.get("cache_read_input_tokens", 0) + return Usage(prompt_tokens, completion_tokens, cache_creation_prompt_tokens, cache_read_prompt_tokens) + + @staticmethod + def empty() -> Usage: + return Usage(0, 0, 0, 0) + + class AnthropicAPIClientResult: - def __init__(self, choices: List[str], raw_response: Optional[Dict[str, Any]] = None) -> None: + def __init__(self, choices: List[str], usage: Usage, raw_response: Optional[Dict[str, Any]] = None) -> None: self._choices = choices + self._usage = usage self._raw_response = raw_response @property def choices(self) -> List[str]: return self._choices + @property + def usage(self) -> Usage: + return self._usage + @property def raw_response(self) -> Optional[Dict[str, Any]]: return self._raw_response @@ -24,7 +58,7 @@ def raw_response(self) -> Optional[Dict[str, Any]]: @staticmethod def from_completion(result: Completion) -> AnthropicAPIClientResult: """For legacy completion API""" - return AnthropicAPIClientResult(choices=[result.completion]) + return AnthropicAPIClientResult(choices=[result.completion], usage=Usage.empty()) class AnthropicAPIClientWrapper(ABC): diff --git a/council/llm/anthropic_llm.py b/council/llm/anthropic_llm.py index 8b0e695e..e89f9eb6 100644 --- a/council/llm/anthropic_llm.py +++ b/council/llm/anthropic_llm.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Dict, List, Mapping, Optional, Sequence +from typing import Any, List, Mapping, Optional, Sequence from anthropic import Anthropic, APIStatusError, APITimeoutError from council.contexts import Consumption, LLMContext @@ -12,13 +12,16 @@ LLMConfigObject, LLMConsumptionCalculatorBase, LLMCostCard, + LLMCostManagerObject, LLMMessage, LLMMessageTokenCounterBase, LLMProviders, LLMResult, + TokenKind, ) +from council.utils.utils import DurationManager -from .anthropic import AnthropicAPIClientWrapper +from .anthropic import AnthropicAPIClientWrapper, Usage from .anthropic_completion_llm import AnthropicCompletionLLM from .anthropic_messages_llm import AnthropicMessagesLLM @@ -35,22 +38,9 @@ def count_messages_token(self, messages: Sequence[LLMMessage]) -> int: class AnthropicConsumptionCalculator(LLMConsumptionCalculatorBase): - # https://www.anthropic.com/pricing#anthropic-api - COSTS: Mapping[str, LLMCostCard] = { - "claude-3-haiku-20240307": LLMCostCard(input=0.25, output=1.25), - "claude-3-sonnet-20240229": LLMCostCard(input=3.00, output=15.00), - "claude-3-5-sonnet-20240620": LLMCostCard(input=3.00, output=15.00), - "claude-3-5-sonnet-20241022": LLMCostCard(input=3.00, output=15.00), - "claude-3-opus-20240229": LLMCostCard(input=15.00, output=75.00), - } - - # input - cache write; output - cache read; note - not all model support prompt caching - COSTS_CACHING: Mapping[str, LLMCostCard] = { - "claude-3-haiku-20240307": LLMCostCard(input=0.30, output=0.03), - "claude-3-5-sonnet-20240620": LLMCostCard(input=3.75, output=0.30), - "claude-3-5-sonnet-20241022": LLMCostCard(input=3.75, output=0.30), - "claude-3-opus-20240229": LLMCostCard(input=18.75, output=1.50), - } + _cost_manager = LLMCostManagerObject.anthropic() + COSTS: Mapping[str, LLMCostCard] = _cost_manager.get_cost_map("default") + COSTS_CACHING: Mapping[str, LLMCostCard] = _cost_manager.get_cost_map("caching") def find_model_costs(self) -> Optional[LLMCostCard]: return self.COSTS.get(self.model) @@ -58,45 +48,42 @@ def find_model_costs(self) -> 
Optional[LLMCostCard]: def find_caching_costs(self) -> Optional[LLMCostCard]: return self.COSTS_CACHING.get(self.model) - def get_cache_consumptions(self, usage: Dict[str, int]) -> List[Consumption]: + def get_anthropic_consumptions(self, duration: float, usage: Usage) -> List[Consumption]: """ Get consumptions specific for Anthropic prompt caching: - 1 call + - specified duration - cache_creation_prompt, cache_read_prompt, prompt, completion and total tokens - costs if both regular and caching LLMCostCards can be found """ - consumptions = self.get_cache_token_consumptions(usage) + self.get_cache_cost_consumptions(usage) - return self.filter_zeros(consumptions) # could occur for cache tokens - def get_cache_token_consumptions(self, usage: Dict[str, int]) -> List[Consumption]: - total = sum( - [ - usage["cache_creation_prompt_tokens"], - usage["cache_read_prompt_tokens"], - usage["prompt_tokens"], - usage["completion_tokens"], - ] + consumptions = self.get_anthropic_base_consumptions(duration, usage) + self.get_anthropic_cost_consumptions( + usage ) + return self.filter_zeros(consumptions) # could occur for cache tokens + + def get_anthropic_base_consumptions(self, duration: float, usage: Usage) -> List[Consumption]: return [ Consumption.call(1, self.model), - Consumption.token(usage["cache_creation_prompt_tokens"], self.format_kind("cache_creation_prompt")), - Consumption.token(usage["cache_read_prompt_tokens"], self.format_kind("cache_read_prompt")), - Consumption.token(usage["prompt_tokens"], self.format_kind("prompt")), - Consumption.token(usage["completion_tokens"], self.format_kind("completion")), - Consumption.token(total, self.format_kind("total")), + Consumption.duration(duration, self.model), + Consumption.token(usage.cache_creation_prompt_tokens, self.format_kind(TokenKind.cache_creation_prompt)), + Consumption.token(usage.cache_read_prompt_tokens, self.format_kind(TokenKind.cache_read_prompt)), + Consumption.token(usage.prompt_tokens, self.format_kind(TokenKind.prompt)), + Consumption.token(usage.completion_tokens, self.format_kind(TokenKind.completion)), + Consumption.token(usage.total_tokens, self.format_kind(TokenKind.total)), ] - def get_cache_cost_consumptions(self, usage: Dict[str, int]) -> List[Consumption]: + def get_anthropic_cost_consumptions(self, usage: Usage) -> List[Consumption]: cost_card = self.find_model_costs() caching_cost_card = self.find_caching_costs() if cost_card is None or caching_cost_card is None: return [] - prompt_tokens_cost = cost_card.input_cost(usage["prompt_tokens"]) - completion_tokens_cost = cost_card.output_cost(usage["completion_tokens"]) - cache_creation_prompt_tokens_cost = caching_cost_card.input_cost(usage["cache_creation_prompt_tokens"]) - cache_read_prompt_tokens_cost = caching_cost_card.output_cost(usage["cache_read_prompt_tokens"]) + prompt_tokens_cost = cost_card.input_cost(usage.prompt_tokens) + completion_tokens_cost = cost_card.output_cost(usage.completion_tokens) + cache_creation_prompt_tokens_cost = caching_cost_card.input_cost(usage.cache_creation_prompt_tokens) + cache_read_prompt_tokens_cost = caching_cost_card.output_cost(usage.cache_read_prompt_tokens) total_cost = sum( [ @@ -108,11 +95,13 @@ def get_cache_cost_consumptions(self, usage: Dict[str, int]) -> List[Consumption ) return [ - Consumption.cost(cache_creation_prompt_tokens_cost, self.format_kind("cache_creation_prompt", cost=True)), - Consumption.cost(cache_read_prompt_tokens_cost, self.format_kind("cache_read_prompt", cost=True)), - 
Consumption.cost(prompt_tokens_cost, self.format_kind("prompt", cost=True)), - Consumption.cost(completion_tokens_cost, self.format_kind("completion", cost=True)), - Consumption.cost(total_cost, self.format_kind("total", cost=True)), + Consumption.cost( + cache_creation_prompt_tokens_cost, self.format_kind(TokenKind.cache_creation_prompt, cost=True) + ), + Consumption.cost(cache_read_prompt_tokens_cost, self.format_kind(TokenKind.cache_read_prompt, cost=True)), + Consumption.cost(prompt_tokens_cost, self.format_kind(TokenKind.prompt, cost=True)), + Consumption.cost(completion_tokens_cost, self.format_kind(TokenKind.completion, cost=True)), + Consumption.cost(total_cost, self.format_kind(TokenKind.total, cost=True)), ] @@ -130,11 +119,11 @@ def __init__(self, config: AnthropicLLMConfiguration, name: Optional[str] = None def _post_chat_request(self, context: LLMContext, messages: Sequence[LLMMessage], **kwargs: Any) -> LLMResult: try: - response = self._api.post_chat_request(messages=messages) - usage = response.raw_response["usage"] if response.raw_response is not None else {} + with DurationManager() as timer: + response = self._api.post_chat_request(messages=messages) return LLMResult( choices=response.choices, - consumptions=self.to_consumptions(usage), + consumptions=self.to_consumptions(timer.duration, response.usage), raw_response=response.raw_response, ) except APITimeoutError as e: @@ -142,16 +131,10 @@ def _post_chat_request(self, context: LLMContext, messages: Sequence[LLMMessage] except APIStatusError as e: raise LLMCallException(code=e.status_code, error=e.message, llm_name=self._name) from e - def to_consumptions(self, usage: Dict[str, int]) -> Sequence[Consumption]: - if "input_tokens" not in usage or "output_tokens" not in usage: - return [] - + def to_consumptions(self, duration: float, usage: Usage) -> Sequence[Consumption]: model = self._configuration.model_name() consumption_calculator = AnthropicConsumptionCalculator(model) - if "cache_creation_input_tokens" in usage: - return consumption_calculator.get_cache_consumptions(usage) - - return consumption_calculator.get_consumptions(usage["input_tokens"], usage["output_tokens"]) + return consumption_calculator.get_anthropic_consumptions(duration, usage) def _get_api_wrapper(self) -> AnthropicAPIClientWrapper: if self._configuration is not None and self._configuration.model_name() == "claude-2": diff --git a/council/llm/anthropic_messages_llm.py b/council/llm/anthropic_messages_llm.py index df2055e7..cc534155 100644 --- a/council/llm/anthropic_messages_llm.py +++ b/council/llm/anthropic_messages_llm.py @@ -6,7 +6,7 @@ from anthropic._types import NOT_GIVEN from anthropic.types import MessageParam, TextBlock from council.llm import AnthropicLLMConfiguration, LLMMessage, LLMMessageRole -from council.llm.anthropic import AnthropicAPIClientResult, AnthropicAPIClientWrapper +from council.llm.anthropic import AnthropicAPIClientResult, AnthropicAPIClientWrapper, Usage from council.llm.llm_message import LLMCacheControlData @@ -44,7 +44,9 @@ def post_chat_request(self, messages: Sequence[LLMMessage]) -> AnthropicAPIClien ) choices = [content.text for content in completion.content if isinstance(content, TextBlock)] - return AnthropicAPIClientResult(choices=choices, raw_response=completion.to_dict()) + return AnthropicAPIClientResult( + choices=choices, usage=Usage.from_dict(completion.usage.to_dict()), raw_response=completion.to_dict() + ) @staticmethod def _to_anthropic_system_messages(messages: Sequence[LLMMessage]) -> Dict[str, 
List[Dict[str, Any]]]:
diff --git a/council/llm/data/anthropic-costs.yaml b/council/llm/data/anthropic-costs.yaml
new file mode 100644
index 00000000..89e85746
--- /dev/null
+++ b/council/llm/data/anthropic-costs.yaml
@@ -0,0 +1,44 @@
+kind: LLMCostManager
+version: 0.1
+metadata:
+  name: anthropic-costs
+  labels:
+    provider: Anthropic
+    reference: https://www.anthropic.com/pricing#anthropic-api
+spec:
+  default:
+    description: |
+      Default model costs
+    models:
+      claude-3-haiku-20240307:
+        input: 0.25
+        output: 1.25
+      claude-3-sonnet-20240229:
+        input: 3.00
+        output: 15.00
+      claude-3-5-sonnet-20240620:
+        input: 3.00
+        output: 15.00
+      claude-3-5-sonnet-20241022:
+        input: 3.00
+        output: 15.00
+      claude-3-opus-20240229:
+        input: 15.00
+        output: 75.00
+  caching:
+    description: |
+      Prompt caching costs: input - cache write; output - cache read;
+      Note - not all models support prompt caching
+    models:
+      claude-3-haiku-20240307:
+        input: 0.30
+        output: 0.03
+      claude-3-5-sonnet-20240620:
+        input: 3.75
+        output: 0.30
+      claude-3-5-sonnet-20241022:
+        input: 3.75
+        output: 0.30
+      claude-3-opus-20240229:
+        input: 18.75
+        output: 1.50
diff --git a/council/llm/data/gemini-costs.yaml b/council/llm/data/gemini-costs.yaml
new file mode 100644
index 00000000..c868fb30
--- /dev/null
+++ b/council/llm/data/gemini-costs.yaml
@@ -0,0 +1,40 @@
+kind: LLMCostManager
+version: 0.1
+metadata:
+  name: gemini-costs
+  labels:
+    provider: Google
+    reference: https://ai.google.dev/pricing
+spec:
+  under_128k:
+    description: |
+      Costs for prompt tokens up to 128k
+    models:
+      gemini-1.5-flash:
+        input: 0.075
+        output: 0.30
+      gemini-1.5-flash-8b:
+        input: 0.0375
+        output: 0.15
+      gemini-1.5-pro:
+        input: 1.25
+        output: 5.00
+      gemini-1.0-pro:
+        input: 0.50
+        output: 1.50
+  over_128k:
+    description: |
+      Costs for prompt tokens over 128k
+    models:
+      gemini-1.5-flash:
+        input: 0.15
+        output: 0.60
+      gemini-1.5-flash-8b:
+        input: 0.075
+        output: 0.30
+      gemini-1.5-pro:
+        input: 2.50
+        output: 10.00
+      gemini-1.0-pro:
+        input: 0.50
+        output: 1.50
diff --git a/council/llm/data/openai-costs.yaml b/council/llm/data/openai-costs.yaml
new file mode 100644
index 00000000..ec9d5249
--- /dev/null
+++ b/council/llm/data/openai-costs.yaml
@@ -0,0 +1,90 @@
+kind: LLMCostManager
+version: 0.1
+metadata:
+  name: openai-costs
+  labels:
+    provider: OpenAI
+    reference: https://openai.com/api/pricing/
+spec:
+  gpt_35_turbo_family:
+    description: |
+      Costs for GPT-3.5 Turbo family models
+    models:
+      gpt-3.5-turbo-0125:
+        input: 0.50
+        output: 1.50
+      gpt-3.5-turbo-instruct:
+        input: 1.50
+        output: 2.00
+      gpt-3.5-turbo-1106:
+        input: 1.00
+        output: 2.00
+      gpt-3.5-turbo-0613:
+        input: 1.50
+        output: 2.00
+      gpt-3.5-turbo-16k-0613:
+        input: 3.00
+        output: 4.00
+      gpt-3.5-turbo-0301:
+        input: 1.50
+        output: 2.00
+  gpt_4_family:
+    description: |
+      Costs for GPT-4 family models
+    models:
+      gpt-4-turbo:
+        input: 10.00
+        output: 30.00
+      gpt-4-turbo-2024-04-09:
+        input: 10.00
+        output: 30.00
+      gpt-4:
+        input: 30.00
+        output: 60.00
+      gpt-4-32k:
+        input: 60.00
+        output: 120.00
+      gpt-4-0125-preview:
+        input: 10.00
+        output: 30.00
+      gpt-4-1106-preview:
+        input: 10.00
+        output: 30.00
+      gpt-4-vision-preview:
+        input: 10.00
+        output: 30.00
+  gpt_4o_family:
+    description: |
+      Costs for GPT-4o family models
+    models:
+      gpt-4o:
+        input: 2.50
+        output: 10.00
+      gpt-4o-2024-08-06:
+        input: 2.50
+        output: 10.00
+      gpt-4o-2024-05-13:
+        input: 5.00
+        output: 15.00
+      gpt-4o-mini:
+        input: 0.150
+        output: 0.60
+      gpt-4o-mini-2024-07-18:
+        input: 0.150
+        output: 0.60
+  o1_family: +
description: | + Costs for o1 family models + models: + o1-preview: + input: 15.00 + output: 60.00 + o1-preview-2024-09-12: + input: 15.00 + output: 60.00 + o1-mini: + input: 3.00 + output: 12.00 + o1-mini-2024-09-12: + input: 3.00 + output: 12.00 diff --git a/council/llm/gemini_llm.py b/council/llm/gemini_llm.py index b08cb286..9ba37122 100644 --- a/council/llm/gemini_llm.py +++ b/council/llm/gemini_llm.py @@ -10,32 +10,22 @@ LLMConfigObject, LLMConsumptionCalculatorBase, LLMCostCard, + LLMCostManagerObject, LLMMessage, LLMMessageRole, LLMProviders, LLMResult, ) +from council.utils.utils import DurationManager from google.ai.generativelanguage import FileData from google.ai.generativelanguage_v1 import HarmCategory # type: ignore from google.generativeai.types import GenerateContentResponse, HarmBlockThreshold # type: ignore class GeminiConsumptionCalculator(LLMConsumptionCalculatorBase): - # https://ai.google.dev/pricing - # different strategy for prompt up to 128k tokens - COSTS_UNDER_128k: Mapping[str, LLMCostCard] = { - "gemini-1.5-flash": LLMCostCard(input=0.075, output=0.30), - "gemini-1.5-flash-8b": LLMCostCard(input=0.0375, output=0.15), - "gemini-1.5-pro": LLMCostCard(input=1.25, output=5.00), - "gemini-1.0-pro": LLMCostCard(input=0.50, output=1.50), - } - - COSTS_OVER_128k: Mapping[str, LLMCostCard] = { - "gemini-1.5-flash": LLMCostCard(input=0.15, output=0.60), - "gemini-1.5-flash-8b": LLMCostCard(input=0.075, output=0.30), - "gemini-1.5-pro": LLMCostCard(input=2.50, output=10.00), - "gemini-1.0-pro": LLMCostCard(input=0.50, output=1.50), - } + _cost_manager = LLMCostManagerObject.gemini() + COSTS_UNDER_128k: Mapping[str, LLMCostCard] = _cost_manager.get_cost_map("under_128k") + COSTS_OVER_128k: Mapping[str, LLMCostCard] = _cost_manager.get_cost_map("over_128k") def __init__(self, model: str, num_tokens: int) -> None: super().__init__(model) @@ -67,16 +57,17 @@ def __init__(self, config: GeminiLLMConfiguration) -> None: def _post_chat_request(self, context: LLMContext, messages: Sequence[LLMMessage], **kwargs: Any) -> LLMResult: history, last = self._to_chat_history(messages=messages) chat = self._model.start_chat(history=history) - response = chat.send_message(last) - return LLMResult(choices=[response.text], consumptions=self.to_consumptions(response)) + with DurationManager() as timer: + response = chat.send_message(last) + return LLMResult(choices=[response.text], consumptions=self.to_consumptions(timer.duration, response)) - def to_consumptions(self, response: GenerateContentResponse) -> Sequence[Consumption]: + def to_consumptions(self, duration: float, response: GenerateContentResponse) -> Sequence[Consumption]: model = self._configuration.model_name() prompt_tokens = response.usage_metadata.prompt_token_count completion_tokens = response.usage_metadata.candidates_token_count - consumption_calculator = GeminiConsumptionCalculator(model, prompt_tokens) - return consumption_calculator.get_consumptions(prompt_tokens, completion_tokens) + calculator = GeminiConsumptionCalculator(model, prompt_tokens) + return calculator.get_consumptions(duration, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) @staticmethod def from_env() -> GeminiLLM: diff --git a/council/llm/llm_cost.py b/council/llm/llm_cost.py index 2fb775df..154baf3a 100644 --- a/council/llm/llm_cost.py +++ b/council/llm/llm_cost.py @@ -1,7 +1,18 @@ +from __future__ import annotations + import abc -from typing import List, Optional, Tuple +import os +from enum import Enum +from typing import Any, Dict, 
Final, List, Optional, Tuple +import yaml from council.contexts import Consumption +from council.utils import DataObject, DataObjectSpecBase + +DATA_PATH: Final[str] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") +ANTHROPIC_COSTS_FILENAME: Final[str] = "anthropic-costs.yaml" +GEMINI_COSTS_FILENAME: Final[str] = "gemini-costs.yaml" +OPENAI_COSTS_FILENAME: Final[str] = "openai-costs.yaml" class LLMCostCard: @@ -13,25 +24,53 @@ def __init__(self, input: float, output: float) -> None: @property def input(self) -> float: + """Cost per million input (prompt) tokens.""" return self._input @property def output(self) -> float: + """Cost per million output (completion) tokens.""" return self._output def __str__(self) -> str: return f"${self.input}/${self.output} per 1m tokens" def input_cost(self, tokens: int) -> float: + """Get prompt_tokens_cost for a given amount of input tokens.""" return tokens * self.input / 1e6 def output_cost(self, tokens: int) -> float: + """Get completion_token_cost for a given amount of completion tokens.""" return tokens * self.output / 1e6 def get_costs(self, prompt_tokens: int, completion_tokens: int) -> Tuple[float, float]: """Return tuple of (prompt_tokens_cost, completion_token_cost)""" return self.input_cost(prompt_tokens), self.output_cost(completion_tokens) + @staticmethod + def from_dict(data: Dict[str, float]) -> LLMCostCard: + return LLMCostCard(input=data["input"], output=data["output"]) + + +class TokenKind(str, Enum): + prompt = "prompt" + """Prompt tokens""" + + completion = "completion" + """Completion tokens""" + + total = "total" + """Total tokens""" + + reasoning = "reasoning" + """Reasoning tokens, specific for OpenAI o1 models""" + + cache_creation_prompt = "cache_creation_prompt" + """Cache creation prompt tokens, specific for Anthropic prompt caching""" + + cache_read_prompt = "cache_read_prompt" + """Cache read prompt tokens, specific for Anthropic and OpenAI prompt caching""" + class LLMConsumptionCalculatorBase(abc.ABC): """Helper class to manage LLM consumptions.""" @@ -39,58 +78,47 @@ class LLMConsumptionCalculatorBase(abc.ABC): def __init__(self, model: str): self.model = model - def format_kind(self, token_kind: str, cost: bool = False) -> str: + def format_kind(self, token_kind: TokenKind, cost: bool = False) -> str: """Format Consumption.kind - from 'prompt' to '{self.model}:prompt_tokens'""" - options = [ - "prompt", - "completion", - "total", - "reasoning", # OpenAI o1 - "cache_creation_prompt", # Anthropic prompt caching - "cache_read_prompt", # Anthropic & OpenAI prompt caching - ] - result = f"{self.model}:" - if token_kind not in options: - raise ValueError( - f"Unknown kind `{token_kind}` for LLMConsumptionCalculator; expected one of `{','.join(options)}`" - ) - - result += f"{token_kind}_tokens" + kind = token_kind.value + return f"{self.model}:{kind}_tokens" if not cost else f"{self.model}:{kind}_tokens_cost" - if cost: - result += "_cost" - - return result + def get_consumptions(self, duration: float, *, prompt_tokens: int, completion_tokens: int) -> List[Consumption]: + """Get base and cost consumptions if any""" + base_consumptions = self.get_base_consumptions( + duration, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens + ) + cost_consumptions = self.get_cost_consumptions(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) + return base_consumptions + cost_consumptions - def get_consumptions(self, prompt_tokens: int, completion_tokens: int) -> List[Consumption]: + def 
get_base_consumptions( + self, duration: float, *, prompt_tokens: int, completion_tokens: int + ) -> List[Consumption]: """ - Get default consumptions: + Get base consumptions: - 1 call + - specified duration - prompt, completion and total tokens - - cost for prompt, completion and total tokens if LLMCostCard can be found """ - return self.get_token_consumptions(prompt_tokens, completion_tokens) + self.get_cost_consumptions( - prompt_tokens, completion_tokens - ) - - def get_token_consumptions(self, prompt_tokens: int, completion_tokens: int) -> List[Consumption]: return [ Consumption.call(1, self.model), - Consumption.token(prompt_tokens, self.format_kind("prompt")), - Consumption.token(completion_tokens, self.format_kind("completion")), - Consumption.token(prompt_tokens + completion_tokens, self.format_kind("total")), + Consumption.duration(duration, self.model), + Consumption.token(prompt_tokens, self.format_kind(TokenKind.prompt)), + Consumption.token(completion_tokens, self.format_kind(TokenKind.completion)), + Consumption.token(prompt_tokens + completion_tokens, self.format_kind(TokenKind.total)), ] - def get_cost_consumptions(self, prompt_tokens: int, completion_tokens: int) -> List[Consumption]: + def get_cost_consumptions(self, *, prompt_tokens: int, completion_tokens: int) -> List[Consumption]: + """Get cost for prompt, completion and total tokens if LLMCostCard can be found.""" cost_card = self.find_model_costs() if cost_card is None: return [] prompt_tokens_cost, completion_tokens_cost = cost_card.get_costs(prompt_tokens, completion_tokens) return [ - Consumption.cost(prompt_tokens_cost, self.format_kind("prompt", cost=True)), - Consumption.cost(completion_tokens_cost, self.format_kind("completion", cost=True)), - Consumption.cost(prompt_tokens_cost + completion_tokens_cost, self.format_kind("total", cost=True)), + Consumption.cost(prompt_tokens_cost, self.format_kind(TokenKind.prompt, cost=True)), + Consumption.cost(completion_tokens_cost, self.format_kind(TokenKind.completion, cost=True)), + Consumption.cost(prompt_tokens_cost + completion_tokens_cost, self.format_kind(TokenKind.total, cost=True)), ] @abc.abstractmethod @@ -101,3 +129,71 @@ def find_model_costs(self) -> Optional[LLMCostCard]: @staticmethod def filter_zeros(consumptions: List[Consumption]) -> List[Consumption]: return list(filter(lambda consumption: consumption.value > 0, consumptions)) + + +class LLMCostManagerSpec(DataObjectSpecBase): + def __init__(self, costs: Dict[str, Dict[str, LLMCostCard]]) -> None: + """ + Initializes a new instance of LLMCostManagerSpec + + Args: + costs (Dict[str, Dict[str, LLMCostCard]]): collection of cost cards of shape + {category: {model_1: LLMCostCard, model_2: LLMCostCard}, another_category: {...}} + """ + self.costs = costs + + @classmethod + def from_dict(cls, values: Dict[str, Any]) -> LLMCostManagerSpec: + costs = { + category: { + model: LLMCostCard.from_dict(model_data) for model, model_data in category_data["models"].items() + } + for category, category_data in values.items() + } + + return LLMCostManagerSpec(costs) + + def to_dict(self) -> Dict[str, Any]: + return self.costs + + def __str__(self) -> str: + return f"LLMCostCards for {len(self.costs.keys())} categories" + + +class LLMCostManagerObject(DataObject[LLMCostManagerSpec]): + """ + Helper class to instantiate an LLMCostManagerObject from a YAML file + """ + + @classmethod + def from_dict(cls, values: Dict[str, Any]) -> LLMCostManagerObject: + return super()._from_dict(LLMCostManagerSpec, values) + + 
@classmethod + def from_yaml(cls, filename: str) -> LLMCostManagerObject: + with open(filename, "r", encoding="utf-8") as f: + values = yaml.safe_load(f) + cls._check_kind(values, "LLMCostManager") + return LLMCostManagerObject.from_dict(values) + + @staticmethod + def anthropic(): + """Get LLMCostManager for Anthropic models""" + return LLMCostManagerObject.from_yaml(os.path.join(DATA_PATH, ANTHROPIC_COSTS_FILENAME)) + + @staticmethod + def gemini(): + """Get LLMCostManager for Gemini models""" + return LLMCostManagerObject.from_yaml(os.path.join(DATA_PATH, GEMINI_COSTS_FILENAME)) + + @staticmethod + def openai(): + """Get LLMCostManager for OpenAI models""" + return LLMCostManagerObject.from_yaml(os.path.join(DATA_PATH, OPENAI_COSTS_FILENAME)) + + def get_cost_map(self, category: str) -> Dict[str, LLMCostCard]: + """Get cost mapping {model: LLMCostCard} for a given category""" + if category not in self.spec.costs: + raise ValueError(f"Unexpected category `{category}` for LLMCostManager") + + return self.spec.costs[category] diff --git a/council/llm/openai_chat_completions_llm.py b/council/llm/openai_chat_completions_llm.py index f487e311..8990774b 100644 --- a/council/llm/openai_chat_completions_llm.py +++ b/council/llm/openai_chat_completions_llm.py @@ -10,12 +10,13 @@ LLMCallException, LLMConsumptionCalculatorBase, LLMCostCard, + LLMCostManagerObject, LLMMessage, LLMMessageTokenCounterBase, LLMResult, + TokenKind, ) - -from ..utils import truncate_dict_values_to_str +from council.utils.utils import DurationManager, truncate_dict_values_to_str class Provider(Protocol): @@ -123,40 +124,11 @@ def from_dict(obj: Any) -> Usage: class OpenAIConsumptionCalculator(LLMConsumptionCalculatorBase): - # https://openai.com/api/pricing/ - COSTS_gpt_35_turbo_FAMILY: Mapping[str, LLMCostCard] = { - "gpt-3.5-turbo-0125": LLMCostCard(input=0.50, output=1.50), - "gpt-3.5-turbo-instruct": LLMCostCard(input=1.50, output=2.00), - "gpt-3.5-turbo-1106": LLMCostCard(input=1.00, output=2.00), - "gpt-3.5-turbo-0613": LLMCostCard(input=1.50, output=2.00), - "gpt-3.5-turbo-16k-0613": LLMCostCard(input=3.00, output=4.00), - "gpt-3.5-turbo-0301": LLMCostCard(input=1.50, output=2.00), - } - - COSTS_gpt_4_FAMILY: Mapping[str, LLMCostCard] = { - "gpt-4-turbo": LLMCostCard(input=10.00, output=30.00), - "gpt-4-turbo-2024-04-09": LLMCostCard(input=10.00, output=30.00), - "gpt-4": LLMCostCard(input=30.00, output=60.00), - "gpt-4-32k": LLMCostCard(input=60.00, output=120.00), - "gpt-4-0125-preview": LLMCostCard(input=10.00, output=30.00), - "gpt-4-1106-preview": LLMCostCard(input=10.00, output=30.00), - "gpt-4-vision-preview": LLMCostCard(input=10.00, output=30.00), - } - - COSTS_gpt_4o_FAMILY: Mapping[str, LLMCostCard] = { - "gpt-4o": LLMCostCard(input=2.50, output=10.00), - "gpt-4o-2024-08-06": LLMCostCard(input=2.50, output=10.00), - "gpt-4o-2024-05-13": LLMCostCard(input=5.00, output=15.00), - "gpt-4o-mini": LLMCostCard(input=0.150, output=0.60), - "gpt-4o-mini-2024-07-18": LLMCostCard(input=0.150, output=0.60), - } - - COSTS_o1_FAMILY: Mapping[str, LLMCostCard] = { - "o1-preview": LLMCostCard(input=15.00, output=60.00), - "o1-preview-2024-09-12": LLMCostCard(input=15.00, output=60.00), - "o1-mini": LLMCostCard(input=3.00, output=12.00), - "o1-mini-2024-09-12": LLMCostCard(input=3.00, output=12.00), - } + _cost_manager = LLMCostManagerObject.openai() + COSTS_gpt_35_turbo_FAMILY: Mapping[str, LLMCostCard] = _cost_manager.get_cost_map("gpt_35_turbo_family") + COSTS_gpt_4_FAMILY: Mapping[str, LLMCostCard] = 
_cost_manager.get_cost_map("gpt_4_family") + COSTS_gpt_4o_FAMILY: Mapping[str, LLMCostCard] = _cost_manager.get_cost_map("gpt_4o_family") + COSTS_o1_FAMILY: Mapping[str, LLMCostCard] = _cost_manager.get_cost_map("o1_family") def find_model_costs(self) -> Optional[LLMCostCard]: if self.model.startswith("o1"): @@ -170,24 +142,26 @@ def find_model_costs(self) -> Optional[LLMCostCard]: return None - def get_openai_consumptions(self, usage: Usage) -> List[Consumption]: + def get_openai_consumptions(self, duration: float, usage: Usage) -> List[Consumption]: """ Get consumptions specific for OpenAI: - 1 call + - specified duration - cache_read_prompt, prompt, reasoning, completion and total tokens - costs LLMCostCard can be found """ - consumptions = self.get_openai_token_consumptions(usage) + self.get_openai_cost_consumptions(usage) + consumptions = self.get_openai_base_consumptions(duration, usage) + self.get_openai_cost_consumptions(usage) return self.filter_zeros(consumptions) # could occur for cache/reasoning tokens - def get_openai_token_consumptions(self, usage: Usage) -> List[Consumption]: + def get_openai_base_consumptions(self, duration: float, usage: Usage) -> List[Consumption]: return [ Consumption.call(1, self.model), - Consumption.token(usage.cached_tokens, self.format_kind("cache_read_prompt")), - Consumption.token(usage.prompt_tokens, self.format_kind("prompt")), - Consumption.token(usage.reasoning_tokens, self.format_kind("reasoning")), - Consumption.token(usage.completion_tokens, self.format_kind("completion")), - Consumption.token(usage.total_tokens, self.format_kind("total")), + Consumption.duration(duration, self.model), + Consumption.token(usage.cached_tokens, self.format_kind(TokenKind.cache_read_prompt)), + Consumption.token(usage.prompt_tokens, self.format_kind(TokenKind.prompt)), + Consumption.token(usage.reasoning_tokens, self.format_kind(TokenKind.reasoning)), + Consumption.token(usage.completion_tokens, self.format_kind(TokenKind.completion)), + Consumption.token(usage.total_tokens, self.format_kind(TokenKind.total)), ] def get_openai_cost_consumptions(self, usage: Usage) -> List[Consumption]: @@ -202,11 +176,11 @@ def get_openai_cost_consumptions(self, usage: Usage) -> List[Consumption]: total_cost = sum([cached_tokens_cost, prompt_tokens_cost, reasoning_tokens_cost, completion_tokens_cost]) return [ - Consumption.cost(cached_tokens_cost, self.format_kind("cache_read_prompt", cost=True)), - Consumption.cost(prompt_tokens_cost, self.format_kind("prompt", cost=True)), - Consumption.cost(reasoning_tokens_cost, self.format_kind("reasoning", cost=True)), - Consumption.cost(completion_tokens_cost, self.format_kind("completion", cost=True)), - Consumption.cost(total_cost, self.format_kind("total", cost=True)), + Consumption.cost(cached_tokens_cost, self.format_kind(TokenKind.cache_read_prompt, cost=True)), + Consumption.cost(prompt_tokens_cost, self.format_kind(TokenKind.prompt, cost=True)), + Consumption.cost(reasoning_tokens_cost, self.format_kind(TokenKind.reasoning, cost=True)), + Consumption.cost(completion_tokens_cost, self.format_kind(TokenKind.completion, cost=True)), + Consumption.cost(total_cost, self.format_kind(TokenKind.total, cost=True)), ] @@ -250,9 +224,9 @@ def choices(self) -> Sequence[Choice]: def raw_response(self) -> Dict[str, Any]: return self._raw_response - def to_consumptions(self) -> Sequence[Consumption]: + def to_consumptions(self, duration: float) -> Sequence[Consumption]: consumption_calculator = OpenAIConsumptionCalculator(self.model) - return 
consumption_calculator.get_openai_consumptions(self.usage) + return consumption_calculator.get_openai_consumptions(duration, self.usage) @staticmethod def from_response(response: Dict[str, Any]) -> OpenAIChatCompletionsResult: @@ -289,13 +263,14 @@ def _post_chat_request(self, context: LLMContext, messages: Sequence[LLMMessage] context.logger.debug( f'message="Sending chat GPT completions request to {self._name}" payload="{truncate_dict_values_to_str(payload, 100)}"' ) - r = self._post_request(payload) + with DurationManager() as timer: + r = self._post_request(payload) context.logger.debug( f'message="Got chat GPT completions result from {self._name}" id="{r.id}" model="{r.model}" {r.usage}' ) return LLMResult( choices=[c.message.content for c in r.choices], - consumptions=r.to_consumptions(), + consumptions=r.to_consumptions(timer.duration), raw_response=r.raw_response, ) diff --git a/council/utils/utils.py b/council/utils/utils.py index 6ebd822c..27fa862e 100644 --- a/council/utils/utils.py +++ b/council/utils/utils.py @@ -1,4 +1,15 @@ -from typing import Dict +import time +from typing import ContextManager, Dict + + +class DurationManager(ContextManager): + def __enter__(self): + self.start_time = time.time() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.end_time = time.time() + self.duration = self.end_time - self.start_time def truncate_dict_values_to_str(data: Dict, max_length: int = 20): diff --git a/docs/source/reference/contexts/consumption.rst b/docs/source/reference/contexts/consumption.rst index 138d00ca..5e58bc8a 100644 --- a/docs/source/reference/contexts/consumption.rst +++ b/docs/source/reference/contexts/consumption.rst @@ -4,3 +4,4 @@ Consumption ----------- .. autoclass:: council.contexts.Consumption + :member-order: bysource diff --git a/docs/source/reference/llm/llm_cost.rst b/docs/source/reference/llm/llm_cost.rst new file mode 100644 index 00000000..71d9cb99 --- /dev/null +++ b/docs/source/reference/llm/llm_cost.rst @@ -0,0 +1,21 @@ +LLMCostCard +----------- + +.. autoclass:: council.llm.LLMCostCard + +LLMConsumptionCalculatorBase +---------------------------- + +.. autoclass:: council.llm.LLMConsumptionCalculatorBase + +LLMCostManagerObject +-------------------- + +.. autoclass:: council.llm.LLMCostManagerObject + :member-order: bysource + +TokenKind +--------- + +.. 
autoclass:: council.llm.TokenKind + :member-order: bysource diff --git a/tests/integration/llm/test_llm_caching_middleware.py b/tests/integration/llm/test_llm_caching_middleware.py index 09bd527e..86368362 100644 --- a/tests/integration/llm/test_llm_caching_middleware.py +++ b/tests/integration/llm/test_llm_caching_middleware.py @@ -32,6 +32,8 @@ def execute_llm_func(llm_func: LLMFunction, message: str, to_print: str, **kwarg response = llm_func.execute(message, **kwargs) print(f"\n{to_print}") print(f"\tResponse duration: {response.duration:.3f}s") + for consumption in response.result.consumptions: + print(f"\t{consumption}") return response diff --git a/tests/unit/llm/test_llm_consumption_calculators.py b/tests/unit/llm/test_llm_consumption_calculators.py index bac6de8e..f046869b 100644 --- a/tests/unit/llm/test_llm_consumption_calculators.py +++ b/tests/unit/llm/test_llm_consumption_calculators.py @@ -1,8 +1,8 @@ import unittest -from council.llm.anthropic_llm import AnthropicConsumptionCalculator +from council.llm.anthropic_llm import AnthropicConsumptionCalculator, Usage as AnthropicUsage from council.llm.gemini_llm import GeminiConsumptionCalculator -from council.llm.openai_chat_completions_llm import OpenAIConsumptionCalculator, Usage +from council.llm.openai_chat_completions_llm import OpenAIConsumptionCalculator, Usage as OpenAIUsage class TestAnthropicConsumptionCalculator(unittest.TestCase): @@ -23,13 +23,13 @@ def test_haiku_cost_calculation(self): self.assertEqual(completion_cost, 0.0625) # $1.25 * 0.05 def test_haiku_cache_cost_calculation(self): - consumptions = AnthropicConsumptionCalculator("claude-3-haiku-20240307").get_cache_cost_consumptions( - { - "cache_creation_prompt_tokens": 1_000_000, - "cache_read_prompt_tokens": 500_000, - "prompt_tokens": 100_000, - "completion_tokens": 50_000, - } + consumptions = AnthropicConsumptionCalculator("claude-3-haiku-20240307").get_anthropic_cost_consumptions( + AnthropicUsage( + prompt_tokens=100_000, + completion_tokens=50_000, + cache_creation_prompt_tokens=1_000_000, + cache_read_prompt_tokens=500_000, + ) ) cache_creation_cost = next(c for c in consumptions if "cache_creation_prompt_tokens_cost" in c.kind) @@ -56,13 +56,13 @@ def test_sonnet_cache_cost_calculation(self): sonnet_versions = ["claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022"] for version in sonnet_versions: - consumptions = AnthropicConsumptionCalculator(version).get_cache_cost_consumptions( - { - "cache_creation_prompt_tokens": 1_000_000, - "cache_read_prompt_tokens": 500_000, - "prompt_tokens": 100_000, - "completion_tokens": 50_000, - } + consumptions = AnthropicConsumptionCalculator(version).get_anthropic_cost_consumptions( + AnthropicUsage( + prompt_tokens=100_000, + completion_tokens=50_000, + cache_creation_prompt_tokens=1_000_000, + cache_read_prompt_tokens=500_000, + ) ) cache_creation_cost = next(c for c in consumptions if "cache_creation_prompt_tokens_cost" in c.kind) @@ -83,13 +83,13 @@ def test_opus_cost_calculation(self): self.assertEqual(completion_cost, 3.75) # $75.00 * 0.05 def test_opus_cache_cost_calculation(self): - consumptions = AnthropicConsumptionCalculator("claude-3-opus-20240229").get_cache_cost_consumptions( - { - "cache_creation_prompt_tokens": 1_000_000, - "cache_read_prompt_tokens": 500_000, - "prompt_tokens": 100_000, - "completion_tokens": 50_000, - } + consumptions = AnthropicConsumptionCalculator("claude-3-opus-20240229").get_anthropic_cost_consumptions( + AnthropicUsage( + prompt_tokens=100_000, + 
completion_tokens=50_000, + cache_creation_prompt_tokens=1_000_000, + cache_read_prompt_tokens=500_000, + ) ) cache_creation_cost = next(c for c in consumptions if "cache_creation_prompt_tokens_cost" in c.kind) @@ -103,13 +103,13 @@ def test_invalid_model(self): def test_invalid_model_cache_costs(self): # doesn't support caching - consumptions = AnthropicConsumptionCalculator("claude-3-sonnet-20240229").get_cache_cost_consumptions( - { - "cache_creation_prompt_tokens": 1_000_000, - "cache_read_prompt_tokens": 500_000, - "prompt_tokens": 100_000, - "completion_tokens": 50_000, - } + consumptions = AnthropicConsumptionCalculator("claude-3-sonnet-20240229").get_anthropic_cost_consumptions( + AnthropicUsage( + prompt_tokens=100_000, + completion_tokens=50_000, + cache_creation_prompt_tokens=1_000_000, + cache_read_prompt_tokens=500_000, + ) ) self.assertEqual(len(consumptions), 0) @@ -117,7 +117,7 @@ def test_invalid_model_cache_costs(self): def test_consumption_units_and_types(self): model = "claude-3-haiku-20240307" calculator = AnthropicConsumptionCalculator(model) - consumptions = calculator.get_cost_consumptions(1_000, 1_000) + consumptions = calculator.get_cost_consumptions(prompt_tokens=1_000, completion_tokens=1_000) for consumption in consumptions: self.assertEqual(consumption.unit, "USD") @@ -125,13 +125,10 @@ def test_consumption_units_and_types(self): def test_cache_consumption_units_and_types(self): model = "claude-3-5-sonnet-20241022" - consumptions = AnthropicConsumptionCalculator(model).get_cache_cost_consumptions( - { - "cache_creation_prompt_tokens": 1000, - "cache_read_prompt_tokens": 500, - "prompt_tokens": 100, - "completion_tokens": 50, - } + consumptions = AnthropicConsumptionCalculator(model).get_anthropic_cost_consumptions( + AnthropicUsage( + prompt_tokens=100, completion_tokens=50, cache_creation_prompt_tokens=1000, cache_read_prompt_tokens=500 + ) ) for consumption in consumptions: @@ -269,7 +266,7 @@ def test_invalid_models(self): self.assertIsNone(OpenAIConsumptionCalculator("gpt-4-invalid").find_model_costs()) def test_cached_tokens_cost_calculations(self): - usage = Usage( + usage = OpenAIUsage( completion_tokens=0, prompt_tokens=500_000, total_tokens=1_500_000, @@ -289,7 +286,7 @@ def test_cached_tokens_cost_calculations(self): self.assertEqual(prompt_cost, 1.25) def test_reasoning_tokens_cost_calculations(self): - usage = Usage( + usage = OpenAIUsage( completion_tokens=1_000_000, prompt_tokens=0, total_tokens=2_000_000,
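
A minimal usage sketch of the pieces introduced above (the YAML-backed LLMCostManagerObject, TokenKind, duration-based Consumption and DurationManager), assuming only the public exports this diff adds to council.llm; the model name and token counts are illustrative placeholders, not part of the change.

# Illustrative sketch only: exercising the cost manager, TokenKind formatting
# and the new duration consumption added in this change.
from council.contexts import Consumption
from council.llm import LLMCostManagerObject, TokenKind
from council.utils.utils import DurationManager

# Load the bundled Anthropic cost file and look up one model's LLMCostCard.
cost_manager = LLMCostManagerObject.anthropic()
cost_card = cost_manager.get_cost_map("default")["claude-3-haiku-20240307"]
print(cost_card)  # "$0.25/$1.25 per 1m tokens"

# Time a block the same way the LLM clients now wrap their API calls.
with DurationManager() as timer:
    prompt_tokens, completion_tokens = 1_000, 200  # placeholder token counts

# Build consumptions the way the calculators do: call, duration, tokens, cost.
model = "claude-3-haiku-20240307"
consumptions = [
    Consumption.call(1, model),
    Consumption.duration(timer.duration, model),
    Consumption.token(prompt_tokens, f"{model}:{TokenKind.prompt.value}_tokens"),
    Consumption.token(completion_tokens, f"{model}:{TokenKind.completion.value}_tokens"),
    Consumption.cost(cost_card.input_cost(prompt_tokens), f"{model}:{TokenKind.prompt.value}_tokens_cost"),
]
for consumption in consumptions:
    print(consumption)

In practice these consumptions are produced automatically by the provider-specific calculators (for example AnthropicConsumptionCalculator.get_anthropic_consumptions or OpenAIConsumptionCalculator.get_openai_consumptions); the sketch only shows what the resulting units and kinds look like.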