Commit

[FEAT][CalCulatePricing]
Kye committed Feb 24, 2024
1 parent d1c282d commit d8b321e
Showing 6 changed files with 1,098 additions and 20 deletions.
10 changes: 4 additions & 6 deletions pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "swarms-cloud"
-version = "0.1.1"
+version = "0.1.2"
 description = "Swarms Cloud - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez <[email protected]>"]
@@ -26,11 +26,9 @@ python = "^3.9"
 swarms = "*"
 fastapi = "*"
 skypilot = "*"
+supabase = "*"
+
+
-[tool.poetry.dev-dependencies]
-# Add development dependencies here
-torch = "*"
-einops = "*"
-pydantic = "*"
 
 
 [tool.poetry.group.lint.dependencies]
16 changes: 8 additions & 8 deletions servers/fuyu_api.py
@@ -7,7 +7,7 @@
 from executor import GenerationExecutor
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, Response, StreamingResponse
-from swarms import Fuyu, Conversation
+from swarms import Fuyu
 
 TIMEOUT_KEEP_ALIVE = 5  # seconds.
 TIMEOUT_TO_PREVENT_DEADLOCK = 1  # seconds.
@@ -47,12 +47,10 @@ async def generate(request: Request) -> Response:
     model = Fuyu(
         model_name=model_name,
         max_new_tokens=max_new_tokens,
-        args=args # Injecting args into the Fuyu model
+        args=args,  # Injecting args into the Fuyu model
     )
     response = model.run(
-        request_dict.pop("prompt"),
-        request_dict.pop("max_num_tokens", 8),
-        streaming
+        request_dict.pop("prompt"), request_dict.pop("max_num_tokens", 8), streaming
     )
 
     async def stream_results() -> AsyncGenerator[bytes, None]:
@@ -72,11 +70,13 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
"args": {
"model_dir": args.model_dir,
"tokenizer_type": args.tokenizer_type,
"max_beam_width": args.max_beam_width
}
"max_beam_width": args.max_beam_width,
},
}

return JSONResponse({"model_config": model_config, "choices": [{"text": response.text}]})
return JSONResponse(
{"model_config": model_config, "choices": [{"text": response.text}]}
)


async def main(args):
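As a side note, a minimal client sketch for exercising this endpoint (assumptions: the /generate route, the port, and the "stream" field name are not visible in this diff; only the "prompt" and "max_num_tokens" keys and the response shape come from the hunks above):

import requests

# Hypothetical endpoint; the actual @app.post path is outside this diff.
URL = "http://localhost:8000/generate"

payload = {
    "prompt": "Describe the image in one sentence.",
    "max_num_tokens": 64,  # the server falls back to 8 when this key is absent
    "stream": False,  # assumed name for the streaming flag
}
data = requests.post(URL, json=payload, timeout=60).json()
print(data["choices"][0]["text"])  # generated text
print(data["model_config"]["args"])  # model_dir, tokenizer_type, max_beam_width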
3 changes: 2 additions & 1 deletion swarms_cloud/__init__.py
@@ -43,7 +43,7 @@
     OutputOpenAISpec,
     OpenAIAPIWrapper,
 )
-
+from swarms_cloud.calculate_pricing import calculate_pricing

 __all__ = [
     "generate_api_key",
@@ -84,4 +84,5 @@
"InputOpenAISpec",
"OutputOpenAISpec",
"OpenAIAPIWrapper",
"calculate_pricing",
]
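With both the import and the __all__ entry in place, calculate_pricing is re-exported at the package root; a one-line sanity check:

# The new export makes the helper importable from the top-level package.
from swarms_cloud import calculate_pricing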
63 changes: 63 additions & 0 deletions swarms_cloud/calculate_pricing.py
@@ -0,0 +1,63 @@
+from transformers import PreTrainedTokenizer
+
+
+# Function to calculate tokens and pricing
+def calculate_pricing(texts, tokenizer: PreTrainedTokenizer, rate_per_million=0.01):
+    """
+    Calculates the pricing for a given list of texts based on the number of tokens, sentences, words, characters, and paragraphs.
+
+    Args:
+        texts (list): A list of texts to calculate pricing for.
+        tokenizer (PreTrainedTokenizer): A pre-trained tokenizer object used to tokenize the texts.
+        rate_per_million (float, optional): The rate per million tokens used to calculate the cost. Defaults to 0.01.
+
+    Returns:
+        tuple: A tuple containing the total number of tokens, sentences, words, characters, paragraphs, and the calculated cost.
+
+    Example usage:
+    >>> from transformers import AutoTokenizer
+    >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
+    >>> texts = ["This is the first example text.", "This is the second example text."]
+    >>> total_tokens, total_sentences, total_words, total_characters, total_paragraphs, cost = calculate_pricing(texts, tokenizer)
+    >>> print(f"Total tokens processed: {total_tokens}")
+    >>> print(f"Total cost: ${cost:.5f}")
+    """
+    total_tokens = 0
+    total_sentences = 0
+    total_words = 0
+    total_characters = 0
+    total_paragraphs = 0
+
+    for text in texts:
+        # Tokenize the text and count tokens
+        tokens = tokenizer.encode(text, add_special_tokens=True)
+        total_tokens += len(tokens)
+
+        # Count sentences
+        sentences = text.count(".") + text.count("!") + text.count("?")
+        total_sentences += sentences
+
+        # Count words
+        words = len(text.split())
+        total_words += words
+
+        # Count characters
+        characters = len(text)
+        total_characters += characters
+
+        # Count paragraphs
+        paragraphs = text.count("\n\n") + 1
+        total_paragraphs += paragraphs
+
+    # Calculate total cost with high precision
+    cost = (total_tokens / 1_000_000) * rate_per_million
+    print(f"Total cost: ${float(cost):.10f}")
+
+    return (
+        total_tokens,
+        total_sentences,
+        total_words,
+        total_characters,
+        total_paragraphs,
+        cost,
+    )
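A quick usage sketch of the new helper, mirroring the docstring's own example (it assumes the transformers package is installed; "gpt2" is the tokenizer the docstring uses, downloaded on first call). The cost is linear in tokens: at the default rate of $0.01 per million tokens, 2,500 tokens would cost (2,500 / 1,000,000) * 0.01 = $0.000025.

from transformers import AutoTokenizer

from swarms_cloud import calculate_pricing

tokenizer = AutoTokenizer.from_pretrained("gpt2")
texts = ["This is the first example text.", "This is the second example text."]

# calculate_pricing returns (tokens, sentences, words, characters, paragraphs, cost)
tokens, sentences, words, characters, paragraphs, cost = calculate_pricing(
    texts, tokenizer, rate_per_million=0.01
)
print(f"{tokens} tokens -> ${cost:.10f}")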