Commit d60b550

[CLEANUP]
Kye Gomez authored and committed Jul 23, 2024
1 parent 0db021e commit d60b550
Showing 7 changed files with 18 additions and 232 deletions.
20 changes: 0 additions & 20 deletions .tflint.hcl

This file was deleted.

190 changes: 0 additions & 190 deletions api.py

This file was deleted.

Binary file modified servers/.DS_Store
26 changes: 14 additions & 12 deletions servers/agent/api.py
@@ -42,7 +42,7 @@ class AgentOutput(BaseModel):
     completions: ChatCompletionResponse


-def count_tokens(
+async def count_tokens(
     text: str,
 ):
     try:
@@ -60,7 +60,7 @@ def count_tokens(
         raise HTTPException(status_code=400, detail=str(e))


-def model_router(model_name: str):
+async def model_router(model_name: str):
     """
     Function to switch to the specified model.
@@ -107,17 +107,19 @@ def model_router(model_name: str):
     )


-@app.get("/v1/models", response_model=List[str])
-async def list_models():
-    """
-    An endpoint to list available models. It returns a list of model names.
-    This is useful for clients to query and understand what models are available for use.
-    """
-    model_names = ["OpenAIChat", "GPT4o", "GPT4VisionAPI", "Anthropic"]
-    return model_names
+# @app.get("/v1/models", response_model=ModelList)
+# async def list_models():
+#     """
+#     An endpoint to list available models. It returns a list of model cards.
+#     This is useful for clients to query and understand what models are available for use.
+#     """
+#     model_card = ModelCard(
+#         id="cogvlm-chat-17b"
+#     ) # can be replaced by your model id like cogagent-chat-18b
+#     return ModelList(data=[model_card])


-@app.post("/v1/agent/completions", response_model=AgentOutput)
+@app.post("v1/agent/completions", response_model=AgentOutput)
 async def agent_completions(agent_input: AgentInput):
     try:
         logger.info(f"Received request: {agent_input}")
@@ -185,4 +187,4 @@ async def agent_completions(agent_input: AgentInput):
 if __name__ == "__main__":
     import uvicorn

-    uvicorn.run(app, host="0.0.0.0", port=8000, use_colors=True, log_level="info")
+    uvicorn.run(app, host="0.0.0.0", port=8080, use_colors=True, log_level="info")
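
A quick local sanity check for the updated completions route, sketched with curl. Assumptions: the server is running on the new port 8080, and the JSON fields below are hypothetical placeholders, since AgentInput's schema does not appear in this diff. Note also that Starlette normally requires routed paths to start with "/", so the "v1/agent/completions" path string above may need its leading slash restored before this registers.

  # Hypothetical AgentInput payload; the model's real field names are not shown in this diff.
  curl -s http://localhost:8080/v1/agent/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "OpenAIChat", "task": "Summarize this repository."}'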
@@ -1,7 +1,7 @@
 envs:
   MODEL_NAME: google/paligemma-3b-pt-224
   MODEL_ARCH: internlm
-  HUGGING_FACE_HUB_TOKEN: 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
+  HUGGING_FACE_HUB_TOKEN: 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
   MODEL_PORT: 8080

 resources:
@@ -35,13 +35,7 @@ service:


 setup: |
-  docker run --runtime nvidia --gpus all \
-    -v ~/.cache/huggingface:/root/.cache/huggingface \
-    --env "HUGGING_FACE_HUB_TOKEN=<secret>" \
-    -p 8000:8000 \
-    --ipc=host \
-    vllm/vllm-openai:latest \
-    --model mistralai/Mistral-7B-v0.1
+  pip install vllm

 run: |
-  lmdeploy serve api_server google/paligemma-3b-pt-224 --server-port 8080
+  vllm serve google/paligemma-3b-pt-224 --chat-template template_llava.jinja
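
As a quick check of the new run step: vLLM's OpenAI-compatible server listens on port 8000 by default unless --port is passed, so if the MODEL_PORT of 8080 above is meant to be honored, the command likely needs an explicit flag. A hedged smoke test:

  # List the served models; target port 8080 instead if --port 8080 is added to vllm serve.
  curl -s http://localhost:8000/v1/models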
2 changes: 1 addition & 1 deletion sky_serve.yaml
@@ -1,5 +1,5 @@
 envs:
-  OPENAI_API_KEY: "sk-proj"
+  OPENAI_API_KEY: "sk-proj-3ITMdHfIzL3Myk93zSjQT3BlbkFJTPIIAFZhWz8wJiNdZfKt"
   MODEL_NAME: "OpenAIChat"

 # Service configuration
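
For context, a SkyServe config like this is typically deployed with SkyPilot's CLI. A minimal sketch, assuming SkyPilot is installed and cloud credentials are configured:

  # Launch the service defined in sky_serve.yaml, then check replica status.
  sky serve up sky_serve.yaml
  sky serve status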
Binary file removed swarms_cloud.png
