[CLEANUP]

The-Swarm-Corporation · Jul 17, 2024 · 08d030c · 08d030c
1 parent 94d7306
commit 08d030c
Show file tree

Hide file tree

Showing 6 changed files with 71 additions and 59 deletions.
diff --git a/api.py b/api.py
@@ -187,4 +187,4 @@ async def agent_completions(agent_input: AgentInput):
 if __name__ == "__main__":
     import uvicorn
 
-    uvicorn.run(app, host="0.0.0.0", port=8000, use_colors=True, log_level="info")
+    uvicorn.run(app, host="0.0.0.0", port=8080, use_colors=True, log_level="info")
diff --git a/requirements.txt b/requirements.txt
@@ -23,5 +23,11 @@ optimum
 auto-gptq
 whisperx
 shortuuid
-exxa
-hf_transfer
+hf_transfer
+swarms
+fastapi
+uvicorn
+tiktoken
+pydantic
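+asyncio  # NOTE(review): asyncio ships with the standard library (Python 3.4+); the PyPI package of this name is an obsolete backport -- consider removing this entry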
+swarms-cloud
diff --git a/servers/agent/api.py b/servers/agent/api.py
@@ -182,9 +182,9 @@ async def agent_completions(agent_input: AgentInput):
         raise HTTPException(status_code=400, detail=str(e))
 
 
-# if __name__ == "__main__":
-#     import uvicorn
+if __name__ == "__main__":
+    import uvicorn
 
-#     uvicorn.run(
-#         app, host="0.0.0.0", port=8000, use_colors=True, log_level="info"
-#     )
+    uvicorn.run(
+        app, host="0.0.0.0", port=8000, use_colors=True, log_level="info"
+    )
diff --git a/servers/agent/sky_serve.yaml b/servers/agent/sky_serve.yaml
diff --git a/servers/llama3/sky_serve_two.yaml b/servers/llama3/sky_serve_two.yaml
@@ -0,0 +1,34 @@
+envs:
+  MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct
+  OPENAI_API_KEY: 
+# Service configuration
+service:
+  readiness_probe:
+    path: /v1/agent/completions  # Path for the readiness probe
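+  readiness_probe: /v1/health  # NOTE: duplicate key, not an additional probe -- a mapping holds one readiness_probe, so this overrides the mapping above or is rejected, depending on the loader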
+
+  # Replica Policy
+  replica_policy:
+    min_replicas: 1  # Minimum number of replicas
+    max_replicas: 10  # Maximum number of replicas
+    target_qps_per_replica: 2.5  # Target queries per second per replica
+    upscale_delay_seconds: 200  # Delay before upscaling replicas
+    downscale_delay_seconds: 1200  # Delay before downscaling replicas
+
+resources:
+  # accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
+  accelerators: {A10g, A10, L40, A40}  # Cheaper accelerators. NOTE(review): this remark was written for an 8B model, but MODEL_NAME above is the 70B model -- confirm sizing
+  # cpus: 32+
+  use_spot: True
+  disk_size: 100  # Ensure model checkpoints can fit.
+  # disk_tier: best
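+  ports: 8081  # Expose to internet traffic. NOTE(review): api.py in this commit listens on 8080 -- confirm this port matches the server started by `run`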
+
+setup: |
+  git clone https://github.com/kyegomez/swarms-cloud.git
+  cd swarms-cloud
+  pip3 install -r requirements.txt
+
+
+run: |
+  python3 api.py
diff --git a/sky_serve.yaml b/sky_serve.yaml
@@ -1,34 +1,45 @@
 envs:
-  MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct
-  OPENAI_API_KEY: 
+  OPENAI_API_KEY:  # REDACTED: a plaintext API key was committed here -- revoke it and supply the key at launch (e.g. `--env OPENAI_API_KEY`) instead of storing it in the repo
+  MODEL_NAME: "OpenAIChat"
+
 # Service configuration
 service:
   readiness_probe:
     path: /v1/agent/completions  # Path for the readiness probe
-  readiness_probe: /v1/health  # Additional readiness probe
+    post_data:
+      model: $MODEL_NAME  # Specify the model name
+      messages:
+        - role: user
+          content: Hello! What is your name?  # Specify the initial message
+      max_tokens: 1  # Maximum number of tokens
+  # readiness_probe: /v1/health  # Additional readiness probe
 
   # Replica Policy
   replica_policy:
-    min_replicas: 1  # Minimum number of replicas
-    max_replicas: 10  # Maximum number of replicas
+    min_replicas: 3  # Minimum number of replicas
+    max_replicas: 100  # Maximum number of replicas
     target_qps_per_replica: 2.5  # Target queries per second per replica
-    upscale_delay_seconds: 200  # Delay before upscaling replicas
-    downscale_delay_seconds: 1200  # Delay before downscaling replicas
+    upscale_delay_seconds: 40  # Delay before upscaling replicas
+    downscale_delay_seconds: 1000  # Delay before downscaling replicas
+
+# workdir: .
 
 resources:
-  # accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
-  accelerators: {A10g, A10, L40, A40} # We can use cheaper accelerators for 8B model.
+  accelerators: [L4, A10g, A100, T4] ## Small models
   # cpus: 32+
+  # memory: 32
   use_spot: True
-  disk_size: 100  # Ensure model checkpoints can fit.
+  # disk_size: 512  # Ensure model checkpoints (~246GB) can fit.
   # disk_tier: best
-  ports: 8081  # Expose to internet traffic.
+  ports: 8080  # Expose to internet traffic.
 
 setup: |
   git clone https://github.com/kyegomez/swarms-cloud.git
+
   cd swarms-cloud
-  pip3 install -r requirements.txt
 
+  # Install dependencies
+  pip install -r requirements.txt
 
 run: |
   python3 api.py