From 08d030cf9296b38a4cc67673819289c57f22127b Mon Sep 17 00:00:00 2001
From: Kye Gomez <swarms_wd@Kyes-MBP.attlocal.net>
Date: Wed, 17 Jul 2024 09:32:14 -0700
Subject: [PATCH] [CLEANUP] Serve API on port 8080 and reorganize SkyServe configs

---
 api.py                            |  2 +-
 requirements.txt                  | 10 ++++++--
 servers/agent/api.py              | 10 ++++----
 servers/agent/sky_serve.yaml      | 39 -------------------------------
 servers/llama3/sky_serve_two.yaml | 34 +++++++++++++++++++++++++++
 sky_serve.yaml                    | 35 +++++++++++++++++----------
 6 files changed, 71 insertions(+), 59 deletions(-)
 delete mode 100644 servers/agent/sky_serve.yaml
 create mode 100644 servers/llama3/sky_serve_two.yaml

diff --git a/api.py b/api.py
index 9d0f6b5..bfb638b 100644
--- a/api.py
+++ b/api.py
@@ -187,4 +187,4 @@ async def agent_completions(agent_input: AgentInput):
 if __name__ == "__main__":
     import uvicorn
 
-    uvicorn.run(app, host="0.0.0.0", port=8000, use_colors=True, log_level="info")
+    uvicorn.run(app, host="0.0.0.0", port=8080, use_colors=True, log_level="info")  # Keep in sync with `ports` in sky_serve.yaml.
diff --git a/requirements.txt b/requirements.txt
index 5075090..4c64587 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,5 +23,10 @@ optimum
 auto-gptq
 whisperx
 shortuuid
-exxa
-hf_transfer
\ No newline at end of file
+hf_transfer
+swarms
+fastapi
+uvicorn
+tiktoken
+pydantic
+swarms-cloud
\ No newline at end of file
diff --git a/servers/agent/api.py b/servers/agent/api.py
index e874d42..1515bdc 100644
--- a/servers/agent/api.py
+++ b/servers/agent/api.py
@@ -182,9 +182,9 @@ async def agent_completions(agent_input: AgentInput):
         raise HTTPException(status_code=400, detail=str(e))
 
 
-# if __name__ == "__main__":
-#     import uvicorn
+if __name__ == "__main__":
+    import uvicorn
 
-#     uvicorn.run(
-#         app, host="0.0.0.0", port=8000, use_colors=True, log_level="info"
-#     )
+    uvicorn.run(
+        app, host="0.0.0.0", port=8000, use_colors=True, log_level="info"  # NOTE(review): root api.py binds 8080; confirm 8000 is intended here.
+    )
diff --git a/servers/agent/sky_serve.yaml b/servers/agent/sky_serve.yaml
deleted file mode 100644
index faa8636..0000000
--- a/servers/agent/sky_serve.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-envs:
-  OPENAI_API_KEY: 
-# Service configuration
-service:
-  readiness_probe:
-    path: /v1/agent/completions  # Path for the readiness probe
-    post_data:
-      model: $MODEL_NAME  # Specify the model name
-      messages:
-        - role: user
-          content: Hello! What is your name?  # Specify the initial message
-      max_tokens: 1  # Maximum number of tokens
-  readiness_probe: /v1/health  # Additional readiness probe
-
-  # Replica Policy
-  replica_policy:
-    min_replicas: 0  # Minimum number of replicas
-    max_replicas: 10  # Maximum number of replicas
-    target_qps_per_replica: 2.5  # Target queries per second per replica
-    upscale_delay_seconds: 40  # Delay before upscaling replicas
-    downscale_delay_seconds: 50  # Delay before downscaling replicas
-
-resources:
-  accelerators: {A10g}  # Use the cheapest GPU accelerator
-  use_spot: True
-  disk_size: 100  # Ensure model checkpoints can fit.
-  ports: 8081  # Expose to internet traffic.
-
-setup: |
-  git clone https://github.com/kyegomez/swarms-cloud.git
-
-  cd swarms-cloud
-  cd servers/agent
-
-  # Install dependencies
-  pip install -r requirements.txt
-
-run: |
-  uvicorn api:app --host 0.0.0.0 --port 8081
\ No newline at end of file
diff --git a/servers/llama3/sky_serve_two.yaml b/servers/llama3/sky_serve_two.yaml
new file mode 100644
index 0000000..71a4182
--- /dev/null
+++ b/servers/llama3/sky_serve_two.yaml
@@ -0,0 +1,34 @@
+envs:
+  MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct
+  OPENAI_API_KEY: 
+# Service configuration
+service:
+  # readiness_probe:
+  #   path: /v1/agent/completions  # Path for the readiness probe
+  readiness_probe: /v1/health  # A mapping may define this key only once; this value was the effective one.
+
+  # Replica Policy
+  replica_policy:
+    min_replicas: 1  # Minimum number of replicas
+    max_replicas: 10  # Maximum number of replicas
+    target_qps_per_replica: 2.5  # Target queries per second per replica
+    upscale_delay_seconds: 200  # Delay before upscaling replicas
+    downscale_delay_seconds: 1200  # Delay before downscaling replicas
+
+resources:
+  # accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
+  accelerators: {A10g, A10, L40, A40} # NOTE(review): chosen for an 8B model, but MODEL_NAME above is the 70B variant; confirm.
+  # cpus: 32+
+  use_spot: True
+  disk_size: 100  # Ensure model checkpoints can fit.
+  # disk_tier: best
+  ports: 8080  # Expose to internet traffic; must match the port api.py binds to.
+
+setup: |
+  git clone https://github.com/kyegomez/swarms-cloud.git
+  cd swarms-cloud
+  pip3 install -r requirements.txt
+
+
+run: |
+  cd swarms-cloud && python3 api.py  # `cd` from setup does not carry over into run.
\ No newline at end of file
diff --git a/sky_serve.yaml b/sky_serve.yaml
index 71a4182..267a315 100644
--- a/sky_serve.yaml
+++ b/sky_serve.yaml
@@ -1,34 +1,45 @@
 envs:
-  MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct
-  OPENAI_API_KEY: 
+  OPENAI_API_KEY:  # Never commit secrets; pass at launch with --env OPENAI_API_KEY=<key>.
+  MODEL_NAME: "OpenAIChat"
+
 # Service configuration
 service:
   readiness_probe:
     path: /v1/agent/completions  # Path for the readiness probe
-  readiness_probe: /v1/health  # Additional readiness probe
+    post_data:
+      model: $MODEL_NAME  # Specify the model name
+      messages:
+        - role: user
+          content: Hello! What is your name?  # Specify the initial message
+      max_tokens: 1  # Maximum number of tokens
+  # readiness_probe: /v1/health  # Additional readiness probe
 
   # Replica Policy
   replica_policy:
-    min_replicas: 1  # Minimum number of replicas
-    max_replicas: 10  # Maximum number of replicas
+    min_replicas: 3  # Minimum number of replicas
+    max_replicas: 100  # Maximum number of replicas
     target_qps_per_replica: 2.5  # Target queries per second per replica
-    upscale_delay_seconds: 200  # Delay before upscaling replicas
-    downscale_delay_seconds: 1200  # Delay before downscaling replicas
+    upscale_delay_seconds: 40  # Delay before upscaling replicas
+    downscale_delay_seconds: 1000  # Delay before downscaling replicas
+
+# workdir: .
 
 resources:
-  # accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
-  accelerators: {A10g, A10, L40, A40} # We can use cheaper accelerators for 8B model.
+  accelerators: [L4, A10g, A100, T4] ## Small models
   # cpus: 32+
+  # memory: 32
   use_spot: True
-  disk_size: 100  # Ensure model checkpoints can fit.
+  # disk_size: 512  # Ensure model checkpoints (~246GB) can fit.
   # disk_tier: best
-  ports: 8081  # Expose to internet traffic.
+  ports: 8080  # Expose to internet traffic.
 
 setup: |
   git clone https://github.com/kyegomez/swarms-cloud.git
+
   cd swarms-cloud
-  pip3 install -r requirements.txt
 
+  # Install dependencies
+  pip install -r requirements.txt
 
 run: |
-  python3 api.py
\ No newline at end of file
+  cd swarms-cloud && python3 api.py  # `cd` from setup does not carry over into run.
\ No newline at end of file