Skip to content

Commit

Permalink
[CLEANUP]
Browse files Browse the repository at this point in the history
  • Loading branch information
Kye Gomez authored and Kye Gomez committed Jul 17, 2024
1 parent 94d7306 commit 08d030c
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 59 deletions.
2 changes: 1 addition & 1 deletion api.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,4 +187,4 @@ async def agent_completions(agent_input: AgentInput):
if __name__ == "__main__":
import uvicorn

uvicorn.run(app, host="0.0.0.0", port=8000, use_colors=True, log_level="info")
uvicorn.run(app, host="0.0.0.0", port=8080, use_colors=True, log_level="info")
10 changes: 8 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,11 @@ optimum
auto-gptq
whisperx
shortuuid
exxa
hf_transfer
hf_transfer
swarms
fastapi
uvicorn
tiktoken
pydantic
asyncio
swarms-cloud
10 changes: 5 additions & 5 deletions servers/agent/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,9 @@ async def agent_completions(agent_input: AgentInput):
raise HTTPException(status_code=400, detail=str(e))


# if __name__ == "__main__":
# import uvicorn
if __name__ == "__main__":
import uvicorn

# uvicorn.run(
# app, host="0.0.0.0", port=8000, use_colors=True, log_level="info"
# )
uvicorn.run(
app, host="0.0.0.0", port=8000, use_colors=True, log_level="info"
)
39 changes: 0 additions & 39 deletions servers/agent/sky_serve.yaml

This file was deleted.

34 changes: 34 additions & 0 deletions servers/llama3/sky_serve_two.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
envs:
MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct
OPENAI_API_KEY:
# Service configuration
service:
  readiness_probe:
    path: /v1/agent/completions # Path for the readiness probe
  # readiness_probe: /v1/health  # NOTE(review): duplicate 'readiness_probe' key — YAML last-key-wins would silently discard the probe path above; commented out (same fix the sibling sky_serve.yaml received)

# Replica Policy
replica_policy:
min_replicas: 1 # Minimum number of replicas
max_replicas: 10 # Maximum number of replicas
target_qps_per_replica: 2.5 # Target queries per second per replica
upscale_delay_seconds: 200 # Delay before upscaling replicas
downscale_delay_seconds: 1200 # Delay before downscaling replicas

resources:
# accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
accelerators: {A10g, A10, L40, A40} # We can use cheaper accelerators for 8B model.
# cpus: 32+
use_spot: True
disk_size: 100 # Ensure model checkpoints can fit.
# disk_tier: best
ports: 8081 # Expose to internet traffic.

setup: |
git clone https://github.com/kyegomez/swarms-cloud.git
cd swarms-cloud
pip3 install -r requirements.txt
run: |
python3 api.py
35 changes: 23 additions & 12 deletions sky_serve.yaml
Original file line number Diff line number Diff line change
@@ -1,34 +1,45 @@
envs:
MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct
OPENAI_API_KEY:
OPENAI_API_KEY: ""  # SECURITY: a live-format secret key ("sk-proj-…") was committed here in plaintext — redacted; revoke the key immediately and inject it via an environment variable or secret manager instead
MODEL_NAME: "OpenAIChat"

# Service configuration
service:
readiness_probe:
path: /v1/agent/completions # Path for the readiness probe
readiness_probe: /v1/health # Additional readiness probe
post_data:
model: $MODEL_NAME # Specify the model name
messages:
- role: user
content: Hello! What is your name? # Specify the initial message
max_tokens: 1 # Maximum number of tokens
# readiness_probe: /v1/health # Additional readiness probe

# Replica Policy
replica_policy:
min_replicas: 1 # Minimum number of replicas
max_replicas: 10 # Maximum number of replicas
min_replicas: 3 # Minimum number of replicas
max_replicas: 100 # Maximum number of replicas
target_qps_per_replica: 2.5 # Target queries per second per replica
upscale_delay_seconds: 200 # Delay before upscaling replicas
downscale_delay_seconds: 1200 # Delay before downscaling replicas
upscale_delay_seconds: 40 # Delay before upscaling replicas
downscale_delay_seconds: 1000 # Delay before downscaling replicas

# workdir: .

resources:
# accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
accelerators: {A10g, A10, L40, A40} # We can use cheaper accelerators for 8B model.
accelerators: [L4, A10g, A100, T4] ## Small models
# cpus: 32+
# memory: 32
use_spot: True
disk_size: 100 # Ensure model checkpoints can fit.
# disk_size: 512 # Ensure model checkpoints (~246GB) can fit.
# disk_tier: best
ports: 8081 # Expose to internet traffic.
ports: 8080 # Expose to internet traffic.

setup: |
git clone https://github.com/kyegomez/swarms-cloud.git
cd swarms-cloud
pip3 install -r requirements.txt
# Install dependencies
pip install -r requirements.txt
run: |
python3 api.py

0 comments on commit 08d030c

Please sign in to comment.