From 08d030cf9296b38a4cc67673819289c57f22127b Mon Sep 17 00:00:00 2001 From: Kye Gomez Date: Wed, 17 Jul 2024 09:32:14 -0700 Subject: [PATCH] [CLEANUP] --- api.py | 2 +- requirements.txt | 10 ++++++-- servers/agent/api.py | 10 ++++---- servers/agent/sky_serve.yaml | 39 ------------------------------- servers/llama3/sky_serve_two.yaml | 34 +++++++++++++++++++++++++++ sky_serve.yaml | 35 +++++++++++++++++---------- 6 files changed, 71 insertions(+), 59 deletions(-) delete mode 100644 servers/agent/sky_serve.yaml create mode 100644 servers/llama3/sky_serve_two.yaml diff --git a/api.py b/api.py index 9d0f6b5..bfb638b 100644 --- a/api.py +++ b/api.py @@ -187,4 +187,4 @@ async def agent_completions(agent_input: AgentInput): if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", port=8000, use_colors=True, log_level="info") + uvicorn.run(app, host="0.0.0.0", port=8080, use_colors=True, log_level="info") diff --git a/requirements.txt b/requirements.txt index 5075090..4c64587 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,5 +23,11 @@ optimum auto-gptq whisperx shortuuid -exxa -hf_transfer \ No newline at end of file +hf_transfer +swarms +fastapi +uvicorn +tiktoken +pydantic +asyncio +swarms-cloud \ No newline at end of file diff --git a/servers/agent/api.py b/servers/agent/api.py index e874d42..1515bdc 100644 --- a/servers/agent/api.py +++ b/servers/agent/api.py @@ -182,9 +182,9 @@ async def agent_completions(agent_input: AgentInput): raise HTTPException(status_code=400, detail=str(e)) -# if __name__ == "__main__": -# import uvicorn +if __name__ == "__main__": + import uvicorn -# uvicorn.run( -# app, host="0.0.0.0", port=8000, use_colors=True, log_level="info" -# ) + uvicorn.run( + app, host="0.0.0.0", port=8000, use_colors=True, log_level="info" + ) diff --git a/servers/agent/sky_serve.yaml b/servers/agent/sky_serve.yaml deleted file mode 100644 index faa8636..0000000 --- a/servers/agent/sky_serve.yaml +++ /dev/null @@ -1,39 +0,0 @@ -envs: - OPENAI_API_KEY: -# Service configuration -service: - readiness_probe: - path: /v1/agent/completions # Path for the readiness probe - post_data: - model: $MODEL_NAME # Specify the model name - messages: - - role: user - content: Hello! What is your name? # Specify the initial message - max_tokens: 1 # Maximum number of tokens - readiness_probe: /v1/health # Additional readiness probe - - # Replica Policy - replica_policy: - min_replicas: 0 # Minimum number of replicas - max_replicas: 10 # Maximum number of replicas - target_qps_per_replica: 2.5 # Target queries per second per replica - upscale_delay_seconds: 40 # Delay before upscaling replicas - downscale_delay_seconds: 50 # Delay before downscaling replicas - -resources: - accelerators: {A10g} # Use the cheapest GPU accelerator - use_spot: True - disk_size: 100 # Ensure model checkpoints can fit. - ports: 8081 # Expose to internet traffic. - -setup: | - git clone https://github.com/kyegomez/swarms-cloud.git - - cd swarms-cloud - cd servers/agent - - # Install dependencies - pip install -r requirements.txt - -run: | - uvicorn api:app --host 0.0.0.0 --port 8081 \ No newline at end of file diff --git a/servers/llama3/sky_serve_two.yaml b/servers/llama3/sky_serve_two.yaml new file mode 100644 index 0000000..71a4182 --- /dev/null +++ b/servers/llama3/sky_serve_two.yaml @@ -0,0 +1,34 @@ +envs: + MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct + OPENAI_API_KEY: +# Service configuration +service: + readiness_probe: + path: /v1/agent/completions # Path for the readiness probe + readiness_probe: /v1/health # Additional readiness probe + + # Replica Policy + replica_policy: + min_replicas: 1 # Minimum number of replicas + max_replicas: 10 # Maximum number of replicas + target_qps_per_replica: 2.5 # Target queries per second per replica + upscale_delay_seconds: 200 # Delay before upscaling replicas + downscale_delay_seconds: 1200 # Delay before downscaling replicas + +resources: + # accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8} + accelerators: {A10g, A10, L40, A40} # We can use cheaper accelerators for 8B model. + # cpus: 32+ + use_spot: True + disk_size: 100 # Ensure model checkpoints can fit. + # disk_tier: best + ports: 8081 # Expose to internet traffic. + +setup: | + git clone https://github.com/kyegomez/swarms-cloud.git + cd swarms-cloud + pip3 install -r requirements.txt + + +run: | + python3 api.py \ No newline at end of file diff --git a/sky_serve.yaml b/sky_serve.yaml index 71a4182..267a315 100644 --- a/sky_serve.yaml +++ b/sky_serve.yaml @@ -1,34 +1,45 @@ envs: - MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct - OPENAI_API_KEY: + OPENAI_API_KEY: "sk-proj-3ITMdHfIzL3Myk93zSjQT3BlbkFJTPIIAFZhWz8wJiNdZfKt" + MODEL_NAME: "OpenAIChat" + # Service configuration service: readiness_probe: path: /v1/agent/completions # Path for the readiness probe - readiness_probe: /v1/health # Additional readiness probe + post_data: + model: $MODEL_NAME # Specify the model name + messages: + - role: user + content: Hello! What is your name? # Specify the initial message + max_tokens: 1 # Maximum number of tokens + # readiness_probe: /v1/health # Additional readiness probe # Replica Policy replica_policy: - min_replicas: 1 # Minimum number of replicas - max_replicas: 10 # Maximum number of replicas + min_replicas: 3 # Minimum number of replicas + max_replicas: 100 # Maximum number of replicas target_qps_per_replica: 2.5 # Target queries per second per replica - upscale_delay_seconds: 200 # Delay before upscaling replicas - downscale_delay_seconds: 1200 # Delay before downscaling replicas + upscale_delay_seconds: 40 # Delay before upscaling replicas + downscale_delay_seconds: 1000 # Delay before downscaling replicas + +# workdir: . resources: - # accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8} - accelerators: {A10g, A10, L40, A40} # We can use cheaper accelerators for 8B model. + accelerators: [L4, A10g, A100, T4] ## Small models # cpus: 32+ + # memory: 32 use_spot: True - disk_size: 100 # Ensure model checkpoints can fit. + # disk_size: 512 # Ensure model checkpoints (~246GB) can fit. # disk_tier: best - ports: 8081 # Expose to internet traffic. + ports: 8080 # Expose to internet traffic. setup: | git clone https://github.com/kyegomez/swarms-cloud.git + cd swarms-cloud - pip3 install -r requirements.txt + # Install dependencies + pip install -r requirements.txt run: | python3 api.py \ No newline at end of file