generated from kyegomez/Python-Package-Template
-
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Kye Gomez
authored and
Kye Gomez
committed
Jul 17, 2024
1 parent
94d7306
commit 08d030c
Showing
6 changed files
with
71 additions
and
59 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
envs: | ||
MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct | ||
OPENAI_API_KEY: | ||
# Service configuration | ||
service: | ||
readiness_probe: | ||
path: /v1/agent/completions # Path for the readiness probe | ||
readiness_probe: /v1/health # Additional readiness probe | ||
|
||
# Replica Policy | ||
replica_policy: | ||
min_replicas: 1 # Minimum number of replicas | ||
max_replicas: 10 # Maximum number of replicas | ||
target_qps_per_replica: 2.5 # Target queries per second per replica | ||
upscale_delay_seconds: 200 # Delay before upscaling replicas | ||
downscale_delay_seconds: 1200 # Delay before downscaling replicas | ||
|
||
resources: | ||
# accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8} | ||
accelerators: {A10g, A10, L40, A40} # We can use cheaper accelerators for 8B model. | ||
# cpus: 32+ | ||
use_spot: True | ||
disk_size: 100 # Ensure model checkpoints can fit. | ||
# disk_tier: best | ||
ports: 8081 # Expose to internet traffic. | ||
|
||
setup: | | ||
git clone https://github.com/kyegomez/swarms-cloud.git | ||
cd swarms-cloud | ||
pip3 install -r requirements.txt | ||
run: | | ||
python3 api.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,45 @@ | ||
envs: | ||
MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct | ||
OPENAI_API_KEY: | ||
OPENAI_API_KEY: "<REDACTED>" # Secret value removed: never commit API keys; supply the key at deploy time and rotate any key that was exposed. | ||
MODEL_NAME: "OpenAIChat" | ||
|
||
# Service configuration | ||
service: | ||
readiness_probe: | ||
path: /v1/agent/completions # Path for the readiness probe | ||
readiness_probe: /v1/health # Additional readiness probe | ||
post_data: | ||
model: $MODEL_NAME # Specify the model name | ||
messages: | ||
- role: user | ||
content: Hello! What is your name? # Specify the initial message | ||
max_tokens: 1 # Maximum number of tokens | ||
# readiness_probe: /v1/health # Additional readiness probe | ||
|
||
# Replica Policy | ||
replica_policy: | ||
min_replicas: 1 # Minimum number of replicas | ||
max_replicas: 10 # Maximum number of replicas | ||
min_replicas: 3 # Minimum number of replicas | ||
max_replicas: 100 # Maximum number of replicas | ||
target_qps_per_replica: 2.5 # Target queries per second per replica | ||
upscale_delay_seconds: 200 # Delay before upscaling replicas | ||
downscale_delay_seconds: 1200 # Delay before downscaling replicas | ||
upscale_delay_seconds: 40 # Delay before upscaling replicas | ||
downscale_delay_seconds: 1000 # Delay before downscaling replicas | ||
|
||
# workdir: . | ||
|
||
resources: | ||
# accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8} | ||
accelerators: {A10g, A10, L40, A40} # We can use cheaper accelerators for 8B model. | ||
accelerators: [L4, A10g, A100, T4] ## Small models | ||
# cpus: 32+ | ||
# memory: 32 | ||
use_spot: True | ||
disk_size: 100 # Ensure model checkpoints can fit. | ||
# disk_size: 512 # Ensure model checkpoints (~246GB) can fit. | ||
# disk_tier: best | ||
ports: 8081 # Expose to internet traffic. | ||
ports: 8080 # Expose to internet traffic. | ||
|
||
setup: | | ||
git clone https://github.com/kyegomez/swarms-cloud.git | ||
cd swarms-cloud | ||
pip3 install -r requirements.txt | ||
# Install dependencies | ||
pip install -r requirements.txt | ||
run: | | ||
python3 api.py |