meta-llama · ashwinb · Dec 4, 2024 · Nov 20, 2024 · Nov 25, 2024 · Nov 25, 2024
@@ -80,6 +80,7 @@ Additionally, we have designed every element of the Stack such that APIs as well
 |  **API Provider Builder** |  **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
 | :----: | :----: | :----: | :----: | :----: | :----: | :----: |
 |  Meta Reference  |  Single Node | :heavy_check_mark:  |  :heavy_check_mark:  |  :heavy_check_mark:  |  :heavy_check_mark:  |  :heavy_check_mark:  |
+|  Cerebras  |  Hosted  |   | :heavy_check_mark:  |    |    |   |
 |  Fireworks  |  Hosted  | :heavy_check_mark:  | :heavy_check_mark:  |  :heavy_check_mark:  |    |   |
 |  AWS Bedrock  |  Hosted  |    |  :heavy_check_mark:  |    | :heavy_check_mark:  | |
 |  Together  |  Hosted  |  :heavy_check_mark:  |  :heavy_check_mark:  |   | :heavy_check_mark:  |  |
@@ -95,6 +96,7 @@ Additionally, we have designed every element of the Stack such that APIs as well
 |:----------------:	|:------------------------------------------:	|:-----------------------:	|
 |  Meta Reference  	| [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) 	|       [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html)       	|
 |  Meta Reference Quantized  	| [llamastack/distribution-meta-reference-quantized-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-quantized-gpu/general) 	|       [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-quantized-gpu.html)       	|
+|      Cerebras     |       [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general)       	|       [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html)       	|
 |      Ollama      	|       [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general)       	|       [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html)       	|
 |        TGI       	|         [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general)        	|       [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html)       	|
 |        Together       	|         [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general)        	|       [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html)       	|

@@ -0,0 +1 @@
+../../llama_stack/templates/cerebras/build.yaml
@@ -0,0 +1,16 @@
+services:  # single-service stack: Llama Stack server from the Cerebras image
+  llamastack:
+    image: llamastack/distribution-cerebras
+    # Host networking binds the server's port directly on the host, so no
+    # `ports` mapping is declared (Compose rejects `ports` with host mode).
+    network_mode: "host"
+    volumes:
+      - ~/.llama:/root/.llama
+      - ./run.yaml:/root/llamastack-run-cerebras.yaml  # read via --yaml_config
+    entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-cerebras.yaml"
+    deploy:
+      restart_policy:  # bounded retries when the server process fails
+        condition: on-failure
+        delay: 3s
+        max_attempts: 5
+        window: 60s
@@ -0,0 +1 @@
+../../llama_stack/templates/cerebras/run.yaml
@@ -1,5 +1,5 @@
 {
-  "hf-serverless": [
+  "tgi": [
     "aiohttp",
     "aiosqlite",
     "blobfile",
@@ -27,7 +27,7 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "together": [
+  "remote-vllm": [
     "aiosqlite",
     "blobfile",
     "chardet",
@@ -39,6 +39,7 @@
     "matplotlib",
     "nltk",
     "numpy",
+    "openai",
     "pandas",
     "pillow",
     "psycopg2-binary",
@@ -47,7 +48,6 @@
     "scikit-learn",
     "scipy",
     "sentencepiece",
-    "together",
     "tqdm",
     "transformers",
     "uvicorn",
@@ -81,19 +81,22 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "remote-vllm": [
+  "meta-reference-quantized-gpu": [
+    "accelerate",
     "aiosqlite",
     "blobfile",
     "chardet",
     "chromadb-client",
+    "fairscale",
     "faiss-cpu",
     "fastapi",
+    "fbgemm-gpu",
     "fire",
     "httpx",
+    "lm-format-enforcer",
     "matplotlib",
     "nltk",
     "numpy",
-    "openai",
     "pandas",
     "pillow",
     "psycopg2-binary",
@@ -102,22 +105,28 @@
     "scikit-learn",
     "scipy",
     "sentencepiece",
+    "torch",
+    "torchao==0.5.0",
+    "torchvision",
     "tqdm",
     "transformers",
     "uvicorn",
+    "zmq",
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "fireworks": [
+  "meta-reference-gpu": [
+    "accelerate",
     "aiosqlite",
     "blobfile",
     "chardet",
     "chromadb-client",
+    "fairscale",
     "faiss-cpu",
     "fastapi",
     "fire",
-    "fireworks-ai",
     "httpx",
+    "lm-format-enforcer",
     "matplotlib",
     "nltk",
     "numpy",
@@ -129,13 +138,16 @@
     "scikit-learn",
     "scipy",
     "sentencepiece",
+    "torch",
+    "torchvision",
     "tqdm",
     "transformers",
     "uvicorn",
+    "zmq",
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "tgi": [
+  "hf-serverless": [
     "aiohttp",
     "aiosqlite",
     "blobfile",
@@ -163,10 +175,9 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "bedrock": [
+  "together": [
     "aiosqlite",
     "blobfile",
-    "boto3",
     "chardet",
     "chromadb-client",
     "faiss-cpu",
@@ -184,27 +195,27 @@
     "scikit-learn",
     "scipy",
     "sentencepiece",
+    "together",
     "tqdm",
     "transformers",
     "uvicorn",
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "meta-reference-gpu": [
-    "accelerate",
+  "ollama": [
+    "aiohttp",
     "aiosqlite",
     "blobfile",
     "chardet",
     "chromadb-client",
-    "fairscale",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
-    "lm-format-enforcer",
     "matplotlib",
     "nltk",
     "numpy",
+    "ollama",
     "pandas",
     "pillow",
     "psycopg2-binary",
@@ -213,28 +224,22 @@
     "scikit-learn",
     "scipy",
     "sentencepiece",
-    "torch",
-    "torchvision",
     "tqdm",
     "transformers",
     "uvicorn",
-    "zmq",
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "meta-reference-quantized-gpu": [
-    "accelerate",
+  "bedrock": [
     "aiosqlite",
     "blobfile",
+    "boto3",
     "chardet",
     "chromadb-client",
-    "fairscale",
     "faiss-cpu",
     "fastapi",
-    "fbgemm-gpu",
     "fire",
     "httpx",
-    "lm-format-enforcer",
     "matplotlib",
     "nltk",
     "numpy",
@@ -246,17 +251,13 @@
     "scikit-learn",
     "scipy",
     "sentencepiece",
-    "torch",
-    "torchao==0.5.0",
-    "torchvision",
     "tqdm",
     "transformers",
     "uvicorn",
-    "zmq",
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "ollama": [
+  "hf-endpoint": [
     "aiohttp",
     "aiosqlite",
     "blobfile",
@@ -266,10 +267,10 @@
     "fastapi",
     "fire",
     "httpx",
+    "huggingface_hub",
     "matplotlib",
     "nltk",
     "numpy",
-    "ollama",
     "pandas",
     "pillow",
     "psycopg2-binary",
@@ -284,17 +285,42 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "hf-endpoint": [
-    "aiohttp",
+  "fireworks": [
     "aiosqlite",
     "blobfile",
     "chardet",
     "chromadb-client",
     "faiss-cpu",
     "fastapi",
     "fire",
+    "fireworks-ai",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "cerebras": [
+    "aiosqlite",
+    "blobfile",
+    "cerebras_cloud_sdk",
+    "chardet",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
     "httpx",
-    "huggingface_hub",
     "matplotlib",
     "nltk",
     "numpy",
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		../../llama_stack/templates/cerebras/build.yaml