bentoml
diff --git a/‎.editorconfig
Lines changed: 0 additions & 3 deletions b/‎.editorconfig
Lines changed: 0 additions & 3 deletions
diff --git a/‎.gitattributes
Lines changed: 0 additions & 1 deletion b/‎.gitattributes
Lines changed: 0 additions & 1 deletion
diff --git a/‎.github/workflows/binary-releases.yml
Lines changed: 0 additions & 2 deletions b/‎.github/workflows/binary-releases.yml
Lines changed: 0 additions & 2 deletions
diff --git a/‎.github/workflows/build-pypi.yml
Lines changed: 0 additions & 6 deletions b/‎.github/workflows/build-pypi.yml
Lines changed: 0 additions & 6 deletions
diff --git a/‎README.md
Lines changed: 16 additions & 8 deletions b/‎README.md
Lines changed: 16 additions & 8 deletions
diff --git a/‎docs/.eslintrc.cjs
Lines changed: 0 additions & 105 deletions b/‎docs/.eslintrc.cjs
Lines changed: 0 additions & 105 deletions
diff --git a/‎docs/README.md
Lines changed: 0 additions & 9 deletions b/‎docs/README.md
Lines changed: 0 additions & 9 deletions
diff --git a/‎docs/components/features/index.tsx
Lines changed: 0 additions & 30 deletions b/‎docs/components/features/index.tsx
Lines changed: 0 additions & 30 deletions
diff --git a/‎docs/components/features/style.module.css
Lines changed: 0 additions & 135 deletions b/‎docs/components/features/style.module.css
Lines changed: 0 additions & 135 deletions
diff --git a/‎docs/components/icons/arrow-right.svg
Lines changed: 0 additions & 11 deletions b/‎docs/components/icons/arrow-right.svg
Lines changed: 0 additions & 11 deletions
@@ -7,9 +7,6 @@ charset = utf-8
 indent_style = space
 indent_size = 2
 
-[openllm-client/src/openllm_client/pb/v1/*]
-indent_size = unset
-
 [/node_modules/*]
 indent_size = unset
 indent_style = unset
 
@@ -3,7 +3,6 @@
 *_pb2*.pyi linguist-generated=true
 
 # Python sdk
-openllm-python/tests/models/__snapshots__/* linguist-generated=true
 openllm-python/README.md linguist-generated=true
 openllm-python/CHANGELOG.md linguist-generated=true
 openllm-core/src/openllm_core/config/__init__.py linguist-generated=true
 
@@ -7,7 +7,6 @@ on:
     branches: [main]
     paths-ignore:
       - '*.md'
-      - 'docs/**'
       - 'changelog.d/**'
       - 'assets/**'
       - 'openllm-node/**'
@@ -16,7 +15,6 @@ on:
     branches: [main]
     paths-ignore:
       - '*.md'
-      - 'docs/**'
       - 'changelog.d/**'
       - 'assets/**'
       - 'openllm-node/**'
 
@@ -18,18 +18,12 @@ on:
   push:
     branches: [main]
     paths-ignore:
-      - 'docs/**'
-      - 'bazel/**'
-      - 'typings/**'
       - '*.md'
       - 'changelog.d/**'
       - 'assets/**'
   pull_request:
     branches: [main]
     paths-ignore:
-      - 'docs/**'
-      - 'bazel/**'
-      - 'typings/**'
       - '*.md'
       - 'changelog.d/**'
       - 'assets/**'
 
@@ -760,11 +760,8 @@ Quantization is a technique to reduce the storage and computation requirements f
 
 OpenLLM supports the following quantization techniques:
 
-- [LLM.int8(): 8-bit Matrix Multiplication](https://arxiv.org/abs/2208.07339) through [bitsandbytes](https://github.com/TimDettmers/bitsandbytes)
-- [SpQR: A Sparse-Quantized Representation for Near-Lossless LLM Weight Compression
-  ](https://arxiv.org/abs/2306.03078) through [bitsandbytes](https://github.com/TimDettmers/bitsandbytes)
-- [AWQ: Activation-aware Weight Quantization](https://arxiv.org/abs/2306.00978),
-- [GPTQ: Accurate Post-Training Quantization](https://arxiv.org/abs/2210.17323)
+- [AWQ: Activation-aware Weight Quantization](https://arxiv.org/abs/2306.00978).
+- [GPTQ: Accurate Post-Training Quantization](https://arxiv.org/abs/2210.17323).
 - [SqueezeLLM: Dense-and-Sparse Quantization](https://arxiv.org/abs/2306.07629).
 
 > [!NOTE]
@@ -816,10 +813,21 @@ from llama_index.llms.openllm import OpenLLMAPI
 Spin up an OpenLLM server, and connect to it by specifying its URL:
 
 ```python
-from langchain.llms import OpenLLM
+from langchain.llms import OpenLLMAPI
 
-llm = OpenLLM(server_url='http://44.23.123.1:3000', server_type='http')
-llm('What is the difference between a duck and a goose? And why there are so many Goose in Canada?')
+llm = OpenLLMAPI(server_url='http://44.23.123.1:3000')
+llm.invoke('What is the difference between a duck and a goose? And why there are so many Goose in Canada?')
+
+# streaming
+for it in llm.stream('What is the difference between a duck and a goose? And why there are so many Goose in Canada?'):
+  print(it, flush=True, end='')
+
+# async context
+await llm.ainvoke('What is the difference between a duck and a goose? And why there are so many Goose in Canada?')
+
+# async streaming
+async for it in llm.astream('What is the difference between a duck and a goose? And why there are so many Goose in Canada?'):
+  print(it, flush=True, end='')
 ```
 
 <!-- hatch-fancy-pypi-readme interim stop -->