Commit 6768108
adjust docstring
Signed-off-by: KuntaiDu <[email protected]>
KuntaiDu committed Nov 20, 2024
1 parent a36c12c commit 6768108
Showing 2 changed files with 19 additions and 15 deletions.
@@ -1,11 +1,13 @@
"""
This file implements a simple torch distributed connector by 3 classes:
- `TorchDistributedPipe`: a tensor transmission pipe between vllm instances,
using `torch.distributed`
- `TorchDistributedBuffer`: a buffer to store tensors, implemented on top
of `TorchDistributedPipe`
- `TorchDistributedConnector`: a torch distributed connector between P/D
instance, implemented on top of `TorchDistributedBuffer`
Implements a distributed key-value (KV) cache transfer mechanism for vLLM
instances with buffer management.
Key Features:
- Distributed KV cache transmission using PyNccl pipes.
- Non-blocking `insert`, blocking `drop_select`.
- Use CPU signal pipe to avoid racing condition
- Handles buffer size constraints and provide backpressure mechanism to
stop the prefill instance when the decode instance is slow.
"""
import threading
import time
Expand Down
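For illustration, the non-blocking `insert` / blocking `drop_select` contract and the backpressure behavior described in the new docstring could be sketched roughly as below. The class name, method signatures, and `max_items` limit are hypothetical, not the vLLM implementation; one simple way to combine a mostly non-blocking insert with backpressure is to block only when the buffer hits its size limit. Only the Python standard library is assumed.

# Illustrative sketch only: a minimal lookup buffer with the insert /
# drop_select contract and backpressure behavior described above.
# Names are hypothetical, not vLLM's actual classes.
import threading
from collections import deque
from typing import Any, Optional


class LookupBuffer:
    """Bounded buffer: prefill inserts KV entries, decode drop_selects them."""

    def __init__(self, max_items: int = 8):
        self._entries: deque = deque()
        self._max_items = max_items
        self._cond = threading.Condition()

    def insert(self, key: Any, value: Any) -> None:
        # Returns immediately while there is space; when the buffer is full
        # (the decode side is slow), it blocks -- this is the backpressure
        # that stops the prefill instance.
        with self._cond:
            while len(self._entries) >= self._max_items:
                self._cond.wait()
            self._entries.append((key, value))
            self._cond.notify_all()

    def drop_select(self, key: Any) -> Optional[Any]:
        # Blocking: wait until an entry matching `key` arrives, then
        # remove ("drop") it from the buffer and return it.
        with self._cond:
            while True:
                for i, (k, v) in enumerate(self._entries):
                    if k == key:
                        del self._entries[i]
                        self._cond.notify_all()
                        return v
                self._cond.wait()

In this sketch a prefill thread calling `insert` and a decode thread calling `drop_select` coordinate purely through the condition variable: the prefill side stalls only when the buffer is full, which plays the role of the backpressure mechanism the docstring mentions.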
18 changes: 10 additions & 8 deletions vllm/distributed/kv_transfer/kv_connector/pynccl_connector/pipe.py
@@ -1,12 +1,14 @@
"""
This file implements a simple PyNccl pipe that can send and receive
Optional[torch.Tensor] between two ranks.
We will first transmit the metadata, and then the tensor.
Metadata format:
Metadata = Dict[str, Optional[torch.Tensor]]
- "dtype": The data type of the tensor (tensor.dtype) or None
- "shape": The shape of the tensor (tensor.shape) or None
This module implements a PyNccl pipe for sending and receiving
Optional[torch.Tensor] between distributed ranks with advanced
communication features.
Key Features:
- Supports sending and receiving tensors with metadata
- Handles both CUDA and CPU device communications
- Implements a non-blocking tensor transfer mechanism
- Manages buffer size and provides backpressure control
- Supports distributed process groups with configurable parameters
"""

import threading
Expand Down
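For illustration, the metadata-then-tensor protocol described in the original docstring (send dtype/shape first, then the payload, with None encoded in the metadata) could be sketched as follows. The function names, the dtype code table, and MAX_DIMS are assumptions made for this example, not the actual PyNccl pipe; the sketch also assumes `torch.distributed.init_process_group` has already been called (e.g. with the gloo backend for CPU tensors).

# Illustrative sketch of the two-step protocol (metadata first, then the
# tensor). Names, the dtype code table, and MAX_DIMS are assumptions for
# this example, not vLLM's actual pipe implementation.
from typing import Optional

import torch
import torch.distributed as dist

DTYPE_CODES = {torch.float16: 0, torch.bfloat16: 1, torch.float32: 2}
CODE_DTYPES = {code: dt for dt, code in DTYPE_CODES.items()}
MAX_DIMS = 8  # fixed metadata size so the receiver knows how much to read


def send_optional_tensor(t: Optional[torch.Tensor], dst: int) -> None:
    # Metadata layout: [is_none, dtype_code, ndim, shape..., zero padding].
    meta = torch.zeros(3 + MAX_DIMS, dtype=torch.int64)
    if t is None:
        meta[0] = 1
    else:
        meta[1] = DTYPE_CODES[t.dtype]
        meta[2] = t.dim()
        meta[3:3 + t.dim()] = torch.tensor(t.shape, dtype=torch.int64)
    dist.send(meta, dst=dst)                 # step 1: metadata
    if t is not None:
        dist.send(t.contiguous(), dst=dst)   # step 2: the tensor itself


def recv_optional_tensor(src: int) -> Optional[torch.Tensor]:
    meta = torch.zeros(3 + MAX_DIMS, dtype=torch.int64)
    dist.recv(meta, src=src)
    if meta[0].item() == 1:
        return None                          # the sender transmitted None
    dtype = CODE_DTYPES[int(meta[1])]
    shape = [int(meta[3 + i]) for i in range(int(meta[2]))]
    buf = torch.empty(shape, dtype=dtype)
    dist.recv(buf, src=src)
    return buf

Because the fixed-size metadata tensor arrives first, the receiver knows whether a payload follows and how large a buffer to allocate before issuing the second `recv`.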
