Refine warmup error (#19)
regisss authored Jul 9, 2024
1 parent bb45b6b commit a5a076c
Showing 2 changed files with 11 additions and 6 deletions.
README.md (11 changes: 8 additions & 3 deletions)
@@ -2,16 +2,21 @@
 
 To use [🤗 text-embeddings-inference](https://github.com/huggingface/text-embeddings-inference) on Habana Gaudi/Gaudi2, follow these steps:
 
-1. Build the Docker image located in this folder with:
+1. Pull the official Docker image with:
    ```bash
-   docker build -f Dockerfile-hpu -t tei_gaudi .
+   docker pull ghcr.io/huggingface/tei-gaudi:latest
    ```
+   > [!NOTE]
+   > Alternatively, you can build the Docker image using `Dockerfile-hpu` located in this folder with:
+   > ```bash
+   > docker build -f Dockerfile-hpu -t tei_gaudi .
+   > ```
 2. Launch a local server instance on 1 Gaudi card:
    ```bash
    model=BAAI/bge-large-en-v1.5
    volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
-   docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tei_gaudi --model-id $model --pooling cls
+   docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e MAX_WARMUP_SEQUENCE_LENGTH=512 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tei-gaudi:latest --model-id $model --pooling cls
    ```
 3. You can then send a request:
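For reference, a request to the server launched in step 2 goes through text-embeddings-inference's standard `/embed` route. A minimal sketch, assuming the port mapping above and an illustrative input (this snippet follows upstream TEI conventions and is not part of this commit):

```bash
# Illustrative request; the /embed route and JSON shape come from the
# standard text-embeddings-inference API, not from this change.
curl 127.0.0.1:8080/embed \
    -X POST \
    -d '{"inputs": "What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
```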
backends/src/lib.rs (6 changes: 3 additions & 3 deletions)
@@ -136,15 +136,15 @@ impl Backend {
         }
         if max_warmup_length > max_input_length {
             return Err(BackendError::Start(
-                "max_warmup_length exceeds model's max_input_length".to_string()
+                format!("max_warmup_length ({max_warmup_length}) exceeds model's max_input_length ({max_input_length}); you can modify this value by adding `-e MAX_WARMUP_SEQUENCE_LENGTH=<new_warmup_length>` to your Docker run command")
             ));
         }
         if seq_bucket_size > max_warmup_length {
             return Err(BackendError::Start(
-                "PAD_SEQUENCE_TO_MULTIPLE_OF exceeds model's max warmup length".to_string()
+                format!("PAD_SEQUENCE_TO_MULTIPLE_OF ({seq_bucket_size}) exceeds model's max warmup length ({max_warmup_length}); you can modify these values by adding `-e PAD_SEQUENCE_TO_MULTIPLE_OF=<new_value>` or `-e MAX_WARMUP_SEQUENCE_LENGTH=<new_value>` to your Docker run command")
             ));
         }
 
         max_input_length = std::cmp::min(max_input_length, max_warmup_length);
         let mut seq_lengths: Vec<u32> = (seq_bucket_size..max_input_length+1).step_by(seq_bucket_size as usize).collect();
         if let Some(&last) = seq_lengths.last() {
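Both checks enforce the same ordering: the pad multiple must not exceed the warmup length, which in turn must not exceed the model's maximum input length; warmup then runs over bucketed sequence lengths (seq_bucket_size, 2 × seq_bucket_size, and so on up to the capped max_input_length). A sketch of a launch command whose settings satisfy that ordering; the value 128 for PAD_SEQUENCE_TO_MULTIPLE_OF is illustrative rather than a default confirmed by this commit, while 512 matches the value used in the README above:

```bash
# Sketch: values chosen so that
#   PAD_SEQUENCE_TO_MULTIPLE_OF (128) <= MAX_WARMUP_SEQUENCE_LENGTH (512) <= model max_input_length
# 128 is an illustrative choice, not a documented default.
model=BAAI/bge-large-en-v1.5
volume=$PWD/data
docker run -p 8080:80 -v $volume:/data --runtime=habana \
    -e HABANA_VISIBLE_DEVICES=all \
    -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
    -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 \
    -e MAX_WARMUP_SEQUENCE_LENGTH=512 \
    --cap-add=sys_nice --ipc=host \
    ghcr.io/huggingface/tei-gaudi:latest --model-id $model --pooling cls
```

With these settings, the warmup in the code above would cover the bucketed sequence lengths 128, 256, 384, and 512.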
