diff --git a/Dockerfile b/Dockerfile
index e1a24d011..c0645cf9a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,7 +13,7 @@ ENV PATH="/opt/venv/bin:$PATH"
 COPY requirements.txt .
 RUN pip install --upgrade pip && \
     pip install --no-cache-dir -r requirements.txt
-RUN python3 -m pip install llama-cpp-python
+RUN python3 -m pip install llama-cpp-python==0.2.7 --force-reinstall --upgrade --no-cache-dir
 
 RUN python3.10 -c "import nltk; nltk.download('punkt')" && \
     python3.10 -c "import nltk; nltk.download('averaged_perceptron_tagger')"
diff --git a/Dockerfile-gpu b/Dockerfile-gpu
index 3cb7e0858..0b11e1b41 100644
--- a/Dockerfile-gpu
+++ b/Dockerfile-gpu
@@ -32,7 +32,7 @@ COPY . .
 ENV CUDA_DOCKER_ARCH=all
 ENV LLAMA_CUBLAS=1
 
-RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
+RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.7 --force-reinstall --upgrade --no-cache-dir
 
 # Make necessary scripts executable
 RUN chmod +x ./entrypoint.sh ./wait-for-it.sh ./install_tool_dependencies.sh ./entrypoint_celery.sh
diff --git a/docker-compose-gpu.yml b/docker-compose-gpu.yml
index 87e8ead0d..2428b0b8a 100644
--- a/docker-compose-gpu.yml
+++ b/docker-compose-gpu.yml
@@ -38,7 +38,7 @@ services:
         reservations:
           devices:
             - driver: nvidia
-              count: 1
+              count: all
               capabilities: [gpu]
   gui:
     build:
diff --git a/requirements.txt b/requirements.txt
index ab45bb1c7..9ebdd1a49 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -158,3 +158,4 @@ google-generativeai==0.1.0
 unstructured==0.8.1
 ai21==1.2.6
 typing-extensions==4.5.0
+llama_cpp_python==0.2.7
\ No newline at end of file
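
Not part of the diff itself: a minimal smoke test one might run inside the rebuilt image to confirm that the pinned release is the one actually installed and that the native extension imports. It resolves the distribution version via importlib.metadata, so it makes no assumption about the package exposing a __version__ attribute.

# hypothetical post-build check, not included in this PR
from importlib.metadata import version

import llama_cpp  # import fails here if the compiled extension did not build

installed = version("llama-cpp-python")
assert installed == "0.2.7", f"expected llama-cpp-python 0.2.7, got {installed}"
print(f"llama-cpp-python {installed} imported successfully")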