fix: Optimize Dockerfile and extend TTS generation time
- Consolidate pip installations in Dockerfile for better layer caching
- Add --no-cache-dir flag to reduce image size
- Modify COPY command for app directory
- Increase the TTS generation cap from ~30 to ~60 seconds of audio (max_new_tokens)
manascb1344 committed Feb 22, 2025
1 parent aab9af0 commit 7d57b33
Showing 2 changed files with 5 additions and 8 deletions.
11 changes: 4 additions & 7 deletions Dockerfile
@@ -21,15 +21,12 @@ RUN git clone https://github.com/Zyphra/Zonos.git /app/zonos && \
    pip3 install /app/zonos

# Install specific wheel files with GPU support
-RUN pip3 install flash-attn --no-build-isolation --no-deps \
-    && FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip3 install flash-attn --no-build-isolation
-
-RUN pip3 install https://github.com/state-spaces/mamba/releases/download/v2.2.4/mamba_ssm-2.2.4+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
-
-RUN pip3 install https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.5.0.post8/causal_conv1d-1.5.0.post8+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+RUN FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip3 install flash-attn --no-build-isolation \
+    && pip3 install --no-cache-dir https://github.com/state-spaces/mamba/releases/download/v2.2.4/mamba_ssm-2.2.4+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl \
+    && pip3 install --no-cache-dir https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.5.0.post8/causal_conv1d-1.5.0.post8+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl

# Copy application code
-COPY app app/
+COPY app/ app/
COPY pyproject.toml .

# Environment variables
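As a quick post-build check (not part of this commit), a short script like the one below could be run inside the built image to confirm that the GPU wheels installed by the consolidated RUN step import cleanly. The module names are assumptions inferred from the wheel filenames above.

# Post-build sanity check (a sketch, not part of this commit).
# Module names are assumed from the wheel filenames in the Dockerfile.
import importlib

for name in ("flash_attn", "mamba_ssm", "causal_conv1d"):
    try:
        module = importlib.import_module(name)
        print(name, getattr(module, "__version__", "installed"))
    except ImportError as exc:
        print(f"{name}: import failed ({exc})")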
2 changes: 1 addition & 1 deletion app/services/tts.py
@@ -135,7 +135,7 @@ def generate_audio(

# Generate audio
logger.info("Generating audio")
-max_new_tokens = 86 * 30  # ~30 seconds of audio
+max_new_tokens = 86 * 60  # ~60 seconds of audio
codes = selected_model.generate(
prefix_conditioning=conditioning,
audio_prefix_codes=audio_prefix_codes,
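For reference, the new cap follows the same arithmetic the code already uses: roughly 86 codec tokens per second of generated audio, so doubling the multiplier doubles the budget. A minimal sketch of that calculation, assuming the 86 tokens/second rate implied by the existing constant:

# Token-budget arithmetic behind the change above (a sketch; assumes the
# codec produces ~86 tokens per second, as the existing constant implies).
TOKENS_PER_SECOND = 86

def max_tokens_for(seconds: int) -> int:
    """Return the max_new_tokens budget for a target audio duration."""
    return TOKENS_PER_SECOND * seconds

print(max_tokens_for(30))  # 2580 tokens -> old ~30 s cap
print(max_tokens_for(60))  # 5160 tokens -> new ~60 s cap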
