feat: madlad model support with ctranslate2 #3

Merged · merged 10 commits on Aug 5, 2024

140 changes: 0 additions & 140 deletions .github/workflows/integration-test.yml

This file was deleted.

1 change: 1 addition & 0 deletions .gitignore
@@ -93,3 +93,4 @@ MANIFEST
converted/

geckodriver.log
models/
27 changes: 27 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,27 @@
ci:
  skip: [pyright]

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: check-yaml
      - id: check-toml
      - id: mixed-line-ending
      - id: trailing-whitespace
        files: lib
      - id: end-of-file-fixer

  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.3.5
    hooks:
      - id: ruff

  - repo: local
    hooks:
      - id: pyright
        name: pyright
        entry: pyright
        language: system
        types: [python]
        pass_filenames: false
46 changes: 31 additions & 15 deletions Dockerfile
@@ -1,22 +1,38 @@
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04

RUN \
    apt update && \
    apt install -y python3 python3-pip
ENV DEBIAN_FRONTEND noninteractive

COPY requirements.txt /
RUN apt-get update && \
    apt-get install -y software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends python3.11 python3.11-venv python3-pip vim git && \
    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
    apt-get -y clean && \
    rm -rf /var/lib/apt/lists/*

ADD cs[s] /app/css
ADD im[g] /app/img
ADD j[s] /app/js
ADD l10[n] /app/l10n
ADD li[b] /app/lib
ADD model[s] /app/models
# Set working directory
WORKDIR /app

# Copy requirements files
COPY requirements.txt .

# Install requirements
RUN python3 -m pip install --no-cache-dir --no-deps -r requirements.txt

RUN \
    python3 -m pip install -r requirements.txt && rm -rf ~/.cache && rm requirements.txt
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute
ENV DEBIAN_FRONTEND dialog

# Copy application files
ADD cs[s] /app/css
ADD im[g] /app/img
ADD j[s] /app/js
ADD l10[n] /app/l10n
ADD li[b] /app/lib
ADD config.json /app/config.json
ADD languages.json /app/languages.json

WORKDIR /app/lib
ENTRYPOINT ["python3", "main.py"]
ENTRYPOINT ["python3", "lib/main.py"]

LABEL org.opencontainers.image.source="https://github.com/nextcloud/translate2"
2 changes: 1 addition & 1 deletion README.md
@@ -1 +1 @@
# Nextcloud Local Machine Translation
# Nextcloud Local Machine Translation
19 changes: 19 additions & 0 deletions config.json
@@ -0,0 +1,19 @@
{
    "__comment::log_level": "Log level for the app, see https://docs.python.org/3/library/logging.html#logging-levels",
    "__comment::tokenizer_file": "The tokenizer file name inside the model directory (loader.model_path)",
    "__comment::loader": "CTranslate2 loader options, see https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.__init__. Use the 'model_path' key for local paths or the 'model_name' key for models hosted on Hugging Face. Both can't be used at the same time.",
    "__comment::inference": "CTranslate2 inference options, see the kwargs in https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.translate_batch",
    "__comment::changes_to_the_config": "The program needs to be restarted if you change this file, since it is loaded into memory on startup",
    "log_level": 20,
    "tokenizer_file": "spiece.model",
    "loader": {
        "model_name": "Nextcloud-AI/madlad400-3b-mt-ct2-int8_float32",
        "inter_threads": 4,
        "intra_threads": 0
    },
    "inference": {
        "max_batch_size": 8192,
        "sampling_temperature": 0.0001,
        "disable_unk": true
    }
}
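
The __comment keys above describe how config.json drives CTranslate2. For illustration only, here is a minimal sketch of how such a config could be consumed. This is an assumption, not the app's actual lib/main.py: the translate() helper, the use of huggingface_hub.snapshot_download for the model_name case, and the "<2xx>" target-language prefix handling are illustrative.

# Minimal sketch (illustrative, not the app's actual lib/main.py) of how the
# "loader" and "inference" sections of config.json map onto the CTranslate2 API.
import json

import ctranslate2
import sentencepiece as spm
from huggingface_hub import snapshot_download

with open("config.json") as f:
    config = json.load(f)

loader = dict(config["loader"])
# "model_path" would point at a local directory; "model_name" at a Hugging Face repo.
model_path = loader.pop("model_path", None) or snapshot_download(loader.pop("model_name"))

# The remaining loader keys (inter_threads, intra_threads, ...) are passed to the constructor.
translator = ctranslate2.Translator(model_path, **loader)
tokenizer = spm.SentencePieceProcessor(model_file=f"{model_path}/{config['tokenizer_file']}")

def translate(text: str, target_lang: str) -> str:
    # MADLAD-400 expects the target language as a "<2xx>" prefix on the source text.
    tokens = tokenizer.encode(f"<2{target_lang}> {text}", out_type=str)
    results = translator.translate_batch([tokens], **config["inference"])
    return tokenizer.decode(results[0].hypotheses[0])

print(translate("Nextcloud is a self-hosted collaboration platform.", "de"))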