From 92e63281230559a39df3b71736aecc83e953760c Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Wed, 10 Jul 2024 14:36:41 +0530 Subject: [PATCH 01/10] wip Signed-off-by: Anupam Kumar --- .pre-commit-config.yaml | 27 +++ Dockerfile | 43 +++- config.json | 16 ++ languages.json | 420 ++++++++++++++++++++++++++++++++++++++++ lib/Service.py | 102 +++++----- lib/main.py | 62 ++++-- requirements.in.txt | 3 + requirements.txt | 44 ++++- 8 files changed, 640 insertions(+), 77 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 config.json create mode 100644 languages.json create mode 100644 requirements.in.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..cb19835 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +ci: + skip: [pyright] + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-yaml + - id: check-toml + - id: mixed-line-ending + - id: trailing-whitespace + files: lib + - id: end-of-file-fixer + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.3.5 + hooks: + - id: ruff + + - repo: local + hooks: + - id: pyright + name: pyright + entry: pyright + language: system + types: [python] + pass_filenames: false diff --git a/Dockerfile b/Dockerfile index 994e7ee..a7d96a9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,22 +1,47 @@ -FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 +FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04 -RUN \ - apt update && \ - apt install -y python3 python3-pip +ENV DEBIAN_FRONTEND=noninteractive -COPY requirements.txt / +RUN apt-get update +RUN apt-get install -y software-properties-common +RUN add-apt-repository -y ppa:deadsnakes/ppa +RUN apt-get update +RUN apt-get install -y --no-install-recommends python3.11 python3.11-venv python3-pip vim git pciutils +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 +RUN apt-get -y clean +RUN rm -rf /var/lib/apt/lists/* +ENV NVIDIA_VISIBLE_DEVICES all +ENV NVIDIA_DRIVER_CAPABILITIES compute +ENV DEBIAN_FRONTEND=dialog + +# Set working directory +WORKDIR /app + +# Copy requirements files +COPY requirements.txt . 
+ +# Install requirements +RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel +RUN python3 -m pip install --no-cache-dir https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.82-cu121/llama_cpp_python-0.2.82-cp311-cp311-linux_x86_64.whl +RUN sed -i '/llama_cpp_python/d' requirements.txt +RUN python3 -m pip install --no-cache-dir --no-deps -r requirements.txt + + +# CUDA 12.1 compat lib +ENV LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH +ENV LIBRARY_PATH=/usr/local/cuda/compat:$LIBRARY_PATH + +# Copy application files ADD cs[s] /app/css ADD im[g] /app/img ADD j[s] /app/js ADD l10[n] /app/l10n ADD li[b] /app/lib ADD model[s] /app/models - -RUN \ - python3 -m pip install -r requirements.txt && rm -rf ~/.cache && rm requirements.txt +ADD config.json /app/config.json WORKDIR /app/lib -ENTRYPOINT ["python3", "main.py"] +ENTRYPOINT ["python3", "-u", "main.py"] LABEL org.opencontainers.image.source="https://github.com/nextcloud/translate2" diff --git a/config.json b/config.json new file mode 100644 index 0000000..e3a0d07 --- /dev/null +++ b/config.json @@ -0,0 +1,16 @@ +{ + "llama": [ + { + "model_file": "madlad400-3b-mt-q8_0.gguf", + "n_ctx": 8192, + "max_tokens": 8192, + "n_batch": 512 + }, + { + "model_file": "madlad400-7b-mt-bt-q8_0.gguf", + "n_ctx": 4096, + "max_tokens": 4096, + "n_batch": 256 + } + ] +} diff --git a/languages.json b/languages.json new file mode 100644 index 0000000..1f834a2 --- /dev/null +++ b/languages.json @@ -0,0 +1,420 @@ +{ + "en": "English", + "ru": "Russian", + "es": "Spanish", + "fr": "French", + "de": "German", + "it": "Italian", + "pt": "Portuguese", + "pl": "Polish", + "nl": "Dutch", + "vi": "Vietnamese", + "tr": "Turkish", + "sv": "Swedish", + "id": "Indonesian", + "ro": "Romanian", + "cs": "Czech", + "zh": "Mandarin Chinese", + "hu": "Hungarian", + "ja": "Japanese", + "th": "Thai", + "fi": "Finnish", + "fa": "Persian", + "uk": "Ukrainian", + "da": "Danish", + "el": "Greek", + "no": "Norwegian", + "bg": "Bulgarian", + "sk": "Slovak", + "ko": "Korean", + "ar": "Arabic", + "lt": "Lithuanian", + "ca": "Catalan", + "sl": "Slovenian", + "he": "Hebrew", + "et": "Estonian", + "lv": "Latvian", + "hi": "Hindi", + "sq": "Albanian", + "ms": "Malay", + "az": "Azerbaijani", + "sr": "Serbian", + "ta": "Tamil", + "hr": "Croatian", + "kk": "Kazakh", + "is": "Icelandic", + "ml": "Malayalam", + "mr": "Marathi", + "te": "Telugu", + "af": "Afrikaans", + "gl": "Galician", + "fil": "Filipino", + "be": "Belarusian", + "mk": "Macedonian", + "eu": "Basque", + "bn": "Bengali", + "ka": "Georgian", + "mn": "Mongolian", + "bs": "Bosnian", + "uz": "Uzbek", + "ur": "Urdu", + "sw": "Swahili", + "yue": "Cantonese", + "ne": "Nepali", + "kn": "Kannada", + "kaa": "Kara-Kalpak", + "gu": "Gujarati", + "si": "Sinhala", + "cy": "Welsh", + "eo": "Esperanto", + "la": "Latin", + "hy": "Armenian", + "ky": "Kyrghyz", + "tg": "Tajik", + "ga": "Irish", + "mt": "Maltese", + "my": "Myanmar (Burmese)", + "km": "Khmer", + "tt": "Tatar", + "so": "Somali", + "ku": "Kurdish (Kurmanji)", + "ps": "Pashto", + "pa": "Punjabi", + "rw": "Kinyarwanda", + "lo": "Lao", + "ha": "Hausa", + "dv": "Dhivehi", + "fy": "W. 
Frisian", + "lb": "Luxembourgish", + "ckb": "Kurdish (Sorani)", + "mg": "Malagasy", + "gd": "Scottish Gaelic", + "am": "Amharic", + "ug": "Uyghur", + "ht": "Haitian Creole", + "grc": "Ancient Greek", + "hmn": "Hmong", + "sd": "Sindhi", + "jv": "Javanese", + "mi": "Maori", + "tk": "Turkmen", + "ceb": "Cebuano", + "yi": "Yiddish", + "ba": "Bashkir", + "fo": "Faroese", + "or": "Odia (Oriya)", + "xh": "Xhosa", + "su": "Sundanese", + "kl": "Kalaallisut", + "ny": "Chichewa", + "sm": "Samoan", + "sn": "Shona", + "co": "Corsican", + "zu": "Zulu", + "ig": "Igbo", + "yo": "Yoruba", + "pap": "Papiamento", + "st": "Sesotho", + "haw": "Hawaiian", + "as": "Assamese", + "oc": "Occitan", + "cv": "Chuvash", + "lus": "Mizo", + "tet": "Tetum", + "gsw": "Swiss German", + "sah": "Yakut", + "br": "Breton", + "rm": "Romansh", + "sa": "Sanskrit", + "bo": "Tibetan", + "om": "Oromo", + "se": "N. Sami", + "ce": "Chechen", + "cnh": "Hakha Chin", + "ilo": "Ilocano", + "hil": "Hiligaynon", + "udm": "Udmurt", + "os": "Ossetian", + "lg": "Luganda", + "ti": "Tigrinya", + "vec": "Venetian", + "ts": "Tsonga", + "tyv": "Tuvinian", + "kbd": "Kabardian", + "ee": "Ewe", + "iba": "Iban", + "av": "Avar", + "kha": "Khasi", + "to": "Tonga (Tonga Islands)", + "tn": "Tswana", + "nso": "Sepedi", + "fj": "Fijian", + "zza": "Zaza", + "ak": "Twi", + "ada": "Adangme", + "otq": "Querétaro Otomi", + "dz": "Dzongkha", + "bua": "Buryat", + "cfm": "Falam Chin", + "ln": "Lingala", + "chm": "Meadow Mari", + "gn": "Guarani", + "krc": "Karachay-Balkar", + "wa": "Walloon", + "hif": "Fiji Hindi", + "yua": "Yucateco", + "srn": "Sranan Tongo", + "war": "Waray (Philippines)", + "rom": "Romani", + "bik": "Central Bikol", + "pam": "Pampanga", + "sg": "Sango", + "lu": "Luba-Katanga", + "ady": "Adyghe", + "kbp": "Kabiyè", + "syr": "Syriac", + "ltg": "Latgalian", + "myv": "Erzya", + "iso": "Isoko", + "kac": "Kachin", + "bho": "Bhojpuri", + "ay": "Aymara", + "kum": "Kumyk", + "qu": "Quechua", + "za": "Zhuang", + "pag": "Pangasinan", + "ngu": "Guerrero Nahuatl", + "ve": "Venda", + "pck": "Paite Chin", + "zap": "Zapotec", + "tyz": "Tày", + "hui": "Huli", + "bbc": "Batak Toba", + "tzo": "Tzotzil", + "tiv": "Tiv", + "ksd": "Kuanua", + "gom": "Goan Konkani", + "min": "Minangkabau", + "ang": "Old English", + "nhe": "E. Huasteca Nahuatl", + "bgp": "E. Baluchi", + "nzi": "Nzima", + "nnb": "Nande", + "nv": "Navajo", + "zxx": "Noise", + "bci": "Baoulé", + "kv": "Komi", + "new": "Newari", + "mps": "Dadibi", + "alt": "S. Altai", + "meu": "Motu", + "bew": "Betawi", + "fon": "Fon", + "iu": "Inuktitut", + "abt": "Ambulas", + "mgh": "Makhuwa-Meetto", + "mnw": "Mon", + "tvl": "Tuvalu", + "dov": "Dombe", + "tlh": "Klingon", + "ho": "Hiri Motu", + "kw": "Cornish", + "mrj": "Hill Mari", + "meo": "Kedah Malay", + "crh": "Crimean Tatar", + "mbt": "Matigsalug Manobo", + "emp": "N. Emberá", + "ace": "Achinese", + "ium": "Iu Mien", + "mam": "Mam", + "gym": "Ngäbere", + "mai": "Maithili", + "crs": "Seselwa Creole French", + "pon": "Pohnpeian", + "ubu": "Umbu-Ungu", + "fip": "Fipa", + "quc": "K’iche’", + "gv": "Manx", + "kj": "Kuanyama", + "btx": "Batak Karo", + "ape": "Bukiyip", + "chk": "Chuukese", + "rcf": "Réunion Creole French", + "shn": "Shan", + "tzh": "Tzeltal", + "mdf": "Moksha", + "ppk": "Uma", + "ss": "Swati", + "gag": "Gagauz", + "cab": "Garifuna", + "kri": "Krio", + "seh": "Sena", + "ibb": "Ibibio", + "tbz": "Ditammari", + "bru": "E. 
Bru", + "enq": "Enga", + "ach": "Acoli", + "cuk": "San Blas Kuna", + "kmb": "Kimbundu", + "wo": "Wolof", + "kek": "Kekchí", + "qub": "Huallaga Huánuco Quechua", + "tab": "Tabassaran", + "bts": "Batak Simalungun", + "kos": "Kosraean", + "rwo": "Rawa", + "cak": "Kaqchikel", + "tuc": "Mutu", + "bum": "Bulu", + "cjk": "Chokwe", + "gil": "Gilbertese", + "stq": "Saterfriesisch", + "tsg": "Tausug", + "quh": "S. Bolivian Quechua", + "mak": "Makasar", + "arn": "Mapudungun", + "ban": "Balinese", + "jiv": "Shuar", + "sja": "Epena", + "yap": "Yapese", + "tcy": "Tulu", + "toj": "Tojolabal", + "twu": "Termanu", + "xal": "Kalmyk", + "amu": "Guerrero Amuzgo", + "rmc": "Carpathian Romani", + "hus": "Huastec", + "nia": "Nias", + "kjh": "Khakas", + "bm": "Bambara", + "guh": "Guahibo", + "mas": "Masai", + "acf": "St Lucian Creole French", + "dtp": "Kadazan Dusun", + "ksw": "S’gaw Karen", + "bzj": "Belize Kriol English", + "din": "Dinka", + "zne": "Zande", + "mad": "Madurese", + "msi": "Sabah Malay", + "mag": "Magahi", + "mkn": "Kupang Malay", + "kg": "Kongo", + "lhu": "Lahu", + "ch": "Chamorro", + "qvi": "Imbabura H. Quichua", + "mh": "Marshallese", + "djk": "E. Maroon Creole", + "sus": "Susu", + "mfe": "Morisien", + "srm": "Saramaccan", + "dyu": "Dyula", + "ctu": "Chol", + "gui": "E. Bolivian Guaraní", + "pau": "Palauan", + "inb": "Inga", + "bi": "Bislama", + "mni": "Meiteilon (Manipuri)", + "guc": "Wayuu", + "jam": "Jamaican Creole English", + "wal": "Wolaytta", + "jac": "Popti’", + "bas": "Basa (Cameroon)", + "gor": "Gorontalo", + "skr": "Saraiki", + "nyu": "Nyungwe", + "noa": "Woun Meu", + "sda": "Toraja-Sa’dan", + "gub": "Guajajára", + "nog": "Nogai", + "cni": "Asháninka", + "teo": "Teso", + "tdx": "Tandroy-Mahafaly Malagasy", + "sxn": "Sangir", + "rki": "Rakhine", + "nr": "South Ndebele", + "frp": "Arpitan", + "alz": "Alur", + "taj": "E. Tamang", + "lrc": "N. Luri", + "cce": "Chopi", + "rn": "Rundi", + "jvn": "Caribbean Javanese", + "hvn": "Sabu", + "nij": "Ngaju", + "dwr": "Dawro", + "izz": "Izii", + "msm": "Agusan Manobo", + "bus": "Bokobaru", + "ktu": "Kituba (DRC)", + "chr": "Cherokee", + "maz": "Central Mazahua", + "tzj": "Tz’utujil", + "suz": "Sunwar", + "knj": "W. Kanjobal", + "bim": "Bimoba", + "gvl": "Gulay", + "bqc": "Boko (Benin)", + "tca": "Ticuna", + "pis": "Pijin", + "prk": "Parauk", + "laj": "Lango (Uganda)", + "mel": "Central Melanau", + "qxr": "Cañar H. Quichua", + "niq": "Nandi", + "ahk": "Akha", + "shp": "Shipibo-Conibo", + "hne": "Chhattisgarhi", + "spp": "Supyire Senoufo", + "koi": "Komi-Permyak", + "krj": "Kinaray-A", + "quf": "Lambayeque Quechua", + "luz": "S. Luri", + "agr": "Aguaruna", + "tsc": "Tswa", + "mqy": "Manggarai", + "gof": "Gofa", + "gbm": "Garhwali", + "miq": "Mískito", + "dje": "Zarma", + "awa": "Awadhi", + "bjj": "Kanauji", + "qvz": "N. Pastaza Quichua", + "sjp": "Surjapuri", + "tll": "Tetela", + "raj": "Rajasthani", + "kjg": "Khmu", + "bgz": "Banggai", + "quy": "Ayacucho Quechua", + "cbk": "Chavacano", + "akb": "Batak Angkola", + "oj": "Ojibwa", + "ify": "Keley-I Kallahan", + "mey": "Hassaniyya", + "ks": "Kashmiri", + "cac": "Chuj", + "brx": "Bodo (India)", + "qup": "S. 
Pastaza Quechua", + "syl": "Sylheti", + "jax": "Jambi Malay", + "ff": "Fulfulde", + "ber": "Tamazight (Tfng)", + "tks": "Takestani", + "trp": "Kok Borok", + "mrw": "Maranao", + "adh": "Adhola", + "smt": "Simte", + "srr": "Serer", + "ffm": "Maasina Fulfulde", + "qvc": "Cajamarca Quechua", + "mtr": "Mewari", + "ann": "Obolo", + "kaa-Latn": "Kara-Kalpak (Latn)", + "aa": "Afar", + "noe": "Nimadi", + "nut": "Nung (Viet Nam)", + "gyn": "Guyanese Creole English", + "kwi": "Awa-Cuaiquer", + "xmm": "Manado Malay", + "msb": "Masbatenyo" +} diff --git a/lib/Service.py b/lib/Service.py index e17fb6d..9d73a6d 100644 --- a/lib/Service.py +++ b/lib/Service.py @@ -1,59 +1,71 @@ +"""Translation service""" + +import json import os +from pathlib import Path from time import perf_counter -from transformers import pipeline + +from llama_cpp.llama import Llama + + +class LoaderException(Exception): + pass + + +class LlamaContext: + def __init__(self, model_name: str, gpu_accelerated: bool): + try: + with open("../config.json") as f: + config = json.loads(f.read())["llama"][model_name] + config["model_path"] = Path("../models/", config["model_name"]) + del config["model_name"] + + self.llama = Llama(n_gpu_layers=-1 if gpu_accelerated else 0, **config) + except Exception as e: + raise LoaderException( + f"Error reading config, ensure config.json is present at {Path('..', os.getcwd())}" + ) from e + + def __enter__(self): + self.start = perf_counter() + return self.llama + + def __exit__(self, exc_type, exc_value, exc_tb): + print(f"time taken {perf_counter() - self.start}") + del self.llama + class Service: - dir_path = os.path.dirname(os.path.realpath(__file__)) + gpu_accelerated = os.getenv("COMPUTE_DEVICE", "cuda") != "cpu" + temperature = 0.1 + + def __init__(self): + try: + with open("../languages.json") as f: + self.languages = json.loads(f.read()) + except Exception as e: + raise Exception( + f"Error reading languages list, ensure languages.json is present at {Path('..', os.getcwd())}" + ) from e def get_lang_names(self): - return { - 'de': 'German', - 'en': 'English', - 'es': 'Spanish', - 'fr': 'French', - 'zh': 'Chinese', - 'it': 'Italian', - 'sv': 'Swedish', - 'ar': 'Arabic', - 'fi': 'Finnish', - 'nl': 'Dutch', - 'ja': 'Japanese', - 'tr': 'Turkish', - } + return self.languages def get_models(self): models = [] + languages = self.get_lang_names() - for file in os.scandir(self.dir_path + "/../models/"): - if os.path.isdir(file.path): - models.append(file.name) + for file in os.scandir("../models/"): + if os.path.isfile(file.path) and file.name.endswith(".gguf"): + models.append((file.name, languages)) return models - def get_langs(self): - lang_names = self.get_lang_names() - from_languages = {} - to_languages = {} - for model_name in self.get_models(): - [from_language, to_language] = model_name.split('-', 2) - from_languages[from_language] = lang_names[from_language] - to_languages[to_language] = lang_names[to_language] - return from_languages, to_languages - - def translate(self, from_language, to_language, text): - model_name = from_language + "-" + to_language - print(f"model: {model_name}") - - if not model_name in self.get_models(): - if 'en-'+to_language in self.get_models() and from_language+'-en' in self.get_models(): - return self.translate('en', to_language, self.translate(from_language, 'en', text)) - - raise Exception('Requested model is not available') - - translator = pipeline("translation", model=self.dir_path + "/../models/" + model_name) - print("translating") - start = perf_counter() - 
translation = translator(text) - print(f"time taken {perf_counter() - start}") + def translate(self, model_name: str, to_language: str, text: str): + print("translating text to", to_language) + + with LlamaContext(model_name, self.gpu_accelerated) as llama: + translation = llama(f"<2{to_language}> {text}", temperature=self.temperature) + print(translation) - return translation[0]['translation_text'] + return translation.choices[0].text diff --git a/lib/main.py b/lib/main.py index d5b81fd..561d98c 100644 --- a/lib/main.py +++ b/lib/main.py @@ -1,18 +1,20 @@ -"""Tha main module of the translate2 app -""" +"""The main module of the translate2 app""" import queue import threading import typing from contextlib import asynccontextmanager -from fastapi import Depends, FastAPI, responses, Body +# todo +from dotenv import load_dotenv +from fastapi import Body, FastAPI, Request, responses from nc_py_api import AsyncNextcloudApp, NextcloudApp -from nc_py_api.ex_app import LogLvl, anc_app, run_app, set_handlers -import torch -from Service import Service +from nc_py_api.ex_app import LogLvl, run_app, set_handlers +from Service import LoaderException, Service + +# todo +load_dotenv() -cuda = torch.cuda.is_available() service = Service() @asynccontextmanager @@ -30,15 +32,23 @@ async def lifespan(_app: FastAPI): TASK_LIST: queue.Queue = queue.Queue(maxsize=100) +@APP.exception_handler(LoaderException) +async def _(request: Request, exc: LoaderException): + print(f"Loader Error: {request.url.path}:", exc) + return responses.JSONResponse({ + "error": "The resource loader is facing some issues, please check the logs for more info" + }, 500) + + class BackgroundProcessTask(threading.Thread): def run(self, *args, **kwargs): # pylint: disable=unused-argument while True: task = TASK_LIST.get(block=True) try: - translation = service.translate(task.get("from_language"), task.get("to_language"), task.get("text")) + translation = service.translate(task["model"], task["to_language"], task["text"]) NextcloudApp().providers.translations.report_result( task_id=task["id"], - result=str(translation), + result=str(translation).strip(), ) except Exception as e: # noqa print(str(e)) @@ -50,15 +60,22 @@ def run(self, *args, **kwargs): # pylint: disable=unused-argument @APP.post("/translate") async def tiny_llama( - _nc: typing.Annotated[AsyncNextcloudApp, Depends(anc_app)], + name: typing.Annotated[str, Body()], from_language: typing.Annotated[str, Body()], to_language: typing.Annotated[str, Body()], text: typing.Annotated[str, Body()], task_id: typing.Annotated[int, Body()], ): try: - print({"text": text, "from_language": from_language, "to_language": to_language, "id": task_id}) - TASK_LIST.put({"text": text, "from_language": from_language, "to_language": to_language, "id": task_id}, block=False) + task = { + "model": name[11:], + "text": text, + "from_language": from_language, + "to_language": to_language, + "id": task_id, + } + print(task) + TASK_LIST.put(task) except queue.Full: return responses.JSONResponse(content={"error": "task queue is full"}, status_code=429) return responses.Response() @@ -67,16 +84,23 @@ async def tiny_llama( async def enabled_handler(enabled: bool, nc: AsyncNextcloudApp) -> str: print(f"enabled={enabled}") if enabled is True: - from_languages, to_languages = service.get_langs() - print(to_languages) - print(from_languages) - await nc.providers.translations.register('translate2', "Local Machine translation", '/translate', from_languages, to_languages) + models = service.get_models() + + for 
(model_name, languages) in models: + print( + f"Supported languages in model {model_name}: ({len(languages)}): {list(languages.values())[:10]}, ..." + ) + await nc.providers.translations.register( + f"translate2:{model_name}", + "Local Machine Translation", + "/translate", + languages, + languages, + ) else: - await nc.providers.speech_to_text.unregister('translate2') + await nc.providers.speech_to_text.unregister("translate2") return "" - - if __name__ == "__main__": run_app("main:APP", log_level="trace") diff --git a/requirements.in.txt b/requirements.in.txt new file mode 100644 index 0000000..02fb47d --- /dev/null +++ b/requirements.in.txt @@ -0,0 +1,3 @@ +fastapi +llama_cpp_python +nc_py_api[app]>=0.8.0 diff --git a/requirements.txt b/requirements.txt index 8135952..50ceaec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,40 @@ -nc_py_api[app]>=0.8.0 -torch -transformers -sentencepiece \ No newline at end of file +annotated-types==0.7.0 +anyio==4.4.0 +certifi==2024.7.4 +click==8.1.7 +diskcache==5.6.3 +dnspython==2.6.1 +email_validator==2.2.0 +fastapi==0.111.0 +fastapi-cli==0.0.4 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +idna==3.7 +Jinja2==3.1.4 +llama_cpp_python @ https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.82-cu121/llama_cpp_python-0.2.82-cp311-cp311-linux_x86_64.whl#sha256=8faeb9347543e9752c3dc117cd2f4d214a158444d648f6ddf0b9c360e96f2694 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +mdurl==0.1.2 +nc-py-api==0.13.0 +numpy==2.0.0 +orjson==3.10.6 +pydantic==2.8.2 +pydantic_core==2.20.1 +Pygments==2.18.0 +python-dotenv==1.0.1 +python-multipart==0.0.9 +PyYAML==6.0.1 +rich==13.7.1 +shellingham==1.5.4 +sniffio==1.3.1 +starlette==0.37.2 +typer==0.12.3 +typing_extensions==4.12.2 +ujson==5.10.0 +uvicorn==0.30.1 +uvloop==0.19.0 +watchfiles==0.22.0 +websockets==12.0 +xmltodict==0.13.0 From 2e5b9d56fd2bb9a148345d232daf479cc508405e Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Fri, 12 Jul 2024 15:40:02 +0530 Subject: [PATCH 02/10] llama-cpp-python implementation Signed-off-by: Anupam Kumar --- config.json | 20 +++++---------- lib/Service.py | 70 ++++++++++++++++++++++++++------------------------ lib/main.py | 34 ++++++++++++------------ 3 files changed, 61 insertions(+), 63 deletions(-) diff --git a/config.json b/config.json index e3a0d07..9d18c25 100644 --- a/config.json +++ b/config.json @@ -1,16 +1,8 @@ { - "llama": [ - { - "model_file": "madlad400-3b-mt-q8_0.gguf", - "n_ctx": 8192, - "max_tokens": 8192, - "n_batch": 512 - }, - { - "model_file": "madlad400-7b-mt-bt-q8_0.gguf", - "n_ctx": 4096, - "max_tokens": 4096, - "n_batch": 256 - } - ] + "llama": { + "model_file": "madlad400-3b-mt-q8_0.gguf", + "n_ctx": 8192, + "max_tokens": 8192, + "n_batch": 512 + } } diff --git a/lib/Service.py b/lib/Service.py index 9d73a6d..9e09fd2 100644 --- a/lib/Service.py +++ b/lib/Service.py @@ -2,70 +2,74 @@ import json import os -from pathlib import Path +import re +from contextlib import contextmanager from time import perf_counter from llama_cpp.llama import Llama +GPU_ACCELERATED = os.getenv("COMPUTE_DEVICE", "cuda") != "cpu" +TEMPERATURE = 0.1 + class LoaderException(Exception): pass -class LlamaContext: - def __init__(self, model_name: str, gpu_accelerated: bool): - try: - with open("../config.json") as f: - config = json.loads(f.read())["llama"][model_name] - config["model_path"] = Path("../models/", config["model_name"]) - del config["model_name"] +def clean_text(text: str) -> str: + return re.sub(r"(\r?\n)+", " ", text).strip() - self.llama = 
Llama(n_gpu_layers=-1 if gpu_accelerated else 0, **config) - except Exception as e: - raise LoaderException( - f"Error reading config, ensure config.json is present at {Path('..', os.getcwd())}" - ) from e - def __enter__(self): - self.start = perf_counter() - return self.llama +@contextmanager +def llama_context(): + try: + with open(os.path.join(os.getcwd(), "../config.json")) as f: + # todo + config = json.loads(f.read())["llama"] + config["model_path"] = os.path.join(os.getcwd(), "../models/", config["model_file"]) + del config["model_file"] - def __exit__(self, exc_type, exc_value, exc_tb): - print(f"time taken {perf_counter() - self.start}") - del self.llama + llama = Llama(n_gpu_layers=-1 if GPU_ACCELERATED else 0, **config) + except Exception as e: + raise LoaderException( + "Error reading config, ensure config.json is present in the project root" + ) from e + start = perf_counter() + yield llama + elapsed = perf_counter() - start + print(f"time taken: {elapsed:.2f} s") + del llama -class Service: - gpu_accelerated = os.getenv("COMPUTE_DEVICE", "cuda") != "cpu" - temperature = 0.1 +class Service: def __init__(self): try: with open("../languages.json") as f: self.languages = json.loads(f.read()) except Exception as e: raise Exception( - f"Error reading languages list, ensure languages.json is present at {Path('..', os.getcwd())}" + "Error reading languages list, ensure languages.json is present in the project root" ) from e def get_lang_names(self): return self.languages - def get_models(self): - models = [] - languages = self.get_lang_names() + # def get_models(self): + # models = [] + # languages = self.get_lang_names() - for file in os.scandir("../models/"): - if os.path.isfile(file.path) and file.name.endswith(".gguf"): - models.append((file.name, languages)) + # for file in os.scandir("../models/"): + # if os.path.isfile(file.path) and file.name.endswith(".gguf"): + # models.append((file.name, languages)) - return models + # return models - def translate(self, model_name: str, to_language: str, text: str): + def translate(self, to_language: str, text: str): print("translating text to", to_language) - with LlamaContext(model_name, self.gpu_accelerated) as llama: - translation = llama(f"<2{to_language}> {text}", temperature=self.temperature) + with llama_context() as llama: + translation = llama(f"<2{to_language}> {clean_text(text)}", temperature=TEMPERATURE) print(translation) return translation.choices[0].text diff --git a/lib/main.py b/lib/main.py index 561d98c..f40781f 100644 --- a/lib/main.py +++ b/lib/main.py @@ -45,7 +45,7 @@ def run(self, *args, **kwargs): # pylint: disable=unused-argument while True: task = TASK_LIST.get(block=True) try: - translation = service.translate(task["model"], task["to_language"], task["text"]) + translation = service.translate(task["to_language"], task["text"]) NextcloudApp().providers.translations.report_result( task_id=task["id"], result=str(translation).strip(), @@ -60,15 +60,16 @@ def run(self, *args, **kwargs): # pylint: disable=unused-argument @APP.post("/translate") async def tiny_llama( - name: typing.Annotated[str, Body()], + # name: typing.Annotated[str, Body()], from_language: typing.Annotated[str, Body()], to_language: typing.Annotated[str, Body()], text: typing.Annotated[str, Body()], task_id: typing.Annotated[int, Body()], ): try: + # todo task = { - "model": name[11:], + # "model": name[11:], "text": text, "from_language": from_language, "to_language": to_language, @@ -84,19 +85,20 @@ async def tiny_llama( async def 
enabled_handler(enabled: bool, nc: AsyncNextcloudApp) -> str: print(f"enabled={enabled}") if enabled is True: - models = service.get_models() - - for (model_name, languages) in models: - print( - f"Supported languages in model {model_name}: ({len(languages)}): {list(languages.values())[:10]}, ..." - ) - await nc.providers.translations.register( - f"translate2:{model_name}", - "Local Machine Translation", - "/translate", - languages, - languages, - ) + # models = service.get_models() + + # for (model_name, languages) in models: + languages = service.get_lang_names() + print( + f"Supported languages: ({len(languages)}): {list(languages.values())[:10]}, ..." + ) + await nc.providers.translations.register( + "translate2", + "Local Machine Translation", + "/translate", + languages, + languages, + ) else: await nc.providers.speech_to_text.unregister("translate2") return "" From 8b27a37ea61cefd6e4d3aaaff245885f808901d2 Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Tue, 16 Jul 2024 14:17:54 +0530 Subject: [PATCH 03/10] switch to ctranslate2 - add logger config - dedicated config file for model tweaking Signed-off-by: Anupam Kumar --- config.json | 23 ++++++++++--- lib/Service.py | 80 +++++++++++++++++++++++++-------------------- lib/main.py | 60 ++++++++++++++++++++-------------- pyproject.toml | 2 +- requirements.in.txt | 3 +- requirements.txt | 6 ++-- 6 files changed, 105 insertions(+), 69 deletions(-) diff --git a/config.json b/config.json index 9d18c25..3ba65ea 100644 --- a/config.json +++ b/config.json @@ -1,8 +1,21 @@ { - "llama": { - "model_file": "madlad400-3b-mt-q8_0.gguf", - "n_ctx": 8192, - "max_tokens": 8192, - "n_batch": 512 + "__comment::log_level": "Log level for the app, see https://docs.python.org/3/library/logging.html#logging-levels", + "__comment::tokenizer_file": "The tokenizer file name inside the model directory (loader.model_path)", + "__comment::loader": "CTranslate2 loader options, see https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.__init__", + "__comment::inference": "CTranslate2 inference options, see the kwargs in https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.translate_batch", + "__comment::changes_to_the_config": "the program needs to be restarted if you change this file since it is stored in memory on startup", + + "log_level": 10, + "tokenizer_file": "spiece.model", + "loader": { + "model_path": "models/madlad400-3b-mt-ct2-int8_float32", + "inter_threads": 4, + "intra_threads": 0 + }, + "inference": { + "max_batch_size": 8192, + "sampling_temperature": 0.0001, + "no_repeat_ngram_size": 8, + "disable_unk": true } } diff --git a/lib/Service.py b/lib/Service.py index 9e09fd2..ed57d91 100644 --- a/lib/Service.py +++ b/lib/Service.py @@ -1,19 +1,18 @@ """Translation service""" import json +import logging import os import re from contextlib import contextmanager from time import perf_counter -from llama_cpp.llama import Llama +import ctranslate2 +from sentencepiece import SentencePieceProcessor GPU_ACCELERATED = os.getenv("COMPUTE_DEVICE", "cuda") != "cpu" -TEMPERATURE = 0.1 - -class LoaderException(Exception): - pass +logger = logging.getLogger(__name__) def clean_text(text: str) -> str: @@ -21,31 +20,41 @@ def clean_text(text: str) -> str: @contextmanager -def llama_context(): +def translate_context(config: dict): try: - with open(os.path.join(os.getcwd(), "../config.json")) as f: - # todo - config = json.loads(f.read())["llama"] - config["model_path"] = os.path.join(os.getcwd(), 
"../models/", config["model_file"]) - del config["model_file"] - - llama = Llama(n_gpu_layers=-1 if GPU_ACCELERATED else 0, **config) + tokenizer = SentencePieceProcessor() + tokenizer.Load(os.path.join(config["loader"]["model_path"], config["tokenizer_file"])) + + translator = ctranslate2.Translator( + **{ + "device": "cuda" if GPU_ACCELERATED else "cpu", + **config["loader"], + } + ) + except KeyError as e: + raise Exception("Incorrect config file") from e except Exception as e: - raise LoaderException( - "Error reading config, ensure config.json is present in the project root" - ) from e + raise Exception("Error loading the translation model") from e start = perf_counter() - yield llama + yield (tokenizer, translator) elapsed = perf_counter() - start - print(f"time taken: {elapsed:.2f} s") - del llama + + logger.info(f"time taken: {elapsed:.2f}s") + del tokenizer + # todo: offload to cpu? + del translator class Service: - def __init__(self): + def __init__(self, config: dict): + global logger try: - with open("../languages.json") as f: + self.config = config + ctranslate2.set_log_level(config["log_level"]) + logger.setLevel(config["log_level"]) + + with open("languages.json") as f: self.languages = json.loads(f.read()) except Exception as e: raise Exception( @@ -55,21 +64,22 @@ def __init__(self): def get_lang_names(self): return self.languages - # def get_models(self): - # models = [] - # languages = self.get_lang_names() - - # for file in os.scandir("../models/"): - # if os.path.isfile(file.path) and file.name.endswith(".gguf"): - # models.append((file.name, languages)) + def translate(self, to_language: str, text: str) -> str: + logger.debug(f"translating text to: {to_language}") - # return models + with translate_context(self.config) as (tokenizer, translator): + input_tokens = tokenizer.Encode(f"<2{to_language}> {clean_text(text)}", out_type=str) + results = translator.translate_batch( + [input_tokens], + batch_type="tokens", + **self.config["inference"], + ) - def translate(self, to_language: str, text: str): - print("translating text to", to_language) + if len(results) == 0 or len(results[0].hypotheses) == 0: + raise Exception("Empty result returned from translator") - with llama_context() as llama: - translation = llama(f"<2{to_language}> {clean_text(text)}", temperature=TEMPERATURE) + # todo: handle multiple hypotheses + translation = tokenizer.Decode(results[0].hypotheses[0]) - print(translation) - return translation.choices[0].text + logger.info(f"Translated string: {translation}") + return translation diff --git a/lib/main.py b/lib/main.py index f40781f..b3aac7e 100644 --- a/lib/main.py +++ b/lib/main.py @@ -1,24 +1,33 @@ """The main module of the translate2 app""" +import json +import logging import queue import threading import typing from contextlib import asynccontextmanager -# todo +import uvicorn.logging from dotenv import load_dotenv from fastapi import Body, FastAPI, Request, responses from nc_py_api import AsyncNextcloudApp, NextcloudApp from nc_py_api.ex_app import LogLvl, run_app, set_handlers -from Service import LoaderException, Service +from Service import Service -# todo load_dotenv() -service = Service() +with open("config.json") as f: + config = json.loads(f.read()) + +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(config["log_level"]) + +service = Service(config) + @asynccontextmanager -async def lifespan(_app: FastAPI): +async def lifespan(_: FastAPI): set_handlers( APP, enabled_handler, @@ -32,11 +41,19 @@ async def 
lifespan(_app: FastAPI): TASK_LIST: queue.Queue = queue.Queue(maxsize=100) -@APP.exception_handler(LoaderException) -async def _(request: Request, exc: LoaderException): - print(f"Loader Error: {request.url.path}:", exc) +@APP.exception_handler(Exception) +async def _(request: Request, exc: Exception): + logger.error("Error processing request", request.url.path, exc) + + task: dict | None = getattr(exc, "args", None) + + nc = NextcloudApp() + nc.log(LogLvl.ERROR, str(exc)) + if task: + nc.providers.translations.report_result(task["id"], error=str(exc)) + return responses.JSONResponse({ - "error": "The resource loader is facing some issues, please check the logs for more info" + "error": "An error occurred while processing the request, please check the logs for more info" }, 500) @@ -51,31 +68,25 @@ def run(self, *args, **kwargs): # pylint: disable=unused-argument result=str(translation).strip(), ) except Exception as e: # noqa - print(str(e)) - nc = NextcloudApp() - nc.log(LogLvl.ERROR, str(e)) - nc.providers.translations.report_result(task["id"], error=str(e)) - + e.args = task + raise e @APP.post("/translate") async def tiny_llama( - # name: typing.Annotated[str, Body()], from_language: typing.Annotated[str, Body()], to_language: typing.Annotated[str, Body()], text: typing.Annotated[str, Body()], task_id: typing.Annotated[int, Body()], ): try: - # todo task = { - # "model": name[11:], "text": text, "from_language": from_language, "to_language": to_language, "id": task_id, } - print(task) + logger.debug(task) TASK_LIST.put(task) except queue.Full: return responses.JSONResponse(content={"error": "task queue is full"}, status_code=429) @@ -85,12 +96,12 @@ async def tiny_llama( async def enabled_handler(enabled: bool, nc: AsyncNextcloudApp) -> str: print(f"enabled={enabled}") if enabled is True: - # models = service.get_models() - - # for (model_name, languages) in models: languages = service.get_lang_names() - print( - f"Supported languages: ({len(languages)}): {list(languages.values())[:10]}, ..." 
+ logger.info( + "Supported languages short list", { + "count": len(languages), + "languages": list(languages.keys())[:10], + } ) await nc.providers.translations.register( "translate2", @@ -105,4 +116,5 @@ async def enabled_handler(enabled: bool, nc: AsyncNextcloudApp) -> str: if __name__ == "__main__": - run_app("main:APP", log_level="trace") + uvicorn_log_level = uvicorn.logging.TRACE_LOG_LEVEL if config["log_level"] == logging.DEBUG else config["log_level"] + run_app("main:APP", log_level=uvicorn_log_level) diff --git a/pyproject.toml b/pyproject.toml index 7e1f410..fe7f01f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ preview = true line-length = 120 target-version = "py310" select = ["A", "B", "C", "D", "E", "F", "G", "I", "S", "SIM", "PIE", "Q", "RET", "RUF", "UP" , "W"] -extend-ignore = ["D101", "D102", "D103", "D105", "D107", "D203", "D213", "D401", "I001", "RUF100", "D400", "D415"] +extend-ignore = ["D101", "D102", "D103", "D105", "D107", "D203", "D213", "D401", "I001", "RUF100", "D400", "D415", "G004"] [tool.isort] profile = "black" diff --git a/requirements.in.txt b/requirements.in.txt index 02fb47d..c19c7c0 100644 --- a/requirements.in.txt +++ b/requirements.in.txt @@ -1,3 +1,4 @@ fastapi -llama_cpp_python +ctranslate2 nc_py_api[app]>=0.8.0 +sentencepiece diff --git a/requirements.txt b/requirements.txt index 50ceaec..6861b22 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ annotated-types==0.7.0 anyio==4.4.0 certifi==2024.7.4 click==8.1.7 -diskcache==5.6.3 +ctranslate2==4.3.1 dnspython==2.6.1 email_validator==2.2.0 fastapi==0.111.0 @@ -13,11 +13,10 @@ httptools==0.6.1 httpx==0.27.0 idna==3.7 Jinja2==3.1.4 -llama_cpp_python @ https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.82-cu121/llama_cpp_python-0.2.82-cp311-cp311-linux_x86_64.whl#sha256=8faeb9347543e9752c3dc117cd2f4d214a158444d648f6ddf0b9c360e96f2694 markdown-it-py==3.0.0 MarkupSafe==2.1.5 mdurl==0.1.2 -nc-py-api==0.13.0 +nc-py-api==0.14.0 numpy==2.0.0 orjson==3.10.6 pydantic==2.8.2 @@ -27,6 +26,7 @@ python-dotenv==1.0.1 python-multipart==0.0.9 PyYAML==6.0.1 rich==13.7.1 +sentencepiece==0.2.0 shellingham==1.5.4 sniffio==1.3.1 starlette==0.37.2 From 7a075658b27aeb8ca70bf31dcbebf7b7d7eb5014 Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Wed, 31 Jul 2024 13:41:44 +0530 Subject: [PATCH 04/10] use models_to_fetch for model download Signed-off-by: Anupam Kumar --- config.json | 39 +++++++++++++++++++-------------------- lib/Service.py | 18 ++++++++++++------ lib/main.py | 33 +++++++++++++++++++++++++++------ lib/util.py | 24 ++++++++++++++++++++++++ requirements.in.txt | 3 ++- requirements.txt | 10 +++++++++- 6 files changed, 93 insertions(+), 34 deletions(-) create mode 100644 lib/util.py diff --git a/config.json b/config.json index 3ba65ea..4096246 100644 --- a/config.json +++ b/config.json @@ -1,21 +1,20 @@ { - "__comment::log_level": "Log level for the app, see https://docs.python.org/3/library/logging.html#logging-levels", - "__comment::tokenizer_file": "The tokenizer file name inside the model directory (loader.model_path)", - "__comment::loader": "CTranslate2 loader options, see https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.__init__", - "__comment::inference": "CTranslate2 inference options, see the kwargs in https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.translate_batch", - "__comment::changes_to_the_config": "the program needs to be restarted if you change this file since it 
is stored in memory on startup", - - "log_level": 10, - "tokenizer_file": "spiece.model", - "loader": { - "model_path": "models/madlad400-3b-mt-ct2-int8_float32", - "inter_threads": 4, - "intra_threads": 0 - }, - "inference": { - "max_batch_size": 8192, - "sampling_temperature": 0.0001, - "no_repeat_ngram_size": 8, - "disable_unk": true - } -} + "__comment::log_level": "Log level for the app, see https://docs.python.org/3/library/logging.html#logging-levels", + "__comment::tokenizer_file": "The tokenizer file name inside the model directory (loader.model_path)", + "__comment::loader": "CTranslate2 loader options, see https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.__init__. Use 'model_path' key for local paths or 'model_name' key for models hosted on Hugging Face. Both can't be used at the same time.", + "__comment::inference": "CTranslate2 inference options, see the kwargs in https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.translate_batch", + "__comment::changes_to_the_config": "the program needs to be restarted if you change this file since it is stored in memory on startup", + "log_level": 20, + "tokenizer_file": "spiece.model", + "loader": { + "model_name": "Nextcloud-AI/madlad400-3b-mt-ct2-int8_float32", + "inter_threads": 4, + "intra_threads": 0 + }, + "inference": { + "max_batch_size": 8192, + "sampling_temperature": 0.0001, + "no_repeat_ngram_size": 8, + "disable_unk": true + } +} \ No newline at end of file diff --git a/lib/Service.py b/lib/Service.py index ed57d91..82128d0 100644 --- a/lib/Service.py +++ b/lib/Service.py @@ -3,22 +3,19 @@ import json import logging import os -import re from contextlib import contextmanager +from copy import deepcopy from time import perf_counter import ctranslate2 from sentencepiece import SentencePieceProcessor +from util import clean_text GPU_ACCELERATED = os.getenv("COMPUTE_DEVICE", "cuda") != "cpu" logger = logging.getLogger(__name__) -def clean_text(text: str) -> str: - return re.sub(r"(\r?\n)+", " ", text).strip() - - @contextmanager def translate_context(config: dict): try: @@ -50,7 +47,7 @@ class Service: def __init__(self, config: dict): global logger try: - self.config = config + self.load_config(config) ctranslate2.set_log_level(config["log_level"]) logger.setLevel(config["log_level"]) @@ -64,6 +61,15 @@ def __init__(self, config: dict): def get_lang_names(self): return self.languages + def load_config(self, config: dict): + config_copy = deepcopy(config) + config_copy["loader"].pop("model_name", None) + + if "hf_model_path" in config_copy["loader"]: + config_copy["loader"]["model_path"] = config_copy["loader"].pop("hf_model_path") + + self.config = config_copy + def translate(self, to_language: str, text: str) -> str: logger.debug(f"translating text to: {to_language}") diff --git a/lib/main.py b/lib/main.py index b3aac7e..a84197f 100644 --- a/lib/main.py +++ b/lib/main.py @@ -1,7 +1,7 @@ """The main module of the translate2 app""" -import json import logging +import os import queue import threading import typing @@ -13,24 +13,44 @@ from nc_py_api import AsyncNextcloudApp, NextcloudApp from nc_py_api.ex_app import LogLvl, run_app, set_handlers from Service import Service +from util import load_config_file, save_config_file load_dotenv() -with open("config.json") as f: - config = json.loads(f.read()) +config = load_config_file() +# logging config logging.basicConfig() logger = logging.getLogger(__name__) logger.setLevel(config["log_level"]) -service = 
Service(config) + +class ModelConfig(dict): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __setitem__(self, key, value): + if key == "path": + config["loader"]["hf_model_path"] = value + service.load_config(config) + save_config_file(config) + + super().__setitem__(key, value) + + +# download models if "model_name" key is present in the config +models_to_fetch = None +cache_dir = os.getenv("APP_PERSISTENT_STORAGE", "models/") +if "model_name" in config["loader"]: + models_to_fetch = { config["loader"]["model_name"]: ModelConfig({ "cache_dir": cache_dir }) } @asynccontextmanager async def lifespan(_: FastAPI): set_handlers( - APP, - enabled_handler, + fast_api_app=APP, + enabled_handler=enabled_handler, + models_to_fetch=models_to_fetch, ) t = BackgroundProcessTask() t.start() @@ -39,6 +59,7 @@ async def lifespan(_: FastAPI): APP = FastAPI(lifespan=lifespan) TASK_LIST: queue.Queue = queue.Queue(maxsize=100) +service = Service(config) @APP.exception_handler(Exception) diff --git a/lib/util.py b/lib/util.py new file mode 100644 index 0000000..fd9f9bf --- /dev/null +++ b/lib/util.py @@ -0,0 +1,24 @@ +"""Utility functions""" + +import json +import re + + +def clean_text(text: str) -> str: + return re.sub(r"(\r?\n)+", " ", text).strip() + + +def load_config_file(path: str = "config.json") -> dict: + with open(path) as f: + config = json.loads(f.read()) + if "model_name" in config["loader"] and "model_path" in config["loader"]: + raise Exception("Both 'model_name' and 'model_path' keys are present in the config. Please remove one of them.") # noqa: E501 + if "model_name" not in config["loader"] and "model_path" not in config["loader"]: + raise Exception("Neither 'model_name' nor 'model_path' keys are present in the config. 
Please add one of them.") # noqa: E501 + return config + + +def save_config_file(config: dict, path: str = "config.json") -> None: + with open(path, "w") as f: + f.write(json.dumps(config, indent=4)) + diff --git a/requirements.in.txt b/requirements.in.txt index c19c7c0..a99069d 100644 --- a/requirements.in.txt +++ b/requirements.in.txt @@ -1,4 +1,5 @@ fastapi ctranslate2 -nc_py_api[app]>=0.8.0 +huggingface_hub +nc_py_api[app]>=0.15.0 sentencepiece diff --git a/requirements.txt b/requirements.txt index 6861b22..a7b8d6a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,38 +1,46 @@ annotated-types==0.7.0 anyio==4.4.0 certifi==2024.7.4 +charset-normalizer==3.3.2 click==8.1.7 ctranslate2==4.3.1 dnspython==2.6.1 email_validator==2.2.0 fastapi==0.111.0 fastapi-cli==0.0.4 +filelock==3.15.4 +fsspec==2024.6.1 h11==0.14.0 httpcore==1.0.5 httptools==0.6.1 httpx==0.27.0 +huggingface-hub==0.23.4 idna==3.7 Jinja2==3.1.4 markdown-it-py==3.0.0 MarkupSafe==2.1.5 mdurl==0.1.2 -nc-py-api==0.14.0 +nc-py-api==0.15.1 numpy==2.0.0 orjson==3.10.6 +packaging==24.1 pydantic==2.8.2 pydantic_core==2.20.1 Pygments==2.18.0 python-dotenv==1.0.1 python-multipart==0.0.9 PyYAML==6.0.1 +requests==2.32.3 rich==13.7.1 sentencepiece==0.2.0 shellingham==1.5.4 sniffio==1.3.1 starlette==0.37.2 +tqdm==4.66.4 typer==0.12.3 typing_extensions==4.12.2 ujson==5.10.0 +urllib3==2.2.2 uvicorn==0.30.1 uvloop==0.19.0 watchfiles==0.22.0 From 9c3d1f275dc815e0105f8fb8e9690a2693175771 Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Wed, 31 Jul 2024 13:43:52 +0530 Subject: [PATCH 05/10] add models dir to .gitignore Signed-off-by: Anupam Kumar --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ce93ed1..5f54590 100644 --- a/.gitignore +++ b/.gitignore @@ -93,3 +93,4 @@ MANIFEST converted/ geckodriver.log +models/ From 558225bcde2ee57722eeae368cf354de4ad09e42 Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Wed, 31 Jul 2024 13:52:01 +0530 Subject: [PATCH 06/10] update dockerfile Signed-off-by: Anupam Kumar --- Dockerfile | 49 ++++++++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/Dockerfile b/Dockerfile index a7d96a9..5338591 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,19 +1,15 @@ -FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04 +FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 -ENV DEBIAN_FRONTEND=noninteractive +ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update -RUN apt-get install -y software-properties-common -RUN add-apt-repository -y ppa:deadsnakes/ppa -RUN apt-get update -RUN apt-get install -y --no-install-recommends python3.11 python3.11-venv python3-pip vim git pciutils -RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 -RUN apt-get -y clean -RUN rm -rf /var/lib/apt/lists/* - -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute -ENV DEBIAN_FRONTEND=dialog +RUN apt-get update && \ + apt-get install -y software-properties-common && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install -y --no-install-recommends python3.11 python3.11-venv python3-pip vim git && \ + update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \ + apt-get -y clean && \ + rm -rf /var/lib/apt/lists/* # Set working directory WORKDIR /app @@ -22,26 +18,21 @@ WORKDIR /app COPY requirements.txt . 
# Install requirements -RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel -RUN python3 -m pip install --no-cache-dir https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.82-cu121/llama_cpp_python-0.2.82-cp311-cp311-linux_x86_64.whl -RUN sed -i '/llama_cpp_python/d' requirements.txt RUN python3 -m pip install --no-cache-dir --no-deps -r requirements.txt - -# CUDA 12.1 compat lib -ENV LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH -ENV LIBRARY_PATH=/usr/local/cuda/compat:$LIBRARY_PATH +ENV NVIDIA_VISIBLE_DEVICES all +ENV NVIDIA_DRIVER_CAPABILITIES compute +ENV DEBIAN_FRONTEND dialog # Copy application files -ADD cs[s] /app/css -ADD im[g] /app/img -ADD j[s] /app/js +ADD cs[s] /app/css +ADD im[g] /app/img +ADD j[s] /app/js ADD l10[n] /app/l10n -ADD li[b] /app/lib -ADD model[s] /app/models -ADD config.json /app/config.json +ADD li[b] /app/lib +ADD config.json /app/config.json +ADD languages.json /app/languages.json -WORKDIR /app/lib -ENTRYPOINT ["python3", "-u", "main.py"] +ENTRYPOINT ["python3", "lib/main.py"] LABEL org.opencontainers.image.source="https://github.com/nextcloud/translate2" From cb327f341df1af93ef56643af26f855fd3e48175 Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Fri, 2 Aug 2024 14:12:28 +0530 Subject: [PATCH 07/10] update integration test workflow Signed-off-by: Anupam Kumar --- .github/workflows/integration-test.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index a193b19..4c7fc2d 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -102,15 +102,18 @@ jobs: ./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud --database-host=127.0.0.1 --database-port=$PGSQL_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass password php -S localhost:8080 & - - name: Enable app and app_api + - name: Enable app_api run: ./occ app:enable -vvv -f app_api - name: Setup python 3.11 uses: actions/setup-python@v5 with: python-version: '3.11' + cache: 'pip' + cache-dependency-path: | + requirements.txt - - name: Install and init backend + - name: Install and start ex-app's server env: PYTHONUNBUFFERED: 1 APP_HOST: 0.0.0.0 @@ -121,8 +124,7 @@ jobs: NEXTCLOUD_URL: http://localhost:8080 working-directory: ${{ env.APP_NAME }} run: | - pip install -r requirements.txt - make download-a-few-models + pip install --no-deps -r requirements.txt python3 lib/main.py & - name: Register backend @@ -130,11 +132,11 @@ jobs: ./occ app_api:daemon:register --net host manual_install "Manual Install" manual-install http localhost http://localhost:8080 ./occ app_api:app:register translate2 manual_install --json-info "{\"appid\":\"translate2\",\"name\":\"Local Machine Translation\",\"daemon_config_name\":\"manual_install\",\"version\":\"1.0.0\",\"secret\":\"12345\",\"port\":9081,\"scopes\":[\"AI_PROVIDERS\"],\"system_app\":0}" --force-scopes --wait-finish - - name: Scan files + - name: Test translation run: | - curl --header "Content-Type: application/json" -X POST http://localhost:8080/ocs/v2.php/translation/translate --data '{"text":"Hallo Welt","fromLanguage":"de","toLanguage":"en"}' + curl --header "OCS-APIRequest: true" --header "Content-Type: application/json" -X POST http://localhost:8080/ocs/v2.php/translation/translate --data '{"text":"Hallo Welt","fromLanguage":"de","toLanguage":"en"}' - name: Show log on failure if: 
always() run: | - tail data/nextcloud.log \ No newline at end of file + tail data/nextcloud.log From bc1aaba519b89cf2e72c5bcf22d219ab4294b3d1 Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Fri, 2 Aug 2024 15:31:13 +0530 Subject: [PATCH 08/10] minor fixes and papercuts Signed-off-by: Anupam Kumar --- config.json | 1 - lib/Service.py | 22 ++++++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/config.json b/config.json index 4096246..3f59b3e 100644 --- a/config.json +++ b/config.json @@ -14,7 +14,6 @@ "inference": { "max_batch_size": 8192, "sampling_temperature": 0.0001, - "no_repeat_ngram_size": 8, "disable_unk": true } } \ No newline at end of file diff --git a/lib/Service.py b/lib/Service.py index 82128d0..67ddcfe 100644 --- a/lib/Service.py +++ b/lib/Service.py @@ -15,6 +15,9 @@ logger = logging.getLogger(__name__) +if os.getenv("CI") is not None: + ctranslate2.set_random_seed(420) + @contextmanager def translate_context(config: dict): @@ -33,14 +36,17 @@ def translate_context(config: dict): except Exception as e: raise Exception("Error loading the translation model") from e - start = perf_counter() - yield (tokenizer, translator) - elapsed = perf_counter() - start - - logger.info(f"time taken: {elapsed:.2f}s") - del tokenizer - # todo: offload to cpu? - del translator + try: + start = perf_counter() + yield (tokenizer, translator) + elapsed = perf_counter() - start + logger.info(f"time taken: {elapsed:.2f}s") + except Exception as e: + raise Exception("Error translating the input text") from e + finally: + del tokenizer + # todo: offload to cpu? + del translator class Service: From a825554a0570573d6de5ea1ac8186ed2567bcdc6 Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Fri, 2 Aug 2024 15:47:44 +0530 Subject: [PATCH 09/10] remove integration test temporarily Signed-off-by: Anupam Kumar --- .github/workflows/integration-test.yml | 142 ------------------------- 1 file changed, 142 deletions(-) delete mode 100644 .github/workflows/integration-test.yml diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml deleted file mode 100644 index 4c7fc2d..0000000 --- a/.github/workflows/integration-test.yml +++ /dev/null @@ -1,142 +0,0 @@ -# SPDX-FileCopyrightText: Nextcloud contributors -# SPDX-License-Identifier: AGPL-3.0-or-later - -name: Integration test - -on: - pull_request: - push: - branches: - - main - - stable* - -env: - APP_NAME: translate2 - -concurrency: - group: integration-test-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - - -jobs: - transcription: - runs-on: ubuntu-latest - - strategy: - # do not stop on another job's failure - fail-fast: false - matrix: - php-versions: [ '8.1' ] - databases: [ 'sqlite' ] - server-versions: [ 'master' ] - - name: Integration test on ${{ matrix.server-versions }} php@${{ matrix.php-versions }} - - env: - MYSQL_PORT: 4444 - PGSQL_PORT: 4445 - - services: - mysql: - image: mariadb:10.5 - ports: - - 4444:3306/tcp - env: - MYSQL_ROOT_PASSWORD: rootpassword - options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 5 - postgres: - image: postgres - ports: - - 4445:5432/tcp - env: - POSTGRES_USER: root - POSTGRES_PASSWORD: rootpassword - POSTGRES_DB: nextcloud - options: --health-cmd pg_isready --health-interval 5s --health-timeout 2s --health-retries 5 - - steps: - - name: Checkout server - uses: actions/checkout@v4 - with: - repository: nextcloud/server - ref: ${{ matrix.server-versions }} - - - name: Checkout submodules - 
shell: bash - run: | - auth_header="$(git config --local --get http.https://github.com/.extraheader)" - git submodule sync --recursive - git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1 - - - name: Set up php ${{ matrix.php-versions }} - uses: shivammathur/setup-php@v2 - with: - php-version: ${{ matrix.php-versions }} - tools: phpunit - extensions: mbstring, iconv, fileinfo, intl, sqlite, pdo_mysql, pdo_sqlite, pgsql, pdo_pgsql, gd, zip - - - name: Checkout app - uses: actions/checkout@v4 - with: - path: ${{ env.APP_NAME }} - - - name: Checkout AppAPI - uses: actions/checkout@v4 - with: - repository: cloud-py-api/app_api - path: apps/app_api - - - name: Set up Nextcloud - if: ${{ matrix.databases != 'pgsql'}} - run: | - sleep 25 - mkdir data - ./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud --database-host=127.0.0.1 --database-port=$MYSQL_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass password - php -S localhost:8080 & - - - name: Set up Nextcloud - if: ${{ matrix.databases == 'pgsql'}} - run: | - sleep 25 - mkdir data - ./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud --database-host=127.0.0.1 --database-port=$PGSQL_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass password - php -S localhost:8080 & - - - name: Enable app_api - run: ./occ app:enable -vvv -f app_api - - - name: Setup python 3.11 - uses: actions/setup-python@v5 - with: - python-version: '3.11' - cache: 'pip' - cache-dependency-path: | - requirements.txt - - - name: Install and start ex-app's server - env: - PYTHONUNBUFFERED: 1 - APP_HOST: 0.0.0.0 - APP_ID: translate2 - APP_PORT: 9081 - APP_SECRET: 12345 - APP_VERSION: 1.0.0 - NEXTCLOUD_URL: http://localhost:8080 - working-directory: ${{ env.APP_NAME }} - run: | - pip install --no-deps -r requirements.txt - python3 lib/main.py & - - - name: Register backend - run: | - ./occ app_api:daemon:register --net host manual_install "Manual Install" manual-install http localhost http://localhost:8080 - ./occ app_api:app:register translate2 manual_install --json-info "{\"appid\":\"translate2\",\"name\":\"Local Machine Translation\",\"daemon_config_name\":\"manual_install\",\"version\":\"1.0.0\",\"secret\":\"12345\",\"port\":9081,\"scopes\":[\"AI_PROVIDERS\"],\"system_app\":0}" --force-scopes --wait-finish - - - name: Test translation - run: | - curl --header "OCS-APIRequest: true" --header "Content-Type: application/json" -X POST http://localhost:8080/ocs/v2.php/translation/translate --data '{"text":"Hallo Welt","fromLanguage":"de","toLanguage":"en"}' - - - name: Show log on failure - if: always() - run: | - tail data/nextcloud.log From a42d03f057a5bee1dce562fc40e091061d84b466 Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Mon, 5 Aug 2024 12:46:44 +0530 Subject: [PATCH 10/10] lint fix Signed-off-by: Anupam Kumar --- README.md | 2 +- config.json | 2 +- lib/main.py | 4 ++-- lib/util.py | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ce9fc2d..a8b6c47 100644 --- a/README.md +++ b/README.md @@ -1 +1 @@ -# Nextcloud Local Machine Translation \ No newline at end of file +# Nextcloud Local Machine Translation diff --git a/config.json b/config.json index 3f59b3e..899f9c1 100644 --- a/config.json +++ b/config.json @@ -16,4 +16,4 @@ "sampling_temperature": 0.0001, "disable_unk": true } -} \ No newline at end of 
file +} diff --git a/lib/main.py b/lib/main.py index a84197f..3f92512 100644 --- a/lib/main.py +++ b/lib/main.py @@ -49,8 +49,8 @@ def __setitem__(self, key, value): async def lifespan(_: FastAPI): set_handlers( fast_api_app=APP, - enabled_handler=enabled_handler, - models_to_fetch=models_to_fetch, + enabled_handler=enabled_handler, # type: ignore + models_to_fetch=models_to_fetch, # type: ignore ) t = BackgroundProcessTask() t.start() diff --git a/lib/util.py b/lib/util.py index fd9f9bf..02be859 100644 --- a/lib/util.py +++ b/lib/util.py @@ -21,4 +21,3 @@ def load_config_file(path: str = "config.json") -> dict: def save_config_file(config: dict, path: str = "config.json") -> None: with open(path, "w") as f: f.write(json.dumps(config, indent=4)) -
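
For context, the series ends up replacing the transformers pipeline with CTranslate2 plus a SentencePiece tokenizer running the MADLAD-400 model, with the pieces spread across patches 03 and 04. The following is a minimal, self-contained sketch of the translation flow that lib/Service.py implements after this series; the local model directory and the "spiece.model" tokenizer file name are assumptions taken from config.json, and the device is left at "cpu" for illustration (Service.py passes "cuda" unless COMPUTE_DEVICE is set to "cpu"):

    import ctranslate2
    from sentencepiece import SentencePieceProcessor

    MODEL_DIR = "models/madlad400-3b-mt-ct2-int8_float32"  # assumed local path

    # SentencePiece tokenizer shipped alongside the converted model
    # (config.json points "tokenizer_file" at "spiece.model").
    tokenizer = SentencePieceProcessor()
    tokenizer.Load(f"{MODEL_DIR}/spiece.model")

    # CTranslate2 translator; loader options such as inter_threads and
    # intra_threads come from the "loader" section of config.json.
    translator = ctranslate2.Translator(MODEL_DIR, device="cpu")

    # MADLAD-400 convention: prefix the source text with a "<2xx>" target
    # language tag, e.g. "<2de>" to translate into German.
    tokens = tokenizer.Encode("<2de> Hello world", out_type=str)
    results = translator.translate_batch([tokens], batch_type="tokens")
    print(tokenizer.Decode(results[0].hypotheses[0]))

The request path in lib/main.py then only queues {"to_language", "text", "id"} tasks and reports the decoded hypothesis back through nc_py_api's translations provider.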