-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add unigram truecaser * Add CPU only docker image * Add Latin default tokenizer * Add vim to docker image for rebasing * Add SMT integration test * Update CI packages
- Loading branch information
1 parent
f8f3fc5
commit 9d7c432
Showing
64 changed files
with
11,047 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,12 +50,12 @@ jobs: | |
- name: Lint with isort | ||
run: poetry run isort . --check-only | ||
- name: Setup Node for pyright | ||
uses: actions/setup-node@v3 | ||
uses: actions/setup-node@v4 | ||
with: | ||
node-version: "12" | ||
node-version: "14" | ||
- name: Lint with pyright | ||
run: | | ||
npm install -g pyright@1.1.313 | ||
npm install -g pyright@1.1.362 | ||
poetry run pyright | ||
- name: Test with pytest | ||
run: poetry run pytest --cov --cov-report=xml | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Compatibility with TensorFlow 2.6.0 as per https://www.tensorflow.org/install/source#gpu
ARG PYTHON_VERSION=3.11
ARG UBUNTU_VERSION=focal
ARG POETRY_VERSION=1.6.1

# Builder stage: installs poetry and exports the locked dependencies to requirements.txt.
FROM python:$PYTHON_VERSION-slim AS builder
ARG POETRY_VERSION

ENV POETRY_HOME=/opt/poetry
ENV POETRY_VENV=/opt/poetry-venv
ENV POETRY_CACHE_DIR=/opt/.cache

# Install poetry separated from system interpreter
RUN python3 -m venv $POETRY_VENV \
    && $POETRY_VENV/bin/pip install -U pip setuptools \
    && $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION}

# Add `poetry` to PATH
ENV PATH="${PATH}:${POETRY_VENV}/bin"

WORKDIR /src
# With more than one source, the COPY destination must be a directory ending in "/".
COPY poetry.lock pyproject.toml /src/
RUN poetry export --with=gpu --without-hashes -f requirements.txt > requirements.txt

# Final stage: installs the exported requirements, then the package itself.
FROM python:$PYTHON_VERSION
WORKDIR /root

COPY --from=builder /src/requirements.txt .
RUN --mount=type=cache,target=/root/.cache \
    pip install --no-cache-dir -r requirements.txt && rm requirements.txt

COPY . .
# Install the package without re-resolving dependencies, then remove the copied sources.
RUN pip install --no-deps . && rm -r /root/*

CMD ["bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,17 @@ | ||
from .clearml_shared_file_service import ClearMLSharedFileService | ||
from .local_shared_file_service import LocalSharedFileService | ||
from .nmt_engine_build_job import NmtEngineBuildJob | ||
from .nmt_model_factory import NmtModelFactory | ||
from .shared_file_service import PretranslationInfo, PretranslationWriter, SharedFileService | ||
from .smt_engine_build_job import SmtEngineBuildJob | ||
|
||
__all__ = [ | ||
"ClearMLSharedFileService", | ||
"LocalSharedFileService", | ||
"NmtEngineBuildJob", | ||
"NmtModelFactory", | ||
"PretranslationInfo", | ||
"PretranslationWriter", | ||
"SharedFileService", | ||
"SmtEngineBuildJob", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import argparse | ||
import logging | ||
from typing import Callable, Optional | ||
|
||
from clearml import Task | ||
|
||
from ..utils.canceled_error import CanceledError | ||
from ..utils.progress_status import ProgressStatus | ||
from .clearml_shared_file_service import ClearMLSharedFileService | ||
from .config import SETTINGS | ||
from .smt_engine_build_job import SmtEngineBuildJob | ||
|
||
# Setup logging | ||
logging.basicConfig( | ||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", | ||
level=logging.INFO, | ||
) | ||
|
||
logger = logging.getLogger(str(__package__) + ".build_smt_engine") | ||
|
||
|
||
def run(args: dict) -> None:
    """Build an SMT engine, optionally reporting to a ClearML task.

    Args:
        args: Job options. ``args["clearml"]`` selects whether a ClearML task
            is initialized; the whole mapping is merged into ``SETTINGS``
            before the build job is created.

    Raises:
        Exception: Any error raised while building is re-raised, unless a
            ClearML task exists and its status is ``"stopped"``, in which
            case the function returns quietly.
    """
    task = None
    check_canceled: Optional[Callable[[], None]] = None
    progress: Optional[Callable[[ProgressStatus], None]] = None

    if args["clearml"]:
        task = Task.init()

        def _raise_if_stopped() -> None:
            # A stopped ClearML task is surfaced to the job as a cancellation.
            if task.get_status() == "stopped":
                raise CanceledError

        def _report_progress(status: ProgressStatus) -> None:
            # Nothing to report until a completion percentage is available.
            if status.percent_completed is None:
                return
            task.get_logger().report_single_value(name="progress", value=round(status.percent_completed, 4))

        check_canceled = _raise_if_stopped
        progress = _report_progress

    try:
        logger.info("SMT Engine Build Job started")

        SETTINGS.update(args)
        file_service = ClearMLSharedFileService(SETTINGS)
        build_job = SmtEngineBuildJob(SETTINGS, file_service)
        build_job.run(progress=progress, check_canceled=check_canceled)
        logger.info("Finished")
    except Exception as e:
        if task:
            # A stopped task means the failure is the result of a cancellation.
            if task.get_status() == "stopped":
                return
            task.mark_failed(status_reason=type(e).__name__, status_message=str(e))
        raise e
|
||
|
||
def main() -> None:
    """Parse the command line and start the SMT engine build.

    Options left at ``None`` (i.e. not supplied and without a non-None
    default) are dropped before the remaining ones are handed to ``run``.
    """
    parser = argparse.ArgumentParser(description="Trains an SMT model.")

    # Required string options, registered in the order they appear in --help.
    required_options = (
        ("--model-type", "Model type"),
        ("--build-id", "Build id"),
        ("--save-model", "Save the model using the specified base name"),
    )
    for flag, help_text in required_options:
        parser.add_argument(flag, required=True, type=str, help=help_text)

    parser.add_argument("--clearml", default=False, action="store_true", help="Initializes a ClearML task")
    parser.add_argument("--build-options", default=None, type=str, help="Build configurations")

    namespace = parser.parse_args()

    input_args = {}
    for key, value in vars(namespace).items():
        if value is not None:
            input_args[key] = value

    run(input_args)
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import logging | ||
import shutil | ||
from pathlib import Path | ||
|
||
from .shared_file_service import SharedFileService | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class LocalSharedFileService(SharedFileService):
    """Shared file service that resolves every path on the local file system.

    Paths are resolved relative to ``self._shared_file_uri``; "downloads"
    return the resolved path directly and "uploads" copy into it.
    """

    def _download_file(self, path: str, cache: bool = False) -> Path:
        """Return the local path for ``path``; ``cache`` is not used here."""
        return self._get_path(path)

    def _download_folder(self, path: str, cache: bool = False) -> Path:
        """Return the local path for the folder ``path``; ``cache`` is not used here."""
        return self._get_path(path)

    def _exists_file(self, path: str) -> bool:
        """Return whether the resolved path for ``path`` exists."""
        return self._get_path(path).exists()

    def _upload_file(self, path: str, local_file_path: Path) -> None:
        """Copy ``local_file_path`` to the resolved destination for ``path``."""
        dst_path = self._get_path(path)
        # Make sure the destination directory exists before copying.
        dst_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(local_file_path, dst_path)

    def _upload_folder(self, path: str, local_folder_path: Path) -> None:
        """Recursively copy ``local_folder_path`` into the resolved destination for ``path``."""
        dst_path = self._get_path(path)
        # copyfile only handles regular files; copytree copies the directory
        # contents and, with dirs_exist_ok=True, merges into an existing
        # destination (creating it and any missing parents otherwise).
        shutil.copytree(local_folder_path, dst_path, dirs_exist_ok=True)

    def _get_path(self, name: str) -> Path:
        """Resolve ``name`` against ``self._shared_file_uri``."""
        # Don't use shared file folder for local files
        return Path(f"{self._shared_file_uri}/{name}")
Oops, something went wrong.