-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2f7f44f
commit cf58b8e
Showing
4 changed files
with
174 additions
and
92 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import logging | ||
from abc import ABC, abstractmethod | ||
from typing import Any, Callable, Optional, Tuple | ||
|
||
from ..utils.phased_progress_reporter import PhasedProgressReporter | ||
from ..utils.progress_status import ProgressStatus | ||
from .shared_file_service import SharedFileService | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class EngineBuildJob(ABC):
    """Skeleton of an engine build job.

    ``run`` drives a fixed sequence of steps; every step except
    ``init_corpus`` is abstract and must be supplied by a subclass.
    """

    def __init__(self, config: Any, shared_file_service: SharedFileService) -> None:
        """Store the collaborators and reset the values reported by ``run``.

        Args:
            config: Job configuration; it is only stored here.
            shared_file_service: Service used by ``init_corpus`` to create
                the source and target corpora.
        """
        self._shared_file_service = shared_file_service
        self._config = config
        # Both values are returned by ``run``. Nothing in this class changes
        # them after construction -- presumably subclasses update them.
        self._confidence = -1
        self._train_corpus_size = -1

    def run(
        self,
        progress: Optional[Callable[[ProgressStatus], None]] = None,
        check_canceled: Optional[Callable[[], None]] = None,
    ) -> Tuple[int, float]:
        """Execute every step of the build in order.

        Args:
            progress: Optional callback handed to ``_get_progress_reporter``.
            check_canceled: Optional callback invoked before the job starts
                and again before pretranslation; it is also passed on to
                ``train_model`` and ``pretranslate_segments``. Presumably it
                raises to abort the job -- confirm against callers.

        Returns:
            The ``(train corpus size, confidence)`` pair held on the instance
            once all steps have finished.
        """

        def poll_cancellation() -> None:
            # A missing callback means cancellation is never checked.
            if check_canceled is not None:
                check_canceled()

        poll_cancellation()

        self.start_job()
        self.init_corpus()
        reporter = self._get_progress_reporter(progress)

        # Training only happens when the parallel corpus is non-empty.
        if self._parallel_corpus_size != 0:
            self.train_model(reporter, check_canceled)
        else:
            self.respond_to_no_training_corpus()

        poll_cancellation()

        logger.info("Pretranslating segments")
        self.pretranslate_segments(reporter, check_canceled)

        self.save_model()
        return self._train_corpus_size, self._confidence

    @abstractmethod
    def start_job(self) -> None:
        """Run first in ``run``, before the corpus is initialised."""

    def init_corpus(self) -> None:
        """Create the source/target corpora, align them and record the size.

        Sets ``_source_corpus``, ``_target_corpus``, ``_parallel_corpus`` and
        ``_parallel_corpus_size`` (counted with ``include_empty=False``).
        """
        logger.info("Downloading data files")
        file_service = self._shared_file_service

        source = file_service.create_source_corpus()
        self._source_corpus = source

        target = file_service.create_target_corpus()
        self._target_corpus = target

        parallel = source.align_rows(target)
        self._parallel_corpus = parallel
        self._parallel_corpus_size = parallel.count(include_empty=False)

    @abstractmethod
    def _get_progress_reporter(
        self, progress: Optional[Callable[[ProgressStatus], None]]
    ) -> PhasedProgressReporter:
        """Build the progress reporter used by the later steps of ``run``."""

    @abstractmethod
    def respond_to_no_training_corpus(self) -> None:
        """Called by ``run`` in place of training when the corpus size is 0."""

    @abstractmethod
    def train_model(
        self,
        progress_reporter: PhasedProgressReporter,
        check_canceled: Optional[Callable[[], None]],
    ) -> None:
        """Called by ``run`` when the parallel corpus is non-empty."""

    @abstractmethod
    def pretranslate_segments(
        self,
        progress_reporter: PhasedProgressReporter,
        check_canceled: Optional[Callable[[], None]],
    ) -> None:
        """Called by ``run`` after the training / no-corpus step."""

    @abstractmethod
    def save_model(self) -> None:
        """Final step of ``run``, called just before the result is returned."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.