diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 28625af7..2bcbc068 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -337,7 +337,7 @@ jobs: -tgt_vocab /tmp/eole.vocab.tgt \ -src_vocab_size 1000 \ -tgt_vocab_size 1000 \ - -model '{"hidden_size": 10, "embeddings": {"word_vec_size": 5}}' \ + -model '{"architecture": "rnn", "hidden_size": 10, "embeddings": {"word_vec_size": 5}}' \ -training '{"batch_size": 10, "num_workers": 0, "bucket_size": 1024, "train_steps": 10, "model_path": "/tmp/eole.model", "save_checkpoint_steps": 10}' \ -report_every 5 sed -i '1s/^/new_tok\t100000000\n/' /tmp/eole.vocab.src @@ -347,7 +347,6 @@ jobs: -tgt_vocab /tmp/eole.vocab.tgt \ -src_vocab_size 1000 \ -tgt_vocab_size 1000 \ - -model '{"hidden_size": 10, "embeddings": {"word_vec_size": 5}}' \ -training '{"batch_size": 10, "num_workers": 0, "bucket_size": 1024, "train_steps": 20, "train_from": "/tmp/eole.model/step_10", "save_checkpoint_steps": 10, "update_vocab": True, "reset_optim": "states"}' \ -report_every 5 - name: Test checkpoint vocabulary update with LM @@ -366,7 +365,6 @@ jobs: -config eole/tests/data/lm_data.yaml \ -src_vocab /tmp/eole.vocab.src \ -tgt_vocab /tmp/eole.vocab.src \ - -model '{"layers": 2, "hidden_size": 16, "transformer_ff": 64, "embeddings": {"word_vec_size": 16}, "encoder": None, "decoder": {"decoder_type": "transformer_lm", "heads": 4}}' \ -training '{"batch_size": 10, "num_workers": 0, "bucket_size": 1024, "train_steps": 20, "train_from": "/tmp/lm.eole.model/step_10", "save_checkpoint_steps": 10, "update_vocab": True, "reset_optim": "states"}' \ -src_vocab_size 1000 \ -tgt_vocab_size 1000 \ diff --git a/eole/config/common.py b/eole/config/common.py index c2fded9e..1e4443bd 100644 --- a/eole/config/common.py +++ b/eole/config/common.py @@ -1,7 +1,6 @@ # import torch from typing import List, Literal -from pydantic import Field, computed_field, model_validator -from functools import cached_property +from pydantic import Field, model_validator from eole.config.config import Config # from eole.utils.logging import logger @@ -33,8 +32,7 @@ class DistributedConfig(Config): default=60, description="Timeout for one GPU to wait for the others." ) - @computed_field - @cached_property + @property def parallel_gpu(self) -> int: # converted to a `property` by `computed_field` return self.world_size if self.parallel_mode == "tensor_parallel" else 1 diff --git a/eole/config/run.py b/eole/config/run.py index f2e7bd63..af4e133c 100644 --- a/eole/config/run.py +++ b/eole/config/run.py @@ -91,7 +91,7 @@ class PredictConfig( src_subword_vocab: str | None = ( None # patch for CT2 inference engine (to improve later) ) - model: ModelConfig + model: ModelConfig | None = None @model_validator(mode="after") def _validate_predict_config(self): diff --git a/eole/decoders/ensemble.py b/eole/decoders/ensemble.py index fd4cb100..e1ff73f3 100644 --- a/eole/decoders/ensemble.py +++ b/eole/decoders/ensemble.py @@ -10,7 +10,7 @@ import copy from eole.encoders.encoder import EncoderBase from eole.decoders.decoder import DecoderBase -from eole.models import EncoderDecoderModel, get_model_class +from eole.models import EncoderDecoderModel, BaseModel class EnsembleDecoderOutput(object): @@ -191,7 +191,7 @@ def load_test_model(config, device_id=0): for i, model_path in enumerate(config.model_path): config2.model_path = [config.model_path[i]] print(config2.model) - vocabs, model, model_config = get_model_class(config2.model).load_test_model( + vocabs, model, model_config = BaseModel.load_test_model( config2, device_id, model_path=model_path ) if shared_vocabs is None: diff --git a/eole/predict/__init__.py b/eole/predict/__init__.py index 8bb6b1e1..7e78e5ea 100644 --- a/eole/predict/__init__.py +++ b/eole/predict/__init__.py @@ -9,7 +9,7 @@ from eole.predict.greedy_search import GreedySearch, GreedySearchLM from eole.predict.penalties import PenaltyBuilder from eole.decoders.ensemble import load_test_model as ensemble_load_test_model -from eole.models import get_model_class +from eole.models import BaseModel import codecs @@ -44,7 +44,7 @@ def build_predictor(config, device_id=0, report_score=True, logger=None, out_fil load_test_model = ( ensemble_load_test_model if len(config.model_path) > 1 - else get_model_class(config.model).load_test_model + else BaseModel.load_test_model ) vocabs, model, model_config = load_test_model(config, device_id) diff --git a/eole/tests/pull_request_check.sh b/eole/tests/pull_request_check.sh index a725c2a3..d75734a8 100755 --- a/eole/tests/pull_request_check.sh +++ b/eole/tests/pull_request_check.sh @@ -271,7 +271,7 @@ ${PYTHON} eole/bin/main.py train \ -src_vocab $TMP_OUT_DIR/eole.vocab.src \ -tgt_vocab $TMP_OUT_DIR/eole.vocab.tgt \ -src_vocab_size 1000 -tgt_vocab_size 1000 \ - -model '{"hidden_size": 10, "embeddings": {"word_vec_size": 5}}' \ + -model '{"architecture": "rnn", "hidden_size": 10, "embeddings": {"word_vec_size": 5}}' \ -training '{"batch_size": 10, "num_workers": 0, "bucket_size": 1024, "train_steps": 10, "model_path": "'"$TMP_OUT_DIR"'/eole.model", "save_checkpoint_steps": 10}' \ -report_every 5 \ >> ${LOG_FILE} 2>&1 @@ -281,7 +281,6 @@ ${PYTHON} eole/bin/main.py train \ -src_vocab $TMP_OUT_DIR/eole.vocab.src \ -tgt_vocab $TMP_OUT_DIR/eole.vocab.tgt \ -src_vocab_size 1000 -tgt_vocab_size 1000 \ - -model '{"hidden_size": 10, "embeddings": {"word_vec_size": 5}}' \ -training '{"batch_size": 10, "num_workers": 0, "bucket_size": 1024, "train_steps": 20, "train_from": "'"$TMP_OUT_DIR"'/eole.model/step_10", "save_checkpoint_steps": 10, "update_vocab": True, "reset_optim": "states"}' \ -report_every 5 \ >> ${LOG_FILE} 2>&1 diff --git a/eole/tests/test_models.py b/eole/tests/test_models.py index c604ffb8..f35f2473 100644 --- a/eole/tests/test_models.py +++ b/eole/tests/test_models.py @@ -113,7 +113,7 @@ def encoder_forward(self, opt, source_l=3, bsize=1): # Initialize vectors to compare size with test_hid = torch.zeros( - self.opt.model.encoder.layers, bsize, opt.model.encoder.hidden_size + opt.model.encoder.layers, bsize, opt.model.encoder.hidden_size ) test_out = torch.zeros(bsize, source_l, opt.model.decoder.hidden_size) @@ -192,17 +192,26 @@ def test_method(self): """ # opt.brnn = False # deprecated and not used here -test_embeddings = [[("model", {"architecture": "transformer"})]] +test_embeddings = [ + [("model", {"architecture": "rnn"})], + [("model", {"architecture": "transformer"})], +] for p in test_embeddings: _add_test(p, "embeddings_forward") tests_encoder = [ - # not supported anymore - # [ - # # ("encoder_type", "mean"), - # ("model", {"architecture": "custom", "encoder": {"encoder_type": "mean"}}) - # ], + [ + # ("encoder_type", "mean"), + ( + "model", + { + "architecture": "custom", + "encoder": {"encoder_type": "mean"}, + "decoder": {"decoder_type": "rnn"}, + }, + ) + ], # [('encoder_type', 'transformer'), # ('word_vec_size', 16), ('hidden_size', 16)], ]