From aad26000ddab54dc25356a52679dc000409839f5 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Mon, 20 Nov 2023 12:45:11 -0500 Subject: [PATCH 1/4] Better error messages for #62 --- .../translation/huggingface/hugging_face_nmt_engine.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/machine/translation/huggingface/hugging_face_nmt_engine.py b/machine/translation/huggingface/hugging_face_nmt_engine.py index e730d16..8191075 100644 --- a/machine/translation/huggingface/hugging_face_nmt_engine.py +++ b/machine/translation/huggingface/hugging_face_nmt_engine.py @@ -47,14 +47,18 @@ def __init__( and src_lang not in cast(Any, self._tokenizer).lang_code_to_id and src_lang not in additional_special_tokens ): - raise ValueError(f"'{src_lang}' is not a valid language code.") + raise ValueError( + f"'{src_lang}' is not a valid language code. This error can happen when there is no matching training data and the language code is not in the NLLB-200." + ) if ( tgt_lang is not None and tgt_lang not in cast(Any, self._tokenizer).lang_code_to_id and tgt_lang not in additional_special_tokens ): - raise ValueError(f"'{tgt_lang}' is not a valid language code.") + raise ValueError( + f"'{tgt_lang}' is not a valid language code. This error can happen when there is no matching training data and the language code is not in the NLLB-200." + ) self._pipeline = _TranslationPipeline( model=model, From 7f7d8e9186d77b9e3220ced9b1f4b5f319999f48 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Mon, 20 Nov 2023 12:55:48 -0500 Subject: [PATCH 2/4] Linting error --- machine/translation/huggingface/hugging_face_nmt_engine.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/machine/translation/huggingface/hugging_face_nmt_engine.py b/machine/translation/huggingface/hugging_face_nmt_engine.py index 8191075..ad1418c 100644 --- a/machine/translation/huggingface/hugging_face_nmt_engine.py +++ b/machine/translation/huggingface/hugging_face_nmt_engine.py @@ -48,7 +48,8 @@ def __init__( and src_lang not in additional_special_tokens ): raise ValueError( - f"'{src_lang}' is not a valid language code. This error can happen when there is no matching training data and the language code is not in the NLLB-200." + f"'{src_lang}' is not a valid language code. This error can happen when " + + "there is no matching training data and the language code is not in the NLLB-200." ) if ( @@ -57,7 +58,8 @@ def __init__( and tgt_lang not in additional_special_tokens ): raise ValueError( - f"'{tgt_lang}' is not a valid language code. This error can happen when there is no matching training data and the language code is not in the NLLB-200." + f"'{tgt_lang}' is not a valid language code. This error can happen when " + + "there is no matching training data and the language code is not in the NLLB-200." ) self._pipeline = _TranslationPipeline( From cf606611a8c6bd6bb81936b0ddbbf2ae1f2e2bca Mon Sep 17 00:00:00 2001 From: John Lambert Date: Tue, 21 Nov 2023 10:27:13 -0500 Subject: [PATCH 3/4] Update from reviewer comments --- .../translation/huggingface/hugging_face_nmt_engine.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/machine/translation/huggingface/hugging_face_nmt_engine.py b/machine/translation/huggingface/hugging_face_nmt_engine.py index ad1418c..6f18bf0 100644 --- a/machine/translation/huggingface/hugging_face_nmt_engine.py +++ b/machine/translation/huggingface/hugging_face_nmt_engine.py @@ -47,20 +47,14 @@ def __init__( and src_lang not in cast(Any, self._tokenizer).lang_code_to_id and src_lang not in additional_special_tokens ): - raise ValueError( - f"'{src_lang}' is not a valid language code. This error can happen when " - + "there is no matching training data and the language code is not in the NLLB-200." - ) + raise ValueError(f"The specified model does not support the language code '{src_lang}'") if ( tgt_lang is not None and tgt_lang not in cast(Any, self._tokenizer).lang_code_to_id and tgt_lang not in additional_special_tokens ): - raise ValueError( - f"'{tgt_lang}' is not a valid language code. This error can happen when " - + "there is no matching training data and the language code is not in the NLLB-200." - ) + raise ValueError(f"The specified model does not support the language code '{src_lang}'") self._pipeline = _TranslationPipeline( model=model, From 2143397a800395ea5ef9a679ce57cfd589450d0b Mon Sep 17 00:00:00 2001 From: John Lambert Date: Tue, 21 Nov 2023 14:12:05 -0500 Subject: [PATCH 4/4] Reviewer comments - fix bug --- machine/translation/huggingface/hugging_face_nmt_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine/translation/huggingface/hugging_face_nmt_engine.py b/machine/translation/huggingface/hugging_face_nmt_engine.py index 6f18bf0..3f617c8 100644 --- a/machine/translation/huggingface/hugging_face_nmt_engine.py +++ b/machine/translation/huggingface/hugging_face_nmt_engine.py @@ -54,7 +54,7 @@ def __init__( and tgt_lang not in cast(Any, self._tokenizer).lang_code_to_id and tgt_lang not in additional_special_tokens ): - raise ValueError(f"The specified model does not support the language code '{src_lang}'") + raise ValueError(f"The specified model does not support the language code '{tgt_lang}'") self._pipeline = _TranslationPipeline( model=model,