moving translation wrapper to a file, adding tests
ischender committed Jan 17, 2024
1 parent 0dca8ba commit 4eb0676
Showing 5 changed files with 112 additions and 67 deletions.
2 changes: 2 additions & 0 deletions src/langcheck/metrics/de/__init__.py
@@ -1,4 +1,5 @@
 from langcheck.metrics.de._tokenizers import DeTokenizer
+from langcheck.metrics.de._translation import Translate
 from langcheck.metrics.de.reference_based_text_quality import (
     rouge1, rouge2, rougeL, semantic_similarity)
 from langcheck.metrics.de.reference_free_text_quality import (
@@ -21,4 +22,5 @@
     'sentiment',
     'toxicity',
     'DeTokenizer',
+    'Translate',
 ]
64 changes: 64 additions & 0 deletions src/langcheck/metrics/de/_translation.py
@@ -0,0 +1,64 @@
+from math import floor
+
+from nltk.tokenize import sent_tokenize
+from transformers.pipelines import pipeline
+
+
+class Translate:
+    '''Translation class based on HuggingFace's translation pipeline.'''
+
+    def __init__(self, model_name: str) -> None:
+        '''
+        Initialize the Translation class with given parameters.
+        Args:
+            model_name: The name of the model to use for translation
+        '''
+        self._translation_pipeline = pipeline("translation",
+                                              model=model_name,
+                                              tokenizer=model_name)
+        self._max_length = self._translation_pipeline.model.config.max_length
+
+    def _translate(self, texts: str) -> str:
+        '''Translate the texts using the translation pipeline.
+        It splits the texts into blocks and translates each block separately,
+        avoiding problems with long texts.
+        Args:
+            texts: The texts to translate
+        Returns:
+            The translated texts
+        '''
+        tokenization = self._translation_pipeline.tokenizer(
+            texts, return_tensors="pt")  # type: ignore
+        if tokenization.input_ids.shape[1] > (self._max_length / 2):
+            blocks = floor(
+                tokenization.input_ids.shape[1] / self._max_length) + 3
+            sentences = sent_tokenize(texts)
+            # Split sentences into a number of blocks, e.g., 2 blocks = 2 groups
+            len_block = floor(len(sentences) / blocks) + 1
+            sentences_list = []
+            for i in range(blocks):
+                sentences_list.append(sentences[i * len_block:(i + 1) *
+                                                len_block])
+            texts_ = [" ".join(sent) for sent in sentences_list]
+        else:
+            texts_ = [texts]
+        texts_en = []
+        for text in texts_:
+            text_en = [
+                str(d['translation_text'])  # type: ignore
+                for d in self._translation_pipeline(text)  # type: ignore
+            ]
+            texts_en.append(" ".join(text_en))
+        text_en_final = " ".join(texts_en)
+        return text_en_final
+
+    def __call__(self, text: str) -> str:
+        '''Translate the text using the translation pipeline.
+        Args:
+            text: The text to translate
+        Returns:
+            The translated text
+        '''
+        return self._translate(text)
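
For reference, a minimal usage sketch of the new wrapper (not part of this commit). It assumes the opus-mt checkpoint can be fetched from the HuggingFace Hub and that NLTK's punkt tokenizer data is available, since sent_tokenize relies on it on the long-text path:

import nltk

from langcheck.metrics.de import Translate

nltk.download('punkt')  # one-time download; sent_tokenize needs this corpus

translate = Translate('Helsinki-NLP/opus-mt-de-en')
print(translate('Tokio ist die Hauptstadt von Japan.'))
# Expected, per the tests added below: 'Tokyo is the capital of Japan.'

To see the splitting heuristic in numbers: for a 1,200-token input and max_length = 512, blocks = floor(1200 / 512) + 3 = 5, so the sentence list is divided into 5 roughly equal groups, each translated separately and re-joined with spaces.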
17 changes: 3 additions & 14 deletions src/langcheck/metrics/de/reference_free_text_quality.py
@@ -7,12 +7,11 @@
 from transformers.models.auto.modeling_auto import \
     AutoModelForSequenceClassification
 from transformers.models.auto.tokenization_auto import AutoTokenizer
-from transformers.pipelines import pipeline
-from transformers.pipelines.base import Pipeline
 
 from langcheck._handle_logs import _handle_logging_level
 from langcheck.metrics._detoxify import Detoxify
 from langcheck.metrics._validation import validate_parameters_reference_free
+from langcheck.metrics.de._translation import Translate
 from langcheck.metrics.de.reference_based_text_quality import \
     semantic_similarity
 from langcheck.metrics.en.reference_free_text_quality import _toxicity_openai
@@ -32,7 +31,6 @@
 _sentiment_model = None
 
 _translation_model_path = 'Helsinki-NLP/opus-mt-de-en'
-_translation_pipeline: Pipeline | None = None
 
 _toxicity_model = None
 
@@ -148,22 +146,13 @@ def fluency(
     Parrot fluency model to calculate the fluency scores, from the English
     counterpart.
     '''
-    global _translation_pipeline
-    if _translation_pipeline is None:
-        _translation_pipeline = pipeline('translation',
-                                         model=_translation_model_path)
+    translation = Translate(_translation_model_path)
 
     if isinstance(generated_outputs, str):
         generated_outputs = [generated_outputs]
 
     # Translate to English
-    generated_outputs_en = [
-        cast(str,
-             d['translation_text'])  # type: ignore[reportGeneralTypeIssues]
-        for d in _translation_pipeline(
-            generated_outputs
-        )  # type: ignore[reportOptionalIterable]  # NOQA: E501
-    ]
+    generated_outputs_en = [translation(out) for out in generated_outputs]
 
     _metric_value = en_fluency(generated_outputs_en, prompts, model_type,
                                openai_client, openai_args)
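With the wrapper in place, fluency translates each German output through Translate before scoring it with the English Parrot model. A hedged call sketch, assuming fluency is re-exported from langcheck.metrics.de the same way sentiment and toxicity are:

from langcheck.metrics.de import fluency  # assumed re-export

result = fluency(['Tokio ist die Hauptstadt von Japan.'])
print(result)  # a MetricValue holding one fluency score per output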
58 changes: 5 additions & 53 deletions src/langcheck/metrics/de/source_based_text_quality.py
@@ -1,64 +1,23 @@
 from __future__ import annotations
 
-from math import floor
-from typing import Dict, List, Optional, cast
+from typing import Dict, List, Optional
 
-from nltk.tokenize import sent_tokenize
 from openai import OpenAI
-from transformers.pipelines import pipeline
-from transformers.pipelines.base import Pipeline
 
 from langcheck.metrics._validation import (
     validate_parameters_context_relevance, validate_parameters_source_based)
+from langcheck.metrics.de._translation import Translate
 from langcheck.metrics.en._openai import OpenAIBasedEvaluator
 from langcheck.metrics.en.source_based_text_quality import \
     factual_consistency as en_factual_consistency
 from langcheck.metrics.metric_value import MetricValue
 from langcheck.utils.progess_bar import tqdm_wrapper
 
 _factual_consistency_translation_model_path = 'Helsinki-NLP/opus-mt-de-en'
-_factual_consistency_translation_pipeline: Pipeline | None = None
 
 LANG = 'de'
 
 
-def _translate(texts: str, _translation_pipeline: Pipeline) -> str:
-    '''Translate the texts using the translation pipeline.
-    It splits the texts into blocks and translates each block separately,
-    avoiding problems with long texts.
-    Args:
-        texts: The texts to translate
-        _translation_pipeline: The translation pipeline
-    Returns:
-        The translated texts
-    '''
-    tokenization = _translation_pipeline.tokenizer(
-        texts, return_tensors="pt")  # type: ignore
-    if tokenization.input_ids.shape[1] > (
-            _translation_pipeline.model.config.max_length / 2):
-        max_length = _translation_pipeline.model.config.max_length
-        blocks = floor(tokenization.input_ids.shape[1] / max_length) + 3
-        sentences = sent_tokenize(texts)
-        # Split sentences into a number of blocks, e.g., 2 blocks = 2 groups
-        len_block = floor(len(sentences) / blocks) + 1
-        # print(len_block, len(sentences), blocks)
-        sentences_list = []
-        for i in range(blocks):
-            sentences_list.append(sentences[i * len_block:(i + 1) * len_block])
-        texts_ = [" ".join(sent) for sent in sentences_list]
-    else:
-        texts_ = [texts]
-    texts_en = []
-    for text in texts_:
-        text_en = [
-            str(d['translation_text'])  # type: ignore
-            for d in _translation_pipeline(text)  # type: ignore
-        ]
-        texts_en.append(" ".join(text_en))
-    text_en_final = " ".join(texts_en)
-    return text_en_final
-
-
 def factual_consistency(
     generated_outputs: List[str] | str,
     sources: List[str] | str,
@@ -128,21 +87,14 @@ def factual_consistency(
     metric_value.language = LANG
     return metric_value
 
-    global _factual_consistency_translation_pipeline
-    if _factual_consistency_translation_pipeline is None:
-        _factual_consistency_translation_pipeline = pipeline(
-            'translation', model=_factual_consistency_translation_model_path)
+    translation = Translate(_factual_consistency_translation_model_path)
 
     # Translate the sources and generated outputs to English.
     # Currently, the type checks are not working for the pipeline, since
     # too diverse types can be returned.
-    en_source = [
-        _translate(source, _factual_consistency_translation_pipeline)
-        for source in sources
-    ]
+    en_source = [translation(source) for source in sources]
     en_generated_outputs = [
-        _translate(gen_out, _factual_consistency_translation_pipeline)
-        for gen_out in generated_outputs
+        translation(gen_out) for gen_out in generated_outputs
     ]
     # Compute the factual consistency scores in English.
     factual_consistency_scores = en_factual_consistency(
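factual_consistency follows the same pattern: sources and generated outputs are both routed through the shared Translate wrapper, then scored by the English metric. A sketch under the same re-export assumption, with hypothetical example strings:

from langcheck.metrics.de import factual_consistency  # assumed re-export

result = factual_consistency(
    generated_outputs=['Tokio ist die Hauptstadt von Japan.'],
    sources=['Tokio ist die Hauptstadt und bevölkerungsreichste Stadt Japans.'])
print(result)  # scores are computed on the English translations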
38 changes: 38 additions & 0 deletions tests/metrics/de/test_translation.py
@@ -0,0 +1,38 @@
+import pytest
+
+from langcheck.metrics.de import Translate
+
+
+@pytest.mark.parametrize(
+    'de_text,en_text',
+    [
+        ([
+            'Ich habe keine persönlichen Meinungen, Emotionen oder Bewusstsein.',  # noqa: E501
+            'I have no personal opinions, emotions or consciousness.'
+        ]),
+        ([
+            'Mein Freund. Willkommen in den Karpaten.',
+            'My friend, welcome to the Carpathians.'
+        ]),
+        ([
+            'Tokio ist die Hauptstadt von Japan.',
+            'Tokyo is the capital of Japan.'
+        ]),
+    ])
+def test_translate_de_en(de_text: str, en_text: str) -> None:
+    translation = Translate('Helsinki-NLP/opus-mt-de-en')
+    assert translation(de_text) == en_text
+
+
+@pytest.mark.parametrize('en_text,de_text', [
+    ('I have no personal opinions, emotions or consciousness.',
+     'Ich habe keine persönlichen Meinungen, Emotionen oder Bewusstsein.'),
+    ('My Friend. Welcome to the Carpathians. I am anxiously expecting you.',
+     'Willkommen bei den Karpaten, ich erwarte Sie.'),
+    ('Tokyo is the capital of Japan.', 'Tokio ist die Hauptstadt Japans.'),
+])
+def test_translate_en_de(en_text: str, de_text: str) -> None:
+    translation = Translate('Helsinki-NLP/opus-mt-en-de')
+    assert translation(en_text) == de_text
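
The new tests can be run with pytest tests/metrics/de/test_translation.py. Note that they fetch the opus-mt checkpoints on first run and assert exact model output, so they may be sensitive to model and tokenizer revisions.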
