citadel-ai · yosukehigashi · Jan 22, 2024 · Dec 5, 2023 · Dec 5, 2023 · Dec 5, 2023
diff --git a/benchmarking/data/qags_cnndm-de.json b/benchmarking/data/qags_cnndm-de.json
diff --git a/benchmarking/data/qags_xsum-de.json b/benchmarking/data/qags_xsum-de.json
diff --git a/src/langcheck/metrics/de/_translation.py b/src/langcheck/metrics/de/_translation.py
@@ -31,27 +31,30 @@ def _translate(self, texts: str) -> str:
         tokenization = self._translation_pipeline.tokenizer(
             texts, return_tensors="pt")  # type: ignore
         if tokenization.input_ids.shape[1] > (self._max_length / 2):
-            blocks = floor(
-                tokenization.input_ids.shape[1] / self._max_length) + 3
+            # Split the text into blocks if it is too long, using about twice
+            # as many blocks as needed, to avoid problems with long texts.
+            # NB: this heuristic comes from a few hundred tests, not theory.
+            blocks = floor(2 * tokenization.input_ids.shape[1] /
+                           self._max_length)
             sentences = sent_tokenize(texts)
             # Split sentences into a number of blocks, e.g., 2 blocks = 2 groups
             len_block = floor(len(sentences) / blocks) + 1
             sentences_list = []
             for i in range(blocks):
                 sentences_list.append(sentences[i * len_block:(i + 1) *
                                                 len_block])
-            texts_ = [" ".join(sent) for sent in sentences_list]
+            text_list = [" ".join(sent) for sent in sentences_list]
         else:
-            texts_ = [texts]
-        texts_en = []
-        for text in texts_:
+            text_list = [texts]
+        translated_texts = []
+        for text in text_list:
             text_en = [
                 str(d['translation_text'])  # type: ignore
                 for d in self._translation_pipeline(text)  # type: ignore
             ]
-            texts_en.append(" ".join(text_en))
-        text_en_final = " ".join(texts_en)
-        return text_en_final
+            translated_texts.append(" ".join(text_en))
+        text_translated_final = " ".join(translated_texts)
+        return text_translated_final
 
     def __call__(self, text: str) -> str:
         '''Translate the text using the translation pipeline.

diff --git a/src/langcheck/metrics/de/source_based_text_quality.py b/src/langcheck/metrics/de/source_based_text_quality.py
@@ -80,6 +80,8 @@ def factual_consistency(
     ], ('Unsupported model type. '
         'The supported ones are ["local", "openai", "azure_openai"]')
 
+    # The English prompt works well enough for German, as it does for Japanese
+    # TODO: Investigate performance improvement with German prompt / translation
     if model_type == 'openai' or model_type == 'azure_openai':
         metric_value = en_factual_consistency(generated_outputs, sources,
                                               prompts, model_type,
@@ -96,6 +98,7 @@ def factual_consistency(
     en_generated_outputs = [
         translation(gen_out) for gen_out in generated_outputs
     ]
+
     # Compute the factual consistency scores in English.
     factual_consistency_scores = en_factual_consistency(
         generated_outputs=en_generated_outputs, sources=en_source).metric_values