use simpler image for testing emotion detector (#190)

* use simpler image for testing * include age in faces test again * fix typo * try with newer tensorflow version * remove testing for age again * try with tensorflow newer versions only for breaking change in transformers * force transformers to use pytorch
ssciwr · May 31, 2024 · 38498e3 · 38498e3
1 parent 922a64f
commit 38498e3
Show file tree

Hide file tree

Showing 7 changed files with 68 additions and 12 deletions.
diff --git a/ammico/faces.py b/ammico/faces.py
@@ -203,7 +203,7 @@ def facial_expression_analysis(self) -> dict:
         result = {"number_faces": len(faces) if len(faces) <= 3 else 3}
         # We limit ourselves to three faces
         for i, face in enumerate(faces[:3]):
-            result[f"person{ i+1 }"] = self.analyze_single_face(face)
+            result[f"person{i+1}"] = self.analyze_single_face(face)
         self.clean_subdict(result)
         return self.subdict
 

diff --git a/ammico/multimodal_search.py b/ammico/multimodal_search.py
@@ -332,7 +332,8 @@ def parsing_images(
             path_to_lib = lavis.__file__[:-11] + "models/clip_models/"
             url = "https://raw.githubusercontent.com/salesforce/LAVIS/main/lavis/models/clip_models/bpe_simple_vocab_16e6.txt.gz"
             r = requests.get(url, allow_redirects=False)
-            open(path_to_lib + "bpe_simple_vocab_16e6.txt.gz", "wb").write(r.content)
+            with open(path_to_lib + "bpe_simple_vocab_16e6.txt.gz", "wb") as f:
+                f.write(r.content)
 
         image_keys = sorted(self.subdict.keys())
         image_names = [self.subdict[k]["filename"] for k in image_keys]

diff --git a/ammico/test/data/example_faces.json b/ammico/test/data/example_faces.json
@@ -1,10 +1,10 @@
 {
         "face": "Yes", 
-        "multiple_faces": "Yes", 
-        "no_faces": 11, 
-        "wears_mask": ["No", "No", "Yes"], 
-        "gender": ["Man", "Man", "Man"], 
-        "race": ["white", "white", null], 
-        "emotion": ["sad", "fear", null], 
-        "emotion (category)": ["Negative", "Negative", null]
+        "multiple_faces": "No", 
+        "no_faces": 1, 
+        "wears_mask": ["No"], 
+        "gender": ["Woman"], 
+        "race": ["asian"], 
+        "emotion": ["happy"], 
+        "emotion (category)": ["Positive"]
 }    
diff --git a/ammico/test/data/pexels-pixabay-415829.jpg b/ammico/test/data/pexels-pixabay-415829.jpg
diff --git a/ammico/test/test_faces.py b/ammico/test/test_faces.py
@@ -21,15 +21,15 @@ def test_set_keys():
 
 def test_analyse_faces(get_path):
     mydict = {
-        "filename": get_path + "IMG_2746.png",
+        "filename": get_path + "pexels-pixabay-415829.jpg",
     }
     mydict.update(fc.EmotionDetector(mydict).analyse_image())
 
     with open(get_path + "example_faces.json", "r") as file:
         out_dict = json.load(file)
     # delete the filename key
     mydict.pop("filename", None)
-    # delete the age key, as this is conflicting - gives different results sometimes
+    # do not test for age, as this is not a reliable metric
     mydict.pop("age", None)
     for key in mydict.keys():
         assert mydict[key] == out_dict[key]
diff --git a/ammico/text.py b/ammico/text.py
@@ -238,6 +238,7 @@ def text_summary(self):
             revision=self.revision_summary,
             min_length=5,
             max_length=20,
+            framework="pt",
         )
         try:
             summary = pipe(self.subdict["text_english"][0:max_number_of_characters])
@@ -258,6 +259,7 @@ def text_sentiment_transformers(self):
             model=self.model_sentiment,
             revision=self.revision_sentiment,
             truncation=True,
+            framework="pt",
         )
         result = pipe(self.subdict["text_english"])
         self.subdict["sentiment"] = result[0]["label"]
@@ -272,6 +274,7 @@ def text_ner(self):
             model=self.model_ner,
             revision=self.revision_ner,
             aggregation_strategy="simple",
+            framework="pt",
         )
         result = pipe(self.subdict["text_english"])
         self.subdict["entity"] = []
@@ -281,6 +284,58 @@ def text_ner(self):
             self.subdict["entity_type"].append(entity["entity_group"])
 
 
+class TextAnalyzer:
+    """Used to get text from a csv and then run the TextDetector on it."""
+
+    def __init__(self, csv_path: str, column_key: str = None) -> None:
+        """Init the TextTranslator class.
+
+        Args:
+            csv_path (str): Path to the CSV file containing the text entries.
+            column_key (str): Key for the column containing the text entries.
+                Defaults to None.
+        """
+        self.csv_path = csv_path
+        self.column_key = column_key
+        self._check_valid_csv_path()
+        self._check_file_exists()
+
+    def _check_valid_csv_path(self):
+        if not isinstance(self.csv_path, str):
+            raise ValueError("The provided path to the CSV file is not a string.")
+        if not self.csv_path.endswith(".csv"):
+            raise ValueError("The provided file is not a CSV file.")
+
+    def _check_file_exists(self):
+        try:
+            with open(self.csv_path, "r") as file:  # noqa
+                pass
+        except FileNotFoundError:
+            raise FileNotFoundError("The provided CSV file does not exist.")
+
+    def read_csv(self) -> dict:
+        """Read the CSV file and return the dictionary with the text entries.
+
+        Returns:
+            dict: The dictionary with the text entries.
+        """
+        df = pd.read_csv(self.csv_path, encoding="utf8")
+        if not self.column_key:
+            self.column_key = "text"
+
+        if self.column_key not in df:
+            raise ValueError(
+                "The provided column key is not in the CSV file. Please check."
+            )
+        self.mylist = df[self.column_key].to_list()
+        self.mydict = {}
+        for i, text in enumerate(self.mylist):
+            self.mydict[self.csv_path + "row-" + str(i)] = {
+                "filename": self.csv_path,
+                "text": text,
+            }
+
+
 class PostprocessText:
     def __init__(
         self,

diff --git a/pyproject.toml b/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
     "ammico-lavis",
     "setuptools",
     "spacy",
-    "tensorflow<=2.12.3",
+    "tensorflow>=2.13.0",
     "torch<2.1.0",
     "transformers",
     "google-cloud-vision",