+segmentAndPredictCNN +adjusted weight multiplier
Aniike-t committed Oct 13, 2024
1 parent c36b9f5 commit ca50b9e
Showing 8 changed files with 59 additions and 18 deletions.
Binary file added ProjectAssets/CBIR_updated_flowchart.png
8 changes: 5 additions & 3 deletions README.md
@@ -1,8 +1,10 @@
-## **Working of above CBIR system**
+# **Content Based Image Retrieval**
 
-![working_flowchart](ProjectAssets/cbir_flowchart.png)
+## Working
 
-## Important links:
+![working_flowchart](ProjectAssets/CBIR_updated_flowchart.png)
+
+## Important links
 
 1. https://ar5iv.labs.arxiv.org/html/1706.06064v1
 2. https://arxiv.org/abs/2312.10089v1
4 changes: 2 additions & 2 deletions backend/system/Retrieval/RetrieveAlgo.py
@@ -37,9 +37,9 @@ def create_vector_space_model(self):
             weight = row['weight']
 
             # Check if the feature is in the included_words list
-            query_weight = 1.0  # Default weight
+            query_weight = 0.75  # Default weight
             if feature in self.included_words:
-                query_weight = 3.0  # Example increased weight for included words
+                query_weight = 3.5  # Example increased weight for included words
 
             # Update the vector for this filename
             feature_index = features.index(feature)
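Net effect of the retuned multipliers: an explicitly included word now contributes 3.5 / 0.75 ≈ 4.7× the weight of a default feature, up from 3× before. A minimal sketch of how the two multipliers shape a query vector; the feature names and base weights below are invented for illustration, only the 0.75 / 3.5 values come from this commit:

```python
# Hypothetical features and per-feature base weights; "car" stands in for
# a term the user explicitly included in the query.
included_words = {"car"}

query_vector = []
for feature, weight in [("car", 0.9), ("tree", 0.6), ("sky", 0.4)]:
    query_weight = 0.75            # new default weight
    if feature in included_words:
        query_weight = 3.5         # new boost for included words
    query_vector.append(round(weight * query_weight, 2))

print(query_vector)  # [3.15, 0.45, 0.3] -- "car" now dominates the match score
```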
55 changes: 46 additions & 9 deletions backend/system/methods/getFeaturesCNN.py
@@ -28,20 +28,14 @@ def split_multiword_features(self, feature_data):
             for value in values:
                 processed_data.append({
                     "filename": item['filename'],
+                    "region": item.get('region', "Original"),
                     "feature_type": item['feature_type'],
                     "feature_value": value,
                     "probability": item['probability']
                 })
         return processed_data
 
-    def get_features(self, filename):
-        try:
-            img = Image.open(filename).convert("RGB")
-        except Exception as e:
-            print(f"Error opening image {filename}: {e}")
-            return []
-
-        # Process the image
+    def extract_features_from_image(self, img, filename, region_name="Original"):
         inputs = self.processor(images=img, return_tensors="pt")
 
         with torch.no_grad():  # Disable gradient calculation
@@ -55,12 +49,55 @@ def get_features(self, filename):
         for i in range(top5_prob.size(0)):
             feature_list.append({
                 "filename": filename,
+                "region": region_name,
                 "feature_type": "Image classification",
                 "feature_value": self.model.config.id2label[top5_catid[i].item()],
                 "probability": round(top5_prob[i].item(), 4)
             })
 
-        processed_feature_list = self.split_multiword_features(feature_list)
+        return feature_list
+
+    def get_quadrant_images(self, img):
+        width, height = img.size
+        mid_x, mid_y = width // 2, height // 2
+
+        quadrants = {
+            "Top Left": img.crop((0, 0, mid_x, mid_y)),
+            "Top Right": img.crop((mid_x, 0, width, mid_y)),
+            "Bottom Left": img.crop((0, mid_y, mid_x, height)),
+            "Bottom Right": img.crop((mid_x, mid_y, width, height)),
+        }
+        return quadrants
+
+    def adjust_probabilities(self, original_features, quadrant_features):
+        for q_feature in quadrant_features:
+            # Apply 0.75 multiplier for quadrant features
+            q_feature['probability'] *= 0.75
+            for o_feature in original_features:
+                # If feature value matches, apply 1.25 multiplier
+                if q_feature['feature_value'] == o_feature['feature_value']:
+                    q_feature['probability'] *= 1.25
+        return quadrant_features
+
+    def get_features(self, filename):
+        try:
+            img = Image.open(filename).convert("RGB")
+        except Exception as e:
+            print(f"Error opening image {filename}: {e}")
+            return []
+
+        # Extract features from the original image
+        original_features = self.extract_features_from_image(img, filename, "Original")
+
+        # Get features from quadrants
+        quadrants = self.get_quadrant_images(img)
+        all_features = original_features.copy()
+        for quadrant_name, quadrant_img in quadrants.items():
+            quadrant_features = self.extract_features_from_image(quadrant_img, filename, quadrant_name)
+            adjusted_quadrant_features = self.adjust_probabilities(original_features, quadrant_features)
+            all_features += adjusted_quadrant_features
+
+        processed_feature_list = self.split_multiword_features(all_features)
         processed_feature_list_extra = self.process_top_features(processed_feature_list)
 
         # Combine processed features
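Taken together, `get_features` now classifies the full image plus four quadrant crops, tags every prediction with its region, down-weights quadrant predictions by 0.75, and rewards agreement with the full image by 1.25 (a net 0.9375× for matching labels). A self-contained sketch of the adjustment step; the labels and probabilities are invented stand-ins for the classifier's top-5 output, only the 0.75 / 1.25 multipliers come from this commit:

```python
# Standalone version of the quadrant probability adjustment.
def adjust_probabilities(original_features, quadrant_features):
    for q in quadrant_features:
        q["probability"] *= 0.75                 # penalty for quadrant crops
        for o in original_features:
            if q["feature_value"] == o["feature_value"]:
                q["probability"] *= 1.25         # reward agreement with the full image
    return quadrant_features

original = [{"feature_value": "sports car", "probability": 0.80}]
quadrant = [{"feature_value": "sports car", "probability": 0.60},
            {"feature_value": "street sign", "probability": 0.20}]

print(adjust_probabilities(original, quadrant))
# sports car: 0.60 * 0.75 * 1.25 ≈ 0.5625; street sign: 0.20 * 0.75 ≈ 0.15
```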
1 change: 1 addition & 0 deletions backend/system/tokenisation/SQLqueryGenerator.py
@@ -54,6 +54,7 @@ def generate_query(self):
             self.sql_query += f" AND feature_value != '{word}'"
 
         self.sql_query += ") ORDER BY filename;"  # Close the WHERE clause and order the results
+        print(self.sql_query)
         return self.sql_query, self.fixed_words
 
 # Example usage
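For reference, the string the new `print(self.sql_query)` emits ends with the pieces visible in this hunk. The sketch below is a guess at its overall shape: only the `AND feature_value != '...'` tail and the `") ORDER BY filename;"` closer appear in the diff; the SELECT prefix, table name, and the concrete values ('car', 'wheel') are invented, since the rest of `generate_query` is outside the hunk:

```python
# Hypothetical shape of a generated query printed by the new debug line.
print("SELECT filename FROM features WHERE (feature_value = 'car'"
      " AND feature_value != 'wheel') ORDER BY filename;")
```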
2 changes: 1 addition & 1 deletion backend/system/tokenisation/SentenceConv.py
@@ -50,4 +50,4 @@ def convert_to_query(self, sentence):
 # converter = SentenceConverter()
 # sentence = "A car behind tree without wheel"
 # result = converter.convert_to_query(sentence)
-# print(result) # Output: car tree -bush
+# print(result) # Output: car tree -wheel
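The corrected comment matches what the converter actually does: terms after "without" come out negated. A minimal sketch of that kind of negation handling, assuming a simple token filter; the stopword set and parsing logic below are assumptions, since the real `SentenceConverter` body is not shown in this diff:

```python
# Assumed simplification of "without"-style negation parsing.
def convert_to_query(sentence):
    tokens = sentence.lower().split()
    stopwords = {"a", "an", "the", "behind", "on", "in"}
    query, negate = [], False
    for tok in tokens:
        if tok == "without":
            negate = True      # everything after "without" is excluded
            continue
        if tok in stopwords:
            continue
        query.append(f"-{tok}" if negate else tok)
    return " ".join(query)

print(convert_to_query("A car behind tree without wheel"))  # car tree -wheel
```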
7 changes: 4 additions & 3 deletions backend/system/tokenisation/wordnetExtraction.py
@@ -3,6 +3,7 @@
 from nltk.corpus import wordnet
 import nltk
 from nltk.tokenize import word_tokenize
+import math
 
 # Set NLTK data path to the current folder
 nltk_data_path = os.path.join(os.getcwd(), 'nltk_data')
@@ -25,7 +26,7 @@ def download_nltk_data():
 
 download_nltk_data()
 
-def get_synonyms(word, limit=3):
+def get_synonyms(word, limit=10):
     """Get top synonyms for a given word from WordNet, limited to a certain number."""
     synonyms = set()
     for syn in wordnet.synsets(word):
@@ -44,7 +45,7 @@ def process_top_features(features):
     processed_features = []
 
     features_sorted = sorted(features, key=lambda x: x['probability'], reverse=True)
-    top_half_count = len(features_sorted) // 2
+    top_half_count = math.ceil(len(features_sorted) / 2)
     top_features = features_sorted[:top_half_count]
 
     for feature in top_features:
@@ -59,7 +60,7 @@
                 "filename": feature['filename'],
                 "feature_type": feature['feature_type'],
                 "feature_value": synonym,
-                "probability": feature['probability'] / 1.5
+                "probability": feature['probability'] * 0.35
             })
 
     return processed_features
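With the limit raised from 3 to 10 synonyms and each synonym's probability cut to 0.35× its parent (previously the parent divided by 1.5, i.e. about 0.67×), the expansion casts a wider but much fainter net. A runnable sketch of the widened expansion; the body of `get_synonyms` is an assumed reconstruction, since the diff shows only its first lines, and the parent probability is invented (requires the nltk wordnet corpus):

```python
from nltk.corpus import wordnet

# Assumed reconstruction of get_synonyms; only the signature and the first
# two body lines appear in the diff.
def get_synonyms(word, limit=10):  # limit raised from 3 to 10 in this commit
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name().replace('_', ' '))
            if len(synonyms) >= limit:
                return list(synonyms)
    return list(synonyms)

parent_probability = 0.8  # invented classifier confidence
for synonym in get_synonyms('car'):
    # Each synonym now inherits 0.35x the parent's probability (was / 1.5).
    print(synonym, round(parent_probability * 0.35, 2))  # 0.28 each
```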
Empty file removed backend/system/utils/math.py
