+segmentAndPredictCNN +adjusted weight multiplier
Aniike-t committed Oct 13, 2024
1 parent c36b9f5 commit ca50b9e
Showing 8 changed files with 59 additions and 18 deletions.
Binary file added ProjectAssets/CBIR_updated_flowchart.png
8 changes: 5 additions & 3 deletions README.md
@@ -1,8 +1,10 @@
-## **Working of above CBIR system**
+# **Content Based Image Retrieval**
 
-![working_flowchart](ProjectAssets/cbir_flowchart.png)
+## Working
 
-## Important links:
+![working_flowchart](ProjectAssets/CBIR_updated_flowchart.png)
+
+## Important links
 
 1. https://ar5iv.labs.arxiv.org/html/1706.06064v1
 2. https://arxiv.org/abs/2312.10089v1
4 changes: 2 additions & 2 deletions backend/system/Retrieval/RetrieveAlgo.py
@@ -37,9 +37,9 @@ def create_vector_space_model(self):
             weight = row['weight']
 
             # Check if the feature is in the included_words list
-            query_weight = 1.0  # Default weight
+            query_weight = 0.75  # Default weight
             if feature in self.included_words:
-                query_weight = 3.0  # Example increased weight for included words
+                query_weight = 3.5  # Example increased weight for included words
 
             # Update the vector for this filename
             feature_index = features.index(feature)
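Net effect of the retuned multipliers: an explicitly included word now contributes 3.5 / 0.75 ≈ 4.7× the weight of a default feature, up from 3× before. A minimal sketch of how the two multipliers shape a query vector; the feature names and base weights below are invented for illustration, only the 0.75 / 3.5 values come from this commit:

```python
# Hypothetical features and per-feature base weights; "car" stands in for
# a term the user explicitly included in the query.
included_words = {"car"}

query_vector = []
for feature, weight in [("car", 0.9), ("tree", 0.6), ("sky", 0.4)]:
    query_weight = 0.75            # new default weight
    if feature in included_words:
        query_weight = 3.5         # new boost for included words
    query_vector.append(round(weight * query_weight, 2))

print(query_vector)  # [3.15, 0.45, 0.3] -- "car" now dominates the match score
```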
55 changes: 46 additions & 9 deletions backend/system/methods/getFeaturesCNN.py
@@ -28,20 +28,14 @@ def split_multiword_features(self, feature_data):
             for value in values:
                 processed_data.append({
                     "filename": item['filename'],
+                    "region": item.get('region', "Original"),
                     "feature_type": item['feature_type'],
                     "feature_value": value,
                     "probability": item['probability']
                 })
         return processed_data
 
-    def get_features(self, filename):
-        try:
-            img = Image.open(filename).convert("RGB")
-        except Exception as e:
-            print(f"Error opening image {filename}: {e}")
-            return []
-
-        # Process the image
+    def extract_features_from_image(self, img, filename, region_name="Original"):
         inputs = self.processor(images=img, return_tensors="pt")
 
         with torch.no_grad():  # Disable gradient calculation
@@ -55,12 +49,55 @@ def get_features(self, filename):
         for i in range(top5_prob.size(0)):
             feature_list.append({
                 "filename": filename,
+                "region": region_name,
                 "feature_type": "Image classification",
                 "feature_value": self.model.config.id2label[top5_catid[i].item()],
                 "probability": round(top5_prob[i].item(), 4)
             })
 
-        processed_feature_list = self.split_multiword_features(feature_list)
+        return feature_list
+
+    def get_quadrant_images(self, img):
+        width, height = img.size
+        mid_x, mid_y = width // 2, height // 2
+
+        quadrants = {
+            "Top Left": img.crop((0, 0, mid_x, mid_y)),
+            "Top Right": img.crop((mid_x, 0, width, mid_y)),
+            "Bottom Left": img.crop((0, mid_y, mid_x, height)),
+            "Bottom Right": img.crop((mid_x, mid_y, width, height)),
+        }
+        return quadrants
+
+    def adjust_probabilities(self, original_features, quadrant_features):
+        for q_feature in quadrant_features:
+            # Apply 0.75 multiplier for quadrant features
+            q_feature['probability'] *= 0.75
+            for o_feature in original_features:
+                # If feature value matches, apply 1.25 multiplier
+                if q_feature['feature_value'] == o_feature['feature_value']:
+                    q_feature['probability'] *= 1.25
+        return quadrant_features
+
+    def get_features(self, filename):
+        try:
+            img = Image.open(filename).convert("RGB")
+        except Exception as e:
+            print(f"Error opening image {filename}: {e}")
+            return []
+
+        # Extract features from the original image
+        original_features = self.extract_features_from_image(img, filename, "Original")
+
+        # Get features from quadrants
+        quadrants = self.get_quadrant_images(img)
+        all_features = original_features.copy()
+        for quadrant_name, quadrant_img in quadrants.items():
+            quadrant_features = self.extract_features_from_image(quadrant_img, filename, quadrant_name)
+            adjusted_quadrant_features = self.adjust_probabilities(original_features, quadrant_features)
+            all_features += adjusted_quadrant_features
+
+        processed_feature_list = self.split_multiword_features(all_features)
         processed_feature_list_extra = self.process_top_features(processed_feature_list)
 
         # Combine processed features
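Taken together, `get_features` now classifies the full image plus four quadrant crops, tags every prediction with its region, down-weights quadrant predictions by 0.75, and rewards agreement with the full image by 1.25 (a net 0.9375× for matching labels). A self-contained sketch of the adjustment step; the labels and probabilities are invented stand-ins for the classifier's top-5 output, only the 0.75 / 1.25 multipliers come from this commit:

```python
# Standalone version of the quadrant probability adjustment.
def adjust_probabilities(original_features, quadrant_features):
    for q in quadrant_features:
        q["probability"] *= 0.75                 # penalty for quadrant crops
        for o in original_features:
            if q["feature_value"] == o["feature_value"]:
                q["probability"] *= 1.25         # reward agreement with the full image
    return quadrant_features

original = [{"feature_value": "sports car", "probability": 0.80}]
quadrant = [{"feature_value": "sports car", "probability": 0.60},
            {"feature_value": "street sign", "probability": 0.20}]

print(adjust_probabilities(original, quadrant))
# sports car: 0.60 * 0.75 * 1.25 ≈ 0.5625; street sign: 0.20 * 0.75 ≈ 0.15
```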
1 change: 1 addition & 0 deletions backend/system/tokenisation/SQLqueryGenerator.py
@@ -54,6 +54,7 @@ def generate_query(self):
             self.sql_query += f" AND feature_value != '{word}'"
 
         self.sql_query += ") ORDER BY filename;"  # Close the WHERE clause and order the results
+        print(self.sql_query)
         return self.sql_query, self.fixed_words
 
 # Example usage
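For reference, the string the new `print(self.sql_query)` emits ends with the pieces visible in this hunk. The sketch below is a guess at its overall shape: only the `AND feature_value != '...'` tail and the `") ORDER BY filename;"` closer appear in the diff; the SELECT prefix, table name, and the concrete values ('car', 'wheel') are invented, since the rest of `generate_query` is outside the hunk:

```python
# Hypothetical shape of a generated query printed by the new debug line.
print("SELECT filename FROM features WHERE (feature_value = 'car'"
      " AND feature_value != 'wheel') ORDER BY filename;")
```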
2 changes: 1 addition & 1 deletion backend/system/tokenisation/SentenceConv.py
@@ -50,4 +50,4 @@ def convert_to_query(self, sentence):
 # converter = SentenceConverter()
 # sentence = "A car behind tree without wheel"
 # result = converter.convert_to_query(sentence)
-# print(result) # Output: car tree -bush
+# print(result) # Output: car tree -wheel
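The corrected comment matches what the converter actually does: terms after "without" come out negated. A minimal sketch of that kind of negation handling, assuming a simple token filter; the stopword set and parsing logic below are assumptions, since the real `SentenceConverter` body is not shown in this diff:

```python
# Assumed simplification of "without"-style negation parsing.
def convert_to_query(sentence):
    tokens = sentence.lower().split()
    stopwords = {"a", "an", "the", "behind", "on", "in"}
    query, negate = [], False
    for tok in tokens:
        if tok == "without":
            negate = True      # everything after "without" is excluded
            continue
        if tok in stopwords:
            continue
        query.append(f"-{tok}" if negate else tok)
    return " ".join(query)

print(convert_to_query("A car behind tree without wheel"))  # car tree -wheel
```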
7 changes: 4 additions & 3 deletions backend/system/tokenisation/wordnetExtraction.py
@@ -3,6 +3,7 @@
 from nltk.corpus import wordnet
 import nltk
 from nltk.tokenize import word_tokenize
+import math
 
 # Set NLTK data path to the current folder
 nltk_data_path = os.path.join(os.getcwd(), 'nltk_data')
@@ -25,7 +26,7 @@ def download_nltk_data():
 
 download_nltk_data()
 
-def get_synonyms(word, limit=3):
+def get_synonyms(word, limit=10):
     """Get top synonyms for a given word from WordNet, limited to a certain number."""
     synonyms = set()
     for syn in wordnet.synsets(word):
@@ -44,7 +45,7 @@ def process_top_features(features):
     processed_features = []
 
     features_sorted = sorted(features, key=lambda x: x['probability'], reverse=True)
-    top_half_count = len(features_sorted) // 2
+    top_half_count = math.ceil(len(features_sorted) / 2)
     top_features = features_sorted[:top_half_count]
 
     for feature in top_features:
@@ -59,7 +60,7 @@
                 "filename": feature['filename'],
                 "feature_type": feature['feature_type'],
                 "feature_value": synonym,
-                "probability": feature['probability'] / 1.5
+                "probability": feature['probability'] * 0.35
             })
 
     return processed_features
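With the limit raised from 3 to 10 synonyms and each synonym's probability cut to 0.35× its parent (previously the parent divided by 1.5, i.e. about 0.67×), the expansion casts a wider but much fainter net. A runnable sketch of the widened expansion; the body of `get_synonyms` is an assumed reconstruction, since the diff shows only its first lines, and the parent probability is invented (requires the nltk wordnet corpus):

```python
from nltk.corpus import wordnet

# Assumed reconstruction of get_synonyms; only the signature and the first
# two body lines appear in the diff.
def get_synonyms(word, limit=10):  # limit raised from 3 to 10 in this commit
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name().replace('_', ' '))
            if len(synonyms) >= limit:
                return list(synonyms)
    return list(synonyms)

parent_probability = 0.8  # invented classifier confidence
for synonym in get_synonyms('car'):
    # Each synonym now inherits 0.35x the parent's probability (was / 1.5).
    print(synonym, round(parent_probability * 0.35, 2))  # 0.28 each
```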
Empty file removed backend/system/utils/math.py
