Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing Alegre path for tipline image search. #1776

Merged
merged 2 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/models/bot/alegre.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def similar_items_ids_and_scores(team_ids, thresholds = {})
'UploadedImage' => 'image',
}[self.media.type].to_s
threshold = [{value: thresholds.dig(media_type.to_sym, :value)}] || Bot::Alegre.get_threshold_for_query(media_type, self, true)
ids_and_scores = Bot::Alegre.get_items_with_similar_media(Bot::Alegre.media_file_url(self), threshold, team_ids, "/#{media_type}/similarity/search/").to_h
ids_and_scores = Bot::Alegre.get_items_with_similar_media_v2(Bot::Alegre.media_file_url(self), threshold, team_ids, media_type).to_h
elsif self.is_text?
ids_and_scores = {}
threads = []
Expand Down
2 changes: 1 addition & 1 deletion app/models/concerns/alegre_similarity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_items_with_similarity(type, pm, threshold)
if type == 'text'
self.get_merged_items_with_similar_text(pm, threshold)
else
results = self.get_items_with_similar_media(self.media_file_url(pm), threshold, pm.team_id, "/#{type}/similarity/search/").reject{ |id, _score_with_context| pm.id == id }
results = self.get_items_with_similar_media_v2(self.media_file_url(pm), threshold, pm.team_id, type).reject{ |id, _score_with_context| pm.id == id }
self.merge_response_with_source_and_target_fields(results, type)
end
end
Expand Down
13 changes: 12 additions & 1 deletion app/models/concerns/alegre_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@ def host
end

# Returns the Alegre synchronous similarity path for a project media.
#
# The media type is resolved through `get_type` and the path itself is built by
# `sync_path_for_type`, so both entry points share a single path format.
#
# @param project_media [Object] item passed as-is to `get_type`
# @return [String] whatever `sync_path_for_type` builds for the resolved type
def sync_path(project_media)
  self.sync_path_for_type(get_type(project_media))
end

# Builds the Alegre synchronous similarity path for a media type.
#
# @param type [#to_s] media type name appended to the path
# @return [String] "/similarity/sync/" followed by the type
def sync_path_for_type(type)
  prefix = '/similarity/sync/'
  prefix + type.to_s
end

def async_path(project_media)
Expand Down Expand Up @@ -256,5 +260,12 @@ def get_similar_items_v2(project_media, field)
# Looks up items similar to the given project media and hands them to
# `add_relationships`. Does nothing (and returns nil) when
# `project_media.is_blank?` is true.
#
# @param project_media [Object] item to relate; must respond to `is_blank?`
# @param field [Object, nil] forwarded untouched to `get_similar_items_v2`
# @return whatever `add_relationships` returns, or nil when skipped
def relate_project_media(project_media, field=nil)
  return if project_media.is_blank?

  similar_items = self.get_similar_items_v2(project_media, field)
  self.add_relationships(project_media, similar_items)
end

# Searches Alegre for items whose media is similar to the file at `media_url`.
#
# Audio and image use the path built by `sync_path_for_type`; every other type
# keeps the "/<type>/similarity/search/" path.
#
# @param media_url [String] URL of the media file to search for
# @param threshold [Object] forwarded untouched to `get_items_with_similar_media`
# @param team_ids [Object] forwarded untouched to `get_items_with_similar_media`
# @param type [String] media type, e.g. 'audio' or 'image'
# @return whatever `get_items_with_similar_media` returns
def get_items_with_similar_media_v2(media_url, threshold, team_ids, type)
  alegre_path = ['audio', 'image'].include?(type) ? self.sync_path_for_type(type) : "/#{type}/similarity/search/"
  # FIXME: Stop using this method from v1 once all media types are supported by v2
  # FIXME: Alegre crashes if `media_url` was already requested before, this is why I append a hash
  # Join with `&` when the URL already carries a query string, so existing parameters stay intact.
  separator = media_url.to_s.include?('?') ? '&' : '?'
  self.get_items_with_similar_media("#{media_url}#{separator}hash=#{SecureRandom.hex}", threshold, team_ids, alegre_path)
end
end
end
5 changes: 3 additions & 2 deletions app/models/concerns/smooch_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def get_search_results(uid, last_message, team_id, language)
type = message['type']
after = self.date_filter(team_id)
query = message['text']
query = message['mediaUrl'] unless type == 'text'
query = CheckS3.rewrite_url(message['mediaUrl']) unless type == 'text'
results = self.search_for_similar_published_fact_checks(type, query, [team_id], after, nil, language).select{ |pm| is_a_valid_search_result(pm) }
rescue StandardError => e
self.handle_search_error(uid, e, language)
Expand Down Expand Up @@ -161,10 +161,11 @@ def search_for_similar_published_fact_checks_no_cache(type, query, team_ids, aft
end
else
media_url = Twitter::TwitterText::Extractor.extract_urls(query)[0]
Rails.logger.info "[Smooch Bot] Got media_url #{media_url} from query #{query}"
return [] if media_url.blank?
media_url = self.save_locally_and_return_url(media_url, type, feed_id)
threshold = Bot::Alegre.get_threshold_for_query(type, pm)[0][:value]
alegre_results = Bot::Alegre.get_items_with_similar_media(media_url, [{ value: threshold }], team_ids, "/#{type}/similarity/search/")
alegre_results = Bot::Alegre.get_items_with_similar_media_v2(media_url, [{ value: threshold }], team_ids, type)
results = self.parse_search_results_from_alegre(alegre_results, after, feed_id, team_ids)
Rails.logger.info "[Smooch Bot] Media similarity search got #{results.count} results while looking for '#{query}' after date #{after.inspect} for teams #{team_ids}"
end
Expand Down
2 changes: 1 addition & 1 deletion app/resources/api/v2/report_resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def self.apply_media_similarity_filter(organization_ids, threshold, media_path,
unless media.blank?
media[0].rewind
CheckS3.write(media_path, media[0].content_type.gsub(/^video/, 'application'), media[0].read)
ids_and_scores = Bot::Alegre.get_items_with_similar_media(CheckS3.public_url(media_path), [{ value: threshold }], organization_ids, "/#{media_type}/similarity/search/")
ids_and_scores = Bot::Alegre.get_items_with_similar_media_v2(CheckS3.public_url(media_path), [{ value: threshold }], organization_ids, media_type)
RequestStore.store[:scores] = ids_and_scores # Store the scores so we can return them
ids = ids_and_scores.keys.uniq || [0]
CheckS3.delete(media_path)
Expand Down
2 changes: 1 addition & 1 deletion lib/check_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def alegre_file_similar_items
file_path = "check_search/#{hash}"
end
threshold = Bot::Alegre.get_threshold_for_query(@options['file_type'], ProjectMedia.new(team_id: Team.current.id))[0][:value]
results = Bot::Alegre.get_items_with_similar_media(CheckS3.public_url(file_path), [{ value: threshold }], @options['team_id'].first, "/#{@options['file_type']}/similarity/search/")
results = Bot::Alegre.get_items_with_similar_media_v2(CheckS3.public_url(file_path), [{ value: threshold }], @options['team_id'].first, @options['file_type'])
results.blank? ? [0] : results.keys
end

Expand Down
Loading
Loading