Skip to content

Commit

Permalink
[WIP] Ticket CV2-5919: Implementing logic for argument threshold (for both explainers and fact-checks)
Browse files Browse the repository at this point in the history
  • Loading branch information
caiosba committed Jan 19, 2025
1 parent 421c426 commit a49943f
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 15 deletions.
10 changes: 9 additions & 1 deletion app/graph/types/team_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,15 @@ def statistics(period:, language: nil, platform: nil)
# Runs a similarity search for `search_text` against the articles of the
# resolved object and returns the matches as tipline search results.
#
# Only admins may use this for now (feature flag): for any other user, or when
# there is no current user, the method returns nil without searching.
#
# All optional arguments are bundled into a single settings hash (indifferent
# access, so both string and symbol keys work) and forwarded to
# `search_for_similar_articles`. Arguments that are not provided are forwarded
# as nil.
#
# Returns an array with each result mapped through `as_tipline_search_result`.
def bot_query(search_text:, threshold: nil, max_number_of_words: nil, enable_language_detection: nil, should_restrict_by_language: nil, enable_link_shortening: nil, utm_code: nil)
  return nil unless User.current&.is_admin # Feature flag

  settings = {
    threshold: threshold,
    max_number_of_words: max_number_of_words,
    enable_language_detection: enable_language_detection,
    should_restrict_by_language: should_restrict_by_language,
    enable_link_shortening: enable_link_shortening,
    utm_code: utm_code
  }.with_indifferent_access

  # Search exactly once, with the custom settings. The second positional
  # argument is left as nil on purpose: there is no specific item to compare against.
  results = object.search_for_similar_articles(search_text, nil, settings)
  results.map(&:as_tipline_search_result)
end
end
19 changes: 15 additions & 4 deletions app/models/concerns/smooch_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,20 @@ def search_for_similar_published_fact_checks(type, query, team_ids, limit, after
end
end

# Resolves the effective value of a setting.
#
# setting_name    - String or Symbol naming the setting.
# custom_settings - Hash-like object with per-call overrides (string or symbol
#                   keys), or nil when there are no overrides.
#
# An override is used whenever it is not nil (so an explicit `false` is
# honored). Otherwise only the threshold setting has a fallback, which is the
# text similarity threshold; every other setting resolves to nil.
def get_setting_value_or_default(setting_name, custom_settings)
  overrides = custom_settings.to_h.with_indifferent_access
  override = overrides[setting_name]

  if override.nil?
    # No override given: fall back to the default, which exists only for the threshold
    setting_name.to_sym == :threshold ? self.get_text_similarity_threshold : nil
  else
    override
  end
end

# "type" is text, video, audio or image
# "query" is either a piece of text or a media URL
def search_for_similar_published_fact_checks_no_cache(type, query, team_ids, limit, after = nil, feed_id = nil, language = nil, published_only = true)
def search_for_similar_published_fact_checks_no_cache(type, query, team_ids, limit, after = nil, feed_id = nil, language = nil, published_only = true, settings = nil)
results = []
pm = nil
pm = ProjectMedia.new(team_id: team_ids[0]) if team_ids.size == 1 # We'll use the settings of a team instead of global settings when there is only one team
Expand All @@ -183,7 +194,7 @@ def search_for_similar_published_fact_checks_no_cache(type, query, team_ids, lim
if Bot::Alegre.get_number_of_words(text) <= self.max_number_of_words_for_keyword_search
results = self.search_by_keywords_for_similar_published_fact_checks(words, after, team_ids, limit, feed_id, language, published_only)
else
alegre_results = Bot::Alegre.get_merged_similar_items(pm, [{ value: self.get_text_similarity_threshold }], Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS, text, team_ids)
alegre_results = Bot::Alegre.get_merged_similar_items(pm, [{ value: self.get_setting_value_or_default('threshold', settings) }], Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS, text, team_ids)
results = self.parse_search_results_from_alegre(alegre_results, limit, published_only, after, feed_id, team_ids)
Rails.logger.info "[Smooch Bot] Text similarity search got #{results.count} results while looking for '#{text}' after date #{after.inspect} for teams #{team_ids}"
end
Expand Down Expand Up @@ -310,7 +321,7 @@ def ask_for_feedback_when_all_search_results_are_received(app_id, language, work
end
end

def search_for_explainers(uid, query, team_id, limit, language = nil)
def search_for_explainers(uid, query, team_id, limit, language = nil, settings = nil)
results = nil
begin
text = ::Bot::Smooch.extract_claim(query)
Expand All @@ -319,7 +330,7 @@ def search_for_explainers(uid, query, team_id, limit, language = nil)
results = results.where(language: language) if !language.nil? && should_restrict_by_language?([team_id])
results = results.order('updated_at DESC')
else
results = Explainer.search_by_similarity(text, language, team_id, limit)
results = Explainer.search_by_similarity(text, language, team_id, limit, settings.to_h.with_indifferent_access[:threshold])
end
rescue StandardError => e
self.handle_search_error(uid, e, language) unless uid.blank?
Expand Down
5 changes: 3 additions & 2 deletions app/models/explainer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,9 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp)
end
end

def self.search_by_similarity(text, language, team_id, limit)
def self.search_by_similarity(text, language, team_id, limit, custom_threshold = nil)
models_thresholds = Explainer.get_alegre_models_and_thresholds(team_id)
models_thresholds.each { |model, _threshold| models_thresholds[model] = custom_threshold } unless custom_threshold.blank?
context = {
type: 'explainer',
team_id: team_id
Expand All @@ -116,7 +117,7 @@ def self.search_by_similarity(text, language, team_id, limit)
per_model_threshold: models_thresholds,
context: context
}
response = Bot::Alegre.query_sync_with_params(params, "text")
response = Bot::Alegre.query_sync_with_params(params, 'text')
results = response['result'].to_a.sort_by{ |result| [result['model'] != Bot::Alegre::ELASTICSEARCH_MODEL ? 1 : 0, result['_score']] }.reverse
explainer_ids = results.collect{ |result| result.dig('context', 'explainer_id').to_i }.uniq.first(limit)
explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids)
Expand Down
17 changes: 9 additions & 8 deletions app/models/team.rb
Original file line number Diff line number Diff line change
Expand Up @@ -563,15 +563,16 @@ def filter_by_keywords(query, filters, type = 'FactCheck')
query.where(Arel.sql("#{tsvector} @@ #{tsquery}"))
end

def search_for_similar_articles(query, pm = nil)
def search_for_similar_articles(query, pm = nil, settings = nil)
# query: expected to be text
# pm: to request a most relevant to specific item and also include both FactCheck & Explainer
limit = pm.nil? ? CheckConfig.get('most_relevant_team_limit', 3, :integer) : CheckConfig.get('most_relevant_item_limit', 10, :integer)
threads = []
fc_items = []
ex_items = []
threads << Thread.new {
result_ids = Bot::Smooch.search_for_similar_published_fact_checks_no_cache('text', query, [self.id], limit, nil, nil, nil, false).map(&:id)
# FIXME: Threads approach not working locally - requests from GraphiQL hang forever.
# threads << Thread.new {
result_ids = Bot::Smooch.search_for_similar_published_fact_checks_no_cache('text', query, [self.id], limit, nil, nil, nil, false, settings).map(&:id)
unless result_ids.blank?
fc_items = FactCheck.joins(claim_description: :project_media).where('project_medias.id': result_ids)
if pm.nil?
Expand All @@ -583,13 +584,13 @@ def search_for_similar_articles(query, pm = nil)
fc_items = fc_items.where.not('fact_checks.id' => pm.fact_check_id) unless pm&.fact_check_id.nil?
end
end
}
threads << Thread.new {
ex_items = Bot::Smooch.search_for_explainers(nil, query, self.id, limit).distinct
# }
# threads << Thread.new {
ex_items = Bot::Smooch.search_for_explainers(nil, query, self.id, limit, nil, settings).distinct
# Exclude the ones already applied to a target item
ex_items = ex_items.where.not(id: pm.explainer_ids) unless pm&.explainer_ids.blank?
}
threads.map(&:join)
# }
# threads.map(&:join)
items = fc_items
# Get Explainers if no fact-check returned or get similar_articles for a ProjectMedia
items += ex_items if items.blank? || !pm.nil?
Expand Down

0 comments on commit a49943f

Please sign in to comment.