# Review notes. Text before the first "diff --git" header is not part of any hunk.
#
# Purpose: pass an optional `settings` hash from `bot_query` (team_type.rb) through
# `search_for_similar_articles` (team.rb) into the fact-check and explainer
# similarity searches (smooch_search.rb, explainer.rb).
#
# NOTE(review): within these hunks only `threshold` is ever read from `settings`;
#   the other keys built in `bot_query` are passed along but not consumed here.
# NOTE(review): the Thread-based search in team.rb is commented out (see FIXME), so
#   the fact-check and explainer searches now run one after the other, and
#   `threads = []` has no remaining user in the visible hunks.
# NOTE(review): when no threshold is supplied, the fact-check path falls back to
#   `get_text_similarity_threshold`, while the explainer path keeps whatever
#   `get_alegre_models_and_thresholds` returned - confirm the difference is intended.
# NOTE(review): line breaks and indentation were restored by convention and checked
#   against the hunk line counts; verify context whitespace against the target files.
diff --git a/app/graph/types/team_type.rb b/app/graph/types/team_type.rb
index c755c4201..88083f823 100644
--- a/app/graph/types/team_type.rb
+++ b/app/graph/types/team_type.rb
@@ -413,7 +413,15 @@ def statistics(period:, language: nil, platform: nil)
   def bot_query(search_text:, threshold: nil, max_number_of_words: nil, enable_language_detection: nil, should_restrict_by_language: nil, enable_link_shortening: nil, utm_code: nil)
     return nil unless User.current&.is_admin # Feature flag
 
-    results = object.search_for_similar_articles(search_text)
+    settings = {
+      threshold: threshold,
+      max_number_of_words: max_number_of_words,
+      enable_language_detection: enable_language_detection,
+      should_restrict_by_language: should_restrict_by_language,
+      enable_link_shortening: enable_link_shortening,
+      utm_code: utm_code
+    }.with_indifferent_access
+    results = object.search_for_similar_articles(search_text, nil, settings)
     results.map(&:as_tipline_search_result)
   end
 end
diff --git a/app/models/concerns/smooch_search.rb b/app/models/concerns/smooch_search.rb
index c82bf7131..7681cc08b 100644
--- a/app/models/concerns/smooch_search.rb
+++ b/app/models/concerns/smooch_search.rb
@@ -160,9 +160,20 @@ def search_for_similar_published_fact_checks(type, query, team_ids, limit, after
       end
     end
 
+    def get_setting_value_or_default(setting_name, custom_settings)
+      custom_value = custom_settings.to_h.with_indifferent_access[setting_name]
+      return custom_value unless custom_value.nil?
+      case setting_name.to_sym
+      when :threshold
+        self.get_text_similarity_threshold
+      else
+        nil
+      end
+    end
+
     # "type" is text, video, audio or image
     # "query" is either a piece of text of a media URL
-    def search_for_similar_published_fact_checks_no_cache(type, query, team_ids, limit, after = nil, feed_id = nil, language = nil, published_only = true)
+    def search_for_similar_published_fact_checks_no_cache(type, query, team_ids, limit, after = nil, feed_id = nil, language = nil, published_only = true, settings = nil)
       results = []
       pm = nil
       pm = ProjectMedia.new(team_id: team_ids[0]) if team_ids.size == 1 # We'll use the settings of a team instead of global settings when there is only one team
@@ -183,7 +194,7 @@ def search_for_similar_published_fact_checks_no_cache(type, query, team_ids, lim
         if Bot::Alegre.get_number_of_words(text) <= self.max_number_of_words_for_keyword_search
           results = self.search_by_keywords_for_similar_published_fact_checks(words, after, team_ids, limit, feed_id, language, published_only)
         else
-          alegre_results = Bot::Alegre.get_merged_similar_items(pm, [{ value: self.get_text_similarity_threshold }], Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS, text, team_ids)
+          alegre_results = Bot::Alegre.get_merged_similar_items(pm, [{ value: self.get_setting_value_or_default('threshold', settings) }], Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS, text, team_ids)
           results = self.parse_search_results_from_alegre(alegre_results, limit, published_only, after, feed_id, team_ids)
           Rails.logger.info "[Smooch Bot] Text similarity search got #{results.count} results while looking for '#{text}' after date #{after.inspect} for teams #{team_ids}"
         end
@@ -310,7 +321,7 @@ def ask_for_feedback_when_all_search_results_are_received(app_id, language, work
       end
     end
 
-    def search_for_explainers(uid, query, team_id, limit, language = nil)
+    def search_for_explainers(uid, query, team_id, limit, language = nil, settings = nil)
       results = nil
       begin
         text = ::Bot::Smooch.extract_claim(query)
@@ -319,7 +330,7 @@ def search_for_explainers(uid, query, team_id, limit, language = nil)
           results = results.where(language: language) if !language.nil? && should_restrict_by_language?([team_id])
           results = results.order('updated_at DESC')
         else
-          results = Explainer.search_by_similarity(text, language, team_id, limit)
+          results = Explainer.search_by_similarity(text, language, team_id, limit, settings.to_h.with_indifferent_access[:threshold])
         end
       rescue StandardError => e
         self.handle_search_error(uid, e, language) unless uid.blank?
diff --git a/app/models/explainer.rb b/app/models/explainer.rb
index 96e81be93..da16eeeb2 100644
--- a/app/models/explainer.rb
+++ b/app/models/explainer.rb
@@ -103,8 +103,9 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp)
     end
   end
 
-  def self.search_by_similarity(text, language, team_id, limit)
+  def self.search_by_similarity(text, language, team_id, limit, custom_threshold = nil)
     models_thresholds = Explainer.get_alegre_models_and_thresholds(team_id)
+    models_thresholds.each { |model, _threshold| models_thresholds[model] = custom_threshold } unless custom_threshold.blank?
     context = {
       type: 'explainer',
       team_id: team_id
@@ -116,7 +117,7 @@ def self.search_by_similarity(text, language, team_id, limit)
       per_model_threshold: models_thresholds,
       context: context
     }
-    response = Bot::Alegre.query_sync_with_params(params, "text")
+    response = Bot::Alegre.query_sync_with_params(params, 'text')
     results = response['result'].to_a.sort_by{ |result| [result['model'] != Bot::Alegre::ELASTICSEARCH_MODEL ? 1 : 0, result['_score']] }.reverse
     explainer_ids = results.collect{ |result| result.dig('context', 'explainer_id').to_i }.uniq.first(limit)
     explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids)
diff --git a/app/models/team.rb b/app/models/team.rb
index f5519b8ee..6102807f7 100644
--- a/app/models/team.rb
+++ b/app/models/team.rb
@@ -563,15 +563,16 @@ def filter_by_keywords(query, filters, type = 'FactCheck')
     query.where(Arel.sql("#{tsvector} @@ #{tsquery}"))
   end
 
-  def search_for_similar_articles(query, pm = nil)
+  def search_for_similar_articles(query, pm = nil, settings = nil)
     # query: expected to be text
     # pm: to request a most relevant to specific item and also include both FactCheck & Explainer
     limit = pm.nil? ? CheckConfig.get('most_relevant_team_limit', 3, :integer) : CheckConfig.get('most_relevant_item_limit', 10, :integer)
     threads = []
     fc_items = []
     ex_items = []
-    threads << Thread.new {
-      result_ids = Bot::Smooch.search_for_similar_published_fact_checks_no_cache('text', query, [self.id], limit, nil, nil, nil, false).map(&:id)
+    # FIXME: Threads approach not working locally - requests from GraphiQL hang forever.
+    # threads << Thread.new {
+      result_ids = Bot::Smooch.search_for_similar_published_fact_checks_no_cache('text', query, [self.id], limit, nil, nil, nil, false, settings).map(&:id)
       unless result_ids.blank?
         fc_items = FactCheck.joins(claim_description: :project_media).where('project_medias.id': result_ids)
         if pm.nil?
@@ -583,13 +584,13 @@ def search_for_similar_articles(query, pm = nil)
           fc_items = fc_items.where.not('fact_checks.id' => pm.fact_check_id) unless pm&.fact_check_id.nil?
         end
       end
-    }
-    threads << Thread.new {
-      ex_items = Bot::Smooch.search_for_explainers(nil, query, self.id, limit).distinct
+    # }
+    # threads << Thread.new {
+      ex_items = Bot::Smooch.search_for_explainers(nil, query, self.id, limit, nil, settings).distinct
       # Exclude the ones already applied to a target item
       ex_items = ex_items.where.not(id: pm.explainer_ids) unless pm&.explainer_ids.blank?
-    }
-    threads.map(&:join)
+    # }
+    # threads.map(&:join)
     items = fc_items
     # Get Explainers if no fact-check returned or get similar_articles for a ProjectMedia
     items += ex_items if items.blank? || !pm.nil?