Skip to content

Commit

Permalink
CV2-6038 refactor has claim search filter into has article filter (#2205
Browse files Browse the repository at this point in the history
)

* CV2-6038: include explainer_title to has_article filter

* CV2-6038: set value with nil instead of remove field from ES doc

* CV2-6038: fix tests

* CV2-6038: search by explainer_title field

* CV2-6038: add rake task to migrate existing explainers

* CV2-6038: use a single query for explainer titles

* CV2-6038: apply PR comments
  • Loading branch information
melsawy authored Feb 11, 2025
1 parent d84d284 commit 0e95c0e
Show file tree
Hide file tree
Showing 13 changed files with 151 additions and 88 deletions.
26 changes: 5 additions & 21 deletions app/lib/check_elastic_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,6 @@ def update_elasticsearch_doc(keys, data = {}, pm_id = nil, skip_get_data = false
ElasticSearchWorker.perform_in(1.second, YAML::dump(model), YAML::dump(options), 'update_doc')
end

# Enqueue a background job that removes the given fields from the item's
# Elasticsearch document. `keys` is the list of ES field names to drop and
# `pm_id` identifies the target ProjectMedia document.
def remove_fields_from_elasticsearch_doc(keys, pm_id)
# Honor both the per-instance and the request-wide ES callback kill switches.
return if self.disable_es_callbacks || RequestStore.store[:disable_es_callbacks]
options = { keys: keys, pm_id: pm_id }
model = { klass: self.class.name, id: self.id }
# Delay by 1s and serialize with YAML; the worker maps 'remove_fields' to
# remove_fields_from_elasticsearch_doc_bg.
ElasticSearchWorker.perform_in(1.second, YAML::dump(model), YAML::dump(options), 'remove_fields')
end

def update_recent_activity(obj)
# update `updated_at` date for both PG & ES
updated_at = Time.now
Expand All @@ -56,13 +49,11 @@ def update_elasticsearch_doc_bg(options)
data = get_elasticsearch_data(options[:data], options[:skip_get_data])
fields = {}
options[:keys].each do |k|
unless data[k].nil?
if data[k].class.to_s == 'Hash'
value = get_fresh_value(data[k].with_indifferent_access)
fields[k] = value unless value.nil?
else
fields[k] = data[k]
end
if data[k].class.to_s == 'Hash'
value = get_fresh_value(data[k].with_indifferent_access)
fields[k] = value
else
fields[k] = data[k]
end
end
if fields.count
Expand All @@ -72,13 +63,6 @@ def update_elasticsearch_doc_bg(options)
end
end

# Background counterpart of remove_fields_from_elasticsearch_doc: deletes
# each key from the ES document's _source via a painless update script.
# NOTE(review): this issues one update request per key; a single script
# removing all keys would halve round-trips — confirm before changing.
def remove_fields_from_elasticsearch_doc_bg(options)
options[:keys].each do |k|
source = "ctx._source.remove('#{k}')"
$repository.client.update index: CheckElasticSearchModel.get_index_alias, id: options[:doc_id], body: { script: { source: source } }
end
end

# Get a fresh data based on data(Hash)
def get_fresh_value(data)
value = data['default']
Expand Down
22 changes: 0 additions & 22 deletions app/models/annotations/embed.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ class Embed < Dynamic
end

Dynamic.class_eval do
after_commit :update_elasticsearch_metadata, on: [:create, :update], if: proc { |d| ['metadata', 'verification_status'].include?(d.annotation_type) }

def title=(title)
self.set_metadata_field('title', title)
end
Expand Down Expand Up @@ -36,26 +34,6 @@ def get_metadata_field(field)
data[field.to_s]
end

# after_commit hook: sync the annotation's title/description into the ES
# documents of the affected ProjectMedia items.
def update_elasticsearch_metadata
unless self.annotated.nil?
keys = %w(title description)
if self.annotated_type == 'ProjectMedia'
# Annotation sits directly on an item: push its own field values.
self.update_es_metadata_pm_annotation(keys, self.annotated)
elsif self.annotated_type == 'Media' && self.annotated.type == 'Link'
# Annotation sits on a shared Link media: fan out to every item using it.
self.annotated.project_medias.each do |pm|
m = pm.get_annotations('metadata').last
# Only items WITHOUT their own metadata annotation inherit the media's
# title/description — presumably an item-level annotation takes
# precedence; confirm against the annotation model.
self.update_elasticsearch_doc(keys, { 'title' => pm.title, 'description' => pm.description }, pm.id) if m.nil?
end
end
end
end

# Copy this annotation's values for the given keys into the ProjectMedia's
# Elasticsearch document.
def update_es_metadata_pm_annotation(keys, pm)
data = keys.each_with_object({}) { |field, payload| payload[field] = self.send(field) }
self.update_elasticsearch_doc(keys, data, pm.id)
end

def metadata_for_registration_account(data)
return nil unless self.annotation_type == 'metadata'
unless data.nil?
Expand Down
26 changes: 13 additions & 13 deletions app/models/claim_description.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,14 @@ def text_fields

def article_elasticsearch_data(action = 'create_or_update')
return if self.project_media_id.nil? || self.disable_es_callbacks || RequestStore.store[:disable_es_callbacks]
if action == 'destroy'
self.remove_fields_from_elasticsearch_doc(['claim_description_content', 'claim_description_context'], self.project_media_id)
else
data = { 'claim_description_content' => self.description, 'claim_description_context' => self.context }
self.index_in_elasticsearch(self.project_media_id, data)
end
data = action == 'destroy' ? {
'claim_description_content' => nil,
'claim_description_context' => nil
} : {
'claim_description_content' => self.description,
'claim_description_context' => self.context
}
self.index_in_elasticsearch(self.project_media_id, data)
end

def project_media_was
Expand Down Expand Up @@ -90,13 +92,11 @@ def update_report
fact_check.save!
end
# update ES
# Remove claim_description fields
self.remove_fields_from_elasticsearch_doc(['claim_description_content', 'claim_description_context'], pm.id)
# clear claim_description fields
data = { 'claim_description_content' => nil, 'claim_description_context' => nil }
# clear fact-check values
unless self.fact_check.nil?
data = { 'fact_check_title' => '', 'fact_check_summary' => '', 'fact_check_url' => '', 'fact_check_languages' => [] }
self.fact_check.index_in_elasticsearch(pm.id, data)
end
data.merge!({ 'fact_check_title' => '', 'fact_check_summary' => '', 'fact_check_url' => '', 'fact_check_languages' => [] }) unless self.fact_check.nil?
self.index_in_elasticsearch(pm.id, data)
end
end

Expand Down Expand Up @@ -128,7 +128,7 @@ def migrate_claim_and_fact_check_logs

def log_relevant_article_results
fc = self.fact_check
self.project_media.delay.log_relevant_results(fc.class.name, fc.id, User.current&.id, self.class.actor_session_id)
self.project_media.delay.log_relevant_results(fc.class.name, fc.id, User.current&.id, self.class.actor_session_id) unless fc.nil?
end

def cant_apply_article_to_item_if_article_is_in_the_trash
Expand Down
1 change: 1 addition & 0 deletions app/models/concerns/project_media_cached_fields.rb
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ def title_or_description_update
update_on: FACT_CHECK_EVENTS

cached_field :description,
update_es: true,
recalculate: :recalculate_description,
update_on: title_or_description_update

Expand Down
6 changes: 6 additions & 0 deletions app/models/concerns/project_media_getters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -213,4 +213,10 @@ def team_avatar
def fact_check
self.claim_description&.fact_check
end

# Newline-joined titles of all explainers assigned to this item.
# Returns nil (not "") when the item has no explainers, so callers can
# clear the ES `explainer_title` field instead of indexing an empty string.
def explainers_titles
# pluck(:title) reads only the column instead of instantiating full
# Explainer records, which the original map(&:title) did.
titles = Explainer.joins(:explainer_items)
  .where('explainer_items.project_media_id = ?', self.id)
  .pluck(:title).join("\n")
titles.blank? ? nil : titles
end
end
19 changes: 19 additions & 0 deletions app/models/explainer_item.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class ExplainerItem < ApplicationRecord
validate :cant_apply_article_to_item_if_article_is_in_the_trash

after_create :log_relevant_article_results
after_commit :update_elasticsearch_data

def version_metadata(_changes)
{ explainer_title: self.explainer.title }.to_json
Expand All @@ -25,6 +26,24 @@ def cant_apply_article_to_item_if_article_is_in_the_trash
errors.add(:base, I18n.t(:cant_apply_article_to_item_if_article_is_in_the_trash)) if self.explainer&.trashed
end

# after_commit hook on ExplainerItem: refresh the parent item's `updated_at`
# (both in PG and ES) and re-index its `explainer_title` field whenever an
# explainer is attached or detached.
def update_elasticsearch_data
# Honor both the per-instance and the request-wide ES callback kill switches.
return if self.disable_es_callbacks || RequestStore.store[:disable_es_callbacks]
pm = self.project_media
# touch item to update `updated_at` date
if ProjectMedia.exists?(pm.id)
updated_at = Time.now
# update_columns skips AR callbacks/validations — only the timestamp moves.
pm.update_columns(updated_at: updated_at)
data = { updated_at: updated_at.utc }
# Hash-valued entries are resolved lazily in the background job via
# get_fresh_value: it calls `klass.find(id).send(method)` (here
# ProjectMedia#explainers_titles), falling back to `default` (nil clears
# the field). NOTE(review): `data` mixes a symbol key with a string key —
# presumably downstream access is indifferent; confirm.
data['explainer_title'] = {
method: "explainers_titles",
klass: pm.class.name,
id: pm.id,
default: nil,
}
# `true` skips re-fetching data in the worker (skip_get_data).
pm.update_elasticsearch_doc(data.keys, data, pm.id, true)
end
end

def log_relevant_article_results
ex = self.explainer
self.project_media.delay.log_relevant_results(ex.class.name, ex.id, User.current&.id, self.class.actor_session_id)
Expand Down
2 changes: 2 additions & 0 deletions app/repositories/media_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,5 +132,7 @@ class MediaSearch
indexes :negative_tipline_search_results_count, { type: 'long' }

indexes :tipline_search_results_count, { type: 'long' }

indexes :explainer_title, { type: 'text', analyzer: 'check' }
end
end
1 change: 0 additions & 1 deletion app/workers/elastic_search_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def perform(model_data, options, type)
ops = {
'create_doc' => 'create_elasticsearch_doc_bg',
'update_doc' => 'update_elasticsearch_doc_bg',
'remove_fields' => 'remove_fields_from_elasticsearch_doc_bg',
'update_doc_team' => 'update_elasticsearch_doc_team_bg',
'create_update_doc_nested' => 'create_update_nested_obj_bg',
'destroy_doc' => 'destroy_elasticsearch_doc',
Expand Down
13 changes: 13 additions & 0 deletions db/migrate/20250205224319_add_mapping_for_explainer_title_field.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Adds the `explainer_title` mapping to the Elasticsearch media index so items
# can be searched and filtered by the titles of their assigned explainers
# (mirrors the `indexes :explainer_title` declaration in MediaSearch).
class AddMappingForExplainerTitleField < ActiveRecord::Migration[6.1]
  # Elasticsearch field mappings cannot be removed from an existing index, so
  # this migration is one-way. Using `up`/`down` instead of `change` prevents
  # the put_mapping call from being re-executed on rollback.
  def up
    options = {
      index: CheckElasticSearchModel.get_index_alias,
      body: {
        properties: {
          explainer_title: { type: 'text', analyzer: 'check' }
        }
      }
    }
    $repository.client.indices.put_mapping options
  end

  def down
    # No-op: ES mappings are irreversible; the extra field is harmless.
  end
end
26 changes: 15 additions & 11 deletions lib/check_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def should_hit_elasticsearch?
end
filters_blank = true
['tags', 'keyword', 'language', 'fc_language', 'request_language', 'report_language', 'team_tasks', 'assigned_to', 'report_status', 'range_numeric',
'has_claim', 'cluster_teams', 'published_by', 'annotated_by', 'channels', 'cluster_published_reports'
'has_article', 'cluster_teams', 'published_by', 'annotated_by', 'channels', 'cluster_published_reports'
].each do |filter|
filters_blank = false unless @options[filter].blank?
end
Expand Down Expand Up @@ -309,7 +309,7 @@ def build_es_medias_query
custom_conditions.concat integer_terms_query('channel', 'channels')
custom_conditions.concat integer_terms_query('source_id', 'sources')
custom_conditions.concat doc_conditions
custom_conditions.concat has_claim_conditions
custom_conditions.concat has_article_conditions
custom_conditions.concat file_filter
custom_conditions.concat range_filter(:es)
custom_conditions.concat numeric_range_filter
Expand Down Expand Up @@ -469,8 +469,7 @@ def keyword_conditions
return [] if @options["keyword"].blank? || @options["keyword"].class.name != 'String'
set_keyword_fields
keyword_c = []
field_conditions = build_keyword_conditions_media_fields
keyword_c.concat field_conditions
keyword_c.concat build_keyword_conditions_media_fields
# Search in requests
[['request_username', 'username'], ['request_identifier', 'identifier'], ['request_content', 'content']].each do |pair|
keyword_c << {
Expand All @@ -497,7 +496,7 @@ def set_keyword_fields
def build_keyword_conditions_media_fields
es_fields = []
conditions = []
%w(title description url claim_description_content fact_check_title fact_check_summary claim_description_context fact_check_url source_name).each do |f|
%w(title description url claim_description_content fact_check_title fact_check_summary claim_description_context fact_check_url source_name explainer_title).each do |f|
es_fields << f if should_include_keyword_field?(f)
end
es_fields << 'analysis_title' if should_include_keyword_field?('title')
Expand Down Expand Up @@ -530,13 +529,18 @@ def request_language_conditions
[{ nested: { path: 'requests', query: { terms: { 'requests.language': @options['request_language'] } } } }]
end

def has_claim_conditions
# ES filter clauses for the `has_article` search option. An item "has an
# article" when a claim/fact-check OR an explainer is attached, i.e. when
# `claim_description_content` or `explainer_title` exists in its ES doc.
# Returns [] when the option is absent or has an unrecognized value.
def has_article_conditions
conditions = []
return conditions unless @options.has_key?('has_article')
# Existence clauses for the fields that define "has an article".
# (Fixes "condidtion" typo; builds the list with map instead of a
# manual accumulation loop.)
existence_clauses = ['claim_description_content', 'explainer_title'].map do |field|
  { exists: { field: field } }
end
if @options['has_article'].include?('NO_VALUE')
  # Items with no article at all: neither field may exist.
  conditions << { bool: { must_not: existence_clauses } }
elsif @options['has_article'].include?('ANY_VALUE')
  # Items with at least one article of either kind.
  conditions << { bool: { should: existence_clauses } }
end
conditions
end
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
namespace :check do
  namespace :migrate do
    # Backfill the `explainer_title` ES field for every item that already has
    # explainers assigned (one bulk update per item, deduplicated).
    # Usage: bundle exec rake check:migrate:project_media_explainer_title
    task project_media_explainer_title: :environment do
      started = Time.now.to_i
      index_alias = CheckElasticSearchModel.get_index_alias
      # Set gives O(1) membership checks; Array#include? on a growing list
      # was O(n) per ExplainerItem and degrades on large installations.
      seen_pm_ids = Set.new
      es_body = []
      ExplainerItem.find_each do |raw|
        next if seen_pm_ids.include?(raw.project_media_id)
        pm = raw.project_media
        doc_id = Base64.encode64("ProjectMedia/#{pm.id}")
        fields = { 'explainer_title' => pm.explainers_titles }
        es_body << { update: { _index: index_alias, _id: doc_id, retry_on_conflict: 3, data: { doc: fields } } }
        seen_pm_ids << pm.id
        # Flush in chunks so the bulk request body stays bounded in memory
        # and under the ES request-size limit.
        if es_body.size >= 1000
          $repository.client.bulk body: es_body
          es_body = []
        end
      end
      $repository.client.bulk body: es_body unless es_body.blank?
      minutes = ((Time.now.to_i - started) / 60).to_i
      puts "[#{Time.now}] Done in #{minutes} minutes."
    end
  end
end
20 changes: 9 additions & 11 deletions test/controllers/elastic_search_7_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,10 @@ def setup
assert_equal [pm.id, pm2.id], result.medias.map(&:id).sort
end

test "should filter by keyword and tite description tags and notes fields fields" do
test "should filter by keyword and field settings(tite description tags)" do
RequestStore.store[:skip_cached_field_update] = false
create_verification_status_stuff(false)
t = create_team
p = create_project team: t
pender_url = CheckConfig.get('pender_url_private') + '/api/medias'
url = 'http://test.com'
response = '{"type":"media","data":{"url":"' + url + '/normalized","type":"item", "title": "search_title", "description":"search_desc"}}'
Expand All @@ -183,20 +183,20 @@ def setup
WebMock.stub_request(:get, pender_url).with({ query: { url: url2 } }).to_return(body: response)
m = create_media(account: create_valid_account, url: url)
m1 = create_media(account: create_valid_account, url: url2)
pm = create_project_media project: p, media: m, disable_es_callbacks: false
pm2 = create_project_media project: p, media: m1, disable_es_callbacks: false
pm = create_project_media team: t, media: m, disable_es_callbacks: false
pm2 = create_project_media team: t, media: m1, disable_es_callbacks: false
# add analysis to pm2
pm2.analysis = { title: 'override_title', content: 'override_description' }
# add tags to pm3
pm3 = create_project_media project: p, disable_es_callbacks: false
pm3 = create_project_media team: t, disable_es_callbacks: false
create_tag tag: 'search_title', annotated: pm3, disable_es_callbacks: false
create_tag tag: 'another_desc', annotated: pm3, disable_es_callbacks: false
create_tag tag: 'newtag', annotated: pm3, disable_es_callbacks: false
sleep 2
sleep 1
result = CheckSearch.new({keyword: 'search_title'}.to_json, nil, t.id)
assert_equal [pm.id, pm2.id, pm3.id], result.medias.map(&:id).sort
assert_equal [pm.id, pm3.id], result.medias.map(&:id).sort
result = CheckSearch.new({keyword: 'search_title', keyword_fields: {fields: ['title']}}.to_json, nil, t.id)
assert_equal [pm.id, pm2.id], result.medias.map(&:id).sort
assert_equal [pm.id], result.medias.map(&:id)
result = CheckSearch.new({keyword: 'search_desc', keyword_fields: {fields: ['description']}}.to_json, nil, t.id)
assert_equal [pm.id], result.medias.map(&:id)
result = CheckSearch.new({keyword: 'override_title', keyword_fields: {fields: ['title']}}.to_json, nil, t.id)
Expand All @@ -206,9 +206,7 @@ def setup
result = CheckSearch.new({keyword: 'search_title', keyword_fields: {fields: ['tags']}}.to_json, nil, t.id)
assert_equal [pm3.id], result.medias.map(&:id)
result = CheckSearch.new({keyword: 'another_desc', keyword_fields: {fields:['description', 'tags']}}.to_json, nil, t.id)
assert_equal [pm2.id, pm3.id], result.medias.map(&:id).sort
create_comment annotated: pm, text: 'item notepm', disable_es_callbacks: false
create_comment annotated: pm2, text: 'item comment', disable_es_callbacks: false
assert_equal [pm3.id], result.medias.map(&:id)
end

test "should search by media url" do
Expand Down
Loading

0 comments on commit 0e95c0e

Please sign in to comment.