Skip to content

Commit

Permalink
CV2-6038: add rake task to migrate existing FactCheck (#2221)
Browse files Browse the repository at this point in the history
* CV2-6038: add rake task to migrate existing FactCheck

* CV2-6038: add ProjectMedia join
  • Loading branch information
melsawy authored Feb 16, 2025
1 parent 1f27910 commit ac7403d
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 16 deletions.
2 changes: 1 addition & 1 deletion app/models/claim_description.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def update_report
# clear claim_description fields
data = { 'claim_description_content' => nil, 'claim_description_context' => nil }
# clear fact-check values
data.merge!({ 'fact_check_title' => '', 'fact_check_summary' => '', 'fact_check_url' => '', 'fact_check_languages' => [] }) unless self.fact_check.nil?
data.merge!({ 'fact_check_title' => nil, 'fact_check_summary' => nil, 'fact_check_url' => nil, 'fact_check_languages' => [] }) unless self.fact_check.nil?
self.index_in_elasticsearch(pm.id, data)
end
end
Expand Down
1 change: 1 addition & 0 deletions app/models/concerns/team_associations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ module TeamAssociations
has_many :tipline_newsletters
has_many :tipline_requests, as: :associated
has_many :explainers, dependent: :destroy
has_many :claim_descriptions
has_many :api_keys

has_annotations
Expand Down
6 changes: 3 additions & 3 deletions app/models/fact_check.rb
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ def update_report
def article_elasticsearch_data(action = 'create_or_update')
return if self.project_media.nil? || self.disable_es_callbacks || RequestStore.store[:disable_es_callbacks]
data = action == 'destroy' ? {
'fact_check_title' => '',
'fact_check_summary' => '',
'fact_check_url' => '',
'fact_check_title' => nil,
'fact_check_summary' => nil,
'fact_check_url' => nil,
'fact_check_languages' => []
} : {
'fact_check_title' => self.title,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,32 +1,38 @@
namespace :check do
namespace :migrate do
task index_fc_url_and_cd_context_search_fields: :environment do
task index_fact_check_fields: :environment do
# This rake task indexes the following fields
# 1) claim_description_context
# 2) fact_check_url
# 1) claim_description [content, context]
# 2) fact_check [title, summary, url, language]
started = Time.now.to_i
index_alias = CheckElasticSearchModel.get_index_alias
client = $repository.client
last_team_id = Rails.cache.read('check:migrate:index_new_search_fields:team_id') || 0
last_team_id = Rails.cache.read('check:migrate:index_fact_check_fields:team_id') || 0
Team.where('id > ?', last_team_id).find_each do |team|
team.project_medias.find_in_batches(:batch_size => 1000) do |pms|
team.claim_descriptions.joins(:project_media).find_in_batches(:batch_size => 1000) do |cds|
es_body = []
ids = pms.map(&:id)
ProjectMedia.select('project_medias.id as id, fc.url as url, cd.context as context')
ids = cds.map(&:id)
ClaimDescription.select('claim_descriptions.project_media_id as pm_id, claim_descriptions.description, claim_descriptions.context, fact_checks.*')
.where(id: ids)
.joins("INNER JOIN claim_descriptions cd ON project_medias.id = cd.project_media_id")
.joins("INNER JOIN fact_checks fc ON cd.id = fc.claim_description_id")
.joins(:fact_check)
.find_in_batches(:batch_size => 1000) do |items|
print '.'
items.each do |item|
doc_id = Base64.encode64("ProjectMedia/#{item['id']}")
fields = { 'fact_check_url' => item['url'], 'claim_description_context' => item['context'] }
doc_id = Base64.encode64("ProjectMedia/#{item['pm_id']}")
fields = {
'claim_description_content' => item['description'],
'claim_description_context' => item['context'],
'fact_check_title' => item['title'],
'fact_check_summary' => item['summary'],
'fact_check_url' => item['url'],
'fact_check_languages' => [item['language']]
}
es_body << { update: { _index: index_alias, _id: doc_id, retry_on_conflict: 3, data: { doc: fields } } }
end
end
client.bulk body: es_body unless es_body.blank?
end
Rails.cache.write('check:migrate:index_new_search_fields:team_id', team.id)
Rails.cache.write('check:migrate:index_fact_check_fields:team_id', team.id)
end
minutes = ((Time.now.to_i - started) / 60).to_i
puts "[#{Time.now}] Done in #{minutes} minutes."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,33 @@ namespace :check do
minutes = ((Time.now.to_i - started) / 60).to_i
puts "[#{Time.now}] Done in #{minutes} minutes."
end

# Resets the claim-description and fact-check search fields in Elasticsearch.
# For each team, one update-by-query request overwrites those fields on every
# document whose team_id matches the team: the text fields become nil and
# fact_check_languages becomes an empty list.
task project_media_initiate_fact_check: :environment do
  began_at = Time.now.to_i
  Team.find_each do |team|
    # Values handed to the script as `params`; each key matches one _source field.
    blank_values = {
      claim_description_content: nil,
      claim_description_context: nil,
      fact_check_title: nil,
      fact_check_summary: nil,
      fact_check_url: nil,
      fact_check_languages: []
    }
    # Script that copies every entry of `params` onto the document source.
    reset_script = "ctx._source.claim_description_content = params.claim_description_content;ctx._source.claim_description_context = params.claim_description_context;ctx._source.fact_check_title=params.fact_check_title;ctx._source.fact_check_summary=params.fact_check_summary;ctx._source.fact_check_url=params.fact_check_url;ctx._source.fact_check_languages=params.fact_check_languages"
    request = {
      index: CheckElasticSearchModel.get_index_alias,
      # Per the Update By Query API, 'proceed' continues past version conflicts
      # instead of aborting the whole request.
      conflicts: 'proceed',
      body: {
        script: { source: reset_script, params: blank_values },
        # Restrict the update to documents that belong to the current team.
        query: { term: { team_id: { value: team.id } } }
      }
    }
    $repository.client.update_by_query(request)
  end
  elapsed_minutes = (Time.now.to_i - began_at) / 60
  puts "[#{Time.now}] Done in #{elapsed_minutes} minutes."
end
end
end

0 comments on commit ac7403d

Please sign in to comment.