Skip to content

Commit

Permalink
Merge branch 'develop' into feature/CV2-6069-rake-task-to-export-text…
Browse files Browse the repository at this point in the history
…-similarity-data
  • Loading branch information
caiosba committed Feb 12, 2025
2 parents 383726b + a7ccd92 commit b13d7f1
Show file tree
Hide file tree
Showing 24 changed files with 224 additions and 117 deletions.
6 changes: 3 additions & 3 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ GEM
rexml
crass (1.0.6)
daemons (1.4.1)
date (3.3.4)
date (3.4.1)
debugger-ruby_core_source (1.3.8)
declarative (0.0.20)
deep_cloneable (3.2.0)
Expand Down Expand Up @@ -436,7 +436,7 @@ GEM
net-http-digest_auth (1.4.1)
net-http-persistent (4.0.1)
connection_pool (~> 2.2)
net-imap (0.4.10)
net-imap (0.4.19)
date
net-protocol
net-pop (0.1.2)
Expand Down Expand Up @@ -831,7 +831,7 @@ GEM
rack (>= 1, < 3)
thor (1.3.1)
thread_safe (0.3.6)
timeout (0.4.1)
timeout (0.4.3)
tins (1.31.0)
sync
to_regexp (0.2.1)
Expand Down
3 changes: 2 additions & 1 deletion app/graph/types/query_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,12 @@ def me

def team(id: nil, slug: nil, random: nil)
tid = id.to_i
team = nil
unless slug.blank?
team = Team.where(slug: slug).first
tid = team.id unless team.nil?
end
team.reload if random
team.reload if team && random
tid = Team.current&.id || User.current&.teams&.first&.id if tid === 0
GraphqlCrudOperations.load_if_can(Team, tid.to_i, context)
end
Expand Down
2 changes: 2 additions & 0 deletions app/graph/types/team_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ def tipline_requests(from_timestamp:, to_timestamp:)
argument :tags, [GraphQL::Types::String, null: true], required: false, camelize: false
argument :language, [GraphQL::Types::String, null: true], required: false, camelize: false
argument :updated_at, GraphQL::Types::String, required: false, camelize: false # JSON
argument :created_at, GraphQL::Types::String, required: false, camelize: false # JSON
argument :text, GraphQL::Types::String, required: false, camelize: false # Search by text
argument :standalone, GraphQL::Types::Boolean, required: false, camelize: false # Not applied to any item (fact-checks only)
argument :publisher_ids, [GraphQL::Types::Int, null: true], required: false, camelize: false
Expand Down Expand Up @@ -347,6 +348,7 @@ def articles(**args)
argument :tags, [GraphQL::Types::String, null: true], required: false, camelize: false
argument :language, [GraphQL::Types::String, null: true], required: false, camelize: false
argument :updated_at, GraphQL::Types::String, required: false, camelize: false # JSON
argument :created_at, GraphQL::Types::String, required: false, camelize: false # JSON
argument :text, GraphQL::Types::String, required: false, camelize: false # Search by text
argument :standalone, GraphQL::Types::Boolean, required: false, camelize: false # Not applied to any item (fact-checks only)
argument :publisher_ids, [GraphQL::Types::Int, null: true], required: false, camelize: false
Expand Down
26 changes: 5 additions & 21 deletions app/lib/check_elastic_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,6 @@ def update_elasticsearch_doc(keys, data = {}, pm_id = nil, skip_get_data = false
ElasticSearchWorker.perform_in(1.second, YAML::dump(model), YAML::dump(options), 'update_doc')
end

def remove_fields_from_elasticsearch_doc(keys, pm_id)
return if self.disable_es_callbacks || RequestStore.store[:disable_es_callbacks]
options = { keys: keys, pm_id: pm_id }
model = { klass: self.class.name, id: self.id }
ElasticSearchWorker.perform_in(1.second, YAML::dump(model), YAML::dump(options), 'remove_fields')
end

def update_recent_activity(obj)
# update `updated_at` date for both PG & ES
updated_at = Time.now
Expand All @@ -56,13 +49,11 @@ def update_elasticsearch_doc_bg(options)
data = get_elasticsearch_data(options[:data], options[:skip_get_data])
fields = {}
options[:keys].each do |k|
unless data[k].nil?
if data[k].class.to_s == 'Hash'
value = get_fresh_value(data[k].with_indifferent_access)
fields[k] = value unless value.nil?
else
fields[k] = data[k]
end
if data[k].class.to_s == 'Hash'
value = get_fresh_value(data[k].with_indifferent_access)
fields[k] = value
else
fields[k] = data[k]
end
end
if fields.count
Expand All @@ -72,13 +63,6 @@ def update_elasticsearch_doc_bg(options)
end
end

def remove_fields_from_elasticsearch_doc_bg(options)
options[:keys].each do |k|
source = "ctx._source.remove('#{k}')"
$repository.client.update index: CheckElasticSearchModel.get_index_alias, id: options[:doc_id], body: { script: { source: source } }
end
end

# Get a fresh data based on data(Hash)
def get_fresh_value(data)
value = data['default']
Expand Down
22 changes: 0 additions & 22 deletions app/models/annotations/embed.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ class Embed < Dynamic
end

Dynamic.class_eval do
after_commit :update_elasticsearch_metadata, on: [:create, :update], if: proc { |d| ['metadata', 'verification_status'].include?(d.annotation_type) }

def title=(title)
self.set_metadata_field('title', title)
end
Expand Down Expand Up @@ -36,26 +34,6 @@ def get_metadata_field(field)
data[field.to_s]
end

def update_elasticsearch_metadata
unless self.annotated.nil?
keys = %w(title description)
if self.annotated_type == 'ProjectMedia'
self.update_es_metadata_pm_annotation(keys, self.annotated)
elsif self.annotated_type == 'Media' && self.annotated.type == 'Link'
self.annotated.project_medias.each do |pm|
m = pm.get_annotations('metadata').last
self.update_elasticsearch_doc(keys, { 'title' => pm.title, 'description' => pm.description }, pm.id) if m.nil?
end
end
end
end

def update_es_metadata_pm_annotation(keys, pm)
data = {}
keys.each { |k| data[k] = self.send(k) }
self.update_elasticsearch_doc(keys, data, pm.id)
end

def metadata_for_registration_account(data)
return nil unless self.annotation_type == 'metadata'
unless data.nil?
Expand Down
1 change: 1 addition & 0 deletions app/models/bot/smooch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,7 @@ def self.replicate_status_to_children(pm_id, status, uid, tid)
s = target.annotations.where(annotation_type: 'verification_status').last&.load
next if s.nil? || s.status == status
s.status = status
s.bypass_status_publish_check = true
s.save!
end
User.current = nil
Expand Down
26 changes: 13 additions & 13 deletions app/models/claim_description.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,14 @@ def text_fields

def article_elasticsearch_data(action = 'create_or_update')
return if self.project_media_id.nil? || self.disable_es_callbacks || RequestStore.store[:disable_es_callbacks]
if action == 'destroy'
self.remove_fields_from_elasticsearch_doc(['claim_description_content', 'claim_description_context'], self.project_media_id)
else
data = { 'claim_description_content' => self.description, 'claim_description_context' => self.context }
self.index_in_elasticsearch(self.project_media_id, data)
end
data = action == 'destroy' ? {
'claim_description_content' => nil,
'claim_description_context' => nil
} : {
'claim_description_content' => self.description,
'claim_description_context' => self.context
}
self.index_in_elasticsearch(self.project_media_id, data)
end

def project_media_was
Expand Down Expand Up @@ -90,13 +92,11 @@ def update_report
fact_check.save!
end
# update ES
# Remove claim_description fields
self.remove_fields_from_elasticsearch_doc(['claim_description_content', 'claim_description_context'], pm.id)
# clear claim_description fields
data = { 'claim_description_content' => nil, 'claim_description_context' => nil }
# clear fact-check values
unless self.fact_check.nil?
data = { 'fact_check_title' => '', 'fact_check_summary' => '', 'fact_check_url' => '', 'fact_check_languages' => [] }
self.fact_check.index_in_elasticsearch(pm.id, data)
end
data.merge!({ 'fact_check_title' => '', 'fact_check_summary' => '', 'fact_check_url' => '', 'fact_check_languages' => [] }) unless self.fact_check.nil?
self.index_in_elasticsearch(pm.id, data)
end
end

Expand Down Expand Up @@ -128,7 +128,7 @@ def migrate_claim_and_fact_check_logs

def log_relevant_article_results
fc = self.fact_check
self.project_media.delay.log_relevant_results(fc.class.name, fc.id, User.current&.id, self.class.actor_session_id)
self.project_media.delay.log_relevant_results(fc.class.name, fc.id, User.current&.id, self.class.actor_session_id) unless fc.nil?
end

def cant_apply_article_to_item_if_article_is_in_the_trash
Expand Down
4 changes: 3 additions & 1 deletion app/models/concerns/alegre_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,10 @@ def delete_package_audio(project_media, _field, params)
end

def store_package(project_media, field, params={})
type = get_type(project_media)
return if type.nil?
generic_package(project_media, field).merge(
self.send("store_package_#{get_type(project_media)}", project_media, field, params)
self.send("store_package_#{type}", project_media, field, params)
)
end

Expand Down
1 change: 1 addition & 0 deletions app/models/concerns/project_media_cached_fields.rb
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ def title_or_description_update
update_on: FACT_CHECK_EVENTS

cached_field :description,
update_es: true,
recalculate: :recalculate_description,
update_on: title_or_description_update

Expand Down
6 changes: 6 additions & 0 deletions app/models/concerns/project_media_getters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -213,4 +213,10 @@ def team_avatar
def fact_check
self.claim_description&.fact_check
end

def explainers_titles
  # Collect the titles of all explainers assigned to this item, one per line
  assigned_explainers = Explainer.joins(:explainer_items).where('explainer_items.project_media_id = ?', self.id)
  joined_titles = assigned_explainers.map(&:title).join("\n")
  # A blank result (no explainers, or only blank titles) is reported as nil
  joined_titles.presence
end
end
19 changes: 19 additions & 0 deletions app/models/explainer_item.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class ExplainerItem < ApplicationRecord
validate :cant_apply_article_to_item_if_article_is_in_the_trash

after_create :log_relevant_article_results
after_commit :update_elasticsearch_data

def version_metadata(_changes)
{ explainer_title: self.explainer.title }.to_json
Expand All @@ -25,6 +26,24 @@ def cant_apply_article_to_item_if_article_is_in_the_trash
errors.add(:base, I18n.t(:cant_apply_article_to_item_if_article_is_in_the_trash)) if self.explainer&.trashed
end

# Touches the parent item and pushes `updated_at` and `explainer_title` to its
# Elasticsearch document.
#
# Does nothing when ES callbacks are disabled (on this record or through
# RequestStore), or when the item is missing or no longer in the database.
def update_elasticsearch_data
  return if self.disable_es_callbacks || RequestStore.store[:disable_es_callbacks]
  pm = self.project_media
  # The item may already be gone when this callback runs, so guard against a
  # nil association before asking for its id.
  return if pm.nil? || !ProjectMedia.exists?(pm.id)
  # touch item to update `updated_at` date (update_columns skips callbacks)
  updated_at = Time.now
  pm.update_columns(updated_at: updated_at)
  data = { updated_at: updated_at.utc }
  # NOTE(review): a Hash value looks like a deferred lookup descriptor
  # (method/klass/id/default) resolved later by the ES update code — confirm
  # against `get_fresh_value`.
  data['explainer_title'] = {
    method: 'explainers_titles',
    klass: pm.class.name,
    id: pm.id,
    default: nil,
  }
  pm.update_elasticsearch_doc(data.keys, data, pm.id, true)
end

def log_relevant_article_results
ex = self.explainer
self.project_media.delay.log_relevant_results(ex.class.name, ex.id, User.current&.id, self.class.actor_session_id)
Expand Down
6 changes: 4 additions & 2 deletions app/models/team.rb
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,8 @@ def filtered_explainers(filters = {})
query = query.where(user_id: filters[:user_ids].to_a.map(&:to_i)) unless filters[:user_ids].blank?

# Filter by date
query = query.where(updated_at: Range.new(*format_times_search_range_filter(JSON.parse(filters[:updated_at]), nil))) unless filters[:updated_at].blank?
query = query.where('explainers.created_at != explainers.updated_at').where(updated_at: Range.new(*format_times_search_range_filter(JSON.parse(filters[:updated_at]), nil))) unless filters[:updated_at].blank?
query = query.where(created_at: Range.new(*format_times_search_range_filter(JSON.parse(filters[:created_at]), nil))) unless filters[:created_at].blank?

# Filter by trashed
query = query.where(trashed: !!filters[:trashed])
Expand Down Expand Up @@ -528,7 +529,8 @@ def filtered_fact_checks(filters = {})
query = query.where('fact_checks.user_id' => filters[:user_ids].to_a.map(&:to_i)) unless filters[:user_ids].blank?

# Filter by date
query = query.where('fact_checks.updated_at' => Range.new(*format_times_search_range_filter(JSON.parse(filters[:updated_at]), nil))) unless filters[:updated_at].blank?
query = query.where('fact_checks.created_at != fact_checks.updated_at').where('fact_checks.updated_at' => Range.new(*format_times_search_range_filter(JSON.parse(filters[:updated_at]), nil))) unless filters[:updated_at].blank?
query = query.where('fact_checks.created_at' => Range.new(*format_times_search_range_filter(JSON.parse(filters[:created_at]), nil))) unless filters[:created_at].blank?

# Filter by publisher
query = query.where('fact_checks.publisher_id' => filters[:publisher_ids].to_a.map(&:to_i)) unless filters[:publisher_ids].blank?
Expand Down
2 changes: 2 additions & 0 deletions app/repositories/media_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,5 +132,7 @@ class MediaSearch
indexes :negative_tipline_search_results_count, { type: 'long' }

indexes :tipline_search_results_count, { type: 'long' }

indexes :explainer_title, { type: 'text', analyzer: 'check' }
end
end
1 change: 0 additions & 1 deletion app/workers/elastic_search_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def perform(model_data, options, type)
ops = {
'create_doc' => 'create_elasticsearch_doc_bg',
'update_doc' => 'update_elasticsearch_doc_bg',
'remove_fields' => 'remove_fields_from_elasticsearch_doc_bg',
'update_doc_team' => 'update_elasticsearch_doc_team_bg',
'create_update_doc_nested' => 'create_update_nested_obj_bg',
'destroy_doc' => 'destroy_elasticsearch_doc',
Expand Down
13 changes: 13 additions & 0 deletions db/migrate/20250205224319_add_mapping_for_explainer_title_field.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Adds the `explainer_title` text field (analyzed with the `check` analyzer)
# to the mapping of the index behind the current Elasticsearch alias.
class AddMappingForExplainerTitleField < ActiveRecord::Migration[6.1]
  def change
    field_mapping = { explainer_title: { type: 'text', analyzer: 'check' } }
    request = {
      index: CheckElasticSearchModel.get_index_alias,
      body: { properties: field_mapping }
    }
    $repository.client.indices.put_mapping(request)
  end
end
26 changes: 15 additions & 11 deletions lib/check_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def should_hit_elasticsearch?
end
filters_blank = true
['tags', 'keyword', 'language', 'fc_language', 'request_language', 'report_language', 'team_tasks', 'assigned_to', 'report_status', 'range_numeric',
'has_claim', 'cluster_teams', 'published_by', 'annotated_by', 'channels', 'cluster_published_reports'
'has_article', 'cluster_teams', 'published_by', 'annotated_by', 'channels', 'cluster_published_reports'
].each do |filter|
filters_blank = false unless @options[filter].blank?
end
Expand Down Expand Up @@ -309,7 +309,7 @@ def build_es_medias_query
custom_conditions.concat integer_terms_query('channel', 'channels')
custom_conditions.concat integer_terms_query('source_id', 'sources')
custom_conditions.concat doc_conditions
custom_conditions.concat has_claim_conditions
custom_conditions.concat has_article_conditions
custom_conditions.concat file_filter
custom_conditions.concat range_filter(:es)
custom_conditions.concat numeric_range_filter
Expand Down Expand Up @@ -469,8 +469,7 @@ def keyword_conditions
return [] if @options["keyword"].blank? || @options["keyword"].class.name != 'String'
set_keyword_fields
keyword_c = []
field_conditions = build_keyword_conditions_media_fields
keyword_c.concat field_conditions
keyword_c.concat build_keyword_conditions_media_fields
# Search in requests
[['request_username', 'username'], ['request_identifier', 'identifier'], ['request_content', 'content']].each do |pair|
keyword_c << {
Expand All @@ -497,7 +496,7 @@ def set_keyword_fields
def build_keyword_conditions_media_fields
es_fields = []
conditions = []
%w(title description url claim_description_content fact_check_title fact_check_summary claim_description_context fact_check_url source_name).each do |f|
%w(title description url claim_description_content fact_check_title fact_check_summary claim_description_context fact_check_url source_name explainer_title).each do |f|
es_fields << f if should_include_keyword_field?(f)
end
es_fields << 'analysis_title' if should_include_keyword_field?('title')
Expand Down Expand Up @@ -530,13 +529,18 @@ def request_language_conditions
[{ nested: { path: 'requests', query: { terms: { 'requests.language': @options['request_language'] } } } }]
end

def has_claim_conditions
def has_article_conditions
conditions = []
return conditions unless @options.has_key?('has_claim')
if @options['has_claim'].include?('NO_VALUE')
conditions << { bool: { must_not: [ { exists: { field: 'claim_description_content' } } ] } }
elsif @options['has_claim'].include?('ANY_VALUE')
conditions << { exists: { field: 'claim_description_content' } }
return conditions unless @options.has_key?('has_article')
# Build a condition from the fields that indicate the item has an article
has_article_c = []
['claim_description_content', 'explainer_title'].each do |field|
has_article_c << { exists: { field: field } }
end
if @options['has_article'].include?('NO_VALUE')
conditions << { bool: { must_not: has_article_c } }
elsif @options['has_article'].include?('ANY_VALUE')
conditions << { bool: { should: has_article_c } }
end
conditions
end
Expand Down
3 changes: 2 additions & 1 deletion lib/relay.idl
Original file line number Diff line number Diff line change
Expand Up @@ -13187,6 +13187,7 @@ type Team implements Node {
Returns the elements in the list that come before the specified cursor.
"""
before: String
created_at: String

"""
Returns the first _n_ elements from the list.
Expand All @@ -13213,7 +13214,7 @@ type Team implements Node {
updated_at: String
user_ids: [Int]
): ArticleUnionConnection
articles_count(article_type: String, imported: Boolean, language: [String], publisher_ids: [Int], rating: [String], report_status: [String], standalone: Boolean, tags: [String], target_id: Int, text: String, trashed: Boolean = false, updated_at: String, user_ids: [Int]): Int
articles_count(article_type: String, created_at: String, imported: Boolean, language: [String], publisher_ids: [Int], rating: [String], report_status: [String], standalone: Boolean, tags: [String], target_id: Int, text: String, trashed: Boolean = false, updated_at: String, user_ids: [Int]): Int
available_newsletter_header_types: JsonStringType
avatar: String
bot_query(enableLanguageDetection: Boolean, enableLinkShortening: Boolean, maxNumberOfWords: Int, searchText: String!, shouldRestrictByLanguage: Boolean, threshold: Float, utmCode: String): [TiplineSearchResult!]
Expand Down
Loading

0 comments on commit b13d7f1

Please sign in to comment.