Skip to content

Commit

Permalink
Delete the set_cluster method definition and calls from Alegre Bot (#…
Browse files Browse the repository at this point in the history
…1784)

For FactsFirstPH, an elections project we ran in the Philippines in early 2022, we implemented the very first version of shared feeds and global clusters. Part of that logic included adding new items to clusters automatically. We’re changing this logic, so before proceeding with the new logic, it’s important to get rid of the previous one.
  • Loading branch information
jayjay-w authored Jan 31, 2024
1 parent 6060da7 commit 8b434d7
Show file tree
Hide file tree
Showing 7 changed files with 1 addition and 95 deletions.
24 changes: 0 additions & 24 deletions app/models/bot/alegre.rb
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ def self.run(body)
self.get_extracted_text(pm)
self.get_flags(pm)
self.auto_transcription(pm)
self.set_cluster(pm)
handled = true
end
rescue StandardError => e
Expand All @@ -175,29 +174,6 @@ def self.run(body)
handled
end

# Adds the given item to a cluster and returns that cluster.
#
# The item is re-fetched by id first. Unless +force+ is truthy, nothing is
# done (nil is returned) when the item already has a cluster_id or when no
# item in its team has a cluster yet. Otherwise the most similar item is
# looked up across every team that has clustered items; its cluster is
# reused, or a new cluster headed by this item is created when there is none.
#
# pm    - the item to cluster (only its id is read before re-fetching).
# force - when truthy, bypasses the skip conditions above.
def self.set_cluster(pm, force = false)
  pm = ProjectMedia.find(pm.id)

  # Same evaluation order as a single compound condition: the existence
  # query only runs when the item has no cluster yet.
  skippable = pm.cluster_id.present? ||
              !ProjectMedia.where(team_id: pm.team_id).where.not(cluster_id: nil).exists?
  return if skippable && !force

  clustered_team_ids = ProjectMedia.where.not(cluster_id: nil).group(:team_id).count.keys

  # One similarity threshold per media type, read from CheckConfig with the
  # fallback value listed here.
  fallback_thresholds = { audio: 0.8, video: 0.8, image: 0.9, text: 0.9 }
  thresholds = fallback_thresholds.each_with_object({}) do |(media_type, fallback), acc|
    acc[media_type] = { value: CheckConfig.get("#{media_type}_cluster_similarity_threshold", fallback, :float) }
  end

  matches = pm.similar_items_ids_and_scores(clustered_team_ids, thresholds)
  best_match = matches.max_by { |_item_id, details| details[:score] }
  best_item = ProjectMedia.find_by_id((best_match&.first).to_i)

  # Reuse the best match's cluster, or start a new one with this item as its
  # project_media when no match (or no cluster on the match) is found.
  cluster = best_item&.cluster || begin
    fresh = Cluster.new
    fresh.project_media = pm
    fresh.skip_check_ability = true
    fresh.save!
    fresh
  end

  cluster.project_medias << pm
  cluster
end

def self.get_number_of_words(text)
# Get the number of space-separated words (Does not work with Chinese/Japanese)
Expand Down
2 changes: 1 addition & 1 deletion app/models/concerns/relationship_bulk.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def run_update_callbacks(ids_json, extra_options_json)
index_alias = CheckElasticSearchModel.get_index_alias
es_body = []
versions = []
callbacks = [:reset_counters, :update_counters, :set_cluster, :propagate_inversion]
callbacks = [:reset_counters, :update_counters, :propagate_inversion]
target_ids = []
Relationship.where(id: ids, source_id: extra_options['source_id']).find_each do |r|
target_ids << r.target_id
Expand Down
17 changes: 0 additions & 17 deletions app/models/relationship.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ class Relationship < ApplicationRecord

before_validation :set_user, on: :create
before_validation :set_confirmed, if: :is_being_confirmed?, on: :update
before_validation :set_cluster, if: :is_being_confirmed?, on: :update
validate :relationship_type_is_valid, :items_are_from_the_same_team
validate :target_not_published_report, on: :create
validate :similar_item_exists, on: :create, if: proc { |r| r.is_suggested? }
Expand All @@ -31,7 +30,6 @@ class Relationship < ApplicationRecord
after_destroy :turn_on_unmatched_field, if: proc { |r| r.is_confirmed? || r.is_suggested? }
after_commit :update_counter_and_elasticsearch, on: [:create, :update]
after_commit :update_counters, :destroy_elasticsearch_relation, on: :destroy
after_commit :set_cluster, on: [:create]

has_paper_trail on: [:create, :update, :destroy], if: proc { |x| User.current.present? && !x.is_being_copied? }, versions: { class_name: 'Version' }

Expand Down Expand Up @@ -282,21 +280,6 @@ def set_confirmed
end
end

# Callback: when a user other than the Alegre bot is the current user and this
# relationship is of the confirmed type, moves the target item into the
# source item's cluster.
#
# The move only happens when the target has no cluster, or when its cluster
# has size 1 and is headed by the target itself; in the latter case that
# cluster is destroyed first. Returns nil whenever nothing is moved.
def set_cluster
  return unless self.relationship_type.to_json == Relationship.confirmed_type.to_json
  return unless User.current
  return if User.current&.id == BotUser.alegre_user&.id

  target_item = self.target
  destination = self.source.cluster
  existing = target_item.cluster

  unless existing.nil?
    # Only a single-item cluster headed by the target itself may be dissolved.
    sole_member = existing.size == 1 && existing.project_media_id == target_item.id
    return unless sole_member

    existing.skip_check_ability = true
    existing.destroy!
  end

  destination.project_medias << target_item unless destination.nil?
end

# Delegates to set_unmatched_field with 0.
# NOTE(review): presumably 0 marks the related item(s) as "not unmatched" —
# confirm against set_unmatched_field, which is not visible here.
def turn_off_unmatched_field
set_unmatched_field(0)
end
Expand Down
5 changes: 0 additions & 5 deletions lib/tasks/check_clusters.rake
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,6 @@ namespace :check do
team = Team.find_by_slug(slug)
n = ProjectMedia.where(team_id: team.id).count
i = 0
ProjectMedia.where(team_id: team.id).order('id ASC').find_each do |pm|
i += 1
c = Bot::Alegre.set_cluster(pm, true)
log "[#{i}/#{n}] [#{Time.now}] Adding item #{pm.id} to the clusters... added to cluster #{c.id}"
end
end
end
end
20 changes: 0 additions & 20 deletions test/models/bot/alegre_2_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -665,26 +665,6 @@ def teardown
Bot::Alegre.unstub(:get_items_with_similar_description)
end

# Bot::Alegre.set_cluster should put an item into the cluster of its
# highest-scoring similar item, and create a new cluster when nothing similar
# is found.
test "should set cluster" do
  best_cluster = create_cluster
  other_cluster = create_cluster
  best_match = create_project_media team: @team, cluster_id: best_cluster.id
  weaker_match = create_project_media team: @team, cluster_id: other_cluster.id

  # Two candidates: the one with the higher score must win.
  scored_matches = { best_match.id => { score: 0.9 }, weaker_match.id => { score: 0.8 } }
  ProjectMedia.any_instance.stubs(:similar_items_ids_and_scores).returns(scored_matches)
  clustered_item = create_project_media team: @team
  Bot::Alegre.set_cluster(clustered_item)
  assert_equal best_cluster.id, clustered_item.reload.cluster_id

  # No candidates at all: a brand new cluster is expected.
  ProjectMedia.any_instance.stubs(:similar_items_ids_and_scores).returns({})
  lonely_item = create_project_media team: @team
  assert_difference('Cluster.count') { Bot::Alegre.set_cluster(lonely_item) }

  ProjectMedia.any_instance.unstub(:similar_items_ids_and_scores)
end

test "should get number of words" do
assert_equal 4, Bot::Alegre.get_number_of_words('58 This is a test !!! 123 😊')
assert_equal 1, Bot::Alegre.get_number_of_words(random_url)
Expand Down
11 changes: 0 additions & 11 deletions test/models/cluster_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,6 @@ def setup
end
end

# Bot::Alegre.set_cluster should return the cluster of the best-scoring
# similar item, even when another (unknown) id is also reported as similar.
test "should set cluster" do
  team = create_team
  clustered_item = create_project_media team: team
  cluster = create_cluster
  cluster.project_medias << clustered_item
  new_item = create_project_media team: team

  similar = {
    clustered_item.id => { score: 0.9, context: {} },
    random_number => { score: 0.8, context: { foo: 'bar' } }
  }
  ProjectMedia.any_instance.stubs(:similar_items_ids_and_scores).returns(similar)

  assert_equal cluster, Bot::Alegre.set_cluster(new_item)

  ProjectMedia.any_instance.unstub(:similar_items_ids_and_scores)
end

test "should get requests count" do
RequestStore.store[:skip_cached_field_update] = false
t = create_team
Expand Down
17 changes: 0 additions & 17 deletions test/models/relationship_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,23 +61,6 @@ def setup
assert_equal pm_t.id, es_t['parent_id']
end

# Confirming a relationship as a regular user should dissolve the target's
# own single-item cluster and move the target into the source's cluster.
test "should set cluster" do
  team = create_team
  admin = create_user
  create_team_user team: team, user: admin, role: 'admin'

  source = create_project_media team: team
  target = create_project_media team: team

  # Each item starts out as the head and only member of its own cluster.
  source_cluster = create_cluster project_media: source
  source_cluster.project_medias << source
  target_cluster = create_cluster project_media: target
  target_cluster.project_medias << target

  User.stubs(:current).returns(admin)
  create_relationship source_id: source.id, target_id: target.id, relationship_type: Relationship.confirmed_type

  assert_nil Cluster.where(id: target_cluster.id).last
  assert_equal [source.id, target.id].sort, source_cluster.reload.project_media_ids.sort

  User.unstub(:current)
end

test "should remove suggested relation when same items added as similar" do
team = create_team
b = create_bot name: 'Alegre', login: 'alegre'
Expand Down

0 comments on commit 8b434d7

Please sign in to comment.