From 8b434d7b087542a948778c5ca504357c08135092 Mon Sep 17 00:00:00 2001 From: Jay Joshua <7008757+jayjay-w@users.noreply.github.com> Date: Wed, 31 Jan 2024 20:01:11 +0100 Subject: [PATCH] Delete the `set_cluster` method definition and calls from Alegre Bot (#1784) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For FactsFirstPH, an elections project we ran in the Philippines in early 2022, we implemented the very first version of shared feeds and global clusters. Part of that logic included adding new items to clusters automatically. We’re changing this logic, so before proceeding with the new logic, it’s important to get rid of the previous one. --- app/models/bot/alegre.rb | 24 ------------------------ app/models/concerns/relationship_bulk.rb | 2 +- app/models/relationship.rb | 17 ----------------- lib/tasks/check_clusters.rake | 5 ----- test/models/bot/alegre_2_test.rb | 20 -------------------- test/models/cluster_test.rb | 11 ----------- test/models/relationship_test.rb | 17 ----------------- 7 files changed, 1 insertion(+), 95 deletions(-) diff --git a/app/models/bot/alegre.rb b/app/models/bot/alegre.rb index 9ed845d759..13a788c586 100644 --- a/app/models/bot/alegre.rb +++ b/app/models/bot/alegre.rb @@ -162,7 +162,6 @@ def self.run(body) self.get_extracted_text(pm) self.get_flags(pm) self.auto_transcription(pm) - self.set_cluster(pm) handled = true end rescue StandardError => e @@ -175,29 +174,6 @@ def self.run(body) handled end - def self.set_cluster(pm, force = false) - pm = ProjectMedia.find(pm.id) - return if (!pm.cluster_id.blank? || !ProjectMedia.where(team_id: pm.team_id).where.not(cluster_id: nil).exists?) 
&& !force - team_ids = ProjectMedia.where.not(cluster_id: nil).group(:team_id).count.keys - thresholds = { - audio: { value: CheckConfig.get('audio_cluster_similarity_threshold', 0.8, :float) }, - video: { value: CheckConfig.get('video_cluster_similarity_threshold', 0.8, :float) }, - image: { value: CheckConfig.get('image_cluster_similarity_threshold', 0.9, :float) }, - text: { value: CheckConfig.get('text_cluster_similarity_threshold', 0.9, :float) } - } - ids_and_scores = pm.similar_items_ids_and_scores(team_ids, thresholds) - main_id = ids_and_scores.max_by{ |_pm_id, score_and_context| score_and_context[:score] }&.first - main = ProjectMedia.find_by_id(main_id.to_i) - cluster = main&.cluster - unless cluster - cluster = Cluster.new - cluster.project_media = pm - cluster.skip_check_ability = true - cluster.save! - end - cluster.project_medias << pm - cluster - end def self.get_number_of_words(text) # Get the number of space-separated words (Does not work with Chinese/Japanese) diff --git a/app/models/concerns/relationship_bulk.rb b/app/models/concerns/relationship_bulk.rb index d3526b7ca4..f973d6f0c2 100644 --- a/app/models/concerns/relationship_bulk.rb +++ b/app/models/concerns/relationship_bulk.rb @@ -73,7 +73,7 @@ def run_update_callbacks(ids_json, extra_options_json) index_alias = CheckElasticSearchModel.get_index_alias es_body = [] versions = [] - callbacks = [:reset_counters, :update_counters, :set_cluster, :propagate_inversion] + callbacks = [:reset_counters, :update_counters, :propagate_inversion] target_ids = [] Relationship.where(id: ids, source_id: extra_options['source_id']).find_each do |r| target_ids << r.target_id diff --git a/app/models/relationship.rb b/app/models/relationship.rb index 4d7cf33256..b4308a834e 100644 --- a/app/models/relationship.rb +++ b/app/models/relationship.rb @@ -12,7 +12,6 @@ class Relationship < ApplicationRecord before_validation :set_user, on: :create before_validation :set_confirmed, if: :is_being_confirmed?, on: :update 
- before_validation :set_cluster, if: :is_being_confirmed?, on: :update validate :relationship_type_is_valid, :items_are_from_the_same_team validate :target_not_published_report, on: :create validate :similar_item_exists, on: :create, if: proc { |r| r.is_suggested? } @@ -31,7 +30,6 @@ class Relationship < ApplicationRecord after_destroy :turn_on_unmatched_field, if: proc { |r| r.is_confirmed? || r.is_suggested? } after_commit :update_counter_and_elasticsearch, on: [:create, :update] after_commit :update_counters, :destroy_elasticsearch_relation, on: :destroy - after_commit :set_cluster, on: [:create] has_paper_trail on: [:create, :update, :destroy], if: proc { |x| User.current.present? && !x.is_being_copied? }, versions: { class_name: 'Version' } @@ -282,21 +280,6 @@ def set_confirmed end end - def set_cluster - if self.relationship_type.to_json == Relationship.confirmed_type.to_json && User.current && User.current&.id != BotUser.alegre_user&.id - pm = self.target - new_cluster = self.source.cluster - old_cluster = pm.cluster - if old_cluster.nil? || (old_cluster.size == 1 && old_cluster.project_media_id == pm.id) - unless old_cluster.nil? - old_cluster.skip_check_ability = true - old_cluster.destroy! - end - new_cluster.project_medias << pm unless new_cluster.nil? - end - end - end - def turn_off_unmatched_field set_unmatched_field(0) end diff --git a/lib/tasks/check_clusters.rake b/lib/tasks/check_clusters.rake index 42f2cacf2b..a12f8d5968 100644 --- a/lib/tasks/check_clusters.rake +++ b/lib/tasks/check_clusters.rake @@ -138,11 +138,6 @@ namespace :check do team = Team.find_by_slug(slug) n = ProjectMedia.where(team_id: team.id).count i = 0 - ProjectMedia.where(team_id: team.id).order('id ASC').find_each do |pm| - i += 1 - c = Bot::Alegre.set_cluster(pm, true) - log "[#{i}/#{n}] [#{Time.now}] Adding item #{pm.id} to the clusters... 
added to cluster #{c.id}" - end end end end diff --git a/test/models/bot/alegre_2_test.rb b/test/models/bot/alegre_2_test.rb index b64b51a192..e6b812047b 100644 --- a/test/models/bot/alegre_2_test.rb +++ b/test/models/bot/alegre_2_test.rb @@ -665,26 +665,6 @@ def teardown Bot::Alegre.unstub(:get_items_with_similar_description) end - test "should set cluster" do - c1 = create_cluster - c2 = create_cluster - pm1 = create_project_media team: @team, cluster_id: c1.id - pm2 = create_project_media team: @team, cluster_id: c2.id - - ProjectMedia.any_instance.stubs(:similar_items_ids_and_scores).returns({ pm1.id => { score: 0.9 }, pm2.id => { score: 0.8 } }) - pm3 = create_project_media team: @team - Bot::Alegre.set_cluster(pm3) - assert_equal c1.id, pm3.reload.cluster_id - - ProjectMedia.any_instance.stubs(:similar_items_ids_and_scores).returns({}) - pm4 = create_project_media team: @team - assert_difference 'Cluster.count' do - Bot::Alegre.set_cluster(pm4) - end - - ProjectMedia.any_instance.unstub(:similar_items_ids_and_scores) - end - test "should get number of words" do assert_equal 4, Bot::Alegre.get_number_of_words('58 This is a test !!! 
123 😊') assert_equal 1, Bot::Alegre.get_number_of_words(random_url) diff --git a/test/models/cluster_test.rb b/test/models/cluster_test.rb index 3f3f78f892..61bef8e25a 100644 --- a/test/models/cluster_test.rb +++ b/test/models/cluster_test.rb @@ -87,17 +87,6 @@ def setup end end - test "should set cluster" do - t = create_team - pm1 = create_project_media team: t - c = create_cluster - c.project_medias << pm1 - pm2 = create_project_media team: t - ProjectMedia.any_instance.stubs(:similar_items_ids_and_scores).returns({ pm1.id => { score: 0.9, context: {} }, random_number => { score: 0.8, context: { foo: 'bar' } } }) - assert_equal c, Bot::Alegre.set_cluster(pm2) - ProjectMedia.any_instance.unstub(:similar_items_ids_and_scores) - end - test "should get requests count" do RequestStore.store[:skip_cached_field_update] = false t = create_team diff --git a/test/models/relationship_test.rb b/test/models/relationship_test.rb index 61acaa14a7..8eca19286e 100644 --- a/test/models/relationship_test.rb +++ b/test/models/relationship_test.rb @@ -61,23 +61,6 @@ def setup assert_equal pm_t.id, es_t['parent_id'] end - test "should set cluster" do - t = create_team - u = create_user - create_team_user team: t, user: u, role: 'admin' - s = create_project_media team: t - t = create_project_media team: t - s_c = create_cluster project_media: s - s_c.project_medias << s - t_c = create_cluster project_media: t - t_c.project_medias << t - User.stubs(:current).returns(u) - create_relationship source_id: s.id, target_id: t.id, relationship_type: Relationship.confirmed_type - assert_nil Cluster.where(id: t_c.id).last - assert_equal [s.id, t.id].sort, s_c.reload.project_media_ids.sort - User.unstub(:current) - end - test "should remove suggested relation when same items added as similar" do team = create_team b = create_bot name: 'Alegre', login: 'alegre'