From c41bddd9ce2fab488b8a0ecc3a07014ba956df47 Mon Sep 17 00:00:00 2001 From: Caio Almeida <117518+caiosba@users.noreply.github.com> Date: Thu, 11 Jan 2024 22:59:45 -0300 Subject: [PATCH 1/4] Fixing Alegre path for tipline image search Tipline image search, and other places in the code, were still hitting a deprecated path of the Alegre API, `/:type/similarity/search/`, while (as far as I understand) it should be `/similarity/sync/:type`. This PR fixes it by creating a `get_items_with_similar_media_v2` method that hits the new endpoint for image and audio and the old endpoint for video and text. This is not ideal, and I also noticed another problem (Alegre crashes if the same media URL was already requested before, so a random hash is appended to the URL), but I added a couple of `FIXME` comments to the code to handle both later. Reference: CV2-4044. --- app/models/bot/alegre.rb | 2 +- app/models/concerns/alegre_similarity.rb | 2 +- app/models/concerns/alegre_v2.rb | 13 +- app/models/concerns/smooch_search.rb | 5 +- app/resources/api/v2/report_resource.rb | 2 +- lib/check_search.rb | 2 +- test/models/bot/alegre_2_test.rb | 193 ++++++----------------- test/models/bot/smooch_6_test.rb | 5 +- test/models/bot/smooch_7_test.rb | 1 + 9 files changed, 66 insertions(+), 159 deletions(-) diff --git a/app/models/bot/alegre.rb b/app/models/bot/alegre.rb index cfeb5651d6..9ed845d759 100644 --- a/app/models/bot/alegre.rb +++ b/app/models/bot/alegre.rb @@ -33,7 +33,7 @@ def similar_items_ids_and_scores(team_ids, thresholds = {}) 'UploadedImage' => 'image', }[self.media.type].to_s threshold = [{value: thresholds.dig(media_type.to_sym, :value)}] || Bot::Alegre.get_threshold_for_query(media_type, self, true) - ids_and_scores = Bot::Alegre.get_items_with_similar_media(Bot::Alegre.media_file_url(self), threshold, team_ids, "/#{media_type}/similarity/search/").to_h + ids_and_scores = Bot::Alegre.get_items_with_similar_media_v2(Bot::Alegre.media_file_url(self), threshold, team_ids, media_type).to_h elsif self.is_text? 
ids_and_scores = {} threads = [] diff --git a/app/models/concerns/alegre_similarity.rb b/app/models/concerns/alegre_similarity.rb index ec7ce94f9d..921cae3963 100644 --- a/app/models/concerns/alegre_similarity.rb +++ b/app/models/concerns/alegre_similarity.rb @@ -39,7 +39,7 @@ def get_items_with_similarity(type, pm, threshold) if type == 'text' self.get_merged_items_with_similar_text(pm, threshold) else - results = self.get_items_with_similar_media(self.media_file_url(pm), threshold, pm.team_id, "/#{type}/similarity/search/").reject{ |id, _score_with_context| pm.id == id } + results = self.get_items_with_similar_media_v2(self.media_file_url(pm), threshold, pm.team_id, type).reject{ |id, _score_with_context| pm.id == id } self.merge_response_with_source_and_target_fields(results, type) end end diff --git a/app/models/concerns/alegre_v2.rb b/app/models/concerns/alegre_v2.rb index df470caf0b..99ac2d145c 100644 --- a/app/models/concerns/alegre_v2.rb +++ b/app/models/concerns/alegre_v2.rb @@ -9,7 +9,11 @@ def host end def sync_path(project_media) - "/similarity/sync/#{get_type(project_media)}" + self.sync_path_for_type(get_type(project_media)) + end + + def sync_path_for_type(type) + "/similarity/sync/#{type}" end def async_path(project_media) @@ -256,5 +260,12 @@ def get_similar_items_v2(project_media, field) def relate_project_media(project_media, field=nil) self.add_relationships(project_media, self.get_similar_items_v2(project_media, field)) unless project_media.is_blank? end + + def get_items_with_similar_media_v2(media_url, threshold, team_ids, type) + alegre_path = ['audio', 'image'].include?(type) ? 
self.sync_path_for_type(type) : "/#{type}/similarity/search/" + # FIXME: Stop using this method from v1 once all media types are supported by v2 + # FIXME: Alegre crashes if `media_url` was already requested before, this is why I append a hash + self.get_items_with_similar_media("#{media_url}?hash=#{SecureRandom.hex}", threshold, team_ids, alegre_path) + end end end diff --git a/app/models/concerns/smooch_search.rb b/app/models/concerns/smooch_search.rb index 8b1b4c5d65..63d1c3a766 100644 --- a/app/models/concerns/smooch_search.rb +++ b/app/models/concerns/smooch_search.rb @@ -112,7 +112,7 @@ def get_search_results(uid, last_message, team_id, language) type = message['type'] after = self.date_filter(team_id) query = message['text'] - query = message['mediaUrl'] unless type == 'text' + query = CheckS3.rewrite_url(message['mediaUrl']) unless type == 'text' results = self.search_for_similar_published_fact_checks(type, query, [team_id], after, nil, language).select{ |pm| is_a_valid_search_result(pm) } rescue StandardError => e self.handle_search_error(uid, e, language) @@ -161,10 +161,11 @@ def search_for_similar_published_fact_checks_no_cache(type, query, team_ids, aft end else media_url = Twitter::TwitterText::Extractor.extract_urls(query)[0] + Rails.logger.info "[Smooch Bot] Got media_url #{media_url} from query #{query}" return [] if media_url.blank? 
media_url = self.save_locally_and_return_url(media_url, type, feed_id) threshold = Bot::Alegre.get_threshold_for_query(type, pm)[0][:value] - alegre_results = Bot::Alegre.get_items_with_similar_media(media_url, [{ value: threshold }], team_ids, "/#{type}/similarity/search/") + alegre_results = Bot::Alegre.get_items_with_similar_media_v2(media_url, [{ value: threshold }], team_ids, type) results = self.parse_search_results_from_alegre(alegre_results, after, feed_id, team_ids) Rails.logger.info "[Smooch Bot] Media similarity search got #{results.count} results while looking for '#{query}' after date #{after.inspect} for teams #{team_ids}" end diff --git a/app/resources/api/v2/report_resource.rb b/app/resources/api/v2/report_resource.rb index 472fb9ed1e..309de6253a 100644 --- a/app/resources/api/v2/report_resource.rb +++ b/app/resources/api/v2/report_resource.rb @@ -100,7 +100,7 @@ def self.apply_media_similarity_filter(organization_ids, threshold, media_path, unless media.blank? media[0].rewind CheckS3.write(media_path, media[0].content_type.gsub(/^video/, 'application'), media[0].read) - ids_and_scores = Bot::Alegre.get_items_with_similar_media(CheckS3.public_url(media_path), [{ value: threshold }], organization_ids, "/#{media_type}/similarity/search/") + ids_and_scores = Bot::Alegre.get_items_with_similar_media_v2(CheckS3.public_url(media_path), [{ value: threshold }], organization_ids, media_type) RequestStore.store[:scores] = ids_and_scores # Store the scores so we can return them ids = ids_and_scores.keys.uniq || [0] CheckS3.delete(media_path) diff --git a/lib/check_search.rb b/lib/check_search.rb index 0b4937745e..3698af3071 100644 --- a/lib/check_search.rb +++ b/lib/check_search.rb @@ -262,7 +262,7 @@ def alegre_file_similar_items file_path = "check_search/#{hash}" end threshold = Bot::Alegre.get_threshold_for_query(@options['file_type'], ProjectMedia.new(team_id: Team.current.id))[0][:value] - results = 
Bot::Alegre.get_items_with_similar_media(CheckS3.public_url(file_path), [{ value: threshold }], @options['team_id'].first, "/#{@options['file_type']}/similarity/search/") + results = Bot::Alegre.get_items_with_similar_media_v2(CheckS3.public_url(file_path), [{ value: threshold }], @options['team_id'].first, @options['file_type']) results.blank? ? [0] : results.keys end diff --git a/test/models/bot/alegre_2_test.rb b/test/models/bot/alegre_2_test.rb index db1bb7a83f..b64b51a192 100644 --- a/test/models/bot/alegre_2_test.rb +++ b/test/models/bot/alegre_2_test.rb @@ -17,8 +17,10 @@ def setup Sidekiq::Testing.inline! Bot::Alegre.stubs(:should_get_similar_items_of_type?).returns(true) Bot::Alegre.unstub(:media_file_url) + hex = SecureRandom.hex + SecureRandom.stubs(:hex).returns(hex) @media_path = random_url - @params = { url: @media_path, context: { has_custom_id: true, team_id: @team.id }, threshold: 0.9, match_across_content_types: true } + @params = { url: "#{@media_path}?hash=#{hex}", context: { has_custom_id: true, team_id: @team.id }, threshold: 0.9, match_across_content_types: true } end def teardown @@ -64,7 +66,7 @@ def teardown pm1 = create_project_media team: @team, media: create_uploaded_audio pm2 = create_project_media team: @team, media: create_uploaded_audio pm3 = create_project_media team: @team, media: create_uploaded_audio - Bot::Alegre.stubs(:request).with('post', '/audio/similarity/search/', @params).returns({ + Bot::Alegre.stubs(:request).with('post', '/similarity/sync/audio', @params).returns({ result: [ { id: 1, @@ -105,7 +107,7 @@ def teardown pm1 = create_project_media team: @team, media: create_uploaded_video pm2 = create_project_media team: @team, media: create_uploaded_audio pm3 = create_project_media team: @team, media: create_uploaded_audio - Bot::Alegre.stubs(:request).with('post', '/audio/similarity/search/', @params).returns({ + Bot::Alegre.stubs(:request).with('post', '/similarity/sync/audio', @params).returns({ result: [ { id: 2, @@ 
-171,8 +173,8 @@ def teardown } ] }.with_indifferent_access - Bot::Alegre.stubs(:request).with('post', '/image/similarity/search/', @params.merge({ threshold: 0.89 })).returns(result) - Bot::Alegre.stubs(:request).with('post', '/image/similarity/search/', @params.merge({ threshold: 0.95 })).returns(result) + Bot::Alegre.stubs(:request).with('post', '/similarity/sync/image', @params.merge({ threshold: 0.89 })).returns(result) + Bot::Alegre.stubs(:request).with('post', '/similarity/sync/image', @params.merge({ threshold: 0.95 })).returns(result) Bot::Alegre.stubs(:media_file_url).with(pm3).returns(@media_path) assert_difference 'Relationship.count' do Bot::Alegre.relate_project_media_to_similar_items(pm3) @@ -222,8 +224,8 @@ def teardown ] }.with_indifferent_access Bot::Alegre.stubs(:media_file_url).with(pm1a).returns(@media_path) - Bot::Alegre.stubs(:request).with('post', '/image/similarity/search/', @params.merge({ threshold: 0.89 })).returns(response) - Bot::Alegre.stubs(:request).with('post', '/image/similarity/search/', @params.merge({ threshold: 0.95 })).returns(response) + Bot::Alegre.stubs(:request).with('post', '/similarity/sync/image', @params.merge({ threshold: 0.89 })).returns(response) + Bot::Alegre.stubs(:request).with('post', '/similarity/sync/image', @params.merge({ threshold: 0.95 })).returns(response) assert_difference 'Relationship.count' do Bot::Alegre.relate_project_media_to_similar_items(pm1a) end @@ -232,170 +234,65 @@ def teardown assert_equal pm1a, Relationship.last.target end - test "should link similar images, get flags and extract text zzz" do + test "should link similar images, get flags and extract text" do image_path = random_url ft = create_field_type field_type: 'image_path', label: 'Image Path' at = create_annotation_type annotation_type: 'reverse_image', label: 'Reverse Image' create_field_instance annotation_type_object: at, name: 'reverse_image_path', label: 'Reverse Image', field_type_object: ft, optional: false stub_configs({ 
'alegre_host' => 'http://alegre.test', 'alegre_token' => 'test' }) do - WebMock.stub_request(:post, 'http://alegre.test/text/langid/').to_return(body: { 'result' => { 'language' => 'es' }}.to_json) - WebMock.disable_net_connect! allow: /#{CheckConfig.get('elasticsearch_host')}|#{CheckConfig.get('storage_endpoint')}/ - WebMock.stub_request(:post, 'http://alegre.test/text/similarity/').to_return(body: 'success') - WebMock.stub_request(:delete, 'http://alegre.test/text/similarity/').to_return(body: {success: true}.to_json) - WebMock.stub_request(:post, 'http://alegre.test/text/similarity/search/').to_return(body: {success: true}.to_json) - WebMock.stub_request(:post, 'http://alegre.test/image/similarity/').to_return(body: { - "success": true - }.to_json) - WebMock.stub_request(:post, 'http://alegre.test/image/classification/').with({ body: { uri: image_path } }).to_return(body: { - "result": valid_flags_data - }.to_json) - WebMock.stub_request(:post, 'http://alegre.test/image/ocr/').with({ body: { url: image_path } }).to_return(body: { - "text": "Foo bar" - }.to_json) - WebMock.stub_request(:post, 'http://alegre.test/image/similarity/').to_return(body: 'success') - - # Similarity t = create_team - pm1 = create_project_media team: t, media: create_uploaded_image - context = [{ - "team_id" => pm1.team.id.to_s, - "project_media_id" => pm1.id.to_s - }] - WebMock.stub_request(:post, "http://alegre.test/image/similarity/search/").with(body: {:context=>{:team_id=>pm1.team_id, :has_custom_id=>true}, :match_across_content_types => true, :url=>image_path, :threshold=>0.89}).to_return(body: { - "result": [ - { - "id": pm1.id, - "sha256": "1782b1d1993fcd9f6fd8155adc6009a9693a8da7bb96d20270c4bc8a30c97570", - "phash": 17399941807326929, - "url": "https:\/\/www.gstatic.com\/webp\/gallery3\/1.png", - "context": context, - "score": 0 - } - ] - }.to_json) - WebMock.stub_request(:post, "http://alegre.test/image/similarity/search/").with(body: {:context=>{:team_id=>pm1.team_id, 
:has_custom_id=>true}, :match_across_content_types => true, :url=>image_path, :threshold=>0.95}).to_return(body: { - "result": [ - { - "id": pm1.id, - "sha256": "1782b1d1993fcd9f6fd8155adc6009a9693a8da7bb96d20270c4bc8a30c97570", - "phash": 17399941807326929, - "url": "https:\/\/www.gstatic.com\/webp\/gallery3\/1.png", - "context": context, - "score": 0 - } - ] - }.to_json) - WebMock.stub_request(:post, "http://alegre.test/similarity/sync/image").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true}, :url=>image_path, :threshold=>0.89}).to_return(body: { - "result": [ - { - "id": pm1.id, - "sha256": "1782b1d1993fcd9f6fd8155adc6009a9693a8da7bb96d20270c4bc8a30c97570", - "phash": 17399941807326929, - "url": "https:\/\/www.gstatic.com\/webp\/gallery3\/1.png", - "context": context, - "score": 0 - } - ] - }.to_json) - WebMock.stub_request(:post, "http://alegre.test/similarity/sync/image").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true}, :url=>image_path, :threshold=>0.95}).to_return(body: { - "result": [ - { - "id": pm1.id, - "sha256": "1782b1d1993fcd9f6fd8155adc6009a9693a8da7bb96d20270c4bc8a30c97570", - "phash": 17399941807326929, - "url": "https:\/\/www.gstatic.com\/webp\/gallery3\/1.png", - "context": context, - "score": 0 - } - ] - }.to_json) Bot::Alegre.stubs(:media_file_url).returns(image_path) - assert Bot::Alegre.run({ data: { dbid: pm1.id }, event: 'create_project_media' }) - Bot::Alegre.unstub(:media_file_url) - WebMock.stub_request(:post, 'http://alegre.test/similarity/sync/image').with(body: {url: image_path}.to_json).to_return(body: { - "result": [ + WebMock.disable_net_connect! 
allow: /#{CheckConfig.get('elasticsearch_host')}|#{CheckConfig.get('storage_endpoint')}/ + WebMock.stub_request(:post, 'http://alegre.test/text/langid/').to_return(body: { 'result' => { 'language' => 'es' }}.to_json) + WebMock.stub_request(:post, 'http://alegre.test/text/similarity/').to_return(body: 'success') + WebMock.stub_request(:delete, 'http://alegre.test/text/similarity/').to_return(body: { success: true }.to_json) + WebMock.stub_request(:post, 'http://alegre.test/text/similarity/search/').to_return(body: { success: true }.to_json) + WebMock.stub_request(:post, 'http://alegre.test/image/ocr/').to_return(body: { text: 'Foo bar' }.to_json) + WebMock.stub_request(:post, 'http://alegre.test/similarity/sync/image').to_return(body: { + result: [ { - "id": 1, - "sha256": "1782b1d1993fcd9f6fd8155adc6009a9693a8da7bb96d20270c4bc8a30c97570", - "phash": 17399941807326929, - "url": "https:\/\/www.gstatic.com\/webp\/gallery3\/1.png", - "context": context, - "score": 0 + id: random_number, + sha256: random_string, + phash: random_string, + url: image_path, + context: { team_id: t.id }, + score: 0 } ] }.to_json) - pm2 = create_project_media team: t, media: create_uploaded_image - response = {pm1.id => {:score => 0, :context => context, :model=>nil, :source_field=>"image", :target_field => "image"}} - Bot::Alegre.stubs(:media_file_url).returns(image_path) - assert_equal response, Bot::Alegre.get_items_with_similarity('image', pm2, Bot::Alegre.get_threshold_for_query('image', pm2)) # Flags Bot::Alegre.unstub(:media_file_url) - WebMock.stub_request(:post, 'http://alegre.test/image/classification/').to_return(body: { - "result": valid_flags_data - }.to_json) + WebMock.stub_request(:post, 'http://alegre.test/image/classification/').to_return(body: { result: valid_flags_data }.to_json) pm3 = create_project_media team: t, media: create_uploaded_image - WebMock.stub_request(:post, "http://alegre.test/similarity/sync/image").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm3), 
:context=>{:team_id=>pm3.team_id, :project_media_id=>pm3.id, :has_custom_id=>true}, :url=>image_path, :threshold=>0.89}).to_return(body: { - "result": [ - { - "id": pm1.id, - "sha256": "1782b1d1993fcd9f6fd8155adc6009a9693a8da7bb96d20270c4bc8a30c97570", - "phash": 17399941807326929, - "url": "https:\/\/www.gstatic.com\/webp\/gallery3\/1.png", - "context": context, - "score": 0 - } - ] - }.to_json) - WebMock.stub_request(:post, "http://alegre.test/similarity/sync/image").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm3), :context=>{:team_id=>pm3.team_id, :project_media_id=>pm3.id, :has_custom_id=>true}, :url=>image_path, :threshold=>0.95}).to_return(body: { - "result": [ - { - "id": pm1.id, - "sha256": "1782b1d1993fcd9f6fd8155adc6009a9693a8da7bb96d20270c4bc8a30c97570", - "phash": 17399941807326929, - "url": "https:\/\/www.gstatic.com\/webp\/gallery3\/1.png", - "context": context, - "score": 0 - } - ] - }.to_json) - Bot::Alegre.stubs(:media_file_url).returns(image_path) assert Bot::Alegre.run({ data: { dbid: pm3.id }, event: 'create_project_media' }) assert_not_nil pm3.get_annotations('flag').last - Bot::Alegre.unstub(:media_file_url) # Text extraction - Bot::Alegre.unstub(:media_file_url) pm4 = create_project_media team: t, media: create_uploaded_image - WebMock.stub_request(:post, "http://alegre.test/similarity/sync/image").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm4), :context=>{:team_id=>pm4.team_id, :project_media_id=>pm4.id, :has_custom_id=>true}, :url=>image_path, :threshold=>0.89}).to_return(body: { - "result": [ - { - "id": pm1.id, - "sha256": "1782b1d1993fcd9f6fd8155adc6009a9693a8da7bb96d20270c4bc8a30c97570", - "phash": 17399941807326929, - "url": "https:\/\/www.gstatic.com\/webp\/gallery3\/1.png", - "context": context, - "score": 0 - } - ] - }.to_json) - WebMock.stub_request(:post, "http://alegre.test/similarity/sync/image").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm4), :context=>{:team_id=>pm4.team_id, :project_media_id=>pm4.id, 
:has_custom_id=>true}, :url=>image_path, :threshold=>0.95}).to_return(body: { - "result": [ + assert Bot::Alegre.run({ data: { dbid: pm4.id }, event: 'create_project_media' }) + extracted_text_annotation = pm4.get_annotations('extracted_text').last + assert_equal 'Foo bar', extracted_text_annotation.data['text'] + + # Similarity + pm1 = create_project_media team: t, media: create_uploaded_image + assert Bot::Alegre.run({ data: { dbid: pm1.id }, event: 'create_project_media' }) + + pm2 = create_project_media team: t, media: create_uploaded_image + WebMock.stub_request(:post, 'http://alegre.test/similarity/sync/image').to_return(body: { + result: [ { - "id": pm1.id, - "sha256": "1782b1d1993fcd9f6fd8155adc6009a9693a8da7bb96d20270c4bc8a30c97570", - "phash": 17399941807326929, - "url": "https:\/\/www.gstatic.com\/webp\/gallery3\/1.png", - "context": context, - "score": 0 + id: pm1.id, + sha256: '1782b1d1993fcd9f6fd8155adc6009a9693a8da7bb96d20270c4bc8a30c97570', + phash: '17399941807326929', + url: image_path, + context: { team_id: t.id, project_media_id: pm1.id }, + score: 0.8 } ] }.to_json) - Bot::Alegre.stubs(:media_file_url).returns(image_path) - assert Bot::Alegre.run({ data: { dbid: pm4.id }, event: 'create_project_media' }) - extracted_text_annotation = pm4.get_annotations('extracted_text').last - assert_equal 'Foo bar', extracted_text_annotation.data['text'] - Bot::Alegre.unstub(:media_file_url) + response = { pm1.id => { score: 0.8, context: { team_id: t.id, project_media_id: pm1.id, contexts_count: 1, field: ''}, model: nil, source_field: 'image', target_field: 'image' } } + assert_equal response.to_json, Bot::Alegre.get_items_with_similarity('image', pm2, Bot::Alegre.get_threshold_for_query('image', pm2)).to_json end end @@ -605,7 +502,7 @@ def teardown pm = create_project_media quote: "Blah", team: p.team pm.analysis = { title: 'Title 1' } pm.save! 
- tbi.settings = {"text_similarity_model": "paraphrase-filipino-mpnet-base-v2", "text_similarity_model": ["indian-sbert", "xlm-r-bert-base-nli-stsb-mean-tokens"], "alegre_model_in_use": "paraphrase-filipino-mpnet-base-v2", "alegre_model_in_use": ["indian-sbert", "xlm-r-bert-base-nli-stsb-mean-tokens"], "text_vector_matching_threshold" => 0.92, "text_vector_xlm-r-bert-base-nli-stsb-mean-tokens_suggestion_threshold" => 0.97} + tbi.settings = {"text_similarity_model": ["indian-sbert", "xlm-r-bert-base-nli-stsb-mean-tokens"], "alegre_model_in_use": ["indian-sbert", "xlm-r-bert-base-nli-stsb-mean-tokens"], "text_vector_matching_threshold" => 0.92, "text_vector_xlm-r-bert-base-nli-stsb-mean-tokens_suggestion_threshold" => 0.97} tbi.save! assert_equal Bot::Alegre.get_threshold_for_query("text", pm, true), [{:value=>0.875, :key=>"text_elasticsearch_matching_threshold", :automatic=>true, :model=>"elasticsearch"}, {:value=>0.92, :key=>"text_vector_matching_threshold", :automatic=>true, :model=>"indian-sbert"}, {:value=>0.92, :key=>"text_vector_matching_threshold", :automatic=>true, :model=>"xlm-r-bert-base-nli-stsb-mean-tokens"}] assert_equal Bot::Alegre.get_threshold_for_query("text", pm, false), [{:value=>0.7, :key=>"text_elasticsearch_suggestion_threshold", :automatic=>false, :model=>"elasticsearch"}, {:value=>0.75, :key=>"text_vector_suggestion_threshold", :automatic=>false, :model=>"indian-sbert"}, {:value=>0.97, :key=>"text_vector_xlm-r-bert-base-nli-stsb-mean-tokens_suggestion_threshold", :automatic=>false, :model=>"xlm-r-bert-base-nli-stsb-mean-tokens"}] diff --git a/test/models/bot/smooch_6_test.rb b/test/models/bot/smooch_6_test.rb index 3d7021bb24..635405a818 100644 --- a/test/models/bot/smooch_6_test.rb +++ b/test/models/bot/smooch_6_test.rb @@ -258,6 +258,7 @@ def send_message_outside_24_hours_window(template, pm = nil) ProjectMedia.any_instance.stubs(:analysis_published_article_url).returns(random_url) Bot::Alegre.stubs(:get_items_with_similar_media).returns({ 
@search_result.id => { score: 0.9 } }) Bot::Smooch.stubs(:bundle_list_of_messages).returns({ 'type' => 'image', 'mediaUrl' => image_url }) + CheckS3.stubs(:rewrite_url).returns(random_url) Sidekiq::Testing.inline! do send_message 'hello', '1', '1', 'Image here', '1' assert_state 'search_result' @@ -266,10 +267,6 @@ def send_message_outside_24_hours_window(template, pm = nil) end assert_state 'main' end - Bot::Alegre.unstub(:get_merged_similar_items) - Bot::Smooch.unstub(:bundle_list_of_messages) - ProjectMedia.any_instance.unstub(:report_status) - ProjectMedia.any_instance.unstub(:analysis_published_article_url) end test "should submit query and handle search error on tipline bot v2" do diff --git a/test/models/bot/smooch_7_test.rb b/test/models/bot/smooch_7_test.rb index 2626bef893..e0041231b3 100644 --- a/test/models/bot/smooch_7_test.rb +++ b/test/models/bot/smooch_7_test.rb @@ -263,6 +263,7 @@ def teardown ProjectMedia.any_instance.stubs(:report_status).returns('published') ProjectMedia.any_instance.stubs(:analysis_published_article_url).returns(random_url) Bot::Alegre.stubs(:get_items_with_similar_media).returns({ pm.id => { score: 0.9, model: 'elasticsearch' } }) + CheckS3.stubs(:rewrite_url).returns(random_url) assert_equal [pm], Bot::Smooch.get_search_results(random_string, {}, pm.team_id, 'en') From 66c9422f190fe39a6d906b5a8ef717edf92a5f24 Mon Sep 17 00:00:00 2001 From: Caio Almeida <117518+caiosba@users.noreply.github.com> Date: Sun, 14 Jan 2024 12:01:13 -0300 Subject: [PATCH 2/4] Collecting two new data points for workspace tipline statistics: "WhatsApp Service Conversations" and "WhatsApp Business Conversations". (#1779) Reference: CV2-4121. 
--- app/models/monthly_team_statistic.rb | 2 + ...conversations_to_monthly_team_statistic.rb | 6 +++ db/schema.rb | 4 +- lib/check_statistics.rb | 49 ++++++++++++++++--- test/lib/check_statistics_test.rb | 35 ++++++++++++- 5 files changed, 85 insertions(+), 11 deletions(-) create mode 100644 db/migrate/20240114024701_add_whats_app_user_and_business_conversations_to_monthly_team_statistic.rb diff --git a/app/models/monthly_team_statistic.rb b/app/models/monthly_team_statistic.rb index 7ac52c12da..1774638ed7 100644 --- a/app/models/monthly_team_statistic.rb +++ b/app/models/monthly_team_statistic.rb @@ -13,6 +13,8 @@ class MonthlyTeamStatistic < ApplicationRecord language: 'Language', month: 'Month', # model method whatsapp_conversations: 'WhatsApp conversations', + whatsapp_conversations_business: 'WhatsApp marketing conversations (business-initiated)', + whatsapp_conversations_user: 'WhatsApp service conversations (user-initiated)', unique_users: 'Unique users', returning_users: 'Returning users', published_reports: 'Published reports', diff --git a/db/migrate/20240114024701_add_whats_app_user_and_business_conversations_to_monthly_team_statistic.rb b/db/migrate/20240114024701_add_whats_app_user_and_business_conversations_to_monthly_team_statistic.rb new file mode 100644 index 0000000000..b931bc0723 --- /dev/null +++ b/db/migrate/20240114024701_add_whats_app_user_and_business_conversations_to_monthly_team_statistic.rb @@ -0,0 +1,6 @@ +class AddWhatsAppUserAndBusinessConversationsToMonthlyTeamStatistic < ActiveRecord::Migration[6.1] + def change + add_column :monthly_team_statistics, :whatsapp_conversations_user, :integer + add_column :monthly_team_statistics, :whatsapp_conversations_business, :integer + end +end diff --git a/db/schema.rb b/db/schema.rb index 7a3294e4c7..b3db55b598 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema.define(version: 2024_01_07_223820) do +ActiveRecord::Schema.define(version: 2024_01_14_024701) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -402,6 +402,8 @@ t.integer "positive_searches" t.integer "negative_searches" t.integer "newsletters_sent" + t.integer "whatsapp_conversations_user" + t.integer "whatsapp_conversations_business" t.index ["team_id", "platform", "language", "start_date"], name: "index_monthly_stats_team_platform_language_start", unique: true t.index ["team_id"], name: "index_monthly_team_statistics_on_team_id" end diff --git a/lib/check_statistics.rb b/lib/check_statistics.rb index 8f9ba19d66..8b132fbc38 100644 --- a/lib/check_statistics.rb +++ b/lib/check_statistics.rb @@ -65,12 +65,12 @@ def number_of_newsletters_sent(team_id, start_date, end_date, language) old_count + new_count end - def number_of_whatsapp_conversations(team_id, start_date, end_date) + def number_of_whatsapp_conversations(team_id, start_date, end_date, type = 'all') # "type" is "all", "user" or "business" from = start_date.to_datetime.to_i to = end_date.to_datetime.to_i # Cache it so we don't recalculate when grabbing the statistics for different languages - Rails.cache.fetch("check_statistics:whatsapp_conversations:#{team_id}:#{from}:#{to}", expires_in: 12.hours, skip_nil: true) do + Rails.cache.fetch("check_statistics:whatsapp_conversations:#{team_id}:#{from}:#{to}:#{type}", expires_in: 12.hours, skip_nil: true) do response = OpenStruct.new({ body: nil, code: 0 }) begin tbi = TeamBotInstallation.where(team_id: team_id, user: BotUser.smooch_user).last @@ -78,8 +78,22 @@ def number_of_whatsapp_conversations(team_id, start_date, end_date) # Only available for tiplines using WhatsApp Cloud API unless tbi&.get_capi_whatsapp_business_account_id.blank? 
uri = URI(URI.join('https://graph.facebook.com/v17.0/', tbi.get_capi_whatsapp_business_account_id.to_s)) + # Account for changes in WhatsApp pricing model + # Until May 2023: User-initiated conversations and business-initiated conversations are defined by the dimension CONVERSATION_DIRECTION, values BUSINESS_INITIATED or USER_INITIATED + # Starting June 2023: The dimension is CONVERSATION_CATEGORY, where SERVICE is user-initiated and business-initiated is defined by UTILITY, MARKETING or AUTHENTICATION + # https://developers.facebook.com/docs/whatsapp/business-management-api/analytics/#conversation-analytics-parameters + dimension_field = '' + unless type == 'all' + dimension = '' + if to < Time.parse('2023-06-01').beginning_of_day.to_i + dimension = 'CONVERSATION_DIRECTION' + else + dimension = 'CONVERSATION_CATEGORY' + end + dimension_field = ".dimensions(#{dimension})" + end params = { - fields: "conversation_analytics.start(#{from}).end(#{to}).granularity(DAILY).phone_numbers(#{tbi.get_capi_phone_number})", + fields: "conversation_analytics.start(#{from}).end(#{to}).granularity(DAILY)#{dimension_field}.phone_numbers(#{tbi.get_capi_phone_number})", access_token: tbi.get_capi_permanent_token } uri.query = Rack::Utils.build_query(params) @@ -89,11 +103,20 @@ def number_of_whatsapp_conversations(team_id, start_date, end_date) response = http.request(request) raise 'Unexpected response' if response.code.to_i >= 300 data = JSON.parse(response.body) - count = 0 + all = 0 + user = 0 + business = 0 data['conversation_analytics']['data'][0]['data_points'].each do |data_point| - count += data_point['conversation'] + count = data_point['conversation'] + all += count + user += count if data_point['conversation_direction'] == 'USER_INITIATED' || data_point['conversation_category'] == 'SERVICE' + business += count if data_point['conversation_direction'] == 'BUSINESS_INITIATED' || ['UTILITY', 'MARKETING', 'AUTHENTICATION'].include?(data_point['conversation_category']) end - 
count + { + all: all, + user: user, + business: business + }[type.to_sym] else nil end @@ -198,8 +221,18 @@ def get_statistics(start_date, end_date, team_id, platform, language, tracing_at statistics[:newsletters_delivered] = TiplineMessage.where(created_at: start_date..end_date, team_id: team_id, platform: platform_name, language: language, direction: 'outgoing', state: 'delivered', event: 'newsletter').count end - CheckTracer.in_span('CheckStatistics#whatsapp_conversations', attributes: tracing_attributes) do - statistics[:whatsapp_conversations] = number_of_whatsapp_conversations(team_id, start_date, end_date) if platform_name == 'WhatsApp' + if platform_name == 'WhatsApp' + CheckTracer.in_span('CheckStatistics#whatsapp_conversations', attributes: tracing_attributes) do + statistics[:whatsapp_conversations] = number_of_whatsapp_conversations(team_id, start_date, end_date, 'all') + end + + CheckTracer.in_span('CheckStatistics#whatsapp_conversations_user', attributes: tracing_attributes) do + statistics[:whatsapp_conversations_user] = number_of_whatsapp_conversations(team_id, start_date, end_date, 'user') + end + + CheckTracer.in_span('CheckStatistics#whatsapp_conversations_business', attributes: tracing_attributes) do + statistics[:whatsapp_conversations_business] = number_of_whatsapp_conversations(team_id, start_date, end_date, 'business') + end end CheckTracer.in_span('CheckStatistics#published_reports', attributes: tracing_attributes) do diff --git a/test/lib/check_statistics_test.rb b/test/lib/check_statistics_test.rb index a6e8479db1..66855676fa 100644 --- a/test/lib/check_statistics_test.rb +++ b/test/lib/check_statistics_test.rb @@ -20,7 +20,7 @@ def setup def teardown end - test 'should calculate number of WhatsApp conversations' do + test 'should calculate number of all WhatsApp conversations' do WebMock.stub_request(:get, @url).to_return(status: 200, body: { conversation_analytics: { data: [ @@ -62,7 +62,7 @@ def teardown }, id: '123456' }.to_json) - 
assert_equal 2300, CheckStatistics.number_of_whatsapp_conversations(@team.id, @from, @to) + assert_equal 2300, CheckStatistics.number_of_whatsapp_conversations(@team.id, @from, @to, 'all') end test 'should not calculate number of WhatsApp conversations if WhatsApp Insights API returns an error' do @@ -82,4 +82,35 @@ def teardown data = CheckStatistics.get_statistics(Time.now.yesterday, Time.now.tomorrow, @team.id, 'whatsapp', 'en') assert_equal 1, data[:newsletters_delivered] end + + test 'should calculate number of WhatsApp user-initiated and business-initiated conversations' do + url = 'https://graph.facebook.com/v17.0/123456?fields=conversation_analytics.start(1672531200).end(1675123200).granularity(DAILY).dimensions(CONVERSATION_DIRECTION).phone_numbers(12345678)&access_token=654321' + WebMock.stub_request(:get, url).to_return(status: 200, body: { + conversation_analytics: { + data: [ + { + data_points: [ + { + start: 1688454000, + end: 1688540400, + conversation: 40, + conversation_direction: 'USER_INITIATED', + cost: 0.8866 + }, + { + start: 1688281200, + end: 1688367600, + conversation: 10, + conversation_direction: 'BUSINESS_INITIATED', + cost: 0 + } + ] + } + ] + }, + id: '123456' + }.to_json) + assert_equal 40, CheckStatistics.number_of_whatsapp_conversations(@team.id, @from, @to, 'user') + assert_equal 10, CheckStatistics.number_of_whatsapp_conversations(@team.id, @from, @to, 'business') + end end From 20ac6599e2a2ea227989cf507472a70ec24701f6 Mon Sep 17 00:00:00 2001 From: Caio Almeida <117518+caiosba@users.noreply.github.com> Date: Sun, 14 Jan 2024 12:01:23 -0300 Subject: [PATCH 3/4] Use item status instead of report status label in the introduction when sending a report. 
(#1778) Fixes: CV2-4180 --- config/initializers/report_designer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/initializers/report_designer.rb b/config/initializers/report_designer.rb index 5fe96de943..da371ad0c4 100644 --- a/config/initializers/report_designer.rb +++ b/config/initializers/report_designer.rb @@ -56,7 +56,7 @@ def report_design_introduction(data, language) if self.annotation_type == 'report_design' introduction = self.report_design_field_value('introduction').to_s - introduction = introduction.gsub('{{status}}', self.report_design_field_value('status_label').to_s) + introduction = introduction.gsub('{{status}}', self.annotated&.status_i18n(nil, { locale: language }).to_s) introduction = introduction.gsub('{{query_date}}', self.report_design_date(Time.at(data['received']).to_date, language)) if data['received'] introduction end From ea0f6da96c9fe5550bf71b025aec462fac2c7a15 Mon Sep 17 00:00:00 2001 From: Caio <117518+caiosba@users.noreply.github.com> Date: Tue, 16 Jan 2024 18:16:52 -0300 Subject: [PATCH 4/4] Updating copy --- app/models/monthly_team_statistic.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/monthly_team_statistic.rb b/app/models/monthly_team_statistic.rb index 1774638ed7..e8fec6ef91 100644 --- a/app/models/monthly_team_statistic.rb +++ b/app/models/monthly_team_statistic.rb @@ -13,8 +13,8 @@ class MonthlyTeamStatistic < ApplicationRecord language: 'Language', month: 'Month', # model method whatsapp_conversations: 'WhatsApp conversations', - whatsapp_conversations_business: 'WhatsApp marketing conversations (business-initiated)', - whatsapp_conversations_user: 'WhatsApp service conversations (user-initiated)', + whatsapp_conversations_business: 'Business Conversations', + whatsapp_conversations_user: 'User Conversations', unique_users: 'Unique users', returning_users: 'Returning users', published_reports: 'Published reports',