From 14ba531b65289df437cde52cf9d96184431891d1 Mon Sep 17 00:00:00 2001 From: gabina Date: Tue, 26 Dec 2023 15:40:59 -0300 Subject: [PATCH 1/5] Add first version of ReferenceCounterApi and specs for that. This new class makes requests to the new Toolforge Reference Counter API --- lib/reference_counter_api.rb | 68 +++++++++++++++++++++++++ spec/lib/reference_counter_api_spec.rb | 69 ++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 lib/reference_counter_api.rb create mode 100644 spec/lib/reference_counter_api_spec.rb diff --git a/lib/reference_counter_api.rb b/lib/reference_counter_api.rb new file mode 100644 index 0000000000..2262156bbf --- /dev/null +++ b/lib/reference_counter_api.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +require_dependency "#{Rails.root}/lib/errors/api_error_handling" + +# Gets data from reference-counter Toolforge tool +# https://toolsadmin.wikimedia.org/tools/id/reference-counter +class ReferenceCounterApi + include ApiErrorHandling + + TOOLFORGE_SERVER_URL = 'https://reference-counter.toolforge.org' + + def self.valid_wiki?(wiki) + return wiki.project != 'wikidata' + end + + def initialize(wiki, update_service = nil) + raise InvalidProjectError unless ReferenceCounterApi.valid_wiki?(wiki) + @project_code = wiki.project + @language_code = wiki.language + @update_service = update_service + end + + # Return the number of references got from the references counter API. + # Return -1 if the response is different from 200 OK, or an error happens. + def get_number_of_references_from_revision_id(rev_id) + response = toolforge_server.get(references_query_url(rev_id)) + parsed_response = Oj.load(response.body) + if response.status == 200 + return parsed_response['num_ref'] + else + # Log the error and return -1 + Sentry.capture_message 'Non-200 response hitting references counter API', + level: 'warning', extra: { project_code: @project_code, + language_code: @language_code, rev_id:, + status_code: response.status, content: parsed_response } + return -1 + end + rescue StandardError => e + # Log any error + log_error(e, update_service: @update_service, + sentry_extra: { project_code: @project_code, + language_code: @language_code, rev_id: }) + return -1 + end + + class InvalidProjectError < StandardError + end + + private + + def references_query_url(rev_id) + "/api/v1/references/#{@project_code}/#{@language_code}/#{rev_id}" + end + + def toolforge_server + connection = Faraday.new( + url: TOOLFORGE_SERVER_URL, + headers: { + 'Content-Type': 'application/json' + } + ) + # connection.headers['User-Agent'] = ENV['visualizer_url'] + ' ' + Rails.env + connection + end + + TYPICAL_ERRORS = [Faraday::TimeoutError, + Faraday::ConnectionFailed].freeze +end diff --git a/spec/lib/reference_counter_api_spec.rb b/spec/lib/reference_counter_api_spec.rb new file mode 100644 index 0000000000..9388081d37 --- /dev/null +++ b/spec/lib/reference_counter_api_spec.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +require 'rails_helper' +require "#{Rails.root}/lib/reference_counter_api" + +describe ReferenceCounterApi do + let(:en_language) { 'en' } + let(:es_language) { 'es' } + let(:wikipedia_project) { 'wikipedia' } + let(:wiktionary_project) { 'wiktionary' } + let(:wikidata_project) { 'wikidata' } + let(:en_wikipedia) { Wiki.get_or_create(language: en_language, project: wikipedia_project) } + let(:es_wiktionary) { Wiki.get_or_create(language: es_language, project: wiktionary_project) } + #let(:wikidata) { Wiki.get_or_create(language: nil, project: wikidata_project) } + let(:deleted_rev_id) { 708326238 } + let(:rev_id) { 5006942 } + + it 'raises InvalidProjectError if using wikidata project' do + wikidata = Wiki.new(language: nil, project: 'wikidata') + expect do + described_class.new(wikidata) + end.to raise_error(described_class::InvalidProjectError) + end + + it 'returns the number of references if response is 200 OK', vcr: true do + ref_counter_api = described_class.new(es_wiktionary) + number_of_references = ref_counter_api.get_number_of_references_from_revision_id rev_id + expect(number_of_references).to eq(4) + end + + it 'returns -1 and logs the message if revision id is not 200 OK', vcr: true do + ref_counter_api = described_class.new(en_wikipedia) + expect(Sentry).to receive(:capture_message).with( + 'Non-200 response hitting references counter API', + level: 'warning', + extra: { + project_code: 'wikipedia', + language_code: 'en', + rev_id: 708326238, + status_code: 404, + content: { + 'description' => + "You don't have permission to view deleted text or changes between deleted revisions." + } + } + ) + number_of_references = ref_counter_api.get_number_of_references_from_revision_id deleted_rev_id + expect(number_of_references).to eq(-1) + end + + it 'returns -1 and logs the error if an unexpected error raises', vcr: true do + reference_counter_api = described_class.new(es_wiktionary) + + allow_any_instance_of(Faraday::Connection).to receive(:get) + .and_raise(Faraday::TimeoutError) + + expect_any_instance_of(described_class).to receive(:log_error).with( + Faraday::TimeoutError, + update_service: nil, + sentry_extra: { + project_code: 'wiktionary', + language_code: 'es', + rev_id: 5006942 + } + ) + number_of_references = reference_counter_api.get_number_of_references_from_revision_id rev_id + expect(number_of_references).to eq(-1) + end +end From 31ebda0ebdea49093ccd81ffd75609464822d214 Mon Sep 17 00:00:00 2001 From: gabina Date: Wed, 27 Dec 2023 14:17:05 -0300 Subject: [PATCH 2/5] Improve comments and stub wiki validations in specs to avoid wikidata raising InvalidWikiError --- lib/reference_counter_api.rb | 8 +++++--- spec/lib/reference_counter_api_spec.rb | 14 +++++--------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/lib/reference_counter_api.rb b/lib/reference_counter_api.rb index 2262156bbf..981a2501ab 100644 --- a/lib/reference_counter_api.rb +++ b/lib/reference_counter_api.rb @@ -20,8 +20,11 @@ def initialize(wiki, update_service = nil) @update_service = update_service end - # Return the number of references got from the references counter API. - # Return -1 if the response is different from 200 OK, or an error happens. + # This is the main entry point. + # Given a revision ID, it retrieves the reference count from the + # reference-counter Toolforge API. + # If the API response is not 200 or an error occurs, it returns -1. + # Any encountered errors are logged in Sentry. def get_number_of_references_from_revision_id(rev_id) response = toolforge_server.get(references_query_url(rev_id)) parsed_response = Oj.load(response.body) @@ -59,7 +62,6 @@ def toolforge_server 'Content-Type': 'application/json' } ) - # connection.headers['User-Agent'] = ENV['visualizer_url'] + ' ' + Rails.env connection end diff --git a/spec/lib/reference_counter_api_spec.rb b/spec/lib/reference_counter_api_spec.rb index 9388081d37..088a8c642c 100644 --- a/spec/lib/reference_counter_api_spec.rb +++ b/spec/lib/reference_counter_api_spec.rb @@ -4,19 +4,15 @@ require "#{Rails.root}/lib/reference_counter_api" describe ReferenceCounterApi do - let(:en_language) { 'en' } - let(:es_language) { 'es' } - let(:wikipedia_project) { 'wikipedia' } - let(:wiktionary_project) { 'wiktionary' } - let(:wikidata_project) { 'wikidata' } - let(:en_wikipedia) { Wiki.get_or_create(language: en_language, project: wikipedia_project) } - let(:es_wiktionary) { Wiki.get_or_create(language: es_language, project: wiktionary_project) } - #let(:wikidata) { Wiki.get_or_create(language: nil, project: wikidata_project) } + before { stub_wiki_validation } + + let(:en_wikipedia) { Wiki.get_or_create(language: 'en', project: 'wikipedia') } + let(:es_wiktionary) { Wiki.get_or_create(language: 'es', project: 'wiktionary') } + let(:wikidata) { Wiki.get_or_create(language: nil, project: 'wikidata') } let(:deleted_rev_id) { 708326238 } let(:rev_id) { 5006942 } it 'raises InvalidProjectError if using wikidata project' do - wikidata = Wiki.new(language: nil, project: 'wikidata') expect do described_class.new(wikidata) end.to raise_error(described_class::InvalidProjectError) From a534520ad1ea38cd26e8479e72afd697113605be Mon Sep 17 00:00:00 2001 From: gabina Date: Fri, 5 Jan 2024 23:30:51 -0300 Subject: [PATCH 3/5] Add comment about wikidata --- lib/reference_counter_api.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/reference_counter_api.rb b/lib/reference_counter_api.rb index 981a2501ab..9acdecf3a6 100644 --- a/lib/reference_counter_api.rb +++ b/lib/reference_counter_api.rb @@ -9,6 +9,10 @@ class ReferenceCounterApi TOOLFORGE_SERVER_URL = 'https://reference-counter.toolforge.org' + # This class is not designed for use with wikidata, as that wiki works pretty + # different from other wikis and it has its own method of calculating references. + # The reference-counter Toolforge API doesn't work for wikidata either for the + # same reason. def self.valid_wiki?(wiki) return wiki.project != 'wikidata' end From c2bf974437671431a0d645481f45480fa1a40834 Mon Sep 17 00:00:00 2001 From: gabina Date: Fri, 5 Jan 2024 23:32:16 -0300 Subject: [PATCH 4/5] Return nil instead of -1 in case of error --- lib/reference_counter_api.rb | 6 +++--- spec/lib/reference_counter_api_spec.rb | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/reference_counter_api.rb b/lib/reference_counter_api.rb index 9acdecf3a6..c0217fdf4a 100644 --- a/lib/reference_counter_api.rb +++ b/lib/reference_counter_api.rb @@ -27,7 +27,7 @@ def initialize(wiki, update_service = nil) # This is the main entry point. # Given a revision ID, it retrieves the reference count from the # reference-counter Toolforge API. - # If the API response is not 200 or an error occurs, it returns -1. + # If the API response is not 200 or an error occurs, it returns nil. # Any encountered errors are logged in Sentry. def get_number_of_references_from_revision_id(rev_id) response = toolforge_server.get(references_query_url(rev_id)) @@ -40,14 +40,14 @@ def get_number_of_references_from_revision_id(rev_id) level: 'warning', extra: { project_code: @project_code, language_code: @language_code, rev_id:, status_code: response.status, content: parsed_response } - return -1 + return nil end rescue StandardError => e # Log any error log_error(e, update_service: @update_service, sentry_extra: { project_code: @project_code, language_code: @language_code, rev_id: }) - return -1 + return nil end class InvalidProjectError < StandardError diff --git a/spec/lib/reference_counter_api_spec.rb b/spec/lib/reference_counter_api_spec.rb index 088a8c642c..d3c38f66b1 100644 --- a/spec/lib/reference_counter_api_spec.rb +++ b/spec/lib/reference_counter_api_spec.rb @@ -24,7 +24,7 @@ expect(number_of_references).to eq(4) end - it 'returns -1 and logs the message if revision id is not 200 OK', vcr: true do + it 'returns nil and logs the message if revision id is not 200 OK', vcr: true do ref_counter_api = described_class.new(en_wikipedia) expect(Sentry).to receive(:capture_message).with( 'Non-200 response hitting references counter API', @@ -41,10 +41,10 @@ } ) number_of_references = ref_counter_api.get_number_of_references_from_revision_id deleted_rev_id - expect(number_of_references).to eq(-1) + expect(number_of_references).to eq(nil) end - it 'returns -1 and logs the error if an unexpected error raises', vcr: true do + it 'returns nil and logs the error if an unexpected error raises', vcr: true do reference_counter_api = described_class.new(es_wiktionary) allow_any_instance_of(Faraday::Connection).to receive(:get) @@ -60,6 +60,6 @@ } ) number_of_references = reference_counter_api.get_number_of_references_from_revision_id rev_id - expect(number_of_references).to eq(-1) + expect(number_of_references).to eq(nil) end end From 8d5588284e0f06ae0aa0259bc57d2b2ca40c2460 Mon Sep 17 00:00:00 2001 From: gabina Date: Fri, 12 Jan 2024 17:12:32 -0300 Subject: [PATCH 5/5] Log reference-counter API errors at batch level instead of individually --- lib/reference_counter_api.rb | 50 +++++++++++++++++++------- spec/lib/reference_counter_api_spec.rb | 27 ++++++++------ 2 files changed, 54 insertions(+), 23 deletions(-) diff --git a/lib/reference_counter_api.rb b/lib/reference_counter_api.rb index c0217fdf4a..536f762c39 100644 --- a/lib/reference_counter_api.rb +++ b/lib/reference_counter_api.rb @@ -22,39 +22,57 @@ def initialize(wiki, update_service = nil) @project_code = wiki.project @language_code = wiki.language @update_service = update_service + @errors = [] end # This is the main entry point. - # Given a revision ID, it retrieves the reference count from the + # Given an array of revision ids, it returns a hash with the number of references + # for those revision ids. + # Format result example: + # { 'rev_id0' => { 'num_ref' => 10 } + # ... + # 'rev_idn' => { "num_ref" => 0 } + # } + def get_number_of_references_from_revision_ids(rev_ids) + # Restart errors array + @errors = [] + results = {} + rev_ids.each do |rev_id| + results.deep_merge!({ rev_id.to_s => get_number_of_references_from_revision_id(rev_id) }) + end + + log_error_batch(rev_ids) + + return results + end + + private + + # Given a revision ID, it retrieves a hash containing the reference count from the # reference-counter Toolforge API. # If the API response is not 200 or an error occurs, it returns nil. - # Any encountered errors are logged in Sentry. + # Any encountered errors are logged in Sentry at the batch level. def get_number_of_references_from_revision_id(rev_id) response = toolforge_server.get(references_query_url(rev_id)) parsed_response = Oj.load(response.body) if response.status == 200 - return parsed_response['num_ref'] + return { 'num_ref' => parsed_response['num_ref'] } else - # Log the error and return -1 + # Log the error and return empty hash Sentry.capture_message 'Non-200 response hitting references counter API', level: 'warning', extra: { project_code: @project_code, language_code: @language_code, rev_id:, status_code: response.status, content: parsed_response } - return nil + return {} end rescue StandardError => e - # Log any error - log_error(e, update_service: @update_service, - sentry_extra: { project_code: @project_code, - language_code: @language_code, rev_id: }) - return nil + @errors << e + return {} end class InvalidProjectError < StandardError end - private - def references_query_url(rev_id) "/api/v1/references/#{@project_code}/#{@language_code}/#{rev_id}" end @@ -71,4 +89,12 @@ def toolforge_server TYPICAL_ERRORS = [Faraday::TimeoutError, Faraday::ConnectionFailed].freeze + + def log_error_batch(rev_ids) + return if @errors.empty? + + log_error(@errors.first, update_service: @update_service, + sentry_extra: { rev_ids:, project_code: @project_code, + language_code: @language_code, error_count: @errors.count }) + end end diff --git a/spec/lib/reference_counter_api_spec.rb b/spec/lib/reference_counter_api_spec.rb index d3c38f66b1..a99a1c62f7 100644 --- a/spec/lib/reference_counter_api_spec.rb +++ b/spec/lib/reference_counter_api_spec.rb @@ -9,8 +9,8 @@ let(:en_wikipedia) { Wiki.get_or_create(language: 'en', project: 'wikipedia') } let(:es_wiktionary) { Wiki.get_or_create(language: 'es', project: 'wiktionary') } let(:wikidata) { Wiki.get_or_create(language: nil, project: 'wikidata') } - let(:deleted_rev_id) { 708326238 } - let(:rev_id) { 5006942 } + let(:deleted_rev_ids) { [708326238] } + let(:rev_ids) { [5006940, 5006942, 5006946] } it 'raises InvalidProjectError if using wikidata project' do expect do @@ -20,11 +20,13 @@ it 'returns the number of references if response is 200 OK', vcr: true do ref_counter_api = described_class.new(es_wiktionary) - number_of_references = ref_counter_api.get_number_of_references_from_revision_id rev_id - expect(number_of_references).to eq(4) + response = ref_counter_api.get_number_of_references_from_revision_ids rev_ids + expect(response.dig('5006940', 'num_ref')).to eq(10) + expect(response.dig('5006942', 'num_ref')).to eq(4) + expect(response.dig('5006946', 'num_ref')).to eq(2) end - it 'returns nil and logs the message if revision id is not 200 OK', vcr: true do + it 'returns empty hash and logs the message if response is not 200 OK', vcr: true do ref_counter_api = described_class.new(en_wikipedia) expect(Sentry).to receive(:capture_message).with( 'Non-200 response hitting references counter API', @@ -40,11 +42,11 @@ } } ) - number_of_references = ref_counter_api.get_number_of_references_from_revision_id deleted_rev_id - expect(number_of_references).to eq(nil) + response = ref_counter_api.get_number_of_references_from_revision_ids deleted_rev_ids + expect(response.dig('708326238')).to eq({}) end - it 'returns nil and logs the error if an unexpected error raises', vcr: true do + it 'returns empty hash and logs the error if an unexpected error raises', vcr: true do reference_counter_api = described_class.new(es_wiktionary) allow_any_instance_of(Faraday::Connection).to receive(:get) @@ -56,10 +58,13 @@ sentry_extra: { project_code: 'wiktionary', language_code: 'es', - rev_id: 5006942 + rev_ids: [5006940, 5006942, 5006946], + error_count: 3 } ) - number_of_references = reference_counter_api.get_number_of_references_from_revision_id rev_id - expect(number_of_references).to eq(nil) + response = reference_counter_api.get_number_of_references_from_revision_ids rev_ids + expect(response.dig('5006940')).to eq({}) + expect(response.dig('5006942')).to eq({}) + expect(response.dig('5006946')).to eq({}) end end