# frozen_string_literal: true

# ==> lib/reference_counter_api.rb (new file) <==

require_dependency "#{Rails.root}/lib/errors/api_error_handling"

# Gets data from reference-counter Toolforge tool
# https://toolsadmin.wikimedia.org/tools/id/reference-counter
#
# One instance is bound to a single wiki (project + language). Reference counts
# are fetched one revision at a time; failures never propagate to the caller,
# they are reported instead (see the individual methods below).
class ReferenceCounterApi
  include ApiErrorHandling

  TOOLFORGE_SERVER_URL = 'https://reference-counter.toolforge.org'

  # Not referenced anywhere in this file.
  # NOTE(review): presumably read by ApiErrorHandling#log_error to decide how
  # severe an error is - confirm against that module.
  TYPICAL_ERRORS = [Faraday::TimeoutError,
                    Faraday::ConnectionFailed].freeze

  # Raised by #initialize when the given wiki is not supported by this class.
  class InvalidProjectError < StandardError
  end

  # This class is not designed for use with wikidata, as that wiki works pretty
  # different from other wikis and it has its own method of calculating references.
  # The reference-counter Toolforge API doesn't work for wikidata either for the
  # same reason.
  #
  # Returns true for every wiki whose project is not 'wikidata'.
  def self.valid_wiki?(wiki)
    wiki.project != 'wikidata'
  end

  # wiki           - object responding to #project and #language.
  # update_service - optional object handed through to log_error when a batch
  #                  of requests produced errors.
  #
  # Raises InvalidProjectError if the wiki is a wikidata wiki.
  def initialize(wiki, update_service = nil)
    raise InvalidProjectError unless ReferenceCounterApi.valid_wiki?(wiki)
    @project_code = wiki.project
    @language_code = wiki.language
    @update_service = update_service
    @errors = []
  end

  # This is the main entry point.
  # Given an array of revision ids, it returns a hash with the number of references
  # for those revision ids. A revision whose count could not be retrieved maps
  # to an empty hash.
  # Format result example:
  # { 'rev_id0' => { 'num_ref' => 10 }
  #   ...
  #   'rev_idn' => { "num_ref" => 0 }
  # }
  def get_number_of_references_from_revision_ids(rev_ids)
    # Restart errors array, so errors from a previous batch are not reported again
    @errors = []

    results = rev_ids.each_with_object({}) do |rev_id, counts|
      counts.deep_merge!({ rev_id.to_s => get_number_of_references_from_revision_id(rev_id) })
    end

    log_error_batch(rev_ids)

    results
  end

  private

  # Given a revision ID, it retrieves a hash containing the reference count from the
  # reference-counter Toolforge API.
  # If the API response is not 200 or an error occurs, it returns an empty hash.
  # A non-200 response is reported to Sentry right away as a warning; raised
  # errors are collected in @errors and logged at the batch level.
  def get_number_of_references_from_revision_id(rev_id)
    response = toolforge_server.get(references_query_url(rev_id))
    # The body is parsed before the status check because the parsed content is
    # also attached to the Sentry message for non-200 responses.
    parsed_response = Oj.load(response.body)
    return { 'num_ref' => parsed_response['num_ref'] } if response.status == 200

    # Log the error and return empty hash
    Sentry.capture_message 'Non-200 response hitting references counter API',
                           level: 'warning',
                           extra: { project_code: @project_code,
                                    language_code: @language_code, rev_id:,
                                    status_code: response.status, content: parsed_response }
    {}
  rescue StandardError => e
    @errors << e
    {}
  end

  # Path of the API endpoint for one revision of this instance's wiki.
  def references_query_url(rev_id)
    "/api/v1/references/#{@project_code}/#{@language_code}/#{rev_id}"
  end

  # Faraday connection to the Toolforge server. It is built once per instance
  # and reused for every revision in a batch instead of being rebuilt for each
  # request.
  def toolforge_server
    @toolforge_server ||= Faraday.new(
      url: TOOLFORGE_SERVER_URL,
      headers: {
        'Content-Type': 'application/json'
      }
    )
  end

  # Logs the errors collected while processing a batch. Only the first error is
  # passed to log_error; the total number of errors and the revision ids of the
  # batch go along as extra data. Does nothing when no error was collected.
  def log_error_batch(rev_ids)
    return if @errors.empty?

    log_error(@errors.first, update_service: @update_service,
              sentry_extra: { rev_ids:, project_code: @project_code,
                              language_code: @language_code, error_count: @errors.count })
  end
end

# ==> spec/lib/reference_counter_api_spec.rb (new file) <==
# As a separate file, this spec starts with its own frozen-string-literal magic comment.

require 'rails_helper'
require "#{Rails.root}/lib/reference_counter_api"

describe ReferenceCounterApi do
  before { stub_wiki_validation }

  let(:en_wikipedia) { Wiki.get_or_create(language: 'en', project: 'wikipedia') }
  let(:es_wiktionary) { Wiki.get_or_create(language: 'es', project: 'wiktionary') }
  let(:wikidata) { Wiki.get_or_create(language: nil, project: 'wikidata') }
  let(:deleted_rev_ids) { [708326238] }
  let(:rev_ids) { [5006940, 5006942, 5006946] }

  it 'raises InvalidProjectError if using wikidata project' do
    expect do
      described_class.new(wikidata)
    end.to raise_error(described_class::InvalidProjectError)
  end

  it 'returns the number of references if response is 200 OK', vcr: true do
    ref_counter_api = described_class.new(es_wiktionary)
    response = ref_counter_api.get_number_of_references_from_revision_ids rev_ids
    expect(response.dig('5006940', 'num_ref')).to eq(10)
    expect(response.dig('5006942', 'num_ref')).to eq(4)
    expect(response.dig('5006946', 'num_ref')).to eq(2)
  end

  it 'returns empty hash and logs the message if response is not 200 OK', vcr: true do
    ref_counter_api = described_class.new(en_wikipedia)
    expect(Sentry).to receive(:capture_message).with(
      'Non-200 response hitting references counter API',
      level: 'warning',
      extra: {
        project_code: 'wikipedia',
        language_code: 'en',
        rev_id: 708326238,
        status_code: 404,
        content: {
          'description' =>
          "You don't have permission to view deleted text or changes between deleted revisions."
        }
      }
    )
    response = ref_counter_api.get_number_of_references_from_revision_ids deleted_rev_ids
    expect(response.dig('708326238')).to eq({})
  end

  it 'returns empty hash and logs the error if an unexpected error raises', vcr: true do
    reference_counter_api = described_class.new(es_wiktionary)

    # Every request of the batch fails, so all three revisions end up in the
    # error count passed to log_error.
    allow_any_instance_of(Faraday::Connection).to receive(:get)
      .and_raise(Faraday::TimeoutError)

    expect_any_instance_of(described_class).to receive(:log_error).with(
      Faraday::TimeoutError,
      update_service: nil,
      sentry_extra: {
        project_code: 'wiktionary',
        language_code: 'es',
        rev_ids: [5006940, 5006942, 5006946],
        error_count: 3
      }
    )
    response = reference_counter_api.get_number_of_references_from_revision_ids rev_ids
    expect(response.dig('5006940')).to eq({})
    expect(response.dig('5006942')).to eq({})
    expect(response.dig('5006946')).to eq({})
  end
end