From 5c462e80447e9c12724e0951101161fb604c7da8 Mon Sep 17 00:00:00 2001 From: gabina Date: Tue, 27 Feb 2024 20:54:13 -0300 Subject: [PATCH 1/3] Do not save liftwing features in the revision features field for non-wikidata wikis. We don't need it anymore since non-wikidata wikis use only reference-counter features. --- lib/revision_score_api_handler.rb | 13 ++++-- .../importers/revision_score_importer_spec.rb | 9 ++-- spec/lib/revision_score_api_handler_spec.rb | 45 ++++++++----------- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/lib/revision_score_api_handler.rb b/lib/revision_score_api_handler.rb index 39428cf44e..0a27acc77e 100644 --- a/lib/revision_score_api_handler.rb +++ b/lib/revision_score_api_handler.rb @@ -65,9 +65,16 @@ def complete_score(score) # Fetch the value for 'deleted, or default to 'false if not present. completed_score['deleted'] = score.fetch('deleted', false) - # Ensure 'features' is a hash. - features = score.fetch('features', {}) || {} - completed_score['features'] = features.deep_merge({ 'num_ref' => score.fetch('num_ref', nil) }) + # Ensure 'features' is a hash or nil. + # For Wikidata, use the 'features' key in the score. Otherwise, use 'num_ref' key. + completed_score['features'] = + if @wiki.project == 'wikidata' + score.fetch('features', nil) + else + # If there was an error hitting the API, set features to nil. Otherwise, use the 'num_ref'. + score.fetch('num_ref').nil? ? nil : { 'num_ref' => score['num_ref'] } + end + completed_score end diff --git a/spec/lib/importers/revision_score_importer_spec.rb b/spec/lib/importers/revision_score_importer_spec.rb index 6a2f6ec45c..9ace5f9d2b 100644 --- a/spec/lib/importers/revision_score_importer_spec.rb +++ b/spec/lib/importers/revision_score_importer_spec.rb @@ -53,7 +53,6 @@ later_score = later_revision.wp10.to_f expect(early_score).to be_between(0, 100) expect(later_score).to be_between(early_score, 100) - expect(later_revision.features['feature.wikitext.revision.external_links']).to eq(12) expect(later_revision.features['num_ref']).to eq(13) end end @@ -74,7 +73,7 @@ revision = article.revisions.first expect(revision.deleted).to eq(true) expect(revision.wp10).to be_nil - expect(revision.features).to eq({ 'num_ref' => nil }) + expect(revision.features).to eq({}) end end @@ -93,7 +92,7 @@ revision = article.revisions.first expect(revision.deleted).to eq(true) expect(revision.wp10).to be_nil - expect(revision.features).to eq({ 'num_ref' => nil }) + expect(revision.features).to eq({}) end end @@ -124,7 +123,7 @@ # no value changed for the revision revision = Revision.find_by(mw_rev_id: 662106477) expect(revision.wp10).to be_nil - expect(revision.features).to eq({ 'num_ref' => nil }) + expect(revision.features).to eq({}) expect(revision.deleted).to eq(false) end @@ -169,7 +168,7 @@ it 'returns a hash with a predicted rating and features' do VCR.use_cassette 'revision_scores/single_revision' do - expect(subject[:features]).to have_key('feature.wikitext.revision.wikilinks') + expect(subject[:features]).to have_key(Revision::REFERENCE_COUNT) expect(subject[:rating]).to eq('Stub') end end diff --git a/spec/lib/revision_score_api_handler_spec.rb b/spec/lib/revision_score_api_handler_spec.rb index ac6662b321..5053f69f92 100644 --- a/spec/lib/revision_score_api_handler_spec.rb +++ b/spec/lib/revision_score_api_handler_spec.rb @@ -14,17 +14,15 @@ expect(subject).to be_a(Hash) expect(subject.dig('829840090', 'wp10').to_f).to eq(62.805729915108664) expect(subject.dig('829840090', 'features')).to be_a(Hash) - expect(subject.dig('829840090', 'features', - 'feature.wikitext.revision.ref_tags')).to eq(132) - expect(subject.dig('829840090', 'features', 'num_ref')).to eq(132) + # Only num_ref feature is stored. LiftWing features are discarded. + expect(subject.dig('829840090', 'features')).to eq({ 'num_ref' => 132 }) expect(subject.dig('829840090', 'deleted')).to eq(false) expect(subject.dig('829840090', 'prediction')).to eq('B') expect(subject.dig('829840091', 'wp10').to_f).to eq(39.507631367268004) expect(subject.dig('829840091', 'features')).to be_a(Hash) - expect(subject.dig('829840091', 'features', - 'feature.wikitext.revision.ref_tags')).to eq(1) - expect(subject.dig('829840091', 'features', 'num_ref')).to eq(1) + # Only num_ref feature is stored. LiftWing features are discarded. + expect(subject.dig('829840091', 'features')).to eq({ 'num_ref' => 1 }) expect(subject.dig('829840091', 'deleted')).to eq(false) expect(subject.dig('829840091', 'prediction')).to eq('C') end @@ -48,21 +46,16 @@ .to_raise(Errno::ETIMEDOUT) expect(subject).to be_a(Hash) + expect(subject.dig('829840090', 'wp10').to_f).to eq(62.805729915108664) - expect(subject.dig('829840090', 'features')).to be_a(Hash) - expect(subject.dig('829840090', 'features', - 'feature.wikitext.revision.ref_tags')).to eq(132) - expect(subject.dig('829840090', 'features').key?('num_ref')).to eq(true) - expect(subject.dig('829840090', 'features', 'num_ref')).to eq(nil) + expect(subject.dig('829840090')).to have_key('features') + expect(subject.dig('829840090', 'features')).to be_nil expect(subject.dig('829840090', 'deleted')).to eq(false) expect(subject.dig('829840090', 'prediction')).to eq('B') expect(subject.dig('829840091', 'wp10').to_f).to eq(39.507631367268004) - expect(subject.dig('829840091', 'features')).to be_a(Hash) - expect(subject.dig('829840091', 'features', - 'feature.wikitext.revision.ref_tags')).to eq(1) - expect(subject.dig('829840091', 'features').key?('num_ref')).to eq(true) - expect(subject.dig('829840091', 'features', 'num_ref')).to eq(nil) + expect(subject.dig('829840091')).to have_key('features') + expect(subject.dig('829840091', 'features')).to be_nil expect(subject.dig('829840091', 'deleted')).to eq(false) expect(subject.dig('829840091', 'prediction')).to eq('C') end @@ -76,9 +69,9 @@ .to_raise(Errno::ETIMEDOUT) expect(subject).to be_a(Hash) expect(subject.dig('829840090')).to eq({ 'wp10' => nil, - 'features' => { 'num_ref' => nil }, 'deleted' => false, 'prediction' => nil }) + 'features' => nil, 'deleted' => false, 'prediction' => nil }) expect(subject.dig('829840091')).to eq({ 'wp10' => nil, - 'features' => { 'num_ref' => nil }, 'deleted' => false, 'prediction' => nil }) + 'features' => nil, 'deleted' => false, 'prediction' => nil }) end end @@ -97,8 +90,8 @@ expect(subject.dig('144495297', 'features')).to be_a(Hash) expect(subject.dig('144495297', 'features', 'feature.len()')).to eq(2) - expect(subject.dig('144495297', 'features').key?('num_ref')).to eq(true) - expect(subject.dig('144495297', 'features', 'num_ref')).to eq(nil) + # 'num_ref' key doesn't exist for wikidata features + expect(subject.dig('144495297', 'features').key?('num_ref')).to eq(false) expect(subject.dig('144495297', 'deleted')).to eq(false) expect(subject.dig('144495297', 'prediction')).to eq('D') @@ -106,8 +99,8 @@ expect(subject.dig('144495298', 'features')).to be_a(Hash) expect(subject.dig('144495298', 'features', 'feature.len()')).to eq(0) - expect(subject.dig('144495298', 'features').key?('num_ref')).to eq(true) - expect(subject.dig('144495298', 'features', 'num_ref')).to eq(nil) + # 'num_ref' key doesn't exist for wikidata features + expect(subject.dig('144495298', 'features').key?('num_ref')).to eq(false) expect(subject.dig('144495298', 'deleted')).to eq(false) expect(subject.dig('144495298', 'prediction')).to eq('E') end @@ -118,9 +111,9 @@ .to_raise(Errno::ETIMEDOUT) expect(subject).to be_a(Hash) expect(subject.dig('144495297')).to eq({ 'wp10' => nil, - 'features' => { 'num_ref' => nil }, 'deleted' => false, 'prediction' => nil }) + 'features' => nil, 'deleted' => false, 'prediction' => nil }) expect(subject.dig('144495298')).to eq({ 'wp10' => nil, - 'features' => { 'num_ref' => nil }, 'deleted' => false, 'prediction' => nil }) + 'features' => nil, 'deleted' => false, 'prediction' => nil }) end end end @@ -148,9 +141,9 @@ .to_raise(Errno::ETIMEDOUT) expect(subject).to be_a(Hash) expect(subject.dig('157412237')).to eq({ 'wp10' => nil, - 'features' => { 'num_ref' => nil }, 'deleted' => false, 'prediction' => nil }) + 'features' => nil, 'deleted' => false, 'prediction' => nil }) expect(subject.dig('157417768')).to eq({ 'wp10' => nil, - 'features' => { 'num_ref' => nil }, 'deleted' => false, 'prediction' => nil }) + 'features' => nil, 'deleted' => false, 'prediction' => nil }) end end end From c1c1478c9520ef7b2188e26aeb53e3ede16eb1a7 Mon Sep 17 00:00:00 2001 From: gabina Date: Tue, 27 Feb 2024 23:44:26 -0300 Subject: [PATCH 2/3] Make the RevisionFeedbackController use the LiftWingApi directly (instead of the RevisionScoreImporter) to get the liftwing features and predictions --- app/controllers/revision_feedback_controller.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/controllers/revision_feedback_controller.rb b/app/controllers/revision_feedback_controller.rb index d1e5ad852d..0417a5e8d8 100644 --- a/app/controllers/revision_feedback_controller.rb +++ b/app/controllers/revision_feedback_controller.rb @@ -1,16 +1,16 @@ # frozen_string_literal: true require_dependency "#{Rails.root}/lib/revision_feedback_service" -require_dependency "#{Rails.root}/lib/importers/revision_score_importer" +require_dependency "#{Rails.root}/lib/lift_wing_api" class RevisionFeedbackController < ApplicationController def index set_latest_revision_id return if @rev_id.nil? - revision_data = RevisionScoreImporter.new.fetch_data_for_revision_id(@rev_id) - @feedback = RevisionFeedbackService.new(revision_data[:features]).feedback + revision_data = LiftWingApi.new(@wiki).get_revision_data([@rev_id])[@rev_id.to_s] + @feedback = RevisionFeedbackService.new(revision_data['features']).feedback @user_feedback = Assignment.find(params['assignment_id']).assignment_suggestions - @rating = revision_data[:rating] + @rating = revision_data['prediction'] end private From 1d990309025d7b30013023ce6e4a3a2c18a05bba Mon Sep 17 00:00:00 2001 From: gabina Date: Wed, 28 Feb 2024 11:07:59 -0300 Subject: [PATCH 3/3] Improve comments --- lib/revision_score_api_handler.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/revision_score_api_handler.rb b/lib/revision_score_api_handler.rb index 0a27acc77e..5cfc9c757f 100644 --- a/lib/revision_score_api_handler.rb +++ b/lib/revision_score_api_handler.rb @@ -23,16 +23,19 @@ def initialize(language: 'en', project: 'wikipedia', wiki: nil, update_service: # The response has the following format: # { "rev_0"=> # { "wp10"=>0.296976285416441736e2, - # "features"=> { "feature.wikitext.revision.chars"=>5781.0,...,"num_ref"=>0 }, + # "features"=> { "num_ref"=>0 }, # "deleted"=>false, # "prediction"=>"Start" }, # ..., # "rev_n"=> # { "wp10"=>0.2929458626376752846e2, - # "features"=> { "feature.wikitext.revision.chars"=>4672.0,...,"num_ref"=>0 }, + # "features"=> nil, # "deleted"=>false, # "prediction"=>"Start" } # } + # + # For wikidata, "features" key contains Liftwing features. For other wikis, "features" + # key contains reference-counter response (or nil). def get_revision_data(rev_batch) scores = maybe_get_lift_wing_data rev_batch scores.deep_merge!(maybe_get_reference_data(rev_batch)) @@ -65,13 +68,14 @@ def complete_score(score) # Fetch the value for 'deleted, or default to 'false if not present. completed_score['deleted'] = score.fetch('deleted', false) - # Ensure 'features' is a hash or nil. - # For Wikidata, use the 'features' key in the score. Otherwise, use 'num_ref' key. + # Ensure 'features' has the correct value (hash or nil). + # For Wikidata, 'features' has to contain the LiftWing features. completed_score['features'] = if @wiki.project == 'wikidata' score.fetch('features', nil) else - # If there was an error hitting the API, set features to nil. Otherwise, use the 'num_ref'. + # For other wikis, 'features' has to contain the reference-counter scores if + # different from nil. Otherwise, it should be nil. score.fetch('num_ref').nil? ? nil : { 'num_ref' => score['num_ref'] } end