Skip to content

Commit

Permalink
[Data rearchitecture] Improve logs during course updates (#6149)
Browse files Browse the repository at this point in the history
* Pass in right update_service to UpdateCourseWikiTimeslices

* Pass in update_service to UpdateCourseWikiTimeslices

* Pass in the same debugger instance

* Add some basic logs through Rails.logger to identify when course metadata changed

* Make UpdateCourseWikiTimeslices return number of timeslices processed and reprocessed.

* Log the number of processed and reproceseed timeslices
  • Loading branch information
gabina authored Jan 22, 2025
1 parent 7c89a2e commit d6c8a2c
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 78 deletions.
7 changes: 4 additions & 3 deletions app/services/prepare_timeslices.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
# Ensures that the necessary timeslices are created prior to a new update
# of the course statistics.
class PrepareTimeslices
def initialize(course)
def initialize(course, debugger, update_service: nil)
@course = course
@timeslice_manager = TimesliceManager.new(@course)
@debugger = UpdateDebugger.new(@course)
@debugger = debugger
@update_service = update_service
end

# Deletes all existing timeslices and recreates them from scratch.
Expand All @@ -32,7 +33,7 @@ def adjust_timeslices
end
# Execute update tasks in a specific order
UpdateTimeslicesCourseWiki.new(@course).run
UpdateTimeslicesCourseUser.new(@course).run
UpdateTimeslicesCourseUser.new(@course, update_service: @update_service).run
UpdateTimeslicesUntrackedArticle.new(@course).run
UpdateTimeslicesCourseDate.new(@course).run
UpdateTimeslicesScopedArticle.new(@course).run
Expand Down
8 changes: 6 additions & 2 deletions app/services/update_course_stats_timeslice.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ def initialize(course)
import_uploads
update_categories
update_article_status if should_update_article_status?
@timeslice_errors = UpdateCourseWikiTimeslices.new(@course).run(all_time: @full_update)
@processed, @reprocessed = UpdateCourseWikiTimeslices.new(@course, @debugger,
update_service: self)
.run(all_time: @full_update)
update_average_pageviews
update_caches
update_wikidata_stats if wikidata
Expand Down Expand Up @@ -109,7 +111,9 @@ def log_end_of_update
UpdateLogger.update_course(@course, 'start_time' => @start_time.to_datetime,
'end_time' => @end_time.to_datetime,
'sentry_tag_uuid' => sentry_tag_uuid,
'error_count' => error_count + @timeslice_errors)
'error_count' => error_count,
'proccesed' => @processed,
'reprocessed' => @reprocessed)
end

def wikidata
Expand Down
38 changes: 27 additions & 11 deletions app/services/update_course_wiki_timeslices.rb
Original file line number Diff line number Diff line change
@@ -1,36 +1,34 @@
# frozen_string_literal: true

require_dependency "#{Rails.root}/lib/course_revision_updater"
require_dependency "#{Rails.root}/lib/analytics/histogram_plotter"
require_dependency "#{Rails.root}/lib/data_cycle/update_logger"
require_dependency "#{Rails.root}/lib/errors/update_service_error_helper"
require_dependency "#{Rails.root}/lib/timeslice_manager"
require_dependency "#{Rails.root}/lib/data_cycle/update_debugger"

#= Pulls in new revisions for a single course and updates the corresponding timeslices records.
# It updates all the tracked wikis for the course, from the latest start time for every wiki
# up to the end of update (today or end date course).
class UpdateCourseWikiTimeslices
include UpdateServiceErrorHelper

def initialize(course)
def initialize(course, debugger, update_service: nil)
@course = course
@timeslice_manager = TimesliceManager.new(@course)
@debugger = UpdateDebugger.new(@course)
@debugger = debugger
@update_service = update_service
@wikidata_stats_updater = UpdateWikidataStatsTimeslice.new(@course) if wikidata
@processed_timeslices = 0
@reprocessed_timeslices = 0
end

def run(all_time:)
pre_update(all_time)
fetch_data_and_process_timeslices_for_every_wiki(all_time)
error_count
[@processed_timeslices, @reprocessed_timeslices]
end

private

def pre_update(from_scratch)
@debugger.log_update_progress :pre_update_start
prepare_timeslices = PrepareTimeslices.new(@course)
prepare_timeslices = PrepareTimeslices.new(@course, @debugger, update_service: @update_service)
from_scratch ? prepare_timeslices.recreate_timeslices : prepare_timeslices.adjust_timeslices
@debugger.log_update_progress :pre_update_finish
end
Expand Down Expand Up @@ -63,9 +61,13 @@ def fetch_data_and_process_timeslices(wiki, first_start, latest_start)
start_date = [current_start, @course.start].max
end_date = [current_start + @timeslice_manager.timeslice_duration(wiki) - 1.second,
@course.end].min

log_processing(wiki, start_date, end_date)

fetch_data(wiki, start_date, end_date)
process_timeslices(wiki)
current_start += @timeslice_manager.timeslice_duration(wiki)
@processed_timeslices += 1
end
@debugger.log_update_progress "fetch_and_process_timeslices_finish_#{wiki.id}".to_sym
end
Expand All @@ -76,8 +78,12 @@ def fetch_data_and_reprocess_timeslices(wiki)
to_reprocess.each do |t|
start_date = [t.start, @course.start].max
end_date = [t.end - 1.second, @course.end].min

log_reprocessing(wiki, start_date, end_date)

fetch_data(wiki, start_date, end_date)
process_timeslices(wiki)
@reprocessed_timeslices += 1
end
@debugger.log_update_progress "fetch_and_reprocess_timeslices_finish_#{wiki.id}".to_sym
end
Expand All @@ -89,7 +95,7 @@ def fetch_data(wiki, timeslice_start, timeslice_end)
wiki,
timeslice_start.strftime('%Y%m%d%H%M%S'),
timeslice_end.strftime('%Y%m%d%H%M%S'),
update_service: self)
update_service: @update_service)

# Only for wikidata project, fetch wikidata stats
return unless wiki.project == 'wikidata' && @revisions.present?
Expand Down Expand Up @@ -158,7 +164,17 @@ def wikidata
end

def log_error(error)
Sentry.capture_message "#{@course.title} update timeslices error: #{error}",
Sentry.capture_message "#{@course.slug} update timeslices error: #{error}",
level: 'error'
end

def log_processing(wiki, start_date, end_date)
Rails.logger.info "UpdateCourseWikiTimeslices: Course: #{@course.slug} Wiki: #{wiki.id}.\
Processing timeslice [#{start_date}, #{end_date}]"
end

def log_reprocessing(wiki, start_date, end_date)
Rails.logger.info "UpdateCourseWikiTimeslices: Course: #{@course.slug} Wiki: #{wiki.id}.\
Reprocessing timeslice [#{start_date}, #{end_date}]"
end
end
12 changes: 12 additions & 0 deletions app/services/update_timeslices_course_date.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,18 @@ def update_timeslices_if_end_date_changed
def add_wiki_timeslices_up_to_new_end_date(wiki)
mark_old_last_wiki_timeslce_as_needs_update(wiki)

Rails.logger.info "UpdateTimeslicesCourseDate: Course: #{@course.slug}\
Adding data up to: #{@course.end}"

@timeslice_manager.create_wiki_timeslices_up_to_new_course_end_date(wiki)
end

def remove_timeslices_prior_to_start_date
mark_new_first_timeslce_as_needs_update

Rails.logger.info "UpdateTimeslicesCourseDate: Course: #{@course.slug}\
Removing data prior to: #{@course.start}"

# Delete course and course user timeslices
@timeslice_manager.delete_course_wiki_timeslices_prior_to_start_date
@timeslice_manager.delete_course_user_wiki_timeslices_prior_to_start_date
Expand All @@ -66,6 +72,9 @@ def remove_timeslices_prior_to_start_date
def remove_timeslices_after_end_date
mark_old_last_timeslce_as_needs_update

Rails.logger.info "UpdateTimeslicesCourseDate: Course: #{@course.slug}\
Removing data after to: #{@course.end}"

# Delete course and course user timeslices
@timeslice_manager.delete_course_wiki_timeslices_after_end_date
@timeslice_manager.delete_course_user_wiki_timeslices_after_end_date
Expand All @@ -77,6 +86,9 @@ def remove_timeslices_after_end_date
def add_wiki_timeslices_from_new_start_date(wiki)
mark_new_wiki_first_timeslce_as_needs_update(wiki)

Rails.logger.info "UpdateTimeslicesCourseDate: Course: #{@course.slug}\
Adding data after to: #{@course.start}"

@timeslice_manager.create_wiki_timeslices_for_new_course_start_date(wiki)
end

Expand Down
11 changes: 9 additions & 2 deletions app/services/update_timeslices_course_user.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
require_dependency "#{Rails.root}/lib/revision_data_manager"

class UpdateTimeslicesCourseUser
def initialize(course)
def initialize(course, update_service: nil)
@course = course
@timeslice_manager = TimesliceManager.new(course)
@update_sercice = update_service
end

def run
Expand Down Expand Up @@ -36,6 +37,9 @@ def run
def add_user_ids(user_ids)
return if user_ids.empty?

Rails.logger.info "UpdateTimeslicesCourseUser: Course: #{@course.slug}\
Adding new users: #{user_ids}"

@course.wikis.each do |wiki|
fetch_users_revisions_for_wiki(wiki, user_ids)
end
Expand All @@ -44,7 +48,7 @@ def add_user_ids(user_ids)
def fetch_users_revisions_for_wiki(wiki, user_ids)
users = User.find(user_ids)

manager = RevisionDataManager.new(wiki, @course)
manager = RevisionDataManager.new(wiki, @course, update_service: @update_service)
# Fetch the revisions for users for the complete period
revisions = manager.fetch_revision_data_for_users(users,
@course.start.strftime('%Y%m%d%H%M%S'),
Expand All @@ -54,6 +58,9 @@ def fetch_users_revisions_for_wiki(wiki, user_ids)

def remove_courses_users(user_ids)
return if user_ids.empty?

Rails.logger.info "UpdateTimeslicesCourseUser: Course: #{@course.slug}\
Removing old users: #{user_ids}"
# Delete course user wiki timeslices for the deleted users
@timeslice_manager.delete_course_user_timeslices_for_deleted_course_users user_ids

Expand Down
4 changes: 4 additions & 0 deletions app/services/update_timeslices_course_wiki.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,18 @@ def run
private

def remove_courses_wikis(wiki_ids)
return if wiki_ids.empty?
Rails.logger.info { "UpdateTimeslicesCourseWiki: Deleting wikis: #{wiki_ids}" }
# Delete timeslices for the deleted wikis
@timeslice_manager.delete_timeslices_for_deleted_course_wikis wiki_ids
# Delete articles courses
ArticlesCoursesCleanerTimeslice.clean_articles_courses_for_wiki_ids(@course, wiki_ids)
end

def add_courses_wikis(wiki_ids)
return if wiki_ids.empty?
wikis = Wiki.where(id: wiki_ids)
Rails.logger.info { "UpdateTimeslicesCourseWiki: Adding wikis: #{wiki_ids}" }
# Create course wiki timeslice records for new wikis
@timeslice_manager.create_timeslices_for_new_course_wiki_records(wikis)
end
Expand Down
3 changes: 3 additions & 0 deletions app/services/update_timeslices_scoped_article.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def run
def reset(article_ids)
return if article_ids.empty?

Rails.logger.info "UpdateTimeslicesScopedArticle: Course: #{@course.slug}\
Resetting #{@article_ids}"

# Mark course wiki timeslices to be re-proccesed
articles = Article.where(id: article_ids)
ArticlesCoursesCleanerTimeslice.reset_specific_articles(@course, articles)
Expand Down
73 changes: 13 additions & 60 deletions spec/services/update_course_wiki_timeslices_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,25 @@
end
let(:enwiki) { Wiki.get_or_create(language: 'en', project: 'wikipedia') }
let(:wikidata) { Wiki.get_or_create(language: nil, project: 'wikidata') }
let(:updater) { described_class.new(course) }
let(:updater) { described_class.new(course, UpdateDebugger.new(course)) }
let(:subject) { updater.run(all_time: false) }
let(:flags) { nil }
let(:user) { create(:user, username: 'Ragesoss') }

before do
stub_wiki_validation
travel_to Date.new(2018, 12, 1)
course.campaigns << Campaign.first
course.wikis << Wiki.get_or_create(language: nil, project: 'wikidata')
JoinCourse.new(course:, user:, role: 0)
end

context 'when debugging is not enabled' do
it 'posts no Sentry logs' do
expect(Sentry).not_to receive(:capture_message)
subject
processed, reprocessed = subject
expect(processed).to eq(14)
expect(reprocessed).to eq(0)
end
end

Expand All @@ -31,15 +41,6 @@
end

context 'when there are revisions' do
before do
stub_wiki_validation
travel_to Date.new(2018, 12, 1)
course.campaigns << Campaign.first
# Create course wiki timeslices manually for wikidata
course.wikis << Wiki.get_or_create(language: nil, project: 'wikidata')
JoinCourse.new(course:, user:, role: 0)
end

it 'updates article course timeslices caches' do
VCR.use_cassette 'course_update' do
subject
Expand Down Expand Up @@ -159,7 +160,7 @@
expected_dates.each do |start_time, end_time|
expected_wikis.each do |wiki|
expect(CourseRevisionUpdater).to receive(:fetch_revisions_and_scores_for_wiki)
.with(course, wiki, start_time, end_time, update_service: updater)
.with(course, wiki, start_time, end_time, update_service: nil)
.once
end
end
Expand All @@ -169,52 +170,4 @@
end
end
end

context 'sentry course update error tracking' do
let(:flags) { { debug_updates: true } }
let(:user) { create(:user, username: 'Ragesoss') }

before do
travel_to Date.new(2018, 11, 28)
course.campaigns << Campaign.first
JoinCourse.new(course:, user:, role: 0)
end

it 'tracks update errors properly in Replica' do
allow(Sentry).to receive(:capture_exception)

# Raising errors only in Replica
stub_request(:any, %r{https://replica-revision-tools.wmcloud.org/.*}).to_raise(Errno::ECONNREFUSED)
VCR.use_cassette 'course_update/replica' do
subject
end
sentry_tag_uuid = updater.sentry_tag_uuid

# Checking whether Sentry receives correct error and tags as arguments
expect(Sentry).to have_received(:capture_exception).exactly(5).times.with(
Errno::ECONNREFUSED, anything
)
expect(Sentry).to have_received(:capture_exception)
.exactly(5).times.with anything, hash_including(tags: { update_service_id: sentry_tag_uuid,
course: course.slug })
end

it 'tracks update errors properly in LiftWing' do
allow(Sentry).to receive(:capture_exception)

# Raising errors only in LiftWing
stub_request(:any, %r{https://api.wikimedia.org/service/lw.*}).to_raise(Faraday::ConnectionFailed)
VCR.use_cassette 'course_update/lift_wing_api' do
subject
end
sentry_tag_uuid = updater.sentry_tag_uuid

# Checking whether Sentry receives correct error and tags as arguments
expect(Sentry).to have_received(:capture_exception)
.exactly(2).times.with(Faraday::ConnectionFailed, anything)
expect(Sentry).to have_received(:capture_exception)
.exactly(2).times.with anything, hash_including(tags: { update_service_id: sentry_tag_uuid,
course: course.slug })
end
end
end

0 comments on commit d6c8a2c

Please sign in to comment.