Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Data rearchitecture] ArticleScopedProgram/VisitingScholarship: Handle changes in categories and assignments #6107

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/models/course_data/articles_courses.rb
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def self.update_from_course(course)
end

def self.update_from_course_revisions(course, revisions)
revisions = revisions.select(&:scoped_revision)
course_article_ids = course.articles.where(wiki: course.wikis).pluck(:id)
revision_article_ids = article_ids_by_namespaces_from_revisions(course, revisions)

Expand Down
4 changes: 4 additions & 0 deletions app/models/course_types/visiting_scholarship.rb
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,8 @@ def filter_revisions(revisions)
# Titles of the articles in scope for this course: the article_title
# value of each of the course's assignments.
def scoped_article_titles = assignments.pluck(:article_title)

# Ids of the articles in scope for this course: the article_id
# value of each of the course's assignments.
def scoped_article_ids = assignments.pluck(:article_id)
end
2 changes: 1 addition & 1 deletion app/models/course_user_wiki_timeslice.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def self.update_course_user_wiki_timeslices(course, user_id, wiki, revisions)
.for_revisions_between(rev_start, rev_end)
timeslices.each do |timeslice|
# Group revisions that belong to the timeslice
revisions_in_timeslice = revisions[:revisions].select do |revision|
revisions_in_timeslice = revisions[:revisions].select(&:scoped_revision).select do |revision|
timeslice.start <= revision.date && revision.date < timeslice.end
end
# Get or create article course timeslice based on course, article_id,
Expand Down
2 changes: 1 addition & 1 deletion app/models/course_wiki_timeslice.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def self.update_course_wiki_timeslices(course, wiki, revisions)
.for_revisions_between(rev_start, rev_end)
course_wiki_timeslices.each do |timeslice|
# Group revisions that belong to the timeslice
revisions_in_timeslice = revisions[:revisions].select do |revision|
revisions_in_timeslice = revisions[:revisions].select(&:scoped_revision).select do |revision|
timeslice.start <= revision.date && revision.date < timeslice.end
end
# Update cache for CourseWikiTimeslice
Expand Down
4 changes: 4 additions & 0 deletions app/models/wiki_content/revision.rb
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,8 @@ def diff_stats
rescue JSON::ParserError
nil # Return nil if parsing fails (i.e., not diff_stats)
end

# Tells whether this revision is a scoped one.
# The views field doubles as the scoped flag: zero means not scoped,
# any other value means scoped.
def scoped_revision
return false if views.zero?

true
end
end
1 change: 1 addition & 0 deletions app/services/prepare_timeslices.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def adjust_timeslices
UpdateTimeslicesCourseUser.new(@course).run
UpdateTimeslicesUntrackedArticle.new(@course).run
UpdateTimeslicesCourseDate.new(@course).run
UpdateTimeslicesScopedArticle.new(@course).run
@debugger.log_update_progress :adjust_timeslices_end
end
end
49 changes: 49 additions & 0 deletions app/services/update_timeslices_scoped_article.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# frozen_string_literal: true

require_dependency "#{Rails.root}/lib/timeslice_manager"
require_dependency "#{Rails.root}/lib/articles_courses_cleaner_timeslice"
require_dependency "#{Rails.root}/lib/revision_data_manager"

# Marks as 'needs_update' the timeslices associated with newly scoped articles
# or with articles that are no longer scoped due to changes in assignments
# and categories.
# Only for ArticleScopedProgram and VisitingScholarship courses
class UpdateTimeslicesScopedArticle
  # Course types whose articles are limited to a scoped set.
  SCOPED_COURSE_TYPES = %w[ArticleScopedProgram VisitingScholarship].freeze

  # @param course [Course] the course whose scoped articles are checked
  def initialize(course)
    @course = course
    # NOTE(review): not referenced anywhere else in this class — confirm whether it is needed.
    @timeslice_manager = TimesliceManager.new(course)
  end

  # Resets the articles whose scoped status changed: first the newly scoped
  # ones, then the ones that are no longer scoped.
  # Does nothing for course types other than SCOPED_COURSE_TYPES.
  def run
    return unless SCOPED_COURSE_TYPES.include?(@course.type)

    reset(newly_scoped_article_ids)
    reset(no_longer_scoped_article_ids)
  end

  private

  # Scoped article ids for the course. Looked up lazily (only after the
  # course type check in #run) so that courses of other types are never
  # asked for them.
  def scoped_article_ids
    @scoped_article_ids ||= @course.scoped_article_ids
  end

  # Scoped articles that don't have articles courses but do have ac timeslices
  def newly_scoped_article_ids
    articles_with_timeslices = @course.article_course_timeslices
                                      .where(article_id: scoped_article_ids)
                                      .pluck(:article_id)

    tracked_articles = @course.articles_courses
                              .where(article_id: scoped_article_ids)
                              .pluck(:article_id)

    # pluck yields one id per timeslice record, so the same article id
    # can show up several times; keep each id once.
    (articles_with_timeslices - tracked_articles).uniq
  end

  # Not-scoped articles with article course records
  def no_longer_scoped_article_ids
    @course.articles_courses
           .where.not(article_id: scoped_article_ids)
           .pluck(:article_id)
  end

  # Hands the given articles to ArticlesCoursesCleanerTimeslice so they get
  # reset. No-op when article_ids is empty.
  def reset(article_ids)
    return if article_ids.empty?

    # Mark course wiki timeslices to be re-processed
    articles = Article.where(id: article_ids)
    ArticlesCoursesCleanerTimeslice.reset_specific_articles(@course, articles)
  end
end
65 changes: 47 additions & 18 deletions lib/revision_data_manager.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,28 @@ def initialize(wiki, course, update_service: nil)
# Returns an array of Revision records.
# As a side effect, it imports Article records.
def fetch_revision_data_for_course(timeslice_start, timeslice_end)
sub_data = get_course_revisions(@course.students, timeslice_start, timeslice_end)
all_sub_data, scoped_sub_data = get_course_revisions(@course.students, timeslice_start,
timeslice_end)
@revisions = []

# Extract all article data from the slice. Outputs a hash with article attrs.
articles = sub_data_to_article_attributes(sub_data)
articles = sub_data_to_article_attributes(all_sub_data)

# Import articles. We do this here to avoid saving article data in memory.
# Note that we create articles for all sub data (not only for scoped revisions).
ArticleImporter.new(@wiki).import_articles_from_revision_data(articles)
@articles = Article.where(wiki_id: @wiki.id, mw_page_id: articles.map { |a| a['mw_page_id'] })

# Prep: get a user dictionary for all users referred to by revisions.
users = user_dict_from_sub_data(sub_data)
users = user_dict_from_sub_data(all_sub_data)

# Now get all the revisions
# We need a slightly different article dictionary format here
article_dict = @articles.each_with_object({}) { |a, memo| memo[a.mw_page_id] = a.id }
@revisions = sub_data_to_revision_attributes(sub_data, users, articles: article_dict)
@revisions = sub_data_to_revision_attributes(all_sub_data,
users,
scoped_sub_data:,
articles: article_dict)

# TODO: resolve duplicates
# DuplicateArticleDeleter.new(@wiki).resolve_duplicates(@articles)
Expand All @@ -52,22 +57,26 @@ def fetch_revision_data_for_course(timeslice_start, timeslice_end)
# This method gets revisions for some specific users.
# It does not fetch scores. It has no side effects.
def fetch_revision_data_for_users(users, timeslice_start, timeslice_end)
sub_data = get_course_revisions(users, timeslice_start, timeslice_end)
users = user_dict_from_sub_data(sub_data)
all_sub_data, scoped_sub_data = get_course_revisions(users, timeslice_start, timeslice_end)
users = user_dict_from_sub_data(all_sub_data)

sub_data_to_revision_attributes(sub_data, users)
sub_data_to_revision_attributes(all_sub_data, users, scoped_sub_data:)
end

###########
# Helpers #
###########
private

# Returns a pair of revision data sets for users during the given period:
# [all_sub_data, scoped_sub_data].
# - all_sub_data: all revisions within the period.
# - scoped_sub_data: revisions filtered based on the course type.
def get_course_revisions(users, start, end_date)
all_sub_data = get_revisions(users, start, end_date)
# Filter revisions based on the article type.
# Filter revisions based on the course type.
# Important for ArticleScopedProgram/VisitingScholarship courses
@course.filter_revisions(all_sub_data)
[all_sub_data, @course.filter_revisions(all_sub_data)]
end

# Get revisions made by a set of users between two dates.
Expand Down Expand Up @@ -111,23 +120,43 @@ def user_dict_from_sub_data(sub_data)
User.where(username: users).pluck(:username, :id).to_h
end

def sub_data_to_revision_attributes(sub_data, users, articles: nil)
sub_data.flat_map do |_a_id, article_data|
# Returns revisions from all_sub_data.
# scoped_sub_data contains filtered data according to the course type.
def sub_data_to_revision_attributes(all_sub_data, users, scoped_sub_data: nil, articles: nil)
all_sub_data.flat_map do |_a_id, article_data|
article_data['revisions'].map do |rev_data|
mw_page_id = rev_data['mw_page_id'].to_i
article_id = articles.nil? ? nil : articles[mw_page_id]
Revision.new({
create_revision(rev_data, scoped_sub_data, users, articles)
end
end.uniq(&:mw_rev_id)
end

# Tells whether the revision identified by mw_page_id and mw_rev_id appears
# in scoped_sub_data. Entries are expected to be [key, data] pairs where data
# is a hash holding 'article' and 'revisions'; any other entry is ignored.
# Ids are compared as strings.
def scoped_revision?(scoped_sub_data, mw_page_id, mw_rev_id)
page_key = mw_page_id.to_s
rev_key = mw_rev_id.to_s

scoped_sub_data.any? do |_key, entry|
well_formed = entry.is_a?(Hash) && entry['article'] && entry['revisions']
next false unless well_formed
next false unless entry['article']['mw_page_id'] == page_key

entry['revisions'].any? { |rev| rev['mw_rev_id'] == rev_key }
end
end

# Creates a revision record for the given revision data.
# Note that views field is currently used to track if the revision
# is a scoped one.
# TODO: change the field name. Review this
def create_revision(rev_data, scoped_sub_data, users, articles)
mw_page_id = rev_data['mw_page_id'].to_i
Revision.new({
mw_rev_id: rev_data['mw_rev_id'],
date: rev_data['date'],
characters: rev_data['characters'],
article_id:, mw_page_id:,
article_id: articles.nil? ? nil : articles[mw_page_id],
mw_page_id:,
user_id: users[rev_data['username']],
new_article: string_to_boolean(rev_data['new_article']),
system: string_to_boolean(rev_data['system']),
wiki_id: rev_data['wiki_id']
wiki_id: rev_data['wiki_id'],
views: scoped_revision?(scoped_sub_data, mw_page_id, rev_data['mw_rev_id'])
})
end
end.uniq(&:mw_rev_id)
end

# Partition revisions between those belonging to articles in/out of mainspace/userspace/draftspace
Expand Down
4 changes: 2 additions & 2 deletions lib/timeslice_manager.rb
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ def update_last_mw_rev_datetime(new_fetched_data)

# Resets course wiki timeslices. This involves:
# - Marking timeslices as needs_update for dates with associated article course timeslices
# - Deleting given article course timeslices
# - Deleting course wiki timeslcies for those dates and wikis
# - Deleting given article course timeslices if no soft
# - Deleting course user wiki timeslices for those dates and wikis
# Takes a collection of article course timeslices
def reset_timeslices_that_need_update_from_article_timeslices(timeslices,
wiki: nil,
Expand Down
99 changes: 63 additions & 36 deletions spec/lib/revision_data_manager_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,54 @@
'title' => 'Ragesoss/citing_sources',
'namespace' => '2' }]
end
# Article attributes for the articles of both data1 and data2. This is the
# argument the importer double is expected to receive in the
# 'creates articles for all revisions' example.
let(:revision_data2) do
[{ 'mw_page_id' => '777',
'wiki_id' => 1,
'title' => 'Ragesoss/citing_sources',
'namespace' => '4' },
{ 'mw_page_id' => '123',
'wiki_id' => 1,
'title' => 'Draft article',
'namespace' => '118' }]
end
# Complete (unfiltered) sub data: both article entries.
let(:sub_data) { [data1, data2] }
# [key, data] pair for page 777 (namespace 4) with a single revision.
let(:data1) do
[
'112',
{
'article' => {
'mw_page_id' => '777',
'title' => 'Ragesoss/citing_sources',
'namespace' => '4',
'wiki_id' => 1
},
'revisions' => [
{ 'mw_rev_id' => '849116430', 'date' => '20180706', 'characters' => '569',
'mw_page_id' => '777', 'username' => 'Ragesoss', 'new_article' => 'false',
'system' => 'false', 'wiki_id' => 1 }
]
}
]
end
# [key, data] pair for page 123 (namespace 118) with a single revision.
let(:data2) do
[
'789',
{
'article' => {
'mw_page_id' => '123',
'title' => 'Draft article',
'namespace' => '118',
'wiki_id' => 1
},
'revisions' => [
{ 'mw_rev_id' => '456', 'date' => '20180706', 'characters' => '569',
'mw_page_id' => '123', 'username' => 'Ragesoss', 'new_article' => 'false',
'system' => 'false', 'wiki_id' => 1 }
]
}
]
end
# Stand-in for the filtered (scoped) sub data: only data1 remains.
let(:filtered_sub_data) { [data1] }

before do
create(:courses_user, course:, user:)
Expand Down Expand Up @@ -62,42 +110,7 @@
end

it 'only calculates revisions scores for articles in mainspace, userspace or draftspace' do
allow(instance_class).to receive(:get_revisions).and_return(
[
[
'112',
{
'article' => {
'mw_page_id' => '777',
'title' => 'Some title',
'namespace' => '4',
'wiki_id' => 1
},
'revisions' => [
{ 'mw_rev_id' => '849116430', 'date' => '20180706', 'characters' => '569',
'mw_page_id' => '777', 'username' => 'Ragesoss', 'new_article' => 'false',
'system' => 'false', 'wiki_id' => 1 }
]
}
],
[
'789',
{
'article' => {
'mw_page_id' => '123',
'title' => 'Draft article',
'namespace' => '118',
'wiki_id' => 1
},
'revisions' => [
{ 'mw_rev_id' => '456', 'date' => '20180706', 'characters' => '569',
'mw_page_id' => '123', 'username' => 'Ragesoss', 'new_article' => 'false',
'system' => 'false', 'wiki_id' => 1 }
]
}
]
]
)
allow(instance_class).to receive(:get_revisions).and_return([data1, data2])
VCR.use_cassette 'revision_importer/all' do
revisions = subject
# Returns all revisions
Expand All @@ -117,6 +130,20 @@
subject
end
end

it 'creates articles for all revisions even for article scoped programs' do
# Stub the fetch so it yields [all sub data, filtered sub data];
# only data1 is part of the filtered set.
allow_any_instance_of(described_class).to receive(:get_course_revisions)
.and_return([sub_data, filtered_sub_data])

# Swap the importer for a double so the import call can be inspected.
article_importer = instance_double(ArticleImporter)
allow(ArticleImporter).to receive(:new).and_return(article_importer)

# The import must receive the articles of data1 and data2,
# not only the one in the filtered set.
expect(article_importer).to receive(:import_articles_from_revision_data)
.once
.with(revision_data2)

subject
end
end

describe '#fetch_revision_data_for_users' do
Expand Down
10 changes: 5 additions & 5 deletions spec/models/articles_courses_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -208,15 +208,15 @@
create(:courses_user, user:, course:,
role: CoursesUsers::Roles::STUDENT_ROLE)
array_revisions << build(:revision, article:, user:, date: '2024-07-07',
system: true, new_article: true)
system: true, new_article: true, views: true)
array_revisions << build(:revision, article:, user:, date: '2024-07-06 20:05:10',
system: true, new_article: true)
system: true, new_article: true, views: true)
array_revisions << build(:revision, article:, user:, date: '2024-07-06 20:06:11',
system: true, new_article: true)
system: true, new_article: true, views: true)
array_revisions << build(:revision, article:, user:, date: '2024-07-08 20:03:01',
system: true, new_article: true)
system: true, new_article: true, views: true)
array_revisions << build(:revision, article: article3, user:, date: '2024-07-07',
system: true, new_article: true)
system: true, new_article: true, views: true)
# revision for a non-tracked wiki
array_revisions << build(:revision, article: article2, user:, date: '2024-07-06')
# revision for a non-tracked namespace
Expand Down
Loading
Loading