diff --git a/app/models/course_data/articles_courses.rb b/app/models/course_data/articles_courses.rb index cd8e4cd9e0..377c336736 100644 --- a/app/models/course_data/articles_courses.rb +++ b/app/models/course_data/articles_courses.rb @@ -182,6 +182,7 @@ def self.update_from_course(course) end def self.update_from_course_revisions(course, revisions) + revisions = revisions.select(&:scoped_revision) course_article_ids = course.articles.where(wiki: course.wikis).pluck(:id) revision_article_ids = article_ids_by_namespaces_from_revisions(course, revisions) diff --git a/app/models/course_types/visiting_scholarship.rb b/app/models/course_types/visiting_scholarship.rb index 6eee1657f7..971cf4e4f2 100644 --- a/app/models/course_types/visiting_scholarship.rb +++ b/app/models/course_types/visiting_scholarship.rb @@ -89,4 +89,8 @@ def filter_revisions(revisions) def scoped_article_titles assignments.pluck(:article_title) end + + def scoped_article_ids + assignments.pluck(:article_id) + end end diff --git a/app/models/course_user_wiki_timeslice.rb b/app/models/course_user_wiki_timeslice.rb index 1f9cd291ae..424f1d962c 100644 --- a/app/models/course_user_wiki_timeslice.rb +++ b/app/models/course_user_wiki_timeslice.rb @@ -52,7 +52,7 @@ def self.update_course_user_wiki_timeslices(course, user_id, wiki, revisions) .for_revisions_between(rev_start, rev_end) timeslices.each do |timeslice| # Group revisions that belong to the timeslice - revisions_in_timeslice = revisions[:revisions].select do |revision| + revisions_in_timeslice = revisions[:revisions].select(&:scoped_revision).select do |revision| timeslice.start <= revision.date && revision.date < timeslice.end end # Get or create article course timeslice based on course, article_id, diff --git a/app/models/course_wiki_timeslice.rb b/app/models/course_wiki_timeslice.rb index 2f08a6e94e..101caa46c7 100644 --- a/app/models/course_wiki_timeslice.rb +++ b/app/models/course_wiki_timeslice.rb @@ -56,7 +56,7 @@ def self.update_course_wiki_timeslices(course, wiki, revisions) .for_revisions_between(rev_start, rev_end) course_wiki_timeslices.each do |timeslice| # Group revisions that belong to the timeslice - revisions_in_timeslice = revisions[:revisions].select do |revision| + revisions_in_timeslice = revisions[:revisions].select(&:scoped_revision).select do |revision| timeslice.start <= revision.date && revision.date < timeslice.end end # Update cache for CourseWikiTimeslice diff --git a/app/models/wiki_content/revision.rb b/app/models/wiki_content/revision.rb index 59fbe00d28..a3b547f9b8 100644 --- a/app/models/wiki_content/revision.rb +++ b/app/models/wiki_content/revision.rb @@ -107,4 +107,8 @@ def diff_stats rescue JSON::ParserError nil # Return nil if parsing fails (i.e., not diff_stats) end + + def scoped_revision + !views.zero? + end end diff --git a/app/services/prepare_timeslices.rb b/app/services/prepare_timeslices.rb index 6aafd17b4d..bdde16e712 100644 --- a/app/services/prepare_timeslices.rb +++ b/app/services/prepare_timeslices.rb @@ -35,6 +35,7 @@ def adjust_timeslices UpdateTimeslicesCourseUser.new(@course).run UpdateTimeslicesUntrackedArticle.new(@course).run UpdateTimeslicesCourseDate.new(@course).run + UpdateTimeslicesScopedArticle.new(@course).run @debugger.log_update_progress :adjust_timeslices_end end end diff --git a/app/services/update_timeslices_scoped_article.rb b/app/services/update_timeslices_scoped_article.rb new file mode 100644 index 0000000000..e28933eb6f --- /dev/null +++ b/app/services/update_timeslices_scoped_article.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +require_dependency "#{Rails.root}/lib/timeslice_manager" +require_dependency "#{Rails.root}/lib/articles_courses_cleaner_timeslice" +require_dependency "#{Rails.root}/lib/revision_data_manager" + +# Set as 'needs_update' timeslices associated to new scoped articles +# or to articles that are not scoped anymore due to changes in assignments +# and categories. +# Only for ArticleScopedProgram and VisitingScholarship courses +class UpdateTimeslicesScopedArticle + def initialize(course) + @course = course + @timeslice_manager = TimesliceManager.new(course) + @scoped_article_ids = course.scoped_article_ids + end + + def run + return unless %w[ArticleScopedProgram VisitingScholarship].include? @course.type + # Get the scoped articles that don't have articles courses but do have ac timeslices + articles_with_timeslices = @course.article_course_timeslices + .where(article_id: @scoped_article_ids) + .pluck(:article_id) + + tracked_articles = @course.articles_courses + .where(article_id: @scoped_article_ids) + .pluck(:article_id) + + new_articles = articles_with_timeslices - tracked_articles + reset(new_articles) + + # Get not-scoped articles with article course records + old_articles = @course.articles_courses + .where.not(article_id: @scoped_article_ids) + .pluck(:article_id) + + reset(old_articles) + end + + private + + def reset(article_ids) + return if article_ids.empty? + + # Mark course wiki timeslices to be re-proccesed + articles = Article.where(id: article_ids) + ArticlesCoursesCleanerTimeslice.reset_specific_articles(@course, articles) + end +end diff --git a/lib/revision_data_manager.rb b/lib/revision_data_manager.rb index 7bcc733ba7..5849b83838 100644 --- a/lib/revision_data_manager.rb +++ b/lib/revision_data_manager.rb @@ -21,23 +21,28 @@ def initialize(wiki, course, update_service: nil) # Returns an array of Revision records. # As a side effect, it imports Article records. def fetch_revision_data_for_course(timeslice_start, timeslice_end) - sub_data = get_course_revisions(@course.students, timeslice_start, timeslice_end) + all_sub_data, scoped_sub_data = get_course_revisions(@course.students, timeslice_start, + timeslice_end) @revisions = [] # Extract all article data from the slice. Outputs a hash with article attrs. - articles = sub_data_to_article_attributes(sub_data) + articles = sub_data_to_article_attributes(all_sub_data) # Import articles. We do this here to avoid saving article data in memory. + # Note that we create articles for all sub data (not only for scoped revisions). ArticleImporter.new(@wiki).import_articles_from_revision_data(articles) @articles = Article.where(wiki_id: @wiki.id, mw_page_id: articles.map { |a| a['mw_page_id'] }) # Prep: get a user dictionary for all users referred to by revisions. - users = user_dict_from_sub_data(sub_data) + users = user_dict_from_sub_data(all_sub_data) # Now get all the revisions # We need a slightly different article dictionary format here article_dict = @articles.each_with_object({}) { |a, memo| memo[a.mw_page_id] = a.id } - @revisions = sub_data_to_revision_attributes(sub_data, users, articles: article_dict) + @revisions = sub_data_to_revision_attributes(all_sub_data, + users, + scoped_sub_data:, + articles: article_dict) # TODO: resolve duplicates # DuplicateArticleDeleter.new(@wiki).resolve_duplicates(@articles) @@ -52,10 +57,10 @@ def fetch_revision_data_for_course(timeslice_start, timeslice_end) # This method gets revisions for some specific users. # It does not fetch scores. It has no side effects. def fetch_revision_data_for_users(users, timeslice_start, timeslice_end) - sub_data = get_course_revisions(users, timeslice_start, timeslice_end) - users = user_dict_from_sub_data(sub_data) + all_sub_data, scoped_sub_data = get_course_revisions(users, timeslice_start, timeslice_end) + users = user_dict_from_sub_data(all_sub_data) - sub_data_to_revision_attributes(sub_data, users) + sub_data_to_revision_attributes(all_sub_data, users, scoped_sub_data:) end ########### @@ -63,11 +68,15 @@ def fetch_revision_data_for_users(users, timeslice_start, timeslice_end) ########### private + # Returns a list of revisions for users during the given period: + # [all_sub_data, sub_data]. + # - all_sub_data: all revisions within the period. + # - scoped_sub_data: revisions filtered based on the course type. def get_course_revisions(users, start, end_date) all_sub_data = get_revisions(users, start, end_date) - # Filter revisions based on the article type. + # Filter revisions based on the course type. # Important for ArticleScopedProgram/VisitingScholarship courses - @course.filter_revisions(all_sub_data) + [all_sub_data, @course.filter_revisions(all_sub_data)] end # Get revisions made by a set of users between two dates. @@ -111,23 +120,43 @@ def user_dict_from_sub_data(sub_data) User.where(username: users).pluck(:username, :id).to_h end - def sub_data_to_revision_attributes(sub_data, users, articles: nil) - sub_data.flat_map do |_a_id, article_data| + # Returns revisions from all_sub_data. + # scoped_sub_data contains filtered data according to the course type. + def sub_data_to_revision_attributes(all_sub_data, users, scoped_sub_data: nil, articles: nil) + all_sub_data.flat_map do |_a_id, article_data| article_data['revisions'].map do |rev_data| - mw_page_id = rev_data['mw_page_id'].to_i - article_id = articles.nil? ? nil : articles[mw_page_id] - Revision.new({ + create_revision(rev_data, scoped_sub_data, users, articles) + end + end.uniq(&:mw_rev_id) + end + + def scoped_revision?(scoped_sub_data, mw_page_id, mw_rev_id) + scoped_sub_data.any? do |_, entry| + next unless entry.is_a?(Hash) && entry['article'] && entry['revisions'] + + entry['article']['mw_page_id'] == mw_page_id.to_s && + entry['revisions'].any? { |rev| rev['mw_rev_id'] == mw_rev_id.to_s } + end + end + + # Creates a revision record for the given revision data. + # Note that views field is currently used to track if the revision + # is a scoped one. + # TODO: change the field name. Review this + def create_revision(rev_data, scoped_sub_data, users, articles) + mw_page_id = rev_data['mw_page_id'].to_i + Revision.new({ mw_rev_id: rev_data['mw_rev_id'], date: rev_data['date'], characters: rev_data['characters'], - article_id:, mw_page_id:, + article_id: articles.nil? ? nil : articles[mw_page_id], + mw_page_id:, user_id: users[rev_data['username']], new_article: string_to_boolean(rev_data['new_article']), system: string_to_boolean(rev_data['system']), - wiki_id: rev_data['wiki_id'] + wiki_id: rev_data['wiki_id'], + views: scoped_revision?(scoped_sub_data, mw_page_id, rev_data['mw_rev_id']) }) - end - end.uniq(&:mw_rev_id) end # Partition revisions between those belonging to articles in/out of mainspace/userspace/draftspace diff --git a/lib/timeslice_manager.rb b/lib/timeslice_manager.rb index d88301c527..b60a107fec 100644 --- a/lib/timeslice_manager.rb +++ b/lib/timeslice_manager.rb @@ -157,8 +157,8 @@ def update_last_mw_rev_datetime(new_fetched_data) # Resets course wiki timeslices. This involves: # - Marking timeslices as needs_update for dates with associated article course timeslices - # - Deleting given article course timeslices - # - Deleting course wiki timeslcies for those dates and wikis + # - Deleting given article course timeslices if no soft + # - Deleting course user wiki timeslices for those dates and wikis # Takes a collection of article course timeslices def reset_timeslices_that_need_update_from_article_timeslices(timeslices, wiki: nil, diff --git a/spec/lib/revision_data_manager_spec.rb b/spec/lib/revision_data_manager_spec.rb index fca35c049a..6dee9b4d14 100644 --- a/spec/lib/revision_data_manager_spec.rb +++ b/spec/lib/revision_data_manager_spec.rb @@ -19,6 +19,54 @@ 'title' => 'Ragesoss/citing_sources', 'namespace' => '2' }] end + let(:revision_data2) do + [{ 'mw_page_id' => '777', + 'wiki_id' => 1, + 'title' => 'Ragesoss/citing_sources', + 'namespace' => '4' }, + { 'mw_page_id' => '123', + 'wiki_id' => 1, + 'title' => 'Draft article', + 'namespace' => '118' }] + end + let(:sub_data) { [data1, data2] } + let(:data1) do + [ + '112', + { + 'article' => { + 'mw_page_id' => '777', + 'title' => 'Ragesoss/citing_sources', + 'namespace' => '4', + 'wiki_id' => 1 + }, + 'revisions' => [ + { 'mw_rev_id' => '849116430', 'date' => '20180706', 'characters' => '569', + 'mw_page_id' => '777', 'username' => 'Ragesoss', 'new_article' => 'false', + 'system' => 'false', 'wiki_id' => 1 } + ] + } + ] + end + let(:data2) do + [ + '789', + { + 'article' => { + 'mw_page_id' => '123', + 'title' => 'Draft article', + 'namespace' => '118', + 'wiki_id' => 1 + }, + 'revisions' => [ + { 'mw_rev_id' => '456', 'date' => '20180706', 'characters' => '569', + 'mw_page_id' => '123', 'username' => 'Ragesoss', 'new_article' => 'false', + 'system' => 'false', 'wiki_id' => 1 } + ] + } + ] + end + let(:filtered_sub_data) { [data1] } before do create(:courses_user, course:, user:) @@ -62,42 +110,7 @@ end it 'only calculates revisions scores for articles in mainspace, userspace or draftspace' do - allow(instance_class).to receive(:get_revisions).and_return( - [ - [ - '112', - { - 'article' => { - 'mw_page_id' => '777', - 'title' => 'Some title', - 'namespace' => '4', - 'wiki_id' => 1 - }, - 'revisions' => [ - { 'mw_rev_id' => '849116430', 'date' => '20180706', 'characters' => '569', - 'mw_page_id' => '777', 'username' => 'Ragesoss', 'new_article' => 'false', - 'system' => 'false', 'wiki_id' => 1 } - ] - } - ], - [ - '789', - { - 'article' => { - 'mw_page_id' => '123', - 'title' => 'Draft article', - 'namespace' => '118', - 'wiki_id' => 1 - }, - 'revisions' => [ - { 'mw_rev_id' => '456', 'date' => '20180706', 'characters' => '569', - 'mw_page_id' => '123', 'username' => 'Ragesoss', 'new_article' => 'false', - 'system' => 'false', 'wiki_id' => 1 } - ] - } - ] - ] - ) + allow(instance_class).to receive(:get_revisions).and_return([data1, data2]) VCR.use_cassette 'revision_importer/all' do revisions = subject # Returns all revisions @@ -117,6 +130,20 @@ subject end end + + it 'creates articles for all revisions even for article scoped programs' do + allow_any_instance_of(described_class).to receive(:get_course_revisions) + .and_return([sub_data, filtered_sub_data]) + + article_importer = instance_double(ArticleImporter) + allow(ArticleImporter).to receive(:new).and_return(article_importer) + + expect(article_importer).to receive(:import_articles_from_revision_data) + .once + .with(revision_data2) + + subject + end end describe '#fetch_revision_data_for_users' do diff --git a/spec/models/articles_courses_spec.rb b/spec/models/articles_courses_spec.rb index 02619ecb38..6b2b6bc20d 100644 --- a/spec/models/articles_courses_spec.rb +++ b/spec/models/articles_courses_spec.rb @@ -208,15 +208,15 @@ create(:courses_user, user:, course:, role: CoursesUsers::Roles::STUDENT_ROLE) array_revisions << build(:revision, article:, user:, date: '2024-07-07', - system: true, new_article: true) + system: true, new_article: true, views: true) array_revisions << build(:revision, article:, user:, date: '2024-07-06 20:05:10', - system: true, new_article: true) + system: true, new_article: true, views: true) array_revisions << build(:revision, article:, user:, date: '2024-07-06 20:06:11', - system: true, new_article: true) + system: true, new_article: true, views: true) array_revisions << build(:revision, article:, user:, date: '2024-07-08 20:03:01', - system: true, new_article: true) + system: true, new_article: true, views: true) array_revisions << build(:revision, article: article3, user:, date: '2024-07-07', - system: true, new_article: true) + system: true, new_article: true, views: true) # revision for a non-tracked wiki array_revisions << build(:revision, article: article2, user:, date: '2024-07-06') # revision for a non-tracked namespace diff --git a/spec/models/course_user_wiki_timeslice_spec.rb b/spec/models/course_user_wiki_timeslice_spec.rb index f237a3d3a8..f220e6484d 100644 --- a/spec/models/course_user_wiki_timeslice_spec.rb +++ b/spec/models/course_user_wiki_timeslice_spec.rb @@ -55,28 +55,32 @@ characters: 123, features: { 'num_ref' => 8 }, features_previous: { 'num_ref' => 1 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision1) do build(:revision, article: talk_page, date: start, characters: 200, features: { 'num_ref' => 12 }, features_previous: { 'num_ref' => 10 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision2) do build(:revision, article: sandbox, date: start, characters: -65, features: { 'num_ref' => 1 }, features_previous: { 'num_ref' => 2 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision3) do build(:revision, article: draft, date: start, characters: 225, features: { 'num_ref' => 3 }, features_previous: { 'num_ref' => 3 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision4) do build(:revision, article:, date: start, @@ -84,7 +88,8 @@ deleted: true, # deleted revision features: { 'num_ref' => 2 }, features_previous: { 'num_ref' => 0 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision5) do build(:revision, article_id: -1, # revision for a non-existing article @@ -93,7 +98,8 @@ deleted: false, features: { 'num_ref' => 2 }, features_previous: { 'num_ref' => 0 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision6) do build(:revision, article: draft, date: start, @@ -101,7 +107,8 @@ features: { 'num_ref' => 1 }, features_previous: { 'num_ref' => 0 }, user_id: user.id, - system: true) # revision made by the system + system: true, # revision made by the system + views: true) end let(:revisions) { [revision0, revision1, revision2, revision3, revision4, revision5, revision6] } let(:subject) { course_user_wiki_timeslice.update_cache_from_revisions revisions } @@ -109,9 +116,9 @@ describe '.update_course_user_wiki_timeslices' do before do TimesliceManager.new(course).create_timeslices_for_new_course_wiki_records(course.wikis) - revisions << build(:revision, article:, user_id: user.id, date: start + 26.hours) - revisions << build(:revision, article:, user_id: user.id, date: start + 50.hours) - revisions << build(:revision, article:, user_id: user.id, date: start + 51.hours) + revisions << build(:revision, article:, user_id: user.id, date: start + 26.hours, views: true) + revisions << build(:revision, article:, user_id: user.id, date: start + 50.hours, views: true) + revisions << build(:revision, article:, user_id: user.id, date: start + 51.hours, views: true) end it 'creates the right article timeslices based on the revisions' do diff --git a/spec/models/course_wiki_timeslice_spec.rb b/spec/models/course_wiki_timeslice_spec.rb index 3ec40975f7..16e12aae62 100644 --- a/spec/models/course_wiki_timeslice_spec.rb +++ b/spec/models/course_wiki_timeslice_spec.rb @@ -86,19 +86,21 @@ references_count: 4, revision_count: 4) - array_revisions << build(:revision, article:, user_id: 1, date: start) - array_revisions << build(:revision, article:, user_id: 1, date: start + 2.hours) - array_revisions << build(:revision, article:, user_id: 2, date: start + 3.hours) - array_revisions << build(:revision, article:, user_id: 2, date: start + 3.hours, system: true) - array_revisions << build(:revision, article:, deleted: true, user_id: 1, date: start + 8.hours) + array_revisions << build(:revision, article:, user_id: 1, date: start, views: true) + array_revisions << build(:revision, article:, user_id: 1, date: start + 2.hours, views: true) + array_revisions << build(:revision, article:, user_id: 2, date: start + 3.hours, views: true) + array_revisions << build(:revision, article:, user_id: 2, date: start + 3.hours, system: true, + views: true) + array_revisions << build(:revision, article:, deleted: true, user_id: 1, date: start + 8.hours, + views: true) end describe '.update_course_wiki_timeslices' do before do TimesliceManager.new(course).create_timeslices_for_new_course_wiki_records([wiki]) - array_revisions << build(:revision, article:, user_id: 1, date: start + 26.hours) - array_revisions << build(:revision, article:, user_id: 1, date: start + 50.hours) - array_revisions << build(:revision, article:, user_id: 1, date: start + 51.hours) + array_revisions << build(:revision, article:, user_id: 1, date: start + 26.hours, views: true) + array_revisions << build(:revision, article:, user_id: 1, date: start + 50.hours, views: true) + array_revisions << build(:revision, article:, user_id: 1, date: start + 51.hours, views: true) end it 'updates the right course wiki timeslices based on the revisions' do diff --git a/spec/services/update_timeslices_scoped_article_spec.rb b/spec/services/update_timeslices_scoped_article_spec.rb new file mode 100644 index 0000000000..b5c2729c90 --- /dev/null +++ b/spec/services/update_timeslices_scoped_article_spec.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe UpdateTimeslicesScopedArticle do + let(:course) { create(:article_scoped_program) } + let(:basic_course) { create(:course) } + let(:assigned_article) { create(:article, title: 'Assigned', id: 2, namespace: 0) } + let(:random_article) { create(:article, title: 'Random', id: 1, namespace: 0) } + + context 'for ArticleScopedProgram' do + before do + enwiki = Wiki.get_or_create(language: 'en', project: 'wikipedia') + user = create(:user, username: 'Ragesoss') + create(:assignment, user_id: user.id, course_id: course.id, article_id: 2, + article_title: 'Assigned') + manager = TimesliceManager.new(course) + manager.create_timeslices_for_new_course_wiki_records([enwiki]) + + create(:article_course_timeslice, course:, article: random_article, + start: course.start + 1.day) + create(:article_course_timeslice, course:, article: assigned_article, + start: course.start) + end + + it 'sets timeslices as needs_update for new scoped articles' do + described_class.new(course).run + + expect(course.course_wiki_timeslices.where(needs_update: true).count).to eq(1) + expect(course.course_wiki_timeslices.find_by(needs_update: true).start).to eq(course.start) + expect(course.article_course_timeslices.where(article_id: assigned_article.id).count).to eq(0) + end + + it 'sets timeslices as needs_update for old scoped articles' do + create(:articles_course, course:, article: assigned_article) + create(:articles_course, course:, article: random_article) + described_class.new(course).run + + expect(course.course_wiki_timeslices.where(needs_update: true).count).to eq(1) + expect(course.course_wiki_timeslices.find_by(needs_update: true).start) + .to eq(course.start + 1.day) + expect(course.article_course_timeslices.where(article_id: random_article.id).count).to eq(0) + expect(course.articles_courses.count).to eq(1) + expect(course.articles_courses.where(article_id: random_article).count).to eq(0) + end + end + + context 'for basic course' do + before do + enwiki = Wiki.get_or_create(language: 'en', project: 'wikipedia') + user = create(:user, username: 'Ragesoss') + create(:assignment, user_id: user.id, course_id: basic_course.id, article_id: 2, + article_title: 'Assigned') + manager = TimesliceManager.new(basic_course) + manager.create_timeslices_for_new_course_wiki_records([enwiki]) + + create(:article_course_timeslice, course: basic_course, article: random_article, + start: basic_course.start + 1.day) + create(:article_course_timeslice, course: basic_course, article: assigned_article, + start: basic_course.start) + end + + it 'returns prematurely' do + create(:articles_course, course: basic_course, article: assigned_article) + create(:articles_course, course: basic_course, article: random_article) + described_class.new(basic_course).run + + expect(basic_course.course_wiki_timeslices.where(needs_update: true).count).to eq(0) + expect(basic_course.article_course_timeslices.count).to eq(2) + expect(basic_course.articles_courses.count).to eq(2) + end + end +end