From 0eced1432bd04cc29246b31597e25c24396916c7 Mon Sep 17 00:00:00 2001 From: gabina Date: Thu, 9 Jan 2025 17:37:36 -0300 Subject: [PATCH 1/6] Create articles records for all revisions even when filtering revisions according to course type --- lib/revision_data_manager.rb | 15 ++-- spec/lib/revision_data_manager_spec.rb | 95 ++++++++++++++++---------- 2 files changed, 69 insertions(+), 41 deletions(-) diff --git a/lib/revision_data_manager.rb b/lib/revision_data_manager.rb index 7bcc733ba7..13f87e9c01 100644 --- a/lib/revision_data_manager.rb +++ b/lib/revision_data_manager.rb @@ -21,13 +21,14 @@ def initialize(wiki, course, update_service: nil) # Returns an array of Revision records. # As a side effect, it imports Article records. def fetch_revision_data_for_course(timeslice_start, timeslice_end) - sub_data = get_course_revisions(@course.students, timeslice_start, timeslice_end) + all_sub_data, sub_data = get_course_revisions(@course.students, timeslice_start, timeslice_end) @revisions = [] # Extract all article data from the slice. Outputs a hash with article attrs. - articles = sub_data_to_article_attributes(sub_data) + articles = sub_data_to_article_attributes(all_sub_data) # Import articles. We do this here to avoid saving article data in memory. + # Note that we create articles for all sub data (not only for filtered ones). ArticleImporter.new(@wiki).import_articles_from_revision_data(articles) @articles = Article.where(wiki_id: @wiki.id, mw_page_id: articles.map { |a| a['mw_page_id'] }) @@ -52,7 +53,7 @@ def fetch_revision_data_for_course(timeslice_start, timeslice_end) # This method gets revisions for some specific users. # It does not fetch scores. It has no side effects. def fetch_revision_data_for_users(users, timeslice_start, timeslice_end) - sub_data = get_course_revisions(users, timeslice_start, timeslice_end) + _, sub_data = get_course_revisions(users, timeslice_start, timeslice_end) users = user_dict_from_sub_data(sub_data) sub_data_to_revision_attributes(sub_data, users) @@ -63,11 +64,15 @@ def fetch_revision_data_for_users(users, timeslice_start, timeslice_end) ########### private + # Returns a list of revisions for users during the given period: + # [all_sub_data, sub_data]. + # - all_sub_data: all revisions within the period. + # - sub_data: revisions filtered based on the course type. def get_course_revisions(users, start, end_date) all_sub_data = get_revisions(users, start, end_date) - # Filter revisions based on the article type. + # Filter revisions based on the course type. # Important for ArticleScopedProgram/VisitingScholarship courses - @course.filter_revisions(all_sub_data) + [all_sub_data, @course.filter_revisions(all_sub_data)] end # Get revisions made by a set of users between two dates. diff --git a/spec/lib/revision_data_manager_spec.rb b/spec/lib/revision_data_manager_spec.rb index fca35c049a..45075025d0 100644 --- a/spec/lib/revision_data_manager_spec.rb +++ b/spec/lib/revision_data_manager_spec.rb @@ -19,6 +19,50 @@ 'title' => 'Ragesoss/citing_sources', 'namespace' => '2' }] end + let(:revision_data2) do + [{ 'mw_page_id' => '777', + 'wiki_id' => 1, + 'title' => 'Ragesoss/citing_sources', + 'namespace' => '4' }] + end + let(:sub_data) { [data1] } + let(:data1) do + [ + '112', + { + 'article' => { + 'mw_page_id' => '777', + 'title' => 'Ragesoss/citing_sources', + 'namespace' => '4', + 'wiki_id' => 1 + }, + 'revisions' => [ + { 'mw_rev_id' => '849116430', 'date' => '20180706', 'characters' => '569', + 'mw_page_id' => '777', 'username' => 'Ragesoss', 'new_article' => 'false', + 'system' => 'false', 'wiki_id' => 1 } + ] + } + ] + end + let(:data2) do + [ + '789', + { + 'article' => { + 'mw_page_id' => '123', + 'title' => 'Draft article', + 'namespace' => '118', + 'wiki_id' => 1 + }, + 'revisions' => [ + { 'mw_rev_id' => '456', 'date' => '20180706', 'characters' => '569', + 'mw_page_id' => '123', 'username' => 'Ragesoss', 'new_article' => 'false', + 'system' => 'false', 'wiki_id' => 1 } + ] + } + ] + end + let(:filtered_sub_data) { [] } before do create(:courses_user, course:, user:) @@ -62,42 +106,7 @@ end it 'only calculates revisions scores for articles in mainspace, userspace or draftspace' do - allow(instance_class).to receive(:get_revisions).and_return( - [ - [ - '112', - { - 'article' => { - 'mw_page_id' => '777', - 'title' => 'Some title', - 'namespace' => '4', - 'wiki_id' => 1 - }, - 'revisions' => [ - { 'mw_rev_id' => '849116430', 'date' => '20180706', 'characters' => '569', - 'mw_page_id' => '777', 'username' => 'Ragesoss', 'new_article' => 'false', - 'system' => 'false', 'wiki_id' => 1 } - ] - } - ], - [ - '789', - { - 'article' => { - 'mw_page_id' => '123', - 'title' => 'Draft article', - 'namespace' => '118', - 'wiki_id' => 1 - }, - 'revisions' => [ - { 'mw_rev_id' => '456', 'date' => '20180706', 'characters' => '569', - 'mw_page_id' => '123', 'username' => 'Ragesoss', 'new_article' => 'false', - 'system' => 'false', 'wiki_id' => 1 } - ] - } - ] - ] - ) + allow(instance_class).to receive(:get_revisions).and_return([data1, data2]) VCR.use_cassette 'revision_importer/all' do revisions = subject # Returns all revisions @@ -117,6 +126,20 @@ subject end end + + it 'creates articles for all revisions even for article scoped programs' do + allow_any_instance_of(described_class).to receive(:get_course_revisions) + .and_return([sub_data, filtered_sub_data]) + + article_importer = instance_double(ArticleImporter) + allow(ArticleImporter).to receive(:new).and_return(article_importer) + + expect(article_importer).to receive(:import_articles_from_revision_data) + .once + .with(revision_data2) + + subject + end end describe '#fetch_revision_data_for_users' do From 4bec05d8a8272571997f02a47f198962121527bd Mon Sep 17 00:00:00 2001 From: gabina Date: Fri, 10 Jan 2025 19:28:45 -0300 Subject: [PATCH 2/6] Proof con concept for filtering revisions --- app/models/course_data/articles_courses.rb | 1 + app/models/course_user_wiki_timeslice.rb | 1 + app/models/course_wiki_timeslice.rb | 1 + lib/revision_data_manager.rb | 22 +++++++++++++++++----- spec/lib/revision_data_manager_spec.rb | 10 +++++++--- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/app/models/course_data/articles_courses.rb b/app/models/course_data/articles_courses.rb index cd8e4cd9e0..8ea5c40620 100644 --- a/app/models/course_data/articles_courses.rb +++ b/app/models/course_data/articles_courses.rb @@ -182,6 +182,7 @@ def self.update_from_course(course) end def self.update_from_course_revisions(course, revisions) + revisions = revisions.reject { |r| r.views.zero? } course_article_ids = course.articles.where(wiki: course.wikis).pluck(:id) revision_article_ids = article_ids_by_namespaces_from_revisions(course, revisions) diff --git a/app/models/course_user_wiki_timeslice.rb b/app/models/course_user_wiki_timeslice.rb index 1f9cd291ae..cab127be95 100644 --- a/app/models/course_user_wiki_timeslice.rb +++ b/app/models/course_user_wiki_timeslice.rb @@ -45,6 +45,7 @@ class CourseUserWikiTimeslice < ApplicationRecord # {:start=>"20160320", :end=>"20160401", :revisions=>[...]}, # updates the article course timeslices based on the revisions. def self.update_course_user_wiki_timeslices(course, user_id, wiki, revisions) + revisions[:revisions] = revisions[:revisions].reject { |r| r.views.zero? } rev_start = revisions[:start] rev_end = revisions[:end] # Timeslice dates to update are determined based on course wiki timeslices diff --git a/app/models/course_wiki_timeslice.rb b/app/models/course_wiki_timeslice.rb index 2f08a6e94e..6d19d03a7e 100644 --- a/app/models/course_wiki_timeslice.rb +++ b/app/models/course_wiki_timeslice.rb @@ -48,6 +48,7 @@ class CourseWikiTimeslice < ApplicationRecord # {:start=>"20160320", :end=>"20160401", :revisions=>[...]}, # updates the course wiki timeslices based on the revisions. def self.update_course_wiki_timeslices(course, wiki, revisions) + revisions[:revisions] = revisions[:revisions].reject { |r| r.views.zero? } rev_start = revisions[:start] rev_end = revisions[:end] # Course wiki timeslices to update diff --git a/lib/revision_data_manager.rb b/lib/revision_data_manager.rb index 13f87e9c01..666a72449c 100644 --- a/lib/revision_data_manager.rb +++ b/lib/revision_data_manager.rb @@ -33,12 +33,13 @@ def fetch_revision_data_for_course(timeslice_start, timeslice_end) @articles = Article.where(wiki_id: @wiki.id, mw_page_id: articles.map { |a| a['mw_page_id'] }) # Prep: get a user dictionary for all users referred to by revisions. - users = user_dict_from_sub_data(sub_data) + users = user_dict_from_sub_data(all_sub_data) # Now get all the revisions # We need a slightly different article dictionary format here article_dict = @articles.each_with_object({}) { |a, memo| memo[a.mw_page_id] = a.id } - @revisions = sub_data_to_revision_attributes(sub_data, users, articles: article_dict) + @revisions = sub_data_to_revision_attributes(all_sub_data, users, filtered: sub_data, +articles: article_dict) # TODO: resolve duplicates # DuplicateArticleDeleter.new(@wiki).resolve_duplicates(@articles) @@ -116,8 +117,8 @@ def user_dict_from_sub_data(sub_data) User.where(username: users).pluck(:username, :id).to_h end - def sub_data_to_revision_attributes(sub_data, users, articles: nil) - sub_data.flat_map do |_a_id, article_data| + def sub_data_to_revision_attributes(all_sub_data, users, filtered: nil, articles: nil) + all_sub_data.flat_map do |_a_id, article_data| article_data['revisions'].map do |rev_data| mw_page_id = rev_data['mw_page_id'].to_i article_id = articles.nil? ? nil : articles[mw_page_id] @@ -129,12 +130,23 @@ def sub_data_to_revision_attributes(sub_data, users, articles: nil) user_id: users[rev_data['username']], new_article: string_to_boolean(rev_data['new_article']), system: string_to_boolean(rev_data['system']), - wiki_id: rev_data['wiki_id'] + wiki_id: rev_data['wiki_id'], + views: revision_filtered?(filtered, mw_page_id, rev_data['mw_rev_id']) }) end end.uniq(&:mw_rev_id) end + def revision_filtered?(data, mw_page_id, mw_rev_id) + return false if data.nil? + data.any? do |_, entry| + next unless entry.is_a?(Hash) && entry['article'] && entry['revisions'] + + entry['article']['mw_page_id'] == mw_page_id.to_s && + entry['revisions'].any? { |rev| rev['mw_rev_id'] == mw_rev_id.to_s } + end + end + # Partition revisions between those belonging to articles in/out of mainspace/userspace/draftspace # We need this to avoid calculating scores for articles out of pertinent spaces # Returns [revisions_in_spaces, revisions_out_spaces] diff --git a/spec/lib/revision_data_manager_spec.rb b/spec/lib/revision_data_manager_spec.rb index 45075025d0..6dee9b4d14 100644 --- a/spec/lib/revision_data_manager_spec.rb +++ b/spec/lib/revision_data_manager_spec.rb @@ -23,9 +23,13 @@ [{ 'mw_page_id' => '777', 'wiki_id' => 1, 'title' => 'Ragesoss/citing_sources', - 'namespace' => '4' }] + 'namespace' => '4' }, + { 'mw_page_id' => '123', + 'wiki_id' => 1, + 'title' => 'Draft article', + 'namespace' => '118' }] end - let(:sub_data) { [data1] } + let(:sub_data) { [data1, data2] } let(:data1) do [ '112', @@ -62,7 +66,7 @@ } ] end - let(:filtered_sub_data) { [] } + let(:filtered_sub_data) { [data1] } before do create(:courses_user, course:, user:) From a7b2ff81e6dc31adea7c0f4509004508fa4dae00 Mon Sep 17 00:00:00 2001 From: gabina Date: Mon, 13 Jan 2025 13:57:39 -0300 Subject: [PATCH 3/6] =?UTF-8?q?Add=20UpdateTimeslicesScopedArticle=20to=20?= =?UTF-8?q?handle=20changes=20in=20categories=20and=20assignments=20for?= =?UTF-8?q?=C2=A0ArticleScopedProgram=C2=A0and=C2=A0VisitingScholarship?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../course_types/visiting_scholarship.rb | 4 + .../update_timeslices_scoped_article.rb | 49 +++++++++++++ lib/timeslice_manager.rb | 4 +- .../update_timeslices_scoped_article_spec.rb | 73 +++++++++++++++++++ 4 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 app/services/update_timeslices_scoped_article.rb create mode 100644 spec/services/update_timeslices_scoped_article_spec.rb diff --git a/app/models/course_types/visiting_scholarship.rb b/app/models/course_types/visiting_scholarship.rb index 6eee1657f7..971cf4e4f2 100644 --- a/app/models/course_types/visiting_scholarship.rb +++ b/app/models/course_types/visiting_scholarship.rb @@ -89,4 +89,8 @@ def filter_revisions(revisions) def scoped_article_titles assignments.pluck(:article_title) end + + def scoped_article_ids + assignments.pluck(:article_id) + end end diff --git a/app/services/update_timeslices_scoped_article.rb b/app/services/update_timeslices_scoped_article.rb new file mode 100644 index 0000000000..e28933eb6f --- /dev/null +++ b/app/services/update_timeslices_scoped_article.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +require_dependency "#{Rails.root}/lib/timeslice_manager" +require_dependency "#{Rails.root}/lib/articles_courses_cleaner_timeslice" +require_dependency "#{Rails.root}/lib/revision_data_manager" + +# Set as 'needs_update' timeslices associated to new scoped articles +# or to articles that are not scoped anymore due to changes in assignments +# and categories. +# Only for ArticleScopedProgram and VisitingScholarship courses +class UpdateTimeslicesScopedArticle + def initialize(course) + @course = course + @timeslice_manager = TimesliceManager.new(course) + @scoped_article_ids = course.scoped_article_ids + end + + def run + return unless %w[ArticleScopedProgram VisitingScholarship].include? @course.type + # Get the scoped articles that don't have articles courses but do have ac timeslices + articles_with_timeslices = @course.article_course_timeslices + .where(article_id: @scoped_article_ids) + .pluck(:article_id) + + tracked_articles = @course.articles_courses + .where(article_id: @scoped_article_ids) + .pluck(:article_id) + + new_articles = articles_with_timeslices - tracked_articles + reset(new_articles) + + # Get not-scoped articles with article course records + old_articles = @course.articles_courses + .where.not(article_id: @scoped_article_ids) + .pluck(:article_id) + + reset(old_articles) + end + + private + + def reset(article_ids) + return if article_ids.empty? + + # Mark course wiki timeslices to be re-proccesed + articles = Article.where(id: article_ids) + ArticlesCoursesCleanerTimeslice.reset_specific_articles(@course, articles) + end +end diff --git a/lib/timeslice_manager.rb b/lib/timeslice_manager.rb index d88301c527..b60a107fec 100644 --- a/lib/timeslice_manager.rb +++ b/lib/timeslice_manager.rb @@ -157,8 +157,8 @@ def update_last_mw_rev_datetime(new_fetched_data) # Resets course wiki timeslices. This involves: # - Marking timeslices as needs_update for dates with associated article course timeslices - # - Deleting given article course timeslices - # - Deleting course wiki timeslcies for those dates and wikis + # - Deleting given article course timeslices if no soft + # - Deleting course user wiki timeslices for those dates and wikis # Takes a collection of article course timeslices def reset_timeslices_that_need_update_from_article_timeslices(timeslices, wiki: nil, diff --git a/spec/services/update_timeslices_scoped_article_spec.rb b/spec/services/update_timeslices_scoped_article_spec.rb new file mode 100644 index 0000000000..b5c2729c90 --- /dev/null +++ b/spec/services/update_timeslices_scoped_article_spec.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe UpdateTimeslicesScopedArticle do + let(:course) { create(:article_scoped_program) } + let(:basic_course) { create(:course) } + let(:assigned_article) { create(:article, title: 'Assigned', id: 2, namespace: 0) } + let(:random_article) { create(:article, title: 'Random', id: 1, namespace: 0) } + + context 'for ArticleScopedProgram' do + before do + enwiki = Wiki.get_or_create(language: 'en', project: 'wikipedia') + user = create(:user, username: 'Ragesoss') + create(:assignment, user_id: user.id, course_id: course.id, article_id: 2, + article_title: 'Assigned') + manager = TimesliceManager.new(course) + manager.create_timeslices_for_new_course_wiki_records([enwiki]) + + create(:article_course_timeslice, course:, article: random_article, + start: course.start + 1.day) + create(:article_course_timeslice, course:, article: assigned_article, + start: course.start) + end + + it 'sets timeslices as needs_update for new scoped articles' do + described_class.new(course).run + + expect(course.course_wiki_timeslices.where(needs_update: true).count).to eq(1) + expect(course.course_wiki_timeslices.find_by(needs_update: true).start).to eq(course.start) + expect(course.article_course_timeslices.where(article_id: assigned_article.id).count).to eq(0) + end + + it 'sets timeslices as needs_update for old scoped articles' do + create(:articles_course, course:, article: assigned_article) + create(:articles_course, course:, article: random_article) + described_class.new(course).run + + expect(course.course_wiki_timeslices.where(needs_update: true).count).to eq(1) + expect(course.course_wiki_timeslices.find_by(needs_update: true).start) + .to eq(course.start + 1.day) + expect(course.article_course_timeslices.where(article_id: random_article.id).count).to eq(0) + expect(course.articles_courses.count).to eq(1) + expect(course.articles_courses.where(article_id: random_article).count).to eq(0) + end + end + + context 'for basic course' do + before do + enwiki = Wiki.get_or_create(language: 'en', project: 'wikipedia') + user = create(:user, username: 'Ragesoss') + create(:assignment, user_id: user.id, course_id: basic_course.id, article_id: 2, + article_title: 'Assigned') + manager = TimesliceManager.new(basic_course) + manager.create_timeslices_for_new_course_wiki_records([enwiki]) + + create(:article_course_timeslice, course: basic_course, article: random_article, + start: basic_course.start + 1.day) + create(:article_course_timeslice, course: basic_course, article: assigned_article, + start: basic_course.start) + end + + it 'returns prematurely' do + create(:articles_course, course: basic_course, article: assigned_article) + create(:articles_course, course: basic_course, article: random_article) + described_class.new(basic_course).run + + expect(basic_course.course_wiki_timeslices.where(needs_update: true).count).to eq(0) + expect(basic_course.article_course_timeslices.count).to eq(2) + expect(basic_course.articles_courses.count).to eq(2) + end + end +end From edf979bbc4df6a8c2fc1634369f15b2123102e16 Mon Sep 17 00:00:00 2001 From: gabina Date: Mon, 13 Jan 2025 13:58:26 -0300 Subject: [PATCH 4/6] Start using UpdateTimeslicesScopedArticle class --- app/services/prepare_timeslices.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/services/prepare_timeslices.rb b/app/services/prepare_timeslices.rb index 6aafd17b4d..bdde16e712 100644 --- a/app/services/prepare_timeslices.rb +++ b/app/services/prepare_timeslices.rb @@ -35,6 +35,7 @@ def adjust_timeslices UpdateTimeslicesCourseUser.new(@course).run UpdateTimeslicesUntrackedArticle.new(@course).run UpdateTimeslicesCourseDate.new(@course).run + UpdateTimeslicesScopedArticle.new(@course).run @debugger.log_update_progress :adjust_timeslices_end end end From 077e368b0c3efcbb2300ee05e4c66e419bbc9ef5 Mon Sep 17 00:00:00 2001 From: gabina Date: Mon, 13 Jan 2025 16:34:33 -0300 Subject: [PATCH 5/6] Refactor RevisionDataManager class --- lib/revision_data_manager.rb | 62 +++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/lib/revision_data_manager.rb b/lib/revision_data_manager.rb index 666a72449c..5849b83838 100644 --- a/lib/revision_data_manager.rb +++ b/lib/revision_data_manager.rb @@ -21,14 +21,15 @@ def initialize(wiki, course, update_service: nil) # Returns an array of Revision records. # As a side effect, it imports Article records. def fetch_revision_data_for_course(timeslice_start, timeslice_end) - all_sub_data, sub_data = get_course_revisions(@course.students, timeslice_start, timeslice_end) + all_sub_data, scoped_sub_data = get_course_revisions(@course.students, timeslice_start, + timeslice_end) @revisions = [] # Extract all article data from the slice. Outputs a hash with article attrs. articles = sub_data_to_article_attributes(all_sub_data) # Import articles. We do this here to avoid saving article data in memory. - # Note that we create articles for all sub data (not only for filtered ones). + # Note that we create articles for all sub data (not only for scoped revisions). ArticleImporter.new(@wiki).import_articles_from_revision_data(articles) @articles = Article.where(wiki_id: @wiki.id, mw_page_id: articles.map { |a| a['mw_page_id'] }) @@ -38,8 +39,10 @@ def fetch_revision_data_for_course(timeslice_start, timeslice_end) # Now get all the revisions # We need a slightly different article dictionary format here article_dict = @articles.each_with_object({}) { |a, memo| memo[a.mw_page_id] = a.id } - @revisions = sub_data_to_revision_attributes(all_sub_data, users, filtered: sub_data, -articles: article_dict) + @revisions = sub_data_to_revision_attributes(all_sub_data, + users, + scoped_sub_data:, + articles: article_dict) # TODO: resolve duplicates # DuplicateArticleDeleter.new(@wiki).resolve_duplicates(@articles) @@ -54,10 +57,10 @@ def fetch_revision_data_for_course(timeslice_start, timeslice_end) # This method gets revisions for some specific users. # It does not fetch scores. It has no side effects. def fetch_revision_data_for_users(users, timeslice_start, timeslice_end) - _, sub_data = get_course_revisions(users, timeslice_start, timeslice_end) - users = user_dict_from_sub_data(sub_data) + all_sub_data, scoped_sub_data = get_course_revisions(users, timeslice_start, timeslice_end) + users = user_dict_from_sub_data(all_sub_data) - sub_data_to_revision_attributes(sub_data, users) + sub_data_to_revision_attributes(all_sub_data, users, scoped_sub_data:) end ########### @@ -68,7 +71,7 @@ def fetch_revision_data_for_users(users, timeslice_start, timeslice_end) # Returns a list of revisions for users during the given period: # [all_sub_data, sub_data]. # - all_sub_data: all revisions within the period. - # - sub_data: revisions filtered based on the course type. + # - scoped_sub_data: revisions filtered based on the course type. def get_course_revisions(users, start, end_date) all_sub_data = get_revisions(users, start, end_date) # Filter revisions based on the course type. @@ -117,29 +120,18 @@ def user_dict_from_sub_data(sub_data) User.where(username: users).pluck(:username, :id).to_h end - def sub_data_to_revision_attributes(all_sub_data, users, filtered: nil, articles: nil) + # Returns revisions from all_sub_data. + # scoped_sub_data contains filtered data according to the course type. + def sub_data_to_revision_attributes(all_sub_data, users, scoped_sub_data: nil, articles: nil) all_sub_data.flat_map do |_a_id, article_data| article_data['revisions'].map do |rev_data| - mw_page_id = rev_data['mw_page_id'].to_i - article_id = articles.nil? ? nil : articles[mw_page_id] - Revision.new({ - mw_rev_id: rev_data['mw_rev_id'], - date: rev_data['date'], - characters: rev_data['characters'], - article_id:, mw_page_id:, - user_id: users[rev_data['username']], - new_article: string_to_boolean(rev_data['new_article']), - system: string_to_boolean(rev_data['system']), - wiki_id: rev_data['wiki_id'], - views: revision_filtered?(filtered, mw_page_id, rev_data['mw_rev_id']) - }) + create_revision(rev_data, scoped_sub_data, users, articles) end end.uniq(&:mw_rev_id) end - def revision_filtered?(data, mw_page_id, mw_rev_id) - return false if data.nil? - data.any? do |_, entry| + def scoped_revision?(scoped_sub_data, mw_page_id, mw_rev_id) + scoped_sub_data.any? do |_, entry| next unless entry.is_a?(Hash) && entry['article'] && entry['revisions'] entry['article']['mw_page_id'] == mw_page_id.to_s && @@ -147,6 +139,26 @@ def revision_filtered?(data, mw_page_id, mw_rev_id) end end + # Creates a revision record for the given revision data. + # Note that views field is currently used to track if the revision + # is a scoped one. + # TODO: change the field name. Review this + def create_revision(rev_data, scoped_sub_data, users, articles) + mw_page_id = rev_data['mw_page_id'].to_i + Revision.new({ + mw_rev_id: rev_data['mw_rev_id'], + date: rev_data['date'], + characters: rev_data['characters'], + article_id: articles.nil? ? nil : articles[mw_page_id], + mw_page_id:, + user_id: users[rev_data['username']], + new_article: string_to_boolean(rev_data['new_article']), + system: string_to_boolean(rev_data['system']), + wiki_id: rev_data['wiki_id'], + views: scoped_revision?(scoped_sub_data, mw_page_id, rev_data['mw_rev_id']) + }) + end + # Partition revisions between those belonging to articles in/out of mainspace/userspace/draftspace # We need this to avoid calculating scores for articles out of pertinent spaces # Returns [revisions_in_spaces, revisions_out_spaces] From 46dfc1aa67b8ba6021f0a429ab9d8dad884c9150 Mon Sep 17 00:00:00 2001 From: gabina Date: Mon, 13 Jan 2025 16:36:09 -0300 Subject: [PATCH 6/6] Add scoped_revision revision method and fix specs --- app/models/course_data/articles_courses.rb | 2 +- app/models/course_user_wiki_timeslice.rb | 3 +-- app/models/course_wiki_timeslice.rb | 3 +-- app/models/wiki_content/revision.rb | 4 +++ spec/models/articles_courses_spec.rb | 10 +++---- .../models/course_user_wiki_timeslice_spec.rb | 27 ++++++++++++------- spec/models/course_wiki_timeslice_spec.rb | 18 +++++++------ 7 files changed, 39 insertions(+), 28 deletions(-) diff --git a/app/models/course_data/articles_courses.rb b/app/models/course_data/articles_courses.rb index 8ea5c40620..377c336736 100644 --- a/app/models/course_data/articles_courses.rb +++ b/app/models/course_data/articles_courses.rb @@ -182,7 +182,7 @@ def self.update_from_course(course) end def self.update_from_course_revisions(course, revisions) - revisions = revisions.reject { |r| r.views.zero? } + revisions = revisions.select(&:scoped_revision) course_article_ids = course.articles.where(wiki: course.wikis).pluck(:id) revision_article_ids = article_ids_by_namespaces_from_revisions(course, revisions) diff --git a/app/models/course_user_wiki_timeslice.rb b/app/models/course_user_wiki_timeslice.rb index cab127be95..424f1d962c 100644 --- a/app/models/course_user_wiki_timeslice.rb +++ b/app/models/course_user_wiki_timeslice.rb @@ -45,7 +45,6 @@ class CourseUserWikiTimeslice < ApplicationRecord # {:start=>"20160320", :end=>"20160401", :revisions=>[...]}, # updates the article course timeslices based on the revisions. def self.update_course_user_wiki_timeslices(course, user_id, wiki, revisions) - revisions[:revisions] = revisions[:revisions].reject { |r| r.views.zero? } rev_start = revisions[:start] rev_end = revisions[:end] # Timeslice dates to update are determined based on course wiki timeslices @@ -53,7 +52,7 @@ def self.update_course_user_wiki_timeslices(course, user_id, wiki, revisions) .for_revisions_between(rev_start, rev_end) timeslices.each do |timeslice| # Group revisions that belong to the timeslice - revisions_in_timeslice = revisions[:revisions].select do |revision| + revisions_in_timeslice = revisions[:revisions].select(&:scoped_revision).select do |revision| timeslice.start <= revision.date && revision.date < timeslice.end end # Get or create article course timeslice based on course, article_id, diff --git a/app/models/course_wiki_timeslice.rb b/app/models/course_wiki_timeslice.rb index 6d19d03a7e..101caa46c7 100644 --- a/app/models/course_wiki_timeslice.rb +++ b/app/models/course_wiki_timeslice.rb @@ -48,7 +48,6 @@ class CourseWikiTimeslice < ApplicationRecord # {:start=>"20160320", :end=>"20160401", :revisions=>[...]}, # updates the course wiki timeslices based on the revisions. def self.update_course_wiki_timeslices(course, wiki, revisions) - revisions[:revisions] = revisions[:revisions].reject { |r| r.views.zero? } rev_start = revisions[:start] rev_end = revisions[:end] # Course wiki timeslices to update @@ -57,7 +56,7 @@ def self.update_course_wiki_timeslices(course, wiki, revisions) .for_revisions_between(rev_start, rev_end) course_wiki_timeslices.each do |timeslice| # Group revisions that belong to the timeslice - revisions_in_timeslice = revisions[:revisions].select do |revision| + revisions_in_timeslice = revisions[:revisions].select(&:scoped_revision).select do |revision| timeslice.start <= revision.date && revision.date < timeslice.end end # Update cache for CourseWikiTimeslice diff --git a/app/models/wiki_content/revision.rb b/app/models/wiki_content/revision.rb index 59fbe00d28..a3b547f9b8 100644 --- a/app/models/wiki_content/revision.rb +++ b/app/models/wiki_content/revision.rb @@ -107,4 +107,8 @@ def diff_stats rescue JSON::ParserError nil # Return nil if parsing fails (i.e., not diff_stats) end + + def scoped_revision + !views.zero? + end end diff --git a/spec/models/articles_courses_spec.rb b/spec/models/articles_courses_spec.rb index 02619ecb38..6b2b6bc20d 100644 --- a/spec/models/articles_courses_spec.rb +++ b/spec/models/articles_courses_spec.rb @@ -208,15 +208,15 @@ create(:courses_user, user:, course:, role: CoursesUsers::Roles::STUDENT_ROLE) array_revisions << build(:revision, article:, user:, date: '2024-07-07', - system: true, new_article: true) + system: true, new_article: true, views: true) array_revisions << build(:revision, article:, user:, date: '2024-07-06 20:05:10', - system: true, new_article: true) + system: true, new_article: true, views: true) array_revisions << build(:revision, article:, user:, date: '2024-07-06 20:06:11', - system: true, new_article: true) + system: true, new_article: true, views: true) array_revisions << build(:revision, article:, user:, date: '2024-07-08 20:03:01', - system: true, new_article: true) + system: true, new_article: true, views: true) array_revisions << build(:revision, article: article3, user:, date: '2024-07-07', - system: true, new_article: true) + system: true, new_article: true, views: true) # revision for a non-tracked wiki array_revisions << build(:revision, article: article2, user:, date: '2024-07-06') # revision for a non-tracked namespace diff --git a/spec/models/course_user_wiki_timeslice_spec.rb b/spec/models/course_user_wiki_timeslice_spec.rb index f237a3d3a8..f220e6484d 100644 --- a/spec/models/course_user_wiki_timeslice_spec.rb +++ b/spec/models/course_user_wiki_timeslice_spec.rb @@ -55,28 +55,32 @@ characters: 123, features: { 'num_ref' => 8 }, features_previous: { 'num_ref' => 1 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision1) do build(:revision, article: talk_page, date: start, characters: 200, features: { 'num_ref' => 12 }, features_previous: { 'num_ref' => 10 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision2) do build(:revision, article: sandbox, date: start, characters: -65, features: { 'num_ref' => 1 }, features_previous: { 'num_ref' => 2 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision3) do build(:revision, article: draft, date: start, characters: 225, features: { 'num_ref' => 3 }, features_previous: { 'num_ref' => 3 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision4) do build(:revision, article:, date: start, @@ -84,7 +88,8 @@ deleted: true, # deleted revision features: { 'num_ref' => 2 }, features_previous: { 'num_ref' => 0 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision5) do build(:revision, article_id: -1, # revision for a non-existing article @@ -93,7 +98,8 @@ deleted: false, features: { 'num_ref' => 2 }, features_previous: { 'num_ref' => 0 }, - user_id: user.id) + user_id: user.id, + views: true) end let(:revision6) do build(:revision, article: draft, date: start, @@ -101,7 +107,8 @@ features: { 'num_ref' => 1 }, features_previous: { 'num_ref' => 0 }, user_id: user.id, - system: true) # revision made by the system + system: true, # revision made by the system + views: true) end let(:revisions) { [revision0, revision1, revision2, revision3, revision4, revision5, revision6] } let(:subject) { course_user_wiki_timeslice.update_cache_from_revisions revisions } @@ -109,9 +116,9 @@ describe '.update_course_user_wiki_timeslices' do before do TimesliceManager.new(course).create_timeslices_for_new_course_wiki_records(course.wikis) - revisions << build(:revision, article:, user_id: user.id, date: start + 26.hours) - revisions << build(:revision, article:, user_id: user.id, date: start + 50.hours) - revisions << build(:revision, article:, user_id: user.id, date: start + 51.hours) + revisions << build(:revision, article:, user_id: user.id, date: start + 26.hours, views: true) + revisions << build(:revision, article:, user_id: user.id, date: start + 50.hours, views: true) + revisions << build(:revision, article:, user_id: user.id, date: start + 51.hours, views: true) end it 'creates the right article timeslices based on the revisions' do diff --git a/spec/models/course_wiki_timeslice_spec.rb b/spec/models/course_wiki_timeslice_spec.rb index 3ec40975f7..16e12aae62 100644 --- a/spec/models/course_wiki_timeslice_spec.rb +++ b/spec/models/course_wiki_timeslice_spec.rb @@ -86,19 +86,21 @@ references_count: 4, revision_count: 4) - array_revisions << build(:revision, article:, user_id: 1, date: start) - array_revisions << build(:revision, article:, user_id: 1, date: start + 2.hours) - array_revisions << build(:revision, article:, user_id: 2, date: start + 3.hours) - array_revisions << build(:revision, article:, user_id: 2, date: start + 3.hours, system: true) - array_revisions << build(:revision, article:, deleted: true, user_id: 1, date: start + 8.hours) + array_revisions << build(:revision, article:, user_id: 1, date: start, views: true) + array_revisions << build(:revision, article:, user_id: 1, date: start + 2.hours, views: true) + array_revisions << build(:revision, article:, user_id: 2, date: start + 3.hours, views: true) + array_revisions << build(:revision, article:, user_id: 2, date: start + 3.hours, system: true, + views: true) + array_revisions << build(:revision, article:, deleted: true, user_id: 1, date: start + 8.hours, + views: true) end describe '.update_course_wiki_timeslices' do before do TimesliceManager.new(course).create_timeslices_for_new_course_wiki_records([wiki]) - array_revisions << build(:revision, article:, user_id: 1, date: start + 26.hours) - array_revisions << build(:revision, article:, user_id: 1, date: start + 50.hours) - array_revisions << build(:revision, article:, user_id: 1, date: start + 51.hours) + array_revisions << build(:revision, article:, user_id: 1, date: start + 26.hours, views: true) + array_revisions << build(:revision, article:, user_id: 1, date: start + 50.hours, views: true) + array_revisions << build(:revision, article:, user_id: 1, date: start + 51.hours, views: true) end it 'updates the right course wiki timeslices based on the revisions' do