From 317e64494716281f71daa1c66669b2caa5de5a43 Mon Sep 17 00:00:00 2001 From: Dann Bohn Date: Mon, 8 Jan 2024 10:55:37 -0500 Subject: [PATCH 1/6] removes sidekiq --- Gemfile | 4 +- Gemfile.lock | 40 +++++-------------- config.ru | 13 ------ config/sidekiq.yml | 10 ----- lib/psulib_traject.rb | 2 - lib/psulib_traject/workers/base.rb | 2 - lib/tasks/traject.rake | 7 +--- sidekiq.rb | 7 ---- .../workers/hourly_indexer_spec.rb | 20 ---------- spec/support/sidekiq.rb | 16 -------- 10 files changed, 11 insertions(+), 110 deletions(-) delete mode 100644 config.ru delete mode 100644 config/sidekiq.yml delete mode 100644 sidekiq.rb delete mode 100644 spec/support/sidekiq.rb diff --git a/Gemfile b/Gemfile index 953bb33b..a4a04da1 100644 --- a/Gemfile +++ b/Gemfile @@ -7,10 +7,9 @@ gem 'library_stdnums' gem 'mail' gem 'marc' gem 'rake' +gem 'redis' gem 'rsolr' gem 'shelvit' -gem 'sidekiq', '~> 6.5' -gem 'sidekiq-scheduler', '~> 4.0' gem 'traject' gem 'traject-marc4j_reader', platform: :jruby gem 'whenever', require: false @@ -26,7 +25,6 @@ group :development, :test do gem 'pry-debugger-jruby', platform: :jruby gem 'rspec' gem 'rspec-its' - gem 'rspec-sidekiq' gem 'rubocop', '~> 1.5' gem 'rubocop-performance', '~> 1.1' gem 'rubocop-rspec', '~> 2' diff --git a/Gemfile.lock b/Gemfile.lock index 30dbc9b8..0e3c9af3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -55,8 +55,6 @@ GEM dry-equalizer (~> 0.2) dry-initializer (~> 3.0) dry-schema (~> 1.5, >= 1.5.2) - et-orbi (1.2.7) - tzinfo faker (3.2.0) i18n (>= 1.8.11, < 2) faraday (1.4.2) @@ -77,9 +75,6 @@ GEM ffi-compiler (1.0.1) ffi (>= 1.0.0) rake - fugit (1.9.0) - et-orbi (~> 1, >= 1.2.7) - raabro (~> 1.4) hashdiff (1.0.1) hashie (4.1.0) http (4.4.1) @@ -130,20 +125,21 @@ GEM coderay (~> 1.1) method_source (~> 1.0) spoon (~> 0.0) - pry-byebug (3.9.0) + pry-byebug (3.10.1) byebug (~> 11.0) - pry (~> 0.13.0) + pry (>= 0.13, < 0.15) pry-debugger-jruby (2.0.0-java) pry (>= 0.13, < 0.14) ruby-debug-base (>= 0.10.4, < 0.12) public_suffix (4.0.6) - raabro (1.4.0) racc (1.7.2) racc (1.7.2-java) - rack (2.2.6.4) rainbow (3.1.1) rake (13.0.3) - redis (4.8.1) + redis (5.0.8) + redis-client (>= 0.17.0) + redis-client (0.19.1) + connection_pool regexp_parser (2.8.0) rexml (3.2.5) rsolr (2.3.0) @@ -164,9 +160,6 @@ GEM rspec-mocks (3.10.2) diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.10.0) - rspec-sidekiq (3.1.0) - rspec-core (~> 3.0, >= 3.0.0) - sidekiq (>= 2.4.0) rspec-support (3.10.2) rubocop (1.52.0) json (~> 2.3) @@ -194,20 +187,9 @@ GEM ruby-debug-base (0.11.0-java) ruby-progressbar (1.13.0) ruby2_keywords (0.0.4) - rufus-scheduler (3.9.1) - fugit (~> 1.1, >= 1.1.6) scrub_rb (1.0.1) shelvit (0.1.2) lcsort (~> 0.9) - sidekiq (6.5.12) - connection_pool (>= 2.2.5, < 3) - rack (~> 2.0) - redis (>= 4.5.0, < 5) - sidekiq-scheduler (4.0.3) - redis (>= 4.2.0) - rufus-scheduler (~> 3.2) - sidekiq (>= 4, < 7) - tilt (>= 1.4.0) simplecov (0.17.1) docile (~> 1.1) json (>= 1.8, < 3) @@ -216,7 +198,6 @@ GEM slop (4.9.1) spoon (0.0.6) ffi - tilt (2.3.0) traject (3.5.0) concurrent-ruby (>= 0.8.0) dot-properties (>= 0.1.1) @@ -231,12 +212,10 @@ GEM traject-marc4j_reader (1.1.0-java) marc (~> 1.0) marc-marc4j (~> 1.0) - tzinfo (2.0.6) - concurrent-ruby (~> 1.0) unf (0.1.4) unf_ext unf (0.1.4-java) - unf_ext (0.0.7.7) + unf_ext (0.0.9.1) unicode-display_width (2.4.2) webmock (3.13.0) addressable (>= 2.3.6) @@ -247,6 +226,7 @@ GEM yell (2.2.2) PLATFORMS + universal-java-1.8 universal-java-10 universal-java-11 universal-java-14 @@ -266,16 +246,14 @@ DEPENDENCIES pry-byebug pry-debugger-jruby rake + redis rsolr rspec rspec-its - rspec-sidekiq rubocop (~> 1.5) rubocop-performance (~> 1.1) rubocop-rspec (~> 2) shelvit - sidekiq (~> 6.5) - sidekiq-scheduler (~> 4.0) simplecov (< 0.18) traject traject-marc4j_reader diff --git a/config.ru b/config.ru deleted file mode 100644 index d4686b19..00000000 --- a/config.ru +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -require 'sidekiq/web' -require 'sidekiq-scheduler/web' - -Sidekiq.configure_client do |config| - config.redis = { size: 1 } -end - -use Rack::Session::Cookie, secret: ENV.fetch('SESSION_KEY', nil), same_site: true, max_age: 86400 -map '/sidekiq' do - run Sidekiq::Web -end diff --git a/config/sidekiq.yml b/config/sidekiq.yml deleted file mode 100644 index 25b75bf1..00000000 --- a/config/sidekiq.yml +++ /dev/null @@ -1,10 +0,0 @@ ---- -:verbose: false -:concurrency: <%= ENV.fetch("SIDEKIQ_CONCURRENCY", "1") %> -:timeout: 25 -:max_retries: <%= ENV.fetch("SIDEKIQ_RETRIES", "3") %> -:schedule: - hourlies: - cron: '3 0,7-23 * * *' - class: 'PsulibTraject::Workers::HourlyIndexer' - diff --git a/lib/psulib_traject.rb b/lib/psulib_traject.rb index 7142404e..0f4924c8 100644 --- a/lib/psulib_traject.rb +++ b/lib/psulib_traject.rb @@ -6,8 +6,6 @@ require 'library_stdnums' require 'redis' require 'shelvit' -require 'sidekiq' -require 'sidekiq-scheduler' require 'traject' require 'traject/macros/marc21_semantics' require 'yaml' diff --git a/lib/psulib_traject/workers/base.rb b/lib/psulib_traject/workers/base.rb index 7923f582..037a410c 100644 --- a/lib/psulib_traject/workers/base.rb +++ b/lib/psulib_traject/workers/base.rb @@ -3,8 +3,6 @@ module PsulibTraject module Workers class Base - include Sidekiq::Worker - def self.perform_now(*args) new.perform(*args) end diff --git a/lib/tasks/traject.rake b/lib/tasks/traject.rake index b536397a..f8ffda71 100644 --- a/lib/tasks/traject.rake +++ b/lib/tasks/traject.rake @@ -1,12 +1,7 @@ # frozen_string_literal: true namespace :traject do - desc 'Index a file or folder of files async with sidekiq' - task :index_async, [:path, :collection] do |_task, args| - PsulibTraject::Workers::Indexer.perform_async(args.path, args.collection) - end - - desc 'Index a file or folder of files without sidekiq' + desc 'Index a file or folder of files' task :index, [:path] do |_task, args| PsulibTraject::Workers::Indexer.perform_now(args.path, args.collection) end diff --git a/sidekiq.rb b/sidekiq.rb deleted file mode 100644 index 9ce171da..00000000 --- a/sidekiq.rb +++ /dev/null @@ -1,7 +0,0 @@ -# frozen_string_literal: true - -require 'pathname' - -$LOAD_PATH.prepend(Pathname.pwd.join('lib').to_s) -require 'sidekiq' -require 'psulib_traject' diff --git a/spec/lib/psulib_traject/workers/hourly_indexer_spec.rb b/spec/lib/psulib_traject/workers/hourly_indexer_spec.rb index 64d5279f..8e86385c 100644 --- a/spec/lib/psulib_traject/workers/hourly_indexer_spec.rb +++ b/spec/lib/psulib_traject/workers/hourly_indexer_spec.rb @@ -42,17 +42,6 @@ ) end - it 'submits jobs for each hourly file' do - indexer.perform_async - expect(indexer).to have_enqueued_sidekiq_job - end - - it 'increases the size of the job queue' do - expect { - indexer.perform_async - }.to change(indexer.jobs, :size).by(1) - end - it 'performs Indexer jobs' do indexer.perform_now expect(WebMock).to have_requested( @@ -67,15 +56,6 @@ :post, "http://#{ConfigSettings.solr.host}:#{ConfigSettings.solr.port}/solr/psul_catalog/update/json" ) .with(body: '{"delete":"1235"}').times(1) - expect(PsulibTraject::Workers::Indexer.jobs.size).to eq(0) - end - - it 'does not perform the job a second time' do - indexer.perform_now - expect(WebMock).to have_requested( - :post, "http://#{ConfigSettings.solr.host}:#{ConfigSettings.solr.port}/solr/psul_catalog/update/json" - ).times(0) - expect(PsulibTraject::Workers::Indexer.jobs.size).to eq(0) end end end diff --git a/spec/support/sidekiq.rb b/spec/support/sidekiq.rb deleted file mode 100644 index 584cc6cd..00000000 --- a/spec/support/sidekiq.rb +++ /dev/null @@ -1,16 +0,0 @@ -# frozen_string_literal: true - -require 'rspec-sidekiq' -require 'sidekiq/testing' -require 'sidekiq/testing/inline' - -Sidekiq::Testing.fake! - -RSpec::Sidekiq.configure do |conf| - # Clears all job queues before each example - conf.clear_all_enqueued_jobs = true # default => true - # Whether to use terminal colours when outputting messages - conf.enable_terminal_colours = true # default => true - # Warn when jobs are not enqueued to Redis but to a job array - conf.warn_when_jobs_not_processed_by_sidekiq = false # default => true -end From 8a9ffc2344b4970a3dbf0d396448be0bf948589d Mon Sep 17 00:00:00 2001 From: Dann Bohn Date: Mon, 8 Jan 2024 11:16:41 -0500 Subject: [PATCH 2/6] hourlies now incremental --- config/settings.yml | 10 +++++----- lib/psulib_traject.rb | 2 +- ...{hourly_indexer.rb => incremental_indexer.rb} | 16 ++++++++-------- lib/tasks/traject.rake | 10 +++++----- ...dexer_spec.rb => incremental_indexer_spec.rb} | 2 +- 5 files changed, 20 insertions(+), 20 deletions(-) rename lib/psulib_traject/workers/{hourly_indexer.rb => incremental_indexer.rb} (79%) rename spec/lib/psulib_traject/workers/{hourly_indexer_spec.rb => incremental_indexer_spec.rb} (96%) diff --git a/config/settings.yml b/config/settings.yml index 3073d684..ac7cf094 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -15,11 +15,11 @@ solr_writer: reader_class_name: PsulibTraject::MarcCombiningReader commit_timeout: 10000 symphony_data_path: "<%= ENV.fetch('SYMPHONY_DATA_PATH', '/data/symphony_data') %>" -# Folder within `symphony_data_path` where hourlies are stored -symphony_hourlies_subdir: hourlies -# How long to keep the hourlies skip keys -# we keep hourlies for 7 days, so we keep the lock for 10 to overlap -hourlies_skip_expire_seconds: 864000 +# Folder within `symphony_data_path` where incrementals are stored +symphony_incremental_subdir: hourlies +# How long to keep the incrementals skip keys +# we keep incrementals for 7 days, so we keep the lock for 10 to overlap +incremental_skip_expire_seconds: 864000 hathi_etas: false hathi_overlap_path: "<%= ENV.fetch('HATHI_OVERLAP_PATH', '/data/hathitrust_data/overlap.tsv') %>" marc4j_reader: diff --git a/lib/psulib_traject.rb b/lib/psulib_traject.rb index 0f4924c8..b0162a7d 100644 --- a/lib/psulib_traject.rb +++ b/lib/psulib_traject.rb @@ -38,7 +38,7 @@ module PsulibTraject require 'psulib_traject/solr_manager' require 'psulib_traject/subject_heading' require 'psulib_traject/workers/base' - require 'psulib_traject/workers/hourly_indexer' + require 'psulib_traject/workers/incremental_indexer' require 'psulib_traject/workers/indexer' Config.setup do |config| diff --git a/lib/psulib_traject/workers/hourly_indexer.rb b/lib/psulib_traject/workers/incremental_indexer.rb similarity index 79% rename from lib/psulib_traject/workers/hourly_indexer.rb rename to lib/psulib_traject/workers/incremental_indexer.rb index 32672515..ff96232a 100644 --- a/lib/psulib_traject/workers/hourly_indexer.rb +++ b/lib/psulib_traject/workers/incremental_indexer.rb @@ -2,22 +2,22 @@ module PsulibTraject module Workers - class HourlyIndexer < Base + class IncrementalIndexer < Base def perform perform_indexes perform_deletes end - def hourlies_directory - @hourlies_directory ||= Pathname + def incremental_directory + @incremental_directory ||= Pathname .new(ConfigSettings.symphony_data_path) - .join(ConfigSettings.symphony_hourlies_subdir) + .join(ConfigSettings.symphony_incremental_subdir) end def perform_deletes current_collection = PsulibTraject::SolrManager.new.current_collection - target_deletes = Dir.glob("#{hourlies_directory}/**/*_deletes_*.txt").sort + target_deletes = Dir.glob("#{incremental_directory}/**/*_deletes_*.txt").sort processed_deletes = redis.keys("#{current_collection}:*").map { |e| e.gsub("#{current_collection}:", '') } files_to_process = target_deletes - processed_deletes @@ -30,7 +30,7 @@ def perform_deletes .split("\n") .map { |id| delete(id) } redis.set("#{current_collection}:#{file_name}", true) - redis.expire("#{current_collection}:#{file_name}", ConfigSettings.hourlies_skip_expire_seconds.to_i) + redis.expire("#{current_collection}:#{file_name}", ConfigSettings.incremental_skip_expire_seconds.to_i) end end @@ -41,7 +41,7 @@ def delete(id) def perform_indexes current_collection = PsulibTraject::SolrManager.new.current_collection - target_files = Dir.glob("#{hourlies_directory}/**/*.m*rc").sort + target_files = Dir.glob("#{incremental_directory}/**/*.m*rc").sort indexed_files = redis.keys("#{current_collection}:*").map { |e| e.gsub("#{current_collection}:", '') } files_to_index = target_files - indexed_files @@ -54,7 +54,7 @@ def perform_indexes files_to_index.each do |file_name| indexer.logger.info "marking #{file_name} as done" redis.set("#{current_collection}:#{file_name}", true) - redis.expire("#{current_collection}:#{file_name}", ConfigSettings.hourlies_skip_expire_seconds.to_i) + redis.expire("#{current_collection}:#{file_name}", ConfigSettings.incremental_skip_expire_seconds.to_i) end end end diff --git a/lib/tasks/traject.rake b/lib/tasks/traject.rake index f8ffda71..3bc1ff49 100644 --- a/lib/tasks/traject.rake +++ b/lib/tasks/traject.rake @@ -6,13 +6,13 @@ namespace :traject do PsulibTraject::Workers::Indexer.perform_now(args.path, args.collection) end - desc 'Run Hourlies' - task :hourlies do - PsulibTraject::Workers::HourlyIndexer.perform_now + desc 'Run incrementals' + task :incremenatals do + PsulibTraject::Workers::IncrementalIndexer.perform_now end - desc 'Clear redis of hourly skip list' - task :clear_hourlies do + desc 'Clear redis of incremental skip list' + task :clear_incrementals do current_collection = PsulibTraject::SolrManager.new.current_collection redis = Redis.new redis.keys("#{current_collection}:*").map { |key| redis.del(key) } diff --git a/spec/lib/psulib_traject/workers/hourly_indexer_spec.rb b/spec/lib/psulib_traject/workers/incremental_indexer_spec.rb similarity index 96% rename from spec/lib/psulib_traject/workers/hourly_indexer_spec.rb rename to spec/lib/psulib_traject/workers/incremental_indexer_spec.rb index 8e86385c..b3ec7441 100644 --- a/spec/lib/psulib_traject/workers/hourly_indexer_spec.rb +++ b/spec/lib/psulib_traject/workers/incremental_indexer_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe PsulibTraject::Workers::HourlyIndexer do +RSpec.describe PsulibTraject::Workers::IncrementalIndexer do let(:indexer) { described_class } before(:all) do From ecd223e6fa179f746c4330ca7b30fbc5b74ba7cd Mon Sep 17 00:00:00 2001 From: Dann Bohn Date: Mon, 8 Jan 2024 12:27:59 -0500 Subject: [PATCH 3/6] fix typo --- lib/tasks/traject.rake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tasks/traject.rake b/lib/tasks/traject.rake index 3bc1ff49..f0f65a0b 100644 --- a/lib/tasks/traject.rake +++ b/lib/tasks/traject.rake @@ -7,7 +7,7 @@ namespace :traject do end desc 'Run incrementals' - task :incremenatals do + task :incrementals do PsulibTraject::Workers::IncrementalIndexer.perform_now end From ca0d8004e31d098d13dfc65d9cdd33f14a19851c Mon Sep 17 00:00:00 2001 From: Dann Bohn Date: Tue, 9 Jan 2024 10:07:04 -0500 Subject: [PATCH 4/6] use incrementals folder --- config/settings.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/settings.yml b/config/settings.yml index ac7cf094..004ae300 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -16,7 +16,7 @@ reader_class_name: PsulibTraject::MarcCombiningReader commit_timeout: 10000 symphony_data_path: "<%= ENV.fetch('SYMPHONY_DATA_PATH', '/data/symphony_data') %>" # Folder within `symphony_data_path` where incrementals are stored -symphony_incremental_subdir: hourlies +symphony_incremental_subdir: incrementals # How long to keep the incrementals skip keys # we keep incrementals for 7 days, so we keep the lock for 10 to overlap incremental_skip_expire_seconds: 864000 From 314974c41d19b56f87be407db6ab84cdb341641b Mon Sep 17 00:00:00 2001 From: Dann Bohn Date: Tue, 9 Jan 2024 10:08:23 -0500 Subject: [PATCH 5/6] allow old rake task to work for now --- Dockerfile | 2 +- lib/tasks/traject.rake | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 953f75c1..39447a79 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ FROM jruby:9.4.2.0 WORKDIR /app -ARG UID=3000 +ARG UID=1000 ENV BUNDLE_PATH=/app/vendor/bundle diff --git a/lib/tasks/traject.rake b/lib/tasks/traject.rake index f0f65a0b..211ad1e9 100644 --- a/lib/tasks/traject.rake +++ b/lib/tasks/traject.rake @@ -11,6 +11,11 @@ namespace :traject do PsulibTraject::Workers::IncrementalIndexer.perform_now end + desc 'Run incrementals as hourlies' + task :hourlies do + PsulibTraject::Workers::IncrementalIndexer.perform_now + end + desc 'Clear redis of incremental skip list' task :clear_incrementals do current_collection = PsulibTraject::SolrManager.new.current_collection From 00216fe1ec5ec7321a2d5a66ad373b23f89e4427 Mon Sep 17 00:00:00 2001 From: Dann Bohn Date: Tue, 9 Jan 2024 10:18:41 -0500 Subject: [PATCH 6/6] move hourlies subdir --- spec/fixtures/{hourlies => incrementals}/date_008.marc | 0 spec/fixtures/{hourlies => incrementals}/date_009.mrc | 0 .../{hourlies => incrementals}/hourly_deletes_202106231800.txt | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename spec/fixtures/{hourlies => incrementals}/date_008.marc (100%) rename spec/fixtures/{hourlies => incrementals}/date_009.mrc (100%) rename spec/fixtures/{hourlies => incrementals}/hourly_deletes_202106231800.txt (100%) diff --git a/spec/fixtures/hourlies/date_008.marc b/spec/fixtures/incrementals/date_008.marc similarity index 100% rename from spec/fixtures/hourlies/date_008.marc rename to spec/fixtures/incrementals/date_008.marc diff --git a/spec/fixtures/hourlies/date_009.mrc b/spec/fixtures/incrementals/date_009.mrc similarity index 100% rename from spec/fixtures/hourlies/date_009.mrc rename to spec/fixtures/incrementals/date_009.mrc diff --git a/spec/fixtures/hourlies/hourly_deletes_202106231800.txt b/spec/fixtures/incrementals/hourly_deletes_202106231800.txt similarity index 100% rename from spec/fixtures/hourlies/hourly_deletes_202106231800.txt rename to spec/fixtures/incrementals/hourly_deletes_202106231800.txt