diff --git a/app/jobs/delete_gbif_events_job.rb b/app/jobs/delete_gbif_events_job.rb
index 1c7777a84..d40fa913b 100644
--- a/app/jobs/delete_gbif_events_job.rb
+++ b/app/jobs/delete_gbif_events_job.rb
@@ -5,12 +5,7 @@ class DeleteGbifEventsJob < ApplicationJob
 
   def perform(ids, options = {})
     label = options[:label]
-    index = ENV["INDEX"]
-
-    if index.blank?
-      Rails.logger.error("#{label}: ENV['INDEX'] must be provided")
-      return
-    end
+    index = options[:index]
 
     # delete event records from mysql
     sql = ActiveRecord::Base.sanitize_sql_array(["DELETE FROM events WHERE id IN (?)", ids])
diff --git a/app/models/event.rb b/app/models/event.rb
index a8cae10b6..de47a0ef3 100644
--- a/app/models/event.rb
+++ b/app/models/event.rb
@@ -887,11 +887,14 @@ def self.loop_through_gbif_events(options)
     label = options[:label] || ""
     job_name = options[:job_name] || ""
     query = options[:query].presence
+    delete_count = 0
+    # Optional cap on the number of events queued for deletion; nil means no cap.
+    max_delete_count = options[:max_delete_count]
 
     response = Event.query(query, filter.merge(page: { size: 1, cursor: [] }))
 
     if response.size.positive?
-      while response.size.positive?
+      while response.size.positive? && (max_delete_count.nil? || delete_count < max_delete_count)
         response = Event.query(query, filter.merge(page: { size: size, cursor: cursor }))
         break unless response.size.positive?
 
@@ -905,6 +908,8 @@ def self.loop_through_gbif_events(options)
 
         ids = response.results.map(&:_id).uniq
         Object.const_get(job_name).perform_later(ids, options)
+
+        delete_count += response.size
       end
     end
 
diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake
index dd459ca21..65430422b 100644
--- a/lib/tasks/event.rake
+++ b/lib/tasks/event.rake
@@ -87,14 +87,31 @@ end
 namespace :gbif_events do
   desc "delete gbif events"
   task delete_gbif_events: :environment do
+    index = ENV["INDEX"]
+
+    if index.blank?
+      Rails.logger.error("You must provide an INDEX environment variable")
+      exit 1 # non-zero status so the caller can tell the task did not run
+    end
+
+    max_delete_count = ENV["MAX_DELETE_COUNT"]
+
+    if max_delete_count.blank?
+      Rails.logger.error("You must provide an MAX_DELETE_COUNT environment variable")
+      exit 1 # non-zero status so the caller can tell the task did not run
+    end
+
     options = {
-      size: 1000,
+      # NOTE(review): batch size dropped from 1000 to 2 - confirm this is intentional and not a debugging leftover
+      size: 2,
       from_id: (ENV["FROM_ID"] || Event.minimum(:id)).to_i,
       until_id: (ENV["UNTIL_ID"] || Event.maximum(:id)).to_i,
       filter: {},
       query: "+subj.registrantId:datacite.gbif.gbif +relation_type_id:references -source_doi:(\"10.15468/QJGWBA\" OR \"10.35035/GDWQ-3V93\" OR \"10.15469/3XSWXB\" OR \"10.15469/UBP6QO\" OR \"10.35000/TEDB-QD70\" OR \"10.15469/2YMQOZ\")",
+      job_name: "DeleteGbifEventsJob",
       label: "gbif_event_cleanup_#{Time.now.utc.strftime("%d%m%Y%H%M%S")}",
-      job_name: "DeleteGbifEventsJob"
+      max_delete_count: max_delete_count.to_i,
+      index: index
     }
 
     Event.loop_through_gbif_events(options)