Skip to content

Commit

Permalink
rework gbif cleanup to bulk delete
Browse files (browse the repository at this point in the history)
  • Loading branch information
wendelfabianchinsamy committed Jul 25, 2024
1 parent 532c4e0 commit 56fb4da
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 9 deletions.
7 changes: 1 addition & 6 deletions app/jobs/delete_gbif_events_job.rb
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,12 +5,7 @@ class DeleteGbifEventsJob < ApplicationJob

def perform(ids, options = {})
label = options[:label]
index = ENV["INDEX"]

if index.blank?
Rails.logger.error("#{label}: ENV['INDEX'] must be provided")
return
end
index = options[:index]

# delete event records from mysql
sql = ActiveRecord::Base.sanitize_sql_array(["DELETE FROM events WHERE id IN (?)", ids])
Expand Down
6 changes: 5 additions & 1 deletion app/models/event.rb
Original file line number | Diff line number | Diff line change
Expand Up @@ -887,11 +887,13 @@ def self.loop_through_gbif_events(options)
label = options[:label] || ""
job_name = options[:job_name] || ""
query = options[:query].presence
delete_count = 0
max_delete_count = options[:max_delete_count]

response = Event.query(query, filter.merge(page: { size: 1, cursor: [] }))

if response.size.positive?
while response.size.positive?
while response.size.positive? && delete_count < max_delete_count
response = Event.query(query, filter.merge(page: { size: size, cursor: cursor }))

break unless response.size.positive?
Expand All @@ -905,6 +907,8 @@ def self.loop_through_gbif_events(options)
ids = response.results.map(&:_id).uniq

Object.const_get(job_name).perform_later(ids, options)

delete_count += response.size
end
end

Expand Down
20 changes: 18 additions & 2 deletions lib/tasks/event.rake
Original file line number | Diff line number | Diff line change
Expand Up @@ -87,14 +87,30 @@ end
namespace :gbif_events do
desc "delete gbif events"
task delete_gbif_events: :environment do
index = ENV["INDEX"]

if index.blank?
Rails.logger.error("You must provide an INDEX environment variable")
exit
end

max_delete_count = ENV["MAX_DELETE_COUNT"]

if max_delete_count.blank?
Rails.logger.error("You must provide an MAX_DELETE_COUNT environment variable")
exit
end

options = {
size: 1000,
size: 2,
from_id: (ENV["FROM_ID"] || Event.minimum(:id)).to_i,
until_id: (ENV["UNTIL_ID"] || Event.maximum(:id)).to_i,
filter: {},
query: "+subj.registrantId:datacite.gbif.gbif +relation_type_id:references -source_doi:(\"10.15468/QJGWBA\" OR \"10.35035/GDWQ-3V93\" OR \"10.15469/3XSWXB\" OR \"10.15469/UBP6QO\" OR \"10.35000/TEDB-QD70\" OR \"10.15469/2YMQOZ\")",
job_name: "DeleteGbifEventsJob",
label: "gbif_event_cleanup_#{Time.now.utc.strftime("%d%m%Y%H%M%S")}",
job_name: "DeleteGbifEventsJob"
max_delete_count: max_delete_count.to_i,
index: index
}

Event.loop_through_gbif_events(options)
Expand Down

0 comments on commit 56fb4da

Please sign in to comment.