Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add skip_client_lookup option when indexing all datacite dois #1181

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 33 additions & 22 deletions app/models/datacite_doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,14 @@ class DataciteDoi < Doi

# Enqueues one DoiImportByClientJob for every distinct `datacentre` value
# found on DataciteDoi records.
#
# options[:index]      - index passed to each job (defaults to inactive_index)
# options[:batch_size] - batch size passed to each job (defaults to 2000)
#
# Every job is enqueued with skip_client_lookup: true.
def self.index_all_by_client(options = {})
  # Hash of datacentre id => number of DOIs; only the keys are used below.
  client_to_doi_count = DataciteDoi.where(type: "DataciteDoi").group(:datacentre).count
  # throw out id 0
  client_to_doi_count.delete(0)

  index = options[:index] || inactive_index
  batch_size = options[:batch_size] || 2000

  client_to_doi_count.each_key do |client_id|
    DoiImportByClientJob.perform_later(
      client_id,
      index: index,
      batch_size: batch_size,
      skip_client_lookup: true
    )
  end
end
Expand Down Expand Up @@ -64,7 +61,30 @@ def self.import_by_ids(options = {})
count
end

# Resolves a non-deleted client and returns its DOIs.
#
# client_id is matched against the client's id first and, when that finds
# nothing, against the client's symbol. If neither lookup succeeds, an error
# is logged and `exit` is called.
# options is accepted but not used by this method.
def self.lookup_client_and_get_dois(client_id, options = {})
  # Search by proper ID first, then fall back to the symbol
  repository =
    ::Client.find_by(id: client_id, deleted_at: nil) ||
    ::Client.find_by(symbol: client_id, deleted_at: nil)

  unless repository
    Rails.logger.error "Repository not found for client ID #{client_id}."
    exit
  end

  # import DOIs for client
  Rails.logger.info "Grabbing DOIs for repository #{repository.symbol}"

  repository.dois
end

def self.import_by_client(client_id, options = {})
# Abort if client_id is blank
if client_id.blank?
Rails.logger.error "Missing client ID."
exit
end
# Get optional parameters
import_index =
if Rails.env.test?
Expand All @@ -74,28 +94,19 @@ def self.import_by_client(client_id, options = {})
else
active_index
end

batch_size = options[:batch_size] || 50

# Abort if client_id is blank
if client_id.blank?
Rails.logger.error "Missing client ID."
exit
end
# Search by proper ID
client = ::Client.find_by(id: client_id, deleted_at: nil)
if client.nil?
# Search by symbol
client = ::Client.find_by(symbol: client_id, deleted_at: nil)
if client.nil?
Rails.logger.error "Repository not found for client ID #{client_id}."
exit
end
# If skip_client_lookup is set in the options, skip the client lookup and query the DOIs directly by client_id
if options[:skip_client_lookup]
client_dois = DataciteDoi.where(type: "DataciteDoi").where(datacentre: client_id)
else
client_dois = lookup_client_and_get_dois(client_id, options)
end

# import DOIs for client
Rails.logger.info "Started import of #{client.dois.count} DOIs for repository #{client.symbol} into the index '#{import_index}'"
Rails.logger.info "Started import of #{client_dois.count} DOIs for client_id #{client_id} into the index '#{import_index}'"

client.dois.find_in_batches(batch_size: batch_size) do |dois|
client_dois.select(:id).find_in_batches(batch_size: batch_size) do |dois|
ids = dois.pluck(:id)
DataciteDoiImportInBulkJob.perform_later(ids, index: import_index)
end
Expand Down