From cdaa1b615b4bc30108b2ffc91bd92996393c9862 Mon Sep 17 00:00:00 2001 From: Jeremy Friesen Date: Wed, 14 Feb 2024 10:04:11 -0500 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Favor=20configuration=20ov?= =?UTF-8?q?er=20hard-coding?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given that Hyrax provides a mechanism for specifying a queue name, we should echo that configuration, but also provide our own configuration as well as the same fallback. --- app/jobs/bulkrax/create_relationships_job.rb | 2 +- app/jobs/bulkrax/delete_job.rb | 2 +- app/jobs/bulkrax/download_cloud_file_job.rb | 2 +- app/jobs/bulkrax/import_collection_job.rb | 2 +- app/jobs/bulkrax/import_file_set_job.rb | 2 +- app/jobs/bulkrax/import_work_job.rb | 2 +- app/jobs/bulkrax/importer_job.rb | 2 +- lib/bulkrax.rb | 405 ++++++++++--------- 8 files changed, 214 insertions(+), 205 deletions(-) diff --git a/app/jobs/bulkrax/create_relationships_job.rb b/app/jobs/bulkrax/create_relationships_job.rb index 6e9eb77b8..ff75c8717 100644 --- a/app/jobs/bulkrax/create_relationships_job.rb +++ b/app/jobs/bulkrax/create_relationships_job.rb @@ -40,7 +40,7 @@ class CreateRelationshipsJob < ApplicationJob include DynamicRecordLookup - queue_as :import + queue_as Bulkrax.config.ingest_queue_name # @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers # @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters) diff --git a/app/jobs/bulkrax/delete_job.rb b/app/jobs/bulkrax/delete_job.rb index 764cd8a72..5072045fa 100644 --- a/app/jobs/bulkrax/delete_job.rb +++ b/app/jobs/bulkrax/delete_job.rb @@ -2,7 +2,7 @@ module Bulkrax class DeleteJob < ApplicationJob - queue_as :import + queue_as Bulkrax.config.ingest_queue_name # rubocop:disable Rails/SkipsModelValidations def perform(entry, importer_run) diff --git a/app/jobs/bulkrax/download_cloud_file_job.rb b/app/jobs/bulkrax/download_cloud_file_job.rb 
index f56e81285..313c2f010 100644 --- a/app/jobs/bulkrax/download_cloud_file_job.rb +++ b/app/jobs/bulkrax/download_cloud_file_job.rb @@ -2,7 +2,7 @@ module Bulkrax class DownloadCloudFileJob < ApplicationJob - queue_as :import + queue_as Bulkrax.config.ingest_queue_name # Retrieve cloud file and write to the imports directory # Note: if using the file system, the mounted directory in diff --git a/app/jobs/bulkrax/import_collection_job.rb b/app/jobs/bulkrax/import_collection_job.rb index 163ecd6f9..6ce46d6d3 100644 --- a/app/jobs/bulkrax/import_collection_job.rb +++ b/app/jobs/bulkrax/import_collection_job.rb @@ -2,7 +2,7 @@ module Bulkrax class ImportCollectionJob < ApplicationJob - queue_as :import + queue_as Bulkrax.config.ingest_queue_name # rubocop:disable Rails/SkipsModelValidations def perform(*args) diff --git a/app/jobs/bulkrax/import_file_set_job.rb b/app/jobs/bulkrax/import_file_set_job.rb index c74ab45c0..fd74fe6a8 100644 --- a/app/jobs/bulkrax/import_file_set_job.rb +++ b/app/jobs/bulkrax/import_file_set_job.rb @@ -6,7 +6,7 @@ class MissingParentError < ::StandardError; end class ImportFileSetJob < ApplicationJob include DynamicRecordLookup - queue_as :import + queue_as Bulkrax.config.ingest_queue_name attr_reader :importer_run_id diff --git a/app/jobs/bulkrax/import_work_job.rb b/app/jobs/bulkrax/import_work_job.rb index 95258049f..d7620d4ac 100644 --- a/app/jobs/bulkrax/import_work_job.rb +++ b/app/jobs/bulkrax/import_work_job.rb @@ -2,7 +2,7 @@ module Bulkrax class ImportWorkJob < ApplicationJob - queue_as :import + queue_as Bulkrax.config.ingest_queue_name # rubocop:disable Rails/SkipsModelValidations # diff --git a/app/jobs/bulkrax/importer_job.rb b/app/jobs/bulkrax/importer_job.rb index 42691b4b1..9fb0f4456 100644 --- a/app/jobs/bulkrax/importer_job.rb +++ b/app/jobs/bulkrax/importer_job.rb @@ -2,7 +2,7 @@ module Bulkrax class ImporterJob < ApplicationJob - queue_as :import + queue_as Bulkrax.config.ingest_queue_name def perform(importer_id, 
only_updates_since_last_import = false) importer = Importer.find(importer_id) diff --git a/lib/bulkrax.rb b/lib/bulkrax.rb index 486a92292..9433de196 100644 --- a/lib/bulkrax.rb +++ b/lib/bulkrax.rb @@ -1,16 +1,16 @@ -# frozen_string_literal: true + # frozen_string_literal: true -require "bulkrax/version" -require "bulkrax/engine" -require 'active_support/all' + require "bulkrax/version" + require "bulkrax/engine" + require 'active_support/all' -# rubocop:disable Metrics/ModuleLength + # rubocop:disable Metrics/ModuleLength module Bulkrax - extend self # rubocop:disable Style/ModuleFunction - extend Forwardable + extend self # rubocop:disable Style/ModuleFunction + extend Forwardable - ## - # @api public + ## + # @api public class Configuration attr_accessor :api_definition, :curation_concerns, @@ -35,6 +35,15 @@ class Configuration :reserved_properties, :server_name + attr_writer :ingest_queue_name + ## + # @return [String, Proc] + def ingest_queue_name + return @ingest_queue_name if @ingest_queue_name.present? + return Hyrax.config.ingest_queue_name if defined?(Hyrax) + :import + end + attr_writer :use_locking def use_locking @@ -45,61 +54,61 @@ def use_locking alias use_locking? use_locking end - def config - @config ||= Configuration.new - yield @config if block_given? - @config - end - alias setup config + def config + @config ||= Configuration.new + yield @config if block_given? 
+ @config + end + alias setup config - def_delegators :@config, - :api_definition, - :api_definition=, - :curation_concerns, - :curation_concerns=, - :default_field_mapping, - :default_field_mapping=, - :default_work_type, - :default_work_type=, - :export_path, - :export_path=, - :field_mappings, - :field_mappings=, - :file_model_class, - :file_model_class=, - :fill_in_blank_source_identifiers, - :fill_in_blank_source_identifiers=, - :generated_metadata_mapping, - :generated_metadata_mapping=, - :import_path, - :import_path=, - :multi_value_element_join_on, - :multi_value_element_join_on=, - :multi_value_element_split_on, - :multi_value_element_split_on=, - :object_factory, - :object_factory=, - :parsers, - :parsers=, - :qa_controlled_properties, - :qa_controlled_properties=, - :related_children_field_mapping, - :related_children_field_mapping=, - :related_parents_field_mapping, - :related_parents_field_mapping=, - :relationship_job_class, - :relationship_job_class=, - :removed_image_path, - :removed_image_path=, - :required_elements, - :required_elements=, - :reserved_properties, - :reserved_properties=, - :server_name, - :server_name=, - :use_locking, - :use_locking=, - :use_locking? 
+ def_delegators :@config, + :api_definition, + :api_definition=, + :curation_concerns, + :curation_concerns=, + :default_field_mapping, + :default_field_mapping=, + :default_work_type, + :default_work_type=, + :export_path, + :export_path=, + :field_mappings, + :field_mappings=, + :file_model_class, + :file_model_class=, + :fill_in_blank_source_identifiers, + :fill_in_blank_source_identifiers=, + :generated_metadata_mapping, + :generated_metadata_mapping=, + :import_path, + :import_path=, + :multi_value_element_join_on, + :multi_value_element_join_on=, + :multi_value_element_split_on, + :multi_value_element_split_on=, + :object_factory, + :object_factory=, + :parsers, + :parsers=, + :qa_controlled_properties, + :qa_controlled_properties=, + :related_children_field_mapping, + :related_children_field_mapping=, + :related_parents_field_mapping, + :related_parents_field_mapping=, + :relationship_job_class, + :relationship_job_class=, + :removed_image_path, + :removed_image_path=, + :required_elements, + :required_elements=, + :reserved_properties, + :reserved_properties=, + :server_name, + :server_name=, + :use_locking, + :use_locking=, + :use_locking? 
config do |conf| conf.parsers = [ @@ -138,149 +147,149 @@ def conf.file_model_class=(val) # Based on Hyrax CoreMetadata && BasicMetadata # Override at application level to change conf.field_mappings = { - "Bulkrax::OaiDcParser" => { - "contributor" => { from: ["contributor"] }, - # no appropriate mapping for coverage (based_near needs id) - # ""=>{:from=>["coverage"]}, - "creator" => { from: ["creator"] }, - "date_created" => { from: ["date"] }, - "description" => { from: ["description"] }, - # no appropriate mapping for format - # ""=>{:from=>["format"]}, - "identifier" => { from: ["identifier"] }, - "language" => { from: ["language"], parsed: true }, - "publisher" => { from: ["publisher"] }, - "related_url" => { from: ["relation"] }, - "rights_statement" => { from: ["rights"] }, - "source" => { from: ["source"] }, - "subject" => { from: ["subject"], parsed: true }, - "title" => { from: ["title"] }, - "resource_type" => { from: ["type"], parsed: true }, - "remote_files" => { from: ["thumbnail_url"], parsed: true } + "Bulkrax::OaiDcParser" => { + "contributor" => { from: ["contributor"] }, + # no appropriate mapping for coverage (based_near needs id) + # ""=>{:from=>["coverage"]}, + "creator" => { from: ["creator"] }, + "date_created" => { from: ["date"] }, + "description" => { from: ["description"] }, + # no appropriate mapping for format + # ""=>{:from=>["format"]}, + "identifier" => { from: ["identifier"] }, + "language" => { from: ["language"], parsed: true }, + "publisher" => { from: ["publisher"] }, + "related_url" => { from: ["relation"] }, + "rights_statement" => { from: ["rights"] }, + "source" => { from: ["source"] }, + "subject" => { from: ["subject"], parsed: true }, + "title" => { from: ["title"] }, + "resource_type" => { from: ["type"], parsed: true }, + "remote_files" => { from: ["thumbnail_url"], parsed: true } }, - "Bulkrax::OaiQualifiedDcParser" => { - "abstract" => { from: ["abstract"] }, - "alternative_title" => { from: ["alternative"] }, + 
"Bulkrax::OaiQualifiedDcParser" => { + "abstract" => { from: ["abstract"] }, + "alternative_title" => { from: ["alternative"] }, "bibliographic_citation" => { from: ["bibliographicCitation"] }, - "contributor" => { from: ["contributor"] }, - "creator" => { from: ["creator"] }, - "date_created" => { from: ["created"] }, - "description" => { from: ["description"] }, - "language" => { from: ["language"] }, - "license" => { from: ["license"] }, - "publisher" => { from: ["publisher"] }, - "related_url" => { from: ["relation"] }, - "rights_holder" => { from: ["rightsHolder"] }, - "rights_statement" => { from: ["rights"] }, - "source" => { from: ["source"] }, - "subject" => { from: ["subject"], parsed: true }, - "title" => { from: ["title"] }, - "resource_type" => { from: ["type"], parsed: true }, - "remote_files" => { from: ["thumbnail_url"], parsed: true } - }, - # When empty, a default_field_mapping will be generated - "Bulkrax::CsvParser" => {}, - 'Bulkrax::BagitParser' => {}, - 'Bulkrax::XmlParser' => {} - } + "contributor" => { from: ["contributor"] }, + "creator" => { from: ["creator"] }, + "date_created" => { from: ["created"] }, + "description" => { from: ["description"] }, + "language" => { from: ["language"] }, + "license" => { from: ["license"] }, + "publisher" => { from: ["publisher"] }, + "related_url" => { from: ["relation"] }, + "rights_holder" => { from: ["rightsHolder"] }, + "rights_statement" => { from: ["rights"] }, + "source" => { from: ["source"] }, + "subject" => { from: ["subject"], parsed: true }, + "title" => { from: ["title"] }, + "resource_type" => { from: ["type"], parsed: true }, + "remote_files" => { from: ["thumbnail_url"], parsed: true } + }, + # When empty, a default_field_mapping will be generated + "Bulkrax::CsvParser" => {}, + 'Bulkrax::BagitParser' => {}, + 'Bulkrax::XmlParser' => {} + } - # Lambda to set the default field mapping - conf.default_field_mapping = lambda do |field| - return if field.blank? 
- { - field.to_s => - { - from: [field.to_s], - split: false, - parsed: Bulkrax::ApplicationMatcher.method_defined?("parse_#{field}"), - if: nil, - excluded: false - } - } - end + # Lambda to set the default field mapping + conf.default_field_mapping = lambda do |field| + return if field.blank? + { + field.to_s => + { + from: [field.to_s], + split: false, + parsed: Bulkrax::ApplicationMatcher.method_defined?("parse_#{field}"), + if: nil, + excluded: false + } + } + end - # Properties that should not be used in imports. They are reserved for use by Hyrax. - conf.reserved_properties = %w[ - create_date - modified_date - date_modified - date_uploaded - depositor - arkivo_checksum - has_model - head - label - import_url - on_behalf_of - proxy_depositor - owner - state - tail - original_url - relative_path - ] + # Properties that should not be used in imports. They are reserved for use by Hyrax. + conf.reserved_properties = %w[ + create_date + modified_date + date_modified + date_uploaded + depositor + arkivo_checksum + has_model + head + label + import_url + on_behalf_of + proxy_depositor + owner + state + tail + original_url + relative_path + ] - # List of Questioning Authority properties that are controlled via YAML files in - # the config/authorities/ directory. For example, the :rights_statement property - # is controlled by the active terms in config/authorities/rights_statements.yml - conf.qa_controlled_properties = %w[rights_statement license] - end + # List of Questioning Authority properties that are controlled via YAML files in + # the config/authorities/ directory. 
For example, the :rights_statement property + # is controlled by the active terms in config/authorities/rights_statements.yml + conf.qa_controlled_properties = %w[rights_statement license] + end - def api_definition - @api_definition ||= ActiveSupport::HashWithIndifferentAccess.new( - YAML.safe_load( - ERB.new( - File.read(Rails.root.join('config', 'bulkrax_api.yml')) - ).result - ) - ) - end + def api_definition + @api_definition ||= ActiveSupport::HashWithIndifferentAccess.new( + YAML.safe_load( + ERB.new( + File.read(Rails.root.join('config', 'bulkrax_api.yml')) + ).result + ) + ) + end - DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON = ' | ' - # Specify the delimiter for joining an attribute's multi-value array into a string. - # - # @note the specific delimiter should likely be present in the multi_value_element_split_on - # expression. - def multi_value_element_join_on - @multi_value_element_join_on ||= DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON - end + DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON = ' | ' + # Specify the delimiter for joining an attribute's multi-value array into a string. + # + # @note the specific delimiter should likely be present in the multi_value_element_split_on + # expression. + def multi_value_element_join_on + @multi_value_element_join_on ||= DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON + end - DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON = /\s*[:;|]\s*/.freeze - # @return [RegexClass] the regular express to use to "split" an attribute's values. If set to - # `true` use the DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON. - # - # @note The "true" value is to preserve backwards compatibility. - # @see DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON + DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON = /\s*[:;|]\s*/.freeze + # @return [Regexp] the regular expression to use to "split" an attribute's values. If set to + # `true` use the DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON. + # + # @note The "true" value is to preserve backwards compatibility. 
+ # @see DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON def multi_value_element_split_on - if @multi_value_element_join_on.is_a?(TrueClass) - DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON + if @multi_value_element_join_on.is_a?(TrueClass) + DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON else - @multi_value_element_split_on ||= DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON + @multi_value_element_split_on ||= DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON end end - # Responsible for stripping hidden characters from the given string. - # - # @param value [#to_s] - # @return [String] with hidden characters removed - # - # @see https://github.com/samvera-labs/bulkrax/issues/688 - def normalize_string(value) - # Removing [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark) - value.to_s.delete("\xEF\xBB\xBF") - end + # Responsible for stripping hidden characters from the given string. + # + # @param value [#to_s] + # @return [String] with hidden characters removed + # + # @see https://github.com/samvera-labs/bulkrax/issues/688 + def normalize_string(value) + # Removing [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark) + value.to_s.delete("\xEF\xBB\xBF") + end - def fallback_user_for_importer_exporter_processing - return User.batch_user if defined?(Hyrax) && User.respond_to?(:batch_user) + def fallback_user_for_importer_exporter_processing + return User.batch_user if defined?(Hyrax) && User.respond_to?(:batch_user) - raise "We have no fallback user available for Bulkrax.fallback_user_for_importer_exporter_processing" - end + raise "We have no fallback user available for Bulkrax.fallback_user_for_importer_exporter_processing" + end - # This class confirms to the Active::Support.serialize interface. It's job is to ensure that we - # don't have keys with the tricksy Byte Order Mark character. 
- # - # @see https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html#method-i-serialize - class NormalizedJson + # This class conforms to the Active::Support.serialize interface. Its job is to ensure that we + # don't have keys with the tricksy Byte Order Mark character. + # + # @see https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html#method-i-serialize + class NormalizedJson def self.normalize_keys(hash) return hash unless hash.respond_to?(:each_pair) returning_value = {} @@ -292,18 +301,18 @@ def self.normalize_keys(hash) # When we write the serialized data to the database, we "dump" the value into that database # column. - def self.dump(value) - JSON.dump(normalize_keys(value)) - end + def self.dump(value) + JSON.dump(normalize_keys(value)) + end # When we load the serialized data from the database, we pass the database's value into "load" # function. # # rubocop:disable Security/JSONLoad - def self.load(string) - normalize_keys(JSON.load(string)) - end + def self.load(string) + normalize_keys(JSON.load(string)) + end # rubocop:enable Security/JSONLoad - end + end end -# rubocop:disable Metrics/ModuleLength + # rubocop:disable Metrics/ModuleLength