Skip to content

Commit

Permalink
Adds Fedora record removal to DeletePhysicalInstantiations fix
Browse files Browse the repository at this point in the history
Fedora records were not being deleted and were then being retreived by PhysicalInstantiationResource
models, effectively getting stuck on the parent Asset.

This allows the Fix::DeletePhysicalInstantiaton class to remove PhysicalInstantiations of a give
media type from a set of AssetResources identified by an ID list.

Usage

Includes class SampleData class for finding test data sets within an existing repository (e.g. demo).
  • Loading branch information
afred committed Nov 7, 2024
1 parent 9fbe8e8 commit 5f46e37
Show file tree
Hide file tree
Showing 6 changed files with 217 additions and 48 deletions.
24 changes: 17 additions & 7 deletions app/models/concerns/ams/work_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,24 @@ def to_rdf_representation
end

def members
return @members if @members.present?
@members = member_ids.map do |id|
begin
Hyrax.query_service.find_by(id: id)
rescue Valkyrie::Persistence::ObjectNotFoundError
Rails.logger.warn("Could not find member #{id} for #{self.id}")
end
@members ||= []
member_id_vals = member_ids.map { |id| id.to_s }.to_set
ids_from_members = @members.map { |m| m.id.to_s }.to_set
# If the member_ids do not match the IDs within the members, then re-set the members to be an accurate reflection of the member IDs.
# This allows us to modify resource.member_ids and have #members accurately reflect the changes.
if member_id_vals != ids_from_members
@members = member_ids.map do |id|
begin
Hyrax.query_service.find_by(id: id)
rescue Valkyrie::Persistence::ObjectNotFoundError
Rails.logger.warn("Could not find member #{id} for #{self.id}")
# Return nil (to be removed with #compact below)
nil
end
# Remove nils, all members must be able to respond to #members for recurisve member lookup.
end.compact
end
@members
end

def to_solr
Expand Down
11 changes: 7 additions & 4 deletions lib/fix/batch_process.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ module Fix
class BatchProcess
attr_reader :ids, :log, :cli_ptions, :log_level

def initialize(ids_file:, log_level: Logger::INFO)
@ids = File.readlines(ids_file, chomp: true)
def initialize(ids_file: nil, ids: [], log_level: Logger::INFO)
# Try reading ids from :ids_file first if it's given
@ids = File.readlines(ids_file, chomp: true) if ids_file
# Set ids to given param if not from a file
@ids ||= ids
@cli_options = {}
@log = Logger.new(STDOUT)
@log.level = log_level
Expand Down Expand Up @@ -38,8 +41,8 @@ def self.cli_options
@cli_options ||= {}
end

# self.option_parser Creates a default OptionParser for cli options and allows subclasses
# to add their own options.
# option_parser (class method) -- A default OptionParser for cli options that allows subclasses
# to add their own CLI options as needed.
# @param block [Proc] A block that takes an OptionParser instance as an argument.
# @return [OptionParser] The OptionParser instance.
# Usage:
Expand Down
70 changes: 59 additions & 11 deletions lib/fix/delete_asset_resources.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,68 @@

module Fix
class DeleteAssetResources < BatchProcess

def run
asset_resources.each do |ar|
log.info "Destroying Asset Resource #{ar.id}"
begin
Hyrax.persister.delete(resource: ar)
Hyrax.index_adapter.delete(resource: ar)
Hyrax.index_adapter.connection.commit
log.info "Asset Resource #{ar.id} destroyed."
rescue => e
log_error e
end
log.info "Deleting #{asset_resources.count} Asset Resources..."
asset_resources.map do |ar|
destroy_work_and_members(resource: ar)
end
log.info "Done."
log.info report
end

private


def results; @results ||= []; end

def work_destroy_transaction
Hyrax::Transactions::WorkDestroy.new
end

def destroy_work_and_members(resource:)
result = { resource: resource }
resource.members.each do |member|
resource.members.delete(member)
destroy_work_and_members(resource: member)
end
log.info "DELETING #{resource.class} #{resource.id.id}..."
work_destroy_transaction.call(resource)
log.info "DELETED #{resource.class} #{resource.id.id}."
delete_from_fedora(resource)
result[:error] = false
results << result
rescue => e
result[:error] = e
results << result
log_error e
end

def delete_from_fedora(resource)
log.info "SEARCHING FEDORA: #{resource.class} #{resource.id}..."
af_object = ActiveFedora::Base.find(resource.id.to_s)
af_object.destroy!
log.info "DELETED: #{af_object.class} #{af_object.id} from Fedora."
rescue ActiveFedora::ObjectNotFoundError
# Not a real error, just means it's not in Fedora. Carry on.
log.info "NOT FOUND."
end

def report
r = "\nRESULTS:\n"
r += "Successfully Deleted #{successes.count}:\n"
successes.each do |result|
r += "#{result[:resource].id}, #{result[:resource].class}\n"
end
puts "Done."
r += "Failed while Deleting #{failures.count}:\n"
failures.each do |result|
r += "#{result[:resource].id}, #{result[:error]}\n"
end
r
end

def successes; results.reject { |r| r[:error] }; end
def failures; results.select { |r| r[:error] }; end
end
end

Expand Down
64 changes: 46 additions & 18 deletions lib/fix/delete_physical_instantiations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,54 @@ def initialize(media_type:, **args)

def run
asset_resources.each do |ar|
pis = ar.physical_instantiation_resources.select { |pi| pi.media_type == media_type }
if pis.count == 0
log.warn "No physical instantiations with media type '#{media_type}' were found for Asset #{ar.id}, skipping."
next
end

pis.each do |pi|
begin
log.info "Deleting Physical Instantiation #{pi.id} with media type '#{media_type}' from Asset #{ar.id}..."
Hyrax.persister.delete(resource: pi)
Hyrax.index_adapter.delete(resource: pi)
log.info "Deleted physical instantiation #{pi.id} with media type '#{media_type}' from Asset #{ar.id}."
Hyrax.index_adapter.save(resource: ar)
log.info "Asset Resource #{ar.id} saved."
rescue => e
log_error(e)
end
end
process_asset_resource(ar)
end
end

private

def process_asset_resource(ar)
log.info "Processing Asset Resource #{ar.id}..."
pis = ar.physical_instantiation_resources.select { |pi| pi.media_type == media_type }
if pis.count == 0
log.warn "SKIPPING: No physical instantiations with media type '#{media_type}'."
return
end

pis.each do |pi|
delete_physical_instantiation_resource(pi)
ar.member_ids -= [pi.id]
end
Hyrax.persister.save(resource: ar)
Hyrax.index_adapter.save(resource: ar)
log.info "SAVED: Asset Resource #{ar.id}."
rescue => e
log_error(e)
end

def delete_physical_instantiation_resource(pi)
work_destroy_transaction.call(pi)
delete_from_fedora(pi)
Hyrax.index_adapter.delete(resource: pi)
log.info "DELETED: Physical Instantiation Resource #{pi.id} with media type '#{media_type}'"
rescue => e
log_error(e)
end

def delete_from_fedora(pi)
log.info "SEARCHING: PhysicalInstantiation #{pi.id} in Fedora..."
pi = PhysicalInstantiation.find(pi.id)
pi.destroy!
log.info "DELETED: PhysicalInstantiation #{pi.id} from Fedora."
rescue ActiveFedora::ObjectNotFoundError
log.info "NOT FOUND."
rescue => e
log_error(e)
end

def work_destroy_transaction
Hyrax::Transactions::WorkDestroy.new
end
end
end

Expand Down
87 changes: 87 additions & 0 deletions lib/fix/sample_data.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# first shell into a running container with bash, then run pry.
# load the app
require_relative '../../config/environment'

# Set some variables
class SampleData
attr_reader :asset_sample_size, :log

def initialize(asset_sample_size: 100)
@asset_sample_size = asset_sample_size
@log = Logger.new(STDOUT)
end

def solr
@solr ||= Blacklight.default_index.connection
end

def a_docs
@a_docs ||= solr.get('select', params: {q: 'has_model_ssim:Asset', rows: asset_sample_size, start: rand_start})['response']['docs']
end

def rand_start
@rand_start ||= rand(0..(total_assets - asset_sample_size - 1))
end

def total_assets
solr.get('select', params: {q: 'has_model_ssim:Asset', rows: 0})['response']['numFound']
end

def ars(refresh: false)
# Rest instance variables if refresh is true
@ars = @ars_with_pids = @ars_with_pis_in_fedora = nil if refresh
@ars ||= a_docs.map{|doc| AssetResource.find(doc['id'])}
end

def ars_with_pis(media_type: nil)
@ars_with_pis ||= ars.select{ |ar| ar.physical_instantiation_resources.present? }
if media_type
filter(ars: @ars_with_pis, media_type: media_type)
else
@ars_with_pis
end
end

def ars_with_pis_in_fedora(media_type: nil)
@ars_with_pis_in_fedora ||= ars_with_pis.select do |ar|
ar.physical_instantiation_resources.any? do |pi|
begin
pi = PhysicalInstantiation.find(pi.id)
rescue ActiveFedora::ObjectNotFoundError, Ldp::Gone => e
log.error("#{e.class}: #{e.message}")
false
end
end
end

if media_type
filter(ars: @ars_with_pis_in_fedora, media_type: media_type)
else
@ars_with_pis_in_fedora
end
end

def save_assets_to_pg
ars.each do |ar|
Hyrax.persister.save(resource: ar)
end
ars(refresh: true)
end

# Class method for filtering a set of AssetResources
def self.filter(ars: [], media_type: nil)
filtered = ars
if media_type
filtered = filtered.select do |ar|
ar.physical_instantiation_resources.any? do |pi|
pi.media_type.to_s.strip.downcase == media_type.to_s.strip.downcase
end
end
end
filtered
end

# Delegated instance method
def filter(*args, **kwargs); self.class.filter(*args, **kwargs); end

end
9 changes: 1 addition & 8 deletions spec/fix/delete_asset_resources_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,6 @@
end
end

let(:ids_file) do
f = Tempfile.new
f.write(ids.join("\n"))
f.flush
f.path
end

# Non-memoized helper for fetching Asset by ID.
def asset_resource_results
ids.map do |id|
Expand All @@ -50,7 +43,7 @@ def asset_resource_results

it 'deletes the AssetResources' do
expect(asset_resource_results.count).to be > 0
Fix::DeleteAssetResources.new(ids_file: ids_file).run
Fix::DeleteAssetResources.new(ids: ids).run
expect(asset_resource_results.count).to eq 0
end
end

0 comments on commit 5f46e37

Please sign in to comment.