Skip to content

Commit

Permalink
Merge pull request #925 from WGBH-MLA/854-more-more-more-delete-Fedor…
Browse files Browse the repository at this point in the history
…a-hangers-on

Adds Fedora record removal to DeletePhysicalInstantiations fix
  • Loading branch information
foglabs authored Dec 5, 2024
2 parents 81e158a + b235a80 commit fc3dbd0
Show file tree
Hide file tree
Showing 6 changed files with 219 additions and 49 deletions.
24 changes: 17 additions & 7 deletions app/models/concerns/ams/work_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,24 @@ def to_rdf_representation
end

def members
return @members if @members.present?
@members = member_ids.map do |id|
begin
Hyrax.query_service.find_by(id: id)
rescue Valkyrie::Persistence::ObjectNotFoundError
Rails.logger.warn("Could not find member #{id} for #{self.id}")
end
@members ||= []
member_id_vals = member_ids.map { |id| id.to_s }.to_set
ids_from_members = @members.map { |m| m.id.to_s }.to_set
# If the member_ids do not match the IDs within the members, then re-set the members to be an accurate reflection of the member IDs.
# This allows us to modify resource.member_ids and have #members accurately reflect the changes.
if member_id_vals != ids_from_members
@members = member_ids.map do |id|
begin
Hyrax.query_service.find_by(id: id)
rescue Valkyrie::Persistence::ObjectNotFoundError
Rails.logger.warn("Could not find member #{id} for #{self.id}")
# Return nil (to be removed with #compact below)
nil
end
# Remove nils, all members must be able to respond to #members for recurisve member lookup.
end.compact
end
@members
end

def to_solr
Expand Down
13 changes: 8 additions & 5 deletions lib/fix/batch_process.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ module Fix
class BatchProcess
attr_reader :ids, :log, :cli_ptions, :log_level

def initialize(ids_file:, log_level: Logger::INFO)
@ids = File.readlines(ids_file, chomp: true)
def initialize(ids_file: nil, ids: [], log_level: Logger::INFO)
# Try reading ids from :ids_file first if it's given
@ids = File.readlines(ids_file, chomp: true) if ids_file
# Set ids to given param if not from a file
@ids ||= ids
@cli_options = {}
@log = Logger.new(STDOUT)
@log.level = log_level
Expand Down Expand Up @@ -38,8 +41,8 @@ def self.cli_options
@cli_options ||= {}
end

# self.option_parser Creates a default OptionParser for cli options and allows subclasses
# to add their own options.
# option_parser (class method) -- A default OptionParser for cli options that allows subclasses
# to add their own CLI options as needed.
# @param block [Proc] A block that takes an OptionParser instance as an argument.
# @return [OptionParser] The OptionParser instance.
# Usage:
Expand All @@ -65,7 +68,7 @@ def self.option_parser(&block)
end

# Allow file input of AAPB IDs
opts.on("-f", "--file FILE", "List of AAPB IDs, one per line") do |file|
opts.on("-f FILE", "--file FILE", "List of AAPB IDs, one per line") do |file|
cli_options[:ids_file] = file
end
end
Expand Down
71 changes: 60 additions & 11 deletions lib/fix/delete_asset_resources.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,69 @@

module Fix
class DeleteAssetResources < BatchProcess

def run
asset_resources.each do |ar|
log.info "Destroying Asset Resource #{ar.id}"
begin
Hyrax.persister.delete(resource: ar)
Hyrax.index_adapter.delete(resource: ar)
Hyrax.index_adapter.connection.commit
log.info "Asset Resource #{ar.id} destroyed."
rescue => e
log_error e
end
log.info "Deleting #{asset_resources.count} Asset Resources..."
asset_resources.map do |ar|
destroy_work_and_members(resource: ar)
end
log.info "Done."
log.info report
end

private


def results; @results ||= []; end

def work_destroy_transaction
Hyrax::Transactions::WorkDestroy.new
end

def destroy_work_and_members(resource:)
result = { resource: resource }

resource.members.each do |member|
destroy_work_and_members(resource: member)
end
puts "Done."

log.info "DELETING #{resource.class} #{resource.id.id}..."
work_destroy_transaction.call(resource)
log.info "DELETED #{resource.class} #{resource.id.id}."
delete_from_fedora(resource)
result[:error] = false
results << result
rescue => e
result[:error] = e
results << result
log_error e
end

def delete_from_fedora(resource)
log.info "SEARCHING FEDORA: #{resource.class} #{resource.id}..."
af_object = ActiveFedora::Base.find(resource.id.to_s)
af_object.destroy!
log.info "DELETED: #{af_object.class} #{af_object.id} from Fedora."
rescue ActiveFedora::ObjectNotFoundError
# Not a real error, just means it's not in Fedora. Carry on.
log.info "NOT FOUND."
end

def report
r = "\nRESULTS:\n"
r += "Successfully Deleted #{successes.count}:\n"
successes.each do |result|
r += "#{result[:resource].id}, #{result[:resource].class}\n"
end
r += "Failed while Deleting #{failures.count}:\n"
failures.each do |result|
r += "#{result[:resource].id}, #{result[:error]}\n"
end
r
end

def successes; results.reject { |r| r[:error] }; end
def failures; results.select { |r| r[:error] }; end
end
end

Expand Down
64 changes: 46 additions & 18 deletions lib/fix/delete_physical_instantiations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,54 @@ def initialize(media_type:, **args)

def run
asset_resources.each do |ar|
pis = ar.physical_instantiation_resources.select { |pi| pi.media_type == media_type }
if pis.count == 0
log.warn "No physical instantiations with media type '#{media_type}' were found for Asset #{ar.id}, skipping."
next
end

pis.each do |pi|
begin
log.info "Deleting Physical Instantiation #{pi.id} with media type '#{media_type}' from Asset #{ar.id}..."
Hyrax.persister.delete(resource: pi)
Hyrax.index_adapter.delete(resource: pi)
log.info "Deleted physical instantiation #{pi.id} with media type '#{media_type}' from Asset #{ar.id}."
Hyrax.index_adapter.save(resource: ar)
log.info "Asset Resource #{ar.id} saved."
rescue => e
log_error(e)
end
end
process_asset_resource(ar)
end
end

private

def process_asset_resource(ar)
log.info "Processing Asset Resource #{ar.id}..."
pis = ar.physical_instantiation_resources.select { |pi| pi.media_type == media_type }
if pis.count == 0
log.warn "SKIPPING: No physical instantiations with media type '#{media_type}'."
return
end

pis.each do |pi|
delete_physical_instantiation_resource(pi)
end

Hyrax.persister.save(resource: ar)
Hyrax.index_adapter.save(resource: ar)
log.info "SAVED: Asset Resource #{ar.id}."
rescue => e
log_error(e)
end

def delete_physical_instantiation_resource(pi)
work_destroy_transaction.call(pi)
delete_from_fedora(pi)
Hyrax.index_adapter.delete(resource: pi)
log.info "DELETED: Physical Instantiation Resource #{pi.id} with media type '#{media_type}'"
rescue => e
log_error(e)
end

def delete_from_fedora(pi)
log.info "SEARCHING: PhysicalInstantiation #{pi.id} in Fedora..."
pi = PhysicalInstantiation.find(pi.id)
pi.destroy!
log.info "DELETED: PhysicalInstantiation #{pi.id} from Fedora."
rescue ActiveFedora::ObjectNotFoundError
log.info "NOT FOUND."
rescue => e
log_error(e)
end

def work_destroy_transaction
Hyrax::Transactions::WorkDestroy.new
end
end
end

Expand Down
87 changes: 87 additions & 0 deletions lib/fix/sample_data.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# first shell into a running container with bash, then run pry.
# load the app
require_relative '../../config/environment'

# Set some variables
class SampleData
attr_reader :asset_sample_size, :log

def initialize(asset_sample_size: 100)
@asset_sample_size = asset_sample_size
@log = Logger.new(STDOUT)
end

def solr
@solr ||= Blacklight.default_index.connection
end

def a_docs
@a_docs ||= solr.get('select', params: {q: 'has_model_ssim:Asset', rows: asset_sample_size, start: rand_start})['response']['docs']
end

def rand_start
@rand_start ||= rand(0..(total_assets - asset_sample_size - 1))
end

def total_assets
solr.get('select', params: {q: 'has_model_ssim:Asset', rows: 0})['response']['numFound']
end

def ars(refresh: false)
# Rest instance variables if refresh is true
@ars = @ars_with_pis = @ars_with_pis_in_fedora = nil if refresh
@ars ||= a_docs.map{|doc| AssetResource.find(doc['id'])}
end

def ars_with_pis(media_type: nil)
@ars_with_pis ||= ars.select{ |ar| ar.physical_instantiation_resources.present? }
if media_type
filter(ars: @ars_with_pis, media_type: media_type)
else
@ars_with_pis
end
end

def ars_with_pis_in_fedora(media_type: nil)
@ars_with_pis_in_fedora ||= ars_with_pis.select do |ar|
ar.physical_instantiation_resources.any? do |pi|
begin
pi = PhysicalInstantiation.find(pi.id)
rescue ActiveFedora::ObjectNotFoundError, Ldp::Gone => e
log.error("#{e.class}: #{e.message}")
false
end
end
end

if media_type
filter(ars: @ars_with_pis_in_fedora, media_type: media_type)
else
@ars_with_pis_in_fedora
end
end

def save_assets_to_pg
ars.each do |ar|
Hyrax.persister.save(resource: ar)
end
ars(refresh: true)
end

# Class method for filtering a set of AssetResources
def self.filter(ars: [], media_type: nil)
filtered = ars
if media_type
filtered = filtered.select do |ar|
ar.physical_instantiation_resources.any? do |pi|
pi.media_type.to_s.strip.downcase == media_type.to_s.strip.downcase
end
end
end
filtered
end

# Delegated instance method
def filter(*args, **kwargs); self.class.filter(*args, **kwargs); end

end
9 changes: 1 addition & 8 deletions spec/fix/delete_asset_resources_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,6 @@
end
end

let(:ids_file) do
f = Tempfile.new
f.write(ids.join("\n"))
f.flush
f.path
end

# Non-memoized helper for fetching Asset by ID.
def asset_resource_results
ids.map do |id|
Expand All @@ -50,7 +43,7 @@ def asset_resource_results

it 'deletes the AssetResources' do
expect(asset_resource_results.count).to be > 0
Fix::DeleteAssetResources.new(ids_file: ids_file).run
Fix::DeleteAssetResources.new(ids: ids).run
expect(asset_resource_results.count).to eq 0
end
end

0 comments on commit fc3dbd0

Please sign in to comment.