Skip to content

Commit

Permalink
first cut importer for boxes
Browse files Browse the repository at this point in the history
  • Loading branch information
jambun committed Oct 23, 2015
1 parent ab02707 commit c7c7313
Show file tree
Hide file tree
Showing 8 changed files with 212 additions and 0 deletions.
6 changes: 6 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Run the ArchivesSpace gem setup hook, but only when ASpaceGems has already
# been defined by the host environment.
ASpaceGems.setup if defined? ASpaceGems

# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source 'https://rubygems.org'

# Spreadsheet parsing and zip handling for the RM export importer. Both are
# declared with :require => false, so code that needs them must require them
# explicitly.
gem 'rubyXL', "3.1.0", :require => false
gem 'rubyzip', "1.0.0", :require => false
181 changes: 181 additions & 0 deletions backend/converters/rm_export_converter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
require 'date'
require 'rubyXL'
require 'zip'


# Importer that turns a Records Management (RMS) export zip into a batch of
# ArchivesSpace JSONModel records.
#
# The zip must contain two spreadsheets whose entry names end in
# 'ArchBoxExport.xlsx' and 'ArchFileExport.xlsx'. Only the box sheet is
# imported so far: every non-empty row becomes an archival_object
# (level "otherlevel" / other_level "box") placed beneath the archival object
# whose external_id (with the configured RMS source) equals the row's
# Orig_SERN value.
class RMExportConverter < Converter

  # Returns a converter for +input_file+ when +type+ is "rm_export",
  # otherwise nil.
  def self.instance_for(type, input_file)
    type == "rm_export" ? new(input_file) : nil
  end


  # Lists the import types handled by this converter.
  # +show_hidden+ is accepted but not used here.
  def self.import_types(show_hidden = false)
    [
      {
        :name => "rm_export",
        :description => "Records Management export zip"
      }
    ]
  end


  # Short human-readable description of what this converter does.
  def self.profile
    "Convert Records Management spreadsheets to ArchivesSpace JSONModel records"
  end


  # +input_file+ is the path of the uploaded RMS export zip.
  def initialize(input_file)
    super
    @batch = ASpaceImport::RecordBatch.new
    @input_file = input_file
    @records = []
  end


  # Reads both spreadsheets out of the zip and queues one archival_object
  # (plus any new location it needs) on @batch for every importable box row.
  #
  # Raises a RuntimeError when either spreadsheet is missing from the zip.
  # Returns nil.
  def run
    rms_import_date = Date.today.strftime('%Y-%m-%d')

    box_sheet, file_sheet = read_export_sheets

    unless box_sheet && file_sheet
      raise "Zip file must contain files with names ending in 'ArchBoxExport.xlsx' and 'ArchFileExport.xlsx'"
    end

    import_boxes(box_sheet, rms_import_date)

    # TODO: import the file sheet as well.
    # rows = file_sheet.enum_for(:each)
    # headers = row_values(rows.next)
    # file headers: ["BOXN", "FILN", "FILNAME"]
    # puts "file headers: #{headers.inspect}"

    nil
  end


  # Returns the path of the batch's output file, dumping its contents to
  # stdout on the way.
  # NOTE(review): the dump reads the whole output file into memory and looks
  # like first-cut debugging output; consider removing it.
  def get_output_path
    output_path = @batch.get_output_path

    p "=================="
    p output_path
    p File.read(output_path)
    p "=================="

    output_path
  end


  private


  # Opens the zip and returns [box_sheet, file_sheet]; either element is nil
  # when no entry with the matching name suffix was found.
  def read_export_sheets
    now = java.lang.System.currentTimeMillis
    box_file = File.join(Dir.tmpdir, "rm_export_box_#{now}")
    file_file = File.join(Dir.tmpdir, "rm_export_file_#{now}")
    puts "box_file #{box_file}"
    puts "file_file #{file_file}"

    box_sheet = nil
    file_sheet = nil

    Zip::File.open(@input_file) do |zip_file|
      zip_file.each do |entry|
        if entry.name.end_with? "ArchBoxExport.xlsx"
          box_sheet = extract_first_sheet(entry, box_file)
        elsif entry.name.end_with? "ArchFileExport.xlsx"
          file_sheet = extract_first_sheet(entry, file_file)
        end
      end
    end

    [box_sheet, file_sheet]
  end


  # Extracts +entry+ to +tmp_path+, parses it and returns the first
  # worksheet. The temporary file is removed even when extraction or parsing
  # raises, so a broken spreadsheet does not leave files behind in tmpdir.
  def extract_first_sheet(entry, tmp_path)
    entry.extract(tmp_path)
    RubyXL::Parser.parse(tmp_path)[0]
  ensure
    File.unlink(tmp_path) if File.exist?(tmp_path)
  end


  # Walks the box sheet and queues the records for each importable row.
  #
  # The first row supplies the headers; expected box headers are
  # ["Orig_SERN", "BOXN", "Box Location", "BOXNAME", "BEGINDATE", "ENDDDATE"].
  #
  # NOTE(review): row_values is not defined in this class as shown here; it
  # is presumably supplied by Converter or another file - confirm.
  def import_boxes(box_sheet, rms_import_date)
    rows = box_sheet.enum_for(:each)
    headers = row_values(rows.next)

    parent_aos = {}     # Orig_SERN => ArchivalObject
    location_uris = {}  # "Box Location" value => location URI

    # Kernel#loop ends quietly once rows.next raises StopIteration.
    loop do
      row = rows.next

      # A nil row is treated like any other blank row instead of ending the
      # import early (presumably how RubyXL reports an empty row - confirm).
      next if row.nil?

      values = row_values(row)
      next if values.compact.empty?

      values_map = Hash[headers.zip(values)]

      # find parent AO using Orig_SERN = external_id with source of RMS
      parent = parent_ao_for(values_map["Orig_SERN"], parent_aos)
      if parent.nil?
        p "NO Archival Object with #{values_map["Orig_SERN"]}, skipping ..."
        next
      end

      # Resolve the location first so a newly created location is queued on
      # the batch ahead of the archival object that refers to it.
      loc_uri = location_uri_for(values_map["Box Location"], location_uris)

      @batch << box_archival_object(values_map, parent, loc_uri, rms_import_date)
    end
  end


  # Returns the ArchivalObject whose external_id (with the configured RMS
  # source) equals +orig_sern+, or nil when there is none. Successful lookups
  # are memoised in +cache+; misses are not, so each affected row is reported.
  def parent_ao_for(orig_sern, cache)
    return cache[orig_sern] if cache[orig_sern]

    ext_id = ExternalId.select(:archival_object_id).
               where(:external_id => orig_sern,
                     :source => AppConfig[:container_management_rms_source]).first
    return nil if ext_id.nil?

    # May be nil if the matching external id row has no archival object
    # behind it; callers treat that the same as "not found".
    parent = ArchivalObject[ext_id[:archival_object_id]]
    cache[orig_sern] = parent if parent
    parent
  end


  # Returns the URI of the location whose coordinate_1_indicator equals
  # +indicator+, queueing a new location on the batch when none exists.
  #
  # Results are remembered in +known_uris+ so that several boxes on the same
  # new shelf share one queued location: records queued on the batch are not
  # visible to the Location lookup, so without the cache every such row
  # would queue another duplicate location.
  def location_uri_for(indicator, known_uris)
    return known_uris[indicator] if known_uris.key?(indicator)

    existing = Location.select(:id).where(:coordinate_1_indicator => indicator).first

    uri = if existing.nil?
            loc = JSONModel::JSONModel(:location).
                    from_hash({
                                :uri => "/locations/import_#{SecureRandom.hex}",
                                :building => "RecordsCenter",
                                :area => "RecordsManagement",
                                :coordinate_1_label => "Shelf",
                                :coordinate_1_indicator => indicator,
                              })
            @batch << loc
            loc.uri
          else
            JSONModel::JSONModel(:location).uri_for(existing[:id])
          end

    known_uris[indicator] = uri
  end


  # Builds the archival_object JSONModel for one box row.
  def box_archival_object(values_map, parent, loc_uri, rms_import_date)
    external_id = {
      :source => AppConfig[:container_management_rms_source],
      :external_id => values_map["BOXN"],
    }

    instance = {
      :instance_type => "mixed_materials",
      :container => {
        :type_1 => "box",
        :indicator_1 => values_map["BOXN"],
        :container_locations => [{
          :ref => loc_uri,
          :status => "current",
          :start_date => rms_import_date,
        }],
      }
    }

    # NOTE(review): the 12345 repository id in the URI looks like an import
    # placeholder - confirm.
    JSONModel::JSONModel(:archival_object).
      from_hash({
                  :uri => "/repositories/12345/archival_objects/import_#{SecureRandom.hex}",
                  :title => values_map["BOXNAME"],
                  :level => "otherlevel",
                  :other_level => "box",
                  :external_ids => [external_id],
                  :dates => box_dates(values_map),
                  :instances => [instance],
                  :parent => {:ref => JSONModel::JSONModel(:archival_object).uri_for(parent.id, :repo_id => parent.repo_id)},
                  :resource => {:ref => JSONModel::JSONModel(:resource).uri_for(parent.root_record_id, :repo_id => parent.repo_id)},
                  :rms_import_date => rms_import_date,
                })
  end


  # Returns the dates array for a box row: a single inclusive creation date
  # built from BEGINDATE / ENDDDATE, or an empty array when the row has no
  # BEGINDATE. A blank date cell therefore no longer aborts the whole import
  # with a NoMethodError on nil.
  def box_dates(values_map)
    begin_date = date_part(values_map["BEGINDATE"])
    return [] if begin_date.nil?

    date = {
      :date_type => "inclusive",
      :label => "creation",
      :begin => begin_date,
    }

    end_date = date_part(values_map["ENDDDATE"])
    date[:end] = end_date unless end_date.nil?

    [date]
  end


  # First ten characters of +value+ (presumably the YYYY-MM-DD part of a
  # timestamp string - confirm), or nil when the cell was empty.
  def date_part(value)
    value.nil? ? nil : value[0, 10]
  end

end

18 changes: 18 additions & 0 deletions backend/model/archival_object_ext.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Plugin extension of the ArchivalObject model that keeps rms_import_date
# from being lost on updates.
class ArchivalObject < Sequel::Model(:archival_object)

  # Makes sure +json+ carries an "rms_import_date" before handing the update
  # on to the existing implementation via super.
  #
  # When the incoming JSON already has a value it is left untouched.
  # Otherwise the value currently stored on this record is reused, and if
  # that stringifies to "" the dummy date "1900-01-01" is used instead.
  #
  # Notes carried over from the original author:
  # * updates coming from the frontend do not send the field (it is not in a
  #   hidden input, to avoid overriding the template), so without this the
  #   stored value would be blatted;
  # * the dummy value is there because prepare_for_db was blowing up when no
  #   value was present (cause not yet understood);
  # * this override may obscure other update_from_json overrides - still an
  #   open question.
  def update_from_json(json, opts = {}, apply_nested_records = true)
    return super if json["rms_import_date"]

    stored = self[:rms_import_date].to_s
    json["rms_import_date"] = (stored == "" ? "1900-01-01" : stored)

    super
  end

end
7 changes: 7 additions & 0 deletions backend/plugin_init.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Compatibility shim for a small rubyzip API difference: guarantees that the
# constant Error can be resolved from inside Zip, defining Zip::Error as a
# StandardError subclass only when no Error constant is visible already.
module Zip
  unless defined?(Error)
    Error = Class.new(StandardError)
  end
end
Binary file added samples/SampleRMExport.zip
Binary file not shown.
Binary file added samples/SampleRMExport/201507_ArchBoxExport.xlsx
Binary file not shown.
Binary file added samples/SampleRMExport/201507_ArchFileExport.xlsx
Binary file not shown.
Binary file not shown.

0 comments on commit c7c7313

Please sign in to comment.