Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2979 - Metadata template and download should be consistent #4273

Open
wants to merge 7 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 4 additions & 104 deletions app/controllers/concerns/export_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,11 @@ def export_collection_contributors_csv(out:, collection:, report_arguments:)
end

# Starts a 'work_metadata.csv' entry on `out` and writes the collection's work-metadata CSV into it.
#
# out        - output stream responding to put_next_entry and write
#              (presumably a zip output stream; confirm against the caller)
# collection - the collection whose works are exported
#
# NOTE(review): this span is a rendered diff without +/- markers, so both sides of the change
# are present: two `path =` assignments and two `out.write` calls. Presumably the double-quoted
# path and the export_work_metadata_as_csv write are the removed side; confirm against the real
# file, because read as plain Ruby the CSV would be written into the entry twice.
def export_work_metadata_csv(out:, collection:)
path = "work_metadata.csv"
path = 'work_metadata.csv'
out.put_next_entry(path)
out.write(export_work_metadata_as_csv(collection))

# Generate the CSV through Work::Metadata::ExportCsv for every work of the collection.
result = Work::Metadata::ExportCsv.call(collection: collection, works: collection.works)
out.write(result.csv_string)
end

def export_subject_csv(out:, collection:, work:)
Expand Down Expand Up @@ -341,7 +343,6 @@ def export_html_full_pages(out:, page:, by_work:, original_filenames:)
path = File.join("html_full_pages", "#{path_from_work(page.work, original_filenames)}_#{page.title}.html")
end


out.put_next_entry path

page_view = xml_to_html(page.xml_text, true, false, page.work.collection)
Expand Down Expand Up @@ -402,107 +403,6 @@ def get_headings(collection, ids)
@headings
end


# Builds the work-metadata CSV for every work in +collection+ and returns it as a String.
#
# Column layout, in order:
#   1. the fixed work columns listed in static_headers
#   2. one column per distinct label found in the works' original_metadata JSON
#   3. 'Description Status' and 'Described By'
#   4. one column per collection.metadata_fields entry (described metadata)
#
# Every row carries a cell for every header. Works without original metadata or
# without a metadata description get empty cells, so later values can never slide
# under the wrong header.
#
# collection - the collection to export; must respond to works, metadata_fields and owner
#
# Returns the CSV text with all fields force-quoted.
# Raises JSON::ParserError if a non-blank original_metadata / metadata_description is not valid JSON.
def export_work_metadata_as_csv(collection)
  static_headers = [
    'Title',
    'Collection',
    'Document Sets',
    'Uploaded Filename',
    'FromThePage ID',
    'FromThePage Slug',
    'FromThePage URL',
    'Identifier',
    'Originating Manifest ID',
    'Creation Date',
    'Total Pages',
    'Pages Transcribed',
    'Pages Corrected',
    'Pages Indexed',
    'Pages Translated',
    'Pages Needing Review',
    'Pages Marked Blank',
    'Contributors',
    'Contributors Name',
    'work_id'
  ]
  static_description_headers = ['Description Status', 'Described By']

  # Blank strings are skipped as well as nils: JSON.parse('') raises.
  metadata_headers = collection.works.pluck(:original_metadata)
                               .reject(&:blank?)
                               .flat_map { |raw| JSON.parse(raw).map { |element| element['label'] } }
                               .uniq

  # Load the field configuration once. Its order defines the described-metadata column
  # order for the header row and for every data row.
  metadata_fields = collection.metadata_fields.to_a
  described_headers = metadata_fields.map(&:label)

  CSV.generate(force_quotes: true) do |csv|
    csv << static_headers + metadata_headers + static_description_headers + described_headers

    collection.works.includes(:document_sets, :work_statistic, :sc_manifest).reorder(:id).each do |work|
      work_users = work.deeds.map { |d| "#{d.user.display_name}<#{d.user.email}>".gsub('|', '//') }.uniq.join('|')
      contributors_real_names = work.deeds.map { |d| d.user.real_name }.uniq.join(' | ')

      row = [
        work.title,
        work.collection.title,
        work.document_sets.map(&:title).join('|'),
        work.uploaded_filename,
        work.id,
        work.slug,
        collection_read_work_url(collection.owner, collection, work),
        work.identifier,
        work.sc_manifest.nil? ? '' : work.sc_manifest.at_id,
        work.created_on,
        work.work_statistic.total_pages,
        work.work_statistic.transcribed_pages,
        work.work_statistic.corrected_pages,
        work.work_statistic.annotated_pages,
        work.work_statistic.translated_pages,
        work.work_statistic.needs_review,
        work.work_statistic.blank_pages,
        work_users,
        contributors_real_names,
        work.id
      ]

      # Original (imported) metadata: always emit one cell per header, even when the
      # work has none, so the description columns stay aligned.
      original_values = {}
      unless work.original_metadata.blank?
        JSON.parse(work.original_metadata).each { |e| original_values[e['label']] = e['value'] }
      end
      row.concat(metadata_headers.map { |header| original_values[header] })

      if work.metadata_description.blank?
        # No description: pad the status, described-by and per-field cells.
        row.concat(Array.new(static_description_headers.size + metadata_fields.size))
      else
        row << work.description_status
        row << User.find(work.metadata_description_versions.pluck(:user_id)).map(&:display_name).join('; ')

        described = JSON.parse(work.metadata_description)
        metadata_fields.each do |field|
          element = described.find { |candidate| candidate['transcription_field_id'] == field.id }
          value = element && element['value']
          # Multi-valued fields are flattened into a single cell.
          row << (value.is_a?(Array) ? value.join('; ') : value)
        end
      end

      csv << row
    end
  end
end

def export_tables_as_csv(table_obj)
if table_obj.is_a?(Collection)
collection = table_obj
Expand Down
7 changes: 5 additions & 2 deletions app/controllers/export_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,12 @@ def subject_index_csv
end

def work_metadata_csv
filename = params[:filename] ? "#{params[:filename]}.csv" : "fromthepage_work_metadata_export_#{@collection.id}_#{Time.now.utc.iso8601}.csv"
result = Work::Metadata::ExportCsv.call(collection: @collection, works: @collection.works)

send_data(
export_work_metadata_as_csv(@collection),
filename: "fromthepage_work_metadata_export_#{@collection.id}_#{Time.now.utc.iso8601}.csv",
result.csv_string,
filename: filename,
type: 'application/csv'
)
cookies['download_finished'] = 'true'
Expand Down
35 changes: 13 additions & 22 deletions app/controllers/metadata_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,24 @@ class MetadataController < ApplicationController

# Sends an example metadata CSV for the collection identified by params[:id].
#
# NOTE(review): this span is a rendered diff without +/- markers, so both variants are present.
# Presumably the Metadata.create_example call and the first send_data are the removed side;
# confirm against the real file, because read as plain Ruby send_data would be called twice.
def example
collection = Collection.find(params[:id])
example = Metadata.create_example(collection)
send_data example, filename: "example.csv"
# Limit the export to at most three works of the collection.
works = collection.works.limit(3)
result = Work::Metadata::ExportCsv.call(collection: collection, works: works)

send_data result.csv_string, filename: 'example.csv'
end

# Modal upload: the action has no body of its own.
def upload; end

# Handles an uploaded metadata CSV for the collection given in params[:metadata][:collection_id],
# sets a flash message and redirects to the collection's edit_look page.
#
# NOTE(review): this span is a rendered diff without +/- markers, so two implementations are
# interleaved: the synchronous Metadata#process_csv path (metadata_file, result, rows,
# row_errors, link, feedback) and the Work::Metadata::ImportCsvJob path. Presumably the
# synchronous path is the removed side; confirm against the real file. Read as plain Ruby,
# flash[:alert] is assigned twice and the second assignment wins.
def create
metadata_file = params[:metadata]['file'].tempfile

collection = Collection.find(params[:metadata][:collection_id])
metadata = Metadata.new(metadata_file: metadata_file, collection: collection)
result = metadata.process_csv
rows = result[:content]
row_errors = result[:errors].count
link = helpers.link_to 'link', collection_metadata_csv_error_path

if row_errors > 0
feedback = "Your upload has finished processing. #{rows} works were updated successfully; #{row_errors} rows encountered errors. Download the error file here: #{link}"
else
feedback = "Your upload has finished processing. #{rows} works were updated successfully."
end
# Only the tempfile's path, the collection id and the current user's id are handed to the job.
# NOTE(review): presumably the upload tempfile can be removed once the request finishes, which
# may be before a queued job reads it. Confirm, or copy the file to a durable location first.
metadata_file_path = params[:metadata]['file'].tempfile.path
collection_id = params[:metadata][:collection_id]
Work::Metadata::ImportCsvJob.perform_later(metadata_file_path, collection_id, current_user.id)

flash[:alert] = feedback
collection = Collection.find(collection_id)

flash[:alert] = t('.is_processing')
ajax_redirect_to edit_look_collection_path(collection.owner, collection)
end

Expand All @@ -46,9 +42,4 @@ def refresh
flash[:notice] = t('.is_processing')
ajax_redirect_to edit_look_collection_path(collection.owner, collection)
end

# Sends whatever Metadata.retrieve_error returns to the client as "error.csv".
def csv_error
  send_data(Metadata.retrieve_error, filename: "error.csv")
end
end
126 changes: 126 additions & 0 deletions app/interactors/work/metadata/export_csv.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
require 'csv'

# Interactor that renders the work-metadata CSV used for the metadata download,
# the zip export and the example/template file.
#
# Call as:
#   Work::Metadata::ExportCsv.call(collection: collection, works: works)
#
#   collection - supplies the owner used in work URLs and the metadata_fields columns
#   works      - relation of the works to export (it is queried with includes/reorder/pluck)
#
# The resulting CSV text is exposed as context.csv_string.
#
# Column layout, in order: STATIC_HEADERS, one column per distinct label found in the
# works' original_metadata JSON, STATIC_DESCRIPTION_HEADERS, and one column per
# collection.metadata_fields entry. Every row carries a cell for every header, so a
# work without original metadata or without a description cannot shift its values
# under the wrong header.
class Work::Metadata::ExportCsv
  # NOTE(review): the asterisks presumably mark columns that are ignored on re-import;
  # confirm against the importer.
  STATIC_HEADERS = [
    'FromThePage Title',
    '*Collection*',
    '*Document Sets*',
    '*Uploaded Filename*',
    '*FromThePage ID*',
    '*FromThePage Slug*',
    '*FromThePage URL*',
    'FromThePage Description',
    'Identifier',
    '*Originating Manifest ID*',
    '*Creation Date*',
    '*Total Pages*',
    '*Pages Transcribed*',
    '*Pages Corrected*',
    '*Pages Indexed*',
    '*Pages Translated*',
    '*Pages Needing Review*',
    '*Pages Marked Blank*',
    '*Contributors*',
    '*Contributors Name*'
  ].freeze

  STATIC_DESCRIPTION_HEADERS = [
    '*Description Status*',
    '*Described By*'
  ].freeze

  include Interactor
  include Rails.application.routes.url_helpers

  def initialize(collection:, works:)
    @collection = collection
    @works = works

    # Bare super forwards the same keyword arguments to Interactor's initializer.
    super
  end

  # Generates the CSV and stores it in context.csv_string.
  #
  # Returns the interactor context.
  # Raises JSON::ParserError if a non-blank original_metadata / metadata_description is not valid JSON.
  def call
    # Eager-load everything the row builders touch, including the users behind deeds
    # and description versions, to avoid per-work and per-deed queries.
    works_scope = @works.includes(
      :document_sets,
      :work_statistic,
      :sc_manifest,
      { deeds: :user },
      { metadata_description_versions: :user }
    ).reorder(:id)

    metadata_headers = original_metadata_headers
    # Load the field configuration once. Its order defines the described-metadata
    # column order for the header row and for every data row.
    metadata_fields = @collection.metadata_fields.to_a

    context.csv_string = CSV.generate(force_quotes: true) do |csv|
      csv << STATIC_HEADERS + metadata_headers + STATIC_DESCRIPTION_HEADERS + metadata_fields.map(&:label)

      works_scope.each do |work|
        csv << static_columns(work) +
               original_metadata_columns(work, metadata_headers) +
               description_columns(work, metadata_fields)
      end
    end

    context
  end

  private

  # Distinct original-metadata labels across the exported works, in first-seen order by work id.
  # Plucked from the plain relation: the eager loads are not needed for a single column.
  # Blank strings are skipped as well as nils, because JSON.parse('') raises.
  def original_metadata_headers
    @works.reorder(:id)
          .pluck(:original_metadata)
          .select(&:present?)
          .flat_map { |raw| JSON.parse(raw).map { |element| element['label'] } }
          .uniq
  end

  # Cells for STATIC_HEADERS, in the same order as that constant.
  def static_columns(work)
    contributors = work.deeds.map(&:user)
    work_users = contributors.map { |user| "#{user.display_name}<#{user.email}>".gsub('|', '//') }.uniq.join('|')
    contributors_real_names = contributors.map(&:real_name).uniq.join(' | ')

    [
      work.title,
      work.collection.title,
      work.document_sets.map(&:title).join('|'),
      work.uploaded_filename,
      work.id,
      work.slug,
      collection_read_work_url(@collection.owner, @collection, work),
      work.description,
      work.identifier,
      work.sc_manifest.nil? ? '' : work.sc_manifest.at_id,
      work.created_on,
      work.work_statistic.total_pages,
      work.work_statistic.transcribed_pages,
      work.work_statistic.corrected_pages,
      work.work_statistic.annotated_pages,
      work.work_statistic.translated_pages,
      work.work_statistic.needs_review,
      work.work_statistic.blank_pages,
      work_users,
      contributors_real_names
    ]
  end

  # One cell per original-metadata header; all nil when the work has no original metadata.
  def original_metadata_columns(work, headers)
    return Array.new(headers.size) if work.original_metadata.blank?

    values = JSON.parse(work.original_metadata).each_with_object({}) do |element, by_label|
      by_label[element['label']] = element['value']
    end
    headers.map { |header| values[header] }
  end

  # Description status, described-by and one cell per metadata field; all nil when the
  # work has no metadata description.
  def description_columns(work, fields)
    return Array.new(STATIC_DESCRIPTION_HEADERS.size + fields.size) if work.metadata_description.blank?

    # A user is listed once however many versions they saved; versions without a user are skipped.
    described_by = work.metadata_description_versions.map(&:user).compact.uniq.map(&:display_name).join('; ')

    elements = JSON.parse(work.metadata_description)
    field_values = fields.map do |field|
      element = elements.find { |candidate| candidate['transcription_field_id'] == field.id }
      value = element && element['value']
      # Multi-valued fields are flattened into a single cell.
      value.is_a?(Array) ? value.join('; ') : value
    end

    [work.description_status, described_by] + field_values
  end
end
Loading
Loading