Remove the pre_curation_uploads Active Storage attachment (#1775)
Also rename precuration_uploads_fast to precuration_uploads so that the _fast concept no longer exists in the code base.

Additionally, make the tests for publication more robust.
carolyncole authored Apr 18, 2024
1 parent fce723a commit d0bb22e
Showing 10 changed files with 63 additions and 74 deletions.
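
In short: the ActiveStorage association is dropped, and the S3-backed reader that previously lived under the `_fast` name now owns `pre_curation_uploads`. A condensed before/after sketch, assembled from the diff below:

```ruby
# Before: an ActiveStorage association plus a separate "_fast" S3 reader.
#
#   has_many_attached :pre_curation_uploads, service: :amazon_pre_curation
#
#   def pre_curation_uploads_fast
#     s3_query_service.client_s3_files.sort_by(&:filename)
#   end

# After: the association is gone and the S3-backed reader takes over the name.
def pre_curation_uploads
  s3_query_service.client_s3_files.sort_by(&:filename)
end
```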
4 changes: 2 additions & 2 deletions app/models/readme.rb
@@ -40,12 +40,12 @@ def s3_readme_idx
   end
 
   def file_names
-    @file_names ||= work.pre_curation_uploads_fast.map(&:filename_display)
+    @file_names ||= work.pre_curation_uploads.map(&:filename_display)
   end
 
   def remove_old_readme
     return if blank?
-    work.s3_query_service.delete_s3_object(work.pre_curation_uploads_fast[s3_readme_idx].key)
+    work.s3_query_service.delete_s3_object(work.pre_curation_uploads[s3_readme_idx].key)
   end
 
   def upload_readme(readme_file_param)
17 changes: 6 additions & 11 deletions app/models/work.rb
@@ -8,7 +8,6 @@ class InvalidGroupError < ::ArgumentError; end
   has_many :work_activity, -> { order(updated_at: :desc) }, dependent: :destroy
   has_many :user_work, -> { order(updated_at: :desc) }, dependent: :destroy
   has_many :upload_snapshots, -> { order(updated_at: :desc) }, dependent: :destroy
-  has_many_attached :pre_curation_uploads, service: :amazon_pre_curation
 
   belongs_to :group, class_name: "Group"
   belongs_to :curator, class_name: "User", foreign_key: "curator_user_id", optional: true
@@ -299,14 +298,14 @@ def current_transition
   def uploads
     return post_curation_uploads if approved?
 
-    pre_curation_uploads_fast
+    pre_curation_uploads
   end
 
   # Returns the list of files for the work with some basic information about each of them.
   # This method is much faster than `uploads` because it does not return the actual S3File
   # objects to the client; instead it returns just a few selected data elements.
   def file_list
-    s3_files = approved? ? post_curation_uploads : pre_curation_uploads_fast
+    s3_files = approved? ? post_curation_uploads : pre_curation_uploads
     files_info = s3_files.map do |s3_file|
       {
         "safe_id": s3_file.safe_id,
@@ -333,7 +332,7 @@ def total_file_size
   end
 
   # Fetches the data from S3 directly, bypassing ActiveStorage
-  def pre_curation_uploads_fast
+  def pre_curation_uploads
     s3_query_service.client_s3_files.sort_by(&:filename)
   end
 
@@ -359,7 +358,7 @@ def post_curation_uploads(force_post_curation: false)
   end
 
   def s3_files
-    pre_curation_uploads_fast
+    pre_curation_uploads
   end
 
   def s3_client
@@ -440,7 +439,7 @@ def past_snapshots
   # @return [UploadSnapshot]
   def reload_snapshots(user_id: nil)
     work_changes = []
-    s3_files = pre_curation_uploads_fast
+    s3_files = pre_curation_uploads
     s3_filenames = s3_files.map(&:filename)
 
     upload_snapshot = latest_snapshot
@@ -595,11 +594,7 @@ def s3_object_persisted?(s3_file)
   end
 
   def publish_precurated_files(user)
-    # An error is raised if there are no files to be moved
-    raise(StandardError, "Attempting to publish a Work without attached uploads for #{s3_object_key}") if pre_curation_uploads_fast.empty? && post_curation_uploads.empty?
-
-    # We need to explicitly access to post-curation services here.
-    # Lets explicitly create it so the state of the work does not have any impact.
+    # We need to explicitly check the post-curation bucket here.
     s3_post_curation_query_service = S3QueryService.new(self, "postcuration")
 
     s3_dir = find_post_curation_s3_dir(bucket_name: s3_post_curation_query_service.bucket_name)
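
For orientation, a hedged console sketch of how the renamed reader is consumed; `work_id` is assumed to reference a draft (unapproved) Work, and `file_list`'s fields follow the hunk above:

```ruby
# Illustrative only: exercising the renamed reader on a draft work.
work = Work.find(work_id)        # work_id is assumed to exist
work.uploads                     # draft work => pre-curation S3File objects
work.pre_curation_uploads        # same list, sorted by filename
work.file_list.first[:safe_id]   # lightweight hash; no full S3File sent to the client
```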
4 changes: 2 additions & 2 deletions app/services/work_validator.rb
@@ -3,7 +3,7 @@ class WorkValidator
   attr_reader :work
 
   delegate :errors, :metadata, :resource, :ark, :doi, :user_entered_doi, :id, :group,
-           :pre_curation_uploads_fast, :post_curation_uploads, to: :work
+           :pre_curation_uploads, :post_curation_uploads, to: :work
 
   def initialize(work)
     @work = work
@@ -44,7 +44,7 @@ def valid_to_approve(user)
     unless user.has_role? :group_admin, group
       errors.add :base, "Unauthorized to Approve"
     end
-    if pre_curation_uploads_fast.empty? && post_curation_uploads.empty?
+    if pre_curation_uploads.empty? && post_curation_uploads.empty?
       errors.add :base, "Uploads must be present for a work to be approved"
     end
     errors.count == 0
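
A minimal usage sketch of the validator after the rename (purely illustrative; `work` and `user` are assumed to exist):

```ruby
validator = WorkValidator.new(work)
# The delegate lets the validator read the renamed method as its own.
validator.pre_curation_uploads     # => work.pre_curation_uploads
validator.valid_to_approve(user)   # adds "Uploads must be present..." when both lists are empty
validator.errors.count             # errors are delegated to the work as well
```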
4 changes: 2 additions & 2 deletions docs/how_to_delete_a_work.md
@@ -3,7 +3,7 @@ These steps assume the work is assigned to a variable called `work`.
 1. Delete the pre curation uploads
    * ```
      service = S3QueryService.new(work, "postcuration")
-     work.pre_curation_uploads_fast.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
+     work.pre_curation_uploads.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
      ```
 1. Delete the post curation uploads
    * ```
@@ -16,7 +16,7 @@ These steps assume the work is assigned to a variable called `work`.
 Full script below...
 ```ruby
 service = S3QueryService.new(work, "postcuration")
-work.pre_curation_uploads_fast.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
+work.pre_curation_uploads.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
 work.post_curation_uploads.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
 work.destroy
 ```
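
Since `delete_object` is irreversible, a dry run that prints the keys first is a cheap safeguard. This sketch is not part of the original doc; the bucket choice simply mirrors the script above:

```ruby
# Hypothetical dry run: list what the destructive script above would delete.
service = S3QueryService.new(work, "postcuration")
(work.pre_curation_uploads + work.post_curation_uploads).each do |upload|
  puts "#{service.bucket_name}/#{upload.key}"
end
# Review the output, then run the full script.
```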
2 changes: 1 addition & 1 deletion lib/tasks/works.rake
@@ -52,7 +52,7 @@ namespace :works do
     works = Work.where.not(id: work_exclusion_ids)
     works.each do |work|
       service = S3QueryService.new(work, "postcuration")
-      work.pre_curation_uploads_fast.each { |upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key }) }
+      work.pre_curation_uploads.each { |upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key }) }
       work.post_curation_uploads.each { |upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key }) }
       work.destroy
     end
18 changes: 9 additions & 9 deletions spec/controllers/works_controller_spec.rb
@@ -180,7 +180,7 @@
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast).not_to be_empty
+        expect(saved_work.pre_curation_uploads).not_to be_empty
         expect(fake_s3_service).not_to have_received(:delete_s3_object)
         background_snapshot = BackgroundUploadSnapshot.last
         expect(background_snapshot.work).to eq(work)
@@ -245,7 +245,7 @@
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast).not_to be_empty
+        expect(saved_work.pre_curation_uploads).not_to be_empty
         expect(fake_s3_service).not_to have_received(:delete_s3_object)
         background_snapshot = BackgroundUploadSnapshot.last
         expect(background_snapshot.work).to eq(work)
@@ -312,7 +312,7 @@
       end
 
       it "handles the update page" do
-        expect(work.pre_curation_uploads_fast.count).to eq 2
+        expect(work.pre_curation_uploads.count).to eq 2
 
         params = base_params.clone
         params[:work] = { pre_curation_uploads_added: [uploaded_file2],
@@ -321,7 +321,7 @@
         post(:update, params:)
 
         saved_work = Work.find(work.id)
-        expect(saved_work.pre_curation_uploads_fast.count).to eq 2
+        expect(saved_work.pre_curation_uploads.count).to eq 2
         expect(fake_s3_service).to have_received(:delete_s3_object).once
       end
     end
@@ -370,14 +370,14 @@
       end
 
       it "handles the update page" do
-        expect(work.pre_curation_uploads_fast.length).to eq(3)
+        expect(work.pre_curation_uploads.length).to eq(3)
 
         sign_in user
         post(:update, params:)
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast.length).to eq(1)
+        expect(saved_work.pre_curation_uploads.length).to eq(1)
 
         expect(ActiveStorage::PurgeJob).not_to have_received(:new)
         expect(fake_s3_service).to have_received(:delete_s3_object).with(s3_file1.key)
@@ -546,11 +546,11 @@
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast).not_to be_empty
+        expect(saved_work.pre_curation_uploads).not_to be_empty
 
         # order is alphabetical; we cannot change it by sending the files in a different order
-        expect(saved_work.pre_curation_uploads_fast.first.filename).to include(uploaded_files.last.original_filename.gsub(".csv", ""))
-        expect(saved_work.pre_curation_uploads_fast.last.filename).to include(uploaded_files.first.original_filename.gsub(".csv", ""))
+        expect(saved_work.pre_curation_uploads.first.filename).to include(uploaded_files.last.original_filename.gsub(".csv", ""))
+        expect(saved_work.pre_curation_uploads.last.filename).to include(uploaded_files.first.original_filename.gsub(".csv", ""))
 
         # original copies of the files get deleted
         expect(fake_s3_service).to have_received(:delete_s3_object).twice
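
The alphabetical-order assertion above follows from `pre_curation_uploads` sorting by filename on the way out (see app/models/work.rb); a small illustrative check, not taken from the spec:

```ruby
# Regardless of the order files were sent in the request,
# the reader hands them back sorted by filename.
filenames = saved_work.pre_curation_uploads.map(&:filename)
expect(filenames).to eq(filenames.sort)
```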
69 changes: 33 additions & 36 deletions spec/models/work_spec.rb
@@ -173,10 +173,15 @@
     let(:pre_curated_data_profile) { { objects: [] } }
 
     let(:fake_s3_service_pre) { stub_s3(data: [file1, file2]) }
-    let(:fake_s3_service_post) { stub_s3(data: [file1, file2]) }
+    let(:fake_s3_service_post) { stub_s3 }
 
     before do
-      allow(S3QueryService).to receive(:new).and_return(fake_s3_service_pre, fake_s3_service_post)
+      # initialize so the next allows happen after the stubs
+      fake_s3_service_post
+      fake_s3_service_pre
+
+      allow(S3QueryService).to receive(:new).with(work, "postcuration").and_return(fake_s3_service_post)
+      allow(S3QueryService).to receive(:new).with(work, "precuration").and_return(fake_s3_service_pre)
       allow(fake_s3_service_pre.client).to receive(:head_object).with(bucket: "example-post-bucket", key: work.s3_object_key).and_raise(Aws::S3::Errors::NotFound.new("blah", "error"))
       allow(fake_s3_service_post).to receive(:bucket_name).and_return("example-post-bucket")
       allow(fake_s3_service_pre).to receive(:bucket_name).and_return("example-pre-bucket")
@@ -197,19 +202,32 @@
       work.reload
 
       expect(fake_s3_service_pre).to have_received(:publish_files).once
-      expect(work.pre_curation_uploads).to be_empty
-      expect(work.post_curation_uploads).not_to be_empty
-      expect(work.post_curation_uploads.length).to eq(2)
-      expect(work.post_curation_uploads.first).to be_an(S3File)
-      expect(work.post_curation_uploads.first.key).to eq(file1.key)
-      expect(work.post_curation_uploads.last).to be_an(S3File)
-      expect(work.post_curation_uploads.last.key).to eq(file2.key)
 
+      expect(work.as_json["files"][0].keys).to eq([:filename, :size, :display_size, :url])
+      expect(work.as_json["files"][0][:filename]).to match(/10\.34770\/123-abc\/\d+\/us_covid_2019\.csv/)
+      expect(work.as_json["files"][0][:size]).to eq(1024)
+      expect(work.as_json["files"][0][:display_size]).to eq("1 KB")
+      expect(work.as_json["files"][0][:url]).to eq "https://example.data.globus.org/10.34770/123-abc/#{work.id}/us_covid_2019.csv"
+      expect(fake_s3_service_post).to have_received(:bucket_name).once
+      expect(fake_s3_service_pre.client).to have_received(:head_object).once
     end
 
+    context "when the post curation bucket already exists" do
+      before do
+        allow(fake_s3_service_pre.client).to receive(:head_object).with(bucket: "example-post-bucket", key: work.s3_object_key).and_return(true)
+      end
+
+      it "raises an error" do
+        stub_datacite_doi
+        expect { work.approve!(curator_user) }.to raise_error("Attempting to publish a Work with an existing S3 Bucket directory for: 10.34770/123-abc/#{work.id}")
+      end
+    end
+
+    context "when the pre curation bucket is empty" do
+      let(:fake_s3_service_pre) { stub_s3(data: []) }
+
+      before do
+        allow(fake_s3_service_pre.client).to receive(:head_object).with(bucket: "example-post-bucket", key: work.s3_object_key).and_return(true)
+      end
+
+      it "raises an error" do
+        stub_datacite_doi
+        expect { work.approve!(curator_user) }.to raise_error(AASM::InvalidTransition)
+      end
+    end
+
     context "when the Work is under active embargo" do
@@ -457,26 +475,6 @@
       end
     end
 
-    # Is this test providing any coverage?
-    describe "#pre_curation_uploads" do
-      let(:uploaded_file) do
-        fixture_file_upload("us_covid_2019.csv", "text/csv")
-      end
-
-      let(:uploaded_file2) do
-        fixture_file_upload("us_covid_2019.csv", "text/csv")
-      end
-
-      before do
-        stub_request(:put, /#{attachment_url}/).with(
-          body: "date,state,fips,cases,deaths\n2020-01-21,Washington,53,1,0\n2022-07-10,Wyoming,56,165619,1834\n"
-        ).to_return(status: 200)
-
-        work.pre_curation_uploads.attach(uploaded_file)
-        work.save
-      end
-    end
-
     describe "#draft" do
       let(:draft_work) do
         work = Work.new(group:, resource: FactoryBot.build(:resource), created_by_user_id: user.id)
@@ -924,7 +922,7 @@
           body: "date,state,fips,cases,deaths\n2020-01-21,Washington,53,1,0\n2022-07-10,Wyoming,56,165619,1834\n"
         ).to_return(status: 200)
 
-        work.pre_curation_uploads.attach(uploaded_file)
         work.save
       end
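
The `before`-block change above is the "more robust" piece from the commit message: rather than depending on call order, each fake service is bound to the constructor arguments it stands in for. The two styles side by side (doubles as defined in the spec):

```ruby
# Order-dependent (brittle): the first S3QueryService.new call returns the
# pre double and the second returns the post double, whatever was requested.
allow(S3QueryService).to receive(:new).and_return(fake_s3_service_pre, fake_s3_service_post)

# Argument-matched (robust): each double is returned only for its own bucket.
allow(S3QueryService).to receive(:new).with(work, "precuration").and_return(fake_s3_service_pre)
allow(S3QueryService).to receive(:new).with(work, "postcuration").and_return(fake_s3_service_post)
```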
3 changes: 0 additions & 3 deletions spec/services/s3_query_service_spec.rb
@@ -425,9 +425,6 @@
       fake_aws_client.stub(:list_objects_v2).and_return(fake_s3_resp)
       fake_s3_resp.stub(:to_h).and_return(s3_hash)
 
-      blob = ActiveStorage::Blob.new(filename: s3_key1, key: s3_key1, content_type: "", byte_size: 100, checksum: "abc123")
-      work.pre_curation_uploads << ActiveStorage::Attachment.new(blob:, name: :pre_curation_uploads)
-
       data_profile = s3_query_service.data_profile
       expect(data_profile[:objects]).to be_instance_of(Array)
       expect(data_profile[:ok]).to eq true
8 changes: 4 additions & 4 deletions spec/services/work_upload_edit_service_spec.rb
@@ -62,7 +62,7 @@
 
       upload_service = described_class.new(work, user)
       updated_work = upload_service.update_precurated_file_list(added_files, deleted_files)
-      filenames = updated_work.pre_curation_uploads_fast.map(&:filename)
+      filenames = updated_work.pre_curation_uploads.map(&:filename)
       expect(filenames).to eq(s3_data.map(&:filename))
       expect(fake_s3_service).not_to have_received(:delete_s3_object)
       expect(work.work_activity.count).to be 0
@@ -82,7 +82,7 @@
       expect { updated_work = upload_service.update_precurated_file_list(added_files, deleted_files) }.to change { BackgroundUploadSnapshot.count }.by 1
       perform_enqueued_jobs
 
-      expect(updated_work.pre_curation_uploads_fast.map(&:filename).sort).to eq([s3_file1.key, s3_file2.key].sort)
+      expect(updated_work.pre_curation_uploads.map(&:filename).sort).to eq([s3_file1.key, s3_file2.key].sort)
       expect(fake_s3_service).not_to have_received(:delete_s3_object)
 
       # it logs the addition (and no delete)
@@ -103,7 +103,7 @@
       upload_service = described_class.new(work, user)
       updated_work = nil
       expect { updated_work = upload_service.update_precurated_file_list(added_files, deleted_files) }.to change { BackgroundUploadSnapshot.count }.by 0
-      expect(updated_work.pre_curation_uploads_fast.map(&:filename)).to eq([s3_file2.key])
+      expect(updated_work.pre_curation_uploads.map(&:filename)).to eq([s3_file2.key])
       expect(fake_s3_service).to have_received(:delete_s3_object).with(s3_file1.key).once
 
       # it logs the delete (and no additions)
@@ -122,7 +122,7 @@
       allow(fake_s3_service).to receive(:client_s3_files).and_return([s3_file1], [s3_file2, s3_file3])
       upload_service = described_class.new(work, user)
       updated_work = upload_service.update_precurated_file_list(added_files, deleted_files)
-      list = updated_work.reload.pre_curation_uploads_fast
+      list = updated_work.reload.pre_curation_uploads
       perform_enqueued_jobs
 
      expect(list.map(&:filename)).to eq([s3_file3.key, s3_file2.key])
8 changes: 4 additions & 4 deletions spec/system/work_upload_s3_objects_spec.rb
@@ -53,12 +53,12 @@
       expect(page).to have_content filename1
       expect(page).to have_content filename2
       expect(page).to have_content "Total Size\n31.5 KB"
-      expect(work.reload.pre_curation_uploads_fast.length).to eq(3)
+      expect(work.reload.pre_curation_uploads.length).to eq(3)
     end
 
     it "renders S3 Bucket Objects and file uploads on the edit page", js: true do
       visit work_path(work)
-      expect(work.reload.pre_curation_uploads_fast.length).to eq(3)
+      expect(work.reload.pre_curation_uploads.length).to eq(3)
       visit edit_work_path(work) # cannot click Edit link because wizard does not show files
 
       expect(page).to have_content upload_file_name
@@ -80,12 +80,12 @@
       expect(page).not_to have_content upload_file_name
       expect(page).to have_content filename1
       expect(page).to have_content filename2
-      expect(work.reload.pre_curation_uploads_fast.length).to eq(2)
+      expect(work.reload.pre_curation_uploads.length).to eq(2)
     end
 
     it "renders only the S3 Bucket Objects on the edit page", js: true do
      visit work_path(work)
-      expect(work.pre_curation_uploads_fast.length).to eq(2)
+      expect(work.pre_curation_uploads.length).to eq(2)
       visit edit_work_path(work) # cannot click Edit link because wizard does not show files
 
       expect(page).not_to have_content upload_file_name
