From d0bb22ef0b918894142fac77e5f046b74bf234f0 Mon Sep 17 00:00:00 2001
From: carolyncole <1599081+carolyncole@users.noreply.github.com>
Date: Thu, 18 Apr 2024 13:06:46 -0400
Subject: [PATCH] Removing pre_curation_uploads attachment to active storage
 (#1775)

Also rename pre_curation_uploads_fast to pre_curation_uploads so that the
_fast concept does not exist in the code base.

Additionally, make the tests for publication more robust.
---
 app/models/readme.rb                          |  4 +-
 app/models/work.rb                            | 17 ++---
 app/services/work_validator.rb                |  4 +-
 docs/how_to_delete_a_work.md                  |  4 +-
 lib/tasks/works.rake                          |  2 +-
 spec/controllers/works_controller_spec.rb     | 18 ++---
 spec/models/work_spec.rb                      | 69 +++++++++----------
 spec/services/s3_query_service_spec.rb        |  3 -
 .../services/work_upload_edit_service_spec.rb |  8 +--
 spec/system/work_upload_s3_objects_spec.rb    |  8 +--
 10 files changed, 63 insertions(+), 74 deletions(-)

diff --git a/app/models/readme.rb b/app/models/readme.rb
index d4d257243..27f30d4b6 100644
--- a/app/models/readme.rb
+++ b/app/models/readme.rb
@@ -40,12 +40,12 @@ def s3_readme_idx
   end
 
   def file_names
-    @file_names ||= work.pre_curation_uploads_fast.map(&:filename_display)
+    @file_names ||= work.pre_curation_uploads.map(&:filename_display)
   end
 
   def remove_old_readme
     return if blank?
-    work.s3_query_service.delete_s3_object(work.pre_curation_uploads_fast[s3_readme_idx].key)
+    work.s3_query_service.delete_s3_object(work.pre_curation_uploads[s3_readme_idx].key)
   end
 
   def upload_readme(readme_file_param)
diff --git a/app/models/work.rb b/app/models/work.rb
index 29cc54d37..2262afe81 100644
--- a/app/models/work.rb
+++ b/app/models/work.rb
@@ -8,7 +8,6 @@ class InvalidGroupError < ::ArgumentError; end
   has_many :work_activity, -> { order(updated_at: :desc) }, dependent: :destroy
   has_many :user_work, -> { order(updated_at: :desc) }, dependent: :destroy
   has_many :upload_snapshots, -> { order(updated_at: :desc) }, dependent: :destroy
-  has_many_attached :pre_curation_uploads, service: :amazon_pre_curation
 
   belongs_to :group, class_name: "Group"
   belongs_to :curator, class_name: "User", foreign_key: "curator_user_id", optional: true
@@ -299,14 +298,14 @@ def current_transition
   def uploads
     return post_curation_uploads if approved?
 
-    pre_curation_uploads_fast
+    pre_curation_uploads
   end
 
   # Returns the list of files for the work with some basic information about each of them.
   # This method is much faster than `uploads` because it does not return the actual S3File
   # objects to the client, instead it returns just a few selected data elements.
   def file_list
-    s3_files = approved? ? post_curation_uploads : pre_curation_uploads_fast
+    s3_files = approved? ? post_curation_uploads : pre_curation_uploads
     files_info = s3_files.map do |s3_file|
       {
         "safe_id": s3_file.safe_id,
@@ -333,7 +332,7 @@ def total_file_size
   end
 
   # Fetches the data from S3 directly bypassing ActiveStorage
-  def pre_curation_uploads_fast
+  def pre_curation_uploads
     s3_query_service.client_s3_files.sort_by(&:filename)
   end
 
@@ -359,7 +358,7 @@ def post_curation_uploads(force_post_curation: false)
   end
 
   def s3_files
-    pre_curation_uploads_fast
+    pre_curation_uploads
   end
 
   def s3_client
@@ -440,7 +439,7 @@ def past_snapshots
   # @return [UploadSnapshot]
   def reload_snapshots(user_id: nil)
     work_changes = []
-    s3_files = pre_curation_uploads_fast
+    s3_files = pre_curation_uploads
     s3_filenames = s3_files.map(&:filename)
 
     upload_snapshot = latest_snapshot
@@ -595,11 +594,7 @@ def s3_object_persisted?(s3_file)
   end
 
   def publish_precurated_files(user)
-    # An error is raised if there are no files to be moved
-    raise(StandardError, "Attempting to publish a Work without attached uploads for #{s3_object_key}") if pre_curation_uploads_fast.empty? && post_curation_uploads.empty?
-
-    # We need to explicitly access to post-curation services here.
-    # Lets explicitly create it so the state of the work does not have any impact.
+    # We need to explicitly check the post-curation bucket here.
     s3_post_curation_query_service = S3QueryService.new(self, "postcuration")
 
     s3_dir = find_post_curation_s3_dir(bucket_name: s3_post_curation_query_service.bucket_name)
diff --git a/app/services/work_validator.rb b/app/services/work_validator.rb
index 9cb562d94..647259417 100644
--- a/app/services/work_validator.rb
+++ b/app/services/work_validator.rb
@@ -3,7 +3,7 @@ class WorkValidator
   attr_reader :work
 
   delegate :errors, :metadata, :resource, :ark, :doi, :user_entered_doi, :id, :group,
-           :pre_curation_uploads_fast, :post_curation_uploads, to: :work
+           :pre_curation_uploads, :post_curation_uploads, to: :work
 
   def initialize(work)
     @work = work
@@ -44,7 +44,7 @@ def valid_to_approve(user)
     unless user.has_role? :group_admin, group
       errors.add :base, "Unauthorized to Approve"
     end
-    if pre_curation_uploads_fast.empty? && post_curation_uploads.empty?
+    if pre_curation_uploads.empty? && post_curation_uploads.empty?
       errors.add :base, "Uploads must be present for a work to be approved"
     end
     errors.count == 0
diff --git a/docs/how_to_delete_a_work.md b/docs/how_to_delete_a_work.md
index d11384bdd..f02bf8fa9 100644
--- a/docs/how_to_delete_a_work.md
+++ b/docs/how_to_delete_a_work.md
@@ -3,7 +3,7 @@ These steps assume the work is assigned to a variable called `work`.
 1. Delete the pre curation uploads
    * ```
      service = S3QueryService.new(work, "postcuration")
-     work.pre_curation_uploads_fast.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
+     work.pre_curation_uploads.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
      ```
 1. Delete the post curation uploads
    * ```
@@ -16,7 +16,7 @@ These steps assume the work is assigned to a variable called `work`.
 Full script below...
 ```ruby
 service = S3QueryService.new(work, "postcuration")
-work.pre_curation_uploads_fast.each{|upload| service.client.delete_object({ bucket: service, bucket_name, key: upload.key})}
+work.pre_curation_uploads.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
 work.post_curation_uploads.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
 work.destroy
 ```
diff --git a/lib/tasks/works.rake b/lib/tasks/works.rake
index cd5354d7e..65cf8a7bf 100644
--- a/lib/tasks/works.rake
+++ b/lib/tasks/works.rake
@@ -52,7 +52,7 @@ namespace :works do
       works = Work.where.not(id: work_exclusion_ids)
       works.each do |work|
         service = S3QueryService.new(work, "postcuration")
-        work.pre_curation_uploads_fast.each { |upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key }) }
+        work.pre_curation_uploads.each { |upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key }) }
         work.post_curation_uploads.each { |upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key }) }
         work.destroy
       end
diff --git a/spec/controllers/works_controller_spec.rb b/spec/controllers/works_controller_spec.rb
index ec8ad16d1..195d0d4cc 100644
--- a/spec/controllers/works_controller_spec.rb
+++ b/spec/controllers/works_controller_spec.rb
@@ -180,7 +180,7 @@
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast).not_to be_empty
+        expect(saved_work.pre_curation_uploads).not_to be_empty
         expect(fake_s3_service).not_to have_received(:delete_s3_object)
         background_snapshot = BackgroundUploadSnapshot.last
         expect(background_snapshot.work).to eq(work)
@@ -245,7 +245,7 @@
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast).not_to be_empty
+        expect(saved_work.pre_curation_uploads).not_to be_empty
         expect(fake_s3_service).not_to have_received(:delete_s3_object)
         background_snapshot = BackgroundUploadSnapshot.last
         expect(background_snapshot.work).to eq(work)
@@ -312,7 +312,7 @@
       end
 
       it "handles the update page" do
-        expect(work.pre_curation_uploads_fast.count).to eq 2
+        expect(work.pre_curation_uploads.count).to eq 2
         params = base_params.clone
         params[:work] = {
           pre_curation_uploads_added: [uploaded_file2],
@@ -321,7 +321,7 @@
         post(:update, params:)
 
         saved_work = Work.find(work.id)
-        expect(saved_work.pre_curation_uploads_fast.count).to eq 2
+        expect(saved_work.pre_curation_uploads.count).to eq 2
         expect(fake_s3_service).to have_received(:delete_s3_object).once
       end
     end
@@ -370,14 +370,14 @@
       end
 
       it "handles the update page" do
-        expect(work.pre_curation_uploads_fast.length).to eq(3)
+        expect(work.pre_curation_uploads.length).to eq(3)
 
         sign_in user
         post(:update, params:)
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast.length).to eq(1)
+        expect(saved_work.pre_curation_uploads.length).to eq(1)
         expect(ActiveStorage::PurgeJob).not_to have_received(:new)
 
         expect(fake_s3_service).to have_received(:delete_s3_object).with(s3_file1.key)
@@ -546,11 +546,11 @@
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast).not_to be_empty
+        expect(saved_work.pre_curation_uploads).not_to be_empty
 
         # order is alphabetical; we cannot change it by sending the files in a different order
-        expect(saved_work.pre_curation_uploads_fast.first.filename).to include(uploaded_files.last.original_filename.gsub(".csv", ""))
-        expect(saved_work.pre_curation_uploads_fast.last.filename).to include(uploaded_files.first.original_filename.gsub(".csv", ""))
+        expect(saved_work.pre_curation_uploads.first.filename).to include(uploaded_files.last.original_filename.gsub(".csv", ""))
+        expect(saved_work.pre_curation_uploads.last.filename).to include(uploaded_files.first.original_filename.gsub(".csv", ""))
 
         # original copies of the files get deleted
         expect(fake_s3_service).to have_received(:delete_s3_object).twice
diff --git a/spec/models/work_spec.rb b/spec/models/work_spec.rb
index 03a0294d8..18e705d97 100644
--- a/spec/models/work_spec.rb
+++ b/spec/models/work_spec.rb
@@ -173,10 +173,15 @@
       let(:pre_curated_data_profile) { { objects: [] } }
 
       let(:fake_s3_service_pre) { stub_s3(data: [file1, file2]) }
-      let(:fake_s3_service_post) { stub_s3(data: [file1, file2]) }
+      let(:fake_s3_service_post) { stub_s3 }
 
       before do
-        allow(S3QueryService).to receive(:new).and_return(fake_s3_service_pre, fake_s3_service_post)
+        # initialize the stubs first so that the allow calls below happen after them
+        fake_s3_service_post
+        fake_s3_service_pre
+
+        allow(S3QueryService).to receive(:new).with(work, "postcuration").and_return(fake_s3_service_post)
+        allow(S3QueryService).to receive(:new).with(work, "precuration").and_return(fake_s3_service_pre)
         allow(fake_s3_service_pre.client).to receive(:head_object).with(bucket: "example-post-bucket", key: work.s3_object_key).and_raise(Aws::S3::Errors::NotFound.new("blah", "error"))
         allow(fake_s3_service_post).to receive(:bucket_name).and_return("example-post-bucket")
         allow(fake_s3_service_pre).to receive(:bucket_name).and_return("example-pre-bucket")
@@ -197,19 +202,32 @@
         work.reload
 
         expect(fake_s3_service_pre).to have_received(:publish_files).once
-        expect(work.pre_curation_uploads).to be_empty
-        expect(work.post_curation_uploads).not_to be_empty
-        expect(work.post_curation_uploads.length).to eq(2)
-        expect(work.post_curation_uploads.first).to be_an(S3File)
-        expect(work.post_curation_uploads.first.key).to eq(file1.key)
-        expect(work.post_curation_uploads.last).to be_an(S3File)
-        expect(work.post_curation_uploads.last.key).to eq(file2.key)
-
-        expect(work.as_json["files"][0].keys).to eq([:filename, :size, :display_size, :url])
-        expect(work.as_json["files"][0][:filename]).to match(/10\.34770\/123-abc\/\d+\/us_covid_2019\.csv/)
-        expect(work.as_json["files"][0][:size]).to eq(1024)
-        expect(work.as_json["files"][0][:display_size]).to eq("1 KB")
-        expect(work.as_json["files"][0][:url]).to eq "https://example.data.globus.org/10.34770/123-abc/#{work.id}/us_covid_2019.csv"
+        expect(fake_s3_service_post).to have_received(:bucket_name).once
+        expect(fake_s3_service_pre.client).to have_received(:head_object).once
+      end
+
+      context "when the post curation bucket already exists" do
+        before do
+          allow(fake_s3_service_pre.client).to receive(:head_object).with(bucket: "example-post-bucket", key: work.s3_object_key).and_return(true)
+        end
+
+        it "raises an error" do
+          stub_datacite_doi
+          expect { work.approve!(curator_user) }.to raise_error("Attempting to publish a Work with an existing S3 Bucket directory for: 10.34770/123-abc/#{work.id}")
+        end
+      end
+
+      context "when the pre curation bucket is empty" do
+        let(:fake_s3_service_pre) { stub_s3(data: []) }
+
+        before do
+          allow(fake_s3_service_pre.client).to receive(:head_object).with(bucket: "example-post-bucket", key: work.s3_object_key).and_return(true)
+        end
+
+        it "raises an error" do
+          stub_datacite_doi
+          expect { work.approve!(curator_user) }.to raise_error(AASM::InvalidTransition)
+        end
       end
 
       context "when the Work is under active embargo" do
@@ -457,26 +475,6 @@
     end
   end
 
-  # Is this test providing any coverage?
-  describe "#pre_curation_uploads" do
-    let(:uploaded_file) do
-      fixture_file_upload("us_covid_2019.csv", "text/csv")
-    end
-
-    let(:uploaded_file2) do
-      fixture_file_upload("us_covid_2019.csv", "text/csv")
-    end
-
-    before do
-      stub_request(:put, /#{attachment_url}/).with(
-        body: "date,state,fips,cases,deaths\n2020-01-21,Washington,53,1,0\n2022-07-10,Wyoming,56,165619,1834\n"
-      ).to_return(status: 200)
-
-      work.pre_curation_uploads.attach(uploaded_file)
-      work.save
-    end
-  end
-
   describe "#draft" do
     let(:draft_work) do
       work = Work.new(group:, resource: FactoryBot.build(:resource), created_by_user_id: user.id)
@@ -924,7 +922,6 @@
         body: "date,state,fips,cases,deaths\n2020-01-21,Washington,53,1,0\n2022-07-10,Wyoming,56,165619,1834\n"
       ).to_return(status: 200)
 
-      work.pre_curation_uploads.attach(uploaded_file)
       work.save
     end
 
diff --git a/spec/services/s3_query_service_spec.rb b/spec/services/s3_query_service_spec.rb
index 3c764822f..7f3e1a06d 100644
--- a/spec/services/s3_query_service_spec.rb
+++ b/spec/services/s3_query_service_spec.rb
@@ -425,9 +425,6 @@
       fake_aws_client.stub(:list_objects_v2).and_return(fake_s3_resp)
       fake_s3_resp.stub(:to_h).and_return(s3_hash)
 
-      blob = ActiveStorage::Blob.new(filename: s3_key1, key: s3_key1, content_type: "", byte_size: 100, checksum: "abc123")
-      work.pre_curation_uploads << ActiveStorage::Attachment.new(blob:, name: :pre_curation_uploads)
-
       data_profile = s3_query_service.data_profile
       expect(data_profile[:objects]).to be_instance_of(Array)
       expect(data_profile[:ok]).to eq true
diff --git a/spec/services/work_upload_edit_service_spec.rb b/spec/services/work_upload_edit_service_spec.rb
index c76bb1a63..6fbb6c459 100644
--- a/spec/services/work_upload_edit_service_spec.rb
+++ b/spec/services/work_upload_edit_service_spec.rb
@@ -62,7 +62,7 @@
         upload_service = described_class.new(work, user)
         updated_work = upload_service.update_precurated_file_list(added_files, deleted_files)
 
-        filenames = updated_work.pre_curation_uploads_fast.map(&:filename)
+        filenames = updated_work.pre_curation_uploads.map(&:filename)
         expect(filenames).to eq(s3_data.map(&:filename))
         expect(fake_s3_service).not_to have_received(:delete_s3_object)
         expect(work.work_activity.count).to be 0
@@ -82,7 +82,7 @@
         expect { updated_work = upload_service.update_precurated_file_list(added_files, deleted_files) }.to change { BackgroundUploadSnapshot.count }.by 1
         perform_enqueued_jobs
 
-        expect(updated_work.pre_curation_uploads_fast.map(&:filename).sort).to eq([s3_file1.key, s3_file2.key].sort)
+        expect(updated_work.pre_curation_uploads.map(&:filename).sort).to eq([s3_file1.key, s3_file2.key].sort)
         expect(fake_s3_service).not_to have_received(:delete_s3_object)
 
         # it logs the addition (and no delete)
@@ -103,7 +103,7 @@
         upload_service = described_class.new(work, user)
         updated_work = nil
         expect { updated_work = upload_service.update_precurated_file_list(added_files, deleted_files) }.to change { BackgroundUploadSnapshot.count }.by 0
-        expect(updated_work.pre_curation_uploads_fast.map(&:filename)).to eq([s3_file2.key])
+        expect(updated_work.pre_curation_uploads.map(&:filename)).to eq([s3_file2.key])
         expect(fake_s3_service).to have_received(:delete_s3_object).with(s3_file1.key).once
 
         # it logs the delete (and no additions)
@@ -122,7 +122,7 @@
         allow(fake_s3_service).to receive(:client_s3_files).and_return([s3_file1], [s3_file2, s3_file3])
         upload_service = described_class.new(work, user)
         updated_work = upload_service.update_precurated_file_list(added_files, deleted_files)
-        list = updated_work.reload.pre_curation_uploads_fast
+        list = updated_work.reload.pre_curation_uploads
         perform_enqueued_jobs
 
         expect(list.map(&:filename)).to eq([s3_file3.key, s3_file2.key])
diff --git a/spec/system/work_upload_s3_objects_spec.rb b/spec/system/work_upload_s3_objects_spec.rb
index d8005da3f..d48630958 100644
--- a/spec/system/work_upload_s3_objects_spec.rb
+++ b/spec/system/work_upload_s3_objects_spec.rb
@@ -53,12 +53,12 @@
       expect(page).to have_content filename1
       expect(page).to have_content filename2
       expect(page).to have_content "Total Size\n31.5 KB"
-      expect(work.reload.pre_curation_uploads_fast.length).to eq(3)
+      expect(work.reload.pre_curation_uploads.length).to eq(3)
     end
 
     it "renders S3 Bucket Objects and file uploads on the edit page", js: true do
       visit work_path(work)
-      expect(work.reload.pre_curation_uploads_fast.length).to eq(3)
+      expect(work.reload.pre_curation_uploads.length).to eq(3)
       visit edit_work_path(work) # cannot click Edit link because wizard does not show files
       expect(page).to have_content upload_file_name
 
@@ -80,12 +80,12 @@
       expect(page).not_to have_content upload_file_name
       expect(page).to have_content filename1
       expect(page).to have_content filename2
-      expect(work.reload.pre_curation_uploads_fast.length).to eq(2)
+      expect(work.reload.pre_curation_uploads.length).to eq(2)
     end
 
     it "renders only the S3 Bucket Objects on the edit page", js: true do
       visit work_path(work)
-      expect(work.pre_curation_uploads_fast.length).to eq(2)
+      expect(work.pre_curation_uploads.length).to eq(2)
       visit edit_work_path(work) # cannot click Edit link because wizard does not show files
       expect(page).not_to have_content upload_file_name