From d0bb22ef0b918894142fac77e5f046b74bf234f0 Mon Sep 17 00:00:00 2001
From: carolyncole <1599081+carolyncole@users.noreply.github.com>
Date: Thu, 18 Apr 2024 13:06:46 -0400
Subject: [PATCH] Removing pre_curation_uploads attachment to active storage
 (#1775)

Also rename pre_curation_uploads_fast to pre_curation_uploads so that the
_fast concept does not exist in the code base.

Additionally, make the tests for publication more robust.
---
 app/models/readme.rb                          |  4 +-
 app/models/work.rb                            | 17 ++---
 app/services/work_validator.rb                |  4 +-
 docs/how_to_delete_a_work.md                  |  4 +-
 lib/tasks/works.rake                          |  2 +-
 spec/controllers/works_controller_spec.rb     | 18 ++---
 spec/models/work_spec.rb                      | 69 +++++++++----------
 spec/services/s3_query_service_spec.rb        |  3 -
 .../services/work_upload_edit_service_spec.rb |  8 +--
 spec/system/work_upload_s3_objects_spec.rb    |  8 +--
 10 files changed, 63 insertions(+), 74 deletions(-)

diff --git a/app/models/readme.rb b/app/models/readme.rb
index d4d257243..27f30d4b6 100644
--- a/app/models/readme.rb
+++ b/app/models/readme.rb
@@ -40,12 +40,12 @@ def s3_readme_idx
   end
 
   def file_names
-    @file_names ||= work.pre_curation_uploads_fast.map(&:filename_display)
+    @file_names ||= work.pre_curation_uploads.map(&:filename_display)
   end
 
   def remove_old_readme
     return if blank?
-    work.s3_query_service.delete_s3_object(work.pre_curation_uploads_fast[s3_readme_idx].key)
+    work.s3_query_service.delete_s3_object(work.pre_curation_uploads[s3_readme_idx].key)
   end
 
   def upload_readme(readme_file_param)
diff --git a/app/models/work.rb b/app/models/work.rb
index 29cc54d37..2262afe81 100644
--- a/app/models/work.rb
+++ b/app/models/work.rb
@@ -8,7 +8,6 @@ class InvalidGroupError < ::ArgumentError; end
   has_many :work_activity, -> { order(updated_at: :desc) }, dependent: :destroy
   has_many :user_work, -> { order(updated_at: :desc) }, dependent: :destroy
   has_many :upload_snapshots, -> { order(updated_at: :desc) }, dependent: :destroy
-  has_many_attached :pre_curation_uploads, service: :amazon_pre_curation
 
   belongs_to :group, class_name: "Group"
   belongs_to :curator, class_name: "User", foreign_key: "curator_user_id", optional: true
@@ -299,14 +298,14 @@ def current_transition
   def uploads
     return post_curation_uploads if approved?
 
-    pre_curation_uploads_fast
+    pre_curation_uploads
   end
 
   # Returns the list of files for the work with some basic information about each of them.
   # This method is much faster than `uploads` because it does not return the actual S3File
   # objects to the client, instead it returns just a few selected data elements.
   def file_list
-    s3_files = approved? ? post_curation_uploads : pre_curation_uploads_fast
+    s3_files = approved? ? post_curation_uploads : pre_curation_uploads
     files_info = s3_files.map do |s3_file|
       {
         "safe_id": s3_file.safe_id,
@@ -333,7 +332,7 @@ def total_file_size
   end
 
   # Fetches the data from S3 directly bypassing ActiveStorage
-  def pre_curation_uploads_fast
+  def pre_curation_uploads
     s3_query_service.client_s3_files.sort_by(&:filename)
   end
 
@@ -359,7 +358,7 @@ def post_curation_uploads(force_post_curation: false)
   end
 
   def s3_files
-    pre_curation_uploads_fast
+    pre_curation_uploads
   end
 
   def s3_client
@@ -440,7 +439,7 @@ def past_snapshots
   # @return [UploadSnapshot]
   def reload_snapshots(user_id: nil)
     work_changes = []
-    s3_files = pre_curation_uploads_fast
+    s3_files = pre_curation_uploads
     s3_filenames = s3_files.map(&:filename)
 
     upload_snapshot = latest_snapshot
@@ -595,11 +594,7 @@ def s3_object_persisted?(s3_file)
   end
 
   def publish_precurated_files(user)
-    # An error is raised if there are no files to be moved
-    raise(StandardError, "Attempting to publish a Work without attached uploads for #{s3_object_key}") if pre_curation_uploads_fast.empty? && post_curation_uploads.empty?
-
-    # We need to explicitly access to post-curation services here.
-    # Lets explicitly create it so the state of the work does not have any impact.
+    # We need to explicitly check the post-curation bucket here.
     s3_post_curation_query_service = S3QueryService.new(self, "postcuration")
 
     s3_dir = find_post_curation_s3_dir(bucket_name: s3_post_curation_query_service.bucket_name)
diff --git a/app/services/work_validator.rb b/app/services/work_validator.rb
index 9cb562d94..647259417 100644
--- a/app/services/work_validator.rb
+++ b/app/services/work_validator.rb
@@ -3,7 +3,7 @@ class WorkValidator
   attr_reader :work
 
   delegate :errors, :metadata, :resource, :ark, :doi, :user_entered_doi, :id, :group,
-           :pre_curation_uploads_fast, :post_curation_uploads, to: :work
+           :pre_curation_uploads, :post_curation_uploads, to: :work
 
   def initialize(work)
     @work = work
@@ -44,7 +44,7 @@ def valid_to_approve(user)
     unless user.has_role? :group_admin, group
       errors.add :base, "Unauthorized to Approve"
     end
-    if pre_curation_uploads_fast.empty? && post_curation_uploads.empty?
+    if pre_curation_uploads.empty? && post_curation_uploads.empty?
       errors.add :base, "Uploads must be present for a work to be approved"
     end
     errors.count == 0
diff --git a/docs/how_to_delete_a_work.md b/docs/how_to_delete_a_work.md
index d11384bdd..f02bf8fa9 100644
--- a/docs/how_to_delete_a_work.md
+++ b/docs/how_to_delete_a_work.md
@@ -3,7 +3,7 @@ These steps assume the work is assigned to a variable called `work`.
 1. Delete the pre curation uploads
    * ```
      service = S3QueryService.new(work, "postcuration")
-     work.pre_curation_uploads_fast.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
+     work.pre_curation_uploads.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
      ```
 1. Delete the post curation uploads
    * ```
@@ -16,7 +16,7 @@ These steps assume the work is assigned to a variable called `work`.
 Full script below...
 ```ruby
 service = S3QueryService.new(work, "postcuration")
-work.pre_curation_uploads_fast.each{|upload| service.client.delete_object({ bucket: service, bucket_name, key: upload.key})}
+work.pre_curation_uploads.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
 work.post_curation_uploads.each{|upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key})}
 work.destroy
 ```
diff --git a/lib/tasks/works.rake b/lib/tasks/works.rake
index cd5354d7e..65cf8a7bf 100644
--- a/lib/tasks/works.rake
+++ b/lib/tasks/works.rake
@@ -52,7 +52,7 @@ namespace :works do
       works = Work.where.not(id: work_exclusion_ids)
       works.each do |work|
         service = S3QueryService.new(work, "postcuration")
-        work.pre_curation_uploads_fast.each { |upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key }) }
+        work.pre_curation_uploads.each { |upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key }) }
         work.post_curation_uploads.each { |upload| service.client.delete_object({ bucket: service.bucket_name, key: upload.key }) }
         work.destroy
       end
diff --git a/spec/controllers/works_controller_spec.rb b/spec/controllers/works_controller_spec.rb
index ec8ad16d1..195d0d4cc 100644
--- a/spec/controllers/works_controller_spec.rb
+++ b/spec/controllers/works_controller_spec.rb
@@ -180,7 +180,7 @@
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast).not_to be_empty
+        expect(saved_work.pre_curation_uploads).not_to be_empty
         expect(fake_s3_service).not_to have_received(:delete_s3_object)
         background_snapshot = BackgroundUploadSnapshot.last
         expect(background_snapshot.work).to eq(work)
@@ -245,7 +245,7 @@
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast).not_to be_empty
+        expect(saved_work.pre_curation_uploads).not_to be_empty
         expect(fake_s3_service).not_to have_received(:delete_s3_object)
         background_snapshot = BackgroundUploadSnapshot.last
         expect(background_snapshot.work).to eq(work)
@@ -312,7 +312,7 @@
       end
 
       it "handles the update page" do
-        expect(work.pre_curation_uploads_fast.count).to eq 2
+        expect(work.pre_curation_uploads.count).to eq 2
         params = base_params.clone
         params[:work] = {
           pre_curation_uploads_added: [uploaded_file2],
@@ -321,7 +321,7 @@
         post(:update, params:)
 
         saved_work = Work.find(work.id)
-        expect(saved_work.pre_curation_uploads_fast.count).to eq 2
+        expect(saved_work.pre_curation_uploads.count).to eq 2
         expect(fake_s3_service).to have_received(:delete_s3_object).once
       end
     end
@@ -370,14 +370,14 @@
       end
 
       it "handles the update page" do
-        expect(work.pre_curation_uploads_fast.length).to eq(3)
+        expect(work.pre_curation_uploads.length).to eq(3)
 
         sign_in user
         post(:update, params:)
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast.length).to eq(1)
+        expect(saved_work.pre_curation_uploads.length).to eq(1)
         expect(ActiveStorage::PurgeJob).not_to have_received(:new)
 
         expect(fake_s3_service).to have_received(:delete_s3_object).with(s3_file1.key)
@@ -546,11 +546,11 @@
 
         saved_work = Work.find(work.id)
 
-        expect(saved_work.pre_curation_uploads_fast).not_to be_empty
+        expect(saved_work.pre_curation_uploads).not_to be_empty
 
         # order is alphabetical; we cannot change it by sending the files in a different order
-        expect(saved_work.pre_curation_uploads_fast.first.filename).to include(uploaded_files.last.original_filename.gsub(".csv", ""))
-        expect(saved_work.pre_curation_uploads_fast.last.filename).to include(uploaded_files.first.original_filename.gsub(".csv", ""))
+        expect(saved_work.pre_curation_uploads.first.filename).to include(uploaded_files.last.original_filename.gsub(".csv", ""))
+        expect(saved_work.pre_curation_uploads.last.filename).to include(uploaded_files.first.original_filename.gsub(".csv", ""))
 
         # original copies of the files get deleted
         expect(fake_s3_service).to have_received(:delete_s3_object).twice
diff --git a/spec/models/work_spec.rb b/spec/models/work_spec.rb
index 03a0294d8..18e705d97 100644
--- a/spec/models/work_spec.rb
+++ b/spec/models/work_spec.rb
@@ -173,10 +173,15 @@
       let(:pre_curated_data_profile) { { objects: [] } }
 
       let(:fake_s3_service_pre) { stub_s3(data: [file1, file2]) }
-      let(:fake_s3_service_post) { stub_s3(data: [file1, file2]) }
+      let(:fake_s3_service_post) { stub_s3 }
 
       before do
-        allow(S3QueryService).to receive(:new).and_return(fake_s3_service_pre, fake_s3_service_post)
+        # initialize the stubs first so that the allow calls below happen after them
+        fake_s3_service_post
+        fake_s3_service_pre
+
+        allow(S3QueryService).to receive(:new).with(work, "postcuration").and_return(fake_s3_service_post)
+        allow(S3QueryService).to receive(:new).with(work, "precuration").and_return(fake_s3_service_pre)
         allow(fake_s3_service_pre.client).to receive(:head_object).with(bucket: "example-post-bucket", key: work.s3_object_key).and_raise(Aws::S3::Errors::NotFound.new("blah", "error"))
         allow(fake_s3_service_post).to receive(:bucket_name).and_return("example-post-bucket")
         allow(fake_s3_service_pre).to receive(:bucket_name).and_return("example-pre-bucket")
@@ -197,19 +202,32 @@
         work.reload
 
         expect(fake_s3_service_pre).to have_received(:publish_files).once
-        expect(work.pre_curation_uploads).to be_empty
-        expect(work.post_curation_uploads).not_to be_empty
-        expect(work.post_curation_uploads.length).to eq(2)
-        expect(work.post_curation_uploads.first).to be_an(S3File)
-        expect(work.post_curation_uploads.first.key).to eq(file1.key)
-        expect(work.post_curation_uploads.last).to be_an(S3File)
-        expect(work.post_curation_uploads.last.key).to eq(file2.key)
-
-        expect(work.as_json["files"][0].keys).to eq([:filename, :size, :display_size, :url])
-        expect(work.as_json["files"][0][:filename]).to match(/10\.34770\/123-abc\/\d+\/us_covid_2019\.csv/)
-        expect(work.as_json["files"][0][:size]).to eq(1024)
-        expect(work.as_json["files"][0][:display_size]).to eq("1 KB")
-        expect(work.as_json["files"][0][:url]).to eq "https://example.data.globus.org/10.34770/123-abc/#{work.id}/us_covid_2019.csv"
+        expect(fake_s3_service_post).to have_received(:bucket_name).once
+        expect(fake_s3_service_pre.client).to have_received(:head_object).once
+      end
+
+      context "when the post curation bucket already exists" do
+        before do
+          allow(fake_s3_service_pre.client).to receive(:head_object).with(bucket: "example-post-bucket", key: work.s3_object_key).and_return(true)
+        end
+
+        it "raises an error" do
+          stub_datacite_doi
+          expect { work.approve!(curator_user) }.to raise_error("Attempting to publish a Work with an existing S3 Bucket directory for: 10.34770/123-abc/#{work.id}")
+        end
+      end
+
+      context "when the pre curation bucket is empty" do
+        let(:fake_s3_service_pre) { stub_s3(data: []) }
+
+        before do
+          allow(fake_s3_service_pre.client).to receive(:head_object).with(bucket: "example-post-bucket", key: work.s3_object_key).and_return(true)
+        end
+
+        it "raises an error" do
+          stub_datacite_doi
+          expect { work.approve!(curator_user) }.to raise_error(AASM::InvalidTransition)
+        end
       end
 
       context "when the Work is under active embargo" do
@@ -457,26 +475,6 @@
     end
   end
 
-  # Is this test providing any coverage?
-  describe "#pre_curation_uploads" do
-    let(:uploaded_file) do
-      fixture_file_upload("us_covid_2019.csv", "text/csv")
-    end
-
-    let(:uploaded_file2) do
-      fixture_file_upload("us_covid_2019.csv", "text/csv")
-    end
-
-    before do
-      stub_request(:put, /#{attachment_url}/).with(
-        body: "date,state,fips,cases,deaths\n2020-01-21,Washington,53,1,0\n2022-07-10,Wyoming,56,165619,1834\n"
-      ).to_return(status: 200)
-
-      work.pre_curation_uploads.attach(uploaded_file)
-      work.save
-    end
-  end
-
   describe "#draft" do
     let(:draft_work) do
       work = Work.new(group:, resource: FactoryBot.build(:resource), created_by_user_id: user.id)
@@ -924,7 +922,6 @@
         body: "date,state,fips,cases,deaths\n2020-01-21,Washington,53,1,0\n2022-07-10,Wyoming,56,165619,1834\n"
       ).to_return(status: 200)
 
-      work.pre_curation_uploads.attach(uploaded_file)
       work.save
     end
 
diff --git a/spec/services/s3_query_service_spec.rb b/spec/services/s3_query_service_spec.rb
index 3c764822f..7f3e1a06d 100644
--- a/spec/services/s3_query_service_spec.rb
+++ b/spec/services/s3_query_service_spec.rb
@@ -425,9 +425,6 @@
       fake_aws_client.stub(:list_objects_v2).and_return(fake_s3_resp)
       fake_s3_resp.stub(:to_h).and_return(s3_hash)
 
-      blob = ActiveStorage::Blob.new(filename: s3_key1, key: s3_key1, content_type: "", byte_size: 100, checksum: "abc123")
-      work.pre_curation_uploads << ActiveStorage::Attachment.new(blob:, name: :pre_curation_uploads)
-
       data_profile = s3_query_service.data_profile
       expect(data_profile[:objects]).to be_instance_of(Array)
       expect(data_profile[:ok]).to eq true
diff --git a/spec/services/work_upload_edit_service_spec.rb b/spec/services/work_upload_edit_service_spec.rb
index c76bb1a63..6fbb6c459 100644
--- a/spec/services/work_upload_edit_service_spec.rb
+++ b/spec/services/work_upload_edit_service_spec.rb
@@ -62,7 +62,7 @@
         upload_service = described_class.new(work, user)
         updated_work = upload_service.update_precurated_file_list(added_files, deleted_files)
 
-        filenames = updated_work.pre_curation_uploads_fast.map(&:filename)
+        filenames = updated_work.pre_curation_uploads.map(&:filename)
         expect(filenames).to eq(s3_data.map(&:filename))
         expect(fake_s3_service).not_to have_received(:delete_s3_object)
         expect(work.work_activity.count).to be 0
@@ -82,7 +82,7 @@
         expect { updated_work = upload_service.update_precurated_file_list(added_files, deleted_files) }.to change { BackgroundUploadSnapshot.count }.by 1
         perform_enqueued_jobs
 
-        expect(updated_work.pre_curation_uploads_fast.map(&:filename).sort).to eq([s3_file1.key, s3_file2.key].sort)
+        expect(updated_work.pre_curation_uploads.map(&:filename).sort).to eq([s3_file1.key, s3_file2.key].sort)
         expect(fake_s3_service).not_to have_received(:delete_s3_object)
 
         # it logs the addition (and no delete)
@@ -103,7 +103,7 @@
         upload_service = described_class.new(work, user)
         updated_work = nil
         expect { updated_work = upload_service.update_precurated_file_list(added_files, deleted_files) }.to change { BackgroundUploadSnapshot.count }.by 0
-        expect(updated_work.pre_curation_uploads_fast.map(&:filename)).to eq([s3_file2.key])
+        expect(updated_work.pre_curation_uploads.map(&:filename)).to eq([s3_file2.key])
         expect(fake_s3_service).to have_received(:delete_s3_object).with(s3_file1.key).once
 
         # it logs the delete (and no additions)
@@ -122,7 +122,7 @@
         allow(fake_s3_service).to receive(:client_s3_files).and_return([s3_file1], [s3_file2, s3_file3])
         upload_service = described_class.new(work, user)
         updated_work = upload_service.update_precurated_file_list(added_files, deleted_files)
-        list = updated_work.reload.pre_curation_uploads_fast
+        list = updated_work.reload.pre_curation_uploads
         perform_enqueued_jobs
 
         expect(list.map(&:filename)).to eq([s3_file3.key, s3_file2.key])
diff --git a/spec/system/work_upload_s3_objects_spec.rb b/spec/system/work_upload_s3_objects_spec.rb
index d8005da3f..d48630958 100644
--- a/spec/system/work_upload_s3_objects_spec.rb
+++ b/spec/system/work_upload_s3_objects_spec.rb
@@ -53,12 +53,12 @@
       expect(page).to have_content filename1
       expect(page).to have_content filename2
       expect(page).to have_content "Total Size\n31.5 KB"
-      expect(work.reload.pre_curation_uploads_fast.length).to eq(3)
+      expect(work.reload.pre_curation_uploads.length).to eq(3)
     end
 
     it "renders S3 Bucket Objects and file uploads on the edit page", js: true do
       visit work_path(work)
-      expect(work.reload.pre_curation_uploads_fast.length).to eq(3)
+      expect(work.reload.pre_curation_uploads.length).to eq(3)
       visit edit_work_path(work) # cannot click Edit link because wizard does not show files
       expect(page).to have_content upload_file_name
 
@@ -80,12 +80,12 @@
       expect(page).not_to have_content upload_file_name
       expect(page).to have_content filename1
       expect(page).to have_content filename2
-      expect(work.reload.pre_curation_uploads_fast.length).to eq(2)
+      expect(work.reload.pre_curation_uploads.length).to eq(2)
     end
 
     it "renders only the S3 Bucket Objects on the edit page", js: true do
       visit work_path(work)
-      expect(work.pre_curation_uploads_fast.length).to eq(2)
+      expect(work.pre_curation_uploads.length).to eq(2)
       visit edit_work_path(work) # cannot click Edit link because wizard does not show files
       expect(page).not_to have_content upload_file_name