From ca8768899cc584120d1f12e1ced0fd6929e2b203 Mon Sep 17 00:00:00 2001 From: Bilka Date: Sat, 7 Dec 2024 21:52:07 +0100 Subject: [PATCH 1/3] AO3-6529 Prevent work imports from AO3 --- app/controllers/works_controller.rb | 13 +++++++++++++ config/config.yml | 3 ++- config/locales/controllers/en.yml | 3 +++ spec/controllers/works/importing_spec.rb | 15 +++++++++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/app/controllers/works_controller.rb b/app/controllers/works_controller.rb index d87f5fb0596..e3ebc10059e 100755 --- a/app/controllers/works_controller.rb +++ b/app/controllers/works_controller.rb @@ -476,6 +476,19 @@ def import render(:new_import) && return end + @urls.each do |url| + uri = UrlFormatter.new(url).standardized + next unless ArchiveConfig.PERMITTED_HOSTS.include?(uri.host) + + work_id = uri.path[%r{/works/(\d+)}, 1] + flash.now[:error] = if work_id + t(".on_archive", bookmark_it_link: view_context.link_to(t(".bookmark_it"), new_work_bookmark_path(work_id))) + else + t(".on_archive", bookmark_it_link: t(".bookmark_it")) + end + render(:new_import) and return + end + @language_id = params[:language_id] if @language_id.empty? flash.now[:error] = ts("Language cannot be blank.") diff --git a/config/config.yml b/config/config.yml index 149a6859280..9e1b2808904 100644 --- a/config/config.yml +++ b/config/config.yml @@ -675,7 +675,8 @@ HIT_COUNT_ROLLOVER_HOUR: 3 # The batch size for calculating a work's filters from its tags: FILTER_UPDATE_BATCH_SIZE: 100 -# URLs for which we should not display the proxy notice. Alphabetical by +# URLs for which we should not display the proxy notice. URLs from these hosts +# are allowed in Abuse reports and disallowed in Work imports. Alphabetical by # environment. PERMITTED_HOSTS: [ # Production diff --git a/config/locales/controllers/en.yml b/config/locales/controllers/en.yml index ea56888e5be..d5f777fb11c 100644 --- a/config/locales/controllers/en.yml +++ b/config/locales/controllers/en.yml @@ -138,6 +138,9 @@ en: works: drafts: page_title: "%{username} - Drafts" + import: + bookmark_it: bookmark it directly + on_archive: 'We couldn''t successfully import that work, sorry: URL is for a work on the Archive. Please %{bookmark_it_link} instead.' wrangling_guidelines: create: Wrangling Guideline was successfully created. delete: Wrangling Guideline was successfully deleted. diff --git a/spec/controllers/works/importing_spec.rb b/spec/controllers/works/importing_spec.rb index e6bb82db344..1a6d91eeb32 100644 --- a/spec/controllers/works/importing_spec.rb +++ b/spec/controllers/works/importing_spec.rb @@ -18,6 +18,21 @@ expect(flash[:error]).to eq "Did you want to enter a URL?" end + context "when url is from this archive" do + it "is a work url" do + work_id = "1234" + params = { urls: "http://archiveofourown.org/works/#{work_id}" } + get :import, params: params + expect(flash[:error]).to eq "We couldn't successfully import that work, sorry: URL is for a work on the Archive. Please bookmark it directly instead." + end + + it "is a different url" do + params = { urls: "http://archiveofourown.org/tags/search" } + get :import, params: params + expect(flash[:error]).to eq "We couldn't successfully import that work, sorry: URL is for a work on the Archive. Please bookmark it directly instead." + end + end + it "there is an external author name but importing_for_others is NOT turned on" do params = { urls: "url1, url2", From f3249c9d54fe0f554153df3a9f15713c06a94112 Mon Sep 17 00:00:00 2001 From: Bilka Date: Thu, 12 Dec 2024 09:51:45 +0100 Subject: [PATCH 2/3] AO3-6529 Shut up rubocop --- app/controllers/works_controller.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/controllers/works_controller.rb b/app/controllers/works_controller.rb index e3ebc10059e..de25a96ef0c 100755 --- a/app/controllers/works_controller.rb +++ b/app/controllers/works_controller.rb @@ -486,7 +486,7 @@ def import else t(".on_archive", bookmark_it_link: t(".bookmark_it")) end - render(:new_import) and return + render(:new_import) and return # rubocop:disable Lint/NonLocalExitFromIterator end @language_id = params[:language_id] From 011216494efd3f98e19d5b4c8cc9b156147fc980 Mon Sep 17 00:00:00 2001 From: Bilka Date: Fri, 13 Dec 2024 18:15:28 +0100 Subject: [PATCH 3/3] AO3-6529 Moved code to StoryParser --- app/controllers/works_controller.rb | 13 ------------- app/models/story_parser.rb | 2 ++ config/locales/controllers/en.yml | 3 --- config/locales/models/en.yml | 2 ++ features/importing/work_import_errors.feature | 9 +++++++++ spec/controllers/works/importing_spec.rb | 15 --------------- 6 files changed, 13 insertions(+), 31 deletions(-) diff --git a/app/controllers/works_controller.rb b/app/controllers/works_controller.rb index de25a96ef0c..d87f5fb0596 100755 --- a/app/controllers/works_controller.rb +++ b/app/controllers/works_controller.rb @@ -476,19 +476,6 @@ def import render(:new_import) && return end - @urls.each do |url| - uri = UrlFormatter.new(url).standardized - next unless ArchiveConfig.PERMITTED_HOSTS.include?(uri.host) - - work_id = uri.path[%r{/works/(\d+)}, 1] - flash.now[:error] = if work_id - t(".on_archive", bookmark_it_link: view_context.link_to(t(".bookmark_it"), new_work_bookmark_path(work_id))) - else - t(".on_archive", bookmark_it_link: t(".bookmark_it")) - end - render(:new_import) and return # rubocop:disable Lint/NonLocalExitFromIterator - end - @language_id = params[:language_id] if @language_id.empty? flash.now[:error] = ts("Language cannot be blank.") diff --git a/app/models/story_parser.rb b/app/models/story_parser.rb index 96477074800..2c2e9e69d9e 100644 --- a/app/models/story_parser.rb +++ b/app/models/story_parser.rb @@ -799,6 +799,8 @@ def download_with_timeout(location, limit = 10) # we do a little cleanup here in case the user hasn't included the 'http://' # or if they've used capital letters or an underscore in the hostname uri = UrlFormatter.new(location).standardized + raise Error, I18n.t("story_parser.on_archive") if ArchiveConfig.PERMITTED_HOSTS.include?(uri.host) + response = Net::HTTP.get_response(uri) case response when Net::HTTPSuccess diff --git a/config/locales/controllers/en.yml b/config/locales/controllers/en.yml index d5f777fb11c..ea56888e5be 100644 --- a/config/locales/controllers/en.yml +++ b/config/locales/controllers/en.yml @@ -138,9 +138,6 @@ en: works: drafts: page_title: "%{username} - Drafts" - import: - bookmark_it: bookmark it directly - on_archive: 'We couldn''t successfully import that work, sorry: URL is for a work on the Archive. Please %{bookmark_it_link} instead.' wrangling_guidelines: create: Wrangling Guideline was successfully created. delete: Wrangling Guideline was successfully deleted. diff --git a/config/locales/models/en.yml b/config/locales/models/en.yml index 1c9c25b5f26..cffa9d02aa8 100644 --- a/config/locales/models/en.yml +++ b/config/locales/models/en.yml @@ -257,5 +257,7 @@ en: closed_ticket: must not be closed. invalid_department: must be in your department. required: must exist and not be spam. + story_parser: + on_archive: URL is for a work on the Archive. Please bookmark it directly instead. subscriptions: deleted: Deleted item diff --git a/features/importing/work_import_errors.feature b/features/importing/work_import_errors.feature index c8e67120575..cc084d33b56 100644 --- a/features/importing/work_import_errors.feature +++ b/features/importing/work_import_errors.feature @@ -15,3 +15,12 @@ Feature: Import Works Then I should see "We couldn't successfully import that work, sorry: We couldn't download anything from http://no-content. Please make sure that the URL is correct and complete, and try again." When I go to my works page Then I should see "Drafts (0)" + + Scenario: Cannot import works from the current archive + Given I set up importing + And I fill in "urls" with "https://archiveofourown.org/works/54711364" + And I select "English" from "Choose a language" + And I press "Import" + Then I should see "We couldn't successfully import that work, sorry: URL is for a work on the Archive. Please bookmark it directly instead." + When I go to my works page + Then I should see "Drafts (0)" diff --git a/spec/controllers/works/importing_spec.rb b/spec/controllers/works/importing_spec.rb index 1a6d91eeb32..e6bb82db344 100644 --- a/spec/controllers/works/importing_spec.rb +++ b/spec/controllers/works/importing_spec.rb @@ -18,21 +18,6 @@ expect(flash[:error]).to eq "Did you want to enter a URL?" end - context "when url is from this archive" do - it "is a work url" do - work_id = "1234" - params = { urls: "http://archiveofourown.org/works/#{work_id}" } - get :import, params: params - expect(flash[:error]).to eq "We couldn't successfully import that work, sorry: URL is for a work on the Archive. Please bookmark it directly instead." - end - - it "is a different url" do - params = { urls: "http://archiveofourown.org/tags/search" } - get :import, params: params - expect(flash[:error]).to eq "We couldn't successfully import that work, sorry: URL is for a work on the Archive. Please bookmark it directly instead." - end - end - it "there is an external author name but importing_for_others is NOT turned on" do params = { urls: "url1, url2",