Adds GitHub Actions for CI (#2332)

* Creates GitHub Actions YML for Rspec Testing
* Fixes Rubocop errors
* Updates name of CI task
WGBH-MLA · Oct 6, 2021 · 7007baa · 7007baa
1 parent 70c5661
commit 7007baa
Show file tree

Hide file tree

Showing 12 changed files with 75 additions and 94 deletions.
diff --git a/.github/workflows/aapb-ci.yml b/.github/workflows/aapb-ci.yml
@@ -0,0 +1,30 @@
+name: CI RSpec Tests
+
+on: [push, pull_request]
+
+jobs:
+  tests:
+    name: CI
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Install libcurl4-openssl-dev for Curb Gem
+        run: sudo apt-get install libcurl4-openssl-dev
+
+      - name: Setup Ruby and install RubyGems
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: 2.4.4
+          bundler-cache: true
+
+      - name: Start Rails
+        run: nohup bundle exec rails server &
+
+      - name: Run Rspec specs using CI config
+        run: bundle exec rake ci_specs
+
+      - name: Run Rubocop code analyzer and Formatter
+        run: bundle exec rubocop -D
diff --git a/.travis.yml b/.travis.yml
diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ Install it as instructed.
 
 At this point you can
 
-- Run tests (skipping Ci tests): `rspec --tag ~not_on_travis`
+- Run tests (skipping Ci tests): `rspec --tag ~not_on_ci`
 (If it's not 100% passing, let us know!)
 - Ingest the fixtures: `ruby scripts/download_clean_ingest.rb --stdout-log --files spec/fixtures/pbcore/clean-*.xml`
 - Start rails: `rails s`

diff --git a/app/controllers/catalog_controller.rb b/app/controllers/catalog_controller.rb
@@ -245,7 +245,7 @@ def index
           transcript_file = TranscriptFile.new(solr_doc.transcript_src)
           if transcript_file.file_type == TranscriptFile::JSON_FILE
 
-            ts = TimecodeSnippet.new(this_id, @terms_array,  transcript_file.plaintext, JSON.parse(transcript_file.content)["parts"])
+            ts = TimecodeSnippet.new(this_id, @terms_array, transcript_file.plaintext, JSON.parse(transcript_file.content)["parts"])
 
             @snippets[this_id][:transcript] = ts.snippet
             @snippets[this_id][:transcript_timecode_url] = ts.url_at_timecode
@@ -257,10 +257,9 @@ def index
 
         end
 
-        if !caption_file.captions_src.nil?
+        unless caption_file.captions_src.nil?
           s = Snippet.new(this_id, @terms_array, caption_file.text)
           @snippets[this_id][:caption] = s.snippet
-
         end
       end
     end

diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb
@@ -15,29 +15,24 @@ def query_to_terms_array(query)
       sw
     end
 
-
     terms_array = if query.include?(%("))
+                    # pull out double quoted terms!
+                    quoteds = query.scan(/"([^"]*)"/)
 
-      # pull out double quoted terms!
-      quoteds = query.scan(/"([^"]*)"/)
-      # now remove them from the remaining query
-
-      quoteds.each {|q| query.remove!(q.first) }
-
-
-      query = query.gsub(/[[:punct:]]/, '').upcase
+                    # now remove them from the remaining query
+                    quoteds.each { |q| query.remove!(q.first) }
 
-      # put it all together (removing any term thats just a stopword)
-      # and remove punctuation now that we've used our ""
-      quoteds.flatten.map(&:upcase) + (query.split(" ").delete_if { |term| stopwords.any? { |stopword| stopword == term } })
+                    query = query.gsub(/[[:punct:]]/, '').upcase
 
-
-    else
-      query.split(" ").delete_if { |term| stopwords.any? { |stopword| stopword == term } }
-    end
+                    # put it all together (removing any term thats just a stopword)
+                    # and remove punctuation now that we've used our ""
+                    quoteds.flatten.map(&:upcase) + (query.split(" ").delete_if { |term| stopwords.any? { |stopword| stopword == term } })
+                  else
+                    query.split(" ").delete_if { |term| stopwords.any? { |stopword| stopword == term } }
+                  end
 
     # remove extra spaces and turn each term into word array
-    terms_array.map {|term| term.upcase.strip.gsub(/[^\w\s]/, "").split(" ") }
+    terms_array.map { |term| term.upcase.strip.gsub(/[^\w\s]/, "").split(" ") }
   end
 
   def get_last_day(month)

diff --git a/app/helpers/snippet_helper.rb b/app/helpers/snippet_helper.rb
@@ -10,13 +10,13 @@ def initialize(guid, terms_array, plaintext)
 
     def left_chunk_indicies(match_index)
       return 0..0 if match_index == 0
-      chunk_start = match_index-100 < 0 ? 0 : match_index-100
-      chunk_end = match_index-1
+      chunk_start = match_index - 100 < 0 ? 0 : match_index - 100
+      chunk_end = match_index - 1
       chunk_start..chunk_end
     end
 
     def right_chunk_indicies(match_index)
-      match_index..match_index+100
+      match_index..match_index + 100
     end
 
     def snippet
@@ -27,19 +27,17 @@ def snippet
         # stupid to rejoin word_array here but makes more sense than storing it twice
         this_term = word_array.join(" ")
 
-        start_index = @plaintext.index( /\s{1}#{this_term}\s{1}|\s{1}#{this_term}\z|\A#{this_term}\s{1}/ )
-        if start_index
+        start_index = @plaintext.index(/\s{1}#{this_term}\s{1}|\s{1}#{this_term}\z|\A#{this_term}\s{1}/)
+        next unless start_index
 
-          # grab the chunk around our match and clean up the crap 
-          txt = ( @plaintext[left_chunk_indicies(start_index)] + @plaintext[right_chunk_indicies(start_index)] ).gsub(/\A\w+\s{1}/, '').gsub(/\s{1}\w+\z/, '')
+        # grab the chunk around our match and clean up the crap
+        txt = (@plaintext[left_chunk_indicies(start_index)] + @plaintext[right_chunk_indicies(start_index)]).gsub(/\A\w+\s{1}/, '').gsub(/\s{1}\w+\z/, '')
 
-          # and highlifght 
-          break
-        end
-      end
-      if txt
-        highlight_snippet( txt, this_term )
+        # and highlifght
+        break
       end
+
+      highlight_snippet(txt, this_term) if txt
     end
 
     # shared methods
@@ -49,27 +47,21 @@ def highlight_snippet(snippet, match_text)
   end
 
   class TimecodeSnippet < Snippet
+    attr_reader :matched_term, :match_timecode
+
     def initialize(guid, terms_array, plaintext, json_parts)
       terms_array.each do |word_array|
         @match_timecode = find_match_timecode(json_parts, word_array)
 
         # used for the url
         @matched_term = word_array.join(" ")
 
-        # make this here so we dont have too       
+        # make this here so we dont have too
         break if @match_timecode
       end
       super(guid, terms_array, plaintext)
     end
 
-    def matched_term
-      @matched_term
-    end
-
-    def match_timecode
-      @match_timecode
-    end
-
     def url_at_timecode
       "/catalog/#{@guid}?term=#{@matched_term}&#at_#{@match_timecode}_s"
     end
@@ -84,27 +76,22 @@ def find_match_timecode(json_parts, words_to_match)
         # we found every word in our query chunk, goodbye!
 
         part_hash["text"].split(" ").each do |word|
-
           return match_timecode if query_terms_matched == words_to_match.length
 
           # get first occurrence of each word and pair it with the most accurate time stamp we have
 
-          if word.upcase == words_to_match[ query_terms_matched ]
+          if word.upcase == words_to_match[query_terms_matched]
             # record the tc because we started a match and we werent already a'matchin
             match_timecode = part_hash["start_time"] if query_terms_matched == 0
 
             query_terms_matched += 1
           else
             query_terms_matched = 0
           end
-
         end
-
       end
 
       match_timecode
     end
-
   end
 end
-
diff --git a/lib/tasks/ci.rake → lib/tasks/ci_specs.rake b/lib/tasks/ci.rake → lib/tasks/ci_specs.rake
@@ -4,7 +4,7 @@ if Rails && !Rails.env.production?
   require 'rspec/core/rake_task'
 
   desc 'Run tests as if on CI server'
-  task :ci do
+  task :ci_specs do
     require 'jettywrapper'
 
     # Set the version of hydra-jetty we want, and download a clean copy of it.
@@ -26,7 +26,7 @@ if Rails && !Rails.env.production?
     # running the code in the block passed to it.
     error = Jettywrapper.wrap(jetty_params) do
       task = RSpec::Core::RakeTask.new(:spec)
-      task.rspec_opts = '--tag ~not_on_travis'
+      task.rspec_opts = '--tag ~not_on_ci'
       task.run_task(true)
     end
     raise "test failures: #{error}" if error

diff --git a/spec/aws/s3_spec.rb b/spec/aws/s3_spec.rb
@@ -5,7 +5,7 @@
 
 describe 'S3' do
   # For AAPB, all the S3 content is open, since the video delivery is managed by Ci.
-  describe 'policy implementation', not_on_travis: true do
+  describe 'policy implementation', not_on_ci: true do
     def to_pretty_json(string_io)
       JSON.pretty_generate(JSON.parse(string_io.string))
     end

diff --git a/spec/features/media_spec.rb b/spec/features/media_spec.rb
@@ -5,7 +5,7 @@
 
 # TODO: was this test broken when we switche Ci credentials?
 
-describe 'Media URLs', not_on_travis: true do
+describe 'Media URLs', not_on_ci: true do
   before(:all) do
     PBCoreIngester.load_fixtures('spec/fixtures/pbcore/clean-MOCK.xml')
   end

diff --git a/spec/helpers/snippet_helper_spec.rb b/spec/helpers/snippet_helper_spec.rb
@@ -19,27 +19,23 @@
   let(:txt_example) { File.read('./spec/fixtures/transcripts/cpb-aacip-507-0000000j8w-transcript.txt') }
   let(:txt_transcript) { TranscriptFile.new(txt_url) }
 
-
   # queries are split up into no-stopword arrays in appl helper
-  let(:transcript_query_1) { [ ["ARKANSAS"] ] }
-  let(:transcript_query_2) { [ ["SENATOR", "PRYOR"] ] }
-  let(:transcript_query_3) { [ ["FILED", "FOR", "A", "DELAY"], ["NEVER", "GONNA", "GET", "MATCHED"] ] }
-  let(:transcript_query_4) { [ ["TWO", "DOZEN", "FINE", "POTENTIAL", "NOMINEES", "FOR", "THE", "POSITION", "OF", "SECRETARY", "OF", "THE", "INTERIOR"] ] }
-
+  let(:transcript_query_1) { [["ARKANSAS"]] }
+  let(:transcript_query_2) { [%w(SENATOR PRYOR)] }
+  let(:transcript_query_3) { [%w(FILED FOR A DELAY), %w(NEVER GONNA GET MATCHED)] }
+  let(:transcript_query_4) { [%w(TWO DOZEN FINE POTENTIAL NOMINEES FOR THE POSITION OF SECRETARY OF THE INTERIOR)] }
 
   let(:srt_example) { File.read('./spec/fixtures/captions/srt/srt_example.srt') }
   let(:caption_file) { CaptionFile.new("1a2b") }
 
-
   # single
-  let(:transcript_snippet_1) { TimecodeSnippet.new('cpb-aacip-111-21ghx7d6', transcript_query_1, json_transcript.plaintext, JSON.parse(json_transcript.content)["parts"] ) }
+  let(:transcript_snippet_1) { TimecodeSnippet.new('cpb-aacip-111-21ghx7d6', transcript_query_1, json_transcript.plaintext, JSON.parse(json_transcript.content)["parts"]) }
   # compound
-  let(:transcript_snippet_2) { TimecodeSnippet.new('cpb-aacip-111-21ghx7d6', transcript_query_2, json_transcript.plaintext, JSON.parse(json_transcript.content)["parts"] ) }
+  let(:transcript_snippet_2) { TimecodeSnippet.new('cpb-aacip-111-21ghx7d6', transcript_query_2, json_transcript.plaintext, JSON.parse(json_transcript.content)["parts"]) }
   # caption
-  let(:transcript_snippet_3) { Snippet.new('cpb-aacip-111-21ghx7d6', transcript_query_3, caption_file.text ) }
+  let(:transcript_snippet_3) { Snippet.new('cpb-aacip-111-21ghx7d6', transcript_query_3, caption_file.text) }
   # txt
-  let(:transcript_snippet_4) { Snippet.new('cpb-aacip-507-0000000j8w', transcript_query_4, txt_transcript.plaintext ) }
-
+  let(:transcript_snippet_4) { Snippet.new('cpb-aacip-507-0000000j8w', transcript_query_4, txt_transcript.plaintext) }
 
   before do
     # Stub requests so we don't actually have to fetch them remotely. But note
@@ -48,32 +44,28 @@
     WebMock.stub_request(:get, json_url).to_return(body: json_example)
     WebMock.stub_request(:get, txt_url).to_return(body: txt_example)
 
-
     CaptionFile.any_instance.stub(:captions_src).and_return('https://s3.amazonaws.com/americanarchive.org/captions/1a2b.srt')
     WebMock.stub_request(:get, caption_file.srt_url).to_return(body: srt_example)
   end
 
   describe '#new' do
     it 'initializes with the expected attrs' do
-
       expect(transcript_snippet_1.snippet).to eq(" FOR THIS 15TH ANNIVERSARY CELEBRATION AND DEDICATION CEREMONY IS MR GEORGE CAMPBELL CHAIRMAN OF THE <mark>ARKANSAS</mark> EDUCATIONAL TELEVISION COMMISSION GOOD AFTERNOON DISTINGUISHED GUESTS LADIES AND GENTLEMEN ")
       expect(transcript_snippet_1.match_timecode).to eq("50.24")
     end
 
     it 'initializes with expected attrs for compound query' do
-
       expect(transcript_snippet_2.snippet).to eq(" NOW I MAKE NO APOLOGIES FOR STORIES I MAY OR MAY NOT TALE CAN CERTAINLY RAISE BUT I JUST CANT THINK <mark>SENATOR PRYOR</mark> ALL TALKING ABOUT LEE REEVES FROM A PARTICULARLY GOVERNMENTAL AND STATE GOVERNMENT")
       expect(transcript_snippet_2.match_timecode).to eq("2061.79")
     end
 
-
     it 'initializes with expected attrs for caption file' do
       expect(transcript_snippet_3.snippet).to eq(" THE SUMMER OF 1958 THAT ALLOWED THE LOST YEAR TO HAPPEN THE FIRST ONE WAS THAT THE SCHOOL BOARD HAD <mark>FILED FOR A DELAY</mark>  A NUMBER OF BUSINESS LEADERS PERSUADED A MAJORITY OF THE MEMBERS OF THE SCHOOL")
     end
 
     it 'initializes with expected attrs for txt file' do
       expect(transcript_snippet_4.snippet).to eq("AT THE OLD EXECUTIVE OFFICE BUILDING PRES RONALD REAGAN AFTER EXAMINING THE RECORDS OF MORE THAN <mark>TWO DOZEN FINE POTENTIAL NOMINEES FOR THE POSITION OF SECRETARY OF THE INTERIOR</mark> I HAVE DECIDED TO")
-    end    
+    end
   end
 
   describe '#url_at_timecode' do
@@ -88,7 +80,7 @@
     # end
 
     it 'uses stopwords.txt to remove words not used in actual search' do
-      expect(query_to_terms_array(%(extremist is cheddar "president of the Eisenhower"))).to eq([["PRESIDENT", "OF", "THE", "EISENHOWER"], ["EXTREMIST"], ["CHEDDAR"]])
+      expect(query_to_terms_array(%(extremist is cheddar "president of the Eisenhower"))).to eq([%w(PRESIDENT OF THE EISENHOWER), ["EXTREMIST"], ["CHEDDAR"]])
     end
   end
   after(:all) do

diff --git a/spec/models/caption_file_spec.rb b/spec/models/caption_file_spec.rb
@@ -86,7 +86,6 @@
     end
   end
 
-
   after(:all) do
     # Re-disable WebMock so other tests can use actual connections.
     WebMock.disable!

diff --git a/spec/scripts/downloader_spec.rb b/spec/scripts/downloader_spec.rb
@@ -2,7 +2,7 @@
 require_relative '../../scripts/lib/downloader'
 
 describe Downloader do
-  it 'can download the past 7 days', not_on_travis: true do
+  it 'can download the past 7 days', not_on_ci: true do
     # I really don't think it's a good idea to make the tests dependent
     # on the activity of the catalogers, though this is a good test otherwise.
     Dir.mktmpdir do |tmpdir|
-Original file line number
+Diff line change
@@ Expand Up / @@ -86,7 +86,6 @@ @@
         end
       end
       after(:all) do
         # Re-disable WebMock so other tests can use actual connections.
         WebMock.disable!
@@ Expand Down @@