Skip to content

Commit

Permalink
Adds GitHub Actions for CI (#2332)
Browse files Browse the repository at this point in the history
* Creates GitHub Actions YML for Rspec Testing

* Fixes Rubocop errors

* Updates name of CI task
  • Loading branch information
jasoncorum authored Oct 6, 2021
1 parent 70c5661 commit 7007baa
Show file tree
Hide file tree
Showing 12 changed files with 75 additions and 94 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/aapb-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: CI RSpec Tests

on: [push, pull_request]

jobs:
tests:
name: CI
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v2

- name: Install libcurl4-openssl-dev for Curb Gem
run: sudo apt-get install libcurl4-openssl-dev

- name: Setup Ruby and install RubyGems
uses: ruby/setup-ruby@v1
with:
ruby-version: 2.4.4
bundler-cache: true

- name: Start Rails
run: nohup bundle exec rails server &

- name: Run Rspec specs using CI config
run: bundle exec rake ci_specs

- name: Run Rubocop code analyzer and Formatter
run: bundle exec rubocop -D
21 changes: 0 additions & 21 deletions .travis.yml

This file was deleted.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Install it as instructed.

At this point you can

- Run tests (skipping Ci tests): `rspec --tag ~not_on_travis`
- Run tests (skipping Ci tests): `rspec --tag ~not_on_ci`
(If it's not 100% passing, let us know!)
- Ingest the fixtures: `ruby scripts/download_clean_ingest.rb --stdout-log --files spec/fixtures/pbcore/clean-*.xml`
- Start rails: `rails s`
Expand Down
5 changes: 2 additions & 3 deletions app/controllers/catalog_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def index
transcript_file = TranscriptFile.new(solr_doc.transcript_src)
if transcript_file.file_type == TranscriptFile::JSON_FILE

ts = TimecodeSnippet.new(this_id, @terms_array, transcript_file.plaintext, JSON.parse(transcript_file.content)["parts"])
ts = TimecodeSnippet.new(this_id, @terms_array, transcript_file.plaintext, JSON.parse(transcript_file.content)["parts"])

@snippets[this_id][:transcript] = ts.snippet
@snippets[this_id][:transcript_timecode_url] = ts.url_at_timecode
Expand All @@ -257,10 +257,9 @@ def index

end

if !caption_file.captions_src.nil?
unless caption_file.captions_src.nil?
s = Snippet.new(this_id, @terms_array, caption_file.text)
@snippets[this_id][:caption] = s.snippet

end
end
end
Expand Down
29 changes: 12 additions & 17 deletions app/helpers/application_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,24 @@ def query_to_terms_array(query)
sw
end


terms_array = if query.include?(%("))
# pull out double quoted terms!
quoteds = query.scan(/"([^"]*)"/)

# pull out double quoted terms!
quoteds = query.scan(/"([^"]*)"/)
# now remove them from the remaining query

quoteds.each {|q| query.remove!(q.first) }


query = query.gsub(/[[:punct:]]/, '').upcase
# now remove them from the remaining query
quoteds.each { |q| query.remove!(q.first) }

# put it all together (removing any term thats just a stopword)
# and remove punctuation now that we've used our ""
quoteds.flatten.map(&:upcase) + (query.split(" ").delete_if { |term| stopwords.any? { |stopword| stopword == term } })
query = query.gsub(/[[:punct:]]/, '').upcase


else
query.split(" ").delete_if { |term| stopwords.any? { |stopword| stopword == term } }
end
# put it all together (removing any term thats just a stopword)
# and remove punctuation now that we've used our ""
quoteds.flatten.map(&:upcase) + (query.split(" ").delete_if { |term| stopwords.any? { |stopword| stopword == term } })
else
query.split(" ").delete_if { |term| stopwords.any? { |stopword| stopword == term } }
end

# remove extra spaces and turn each term into word array
terms_array.map {|term| term.upcase.strip.gsub(/[^\w\s]/, "").split(" ") }
terms_array.map { |term| term.upcase.strip.gsub(/[^\w\s]/, "").split(" ") }
end

def get_last_day(month)
Expand Down
43 changes: 15 additions & 28 deletions app/helpers/snippet_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ def initialize(guid, terms_array, plaintext)

def left_chunk_indicies(match_index)
return 0..0 if match_index == 0
chunk_start = match_index-100 < 0 ? 0 : match_index-100
chunk_end = match_index-1
chunk_start = match_index - 100 < 0 ? 0 : match_index - 100
chunk_end = match_index - 1
chunk_start..chunk_end
end

def right_chunk_indicies(match_index)
match_index..match_index+100
match_index..match_index + 100
end

def snippet
Expand All @@ -27,19 +27,17 @@ def snippet
# stupid to rejoin word_array here but makes more sense than storing it twice
this_term = word_array.join(" ")

start_index = @plaintext.index( /\s{1}#{this_term}\s{1}|\s{1}#{this_term}\z|\A#{this_term}\s{1}/ )
if start_index
start_index = @plaintext.index(/\s{1}#{this_term}\s{1}|\s{1}#{this_term}\z|\A#{this_term}\s{1}/)
next unless start_index

# grab the chunk around our match and clean up the crap
txt = ( @plaintext[left_chunk_indicies(start_index)] + @plaintext[right_chunk_indicies(start_index)] ).gsub(/\A\w+\s{1}/, '').gsub(/\s{1}\w+\z/, '')
# grab the chunk around our match and clean up the crap
txt = (@plaintext[left_chunk_indicies(start_index)] + @plaintext[right_chunk_indicies(start_index)]).gsub(/\A\w+\s{1}/, '').gsub(/\s{1}\w+\z/, '')

# and highlight
break
end
end
if txt
highlight_snippet( txt, this_term )
# and highlight
break
end

highlight_snippet(txt, this_term) if txt
end

# shared methods
Expand All @@ -49,27 +47,21 @@ def highlight_snippet(snippet, match_text)
end

class TimecodeSnippet < Snippet
attr_reader :matched_term, :match_timecode

def initialize(guid, terms_array, plaintext, json_parts)
terms_array.each do |word_array|
@match_timecode = find_match_timecode(json_parts, word_array)

# used for the url
@matched_term = word_array.join(" ")

# make this here so we dont have too
# make this here so we dont have too
break if @match_timecode
end
super(guid, terms_array, plaintext)
end

def matched_term
@matched_term
end

def match_timecode
@match_timecode
end

def url_at_timecode
"/catalog/#{@guid}?term=#{@matched_term}&#at_#{@match_timecode}_s"
end
Expand All @@ -84,27 +76,22 @@ def find_match_timecode(json_parts, words_to_match)
# we found every word in our query chunk, goodbye!

part_hash["text"].split(" ").each do |word|

return match_timecode if query_terms_matched == words_to_match.length

# get first occurrence of each word and pair it with the most accurate time stamp we have

if word.upcase == words_to_match[ query_terms_matched ]
if word.upcase == words_to_match[query_terms_matched]
# record the tc because we started a match and we werent already a'matchin
match_timecode = part_hash["start_time"] if query_terms_matched == 0

query_terms_matched += 1
else
query_terms_matched = 0
end

end

end

match_timecode
end

end
end

4 changes: 2 additions & 2 deletions lib/tasks/ci.rake → lib/tasks/ci_specs.rake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ if Rails && !Rails.env.production?
require 'rspec/core/rake_task'

desc 'Run tests as if on CI server'
task :ci do
task :ci_specs do
require 'jettywrapper'

# Set the version of hydra-jetty we want, and download a clean copy of it.
Expand All @@ -26,7 +26,7 @@ if Rails && !Rails.env.production?
# running the code in the block passed to it.
error = Jettywrapper.wrap(jetty_params) do
task = RSpec::Core::RakeTask.new(:spec)
task.rspec_opts = '--tag ~not_on_travis'
task.rspec_opts = '--tag ~not_on_ci'
task.run_task(true)
end
raise "test failures: #{error}" if error
Expand Down
2 changes: 1 addition & 1 deletion spec/aws/s3_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

describe 'S3' do
# For AAPB, all the S3 content is open, since the video delivery is managed by Ci.
describe 'policy implementation', not_on_travis: true do
describe 'policy implementation', not_on_ci: true do
def to_pretty_json(string_io)
JSON.pretty_generate(JSON.parse(string_io.string))
end
Expand Down
2 changes: 1 addition & 1 deletion spec/features/media_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# TODO: was this test broken when we switched Ci credentials?

describe 'Media URLs', not_on_travis: true do
describe 'Media URLs', not_on_ci: true do
before(:all) do
PBCoreIngester.load_fixtures('spec/fixtures/pbcore/clean-MOCK.xml')
end
Expand Down
28 changes: 10 additions & 18 deletions spec/helpers/snippet_helper_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,23 @@
let(:txt_example) { File.read('./spec/fixtures/transcripts/cpb-aacip-507-0000000j8w-transcript.txt') }
let(:txt_transcript) { TranscriptFile.new(txt_url) }


# queries are split up into no-stopword arrays in the application helper
let(:transcript_query_1) { [ ["ARKANSAS"] ] }
let(:transcript_query_2) { [ ["SENATOR", "PRYOR"] ] }
let(:transcript_query_3) { [ ["FILED", "FOR", "A", "DELAY"], ["NEVER", "GONNA", "GET", "MATCHED"] ] }
let(:transcript_query_4) { [ ["TWO", "DOZEN", "FINE", "POTENTIAL", "NOMINEES", "FOR", "THE", "POSITION", "OF", "SECRETARY", "OF", "THE", "INTERIOR"] ] }

let(:transcript_query_1) { [["ARKANSAS"]] }
let(:transcript_query_2) { [%w(SENATOR PRYOR)] }
let(:transcript_query_3) { [%w(FILED FOR A DELAY), %w(NEVER GONNA GET MATCHED)] }
let(:transcript_query_4) { [%w(TWO DOZEN FINE POTENTIAL NOMINEES FOR THE POSITION OF SECRETARY OF THE INTERIOR)] }

let(:srt_example) { File.read('./spec/fixtures/captions/srt/srt_example.srt') }
let(:caption_file) { CaptionFile.new("1a2b") }


# single
let(:transcript_snippet_1) { TimecodeSnippet.new('cpb-aacip-111-21ghx7d6', transcript_query_1, json_transcript.plaintext, JSON.parse(json_transcript.content)["parts"] ) }
let(:transcript_snippet_1) { TimecodeSnippet.new('cpb-aacip-111-21ghx7d6', transcript_query_1, json_transcript.plaintext, JSON.parse(json_transcript.content)["parts"]) }
# compound
let(:transcript_snippet_2) { TimecodeSnippet.new('cpb-aacip-111-21ghx7d6', transcript_query_2, json_transcript.plaintext, JSON.parse(json_transcript.content)["parts"] ) }
let(:transcript_snippet_2) { TimecodeSnippet.new('cpb-aacip-111-21ghx7d6', transcript_query_2, json_transcript.plaintext, JSON.parse(json_transcript.content)["parts"]) }
# caption
let(:transcript_snippet_3) { Snippet.new('cpb-aacip-111-21ghx7d6', transcript_query_3, caption_file.text ) }
let(:transcript_snippet_3) { Snippet.new('cpb-aacip-111-21ghx7d6', transcript_query_3, caption_file.text) }
# txt
let(:transcript_snippet_4) { Snippet.new('cpb-aacip-507-0000000j8w', transcript_query_4, txt_transcript.plaintext ) }

let(:transcript_snippet_4) { Snippet.new('cpb-aacip-507-0000000j8w', transcript_query_4, txt_transcript.plaintext) }

before do
# Stub requests so we don't actually have to fetch them remotely. But note
Expand All @@ -48,32 +44,28 @@
WebMock.stub_request(:get, json_url).to_return(body: json_example)
WebMock.stub_request(:get, txt_url).to_return(body: txt_example)


CaptionFile.any_instance.stub(:captions_src).and_return('https://s3.amazonaws.com/americanarchive.org/captions/1a2b.srt')
WebMock.stub_request(:get, caption_file.srt_url).to_return(body: srt_example)
end

describe '#new' do
it 'initializes with the expected attrs' do

expect(transcript_snippet_1.snippet).to eq(" FOR THIS 15TH ANNIVERSARY CELEBRATION AND DEDICATION CEREMONY IS MR GEORGE CAMPBELL CHAIRMAN OF THE <mark>ARKANSAS</mark> EDUCATIONAL TELEVISION COMMISSION GOOD AFTERNOON DISTINGUISHED GUESTS LADIES AND GENTLEMEN ")
expect(transcript_snippet_1.match_timecode).to eq("50.24")
end

it 'initializes with expected attrs for compound query' do

expect(transcript_snippet_2.snippet).to eq(" NOW I MAKE NO APOLOGIES FOR STORIES I MAY OR MAY NOT TALE CAN CERTAINLY RAISE BUT I JUST CANT THINK <mark>SENATOR PRYOR</mark> ALL TALKING ABOUT LEE REEVES FROM A PARTICULARLY GOVERNMENTAL AND STATE GOVERNMENT")
expect(transcript_snippet_2.match_timecode).to eq("2061.79")
end


it 'initializes with expected attrs for caption file' do
expect(transcript_snippet_3.snippet).to eq(" THE SUMMER OF 1958 THAT ALLOWED THE LOST YEAR TO HAPPEN THE FIRST ONE WAS THAT THE SCHOOL BOARD HAD <mark>FILED FOR A DELAY</mark> A NUMBER OF BUSINESS LEADERS PERSUADED A MAJORITY OF THE MEMBERS OF THE SCHOOL")
end

it 'initializes with expected attrs for txt file' do
expect(transcript_snippet_4.snippet).to eq("AT THE OLD EXECUTIVE OFFICE BUILDING PRES RONALD REAGAN AFTER EXAMINING THE RECORDS OF MORE THAN <mark>TWO DOZEN FINE POTENTIAL NOMINEES FOR THE POSITION OF SECRETARY OF THE INTERIOR</mark> I HAVE DECIDED TO")
end
end
end

describe '#url_at_timecode' do
Expand All @@ -88,7 +80,7 @@
# end

it 'uses stopwords.txt to remove words not used in actual search' do
expect(query_to_terms_array(%(extremist is cheddar "president of the Eisenhower"))).to eq([["PRESIDENT", "OF", "THE", "EISENHOWER"], ["EXTREMIST"], ["CHEDDAR"]])
expect(query_to_terms_array(%(extremist is cheddar "president of the Eisenhower"))).to eq([%w(PRESIDENT OF THE EISENHOWER), ["EXTREMIST"], ["CHEDDAR"]])
end
end
after(:all) do
Expand Down
1 change: 0 additions & 1 deletion spec/models/caption_file_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@
end
end


after(:all) do
# Re-disable WebMock so other tests can use actual connections.
WebMock.disable!
Expand Down
2 changes: 1 addition & 1 deletion spec/scripts/downloader_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
require_relative '../../scripts/lib/downloader'

describe Downloader do
it 'can download the past 7 days', not_on_travis: true do
it 'can download the past 7 days', not_on_ci: true do
# I really don't think it's a good idea to make the tests dependent
# on the activity of the catalogers, though this is a good test otherwise.
Dir.mktmpdir do |tmpdir|
Expand Down

0 comments on commit 7007baa

Please sign in to comment.