Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NZSL-159: Update dictionary from S3 rather than a github release #1523

Merged
merged 6 commits into from
Jan 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,19 @@ jobs:
env:
DATABASE_URL: postgres://postgres:postgres@localhost:5432/nzsl_test
DEVISE_SECRET_KEY: anything
AWS_REGION: ap-southeast-2
RAILS_ENV: test
run: |
cp env-example .env
bundle exec rails db:prepare

- name: Run rspec
env:
DATABASE_URL: postgres://postgres:postgres@localhost:5432/nzsl_test
DEVISE_SECRET_KEY: anything
NZSL_ONLINE_SECRET_KEY_BASE: anything
APP_DOMAIN_NAME: localhost:3000
APP_PROTOCOL: http
AWS_REGION: ap-southeast-2
S3_BUCKET_URL: http://s3-ap-southeast-2.amazonaws.com/dummy-fake/
run: bundle exec rspec spec
run: |
cp env-example .env
bundle exec rspec spec
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ gem 'pg', '~>1.2'
# Use SQLite to access signs from a Signbank dictionary export
gem 'sqlite3'

gem 'aws-sdk-s3'
gem 'bootsnap', '>= 1.1.0', require: false
gem 'haml'
gem 'jquery-rails'
Expand Down
18 changes: 18 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,22 @@ GEM
ast (2.4.2)
autoprefixer-rails (10.3.3.0)
execjs (~> 2)
aws-eventstream (1.3.0)
aws-partitions (1.878.0)
aws-sdk-core (3.190.2)
aws-eventstream (~> 1, >= 1.3.0)
aws-partitions (~> 1, >= 1.651.0)
aws-sigv4 (~> 1.8)
jmespath (~> 1, >= 1.6.1)
aws-sdk-kms (1.76.0)
aws-sdk-core (~> 3, >= 3.188.0)
aws-sigv4 (~> 1.1)
aws-sdk-s3 (1.142.0)
aws-sdk-core (~> 3, >= 3.189.0)
aws-sdk-kms (~> 1)
aws-sigv4 (~> 1.8)
aws-sigv4 (1.8.0)
aws-eventstream (~> 1, >= 1.0.2)
babel-source (5.8.35)
babel-transpiler (0.7.0)
babel-source (>= 4.0, < 6)
Expand Down Expand Up @@ -175,6 +191,7 @@ GEM
multi_xml (>= 0.5.2)
i18n (1.14.1)
concurrent-ruby (~> 1.0)
jmespath (1.6.2)
jquery-rails (4.4.0)
rails-dom-testing (>= 1, < 3)
railties (>= 4.2.0)
Expand Down Expand Up @@ -420,6 +437,7 @@ PLATFORMS

DEPENDENCIES
autoprefixer-rails
aws-sdk-s3
bootsnap (>= 1.1.0)
brakeman
bundle-audit
Expand Down
13 changes: 5 additions & 8 deletions config/initializers/sign_database.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# Update the dictionary file if it is older than 1 month
# We update this file in both dictionary modes because our tests
# expect the database to test across both modes
path = Rails.root.join('db', 'dictionary.sqlite3')
Rails.application.load_tasks
deployed = !Rails.env.development? && !Rails.env.test?

Rake::Task['dictionary:update'].execute if deployed || (!path.exist? || path.mtime <= 1.month.ago)
Rails.application.reloader.to_prepare do
  # Update the dictionary file on boot.
  #
  # This block can run more than once per process (code reloads), and rake may
  # already have loaded the task files when the app boots from a rake command.
  # Loading the task files again appends duplicate actions to every task, so
  # only load them when the dictionary task is not yet defined.
  Rails.application.load_tasks unless defined?(Rake::Task) && Rake::Task.task_defined?('dictionary:update')
  Rake::Task['dictionary:update'].execute
end
5 changes: 4 additions & 1 deletion env-example
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
S3_BUCKET_URL: "example.s3.url/"
NZSL_ONLINE_SECRET_KEY_BASE: 62da7bed624d0cbbe3d186166fdd88db5bb3989075a2154cebe3e5ee20a4f2a2d540865309958346b7b43799d461be2b37c6e27d1fd6ca03b1f59622c5ccc402
APP_DOMAIN_NAME: "localhost:3000"
APP_PROTOCOL: "http"
APP_PROTOCOL: "http"

# The latest public release
DICTIONARY_DATABASE_S3_LOCATION="s3://nzsl-signbank-media-production/dictionary-exports/nzsl.db"
54 changes: 36 additions & 18 deletions lib/tasks/dictionary.rake
Original file line number Diff line number Diff line change
@@ -1,29 +1,47 @@
namespace :dictionary do
namespace :dictionary do # rubocop:disable Metrics/BlockLength
desc 'Updates the NZSL dictionary packaged with the application to the latest release from Signbank'
task :update do # rubocop:disable Rails/RakeEnvironment - we need to place this file before the app can start
repo = 'odnzsl/nzsl-dictionary-scripts'
filename = 'nzsl.db'
content_type = 'application/vnd.sqlite3'
release_uri = URI::HTTPS.build(host: 'api.github.com', path: "/repos/#{repo}/releases/latest")
release = JSON.parse(release_uri.open.read)
database_asset = release['assets'].find do |asset|
asset['name'] == filename && asset['content_type'] == content_type
end

database_url = database_asset.fetch('browser_download_url')
# An unset variable falls back to '' so that it is reported by the S3-URI check
# below instead of surfacing as a KeyError from ENV.fetch.
database_s3_location = URI.parse(ENV.fetch('DICTIONARY_DATABASE_S3_LOCATION', ''))
raise 'DICTIONARY_DATABASE_S3_LOCATION must be an S3 URI' unless database_s3_location.scheme == 's3'

File.open('db/new-dictionary.sqlite3', 'wb') do |f|
f.write URI.parse(database_url).open.read
rescue OpenURI::HTTPError
sleep 5 # Wait a few seconds before retrying
retry
end
download_s3_uri(database_s3_location, 'db/new-dictionary.sqlite3')

# Validate the freshly downloaded file and read its version before it replaces
# the live dictionary.
database = SQLite3::Database.open('db/new-dictionary.sqlite3')
begin
  raise 'Database does not pass integrity check' unless database.integrity_check == [['ok']]

  version = database.get_int_pragma('user_version')
ensure
  # Release the SQLite handle before the file is moved into place.
  database.close
end

FileUtils.mv('db/new-dictionary.sqlite3', 'db/dictionary.sqlite3')

puts "Updated db/dictionary.sqlite3 to #{release['name']}"
puts "Updated db/dictionary.sqlite3 to #{version}"
end

# Returns the memoized S3 client used to download the dictionary database.
#
# The region is read from DICTIONARY_AWS_REGION, falling back to AWS_REGION.
# Credentials are read from DICTIONARY_AWS_ACCESS_KEY_ID and
# DICTIONARY_AWS_SECRET_ACCESS_KEY. Any setting that is not present in the
# environment is left out of the options passed to the client.
def s3_client
  return @s3_client if @s3_client

  settings = {
    region: ENV.fetch('DICTIONARY_AWS_REGION') { ENV.fetch('AWS_REGION', nil) },
    access_key_id: ENV.fetch('DICTIONARY_AWS_ACCESS_KEY_ID', nil),
    secret_access_key: ENV.fetch('DICTIONARY_AWS_SECRET_ACCESS_KEY', nil)
  }
  @s3_client = Aws::S3::Client.new(settings.compact)
end

# Downloads the object addressed by an s3:// URI to a local file.
#
# The S3 client is tried first. If credentials are missing, or S3 responds
# with a service error, the object's public URL is downloaded over HTTP
# instead.
#
# @param s3_uri [URI::Generic] parsed S3 URI; the host is used as the bucket
#   and the path (without its leading slash) as the object key
# @param target [String] local path the object is written to
# @raise [ArgumentError] when the URI has no bucket or no object key
def download_s3_uri(s3_uri, target)
  bucket = s3_uri.host
  key = s3_uri.path.to_s.delete_prefix('/')
  if bucket.to_s.empty? || key.empty?
    raise ArgumentError, "S3 URI must include a bucket and an object key: #{s3_uri}"
  end

  begin
    s3_client.get_object({ bucket:, key: }, target:)
  rescue Aws::Errors::MissingCredentialsError,
         Aws::Sigv4::Errors::MissingCredentialsError,
         Aws::S3::Errors::ServiceError
    # Fallback to public-URL download over HTTP if credentials are not provided or invalid.
    # TODO: use aws-sdk to leverage aws-client optimizations once unsigned requests are supported:
    # https://github.com/aws/aws-sdk-ruby/issues/1149
    download_public_s3_object(bucket, key, target)
  end
end

# Streams the public URL of an S3 object to +target+ without buffering the
# whole body in memory. Raises (via Net::HTTPResponse#value) unless the
# response status is 2xx; the target file is only opened after that check.
def download_public_s3_object(bucket, key, target)
  # NOTE(review): `credentials: 0` looks like a placeholder so that the public
  # URL can be built without real credentials - confirm against the SDK.
  public_url = URI.parse(Aws::S3::Bucket.new(bucket, credentials: 0).object(key).public_url)

  Net::HTTP.start(public_url.host, public_url.port, use_ssl: public_url.scheme == 'https') do |http|
    http.request_get(public_url.request_uri) do |response|
      response.value
      File.open(target, 'wb') do |file|
        response.read_body { |chunk| file.write(chunk) }
      end
    end
  end
end
end
Loading