diff --git a/app/jobs/deploy_runner_job.rb b/app/jobs/deploy_runner_job.rb
index 3a5b54b4..7afdb2dc 100644
--- a/app/jobs/deploy_runner_job.rb
+++ b/app/jobs/deploy_runner_job.rb
@@ -42,6 +42,10 @@ def perform(heritage, without_before_deploy:, description: "")
         sleep 3
       end
     end
+
+  rescue => e
+    Rails.logger.error e
+    Rails.logger.error e.backtrace
   end
 
   def other_deploy_in_progress?(heritage)
@@ -51,6 +55,7 @@ def other_deploy_in_progress?(heritage)
   end
 
   def notify(level: :good, message:)
+    Rails.logger.info message
     Event.new(@heritage.district).notify(level: level, message: "[#{@heritage.name}] #{message}")
   end
 end
diff --git a/app/jobs/monitor_deployment_job.rb b/app/jobs/monitor_deployment_job.rb
index 2650c302..a6d54fb6 100644
--- a/app/jobs/monitor_deployment_job.rb
+++ b/app/jobs/monitor_deployment_job.rb
@@ -2,6 +2,14 @@ class MonitorDeploymentJob < ActiveJob::Base
   queue_as :default
 
   def perform(service, count: 0, deployment_id: nil)
+    if service.heritage.version == 2
+      ServiceDeployment.create!(service: service)
+      return
+    end
+
+    # old version does not rely on cloudformation and thus has to be
+    # polled one by one. We will need to clean this up later.
+
     if service.deployment_finished?(deployment_id)
       notify(service, message: "#{service.name} service deployed")
     elsif count > 20
@@ -15,6 +23,7 @@ def perform(service, count: 0, deployment_id: nil)
   end
 
   def notify(service, level: :good, message:)
+    Rails.logger.info message
     Event.new(service.district).notify(level: level, message: "[#{service.heritage.name}] #{message}")
   end
 end
diff --git a/app/models/service.rb b/app/models/service.rb
index c1e2ca88..eca1cfae 100644
--- a/app/models/service.rb
+++ b/app/models/service.rb
@@ -82,8 +82,13 @@ def https_port_mapping
     port_mappings.find_by(protocol: 'https')
   end
 
-  def deployment_finished?(deployment_id)
-    backend.deployment_finished?(deployment_id)
+  # Version 1 heritages return the backend's answer for deployment_id; any
+  # other version reports on service_deployment_object (none counts as finished).
+  def deployment_finished?(deployment_id=nil)
+    return backend.deployment_finished?(deployment_id) if heritage.version == 1
+
+    return true if service_deployment_object.nil?
+    service_deployment_object.finished?
   end
 
   def save_and_update_container_count!(desired_container_count)
@@ -123,6 +128,10 @@ def service_arns
     s.flat_map(&:service_arns)
   end
 
+  def stack_name
+    "#{district.name}-#{heritage.name}-#{name}"
+  end
+
   def arn_prefix
     [
       'arn:aws:ecs',
@@ -141,8 +150,20 @@ def arn_prefix_legacy
     ].join(':')
   end
 
+  def deployment
+    if service_deployment_object.nil?
+      ServiceDeployment.create!(service: self)
+    end
+
+    service_deployment_object
+  end
+
   private
 
+  def service_deployment_object
+    service_deployments.unfinished.last || service_deployments.last
+  end
+
   def ecs
     @ecs ||= district.aws.ecs
   end
diff --git a/app/services/deploy_service.rb b/app/services/deploy_service.rb
new file mode 100644
index 00000000..943f550f
--- /dev/null
+++ b/app/services/deploy_service.rb
@@ -0,0 +1,162 @@
+# Polls CloudFormation for the stack status of every service in a district
+# and records and announces the outcome of unfinished service deployments.
+class DeployService
+
+  # CloudFormation stack status => suffix of the notify_* handler to invoke.
+  STATUS_TO_ACTION_MAP = {
+    "CREATE_IN_PROGRESS" => :incomplete,
+    "CREATE_FAILED" => :failed,
+    "CREATE_COMPLETE" => :completed,
+    "ROLLBACK_IN_PROGRESS" => :incomplete,
+    "ROLLBACK_FAILED" => :failed,
+    "ROLLBACK_COMPLETE" => :failed,
+    "DELETE_IN_PROGRESS" => :incomplete,
+    "DELETE_FAILED" => :failed,
+    "DELETE_COMPLETE" => :completed,
+    "UPDATE_IN_PROGRESS" => :incomplete,
+    "UPDATE_COMPLETE_CLEANUP_IN_PROGRESS" => :incomplete,
+    "UPDATE_COMPLETE" => :completed,
+    "UPDATE_FAILED" => :failed,
+    "UPDATE_ROLLBACK_IN_PROGRESS" => :failed,
+    "UPDATE_ROLLBACK_FAILED" => :failed,
+    "UPDATE_ROLLBACK_COMPLETE_CLEANUP_IN_PROGRESS" => :failed,
+    "UPDATE_ROLLBACK_COMPLETE" => :failed,
+    "REVIEW_IN_PROGRESS" => :incomplete,
+    "IMPORT_IN_PROGRESS" => :incomplete,
+    "IMPORT_COMPLETE" => :completed,
+    "IMPORT_ROLLBACK_IN_PROGRESS" => :failed,
+    "IMPORT_ROLLBACK_FAILED" => :failed,
+    "IMPORT_ROLLBACK_COMPLETE" => :failed
+  }.freeze
+
+  class << self
+    def deploy_service(service)
+      ServiceDeployment.create!(service: service)
+    end
+
+    DEPLOY_SERVICE_LOCK=1000001
+
+    # Yields only while holding the DEPLOY_SERVICE_LOCK advisory lock; logs
+    # and returns without yielding when the lock cannot be obtained.
+    def synchronize
+      got_lock = ActiveRecord::Base.connection.get_advisory_lock(DEPLOY_SERVICE_LOCK)
+      if !got_lock
+        Rails.logger.info("[DeployService] Lock already held. Skipping.")
+        return
+      end
+
+      yield
+    ensure
+      if got_lock && !ActiveRecord::Base.connection.release_advisory_lock(DEPLOY_SERVICE_LOCK)
+        Rails.logger.info("[DeployService] Failed to release lock")
+      end
+    end
+
+    def check_all
+      Rails.logger.info("[DeployService] Starting checks.")
+
+      synchronize do
+        District.all.each do |district|
+          Rails.logger.info("Checking district #{district.name}")
+          DeployService.new(district).check
+        end
+      end
+    end
+  end
+
+  def initialize(district)
+    @district = district
+  end
+
+  # Dispatches a notify_* handler for each service whose deployment is unfinished.
+  def check
+    @district.heritages.each do |heritage|
+      heritage.services.each do |service|
+        next if service.deployment.finished?
+
+        status = stack_statuses[service.stack_name]
+        action = STATUS_TO_ACTION_MAP[status]
+        notify(service, action)
+      end
+    end
+  end
+
+  def notify(service, action)
+    if action.nil?
+      Rails.logger.error("[deploy_service] stack #{service.stack_name} not found!")
+      return
+    end
+
+    send("notify_#{action}", service)
+  end
+
+  def notify_completed(service)
+    Rails.logger.info("Heritage: #{service.heritage.name} Service: #{service.name} Deployment Completed")
+    service.service_deployments.unfinished.each do |record|
+      record.complete!
+    end
+    event(service, message: "#{service.name} service deployed")
+  end
+
+  def notify_incomplete(service)
+    Rails.logger.info("Heritage: #{service.heritage.name} Service: #{service.name} Deployment Incomplete")
+    runtime = Time.now - service.deployment.created_at
+    if runtime > 20.minutes
+      event(service, level: :error, message: "Deploying #{service.name} service has not finished for a while.")
+    end
+  end
+
+  def notify_failed(service)
+    Rails.logger.info("Heritage: #{service.heritage.name} Service: #{service.name} Deployment Failed")
+    service.service_deployments.unfinished.each do |record|
+      record.fail!
+    end
+    event(service, level: :error, message: "Deployment of #{service.name} service has failed.")
+  end
+
+  # Hash keyed by the stack name of every service in the district.
+  def stack_names
+    @stack_names ||= begin
+      results = {}
+      @district.heritages.each do |heritage|
+        heritage.services.each do |service|
+          results[service.stack_name] = true
+        end
+      end
+      results
+    end
+  end
+
+  # Hash of stack name => stack status, limited to the names in stack_names.
+  def stack_statuses
+    @stack_statuses ||= begin
+      results = {}
+
+      cloudformation.list_stacks.each do |response|
+        response.stack_summaries.each do |summary|
+          if stack_names.key?(summary.stack_name)
+            results[summary.stack_name] = summary.stack_status
+          end
+        end
+      end
+
+      Rails.logger.info(results.to_yaml)
+
+      results
+    rescue StandardError => e
+      Rails.logger.error("Failed to retrieve stack statuses!")
+      raise e
+    end
+  end
+
+  private
+
+  def event(service, level: :good, message:)
+    Event.new(@district).notify(level: level, message: "[#{service.heritage.name}] #{message}")
+  end
+
+  def cloudformation
+    @cloudformation ||= @district.aws.cloudformation
+  end
+
+end
diff --git a/barcelona.yml b/barcelona.yml
index b0ce17d9..3e2065af 100644
--- a/barcelona.yml
+++ b/barcelona.yml
@@ -1,12 +1,18 @@
+scheduled_tasks: &scheduled_tasks
+  scheduled_tasks:
+    # 10AM JST every week day
+    - schedule: cron(0 1 ? * MON-FRI *)
+      command: bin/chaos
+    # every 5 minutes
+    - schedule: cron(0/5 * ? * * *)
+      command: rake bcn:deployment_check
+
 environments:
   production:
+    <<: *scheduled_tasks
     name: barcelona2
     image_name: public.ecr.aws/degica/barcelona
     before_deploy: rake db:migrate
-    scheduled_tasks:
-      # 10AM JST every week day
-      - schedule: cron(0 1 ? * MON-FRI *)
-        command: bin/chaos
     services:
       - name: web
         service_type: web
@@ -23,13 +29,10 @@ environments:
         cpu: 128
         memory: 256
   test:
+    <<: *scheduled_tasks
     name: barcelona
     image_name: public.ecr.aws/degica/barcelona
     before_deploy: rake db:migrate
-    scheduled_tasks:
-      # 10AM JST every week day
-      - schedule: cron(0 1 ? * MON-FRI *)
-        command: bin/chaos
     services:
       - name: web
         service_type: web
diff --git a/lib/tasks/deployment_check.rake b/lib/tasks/deployment_check.rake
new file mode 100644
index 00000000..10b7ed81
--- /dev/null
+++ b/lib/tasks/deployment_check.rake
@@ -0,0 +1,10 @@
+namespace :bcn do
+  desc "Check deployments"
+  task :deployment_check => :environment do
+    Rails.logger = Logger.new(STDOUT)
+    Rails.logger.level = :info
+
+    Rails.logger.info("Starting deployment check...")
+    DeployService.check_all
+  end
+end
diff --git a/spec/models/service_spec.rb b/spec/models/service_spec.rb
index 610aff99..122536f9 100644
--- a/spec/models/service_spec.rb
+++ b/spec/models/service_spec.rb
@@ -210,4 +210,36 @@
       expect(s2.service_deployments).to eq [s2d1]
     end
   end
+
+  describe '#deployment_finished?' do
+    it 'returns true if there are no deployments (backwards compat)' do
+      s = create :service
+
+      expect(s).to be_deployment_finished
+    end
+
+    it 'returns true if the last one is finished' do
+      s = create :service
+      create :service_deployment, service: s, completed_at: Time.now
+
+      expect(s).to be_deployment_finished
+    end
+
+    it 'returns false if the last one is not yet finished' do
+      s = create :service
+      create :service_deployment, service: s
+
+      expect(s).to_not be_deployment_finished
+    end
+  end
+
+  describe '#stack_name' do
+    it 'returns the corresponding stack name' do
+      s = create :service, name: 'serv'
+      allow(s.district).to receive(:name) { 'dist' }
+      allow(s.heritage).to receive(:name) { 'heri' }
+
+      expect(s.stack_name).to eq "dist-heri-serv"
+    end
+  end
 end
diff --git a/spec/services/deploy_service_spec.rb b/spec/services/deploy_service_spec.rb
new file mode 100644
index 00000000..8ffc4451
--- /dev/null
+++ b/spec/services/deploy_service_spec.rb
@@ -0,0 +1,278 @@
+require 'rails_helper'
+
+describe DeployService, type: :model do
+  let(:district) { create :district, name: 'testdistrict' }
+  let(:deployer) { DeployService.new(district) }
+
+  describe '.deploy_service' do
+    it 'simply creates a service deployment object' do
+      heritage = create :heritage, district: district
+      service = create :service, heritage: heritage
+      DeployService.deploy_service(service)
+
+      expect(ServiceDeployment.last.service).to eq service
+    end
+  end
+
+  describe '.check_all' do
+    it 'iterates through all districts' do
+      d1 = create :district
+      d2 = create :district
+      d3 = create :district
+
+      dsdouble1 = double('DeployService')
+      dsdouble2 = double('DeployService')
+      dsdouble3 = double('DeployService')
+
+      expect(DeployService).to receive(:new).with(d1) { dsdouble1 }
+      expect(DeployService).to receive(:new).with(d2) { dsdouble2 }
+      expect(DeployService).to receive(:new).with(d3) { dsdouble3 }
+
+      expect(dsdouble1).to receive(:check)
+      expect(dsdouble2).to receive(:check)
+      expect(dsdouble3).to receive(:check)
+
+      DeployService.check_all
+    end
+  end
+
+  describe '#check' do
+    it 'is backwards compatible' do
+      heritage = create :heritage, name: 'hname', district: district
+      service = create :service, name: 'sname', heritage: heritage
+
+      # if no service deployment object was created
+      # it would need to create one
+
+      ds = DeployService.new(district)
+
+      expect(ds).to receive(:notify_completed).with(service)
+      allow(ds).to receive(:stack_statuses) {
+        {
+          'testdistrict-hname-sname' => 'UPDATE_COMPLETE'
+        }
+      }
+
+      ds.check
+    end
+
+    it 'updates state of services' do
+      heritage = create :heritage, name: 'hname', district: district
+      service = create :service, name: 'sname', heritage: heritage
+      create :service_deployment, service: service
+
+      ds = DeployService.new(district)
+
+      expect(ds).to receive(:notify_completed).with(service)
+      allow(ds).to receive(:stack_statuses) {
+        {
+          'testdistrict-hname-sname' => 'UPDATE_COMPLETE'
+        }
+      }
+
+      ds.check
+    end
+
+    it 'updates state of services that are not ready yet' do
+      heritage = create :heritage, name: 'hname', district: district
+      service = create :service, name: 'sname', heritage: heritage
+      create :service_deployment, service: service
+
+      ds = DeployService.new(district)
+
+      expect(ds).to receive(:notify_incomplete).with(service)
+      allow(ds).to receive(:stack_statuses) {
+        {
+          'testdistrict-hname-sname' => 'UPDATE_IN_PROGRESS'
+        }
+      }
+
+      ds.check
+    end
+
+    it 'update state of services that are failed' do
+      heritage = create :heritage, name: 'hname', district: district
+      service = create :service, name: 'sname', heritage: heritage
+      create :service_deployment, service: service
+
+      ds = DeployService.new(district)
+
+      expect(ds).to receive(:notify_failed).with(service)
+      allow(ds).to receive(:stack_statuses) {
+        {
+          'testdistrict-hname-sname' => 'UPDATE_ROLLBACK_COMPLETE'
+        }
+      }
+
+      ds.check
+    end
+  end
+
+  describe '#stack_statuses' do
+    let(:cfclient) { double('CloudformationClient') }
+
+    it 'updates state of services' do
+      heritage1 = create :heritage, name: 'heritage1', district: district
+      service1 = create :service, name: 'service1', heritage: heritage1
+
+      heritage2 = create :heritage, name: 'heritage2', district: district
+      service2 = create :service, name: 'service2', heritage: heritage2
+
+      ds = DeployService.new(district)
+
+      allow(ds).to receive(:cloudformation) { cfclient }
+
+      allow(cfclient).to receive(:list_stacks) do
+        [
+          double('Response', stack_summaries:[
+            double('Summary', {
+              stack_name: 'testdistrict-heritage1-service1',
+              stack_status: 'UPDATE_COMPLETE'
+            }),
+
+            double('Summary', {
+              stack_name: 'testdistrict-heritage2-service2',
+              stack_status: 'UPDATE_FAILED'
+            }),
+
+            double('Summary', {
+              stack_name: 'testdistrict-heritage1-service2',
+              stack_status: 'UPDATE_IN_PROGRESS'
+            })
+          ])
+        ]
+      end
+
+      expect(ds.stack_statuses).to eq({
+        'testdistrict-heritage1-service1' => 'UPDATE_COMPLETE',
+        'testdistrict-heritage2-service2' => 'UPDATE_FAILED'
+      })
+    end
+  end
+
+  describe '#notify_completed' do
+    it 'sets the service deployment to completed' do
+      heritage = create :heritage, name: 'sname', district: district
+      service = create :service, heritage: heritage
+      deployment = create :service_deployment, service: service
+
+      ds = DeployService.new(district)
+      ds.notify_completed(service)
+
+      expect(deployment.reload).to be_completed
+    end
+
+    it 'sets all deployment objects for that service to completed' do
+      heritage = create :heritage, name: 'sname', district: district
+      service = create :service, heritage: heritage
+      deployment = create :service_deployment, service: service
+
+      # extra deployment objects
+      # can happen in race conditions but we should expect it and handle accordingly
+      deployment1 = create :service_deployment, service: service
+      deployment2 = create :service_deployment, service: service
+
+      ds = DeployService.new(district)
+      ds.notify_completed(service)
+
+      expect(deployment.reload).to be_completed
+      expect(deployment1.reload).to be_completed
+      expect(deployment2.reload).to be_completed
+    end
+
+    it 'notifies if the deployment has completed' do
+      heritage = create :heritage, name: 'sname', district: district
+      service = create :service, name: 'theservice', heritage: heritage
+      deployment = create :service_deployment, service: service
+
+      # this is copied from the deploy_runner_job_spec.rb
+      event_object = double("Event")
+      expect(Event).to receive(:new).with(district) { event_object }
+      expect(event_object).to receive(:notify).with(level: :good, message: "[sname] theservice service deployed")
+
+      ds = DeployService.new(district)
+      ds.notify_completed(service)
+    end
+  end
+
+  describe '#notify' do
+    it 'logs an error action is nil' do
+      district = create :district, name: 'dtest'
+      heritage = create :heritage, name: 'htest', district: district
+      service = create :service, name: 'stest', heritage: heritage
+
+      logger_double = double('logger')
+      expect(Rails).to receive(:logger) { logger_double }
+      expect(logger_double).to receive(:error).with("[deploy_service] stack dtest-htest-stest not found!")
+
+      ds = DeployService.new(district)
+      ds.notify(service, nil)
+    end
+  end
+
+  describe '#notify_incomplete' do
+    it 'notifies if the deployment is taking a while' do
+      heritage = create :heritage, name: 'sname', district: district
+      service = create :service, name: 'naughty', heritage: heritage
+
+      # Let's just agree that 1 hour for deployment is simply simply way too long
+      # even if practical circumstances happen to be such that deployments take
+      # more than an hour, barcelona deserves to be noisy about it.
+      create :service_deployment, service: service, created_at: 1.hour.ago
+
+      event_object = double("Event")
+      expect(Event).to receive(:new).with(district) { event_object }
+      expect(event_object).to receive(:notify).with(level: :error, message: "[sname] Deploying naughty service has not finished for a while.")
+
+      ds = DeployService.new(district)
+      ds.notify_incomplete(service)
+    end
+  end
+
+  describe '#notify_failed' do
+    # This is a new thing
+    # the original monitor deployment job did not check for this
+
+    it 'notifies if the deployment has failed' do
+      heritage = create :heritage, name: 'sname', district: district
+      service = create :service, name: 'bad', heritage: heritage
+      deployment = create :service_deployment, service: service
+
+      event_object = double("Event")
+      expect(Event).to receive(:new).with(district) { event_object }
+      expect(event_object).to receive(:notify).with(level: :error, message: "[sname] Deployment of bad service has failed.")
+
+      ds = DeployService.new(district)
+      ds.notify_failed(service)
+    end
+
+    it 'sets the service deployment to failed' do
+      heritage = create :heritage, name: 'sname', district: district
+      service = create :service, name: 'hname', heritage: heritage
+      deployment = create :service_deployment, service: service
+
+      ds = DeployService.new(district)
+      ds.notify_failed(service)
+
+      expect(deployment.reload).to be_failed
+    end
+
+    it 'sets all deployment objects for that service to failed' do
+      heritage = create :heritage, name: 'sname', district: district
+      service = create :service, name: 'hname', heritage: heritage
+      deployment = create :service_deployment, service: service
+
+      # extra deployment objects
+      # can happen in race conditions but we should expect it and handle accordingly
+      deployment1 = create :service_deployment, service: service
+      deployment2 = create :service_deployment, service: service
+
+      ds = DeployService.new(district)
+      ds.notify_failed(service)
+
+      expect(deployment.reload).to be_failed
+      expect(deployment1.reload).to be_failed
+      expect(deployment2.reload).to be_failed
+    end
+  end
+end