-
Notifications
You must be signed in to change notification settings - Fork 781
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Monitoring] Add EC2 auto scaling script (#4098)
* Add auto manage script for start stop ec2 workers Signed-off-by: AyushR1 <[email protected]> * Fix issues Signed-off-by: AyushR1 <[email protected]> * Update serializers.py * Update EC2 instance auto scaling script * Fix flake8 * Update auto scaling * Fix flake8 * Fix tests --------- Signed-off-by: AyushR1 <[email protected]> Co-authored-by: Gunjan Chhablani <[email protected]>
- Loading branch information
1 parent
bcc642b
commit 7568d9c
Showing
6 changed files
with
201 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
import os | ||
import pytz | ||
import warnings | ||
import boto3 | ||
from datetime import datetime | ||
from dateutil.parser import parse | ||
from evalai_interface import EvalAI_Interface | ||
|
||
# Suppress every warning category for the lifetime of this script.
warnings.filterwarnings("ignore")

utc = pytz.UTC

# Runtime configuration, read once from the process environment.
ENV = os.getenv("ENV", "dev")
evalai_endpoint = os.getenv("API_HOST_URL", "http://localhost:8000")
# No fallback here: auth_token is None when AUTH_TOKEN is not set.
auth_token = os.getenv("AUTH_TOKEN")
|
||
|
||
def get_boto3_client(resource, aws_keys):
    """Create a boto3 client for ``resource`` using the given credentials.

    Args:
        resource: Value passed as the first positional argument to
            ``boto3.client``.
        aws_keys: Mapping that must contain ``AWS_REGION``,
            ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY``.

    Returns:
        Whatever ``boto3.client`` returns for these arguments.

    Raises:
        KeyError: If one of the three required entries is missing.
    """
    connection_kwargs = {
        "region_name": aws_keys["AWS_REGION"],
        "aws_access_key_id": aws_keys["AWS_ACCESS_KEY_ID"],
        "aws_secret_access_key": aws_keys["AWS_SECRET_ACCESS_KEY"],
    }
    return boto3.client(resource, **connection_kwargs)
|
||
|
||
def get_pending_submission_count(challenge_metrics):
    """Return the number of submissions that have not finished yet.

    Args:
        challenge_metrics: Mapping from submission status name to a count.
            Statuses that are absent contribute zero.

    Returns:
        The sum of the ``running``, ``submitted``, ``queued`` and
        ``resuming`` counts.
    """
    unfinished_statuses = ("running", "submitted", "queued", "resuming")
    return sum(challenge_metrics.get(name, 0) for name in unfinished_statuses)
|
||
|
||
def stop_instance(challenge, evalai_interface):
    """Stop the challenge's EC2 instance when it is currently running.

    Looks up the instance through ``evalai_interface`` and requests a stop
    only if the reported state name is ``"running"``; any other state just
    logs a skip message.

    Args:
        challenge: Mapping with at least the ``id`` and ``title`` entries.
        evalai_interface: Object exposing ``get_ec2_instance_details`` and
            ``stop_challenge_ec2_instance``, both called with the
            challenge id.
    """
    details = evalai_interface.get_ec2_instance_details(challenge["id"])
    state_name = details["message"]["State"]["Name"]

    if state_name != "running":
        print(
            "No running EC2 instance and pending messages found for Challenge ID: {}, Title: {}. Skipping.".format(
                challenge["id"], challenge["title"]
            )
        )
        return

    api_response = evalai_interface.stop_challenge_ec2_instance(challenge["id"])
    print("AWS API Response: {}".format(api_response))
    print(
        "Stopped EC2 instance for Challenge ID: {}, Title: {}".format(
            challenge["id"], challenge["title"]
        )
    )
|
||
|
||
def start_instance(challenge, evalai_interface):
    """Start the challenge's EC2 instance when it is currently stopped.

    Looks up the instance through ``evalai_interface`` and requests a start
    only if the reported state name is ``"stopped"``; any other state just
    logs a skip message.

    Args:
        challenge: Mapping with at least the ``id`` and ``title`` entries.
        evalai_interface: Object exposing ``get_ec2_instance_details`` and
            ``start_challenge_ec2_instance``, both called with the
            challenge id.
    """
    details = evalai_interface.get_ec2_instance_details(challenge["id"])
    state_name = details["message"]["State"]["Name"]

    if state_name != "stopped":
        print(
            "Existing running EC2 instance and pending messages found for Challenge ID: {}, Title: {}. Skipping.".format(
                challenge["id"], challenge["title"]
            )
        )
        return

    api_response = evalai_interface.start_challenge_ec2_instance(challenge["id"])
    print("AWS API Response: {}".format(api_response))
    print(
        "Started EC2 instance for Challenge ID: {}, Title: {}.".format(
            challenge["id"], challenge["title"]
        )
    )
|
||
|
||
def start_or_stop_workers(challenge, challenge_metrics, evalai_interface):
    """Start or stop a challenge's EC2 worker based on its pending work.

    The instance is stopped when there are no pending submissions or when
    the challenge's ``end_date`` is in the past; otherwise it is started.
    If the pending count cannot be computed, the challenge is skipped.

    Args:
        challenge: Mapping with at least ``id``, ``title`` and ``end_date``.
            NOTE(review): ``end_date`` is assumed to parse to a
            timezone-aware datetime, since it is compared against an aware
            UTC timestamp -- confirm against the API payload.
        challenge_metrics: Per-status submission counts for this challenge.
        evalai_interface: Client handed through to ``stop_instance`` /
            ``start_instance``.
    """
    try:
        pending_submissions = get_pending_submission_count(challenge_metrics)
    except Exception:
        # Deliberately best-effort: one malformed metrics entry must not
        # abort processing of the remaining challenges.
        print(
            "Unable to get the pending submissions for challenge ID: {}, Title: {}. Skipping.".format(
                challenge["id"], challenge["title"]
            )
        )
        return

    print("Pending Submissions: {}".format(pending_submissions))

    # datetime.now(tz) yields the same aware UTC timestamp as
    # pytz.UTC.localize(datetime.utcnow()) without the deprecated utcnow().
    # The end date is only parsed when there is pending work (short-circuit).
    if pending_submissions == 0 or parse(
        challenge["end_date"]
    ) < datetime.now(pytz.UTC):
        stop_instance(challenge, evalai_interface)
    else:
        start_instance(challenge, evalai_interface)
|
||
|
||
# TODO: Factor in limits for the APIs | ||
def start_or_stop_workers_for_challenges(response, metrics, evalai_interface):
    """Apply the start/stop decision to every EC2-backed challenge on a page.

    Args:
        response: One page of the challenges listing; its ``results`` entry
            is iterated.
        metrics: Mapping from challenge id (as a string) to that challenge's
            submission metrics.
        evalai_interface: Client handed through to ``start_or_stop_workers``.

    Challenges whose ``uses_ec2_worker`` flag is falsy are ignored. A
    challenge without an entry in ``metrics`` is reported and skipped so
    that one missing entry does not abort the rest of the run.
    """
    for challenge in response["results"]:
        if not challenge["uses_ec2_worker"]:
            continue

        metrics_key = str(challenge["id"])
        try:
            challenge_metrics = metrics[metrics_key]
        except KeyError:
            print(
                "No submission metrics found for Challenge ID: {}, Title: {}. Skipping.".format(
                    challenge["id"], challenge["title"]
                )
            )
            continue

        start_or_stop_workers(challenge, challenge_metrics, evalai_interface)
|
||
|
||
def create_evalai_interface(auth_token, evalai_endpoint):
    """Construct the ``EvalAI_Interface`` client used by this script.

    Args:
        auth_token: Forwarded as the first constructor argument.
        evalai_endpoint: Forwarded as the second constructor argument.

    Returns:
        The new ``EvalAI_Interface`` instance.
    """
    return EvalAI_Interface(auth_token, evalai_endpoint)
|
||
|
||
# Cron Job | ||
def start_job():
    """Run one auto-scaling pass over every page of challenges.

    Fetches the first page of challenges and the submission metrics, then
    processes each page in turn, following the ``next`` link until it is
    ``None``.
    """
    evalai_interface = create_evalai_interface(auth_token, evalai_endpoint)
    response = evalai_interface.get_challenges()
    metrics = evalai_interface.get_challenges_submission_metrics()

    while True:
        # Every page, including the first, needs the interface so that the
        # workers can be queried and started/stopped.
        start_or_stop_workers_for_challenges(response, metrics, evalai_interface)
        next_page = response["next"]
        if next_page is None:
            break
        response = evalai_interface.make_request(next_page, "GET")
|
||
|
||
if __name__ == "__main__":
    # Script entry point: run a single auto-scaling pass and exit.
    print("Starting worker auto scaling script")
    start_job()
    print("Quitting worker auto scaling script!")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/bin/bash
# Cron wrapper for the EC2 worker auto scaling script.
#
# Usage: <this script> [repo_path] [auth_token] [api_host_url] [env]
#   repo_path     Repository root containing scripts/monitoring/ (default: $PWD)
#   auth_token    Exported as AUTH_TOKEN when non-empty
#   api_host_url  Exported as API_HOST_URL when non-empty
#   env           Exported as ENV when non-empty

path="${1:-$PWD}"
auth_token="${2:-}"
api_host_url="${3:-}"
env="${4:-}"

# crontab doesn't have access to env variable, define explicitly.
# Only export values that were actually supplied: exporting an empty string
# would still set the variable and so bypass the defaults the Python script
# applies through os.environ.get() when a variable is unset.
if [ -n "$auth_token" ]; then
    export AUTH_TOKEN="$auth_token"
fi

if [ -n "$api_host_url" ]; then
    export API_HOST_URL="$api_host_url"
fi

if [ -n "$env" ]; then
    export ENV="$env"
fi

# Quote the path so that a repository location containing spaces still works.
/home/ubuntu/venv/bin/python "${path}/scripts/monitoring/auto_scale_ec2_workers.py"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters