Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tuning Halving Multicloud Support #384

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion .github/workflows/e2e-tuning-halving.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,23 @@ jobs:
id: buildx
uses: docker/setup-buildx-action@v2

- name: Install AWS CLI
uses: unfor19/install-aws-cli-action@master
with:
version: '2'

- name: Set up Python Dependencies for AWS Jobs
working-directory: runner/aws_lambda_scripts
run: |
python3 -m pip install --upgrade pip
python3 -m pip install wheel ez_setup setuptools
python3 -m pip install -r requirements.txt

- name: Build and Push
working-directory: benchmarks/tuning-halving
env:
GOPRIVATE_KEY: ${{ secrets.XDT_REPO_ACCESS_KEY }}
run: make all-image-push
run: make all-push

test-compose:
name: Test Docker Compose
Expand Down Expand Up @@ -186,3 +198,29 @@ jobs:
run: |
kubectl delete -f ./service-driver.yaml --namespace default --wait
kubectl delete -f ./service-trainer.yaml --namespace default --wait

test-aws-lambda:
name: Test AWS Lambda Deployment
needs: build-and-push
runs-on: ubuntu-20.04
strategy:
fail-fast: false
steps:
- name: Check out code
uses: actions/checkout@v3
with:
lfs: 'true'

- name: Set up Python Dependencies for AWS Jobs
working-directory: runner/aws_lambda_scripts
run: |
python3 -m pip install --upgrade pip
python3 -m pip install wheel ez_setup setuptools
python3 -m pip install -r requirements.txt

- name: Deploy and Test functions from ECR container
working-directory: runner/aws_lambda_scripts
run: |
python aws_actions.py deploy_lambdafn_from_ecr -n tuning-halving-driver -f tuning-halving-driver -p invoke_function,access_s3 -e '{"IS_LAMBDA":"true","TRAINER_FUNCTION":"tuning-halving-trainer","BUCKET_NAME":"vhive-stacking"}'
python aws_actions.py deploy_lambdafn_from_ecr -n tuning-halving-trainer -f tuning-halving-trainer -p access_s3 -e '{"IS_LAMBDA":"true","BUCKET_NAME":"vhive-stacking"}'
python aws_actions.py invoke_lambdafn -f stacking-training-driver
44 changes: 39 additions & 5 deletions benchmarks/tuning-halving/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,26 +32,46 @@ MODE = --load

.PHONY: proto

# Default goal: build every image variant (DockerHub + AWS Lambda/ECR).
# NOTE(review): the scraped diff contained both the old and new `all:`
# definitions; only the new (multicloud) side is kept here.
all: all-image all-image-lambda

# Knative/serving images, tagged for DockerHub (vhiveease/...).
all-image: driver-image trainer-image

# AWS Lambda images, tagged for the account's ECR registry.
all-image-lambda: driver-image-lambda trainer-image-lambda

# Build and push everything to both registries.
all-push: all-image-push all-image-lambda-push

all-image-push: driver-image-push trainer-image-push

all-image-lambda-push: driver-image-lambda-push trainer-image-lambda-push

# Build the driver image for Knative deployment; requires the generated
# gRPC stubs and the GOPRIVATE_KEY secret for private Go module access.
driver-image: docker/Dockerfile proto/tuning_pb2_grpc.py proto/tuning_pb2.py driver/main.py
	docker buildx build $(PLATFORM) \
		-f docker/Dockerfile \
		--build-arg target_arg=driver \
		--secret id=GOPRIVATE_KEY \
		-t vhiveease/tuning-halving-driver:latest \
		$(ROOT) $(MODE)

trainer-image: Dockerfile proto/tuning_pb2_grpc.py proto/tuning_pb2.py trainer/main.py
# Build the trainer image for Knative deployment (same Dockerfile as the
# driver; target_arg selects which entrypoint directory is baked in).
trainer-image: docker/Dockerfile proto/tuning_pb2_grpc.py proto/tuning_pb2.py trainer/main.py
	docker buildx build $(PLATFORM) \
		-f docker/Dockerfile \
		--build-arg target_arg=trainer \
		--secret id=GOPRIVATE_KEY \
		-t vhiveease/tuning-halving-trainer:latest \
		$(ROOT) $(MODE)

# Build the driver image for AWS Lambda, tagged for the caller's ECR
# registry. AWS_ACCOUNT_ID and AWS_REGION must be set in the environment.
driver-image-lambda: docker/Dockerfile.Lambda driver/main.py
	docker buildx build $(PLATFORM) \
		-f docker/Dockerfile.Lambda \
		--build-arg target_arg=driver \
		-t $(AWS_ACCOUNT_ID).dkr.ecr.$(AWS_REGION).amazonaws.com/tuning-halving-driver:latest \
		$(ROOT) $(MODE)

# Build the trainer image for AWS Lambda, tagged for the caller's ECR
# registry. AWS_ACCOUNT_ID and AWS_REGION must be set in the environment.
trainer-image-lambda: docker/Dockerfile.Lambda trainer/main.py
	docker buildx build $(PLATFORM) \
		-f docker/Dockerfile.Lambda \
		--build-arg target_arg=trainer \
		-t $(AWS_ACCOUNT_ID).dkr.ecr.$(AWS_REGION).amazonaws.com/tuning-halving-trainer:latest \
		$(ROOT) $(MODE)

# Push the DockerHub images. NOTE(review): the driver's push recipe was
# lost in the rendered diff (swallowed by an "Expand All" marker);
# reconstructed here to mirror the trainer rule.
driver-image-push: driver-image
	docker push vhiveease/tuning-halving-driver:latest

trainer-image-push: trainer-image
	docker push vhiveease/tuning-halving-trainer:latest

# Push the driver Lambda image to ECR: authenticate docker against the
# account registry, ensure the repository exists, then push.
driver-image-lambda-push: driver-image-lambda
	aws ecr get-login-password --region $(AWS_REGION) \
		| docker login --username AWS --password-stdin \
			$(AWS_ACCOUNT_ID).dkr.ecr.$(AWS_REGION).amazonaws.com
	python $(ROOT)/runner/aws_lambda_scripts/aws_actions.py create_ecr_repo -n tuning-halving-driver
	docker push $(AWS_ACCOUNT_ID).dkr.ecr.$(AWS_REGION).amazonaws.com/tuning-halving-driver:latest

# Push the trainer Lambda image to ECR: authenticate docker against the
# account registry, ensure the repository exists, then push.
trainer-image-lambda-push: trainer-image-lambda
	aws ecr get-login-password --region $(AWS_REGION) \
		| docker login --username AWS --password-stdin \
			$(AWS_ACCOUNT_ID).dkr.ecr.$(AWS_REGION).amazonaws.com
	python $(ROOT)/runner/aws_lambda_scripts/aws_actions.py create_ecr_repo -n tuning-halving-trainer
	docker push $(AWS_ACCOUNT_ID).dkr.ecr.$(AWS_REGION).amazonaws.com/tuning-halving-trainer:latest

# Regenerate the Python gRPC stubs from the .proto definitions.
# Shared invocation for both proto files.
PROTOC_PY := python -m grpc_tools.protoc -I./proto --python_out=./proto --grpc_python_out=./proto

proto: proto/tuning.proto proto/helloworld.proto
	$(PROTOC_PY) ./proto/tuning.proto
	$(PROTOC_PY) ./proto/helloworld.proto
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ FROM vhiveease/python-slim:latest as builder
ARG target_arg
ENV target=$target_arg
WORKDIR /py
COPY ./benchmarks/tuning-halving/requirements.txt ./
COPY ./benchmarks/tuning-halving/requirements/common.txt ./requirements.txt
RUN apt update && \
apt install git curl -y && \
pip3 install --no-cache-dir --user -r requirements.txt
Expand Down
37 changes: 37 additions & 0 deletions benchmarks/tuning-halving/docker/Dockerfile.Lambda
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# MIT License
#
# Copyright (c) 2022 Alan Nair and The vHive Ecosystem
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

FROM public.ecr.aws/lambda/python:3.9
# target_arg selects which function directory (driver/trainer) is baked in.
ARG target_arg
ENV target=$target_arg

# Install the function dependencies FIRST so this layer is cached and
# reused across (frequently-changing) function-code edits.
# NOTE(review): reordered ahead of the code COPY for cache efficiency;
# assumes no file-name collision between installed packages and the
# function sources — confirm.
COPY benchmarks/tuning-halving/requirements/aws_lambda.txt requirements.txt
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"

# Copy function code and shared vHive helper modules into the task root.
COPY benchmarks/tuning-halving/${target}/* ${LAMBDA_TASK_ROOT}
COPY utils/tracing/python/tracing.py ${LAMBDA_TASK_ROOT}
COPY utils/storage/python/storage.py ${LAMBDA_TASK_ROOT}

# Lambda entry point: lambda_handler in the copied main.py.
CMD [ "main.lambda_handler" ]
115 changes: 115 additions & 0 deletions benchmarks/tuning-halving/driver/driver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# MIT License
#
# Copyright (c) 2022 Alan Nair and The vHive Ecosystem
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import os

from storage import Storage

import itertools
import logging as log
import numpy as np
import pickle
import sklearn.datasets as datasets
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import StratifiedShuffleSplit

def generate_dataset():
    """Create a fixed synthetic binary-classification dataset.

    Uses a deterministic seed (random_state=42) so every run produces the
    same imbalanced (90/10) dataset of 1000 samples x 1024 features with
    10% label noise.

    Returns:
        dict with keys 'features' (sample matrix) and 'labels' (class vector).
    """
    sample_count = 1000
    feature_count = 1024
    features, labels = datasets.make_classification(
        n_samples=sample_count,
        n_features=feature_count,
        n_redundant=0,
        n_clusters_per_class=2,
        weights=[0.9, 0.1],
        flip_y=0.1,
        random_state=42)
    return {'features': features, 'labels': labels}

def generate_hyperparam_sets(param_config):
    """Yield every hyperparameter combination from a grid specification.

    Args:
        param_config: mapping of parameter name -> list of candidate values.

    Yields:
        dict mapping each parameter name to one chosen value, covering the
        full Cartesian product of the candidate lists.
    """
    names = list(param_config.keys())
    candidate_lists = (param_config[name] for name in names)
    for combination in itertools.product(*candidate_lists):
        yield dict(zip(names, combination))

class Driver:
    """Orchestrates successive-halving hyperparameter tuning.

    Generates a dataset and a grid of model configs, then repeatedly trains
    all surviving configs (via a caller-supplied trainer function) on a
    sampled fraction of the data, keeping the top half each round until one
    config remains, which is trained on the full dataset.
    """

    def __init__(self, XDTconfig=None):
        # Bucket for dataset/model blobs; defaults to 'vhive-tuning' when
        # BUCKET_NAME is not set. Storage is the project's pluggable backend
        # (S3/XDT/...) — its exact semantics are defined elsewhere.
        bucket = os.getenv('BUCKET_NAME', 'vhive-tuning')
        self.storageBackend = Storage(bucket, XDTconfig)

    def handler_broker(self, event, context):
        """Build the tuning job: upload the dataset and enumerate configs.

        Lambda-style signature; `event` and `context` are currently unused.
        Returns a dict with the storage key of the pickled dataset and the
        full grid of model configurations (one per hyperparameter combo).
        """
        dataset = generate_dataset()
        # Grid of RandomForestRegressor hyperparameters to be halved over
        # (3 * 2 * 1 = 6 initial configurations).
        hyperparam_config = {
            'model': 'RandomForestRegressor',
            'params': {
                'n_estimators': [5, 10, 20],
                'min_samples_split': [2, 4],
                'random_state': [42]
            }
        }
        models_config = {
            'models': [
                {
                    'model': 'RandomForestRegressor',
                    'params': hyperparam
                } for hyperparam in generate_hyperparam_sets(hyperparam_config['params'])
            ]
        }
        # Persist the dataset once; trainers fetch it by key.
        key = self.storageBackend.put('dataset_key', pickle.dumps(dataset))
        return {
            'dataset_key': key,
            'models_config': models_config
        }

    def drive(self, driveArgs):
        """Run the successive-halving loop.

        Args:
            driveArgs: dict with key 'trainerfn', a callable that takes a
                request dict ({'dataset_key', 'model_config', 'count',
                'sample_rate'}) and returns a response assumed to contain
                'score' and 'params' — TODO confirm against the trainer.
        """
        event = self.handler_broker({}, {})
        models = event['models_config']['models']
        while len(models) > 1:
            # Each round trains on a fraction inversely proportional to the
            # number of surviving configs (more configs -> smaller sample).
            sample_rate = 1 / len(models)
            log.info(f"Running {len(models)} models at sample rate {sample_rate}")

            training_responses = []
            for count, model_config in enumerate(models):
                training_responses.append(
                    driveArgs['trainerfn']({
                        'dataset_key': event['dataset_key'],
                        'model_config': model_config,
                        'count': count,
                        'sample_rate': sample_rate
                    })
                )

            # Keep models with the best score
            # (halving: top 50% by descending 'score' survive; 'params' is
            # presumably the echoed model_config — verify trainer contract).
            top_number = len(training_responses) // 2
            sorted_responses = sorted(training_responses, key=lambda result: result['score'], reverse=True)
            models = [resp['params'] for resp in sorted_responses[:top_number]]

        # One survivor: retrain it on the full dataset (sample_rate=1.0).
        log.info(f"Training final model {models[0]} on the full dataset")
        final_response = driveArgs['trainerfn']({
            'dataset_key': event['dataset_key'],
            'model_config': models[0],
            'count': 0,
            'sample_rate': 1.0
        })
        log.info(f"Final result: score {final_response['score']}, model {final_response['params']['model']}")
        return
Loading