Skip to content

Commit

Permalink
CI update to latest kepler action
Browse files: browse the repository at this point in the history
Signed-off-by: Sam Yuan <[email protected]>
  • Loading branch information
SamYuan1990 committed Apr 6, 2024
1 parent f75122d commit 1f12584
Show file tree
Hide file tree
Showing 13 changed files with 23 additions and 759 deletions.
50 changes: 12 additions & 38 deletions .github/workflows/collect-data-self-hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ on:
type: string

env:
KUBECONFIG: /root/.kube/config
KUBECONFIG: /tmp/kubeconfig

jobs:
setup-runner:
Expand Down Expand Up @@ -78,53 +78,27 @@ jobs:
runs-on: [self-hosted, linux, x64]

steps:
- name: Enable RAPL module
run: |
kernel_version=$(uname -r)
apt install -y linux-modules-$kernel_version linux-modules-extra-$kernel_version
modprobe intel_rapl_common
- name: Install Docker
id: docker
run: |
# Add Docker's official GPG key:
apt-get update -y
apt-get install ca-certificates curl gnupg -y
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
chmod a+r /etc/apt/keyrings/docker.gpg
# Add the repository to Apt sources:
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
tee /etc/apt/sources.list.d/docker.list > /dev/null
apt-get update -y
apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y
docker info
- name: Checkout
id: checkout
uses: actions/checkout@v4

- name: Prepare Cluster
working-directory: model_training
run: |
./script.sh cluster_up
cp $HOME/bin/kubectl /usr/local/bin/kubectl
kubectl get po -A
- name: use Kepler action to deploy cluster
uses: sustainable-computing-io/kepler-action@main
with:
ebpfprovider: libbpf
cluster_provider: kind
install_containerruntime: true
prometheus_enable: true
tekton_enable: true
kernel_module_names: intel_rapl_common

- name: Install Kepler
working-directory: model_training
run: |
./script.sh deploy_kepler
./script.sh deploy_prom_dependency
kubectl logs $(kubectl get pods -oname -nkepler) -n kepler|grep "obtain power"
- name: Install Tekton
run: |
kubectl apply --filename https://storage.googleapis.com/tekton-releases/pipeline/latest/release.yaml
./hack/k8s_helper.sh rollout_ns_status tekton-pipelines
./hack/k8s_helper.sh rollout_ns_status tekton-pipelines-resolvers
- name: Prepare PVC
working-directory: model_training/tekton
run: |
Expand Down
22 changes: 9 additions & 13 deletions .github/workflows/train-model.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ env:
AWS_REGION: ${{ secrets.aws_region }}
AWS_ACCESS_KEY_ID: ${{ secrets.aws_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.aws_secret_access_key }}
KIND_CLUSTER_NAME: kind-for-training
KIND_CLUSTER_NAME: kind
KUBECONFIG: /tmp/kubeconfig

jobs:
check-data:
Expand Down Expand Up @@ -114,18 +115,13 @@ jobs:
sudo apt-get install -y python3-pip bc
sudo pip3 install awscli
- name: Prepare Cluster
working-directory: model_training
run: |
./script.sh cluster_up
cp $HOME/bin/kubectl /usr/local/bin/kubectl
kubectl get po -A
- name: Install Tekton
run: |
kubectl apply --filename https://storage.googleapis.com/tekton-releases/pipeline/latest/release.yaml
./hack/k8s_helper.sh rollout_ns_status tekton-pipelines
./hack/k8s_helper.sh rollout_ns_status tekton-pipelines-resolvers
- name: use Kepler action to deploy cluster
uses: sustainable-computing-io/kepler-action@main
with:
ebpfprovider: libbpf
cluster_provider: kind
prometheus_enable: true
tekton_enable: true

- name: Prepare PVC
working-directory: model_training/tekton
Expand Down
1 change: 1 addition & 0 deletions hack/k8s_helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# Copyright 2023 The Kepler Contributors
#

export KUBECONFIG="/tmp/kubeconfig"
set -e

rollout_ns_status() {
Expand Down
201 changes: 0 additions & 201 deletions model_training/custom-cluster/LICENSE

This file was deleted.

43 changes: 0 additions & 43 deletions model_training/custom-cluster/README.md

This file was deleted.

Loading

0 comments on commit 1f12584

Please sign in to comment.