Skip to content

Commit

Permalink
init power manager
Browse files Browse the repository at this point in the history
Signed-off-by: HermioneKT <[email protected]>
  • Loading branch information
KTCyber authored and HermioneKT committed Feb 17, 2024
1 parent 712925b commit d7350f1
Show file tree
Hide file tree
Showing 10 changed files with 615 additions and 0 deletions.
47 changes: 47 additions & 0 deletions docs/power_manager.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# K8s Power Manager

## Components
1. **PowerManager Controller**: ensures the actual state matches the desired state of the cluster.
2. **PowerConfig Controller**: watches for the PowerConfig created by the user and deploys a Power Node Agent onto each selected node using a DaemonSet.
- powerNodeSelector: A key/value map used to define a list of node labels that a node must satisfy for the operator's node
agent to be deployed.
- powerProfiles: The list of PowerProfiles that the user wants available on the nodes
3. **Power Node Agent**: a containerized application that communicates with the node's Kubelet PodResources endpoint to discover the exact CPUs
allocated to each container and tunes the frequency of those cores as requested.


## Setup
### 1. Manual
#### on both nodes
```bash
git clone -b new_test --depth=1 https://github.com/vhive-serverless/vhive.git
cd vhive
mkdir -p /tmp/vhive-logs
./scripts/cloudlab/setup_node.sh stock-only > >(tee -a /tmp/vhive-logs/setup_node.stdout) 2> >(tee -a /tmp/vhive-logs/setup_node.stderr >&2)
```

#### for worker
```bash
./scripts/cluster/setup_worker_kubelet.sh stock-only > >(tee -a /tmp/vhive-logs/setup_worker_kubelet.stdout) 2> >(tee -a /tmp/vhive-logs/setup_worker_kubelet.stderr >&2)
sudo screen -dmS containerd bash -c "containerd > >(tee -a /tmp/vhive-logs/containerd.stdout) 2> >(tee -a /tmp/vhive-logs/containerd.stderr >&2)"
```

#### for master
```bash
sudo screen -dmS containerd bash -c "containerd > >(tee -a /tmp/vhive-logs/containerd.stdout) 2> >(tee -a /tmp/vhive-logs/containerd.stderr >&2)"
./scripts/cluster/create_multinode_cluster.sh stock-only > >(tee -a /tmp/vhive-logs/create_multinode_cluster.stdout) 2> >(tee -a /tmp/vhive-logs/create_multinode_cluster.stderr >&2)
```

Join the cluster from the worker node, then answer `y` to the prompt on the master node.

## Setup
### 1. Manual

Execute the following **as a non-root user with sudo rights** using **bash**:
1. On master node, run the node setup script:
```bash
./power_manager/setup_power_manager.sh
```
2. On worker node, run:
```bash
go run ./power_manager/workload_sensitivity_exp.go
```

### 2. Clean up
```bash
./scripts/github_runner/clean_cri_runner.sh
```
250 changes: 250 additions & 0 deletions go.work.sum

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions power_manager/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// NOTE(review): this go.mod is added under power_manager/, but the module path
// below and the replace directive both name examples/deployer, which looks
// copied from that module. Confirm the intended module path for this directory.
module github.com/vhive-serverless/vhive/examples/deployer

go 1.19

// Redirects the module path declared above to the sibling ../deployer directory.
replace github.com/vhive-serverless/vhive/examples/deployer => ../deployer
Empty file added power_manager/go.sum
Empty file.
121 changes: 121 additions & 0 deletions power_manager/internode_scaling_exp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package main

import (
"encoding/csv"
"fmt"
"os"
"os/exec"
"strconv"
"time"
)

// functionDomain is the suffix shared by every function hostname below.
// NOTE(review): presumably "<namespace>.<ingress IP>.sslip.io" — confirm
// against the cluster these experiments run on.
const functionDomain = ".default.192.168.1.240.sslip.io"

// Hostnames (no scheme, no port) of the functions the experiments target.
var (
	// SpinningURL is the hostname of the spinning-go function.
	SpinningURL = "spinning-go" + functionDomain
	// SleepingURL is the hostname of the sleeping-go function.
	SleepingURL = "sleeping-go" + functionDomain
	// AesURL is the hostname of the aes-python function.
	AesURL = "aes-python" + functionDomain
	// AuthURL is the hostname of the auth-python function.
	AuthURL = "auth-python" + functionDomain
)

// setPowerProfileToNodes applies the Kubernetes Power Manager resources that
// give node-1 and node-2 their own fixed-frequency profile.
//
// It pipes five manifests to `kubectl apply -f -`, in this order:
//  1. the PowerConfig "power-config" (selects nodes labelled kubernetes.io/os=linux),
//  2. the PowerProfile "performance-node1" with min and max both set to freq1,
//  3. the PowerProfile "performance-node2" with min and max both set to freq2,
//  4. a PowerWorkload binding "performance-node1" to all cores of node-1,
//  5. a PowerWorkload binding "performance-node2" to all cores of node-2.
//
// The node hostnames are hard-coded to one CloudLab cluster.
//
// It stops at the first failing step and returns that error, wrapped with the
// resource being applied and kubectl's combined stdout/stderr so the cause is
// visible to the caller. It returns nil when every manifest was applied.
func setPowerProfileToNodes(freq1 int64, freq2 int64) error {
	const (
		node1 = "node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us"
		node2 = "node-2.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us"
	)

	// The selector label is nested under powerNodeSelector; at the same
	// indentation it would be parsed as a separate field of spec.
	const powerConfig = `apiVersion: "power.intel.com/v1"
kind: PowerConfig
metadata:
  name: power-config
  namespace: intel-power
spec:
  powerNodeSelector:
    kubernetes.io/os: linux
  powerProfiles:
  - "performance"
`

	// Arguments: [1] profile name, [2] frequency used for both max and min.
	const profileTmpl = `apiVersion: "power.intel.com/v1"
kind: PowerProfile
metadata:
  name: %[1]s
  namespace: intel-power
spec:
  name: "%[1]s"
  max: %[2]d
  min: %[2]d
  shared: true
  governor: "performance"
`

	// Arguments: [1] node hostname, [2] profile name.
	const workloadTmpl = `apiVersion: "power.intel.com/v1"
kind: PowerWorkload
metadata:
  name: performance-%[1]s-workload
  namespace: intel-power
spec:
  name: "performance-%[1]s-workload"
  allCores: true
  powerNodeSelector:
    kubernetes.io/hostname: %[1]s
  powerProfile: "%[2]s"
`

	// apply feeds one manifest to kubectl through a bash here-document. The
	// delimiter is unquoted, so the manifest must not contain `$` or backticks;
	// none of the templates above do.
	apply := func(resource, manifest string) error {
		cmd := exec.Command("bash", "-c", "kubectl apply -f - <<EOF\n"+manifest+"EOF")
		if out, err := cmd.CombinedOutput(); err != nil {
			return fmt.Errorf("applying %s: %w: %s", resource, err, out)
		}
		return nil
	}

	steps := []struct{ resource, manifest string }{
		{"PowerConfig power-config", powerConfig},
		{"PowerProfile performance-node1", fmt.Sprintf(profileTmpl, "performance-node1", freq1)},
		{"PowerProfile performance-node2", fmt.Sprintf(profileTmpl, "performance-node2", freq2)},
		{"PowerWorkload for " + node1, fmt.Sprintf(workloadTmpl, node1, "performance-node1")},
		{"PowerWorkload for " + node2, fmt.Sprintf(workloadTmpl, node2, "performance-node2")},
	}
	for _, step := range steps {
		if err := apply(step.resource, step.manifest); err != nil {
			return err
		}
	}
	return nil
}

// invoke sends n concurrent requests to the function at url through the
// vSwarm test-client and appends one CSV record per successful request.
//
// Every request runs `./test-client --addr <url>:80 --name "allow"` from
// $HOME/vSwarm/tools/test-client. A record holds the start time, the end time
// (both Unix milliseconds, UTC) and their difference. A failed request is
// reported on stdout together with the client's output and yields no record.
//
// invoke blocks until all n requests have finished. Records are written by
// the calling goroutine only, because csv.Writer does not synchronise access
// to its buffer; for the same reason a writer must not be shared between
// invoke calls that run at the same time. A non-positive n does nothing.
func invoke(n int, url string, writer *csv.Writer) {
	if n <= 0 {
		return
	}
	command := fmt.Sprintf("cd $HOME/vSwarm/tools/test-client && ./test-client --addr %s:80 --name \"allow\"", url)

	// Buffered to n so no request goroutine ever blocks on send; a nil record
	// marks a failed request.
	results := make(chan []string, n)
	for i := 0; i < n; i++ {
		go func() {
			startInvoke := time.Now().UTC().UnixMilli()
			out, err := exec.Command("bash", "-c", command).CombinedOutput()
			if err != nil {
				fmt.Printf("ERR2: %+v: %s\n", err, out)
				results <- nil
				return
			}
			endInvoke := time.Now().UTC().UnixMilli()
			results <- []string{
				strconv.FormatInt(startInvoke, 10),
				strconv.FormatInt(endInvoke, 10),
				strconv.FormatInt(endInvoke-startInvoke, 10),
			}
		}()
	}

	// Receiving exactly n results doubles as waiting for every goroutine.
	for i := 0; i < n; i++ {
		record := <-results
		if record == nil {
			continue
		}
		if err := writer.Write(record); err != nil {
			fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
		}
	}
}

// newMetricsWriter creates (or truncates) path and writes the header row
// "startTime,endTime,<latencyColumn>". It panics when the file cannot be
// created; a header write failure is only reported on stdout.
func newMetricsWriter(path, latencyColumn string) (*os.File, *csv.Writer) {
	file, err := os.Create(path)
	if err != nil {
		panic(err)
	}
	writer := csv.NewWriter(file)
	if err := writer.Write([]string{"startTime", "endTime", latencyColumn}); err != nil {
		fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
	}
	return file, writer
}

// closeMetrics flushes writer and closes file, reporting either failure on
// stdout. csv.Writer buffers its output, so most I/O errors only become
// visible through Error after Flush.
func closeMetrics(file *os.File, writer *csv.Writer) {
	writer.Flush()
	if err := writer.Error(); err != nil {
		fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
	}
	if err := file.Close(); err != nil {
		fmt.Printf("Error closing %s: %v\n", file.Name(), err)
	}
}

// main drives the sleeping-go and spinning-go functions side by side for two
// minutes and records request latencies in metrics1.csv (sleeping) and
// metrics2.csv (spinning).
//
// Load is generated in rounds: each round sends 5 concurrent requests to each
// function and the next round starts once both batches have completed, which
// bounds the number of in-flight requests at 10. Each file ends with a
// "-,-,-" separator row.
func main() {
	const (
		experimentDuration = 2 * time.Minute
		requestsPerRound   = 5
	)

	sleepingFile, sleepingWriter := newMetricsWriter("metrics1.csv", "sleepingLatency")
	defer closeMetrics(sleepingFile, sleepingWriter)

	spinningFile, spinningWriter := newMetricsWriter("metrics2.csv", "spinningLatency")
	defer closeMetrics(spinningFile, spinningWriter)

	deadline := time.Now().Add(experimentDuration)
	for time.Now().Before(deadline) {
		// Each writer is used by exactly one invoke call per round.
		done := make(chan struct{}, 2)
		go func() {
			invoke(requestsPerRound, SleepingURL, sleepingWriter)
			done <- struct{}{}
		}()
		go func() {
			invoke(requestsPerRound, SpinningURL, spinningWriter)
			done <- struct{}{}
		}()
		<-done
		<-done
	}

	for _, writer := range []*csv.Writer{sleepingWriter, spinningWriter} {
		if err := writer.Write([]string{"-", "-", "-"}); err != nil {
			fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
		}
	}
	fmt.Println("done")
}
14 changes: 14 additions & 0 deletions power_manager/powerconfig.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# PowerConfig "power-config" in the intel-power namespace.
# Applied by setup_power_manager.sh after the Power Manager controller manifest.
apiVersion: "power.intel.com/v1"
kind: PowerConfig
metadata:
name: power-config
namespace: intel-power
spec:
# Labels a node must carry for the PowerNodeAgent to be deployed on it; add more labels here to narrow the selection.
powerNodeSelector:
kubernetes.io/os: linux
# PowerProfiles to make available on the selected nodes. Valid entries: performance, balance-performance, balance-power.
powerProfiles:
- "performance"


68 changes: 68 additions & 0 deletions power_manager/setup_power_manager.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash

# MIT License
#
# Copyright (c) 2020 Dmitrii Ustiugov, Plamen Petrov and EASE lab
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Installs the Intel Kubernetes Power Manager on the cluster and applies the
# vHive PowerConfig, shared PowerProfile and shared PowerWorkload.
# Expects kubectl to be configured for the cluster and the vHive repository to
# be checked out at $HOME/vhive.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so a broken step is never silently skipped.
set -euo pipefail

POWER_MANAGER_DIR="$HOME/kubernetes-power-manager"
VHIVE_POWER_DIR="$HOME/vhive/power_manager"
GO_TARBALL="go1.20.2.linux-amd64.tar.gz"

# Install K8 Power Manager (the clone is skipped when the script is re-run)
if [ ! -d "$POWER_MANAGER_DIR/.git" ]; then
    git clone https://github.com/intel/kubernetes-power-manager "$POWER_MANAGER_DIR"
fi

# Set up the necessary Namespace, Service Account, and RBAC rules for the Kubernetes Power Manager
kubectl apply -f "$POWER_MANAGER_DIR/config/rbac/namespace.yaml"
kubectl apply -f "$POWER_MANAGER_DIR/config/rbac/rbac.yaml"

# Install go1.20
# Download first so that a failed download does not leave the node without Go.
sudo apt update
wget -O "/tmp/$GO_TARBALL" "https://go.dev/dl/$GO_TARBALL"
sudo rm -rf /usr/local/go
sudo tar -C /usr/local -xzf "/tmp/$GO_TARBALL"
rm -f "/tmp/$GO_TARBALL"
export GOROOT=/usr/local/go
export GOPATH=$HOME
export PATH=$GOPATH/bin:$GOROOT/bin:$PATH

# Install docker
curl -fsSL https://get.docker.com/ | sudo sh
sudo curl -L "https://github.com/docker/compose/releases/download/v2.12.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo mv /usr/local/bin/docker-compose /usr/bin/docker-compose
sudo chmod +x /usr/bin/docker-compose
export PATH=$PATH:/usr/local/go/bin
export PATH=$PATH:$HOME/go/bin

# Generate the CRD templates, create the Custom Resource Definitions, and install the CRDs and built Docker images locally
cd "$POWER_MANAGER_DIR"
make
sudo docker pull intel/power-operator
sudo docker pull intel/power-node-agent:latest

# Apply Power Manager Controller
kubectl apply -f "$POWER_MANAGER_DIR/config/manager/manager.yaml"

# Apply PowerConfig -> create the power-node-agent DaemonSet that manages the Power Node Agent pods.
kubectl apply -f "$VHIVE_POWER_DIR/powerconfig.yaml"

# Apply the PowerProfile. You can modify the spec in the shared-profile.yaml file.
kubectl apply -f "$VHIVE_POWER_DIR/shared-profile.yaml"

# Apply the shared PowerWorkload. All CPUs (except any reservedCPUs specified in shared-workload.yaml) will be tuned to the frequency specified in shared-profile.yaml.
kubectl apply -f "$VHIVE_POWER_DIR/shared-workload.yaml"

# Show the resulting profiles and workloads as a quick sanity check.
kubectl get powerprofiles -n intel-power
kubectl get powerworkloads -n intel-power
11 changes: 11 additions & 0 deletions power_manager/shared-profile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# PowerProfile "shared" in the intel-power namespace; applied by setup_power_manager.sh
# and referenced by the PowerWorkload in shared-workload.yaml.
apiVersion: power.intel.com/v1
kind: PowerProfile
metadata:
name: shared
namespace: intel-power
spec:
name: "shared"
# max and min are equal, which pins the frequency to a single value (presumably MHz — confirm).
max: 1200
min: 1200
shared: true
governor: "powersave"
11 changes: 11 additions & 0 deletions power_manager/shared-workload.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# PowerWorkload that assigns the "shared" PowerProfile to one node; applied by setup_power_manager.sh.
# The node hostname below is specific to one CloudLab cluster; change it (here and in both name fields) to match your node.
apiVersion: "power.intel.com/v1"
kind: PowerWorkload
metadata:
name: shared-node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us-workload
namespace: intel-power
spec:
name: "shared-node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us-workload"
allCores: true
# Selects the node this workload applies to by its hostname label.
powerNodeSelector:
kubernetes.io/hostname: node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us
# Name of the PowerProfile defined in shared-profile.yaml.
powerProfile: "shared"
88 changes: 88 additions & 0 deletions power_manager/workload_sensitivity_exp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package main

import (
"encoding/csv"
"fmt"
"os"
"os/exec"
"strconv"
"time"
)

// setPowerProfileToNode applies the Kubernetes Power Manager resources that
// run node-1 with the "performance" profile fixed at freq.
//
// It pipes three manifests to `kubectl apply -f -`, in this order:
//  1. the PowerConfig "power-config", selecting node-1 by hostname,
//  2. the PowerProfile "performance" with min and max both set to freq,
//  3. a PowerWorkload binding "performance" to all cores of node-1.
//
// The node hostname is hard-coded to one CloudLab cluster.
//
// It stops at the first failing step and returns that error, wrapped with the
// resource being applied and kubectl's combined stdout/stderr so the cause is
// visible to the caller. It returns nil when every manifest was applied.
func setPowerProfileToNode(freq int64) error {
	const node = "node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us"

	// The selector label is nested under powerNodeSelector; at the same
	// indentation it would be parsed as a separate field of spec.
	const powerConfig = `apiVersion: "power.intel.com/v1"
kind: PowerConfig
metadata:
  name: power-config
  namespace: intel-power
spec:
  powerNodeSelector:
    kubernetes.io/hostname: ` + node + `
  powerProfiles:
  - "performance"
`

	// Argument: [1] frequency used for both max and min.
	const profileTmpl = `apiVersion: "power.intel.com/v1"
kind: PowerProfile
metadata:
  name: performance
  namespace: intel-power
spec:
  name: "performance"
  max: %[1]d
  min: %[1]d
  shared: true
  governor: "performance"
`

	const powerWorkload = `apiVersion: "power.intel.com/v1"
kind: PowerWorkload
metadata:
  name: performance-` + node + `-workload
  namespace: intel-power
spec:
  name: "performance-` + node + `-workload"
  allCores: true
  powerNodeSelector:
    kubernetes.io/hostname: ` + node + `
  powerProfile: "performance"
`

	// apply feeds one manifest to kubectl through a bash here-document. The
	// delimiter is unquoted, so the manifest must not contain `$` or backticks;
	// none of the manifests above do.
	apply := func(resource, manifest string) error {
		cmd := exec.Command("bash", "-c", "kubectl apply -f - <<EOF\n"+manifest+"EOF")
		if out, err := cmd.CombinedOutput(); err != nil {
			return fmt.Errorf("applying %s: %w: %s", resource, err, out)
		}
		return nil
	}

	steps := []struct{ resource, manifest string }{
		{"PowerConfig power-config", powerConfig},
		{"PowerProfile performance", fmt.Sprintf(profileTmpl, freq)},
		{"PowerWorkload for " + node, powerWorkload},
	}
	for _, step := range steps {
		if err := apply(step.resource, step.manifest); err != nil {
			return err
		}
	}
	return nil
}

// main measures the latency of the sleeping-go function at a series of fixed
// CPU frequencies and records the results in metrics.csv.
//
// For each frequency it applies the power profile with setPowerProfileToNode,
// then invokes the function 1000 times, one request at a time, through the
// vSwarm test-client. Each successful request adds a row holding the start
// time, end time (Unix milliseconds, UTC) and their difference. Every
// frequency section is terminated by a "-,-,-" row.
//
// If the profile cannot be applied, that frequency is skipped (its section is
// left empty but still terminated) so latencies are never attributed to a
// frequency that was not actually set. A failed invocation aborts the run;
// rows gathered so far are still flushed.
func main() {
	file, err := os.Create("metrics.csv")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	writer := csv.NewWriter(file)
	// Registered after file.Close, so it runs first. csv.Writer buffers its
	// output; I/O errors mostly surface through Error after Flush.
	defer func() {
		writer.Flush()
		if err := writer.Error(); err != nil {
			fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
		}
	}()

	writeRow := func(fields ...string) {
		if err := writer.Write(fields); err != nil {
			fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
		}
	}
	writeRow("startTime", "endTime", "latency")

	const (
		invocationsPerFrequency = 1000
		url                     = "sleeping-go.default.192.168.1.240.sslip.io"
	)
	command := fmt.Sprintf("cd $HOME/vSwarm/tools/test-client && ./test-client --addr %s:80 --name \"allow\"", url)

	frequencies := []int64{1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600}
	for _, freq := range frequencies {
		if err := setPowerProfileToNode(freq); err != nil {
			fmt.Printf("ERR1 :%+v\n", err)
			writeRow("-", "-", "-")
			continue
		}

		for j := 0; j < invocationsPerFrequency; j++ {
			startInvoke := time.Now().UTC().UnixMilli()
			out, err := exec.Command("bash", "-c", command).CombinedOutput()
			if err != nil {
				fmt.Printf("ERR2: %+v: %s\n", err, out)
				return
			}
			endInvoke := time.Now().UTC().UnixMilli()
			writeRow(
				strconv.FormatInt(startInvoke, 10),
				strconv.FormatInt(endInvoke, 10),
				strconv.FormatInt(endInvoke-startInvoke, 10),
			)
		}

		writeRow("-", "-", "-")
		fmt.Println("done")
	}
}

0 comments on commit d7350f1

Please sign in to comment.