Skip to content

Commit

Permalink
resolve merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
HermioneKT committed Feb 17, 2024
1 parent 2ca1268 commit 649e5cb
Show file tree
Hide file tree
Showing 14 changed files with 332 additions and 72 deletions.
47 changes: 47 additions & 0 deletions docs/power_manager.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# K8s Power Manager

## Components
1. **PowerManager Controller**: ensures the actual state matches the desired state of the cluster.
2. **PowerConfig Controller**: watches for the PowerConfig created by the user and deploys a Power Node Agent onto each specified node using a DaemonSet.
- powerNodeSelector: A key/value map used to define a list of node labels that a node must satisfy for the operator's node
agent to be deployed.
- powerProfiles: The list of PowerProfiles that the user wants available on the nodes
3. **Power Node Agent**: a containerized application that communicates with the node's Kubelet PodResources endpoint to discover the exact CPUs that
are allocated to each container and tunes the frequency of those cores as requested.


## Setup
### 1. Manual
#### on both nodes
git clone -b new_test --depth=1 https://github.com/vhive-serverless/vhive.git
cd vhive
mkdir -p /tmp/vhive-logs
./scripts/cloudlab/setup_node.sh stock-only > >(tee -a /tmp/vhive-logs/setup_node.stdout) 2> >(tee -a /tmp/vhive-logs/setup_node.stderr >&2)

#### for worker
./scripts/cluster/setup_worker_kubelet.sh stock-only > >(tee -a /tmp/vhive-logs/setup_worker_kubelet.stdout) 2> >(tee -a /tmp/vhive-logs/setup_worker_kubelet.stderr >&2)
sudo screen -dmS containerd bash -c "containerd > >(tee -a /tmp/vhive-logs/containerd.stdout) 2> >(tee -a /tmp/vhive-logs/containerd.stderr >&2)"

#### for master
sudo screen -dmS containerd bash -c "containerd > >(tee -a /tmp/vhive-logs/containerd.stdout) 2> >(tee -a /tmp/vhive-logs/containerd.stderr >&2)"
./scripts/cluster/create_multinode_cluster.sh stock-only > >(tee -a /tmp/vhive-logs/create_multinode_cluster.stdout) 2> >(tee -a /tmp/vhive-logs/create_multinode_cluster.stderr >&2)

Join the cluster from the worker node, then answer 'y' to the prompt on the master node.

## Setup
### 1. Manual

Execute the following **as a non-root user with sudo rights** using **bash**:
1. On master node, run the node setup script:
```bash
./power_manager/setup_power_manager.sh
```
2. On worker node, run:
```bash
go run ./examples/powermanger/workload_sensitivity_exp.go
```

### 2. Clean Up
```bash
./scripts/github_runner/clean_cri_runner.sh
```
10 changes: 10 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,23 @@ require (
github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5
github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f
github.com/wcharczuk/go-chart v2.0.1+incompatible
	golang.org/x/net v0.20.0
	golang.org/x/sync v0.6.0
	golang.org/x/sys v0.16.0
	gonum.org/v1/gonum v0.14.0
	gonum.org/v1/plot v0.14.0
	google.golang.org/grpc v1.47.0
	k8s.io/cri-api v0.25.0
)

require (
Expand Down
5 changes: 5 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1324,8 +1324,13 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
Expand Down
5 changes: 5 additions & 0 deletions power_manager/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module github.com/vhive-serverless/vhive/examples/deployer

go 1.19

replace github.com/vhive-serverless/vhive/examples/deployer => ../deployer
Empty file added power_manager/go.sum
Empty file.
121 changes: 121 additions & 0 deletions power_manager/internode_scaling_exp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package main

import (
"encoding/csv"
"fmt"
"os"
"os/exec"
"strconv"
"time"
)

// Host names of the functions under test. invoke passes one of them to
// test-client as "--addr <host>:80".
var (
// SpinningURL is the host main invokes for the records written to metrics2.csv.
SpinningURL = "spinning-go.default.192.168.1.240.sslip.io"
// SleepingURL is the host main invokes for the records written to metrics1.csv.
SleepingURL = "sleeping-go.default.192.168.1.240.sslip.io"
// AesURL is not referenced elsewhere in the visible part of this file.
AesURL = "aes-python.default.192.168.1.240.sslip.io"
// AuthURL is not referenced elsewhere in the visible part of this file.
AuthURL = "auth-python.default.192.168.1.240.sslip.io"
)

// YAML manifests applied by setPowerProfileToNodes. Nested keys are indented
// by two spaces per level so that the node-selector labels are parsed as
// children of powerNodeSelector rather than as siblings of it.
const (
	// powerConfigManifest selects every Linux node and makes the
	// "performance" profile available on it.
	powerConfigManifest = `apiVersion: "power.intel.com/v1"
kind: PowerConfig
metadata:
  name: power-config
  namespace: intel-power
spec:
  powerNodeSelector:
    kubernetes.io/os: linux
  powerProfiles:
  - "performance"`

	// powerProfileManifestFmt takes the profile name (%[1]s) and the
	// frequency used for both max and min (%[2]d).
	powerProfileManifestFmt = `apiVersion: "power.intel.com/v1"
kind: PowerProfile
metadata:
  name: %[1]s
  namespace: intel-power
spec:
  name: "%[1]s"
  max: %[2]d
  min: %[2]d
  shared: true
  governor: "performance"`

	// powerWorkloadManifestFmt takes the workload name (%[1]s), the node
	// host name it is pinned to (%[2]s) and the profile it uses (%[3]s).
	powerWorkloadManifestFmt = `apiVersion: "power.intel.com/v1"
kind: PowerWorkload
metadata:
  name: %[1]s
  namespace: intel-power
spec:
  name: "%[1]s"
  allCores: true
  powerNodeSelector:
    kubernetes.io/hostname: %[2]s
  powerProfile: "%[3]s"`
)

// applyPowerManifest feeds manifest to `kubectl apply -f -` through a bash
// heredoc. On failure the returned error wraps the exec error and carries
// kubectl's combined stdout/stderr so the cause is visible to the caller.
func applyPowerManifest(manifest string) error {
	script := "kubectl apply -f - <<EOF\n" + manifest + "\nEOF"
	out, err := exec.Command("bash", "-c", script).CombinedOutput()
	if err != nil {
		return fmt.Errorf("kubectl apply: %w: %s", err, out)
	}
	return nil
}

// setPowerProfileToNodes pins the two worker nodes to fixed CPU frequencies.
//
// It applies, in order: the PowerConfig, one PowerProfile per node whose max
// and min are both set to the requested frequency (freq1 for node-1, freq2
// for node-2), and one PowerWorkload per node that binds all of the node's
// cores to its profile. The frequency unit is whatever the PowerProfile CRD
// expects (presumably MHz, as shared-profile.yaml uses 1200 — confirm).
//
// It returns the first error encountered; later manifests are not applied.
func setPowerProfileToNodes(freq1 int64, freq2 int64) error {
	nodes := []struct {
		profile string
		host    string
		freq    int64
	}{
		{"performance-node1", "node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us", freq1},
		{"performance-node2", "node-2.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us", freq2},
	}

	if err := applyPowerManifest(powerConfigManifest); err != nil {
		return fmt.Errorf("applying PowerConfig: %w", err)
	}

	// Both profiles must exist before any workload refers to them.
	for _, n := range nodes {
		manifest := fmt.Sprintf(powerProfileManifestFmt, n.profile, n.freq)
		if err := applyPowerManifest(manifest); err != nil {
			return fmt.Errorf("applying PowerProfile %q: %w", n.profile, err)
		}
	}

	for _, n := range nodes {
		workload := "performance-" + n.host + "-workload"
		manifest := fmt.Sprintf(powerWorkloadManifestFmt, workload, n.host, n.profile)
		if err := applyPowerManifest(manifest); err != nil {
			return fmt.Errorf("applying PowerWorkload %q: %w", workload, err)
		}
	}
	return nil
}

// invokeOnce runs test-client once against url and returns the CSV record
// {start, end, latency} in Unix milliseconds, or nil if the client failed.
func invokeOnce(url string) []string {
	command := fmt.Sprintf("cd $HOME/vSwarm/tools/test-client && ./test-client --addr %s:80 --name \"allow\"", url)
	startInvoke := time.Now().UTC().UnixMilli()
	if _, err := exec.Command("bash", "-c", command).CombinedOutput(); err != nil {
		fmt.Printf("ERR2: %+v\n", err)
		return nil
	}
	endInvoke := time.Now().UTC().UnixMilli()
	return []string{
		strconv.FormatInt(startInvoke, 10),
		strconv.FormatInt(endInvoke, 10),
		strconv.FormatInt(endInvoke-startInvoke, 10),
	}
}

// invoke issues n concurrent test-client requests against url and appends one
// record per successful request to writer.
//
// It returns only after all n requests have finished. Records are written
// from the calling goroutine, so the csv.Writer is never touched
// concurrently by a single call; callers must not run invoke concurrently
// with the same writer. A non-positive n is a no-op.
func invoke(n int, url string, writer *csv.Writer) {
	results := make(chan []string)
	for i := 0; i < n; i++ {
		go func() { results <- invokeOnce(url) }()
	}
	for i := 0; i < n; i++ {
		record := <-results
		if record == nil {
			continue // the failure was already reported by invokeOnce
		}
		if err := writer.Write(record); err != nil {
			fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
		}
	}
}

// flushMetrics flushes w and reports any error the csv.Writer recorded while
// writing or flushing; path is only used in the message.
func flushMetrics(w *csv.Writer, path string) {
	w.Flush()
	if err := w.Error(); err != nil {
		fmt.Printf("Error flushing metrics to %s: %v\n", path, err)
	}
}

// main drives the sleeping and spinning functions for two minutes and records
// per-request latencies in metrics1.csv and metrics2.csv respectively.
//
// Each target gets its own closed loop that keeps batches of five requests in
// flight, so at most ten test-client processes run at a time. main waits for
// both loops to finish before writing the "-" terminator rows, which
// guarantees every completed request is recorded before the files are
// flushed and closed.
func main() {
	const (
		experimentDuration = 2 * time.Minute
		batchSize          = 5
	)

	file1, err := os.Create("metrics1.csv")
	if err != nil {
		panic(err)
	}
	defer file1.Close()
	writer1 := csv.NewWriter(file1)
	defer flushMetrics(writer1, "metrics1.csv")
	if err := writer1.Write([]string{"startTime", "endTime", "sleepingLatency"}); err != nil {
		fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
	}

	file2, err := os.Create("metrics2.csv")
	if err != nil {
		panic(err)
	}
	defer file2.Close()
	writer2 := csv.NewWriter(file2)
	defer flushMetrics(writer2, "metrics2.csv")
	if err := writer2.Write([]string{"startTime", "endTime", "spinningLatency"}); err != nil {
		fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
	}

	loads := []struct {
		url    string
		writer *csv.Writer
	}{
		{SleepingURL, writer1},
		{SpinningURL, writer2},
	}

	// One loop per target; each loop owns its writer exclusively.
	done := make(chan struct{})
	start := time.Now()
	for _, l := range loads {
		go func(url string, w *csv.Writer) {
			for time.Since(start) < experimentDuration {
				invoke(batchSize, url, w)
			}
			done <- struct{}{}
		}(l.url, l.writer)
	}
	for range loads {
		<-done
	}

	// Terminator rows mark the end of the run in each file.
	if err := writer1.Write([]string{"-", "-", "-"}); err != nil {
		fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
	}
	if err := writer2.Write([]string{"-", "-", "-"}); err != nil {
		fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
	}
	fmt.Println("done")
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ apiVersion: "power.intel.com/v1"
kind: PowerConfig
metadata:
name: power-config
namespace: intel-power
spec:
# Add labels here for the Nodes you want the PowerNodeAgent to be applied to
powerNodeSelector:
feature.node.kubernetes.io/power-node: "true"
kubernetes.io/os: linux
# Add wanted PowerProfiles here; valid entries are as follows: performance, balance-performance, balance-power
powerProfiles:
- "performance"
- "performance"


48 changes: 29 additions & 19 deletions scripts/power_manager/setup_power_manager.sh → power_manager/setup_power_manager.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -22,37 +22,47 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

PWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Install K8 Power Manager
git clone https://github.com/intel/kubernetes-power-manager $HOME/kubernetes-power-manager
cd $HOME/kubernetes-power-manager

# Set up the necessary Namespace, Service Account, and RBAC rules for the Kubernetes Power Manager
kubectl apply -f config/rbac/namespace.yaml
kubectl apply -f config/rbac/rbac.yaml
kubectl apply -f $HOME/kubernetes-power-manager/config/rbac/namespace.yaml
kubectl apply -f $HOME/kubernetes-power-manager/config/rbac/rbac.yaml

# Generate the CRD templates, create the Custom Resource Definitions, and install the CRDs
make
# Install go1.20
sudo rm -rf /usr/local/go
sudo apt update
wget https://go.dev/dl/go1.20.2.linux-amd64.tar.gz
sudo tar -C /usr/local -xzf go1.20.2.linux-amd64.tar.gz
export GOROOT=/usr/local/go
export GOPATH=$HOME
export PATH=$GOPATH/bin:$GOROOT/bin:$PATH

# Built Docker images locally
make images
# Install docker
curl -fsSL https://get.docker.com/ | sudo sh && \
sudo curl -L "https://github.com/docker/compose/releases/download/v2.12.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose && \
sudo mv /usr/local/bin/docker-compose /usr/bin/docker-compose && \
sudo chmod +x /usr/bin/docker-compose && \
export PATH=$PATH:/usr/local/go/bin && \
export PATH=$PATH:$HOME/go/bin

# Generate the CRD templates, create the Custom Resource Definitions, install the CRDs, and pull the Docker images
cd $HOME/kubernetes-power-manager
make
sudo docker pull intel/power-operator
sudo docker pull intel/power-node-agent:latest

# Apply Power Manager Controller
kubectl apply -f config/manager/manager.yaml
kubectl apply -f $HOME/kubernetes-power-manager/config/manager/manager.yaml

# Apply PowerConfig -> create the power-node-agent DaemonSet that manages the Power Node Agent pods.
kubectl apply -f ${PWD}/powerconfig.yaml
kubectl apply -f $HOME/vhive/power_manager/powerconfig.yaml

# Apply Profile. You can modify the spec in the shared-profile.yaml file
kubectl apply -f ${PWD}/shared-profile.yaml
kubectl apply -f $HOME/vhive/power_manager/shared-profile.yaml

# Apply the shared PowerWorkload. All CPUs (except reservedCPUs specified in this yaml file) will be tuned ti the specified frequency in shared-profile.yaml
kubectl apply -f ${PWD}/shared-workload.yaml
# Apply the shared PowerWorkload. All CPUs (except reservedCPUs specified in this yaml file) will be tuned to the specified frequency in shared-profile.yaml
kubectl apply -f $HOME/vhive/power_manager/shared-workload.yaml

kubectl get powerprofiles -n intel-power
kubectl get powerworkloads -n intel-power




kubectl get powerworkloads -n intel-power
11 changes: 11 additions & 0 deletions power_manager/shared-profile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
apiVersion: power.intel.com/v1
kind: PowerProfile
metadata:
name: shared
namespace: intel-power
spec:
name: "shared"
max: 1200
min: 1200
shared: true
governor: "powersave"
11 changes: 11 additions & 0 deletions power_manager/shared-workload.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
apiVersion: "power.intel.com/v1"
kind: PowerWorkload
metadata:
name: shared-node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us-workload
namespace: intel-power
spec:
name: "shared-node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us-workload"
allCores: true
powerNodeSelector:
kubernetes.io/hostname: node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us
powerProfile: "shared"
Loading

0 comments on commit 649e5cb

Please sign in to comment.