-
Notifications
You must be signed in to change notification settings - Fork 51
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
smt_via_label via machine config #47
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,3 +28,6 @@ chronyconfig: | |
options: iburst | ||
- server: ntp2.example.com | ||
options: iburst | ||
|
||
powervm_rmc: true | ||
smt_control: false |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,4 +5,4 @@ workdir: ~/ocp4-workdir | |
rhcos_kernel_options: [] | ||
sysctl_tuned_options: false | ||
powervm_rmc: true | ||
|
||
smt_control: false |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#!/bin/bash | ||
export PATH=/root/.local/bin:/root/bin:/sbin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin | ||
export KUBECONFIG=/var/lib/kubelet/kubeconfig | ||
COREPS=$(/bin/lscpu | /bin/awk -F: ' $1 ~ /^Core\(s\) per socket$/ {print $2}'|/bin/xargs) | ||
SOCKETS=$(/bin/lscpu | /bin/awk -F: ' $1 ~ /^Socket\(s\)$/ {print $2}'|/bin/xargs) | ||
let TOTALCORES=$COREPS*$SOCKETS | ||
MAXTHREADS=$(/bin/lscpu | /bin/awk -F: ' $1 ~ /^CPU\(s\)$/ {print $2}'|/bin/xargs) | ||
let MAXSMT=$MAXTHREADS/$TOTALCORES | ||
CURRENTSMT=$(/bin/lscpu | /bin/awk -F: ' $1 ~ /^Thread\(s\) per core$/ {print $2}'|/bin/xargs) | ||
|
||
while : | ||
do | ||
SMTLABEL=$(/bin/oc get node $HOSTNAME -L SMT --no-headers |/bin/awk '{print $6}') | ||
if [[ -n $SMTLABEL ]] | ||
then | ||
case $SMTLABEL in | ||
1) TARGETSMT=1 | ||
;; | ||
2) TARGETSMT=2 | ||
;; | ||
4) TARGETSMT=4 | ||
;; | ||
8) TARGETSMT=8 | ||
;; | ||
*) TARGETSMT=$CURRENTSMT ; echo "SMT value must be 1, 2, 4, or 8 and smaller than Maximum SMT." | ||
;; | ||
esac | ||
else | ||
TARGETSMT=$MAXSMT | ||
fi | ||
|
||
CURRENTSMT=$(/bin/lscpu | /bin/awk -F: ' $1 ~ /^Thread\(s\) per core$/ {print $2}'|/bin/xargs) | ||
|
||
if [[ $CURRENTSMT -ne $TARGETSMT ]] | ||
then | ||
INITONTHREAD=0 | ||
INITOFFTHREAD=$TARGETSMT | ||
if [[ $MAXSMT -ge $TARGETSMT ]] | ||
then | ||
while [[ $INITONTHREAD -lt $MAXTHREADS ]] | ||
do | ||
ONTHREAD=$INITONTHREAD | ||
OFFTHREAD=$INITOFFTHREAD | ||
|
||
while [[ $ONTHREAD -lt $OFFTHREAD ]] | ||
do | ||
/bin/echo 1 > /sys/devices/system/cpu/cpu$ONTHREAD/online | ||
let ONTHREAD=$ONTHREAD+1 | ||
done | ||
let INITONTHREAD=$INITONTHREAD+$MAXSMT | ||
while [[ $OFFTHREAD -lt $INITONTHREAD ]] | ||
do | ||
/bin/echo 0 > /sys/devices/system/cpu/cpu$OFFTHREAD/online | ||
let OFFTHREAD=$OFFTHREAD+1 | ||
done | ||
let INITOFFTHREAD=$INITOFFTHREAD+$MAXSMT | ||
done | ||
systemctl restart kubelet | ||
else | ||
echo "Target SMT must be smaller or equal than Maximum SMT supported" | ||
fi | ||
fi | ||
/bin/sleep 30 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not needed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is not needed ? |
||
done |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
[Unit] | ||
Description=SMT control via k8s SMT label | ||
|
||
[Service] | ||
ExecStart=/usr/local/bin/powersmt | ||
|
||
[Install] | ||
WantedBy=multi-user.target |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,11 @@ | |
when: powervm_rmc | ||
import_tasks: powervm_rmc.yaml | ||
|
||
- name: Allow SMT configuration via node label (SMT=X) | ||
when: ansible_architecture == "ppc64le" | ||
import_tasks: smt_configuration.yaml | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's make this task optional. I'm not very comfortable fiddling with SMT settings and would like to keep this optional and run through some iterations of e2e tests to verify stability. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you mean adding a variable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes and default to false There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done in my last commit |
||
when: smt_control | ||
|
||
always: | ||
- name: UnPause reboot node (Machineconfig) | ||
k8s: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
- name: Copy files to bastion | ||
copy: | ||
src: "{{ item }}" | ||
dest: "/tmp" | ||
loop: | ||
- powersmt | ||
- powersmt_service | ||
|
||
- name: slurp powersmt to base64 | ||
slurp: | ||
src: "/tmp/powersmt" | ||
register: powersmt_b64 | ||
|
||
- name: slurp powersmt_service to base64 | ||
slurp: | ||
src: "/tmp/powersmt_service" | ||
register: powersmt_service_b64 | ||
|
||
- name: Create SMT via Label options CRDs | ||
vars: | ||
role: '{{ item }}' | ||
template: | ||
src: 90-smt-via-label.yaml | ||
dest: "{{ workdir }}/90-{{ item }}-smt-via-label.yaml" | ||
with_items: | ||
- master | ||
- worker | ||
|
||
- name: Apply the MachineConfig CRDs | ||
shell: "oc apply -f {{ workdir }}/90-{{ item }}-smt-via-label.yaml" | ||
with_items: | ||
- master | ||
- worker | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
apiVersion: machineconfiguration.openshift.io/v1 | ||
kind: MachineConfig | ||
metadata: | ||
labels: | ||
machineconfiguration.openshift.io/role: '{{ role }}' | ||
name: 90-{{ role }}-smt | ||
spec: | ||
config: | ||
ignition: | ||
version: 2.2.0 | ||
storage: | ||
files: | ||
- path: /usr/local/bin/powersmt | ||
overwrite: true | ||
mode: 0700 | ||
filesystem: root | ||
contents: | ||
source: data:text/plain;charset=utf-8;base64,{{ powersmt_b64.content }} | ||
- path: /etc/systemd/system/powersmt.service | ||
overwrite: true | ||
mode: 0644 | ||
filesystem: root | ||
contents: | ||
source: data:text/plain;charset=utf-8;base64,{{ powersmt_service_b64.content }} | ||
systemd: | ||
units: | ||
- name: powersmt.service | ||
enabled: true |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we are rebooting the node, we should avoid restarting kubelet
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Btw, is this setting persistent when done in this way?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes it is persistent as it seems to start before kubelet.
Do you think it is really dangerous to restart kubelet ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure how MCO will behave if kubelet is restarted. Additionally since anyway node reboot will happen post application of the machine config then why to restart kubelet.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MCO deploy a systemd service which start a script that monitor every 30s the status of the SMT label for the current node.
If this label is set (or changed) the script change the SMT mode on the current node and restart kubelet to update the CPU resource count without rebooting the node (which is REALLY long on powerVM).
So the main idea is to avoid a reboot when changing mode. (and because I’m 90% sure that users will always forget to reboot the node to refresh the kubelet)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is kubelet, there is crio and then there is impact to the containers that are already running. Some scenarios that I can think of and have no idea how will it get handled if kubelet is restarted.
I agree reboot will be longer process. However according to me the safest way to set the SMT value during install time (as a
oneshot
service) via machineconfig and letting MCO take care of the reboot.Let's think through this and get some datapoints. The base code as part of this PR is good start, it's just finding the right way and agreeing to it.