diff --git a/Dockerfile b/Dockerfile index 26fb6345..7ed8cb61 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Build the manager binary -FROM golang:1.20 as builder +FROM golang:1.20 AS builder WORKDIR /workspace diff --git a/api/v1alpha2/minicluster_types.go b/api/v1alpha2/minicluster_types.go index dee7c59a..547811d0 100644 --- a/api/v1alpha2/minicluster_types.go +++ b/api/v1alpha2/minicluster_types.go @@ -188,6 +188,10 @@ type PodSpec struct { // +optional ServiceAccountName string `json:"serviceAccountName,omitempty"` + // RuntimeClassName for the pod + // +optional + RuntimeClassName string `json:"runtimeClassName,omitempty"` + // Automatically mount the service account name // +optional AutomountServiceAccountToken bool `json:"automountServiceAccountToken,omitempty"` diff --git a/api/v1alpha2/swagger.json b/api/v1alpha2/swagger.json index a4e931c9..2593817f 100644 --- a/api/v1alpha2/swagger.json +++ b/api/v1alpha2/swagger.json @@ -775,6 +775,10 @@ "description": "Restart Policy", "type": "string" }, + "runtimeClassName": { + "description": "RuntimeClassName for the pod", + "type": "string" + }, "schedulerName": { "description": "Scheduler name for the pod", "type": "string" diff --git a/api/v1alpha2/zz_generated.openapi.go b/api/v1alpha2/zz_generated.openapi.go index e8d7a275..5d151a7e 100644 --- a/api/v1alpha2/zz_generated.openapi.go +++ b/api/v1alpha2/zz_generated.openapi.go @@ -1349,6 +1349,13 @@ func schema_flux_framework_flux_operator_api_v1alpha2_PodSpec(ref common.Referen Format: "", }, }, + "runtimeClassName": { + SchemaProps: spec.SchemaProps{ + Description: "RuntimeClassName for the pod", + Type: []string{"string"}, + Format: "", + }, + }, "automountServiceAccountToken": { SchemaProps: spec.SchemaProps{ Description: "Automatically mount the service account name", diff --git a/chart/templates/minicluster-crd.yaml b/chart/templates/minicluster-crd.yaml index f4d6c966..619d67e5 100644 --- a/chart/templates/minicluster-crd.yaml +++ b/chart/templates/minicluster-crd.yaml @@ -555,6 +555,9 @@ spec: restartPolicy: description: Restart Policy type: string + runtimeClassName: + description: RuntimeClassName for the pod + type: string schedulerName: description: Scheduler name for the pod type: string diff --git a/config/crd/bases/flux-framework.org_miniclusters.yaml b/config/crd/bases/flux-framework.org_miniclusters.yaml index 7e8c1db7..6fb2dea2 100644 --- a/config/crd/bases/flux-framework.org_miniclusters.yaml +++ b/config/crd/bases/flux-framework.org_miniclusters.yaml @@ -558,6 +558,9 @@ spec: restartPolicy: description: Restart Policy type: string + runtimeClassName: + description: RuntimeClassName for the pod + type: string schedulerName: description: Scheduler name for the pod type: string diff --git a/controllers/flux/job.go b/controllers/flux/job.go index 6be186d4..6c48c730 100644 --- a/controllers/flux/job.go +++ b/controllers/flux/job.go @@ -45,8 +45,8 @@ func NewMiniClusterJob(cluster *api.MiniCluster) (*batchv1.Job, error) { Labels: cluster.Spec.JobLabels, }, + // Completions must be == to Parallelism to allow for scaling Spec: batchv1.JobSpec{ - BackoffLimit: &backoffLimit, Completions: &cluster.Spec.Size, Parallelism: &cluster.Spec.Size, @@ -70,13 +70,22 @@ func NewMiniClusterJob(cluster *api.MiniCluster) (*batchv1.Job, error) { ImagePullSecrets: getImagePullSecrets(cluster), ServiceAccountName: cluster.Spec.Pod.ServiceAccountName, AutomountServiceAccountToken: &cluster.Spec.Pod.AutomountServiceAccountToken, - RestartPolicy: 
corev1.RestartPolicy(cluster.Spec.Pod.RestartPolicy), + RestartPolicy: corev1.RestartPolicyOnFailure, NodeSelector: cluster.Spec.Pod.NodeSelector, SchedulerName: cluster.Spec.Pod.SchedulerName, }, }, }, } + // Custom restart policy + if cluster.Spec.Pod.RestartPolicy != "" { + job.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicy(cluster.Spec.Pod.RestartPolicy) + } + + // Only add runtimeClassName if defined + if cluster.Spec.Pod.RuntimeClassName != "" { + job.Spec.Template.Spec.RuntimeClassName = &cluster.Spec.Pod.RuntimeClassName + } // Add Affinity to map one pod / node only if the user hasn't disabled it if !cluster.Spec.Network.DisableAffinity { diff --git a/controllers/flux/pods.go b/controllers/flux/pods.go index 9a4649bf..4a0ff9d3 100644 --- a/controllers/flux/pods.go +++ b/controllers/flux/pods.go @@ -135,13 +135,23 @@ func (r *MiniClusterReconciler) newServicePod( SetHostnameAsFQDN: &setAsFQDN, Volumes: existingVolumes, ImagePullSecrets: getImagePullSecrets(cluster), - RestartPolicy: corev1.RestartPolicy(cluster.Spec.Pod.RestartPolicy), + RestartPolicy: corev1.RestartPolicyOnFailure, ServiceAccountName: cluster.Spec.Pod.ServiceAccountName, AutomountServiceAccountToken: &cluster.Spec.Pod.AutomountServiceAccountToken, NodeSelector: cluster.Spec.Pod.NodeSelector, }, } + // Custom restart policy + if cluster.Spec.Pod.RestartPolicy != "" { + pod.Spec.RestartPolicy = corev1.RestartPolicy(cluster.Spec.Pod.RestartPolicy) + } + + // Only add runtimeClassName if defined + if cluster.Spec.Pod.RuntimeClassName != "" { + pod.Spec.RuntimeClassName = &cluster.Spec.Pod.RuntimeClassName + } + // Assemble existing volume mounts - they are added with getContainers mounts := []corev1.VolumeMount{} diff --git a/docs/getting_started/custom-resource-definition.md b/docs/getting_started/custom-resource-definition.md index 6466625f..e50842bf 100644 --- a/docs/getting_started/custom-resource-definition.md +++ b/docs/getting_started/custom-resource-definition.md @@ -188,8 +188,11 @@ When enabled, meaning that we use flux from a view within the container, these c - [ghcr.io/converged-computing/flux-view-rocky:tag-9](https://github.com/converged-computing/flux-views/pkgs/container/flux-view-rocky) - [ghcr.io/converged-computing/flux-view-rocky:tag-8](https://github.com/converged-computing/flux-views/pkgs/container/flux-view-rocky) + - [ghcr.io/converged-computing/flux-view-ubuntu:tag-noble](https://github.com/converged-computing/flux-views/pkgs/container/flux-view-ubuntu) + - [ghcr.io/converged-computing/flux-view-ubuntu:tag-jammy](https://github.com/converged-computing/flux-views/pkgs/container/flux-view-ubuntu) - [ghcr.io/converged-computing/flux-view-ubuntu:tag-focal](https://github.com/converged-computing/flux-views/pkgs/container/flux-view-ubuntu) + Note that we have [arm builds](https://github.com/converged-computing/flux-views/tree/main/arm) available for each of rocky and ubuntu as well.
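To make the list above concrete, here is a minimal sketch of a MiniCluster that selects one of these view images. The `flux.container.image` placement is inferred from the `.Spec.Flux.Container` references elsewhere in this diff, and the name, size, and application container are illustrative only, so treat the exact layout as an assumption and check the CRD reference for your operator version:

```yaml
# Hypothetical example: pair the jammy view image with a matching
# Ubuntu 22.04 application container (the view OS should match the
# container OS, per the image notes in the docs below).
apiVersion: flux-framework.org/v1alpha2
kind: MiniCluster
metadata:
  name: flux-sample
spec:
  size: 2
  flux:
    container:
      image: ghcr.io/converged-computing/flux-view-ubuntu:tag-jammy
  containers:
    - image: ubuntu:22.04
      command: hostname
```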
If you don't want to use Flux from a view (and want to use the v1alpha1 design of the Flux Operator that had the application alongside Flux) you can do that by way of disabling the flux view: @@ -682,6 +685,34 @@ pod: serviceAccountName: my-service-account ``` +#### restartPolicy + +To customize the restartPolicy for the pod: + +```yaml +pod: + restartPolicy: Never +``` + +#### runtimeClassName + +To add a runtime class name: + +```yaml +pod: + runtimeClassName: nvidia +``` + +#### automountServiceAccountToken + +If you want to automatically mount a service account token: + +```yaml +pod: + automountServiceAccountToken: true +``` + + #### nodeSelector A node selector is a set of key value pairs that helps to schedule pods to the right nodes! You can @@ -720,10 +751,8 @@ name: rabbit #### image -This is the only required attribute! You *must* provide a container base that has Flux. -The requirements of your container are defined in the README of the [flux-hpc](https://github.com/rse-ops/flux-hpc/) -repository. Generally speaking, you need to have Flux executables, Flux Python bindings, -and your own executables on the path, and should be started with root with a flux user. +You do not need to provide a container base that has Flux, but you must make sure the view (with a particular operating system) that will add Flux matches your container. You are not required to start as root, but if you +have a container with a non-root user, that user needs sudo available (to act as root). If you use the [fluxrm/flux-sched](https://hub.docker.com/r/fluxrm/flux-sched) base containers this is usually a good start. diff --git a/examples/dist/flux-operator-arm.yaml b/examples/dist/flux-operator-arm.yaml index e8862a65..0090fdc8 100644 --- a/examples/dist/flux-operator-arm.yaml +++ b/examples/dist/flux-operator-arm.yaml @@ -564,6 +564,9 @@ spec: restartPolicy: description: Restart Policy type: string + runtimeClassName: + description: RuntimeClassName for the pod + type: string schedulerName: description: Scheduler name for the pod type: string diff --git a/examples/dist/flux-operator.yaml b/examples/dist/flux-operator.yaml index 3436899b..be2f791a 100644 --- a/examples/dist/flux-operator.yaml +++ b/examples/dist/flux-operator.yaml @@ -564,6 +564,9 @@ spec: restartPolicy: description: Restart Policy type: string + runtimeClassName: + description: RuntimeClassName for the pod + type: string schedulerName: description: Scheduler name for the pod type: string diff --git a/pkg/flux/templates/components.sh b/pkg/flux/templates/components.sh index c246d285..edae355c 100644 --- a/pkg/flux/templates/components.sh +++ b/pkg/flux/templates/components.sh @@ -21,7 +21,7 @@ url=$goshareUrl/wait-fs-{{ .Spec.Flux.Arch }} # This waiting script is intended to wait for the flux view, and then start running curl -L -O -s -o ./wait-fs -s ${url} {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }} || wget ${url} -q -O ./wait-fs {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }} || true chmod +x ./wait-fs || true -mv ./wait-fs /usr/bin/goshare-wait-fs || true +${SUDO} mv ./wait-fs /usr/bin/goshare-wait-fs || true # Ensure spack view is on the path, wherever it is mounted viewbase="{{ .ViewBase }}" @@ -47,9 +47,9 @@ goshare-wait-fs -p ${viewbase}/flux-operator-done.txt {{ if .Spec.Logging.Quiet # Copy mount software to /opt/software # If /opt/software already exists, we need to copy into it if [[ -e "/opt/software" ]]; then - cp -R ${viewbase}/software/* /opt/software/ || true + ${SUDO} cp -R ${viewbase}/software/* 
/opt/software/ || true else - cp -R ${viewbase}/software /opt/software || true + ${SUDO} cp -R ${viewbase}/software /opt/software || true fi {{end}} @@ -72,10 +72,10 @@ echo "Python root: $foundroot" {{ if .Spec.Logging.Quiet }} > /dev/null 2>&1{{ e # If we found the right python, ensure it's linked (old link does not work) if [[ -f "${pythonversion}" ]]; then - rm -rf $viewroot/bin/python3 - rm -rf $viewroot/bin/python - ln -s ${pythonversion} $viewroot/lib/python || true - ln -s ${pythonversion} $viewroot/lib/python3 || true + ${SUDO} rm -rf $viewroot/bin/python3 + ${SUDO} rm -rf $viewroot/bin/python + ${SUDO} ln -s ${pythonversion} $viewroot/lib/python || true + ${SUDO} ln -s ${pythonversion} $viewroot/lib/python3 || true fi # Ensure we use flux's python (TODO update this to use variable) @@ -87,15 +87,16 @@ find $viewroot . -name libpython*.so* {{ if .Spec.Logging.Quiet }}> /dev/null 2> ls -l /mnt/flux/view/lib/libpython3.11.so.1.0 {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }} # Write an easy file we can source for the environment -cat <<EOT >> ${viewbase}/flux-view.sh +cat <<EOT >> ./flux-view.sh #!/bin/bash export PATH=$PATH export PYTHONPATH=$PYTHONPATH export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$viewroot/lib export fluxsocket=local://${viewroot}/run/flux/local EOT +${SUDO} mv ./flux-view.sh ${viewbase}/flux-view.sh {{end}} {{define "ensure-pip"}} -${pythonversion} -m pip --version || ${pythonversion} -m ensurepip || (wget https://bootstrap.pypa.io/get-pip.py && ${pythonversion} ./get-pip.py) {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }} -${pythonversion} -m pip --upgrade pip {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }} -{{end}} +${SUDO} ${pythonversion} -m pip --version || ${SUDO} ${pythonversion} -m ensurepip || (${SUDO} wget https://bootstrap.pypa.io/get-pip.py && ${pythonversion} ./get-pip.py) {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }} +${SUDO} ${pythonversion} -m pip install --upgrade pip {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }} +{{end}} \ No newline at end of file diff --git a/pkg/flux/templates/wait.sh b/pkg/flux/templates/wait.sh index dc489d45..3c84b1a9 100644 --- a/pkg/flux/templates/wait.sh +++ b/pkg/flux/templates/wait.sh @@ -6,6 +6,18 @@ # We use the actual time command and not the wrapper, otherwise we get there is no argument -f {{ if .Spec.Logging.Timed }}which /usr/bin/time > /dev/null 2>&1 || (echo "/usr/bin/time is required to use logging.timed true" && exit 1);{{ end }} +# Set the flux user and id from the getgo +fluxuser=$(whoami) +fluxuid=$(id -u $fluxuser) + +# Add fluxuser to sudoers living... dangerously! +# A non-root user container requires sudo to work +SUDO="" +if [[ "${fluxuser}" != "root" ]]; then + echo "${fluxuser} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + SUDO="sudo" +fi + # If any initCommand logic is defined {{ .Container.Commands.Init}} {{ if .Spec.Logging.Quiet }}> /dev/null{{ end }} @@ -14,10 +26,6 @@ {{template "wait-view" .}} {{ if not .Spec.Flux.Container.Disable }}{{template "paths" .}}{{ end }} -# Set the flux user and id from the getgo -fluxuser=$(whoami) -fluxuid=$(id -u $fluxuser) - # Variables we can use again cfg="${viewroot}/etc/flux/config" command="{{ .Container.Command }}" @@ -28,19 +36,21 @@ command="{{ .Container.Command }}" {{ if not .Spec.Logging.Quiet }} echo echo "Hello user ${fluxuser}"{{ end }} - -# Add fluxuser to sudoers living... dangerously! 
-if [[ "${fluxuser}" != "root" ]]; then - echo "${fluxuser} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers -fi # Ensure the flux user owns the curve.cert # We need to move the curve.cert because config map volume is read only curvesrc=/flux_operator/curve.cert curvepath=$viewroot/curve/curve.cert -mkdir -p $viewroot/curve -cp $curvesrc $curvepath +# run directory must be owned by this user +# and /var/lib/flux +if [[ "${fluxuser}" != "root" ]]; then + ${SUDO} chown -R ${fluxuser} ${viewroot}/run/flux ${viewroot}/var/lib/flux +fi + +# Prepare curve certificate! +${SUDO} mkdir -p $viewroot/curve +${SUDO} cp $curvesrc $curvepath {{ if not .Spec.Logging.Quiet }} echo echo "🌟️ Curve Certificate" @@ -49,9 +59,9 @@ cat ${curvepath} {{ end }} # Remove group and other read -chmod o-r ${curvepath} -chmod g-r ${curvepath} -chown -R ${fluxuid} ${curvepath} +${SUDO} chmod o-r ${curvepath} +${SUDO} chmod g-r ${curvepath} +${SUDO} chown -R ${fluxuid} ${curvepath} # If we have disabled the view, we need to use the flux here to generate resources {{ if .Spec.Flux.Container.Disable }} @@ -61,7 +71,8 @@ echo echo "📦 Resources" echo "flux R encode --hosts=${hosts} --local" {{ end }} -flux R encode --hosts=${hosts} --local > ${viewroot}/etc/flux/system/R +flux R encode --hosts=${hosts} --local > /tmp/R +${SUDO} mv /tmp/R ${viewroot}/etc/flux/system/R {{ if not .Spec.Logging.Quiet }}cat ${viewroot}/etc/flux/system/R{{ end }} {{ end }} diff --git a/sdk/python/v1alpha2/docs/PodSpec.md b/sdk/python/v1alpha2/docs/PodSpec.md index 26574529..0d5f3b71 100644 --- a/sdk/python/v1alpha2/docs/PodSpec.md +++ b/sdk/python/v1alpha2/docs/PodSpec.md @@ -11,6 +11,7 @@ Name | Type | Description | Notes **node_selector** | **dict[str, str]** | NodeSelectors for a pod | [optional] **resources** | [**dict[str, IntOrString]**](IntOrString.md) | Resources include limits and requests | [optional] **restart_policy** | **str** | Restart Policy | [optional] +**runtime_class_name** | **str** | RuntimeClassName for the pod | [optional] **scheduler_name** | **str** | Scheduler name for the pod | [optional] **service_account_name** | **str** | Service account name for the pod | [optional] diff --git a/sdk/python/v1alpha2/fluxoperator/models/pod_spec.py b/sdk/python/v1alpha2/fluxoperator/models/pod_spec.py index 8e082277..c52edfe2 100644 --- a/sdk/python/v1alpha2/fluxoperator/models/pod_spec.py +++ b/sdk/python/v1alpha2/fluxoperator/models/pod_spec.py @@ -42,6 +42,7 @@ class PodSpec(object): 'node_selector': 'dict[str, str]', 'resources': 'dict[str, IntOrString]', 'restart_policy': 'str', + 'runtime_class_name': 'str', 'scheduler_name': 'str', 'service_account_name': 'str' } @@ -53,11 +54,12 @@ class PodSpec(object): 'node_selector': 'nodeSelector', 'resources': 'resources', 'restart_policy': 'restartPolicy', + 'runtime_class_name': 'runtimeClassName', 'scheduler_name': 'schedulerName', 'service_account_name': 'serviceAccountName' } - def __init__(self, annotations=None, automount_service_account_token=None, labels=None, node_selector=None, resources=None, restart_policy=None, scheduler_name=None, service_account_name=None, local_vars_configuration=None): # noqa: E501 + def __init__(self, annotations=None, automount_service_account_token=None, labels=None, node_selector=None, resources=None, restart_policy=None, runtime_class_name=None, scheduler_name=None, service_account_name=None, local_vars_configuration=None): # noqa: E501 """PodSpec - a model defined in OpenAPI""" # noqa: E501 if local_vars_configuration is None: local_vars_configuration = 
Configuration.get_default_copy() @@ -69,6 +71,7 @@ def __init__(self, annotations=None, automount_service_account_token=None, label self._node_selector = None self._resources = None self._restart_policy = None + self._runtime_class_name = None self._scheduler_name = None self._service_account_name = None self.discriminator = None @@ -85,6 +88,8 @@ def __init__(self, annotations=None, automount_service_account_token=None, label self.resources = resources if restart_policy is not None: self.restart_policy = restart_policy + if runtime_class_name is not None: + self.runtime_class_name = runtime_class_name if scheduler_name is not None: self.scheduler_name = scheduler_name if service_account_name is not None: @@ -228,6 +233,29 @@ def restart_policy(self, restart_policy): self._restart_policy = restart_policy + @property + def runtime_class_name(self): + """Gets the runtime_class_name of this PodSpec. # noqa: E501 + + RuntimeClassName for the pod # noqa: E501 + + :return: The runtime_class_name of this PodSpec. # noqa: E501 + :rtype: str + """ + return self._runtime_class_name + + @runtime_class_name.setter + def runtime_class_name(self, runtime_class_name): + """Sets the runtime_class_name of this PodSpec. + + RuntimeClassName for the pod # noqa: E501 + + :param runtime_class_name: The runtime_class_name of this PodSpec. # noqa: E501 + :type runtime_class_name: str + """ + + self._runtime_class_name = runtime_class_name + @property def scheduler_name(self): """Gets the scheduler_name of this PodSpec. # noqa: E501
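As a quick check that the generated model round-trips the new field, here is a minimal sketch using the SDK layout shown above. The import path follows `sdk/python/v1alpha2/fluxoperator/models/pod_spec.py`, and `to_dict()` is assumed to be the stock helper these OpenAPI-generated models ship with:

```python
# Minimal sketch: set the new runtime_class_name field on the generated
# PodSpec model alongside a custom restart policy.
from fluxoperator.models.pod_spec import PodSpec

pod = PodSpec(restart_policy="Never", runtime_class_name="nvidia")

# to_dict() reports the model by its Python attribute names; the
# attribute_map shown above is what converts these back to the CRD's
# camelCase keys (restartPolicy, runtimeClassName) on serialization.
print(pod.to_dict())
```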