diff --git a/Makefile b/Makefile index 5d7a595de1..e701855dae 100644 --- a/Makefile +++ b/Makefile @@ -208,7 +208,7 @@ $(KUSTOMIZE_OUTPUT): _work/kustomize $(KUSTOMIZE_INPUT) fi kustomize: _work/go-bindata clean_kustomize_output $(KUSTOMIZE_OUTPUT) - $< -o deploy/bindata_generated.go -pkg deploy deploy/kubernetes-*/*/pmem-csi.yaml + $< -o deploy/bindata_generated.go -pkg deploy deploy/kubernetes-*/*/pmem-csi.yaml deploy/kustomize/webhook/webhook.yaml deploy/kustomize/scheduler/scheduler-service.yaml clean_kustomize_output: rm -rf deploy/kubernetes-* diff --git a/deploy/bindata_generated.go b/deploy/bindata_generated.go index 09db99573d..e394f841ff 100644 --- a/deploy/bindata_generated.go +++ b/deploy/bindata_generated.go @@ -8,6 +8,8 @@ // deploy/kubernetes-1.19-alpha/lvm/pmem-csi.yaml // deploy/kubernetes-1.19/direct/pmem-csi.yaml // deploy/kubernetes-1.19/lvm/pmem-csi.yaml +// deploy/kustomize/webhook/webhook.yaml +// deploy/kustomize/scheduler/scheduler-service.yaml package deploy import ( @@ -99,7 +101,7 @@ func deployKubernetes117DirectPmemCsiYaml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "deploy/kubernetes-1.17/direct/pmem-csi.yaml", size: 12635, mode: os.FileMode(420), modTime: time.Unix(1610654713, 0)} + info := bindataFileInfo{name: "deploy/kubernetes-1.17/direct/pmem-csi.yaml", size: 12635, mode: os.FileMode(420), modTime: time.Unix(1610654763, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -119,7 +121,7 @@ func deployKubernetes117LvmPmemCsiYaml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "deploy/kubernetes-1.17/lvm/pmem-csi.yaml", size: 12569, mode: os.FileMode(420), modTime: time.Unix(1610654714, 0)} + info := bindataFileInfo{name: "deploy/kubernetes-1.17/lvm/pmem-csi.yaml", size: 12569, mode: os.FileMode(420), modTime: time.Unix(1610654764, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -139,7 +141,7 @@ func deployKubernetes118DirectPmemCsiYaml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "deploy/kubernetes-1.18/direct/pmem-csi.yaml", size: 12635, mode: os.FileMode(420), modTime: time.Unix(1610654718, 0)} + info := bindataFileInfo{name: "deploy/kubernetes-1.18/direct/pmem-csi.yaml", size: 12635, mode: os.FileMode(420), modTime: time.Unix(1610654768, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -159,7 +161,7 @@ func deployKubernetes118LvmPmemCsiYaml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "deploy/kubernetes-1.18/lvm/pmem-csi.yaml", size: 12569, mode: os.FileMode(420), modTime: time.Unix(1610654720, 0)} + info := bindataFileInfo{name: "deploy/kubernetes-1.18/lvm/pmem-csi.yaml", size: 12569, mode: os.FileMode(420), modTime: time.Unix(1610654770, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -179,7 +181,7 @@ func deployKubernetes119AlphaDirectPmemCsiYaml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "deploy/kubernetes-1.19-alpha/direct/pmem-csi.yaml", size: 12949, mode: os.FileMode(420), modTime: time.Unix(1610654729, 0)} + info := bindataFileInfo{name: "deploy/kubernetes-1.19-alpha/direct/pmem-csi.yaml", size: 12949, mode: os.FileMode(420), modTime: time.Unix(1610654779, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -199,7 +201,7 @@ func deployKubernetes119AlphaLvmPmemCsiYaml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "deploy/kubernetes-1.19-alpha/lvm/pmem-csi.yaml", size: 12883, mode: os.FileMode(420), modTime: time.Unix(1610654731, 0)} + info := bindataFileInfo{name: "deploy/kubernetes-1.19-alpha/lvm/pmem-csi.yaml", size: 12883, mode: os.FileMode(420), modTime: time.Unix(1610654781, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -219,7 +221,7 @@ func deployKubernetes119DirectPmemCsiYaml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "deploy/kubernetes-1.19/direct/pmem-csi.yaml", size: 12635, mode: os.FileMode(420), modTime: time.Unix(1610654724, 0)} + info := bindataFileInfo{name: "deploy/kubernetes-1.19/direct/pmem-csi.yaml", size: 12635, mode: os.FileMode(420), modTime: time.Unix(1610654774, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -239,7 +241,47 @@ func deployKubernetes119LvmPmemCsiYaml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "deploy/kubernetes-1.19/lvm/pmem-csi.yaml", size: 12569, mode: os.FileMode(420), modTime: time.Unix(1610654725, 0)} + info := bindataFileInfo{name: "deploy/kubernetes-1.19/lvm/pmem-csi.yaml", size: 12569, mode: os.FileMode(420), modTime: time.Unix(1610654775, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + +var _deployKustomizeWebhookWebhookYaml = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xc4\x54\x3f\x6f\xfb\x46\x0c\xdd\xfd\x29\x08\x77\x68\x0b\x44\x16\x02\xa4\x40\xa1\xad\x0d\xdc\xc2\x43\x82\xa0\x29\xda\x21\xc8\x70\x3a\xbd\x58\xac\x4f\x47\xe1\xc8\x93\xe3\x6f\x5f\x9c\x2c\x3b\xc9\xf0\x9b\x7f\x9e\x7c\x7c\xfc\xf3\x48\x3e\xca\x8d\xfc\x0f\x92\xb2\xc4\x86\x5c\x37\xb0\x96\xbf\x09\x7b\x56\x4b\xce\x58\xe2\xe6\xf0\xab\x6e\x58\xea\xe9\xb6\x85\xb9\xdb\xd5\x81\x63\xd7\xd0\x43\x36\x67\x1c\xf7\xff\xa2\xed\x45\x0e\xf7\x12\xdf\x78\x9f\xcf\x11\xab\x01\xe6\x3a\x67\xae\x59\x11\x45\x37\xa0\xa1\x71\xc0\x50\x79\xe5\x8a\xa3\x21\x54\x5e\x86\xaa\x84\xad\x8e\xe7\x70\x2d\x9e\xd5\xc5\x57\xba\x19\xdc\x5c\x82\x36\x73\xd0\xc6\xcb\xb0\x22\x3a\x67\xd4\xd1\x79\x3c\x23\xc0\x9b\xa4\x66\x36\x13\x0d\xce\x7c\xbf\x7d\x1f\x13\xe6\x26\xf4\x62\xaf\xe8\x80\xd3\x07\x87\x8f\x74\xf5\x52\x7e\xf1\x23\x92\x11\xc9\x95\x8c\xf4\x28\xb6\x8b\x57\xfb\xe4\x42\x86\x36\xf4\xb2\xe6\x7d\x94\x84\xf5\xeb\x0c\x49\xfb\x1f\xbc\x7d\x77\x1a\x3f\xd0\x1f\x92\xca\xd8\x94\x8e\x6c\x3d\x3d\x3d\x6c\x1f\x68\x92\x90\x07\x28\x65\xe5\xb8\xa7\xe0\x0c\xd4\x72\xec\x38\xee\x6f\x28\x8a\x91\xf3\xc6\xd3\xbc\x42\xb2\x1e\x4b\x1e\xf5\x3d\xba\x1c\x90\x08\xef\x86\x58\xe8\x13\x2b\xc9\xc1\x9d\x58\x7b\xfa\xa9\xcd\x46\x0a\x50\x6f\x36\x6a\x53\xd7\x7b\xb6\x3e\xb7\x73\x0f\x87\xdc\x22\x45\x18\xb4\x34\x57\x97\xf8\x14\x5d\xa8\xc6\x24\x13\x97\x44\x48\x35\xab\x66\x68\xfd\xcb\xdd\xdd\xcf\x9b\xa5\xe2\xce\x7e\x54\x3a\x4a\x52\xd0\x9b\x24\xba\x7f\xde\x11\xc6\x1e\x03\x92\x0b\xd7\x1e\x5a\x78\x97\x15\x85\x68\x42\x21\x14\x85\x12\xbc\x4c\x48\xa7\x4b\xa2\x47\x89\xb0\x1e\x01\xaa\x37\xf4\xe6\x38\x94\xce\x46\xe9\x2e\x3d\x95\x27\xa2\x71\x42\x38\xd1\xb1\x47\x9c\xc7\x54\x95\x82\xac\xd4\xc9\x31\x5e\x66\x00\x0c\x0b\xa5\x1b\x52\xa1\x23\x28\xc0\x4a\xed\x4f\xe3\xf1\x12\x8d\x63\x06\x75\xd0\x91\x0d\x73\xc5\x9c\xa0\x67\x3a\xcb\xeb\x49\x02\xfb\x53\x43\xbb\x79\x5b\x33\xe2\x03\x23\xda\xf9\x5c\x2e\xc2\x50\xa4\x89\x3d\x9a\xeb\x9e\xbf\x79\x33\x57\x02\x5f\x5c\xe7\x63\xf8\xf0\xbf\x62\xa3\xb3\xbe\xa1\x7a\x94\xae\x1e\xca\xb9\x62\x41\xbc\xfb\x3d\xc7\x2e\x2c\x05\x53\x0e\xf8\xa4\xd1\xb3\xf6\x66\xdd\xd2\xcb\xfa\xfe\xaf\xed\x6f\x7f\x6f\x17\x9d\x95\x9f\x1b\xf9\xcf\x24\x79\x9c\xd1\xaf\xf6\xe5\x33\x32\x23\xd3\xed\x27\x2c\x41\x25\x27\x7f\x56\x6e\x91\xe9\xfa\x75\xf5\x7f\x00\x00\x00\xff\xff\xcd\x46\xf3\x6c\x77\x04\x00\x00") + +func deployKustomizeWebhookWebhookYamlBytes() ([]byte, error) { + return bindataRead( + _deployKustomizeWebhookWebhookYaml, + "deploy/kustomize/webhook/webhook.yaml", + ) +} + +func deployKustomizeWebhookWebhookYaml() (*asset, error) { + bytes, err := deployKustomizeWebhookWebhookYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "deploy/kustomize/webhook/webhook.yaml", size: 1143, mode: os.FileMode(420), modTime: time.Unix(1610654562, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + +var _deployKustomizeSchedulerSchedulerServiceYaml = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x6c\x8e\xb1\x4a\x04\x41\x0c\x86\xfb\x79\x8a\xbc\xc0\xae\x2b\x5e\x21\xd3\x5a\xd9\x1d\x08\xf6\x63\xf6\x47\x87\x9b\x99\x84\x24\x7b\xe0\xdb\xcb\xae\x16\x87\x5c\x97\xc0\xc7\xf7\xfd\x45\xeb\x3b\xcc\xab\x8c\x4c\xd7\xc7\x74\xa9\x63\xcd\xf4\x06\xbb\x56\x46\xea\x88\xb2\x96\x28\x39\x11\x8d\xd2\x91\x49\x3b\xfa\xc4\x5e\xa7\x3a\x02\x6d\x62\xe9\x93\xf3\x17\xd6\xad\xc1\xfe\x20\xd7\xc2\x37\x64\x72\x05\xef\x02\x47\x03\x87\xd8\x7e\x13\x15\xd5\xf9\xb2\x7d\xc0\x06\x02\x3e\x57\x79\xf8\x17\x60\x19\x61\xd2\x7e\xbd\xf7\xf8\x3a\x3c\xca\xb8\x4d\xcd\xc7\xa8\x99\xa5\x27\xa2\xf8\x56\x64\x7a\x69\x9b\x07\xec\xf5\x9c\x88\x54\x2c\x7c\xaf\x4f\x14\xc5\x3e\x11\x67\xb1\xc8\xf4\xbc\x2c\xcb\x91\xd0\xe3\x3d\x9d\x9e\xd2\x4f\x00\x00\x00\xff\xff\x05\xad\x4e\x1c\x15\x01\x00\x00") + +func deployKustomizeSchedulerSchedulerServiceYamlBytes() ([]byte, error) { + return bindataRead( + _deployKustomizeSchedulerSchedulerServiceYaml, + "deploy/kustomize/scheduler/scheduler-service.yaml", + ) +} + +func deployKustomizeSchedulerSchedulerServiceYaml() (*asset, error) { + bytes, err := deployKustomizeSchedulerSchedulerServiceYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "deploy/kustomize/scheduler/scheduler-service.yaml", size: 277, mode: os.FileMode(420), modTime: time.Unix(1610654562, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -304,6 +346,8 @@ var _bindata = map[string]func() (*asset, error){ "deploy/kubernetes-1.19-alpha/lvm/pmem-csi.yaml": deployKubernetes119AlphaLvmPmemCsiYaml, "deploy/kubernetes-1.19/direct/pmem-csi.yaml": deployKubernetes119DirectPmemCsiYaml, "deploy/kubernetes-1.19/lvm/pmem-csi.yaml": deployKubernetes119LvmPmemCsiYaml, + "deploy/kustomize/webhook/webhook.yaml": deployKustomizeWebhookWebhookYaml, + "deploy/kustomize/scheduler/scheduler-service.yaml": deployKustomizeSchedulerSchedulerServiceYaml, } // AssetDir returns the file names below a certain @@ -380,6 +424,14 @@ var _bintree = &bintree{nil, map[string]*bintree{ "pmem-csi.yaml": &bintree{deployKubernetes119AlphaLvmPmemCsiYaml, map[string]*bintree{}}, }}, }}, + "kustomize": &bintree{nil, map[string]*bintree{ + "scheduler": &bintree{nil, map[string]*bintree{ + "scheduler-service.yaml": &bintree{deployKustomizeSchedulerSchedulerServiceYaml, map[string]*bintree{}}, + }}, + "webhook": &bintree{nil, map[string]*bintree{ + "webhook.yaml": &bintree{deployKustomizeWebhookWebhookYaml, map[string]*bintree{}}, + }}, + }}, }}, }} diff --git a/deploy/crd/pmem-csi.intel.com_pmemcsideployments.yaml b/deploy/crd/pmem-csi.intel.com_pmemcsideployments.yaml index be677daf5c..efe7d0cc8c 100644 --- a/deploy/crd/pmem-csi.intel.com_pmemcsideployments.yaml +++ b/deploy/crd/pmem-csi.intel.com_pmemcsideployments.yaml @@ -57,7 +57,7 @@ spec: properties: controllerDriverResources: description: ControllerDriverResources Compute resources required - by driver container running on master node + by central driver container properties: limits: additionalProperties: @@ -82,6 +82,12 @@ spec: to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' type: object type: object + controllerTLSSecret: + description: ControllerTLSSecret is the name of a secret which contains + ca.crt, tls.crt and tls.key data for the scheduler extender and + pod mutation webhook. A controller is started if (and only if) this + secret is specified. + type: string deviceMode: description: DeviceMode to use to manage PMEM devices. enum: @@ -112,6 +118,15 @@ spec: logLevel: description: LogLevel number for the log verbosity type: integer + mutatePods: + description: MutatePod defines how a mutating pod webhook is configured + if a controller is started. The field is ignored if the controller + is not enabled. The default is "Try". + enum: + - Always + - Try + - Never + type: string nodeDriverResources: description: NodeDriverResources Compute resources required by driver container running on worker nodes @@ -214,6 +229,15 @@ spec: to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' type: object type: object + schedulerNodePort: + description: SchedulerNodePort, if non-zero, ensures that the "scheduler" + service is created as a NodeService with that fixed port number. + Otherwise that service is created as a cluster service. The number + must be from the range reserved by Kubernetes for node ports. This + is useful if the kube-scheduler cannot reach the scheduler extender + via a cluster service. + format: int32 + type: integer type: object status: description: DeploymentStatus defines the observed state of Deployment diff --git a/deploy/kustomize/operator/operator.yaml b/deploy/kustomize/operator/operator.yaml index bfdd5c9586..2a4f7b3199 100644 --- a/deploy/kustomize/operator/operator.yaml +++ b/deploy/kustomize/operator/operator.yaml @@ -48,6 +48,7 @@ rules: - "" resources: - pods + - secrets verbs: - get --- @@ -77,6 +78,12 @@ rules: - pmemcsideployments/finalizers verbs: - '*' +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + verbs: + - '*' --- kind: RoleBinding apiVersion: rbac.authorization.k8s.io/v1 diff --git a/deploy/operator/pmem-csi-operator.yaml b/deploy/operator/pmem-csi-operator.yaml index 830d21eb42..90fbc216c1 100644 --- a/deploy/operator/pmem-csi-operator.yaml +++ b/deploy/operator/pmem-csi-operator.yaml @@ -62,6 +62,7 @@ rules: - "" resources: - pods + - secrets verbs: - get --- @@ -91,6 +92,12 @@ rules: - pmemcsideployments/finalizers verbs: - '*' +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + verbs: + - '*' --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding diff --git a/deploy/yamls.go b/deploy/yamls.go index c98156660f..4c86327884 100644 --- a/deploy/yamls.go +++ b/deploy/yamls.go @@ -40,7 +40,7 @@ func init() { for _, file := range AssetNames() { parts := re.FindStringSubmatch(file) if parts == nil { - panic(fmt.Sprintf("unexpected deployment asset: %s", file)) + continue } kubernetes, err := version.Parse(parts[1]) if err != nil { diff --git a/docs/design.md b/docs/design.md index 777402657b..a48eb59e41 100644 --- a/docs/design.md +++ b/docs/design.md @@ -292,15 +292,22 @@ components that help with pod scheduling: ### Scheduler extender -When a pod requests the special [extended +When a pod requests a special [extended resource](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#extended-resources) -called `pmem-csi.intel.com/scheduler`, the Kubernetes scheduler calls +, the Kubernetes scheduler calls a [scheduler extender](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/scheduling/scheduler_extender.md) provided by PMEM-CSI with a list of nodes that a pod might run -on. This extender is implemented in the PMEM-CSI controller and -connects to node driver's metrics endpoint to check for -capacity. PMEM-CSI then filters out all nodes which currently do not +on. + +The name of that special resource is `/scheduler`, +i.e. `pmem-csi.intel.com/scheduler` when the default PMEM-CSI driver +name is used. It is possible to configure one extender per PMEM-CSI +deployment because each deployment has its own unique driver name. + +This extender is implemented in the PMEM-CSI controller and retrieves +metrics data from each PMEM-CSI node driver instance to filter out all +nodes which currently do not have enough storage left for the volumes that still need to be created. This considers inline ephemeral volumes and all unbound volumes, regardless whether they use late binding or immediate @@ -328,7 +335,7 @@ See our [implementation](http://github.com/intel/pmem-csi/tree/devel/pkg/schedul ### Pod admission webhook -Having to add `pmem-csi.intel.com/scheduler` manually is not +Having to add the `/scheduler` extended resource manually is not user-friendly. To simplify this, PMEM-CSI provides a [mutating admission webhook](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/) diff --git a/docs/install.md b/docs/install.md index be25662b54..8bbf882a3f 100644 --- a/docs/install.md +++ b/docs/install.md @@ -752,41 +752,27 @@ The PMEM-CSI scheduler extender and admission webhook are provided by the PMEM-CSI controller. They need to be enabled during deployment via the `--schedulerListen=[]:` parameter. The listen address is optional and can be left out. The port is where a -HTTPS server will run. It uses the same certificates as the internal -gRPC service. When using the CA creation script described above, they -will contain alternative names for the URLs described in this section -(service names, `127.0.0.1` IP address). - -This parameter can be added to one of the existing deployment files -with `kustomize`. All of the following examples assume that the -current directory contains the `deploy` directory from the PMEM-CSI -repository. It is also possible to reference the base via a -[URL](https://github.com/kubernetes-sigs/kustomize/blob/master/examples/remoteBuild.md). - -``` ShellSession -$ mkdir my-pmem-csi-deployment - -$ cat >my-pmem-csi-deployment/kustomization.yaml <my-pmem-csi-deployment/scheduler-patch.yaml <my-scheduler/node-port-patch.yaml <3 | | deviceMode | string | Device management mode to use. Supports one of `lvm` or `direct` | `lvm` +| controllerTLSSecret | string | Name of an existing secret in the driver's namespace which contains ca.crt, tls.crt and tls.key data for the scheduler extender and pod mutation webhook. A controller is started if (and only if) this secret is specified. | empty +| mutatePods | Always/Try/Never | Defines how a mutating pod webhook is configured if a controller is started. The field is ignored if the controller is not enabled. "Never" disables pod mutation. "Try" configured it so that pod creation is allowed to proceed even when the webhook fails. "Always" requires that the webhook gets invoked successfully before creating a pod. | Try +| schedulerNodePort | If non-zero, the scheduler service is created as a NodeService with that fixed port number. Otherwise that service is created as a cluster service. The number must be from the range reserved by Kubernetes for node ports. This is useful if the kube-scheduler cannot reach the scheduler extender via a cluster service. | 0 | controllerResources | [ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#resourcerequirements-v1-core) | Describes the compute resource requirements for controller pod.
4_Deprecated and only available in `v1alpha1`._ | | nodeResources | [ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#resourcerequirements-v1-core) | Describes the compute resource requirements for the pods running on node(s).
_4Deprecated and only available in `v1alpha1`._ | | controllerDriverResources | [ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#resourcerequirements-v1-core) | Describes the compute resource requirements for controller driver container running on master node. Available since `v1beta1`. | diff --git a/pkg/apis/pmemcsi/v1beta1/deployment_types.go b/pkg/apis/pmemcsi/v1beta1/deployment_types.go index e630810a1b..53b5ebde2b 100644 --- a/pkg/apis/pmemcsi/v1beta1/deployment_types.go +++ b/pkg/apis/pmemcsi/v1beta1/deployment_types.go @@ -59,6 +59,20 @@ const ( LogFormatJSON LogFormat = "json" ) +type MutatePods string + +const ( + // MutatePodsAlways enables the mutating pod webhook so that a failure is considered fatal. + MutatePodsAlways MutatePods = "Always" + + // MutatePodsTry enables the mutating pod webhook so that it a pod can be created even + // when the webhook fails. + MutatePodsTry MutatePods = "Try" + + // MutatePodsNever disables the mutating pod webhook. + MutatePodsNever MutatePods = "Never" +) + // +k8s:deepcopy-gen=true // DeploymentSpec defines the desired state of Deployment type DeploymentSpec struct { @@ -78,8 +92,24 @@ type DeploymentSpec struct { NodeRegistrarResources *corev1.ResourceRequirements `json:"nodeRegistrarResources,omitempty"` // NodeDriverResources Compute resources required by driver container running on worker nodes NodeDriverResources *corev1.ResourceRequirements `json:"nodeDriverResources,omitempty"` - // ControllerDriverResources Compute resources required by driver container running on master node + // ControllerDriverResources Compute resources required by central driver container ControllerDriverResources *corev1.ResourceRequirements `json:"controllerDriverResources,omitempty"` + // ControllerTLSSecret is the name of a secret which contains ca.crt, tls.crt and tls.key data + // for the scheduler extender and pod mutation webhook. A controller is started if (and only if) + // this secret is specified. + ControllerTLSSecret string `json:"controllerTLSSecret,omitempty"` + // MutatePod defines how a mutating pod webhook is configured if a controller + // is started. The field is ignored if the controller is not enabled. + // The default is "Try". + // +kubebuilder:validation:Enum=Always;Try;Never + MutatePods MutatePods `json:"mutatePods,omitempty"` + // SchedulerNodePort, if non-zero, ensures that the "scheduler" service + // is created as a NodeService with that fixed port number. Otherwise + // that service is created as a cluster service. The number must be + // from the range reserved by Kubernetes for + // node ports. This is useful if the kube-scheduler cannot reach the scheduler + // extender via a cluster service. + SchedulerNodePort int32 `json:"schedulerNodePort,omitempty"` // DeviceMode to use to manage PMEM devices. // +kubebuilder:validation:Enum=lvm;direct DeviceMode DeviceMode `json:"deviceMode,omitempty"` @@ -228,6 +258,8 @@ const ( // DefaultDriverImage default PMEM-CSI driver docker image DefaultDriverImage = defaultDriverImageName + ":" + defaultDriverImageTag + DefaultMutatePods = MutatePodsTry + // The sidecar versions must be kept in sync with the // deploy/kustomize YAML files! @@ -305,6 +337,16 @@ const ( DeploymentPhaseFailed DeploymentPhase = "Failed" ) +// A TLS secret must contain three data items. +const ( + // TLSSecretCA is the CA bundle. + TLSSecretCA = "ca.crt" + // TLSSecretKey is the secret key to be used by the server. + TLSSecretKey = "tls.key" + // TLSSecretCert is the public key to used by the server. + TLSSecretCert = "tls.crt" +) + func (d *PmemCSIDeployment) SetCondition(t DeploymentConditionType, state corev1.ConditionStatus, reason string) { for _, c := range d.Status.Conditions { if c.Type == t { @@ -347,6 +389,14 @@ func (d *PmemCSIDeployment) EnsureDefaults(operatorImage string) error { return fmt.Errorf("invalid device mode %q", d.Spec.DeviceMode) } + switch d.Spec.MutatePods { + case "": + d.Spec.MutatePods = DefaultMutatePods + case MutatePodsAlways, MutatePodsTry, MutatePodsNever: + default: + return fmt.Errorf("invalid MutatePods value: %s", d.Spec.MutatePods) + } + if d.Spec.Image == "" { // If provided use operatorImage if operatorImage != "" { @@ -478,6 +528,12 @@ func (d *PmemCSIDeployment) MetricsServiceName() string { return d.GetHyphenedName() + "-metrics" } +// SchedulerServiceName returns the name of the controller's scheduler +// Service object +func (d *PmemCSIDeployment) SchedulerServiceName() string { + return d.GetHyphenedName() + "-scheduler" +} + // WebhooksServiceAccountName returns the name of the service account // used by the StatefulSet with the webhooks. func (d *PmemCSIDeployment) WebhooksServiceAccountName() string { @@ -508,6 +564,12 @@ func (d *PmemCSIDeployment) WebhooksClusterRoleBindingName() string { return d.GetHyphenedName() + "-webhooks-role" } +// MutatingWebhookName returns the name of the +// MutatingWebhookConfiguration +func (d *PmemCSIDeployment) MutatingWebhookName() string { + return d.GetHyphenedName() + "-hook" +} + // NodeServiceAccountName returns the name of the service account // used by the DaemonSet with the external-provisioner func (d *PmemCSIDeployment) ProvisionerServiceAccountName() string { diff --git a/pkg/deployments/load.go b/pkg/deployments/load.go index 81a61222e7..11a013e7a2 100644 --- a/pkg/deployments/load.go +++ b/pkg/deployments/load.go @@ -24,11 +24,12 @@ import ( // LoadObjects reads all objects stored in a pmem-csi.yaml reference file. func LoadObjects(kubernetes version.Version, deviceMode api.DeviceMode) ([]unstructured.Unstructured, error) { - return loadObjects(kubernetes, deviceMode, nil, nil) + return loadYAML(yamlPath(kubernetes, deviceMode), nil, nil, nil) } var pmemImage = regexp.MustCompile(`image: intel/pmem-csi-driver(-test)?:\S+`) -var nameRegex = regexp.MustCompile(`(name|app|secretName|serviceName|serviceAccountName): pmem-csi`) +var nameRegex = regexp.MustCompile(`(name|secretName|serviceName|serviceAccountName): pmem-csi-intel-com`) +var driverNameRegex = regexp.MustCompile(`(?m)(name|app\.kubernetes.io/instance): pmem-csi.intel.com$`) // LoadAndCustomizeObjects reads all objects stored in a pmem-csi.yaml reference file // and updates them on-the-fly according to the deployment spec, namespace and name. @@ -39,9 +40,10 @@ func LoadAndCustomizeObjects(kubernetes version.Version, deviceMode api.DeviceMo // our deployments. But because we controll the input, we can do some // things like renaming with a simple text search/replace. patchYAML := func(yaml *[]byte) { - // This renames the objects. A hyphen is used instead of a dot, - // except for CSIDriver which needs the exact name. + // This renames the objects and labels. A hyphen is used instead of a dot, + // except for CSIDriver and instance label which need the exact name. *yaml = nameRegex.ReplaceAll(*yaml, []byte("$1: "+deployment.GetHyphenedName())) + *yaml = driverNameRegex.ReplaceAll(*yaml, []byte("$1: "+deployment.Name)) // Update the driver name inside the state and socket dir. *yaml = bytes.ReplaceAll(*yaml, []byte("path: /var/lib/pmem-csi.intel.com"), []byte("path: /var/lib/"+deployment.Name)) @@ -78,9 +80,36 @@ func LoadAndCustomizeObjects(kubernetes version.Version, deviceMode api.DeviceMo *yaml = pmemImage.ReplaceAll(*yaml, []byte("image: "+deployment.Spec.Image)) } + enabled := func(obj *unstructured.Unstructured) bool { + if deployment.Spec.ControllerTLSSecret != "" { + // Shortcut: all objects enabled. + return true + } + + // Controller is disabled. + switch obj.GetKind() + "/" + obj.GetName() { + case "StatefulSet/" + deployment.ControllerDriverName(), + "Service/" + deployment.ControllerServiceName(), + "Service/" + deployment.MetricsServiceName(), + "Service/" + deployment.SchedulerServiceName(), + "ServiceAccount/" + deployment.WebhooksServiceAccountName(), + "Role/" + deployment.WebhooksRoleName(), + "RoleBinding/" + deployment.WebhooksRoleBindingName(), + "ClusterRole/" + deployment.WebhooksClusterRoleName(), + "ClusterRoleBinding/" + deployment.WebhooksClusterRoleBindingName(), + "MutatingWebhookConfiguration/" + deployment.MutatingWebhookName(): + return false + default: + return true + } + } + patchUnstructured := func(obj *unstructured.Unstructured) { if deployment.Spec.Labels != nil { labels := obj.GetLabels() + if labels == nil { + labels = map[string]string{} + } for key, value := range deployment.Spec.Labels { labels[key] = value } @@ -88,12 +117,9 @@ func LoadAndCustomizeObjects(kubernetes version.Version, deviceMode api.DeviceMo } switch obj.GetKind() { - case "CSIDriver": - obj.SetName(deployment.GetName()) case "StatefulSet": resources := map[string]*corev1.ResourceRequirements{ - "pmem-driver": deployment.Spec.ControllerDriverResources, - "external-provisioner": deployment.Spec.ProvisionerResources, + "pmem-driver": deployment.Spec.ControllerDriverResources, } if err := patchPodTemplate(obj, deployment, resources); err != nil { // TODO: avoid panic @@ -101,8 +127,9 @@ func LoadAndCustomizeObjects(kubernetes version.Version, deviceMode api.DeviceMo } case "DaemonSet": resources := map[string]*corev1.ResourceRequirements{ - "pmem-driver": deployment.Spec.NodeDriverResources, - "driver-registrar": deployment.Spec.NodeRegistrarResources, + "pmem-driver": deployment.Spec.NodeDriverResources, + "external-provisioner": deployment.Spec.ProvisionerResources, + "driver-registrar": deployment.Spec.NodeRegistrarResources, } if err := patchPodTemplate(obj, deployment, resources); err != nil { // TODO: avoid panic @@ -118,10 +145,46 @@ func LoadAndCustomizeObjects(kubernetes version.Version, deviceMode api.DeviceMo } spec["nodeSelector"] = selector } + case "MutatingWebhookConfiguration": + webhooks := obj.Object["webhooks"].([]interface{}) + failurePolicy := "Ignore" + if deployment.Spec.MutatePods == api.MutatePodsAlways { + failurePolicy = "Fail" + } + webhooks[0].(map[string]interface{})["failurePolicy"] = failurePolicy + case "Service": + switch obj.GetName() { + case deployment.SchedulerServiceName(): + if deployment.Spec.SchedulerNodePort != 0 { + spec := obj.Object["spec"].(map[string]interface{}) + spec["type"] = "NodePort" + ports := spec["ports"].([]interface{}) + ports[0].(map[string]interface{})["nodePort"] = deployment.Spec.SchedulerNodePort + } + } } } - return loadObjects(kubernetes, deviceMode, patchYAML, patchUnstructured) + objects, err := loadYAML(yamlPath(kubernetes, deviceMode), patchYAML, enabled, patchUnstructured) + if err != nil { + return nil, err + } + + scheduler, err := loadYAML("deploy/kustomize/scheduler/scheduler-service.yaml", patchYAML, enabled, patchUnstructured) + if err != nil { + return nil, err + } + objects = append(objects, scheduler...) + + if deployment.Spec.MutatePods != api.MutatePodsNever { + webhook, err := loadYAML("deploy/kustomize/webhook/webhook.yaml", patchYAML, enabled, patchUnstructured) + if err != nil { + return nil, err + } + objects = append(objects, webhook...) + } + + return objects, nil } func patchPodTemplate(obj *unstructured.Unstructured, deployment api.PmemCSIDeployment, resources map[string]*corev1.ResourceRequirements) error { @@ -205,14 +268,25 @@ func patchPodTemplate(obj *unstructured.Unstructured, deployment api.PmemCSIDepl } } + volumes := spec["volumes"].([]interface{}) + for _, volume := range volumes { + volume := volume.(map[string]interface{}) + volumeName := volume["name"].(string) + if volumeName == "webhook-cert" { + volume["secret"].(map[string]interface{})["secretName"] = deployment.Spec.ControllerTLSSecret + } + } return nil } -func loadObjects(kubernetes version.Version, deviceMode api.DeviceMode, +func yamlPath(kubernetes version.Version, deviceMode api.DeviceMode) string { + return fmt.Sprintf("deploy/kubernetes-%s/%s/pmem-csi.yaml", kubernetes, deviceMode) +} + +func loadYAML(path string, patchYAML func(yaml *[]byte), + enabled func(obj *unstructured.Unstructured) bool, patchUnstructured func(obj *unstructured.Unstructured)) ([]unstructured.Unstructured, error) { - path := fmt.Sprintf("deploy/kubernetes-%s/%s/pmem-csi.yaml", kubernetes, deviceMode) - // We load the builtin yaml files. yaml, err := deploy.Asset(path) if err != nil { @@ -238,6 +312,9 @@ func loadObjects(kubernetes version.Version, deviceMode api.DeviceMode, if err != nil { return nil, fmt.Errorf("decode item %q from file %q: %v", item, path, err) } + if enabled != nil && !enabled(&obj) { + continue + } if patchUnstructured != nil { patchUnstructured(&obj) } diff --git a/pkg/pmem-csi-driver/main.go b/pkg/pmem-csi-driver/main.go index e8b5557432..e5dd0923f4 100644 --- a/pkg/pmem-csi-driver/main.go +++ b/pkg/pmem-csi-driver/main.go @@ -34,8 +34,7 @@ func init() { flag.StringVar(&config.DriverName, "drivername", "pmem-csi.intel.com", "name of the driver") flag.StringVar(&config.NodeID, "nodeid", "nodeid", "node id") flag.StringVar(&config.Endpoint, "endpoint", "unix:///tmp/pmem-csi.sock", "PMEM CSI endpoint") - flag.StringVar(&config.RegistryEndpoint, "registryEndpoint", "tcp://pmem-csi-controller:10000", "endpoint for internal registry server (controller listens, node connects)") - flag.Var(&config.Mode, "mode", "driver run mode: controller or node") + flag.Var(&config.Mode, "mode", "driver run mode") flag.StringVar(&config.CAFile, "caFile", "ca.pem", "Root CA certificate file to use for verifying connections") flag.StringVar(&config.CertFile, "certFile", "pmem-registry.pem", "SSL certificate file to use for authenticating client connections") flag.StringVar(&config.KeyFile, "keyFile", "pmem-registry-key.pem", "Private key file associated to certificate") diff --git a/pkg/pmem-csi-operator/controller/deployment/controller_driver.go b/pkg/pmem-csi-operator/controller/deployment/controller_driver.go index bc956d2e9c..8599acf395 100644 --- a/pkg/pmem-csi-operator/controller/deployment/controller_driver.go +++ b/pkg/pmem-csi-operator/controller/deployment/controller_driver.go @@ -15,6 +15,7 @@ import ( "github.com/intel/pmem-csi/pkg/logger" "github.com/intel/pmem-csi/pkg/version" + admissionregistrationv1beta1 "k8s.io/api/admissionregistration/v1beta1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -36,6 +37,7 @@ const ( nodeControllerPort = 10001 nodeMetricsPort = 10010 provisionerMetricsPort = 10011 + schedulerPort = 8000 ) func typeMeta(gv schema.GroupVersion, kind string) metav1.TypeMeta { @@ -62,6 +64,7 @@ var currentObjects = []apiruntime.Object{ &corev1.Service{TypeMeta: typeMeta(corev1.SchemeGroupVersion, "Service")}, &corev1.ServiceAccount{TypeMeta: typeMeta(corev1.SchemeGroupVersion, "ServiceAccount")}, &appsv1.StatefulSet{TypeMeta: typeMeta(appsv1.SchemeGroupVersion, "StatefulSet")}, + &admissionregistrationv1beta1.MutatingWebhookConfiguration{TypeMeta: typeMeta(admissionregistrationv1beta1.SchemeGroupVersion, "MutatingWebhookConfiguration")}, } // CurrentObjects returns the active sub-object types used by the operator @@ -107,6 +110,8 @@ type pmemCSIDeployment struct { // operator's namespace used for creating sub-resources namespace string k8sVersion version.Version + + controllerCABundle []byte } // objectPatch combines a modified object and the patch against @@ -156,18 +161,13 @@ func (op objectPatch) diff() ([]byte, error) { // Apply sends the changes to API Server // Creates new object if not existing, otherwise patches it with changes -func (op *objectPatch) apply(ctx context.Context, c client.Client, labels map[string]string) error { +func (op *objectPatch) apply(ctx context.Context, c client.Client) error { objMeta, err := meta.Accessor(op.obj) if err != nil { return fmt.Errorf("internal error %T: %v", op.obj, err) } l := logger.Get(ctx).WithName("objectPatch/apply") - // NOTE(avalluri): Set labels just before creating/patching. - // Setting them before creating the client.Patch makes - // they get lost from the final diff. - objMeta.SetLabels(labels) - if op.isNew() { // For unknown reason client.Create() clearing off the // GVK on obj, So restore it manually. @@ -202,10 +202,34 @@ func (d *pmemCSIDeployment) reconcile(ctx context.Context, r *ReconcileDeploymen } l := logger.Get(ctx).WithName("reconcile") + if d.Spec.ControllerTLSSecret != "" { + secret := &corev1.Secret{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Secret", + }, + } + if err := r.client.Get(ctx, + client.ObjectKey{ + Namespace: d.namespace, + Name: d.Spec.ControllerTLSSecret}, + secret); err != nil { + return fmt.Errorf("loading ControllerTLSSecret %s from namespace %s: %v", d.Spec.ControllerTLSSecret, d.namespace, err) + } + ca, ok := secret.Data[api.TLSSecretCA] + if !ok { + return fmt.Errorf("ControllerTLSSecret %s in namespace %s contains no %s", d.Spec.ControllerTLSSecret, d.namespace, api.TLSSecretCA) + } + d.controllerCABundle = ca + } + l.V(3).Info("start", "deployment", d.Name, "phase", d.Status.Phase) var allObjects []apiruntime.Object redeployAll := func() error { for name, handler := range subObjectHandlers { + if handler.enabled != nil && !handler.enabled(d) { + continue + } o, err := d.redeploy(ctx, r, handler) if err != nil { return fmt.Errorf("failed to update %s: %v", name, err) @@ -257,13 +281,9 @@ func (d *pmemCSIDeployment) getSubObject(ctx context.Context, r *ReconcileDeploy return nil } -// updateSubObject writes the object changes to the API server. -func (d *pmemCSIDeployment) updateSubObject(ctx context.Context, r *ReconcileDeployment, op *objectPatch) error { - return op.apply(ctx, r.client, d.Spec.Labels) -} - type redeployObject struct { objType reflect.Type + enabled func(*pmemCSIDeployment) bool object func(*pmemCSIDeployment) apiruntime.Object modify func(*pmemCSIDeployment, apiruntime.Object) error postUpdate func(*pmemCSIDeployment, apiruntime.Object) error @@ -291,7 +311,22 @@ func (d *pmemCSIDeployment) redeploy(ctx context.Context, r *ReconcileDeployment if err := ro.modify(d, o); err != nil { return nil, err } - if err := d.updateSubObject(ctx, r, op); err != nil { + + // Add the additional labels before patching. + objMeta, err := meta.Accessor(o) + if err != nil { + return nil, fmt.Errorf("internal error %T: %v", op.obj, err) + } + labels := objMeta.GetLabels() + if labels == nil { + labels = map[string]string{} + } + for key, value := range d.Spec.Labels { + labels[key] = value + } + objMeta.SetLabels(labels) + + if err := op.apply(ctx, r.client); err != nil { return nil, err } if ro.postUpdate != nil { @@ -302,6 +337,16 @@ func (d *pmemCSIDeployment) redeploy(ctx context.Context, r *ReconcileDeployment return o, nil } +func controllerEnabled(d *pmemCSIDeployment) bool { + // The controller part only contains the scheduler webhooks + // and is enabled only when the necessary secret is provided. + return d.Spec.ControllerTLSSecret != "" +} + +func mutatingWebhookEnabled(d *pmemCSIDeployment) bool { + return controllerEnabled(d) && d.Spec.MutatePods != api.MutatePodsNever +} + var subObjectHandlers = map[string]redeployObject{ "node driver": { objType: reflect.TypeOf(&appsv1.DaemonSet{}), @@ -334,6 +379,7 @@ var subObjectHandlers = map[string]redeployObject{ }, "controller driver": { objType: reflect.TypeOf(&appsv1.StatefulSet{}), + enabled: controllerEnabled, object: func(d *pmemCSIDeployment) apiruntime.Object { return &appsv1.StatefulSet{ TypeMeta: metav1.TypeMeta{Kind: "StatefulSet", APIVersion: "apps/v1"}, @@ -364,6 +410,7 @@ var subObjectHandlers = map[string]redeployObject{ }, "controller service": { objType: reflect.TypeOf(&corev1.Service{}), + enabled: controllerEnabled, object: func(d *pmemCSIDeployment) apiruntime.Object { return &corev1.Service{ TypeMeta: metav1.TypeMeta{Kind: "Service", APIVersion: "v1"}, @@ -377,6 +424,7 @@ var subObjectHandlers = map[string]redeployObject{ }, "metrics service": { objType: reflect.TypeOf(&corev1.Service{}), + enabled: controllerEnabled, object: func(d *pmemCSIDeployment) apiruntime.Object { return &corev1.Service{ TypeMeta: metav1.TypeMeta{Kind: "Service", APIVersion: "v1"}, @@ -403,6 +451,7 @@ var subObjectHandlers = map[string]redeployObject{ }, "webhooks role": { objType: reflect.TypeOf(&rbacv1.Role{}), + enabled: controllerEnabled, object: func(d *pmemCSIDeployment) apiruntime.Object { return &rbacv1.Role{ TypeMeta: metav1.TypeMeta{Kind: "Role", APIVersion: "rbac.authorization.k8s.io/v1"}, @@ -416,6 +465,7 @@ var subObjectHandlers = map[string]redeployObject{ }, "webhooks role binding": { objType: reflect.TypeOf(&rbacv1.RoleBinding{}), + enabled: controllerEnabled, object: func(d *pmemCSIDeployment) apiruntime.Object { return &rbacv1.RoleBinding{ TypeMeta: metav1.TypeMeta{Kind: "RoleBinding", APIVersion: "rbac.authorization.k8s.io/v1"}, @@ -429,6 +479,7 @@ var subObjectHandlers = map[string]redeployObject{ }, "webhooks cluster role": { objType: reflect.TypeOf(&rbacv1.ClusterRole{}), + enabled: controllerEnabled, object: func(d *pmemCSIDeployment) apiruntime.Object { return &rbacv1.ClusterRole{ TypeMeta: metav1.TypeMeta{Kind: "ClusterRole", APIVersion: "rbac.authorization.k8s.io/v1"}, @@ -442,6 +493,7 @@ var subObjectHandlers = map[string]redeployObject{ }, "webhooks cluster role binding": { objType: reflect.TypeOf(&rbacv1.ClusterRoleBinding{}), + enabled: controllerEnabled, object: func(d *pmemCSIDeployment) apiruntime.Object { return &rbacv1.ClusterRoleBinding{ TypeMeta: metav1.TypeMeta{Kind: "ClusterRoleBinding", APIVersion: "rbac.authorization.k8s.io/v1"}, @@ -453,6 +505,48 @@ var subObjectHandlers = map[string]redeployObject{ return nil }, }, + "webhooks service account": { + objType: reflect.TypeOf(&corev1.ServiceAccount{}), + enabled: controllerEnabled, + object: func(d *pmemCSIDeployment) apiruntime.Object { + return &corev1.ServiceAccount{ + TypeMeta: metav1.TypeMeta{Kind: "ServiceAccount", APIVersion: "v1"}, + ObjectMeta: d.getObjectMeta(d.WebhooksServiceAccountName(), false), + } + }, + modify: func(d *pmemCSIDeployment, o apiruntime.Object) error { + // nothing to customize for service account + return nil + }, + }, + "mutating webhook configuration": { + objType: reflect.TypeOf(&admissionregistrationv1beta1.MutatingWebhookConfiguration{}), + enabled: mutatingWebhookEnabled, + object: func(d *pmemCSIDeployment) apiruntime.Object { + return &admissionregistrationv1beta1.MutatingWebhookConfiguration{ + TypeMeta: metav1.TypeMeta{Kind: "MutatingWebhookConfiguration", APIVersion: "admissionregistration.k8s.io/v1beta1"}, + ObjectMeta: d.getObjectMeta(d.MutatingWebhookName(), true), + } + }, + modify: func(d *pmemCSIDeployment, o apiruntime.Object) error { + d.getMutatingWebhookConfig(o.(*admissionregistrationv1beta1.MutatingWebhookConfiguration)) + return nil + }, + }, + "scheduler service": { + objType: reflect.TypeOf(&corev1.Service{}), + enabled: controllerEnabled, + object: func(d *pmemCSIDeployment) apiruntime.Object { + return &corev1.Service{ + TypeMeta: metav1.TypeMeta{Kind: "Service", APIVersion: "v1"}, + ObjectMeta: d.getObjectMeta(d.SchedulerServiceName(), false), + } + }, + modify: func(d *pmemCSIDeployment, o apiruntime.Object) error { + d.getSchedulerService(o.(*corev1.Service)) + return nil + }, + }, "provisioner role": { objType: reflect.TypeOf(&rbacv1.Role{}), object: func(d *pmemCSIDeployment) apiruntime.Object { @@ -531,6 +625,9 @@ func (d *pmemCSIDeployment) handleEvent(ctx context.Context, metaData metav1.Obj objName := metaData.GetName() for name, handler := range subObjectHandlers { + if handler.enabled != nil && !handler.enabled(d) { + continue + } if objType != handler.objType { continue } @@ -645,16 +742,6 @@ func (d *pmemCSIDeployment) getCSIDriver(csiDriver *storagev1beta1.CSIDriver) { } } -func (d *pmemCSIDeployment) getSecret(secret *corev1.Secret, cn string, ca, encodedKey, encodedCert []byte) { - secret.Type = corev1.SecretTypeTLS - secret.Data = map[string][]byte{ - // Same names as in the example secrets and in the v1 API. - "ca.crt": ca, // no standard name for this one - "tls.key": encodedKey, // v1.TLSPrivateKeyKey - "tls.crt": encodedCert, // v1.TLSCertKey - } -} - func (d *pmemCSIDeployment) getService(service *corev1.Service, t corev1.ServiceType, port int32) { service.Spec.Type = t if service.Spec.Ports == nil { @@ -665,7 +752,8 @@ func (d *pmemCSIDeployment) getService(service *corev1.Service, t corev1.Service IntVal: port, } service.Spec.Selector = map[string]string{ - "app": d.GetHyphenedName() + "-controller", + "app.kubernetes.io/name": "pmem-csi-controller", + "app.kubernetes.io/instance": d.Name, } } @@ -738,6 +826,61 @@ func (d *pmemCSIDeployment) getWebhooksClusterRoleBinding(crb *rbacv1.ClusterRol } } +func (d *pmemCSIDeployment) getMutatingWebhookConfig(hook *admissionregistrationv1beta1.MutatingWebhookConfiguration) { + selector := &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "pmem-csi.intel.com/webhook", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"ignore"}, + }, + }, + } + failurePolicy := admissionregistrationv1beta1.Ignore + if d.Spec.MutatePods == api.MutatePodsAlways { + failurePolicy = admissionregistrationv1beta1.Fail + } + path := "/pod/mutate" + hook.Webhooks = []admissionregistrationv1beta1.MutatingWebhook{ + { + // Name must be "fully-qualified" (i.e. with domain) but not unique, so + // here "pmem-csi.intel.com" is not the default driver name. + // https://pkg.go.dev/k8s.io/api/admissionregistration/v1beta1#MutatingWebhook + Name: "pod-hook.pmem-csi.intel.com", + NamespaceSelector: selector, + ObjectSelector: selector, + FailurePolicy: &failurePolicy, + ClientConfig: admissionregistrationv1beta1.WebhookClientConfig{ + Service: &admissionregistrationv1beta1.ServiceReference{ + Name: d.SchedulerServiceName(), + Namespace: d.namespace, + Path: &path, + }, + CABundle: d.controllerCABundle, // loaded earlier in reconcile() + }, + Rules: []admissionregistrationv1beta1.RuleWithOperations{ + { + Operations: []admissionregistrationv1beta1.OperationType{admissionregistrationv1beta1.Create}, + Rule: admissionregistrationv1beta1.Rule{ + APIGroups: []string{""}, + APIVersions: []string{"v1"}, + Resources: []string{"pods"}, + }, + }, + }, + }, + } +} + +func (d *pmemCSIDeployment) getSchedulerService(service *corev1.Service) { + d.getService(service, corev1.ServiceTypeClusterIP, 443) + service.Spec.Ports[0].TargetPort.IntVal = schedulerPort + service.Spec.Ports[0].NodePort = d.Spec.SchedulerNodePort + if d.Spec.SchedulerNodePort != 0 { + service.Spec.Type = corev1.ServiceTypeNodePort + } +} + func (d *pmemCSIDeployment) getControllerProvisionerRole(role *rbacv1.Role) { role.Rules = []rbacv1.PolicyRule{ { @@ -877,21 +1020,33 @@ func (d *pmemCSIDeployment) getControllerStatefulSet(ss *appsv1.StatefulSet) { true := true pmemcsiUser := int64(1000) + if ss.Labels == nil { + ss.Labels = map[string]string{} + } + ss.Labels["app.kubernetes.io/name"] = "pmem-csi-controller" + ss.Labels["app.kubernetes.io/part-of"] = "pmem-csi" + ss.Labels["app.kubernetes.io/component"] = "controller" + ss.Labels["app.kubernetes.io/instance"] = d.Name + ss.Spec = appsv1.StatefulSetSpec{ Replicas: &replicas, Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - "app": d.GetHyphenedName() + "-controller", + "app.kubernetes.io/name": "pmem-csi-controller", + "app.kubernetes.io/instance": d.Name, }, }, - ServiceName: d.WebhooksServiceAccountName(), + ServiceName: d.ControllerServiceName(), Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: joinMaps( d.Spec.Labels, map[string]string{ - "app": d.GetHyphenedName() + "-controller", - "pmem-csi.intel.com/webhook": "ignore", + "app.kubernetes.io/name": "pmem-csi-controller", + "app.kubernetes.io/part-of": "pmem-csi", + "app.kubernetes.io/component": "controller", + "app.kubernetes.io/instance": d.Name, + "pmem-csi.intel.com/webhook": "ignore", }), Annotations: map[string]string{ "pmem-csi.intel.com/scrape": "containers", @@ -903,7 +1058,7 @@ func (d *pmemCSIDeployment) getControllerStatefulSet(ss *appsv1.StatefulSet) { RunAsNonRoot: &true, RunAsUser: &pmemcsiUser, }, - ServiceAccountName: d.GetHyphenedName() + "-controller", + ServiceAccountName: d.GetHyphenedName() + "-webhooks", Containers: []corev1.Container{ d.getControllerContainer(), }, @@ -919,8 +1074,7 @@ func (d *pmemCSIDeployment) getControllerStatefulSet(ss *appsv1.StatefulSet) { Name: "webhook-cert", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - // TODO: replace name, create secret via cert-manager - SecretName: d.GetHyphenedName() + "-registry-secrets", + SecretName: d.Spec.ControllerTLSSecret, }, }, }, @@ -932,10 +1086,20 @@ func (d *pmemCSIDeployment) getControllerStatefulSet(ss *appsv1.StatefulSet) { func (d *pmemCSIDeployment) getNodeDaemonSet(ds *appsv1.DaemonSet) { directoryOrCreate := corev1.HostPathDirectoryOrCreate + + if ds.Labels == nil { + ds.Labels = map[string]string{} + } + ds.Labels["app.kubernetes.io/name"] = "pmem-csi-node" + ds.Labels["app.kubernetes.io/part-of"] = "pmem-csi" + ds.Labels["app.kubernetes.io/component"] = "node" + ds.Labels["app.kubernetes.io/instance"] = d.Name + ds.Spec = appsv1.DaemonSetSpec{ Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - "app": d.GetHyphenedName() + "-node", + "app.kubernetes.io/name": "pmem-csi-node", + "app.kubernetes.io/instance": d.Name, }, }, Template: corev1.PodTemplateSpec{ @@ -943,8 +1107,11 @@ func (d *pmemCSIDeployment) getNodeDaemonSet(ds *appsv1.DaemonSet) { Labels: joinMaps( d.Spec.Labels, map[string]string{ - "app": d.GetHyphenedName() + "-node", - "pmem-csi.intel.com/webhook": "ignore", + "app.kubernetes.io/name": "pmem-csi-node", + "app.kubernetes.io/part-of": "pmem-csi", + "app.kubernetes.io/component": "node", + "app.kubernetes.io/instance": d.Name, + "pmem-csi.intel.com/webhook": "ignore", }), Annotations: map[string]string{ "pmem-csi.intel.com/scrape": "containers", @@ -1034,7 +1201,7 @@ func (d *pmemCSIDeployment) getControllerCommand() []string { fmt.Sprintf("-v=%d", d.Spec.LogLevel), "-logging-format=" + string(d.Spec.LogFormat), "-mode=webhooks", - "-schedulerListen=:8000", + fmt.Sprintf("-schedulerListen=:%d", schedulerPort), "-drivername=$(PMEM_CSI_DRIVER_NAME)", "-caFile=/certs/ca.crt", "-certFile=/certs/tls.crt", @@ -1178,7 +1345,7 @@ func (d *pmemCSIDeployment) getProvisionerContainer() corev1.Container { ImagePullPolicy: d.Spec.PullPolicy, Args: []string{ fmt.Sprintf("-v=%d", d.Spec.LogLevel), - "--csi-address=/csi/csi-controller.sock", + "--csi-address=/csi/csi.sock", "--feature-gates=Topology=true", "--node-deployment=true", "--strict-topology=true", @@ -1190,7 +1357,7 @@ func (d *pmemCSIDeployment) getProvisionerContainer() corev1.Container { }, Env: []corev1.EnvVar{ { - Name: "KUBE_NODE_NAME", + Name: "NODE_NAME", ValueFrom: &corev1.EnvVarSource{ FieldRef: &corev1.ObjectFieldSelector{ APIVersion: "v1", diff --git a/pkg/pmem-csi-operator/controller/deployment/deployment_controller.go b/pkg/pmem-csi-operator/controller/deployment/deployment_controller.go index 8c90ac6de7..bbf50d9da0 100644 --- a/pkg/pmem-csi-operator/controller/deployment/deployment_controller.go +++ b/pkg/pmem-csi-operator/controller/deployment/deployment_controller.go @@ -320,7 +320,7 @@ func (r *ReconcileDeployment) Reconcile(request reconcile.Request) (reconcile.Re l.V(3).Info("reconcile done", "duration", time.Since(startTime)) }() - d := &pmemCSIDeployment{dep, r.namespace, r.k8sVersion} + d := &pmemCSIDeployment{dep, r.namespace, r.k8sVersion, []byte{}} if err := d.reconcile(ctx, r); err != nil { l.Error(err, "reconcile failed") dep.Status.Phase = api.DeploymentPhaseFailed @@ -423,7 +423,7 @@ func (r *ReconcileDeployment) getDeploymentFor(obj metav1.Object) (*pmemCSIDeplo if err := deployment.EnsureDefaults(r.containerImage); err != nil { return nil, err } - return &pmemCSIDeployment{deployment, r.namespace, r.k8sVersion}, nil + return &pmemCSIDeployment{deployment, r.namespace, r.k8sVersion, []byte{}}, nil } } diff --git a/pkg/pmem-csi-operator/controller/deployment/deployment_controller_test.go b/pkg/pmem-csi-operator/controller/deployment/deployment_controller_test.go index 715c8ad89b..c9be65cbb2 100644 --- a/pkg/pmem-csi-operator/controller/deployment/deployment_controller_test.go +++ b/pkg/pmem-csi-operator/controller/deployment/deployment_controller_test.go @@ -23,7 +23,6 @@ import ( pmemcontroller "github.com/intel/pmem-csi/pkg/pmem-csi-operator/controller" "github.com/intel/pmem-csi/pkg/pmem-csi-operator/controller/deployment" "github.com/intel/pmem-csi/pkg/pmem-csi-operator/controller/deployment/testcases" - pmemtls "github.com/intel/pmem-csi/pkg/pmem-csi-operator/pmem-tls" "github.com/intel/pmem-csi/pkg/version" "github.com/intel/pmem-csi/test/e2e/operator/validate" @@ -46,6 +45,7 @@ import ( ) type pmemDeployment struct { + // input parameters for test name string deviceMode string logLevel uint16 @@ -55,8 +55,15 @@ type pmemDeployment struct { nodeCPU, nodeMemory string provisionerCPU, provisionerMemory string nodeRegistarCPU, nodeRegistrarMemory string - caCert, regCert, regKey, ncCert, ncKey []byte + controllerTLSSecret string + mutatePods api.MutatePods + schedulerNodePort int32 kubeletDir string + + objects []runtime.Object + + // expected result + expectFailure bool } func getDeployment(d *pmemDeployment) *api.PmemCSIDeployment { @@ -74,15 +81,19 @@ func getDeployment(d *pmemDeployment) *api.PmemCSIDeployment { // TODO (?): embed DeploymentSpec inside pmemDeployment instead of splitting it up into individual values. // The entire copying block below then collapses into a single line. - dep.Spec = api.DeploymentSpec{} + dep.Spec = api.DeploymentSpec{ + DeviceMode: api.DeviceMode(d.deviceMode), + LogLevel: d.logLevel, + LogFormat: api.LogFormat(d.logFormat), + Image: d.image, + PullPolicy: corev1.PullPolicy(d.pullPolicy), + ProvisionerImage: d.provisionerImage, + NodeRegistrarImage: d.registrarImage, + ControllerTLSSecret: d.controllerTLSSecret, + MutatePods: d.mutatePods, + SchedulerNodePort: d.schedulerNodePort, + } spec := &dep.Spec - spec.DeviceMode = api.DeviceMode(d.deviceMode) - spec.LogLevel = d.logLevel - spec.LogFormat = api.LogFormat(d.logFormat) - spec.Image = d.image - spec.PullPolicy = corev1.PullPolicy(d.pullPolicy) - spec.ProvisionerImage = d.provisionerImage - spec.NodeRegistrarImage = d.registrarImage if d.controllerCPU != "" || d.controllerMemory != "" { spec.ControllerDriverResources = &corev1.ResourceRequirements{ Requests: corev1.ResourceList{ @@ -215,12 +226,12 @@ type testContext struct { k8sVersion version.Version } -func newTestContext(t *testing.T, k8sVersion version.Version) *testContext { +func newTestContext(t *testing.T, k8sVersion version.Version, initObjs ...runtime.Object) *testContext { ctx := logger.Set(context.Background(), testinglogger.New(t)) tc := &testContext{ ctx: ctx, t: t, - c: newTestClient(), + c: newTestClient(initObjs...), cs: cgfake.NewSimpleClientset(), resourceVersions: map[string]string{}, k8sVersion: k8sVersion, @@ -259,14 +270,27 @@ func (tc *testContext) UnsetEventWatcher() { } } +func createSecret(name, namespace string, data map[string][]byte) *corev1.Secret { + return &corev1.Secret{ + TypeMeta: metav1.TypeMeta{ + APIVersion: corev1.SchemeGroupVersion.String(), + Kind: "Secret", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Data: data, + } +} + func TestDeploymentController(t *testing.T) { err := apis.AddToScheme(scheme.Scheme) require.NoError(t, err, "add api schema") - t.Skip("TODO: add distributed provisioning to operator") testIt := func(t *testing.T, testK8sVersion version.Version) { - setup := func(t *testing.T) *testContext { - return newTestContext(t, testK8sVersion) + setup := func(t *testing.T, initObjs ...runtime.Object) *testContext { + return newTestContext(t, testK8sVersion, initObjs...) } teardown := func(tc *testContext) { @@ -320,29 +344,17 @@ func TestDeploymentController(t *testing.T) { t.Parallel() - t.Run("deployment with defaults", func(t *testing.T) { - tc := setup(t) - defer teardown(tc) - d := &pmemDeployment{ - name: "test-deployment", - } - - dep := getDeployment(d) - - err := tc.c.Create(tc.ctx, dep) - require.NoError(t, err, "failed to create deployment") - - testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) - validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{ - api.DriverDeployed: corev1.ConditionTrue, - }) - }) + dataOkay := map[string][]byte{ + api.TLSSecretCA: []byte("ca"), + api.TLSSecretKey: []byte("key"), + api.TLSSecretCert: []byte("cert"), + } - t.Run("deployment with explicit values", func(t *testing.T) { - tc := setup(t) - defer teardown(tc) - d := &pmemDeployment{ + cases := map[string]pmemDeployment{ + "deployment with defaults": pmemDeployment{ + name: "test-deployment", + }, + "deployment with explicit values": pmemDeployment{ name: "test-deployment", image: "test-driver:v0.0.0", provisionerImage: "test-provisioner-image:v0.0.0", @@ -355,19 +367,85 @@ func TestDeploymentController(t *testing.T) { nodeCPU: "1000m", nodeMemory: "500Mi", kubeletDir: "/some/directory", - } + }, + "invalid device mode": pmemDeployment{ + name: "test-driver-modes", + deviceMode: "foobar", + expectFailure: true, + }, + "LVM mode": pmemDeployment{ + name: "test-driver-modes", + deviceMode: "lvm", + }, + "direct mode": pmemDeployment{ + name: "test-driver-modes", + deviceMode: "direct", + }, + "with controller, no secret": pmemDeployment{ + name: "test-controller", + controllerTLSSecret: "controller-secret", + expectFailure: true, + }, + "with controller, wrong secret content": pmemDeployment{ + name: "test-controller", + controllerTLSSecret: "controller-secret", + objects: []runtime.Object{createSecret("controller-secret", testNamespace, nil)}, + expectFailure: true, + }, + "with controller, secret okay": pmemDeployment{ + name: "test-controller", + controllerTLSSecret: "controller-secret", + objects: []runtime.Object{createSecret("controller-secret", testNamespace, dataOkay)}, + }, + "controller, no mutate": pmemDeployment{ + name: "test-controller", + controllerTLSSecret: "controller-secret", + mutatePods: api.MutatePodsNever, + objects: []runtime.Object{createSecret("controller-secret", testNamespace, dataOkay)}, + }, + "controller, try mutate": pmemDeployment{ + name: "test-controller", + controllerTLSSecret: "controller-secret", + mutatePods: api.MutatePodsTry, + objects: []runtime.Object{createSecret("controller-secret", testNamespace, dataOkay)}, + }, + "controller, always mutate": pmemDeployment{ + name: "test-controller", + controllerTLSSecret: "controller-secret", + mutatePods: api.MutatePodsAlways, + objects: []runtime.Object{createSecret("controller-secret", testNamespace, dataOkay)}, + }, + "controller, port 31000": pmemDeployment{ + name: "test-controller", + controllerTLSSecret: "controller-secret", + schedulerNodePort: 31000, + objects: []runtime.Object{createSecret("controller-secret", testNamespace, dataOkay)}, + }, + } - dep := getDeployment(d) - err := tc.c.Create(tc.ctx, dep) - require.NoError(t, err, "failed to create deployment") + for name, d := range cases { + d := d + t.Run(name, func(t *testing.T) { + tc := setup(t, d.objects...) + defer teardown(tc) + dep := getDeployment(&d) - // Reconcile now should change Phase to running - testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) - validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{ - api.DriverDeployed: corev1.ConditionTrue, + err := tc.c.Create(tc.ctx, dep) + require.NoError(t, err, "failed to create deployment") + + if d.expectFailure { + testReconcilePhase(t, tc.rc, tc.c, d.name, true, true, api.DeploymentPhaseFailed) + validateEvents(tc, dep, []string{api.EventReasonNew, api.EventReasonFailed}) + validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{}) + } else { + testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) + validateDriver(tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) + validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{ + api.DriverDeployed: corev1.ConditionTrue, + }) + } }) - }) + } t.Run("multiple deployments", func(t *testing.T) { tc := setup(t) @@ -400,194 +478,6 @@ func TestDeploymentController(t *testing.T) { validateConditions(tc, d2.name, conditions) }) - t.Run("invalid device mode", func(t *testing.T) { - tc := setup(t) - defer teardown(tc) - d := &pmemDeployment{ - name: "test-driver-modes", - deviceMode: "foobar", - } - - dep := getDeployment(d) - - err := tc.c.Create(tc.ctx, dep) - require.NoError(t, err, "failed to create deployment") - // Deployment should failed with an error - testReconcilePhase(t, tc.rc, tc.c, d.name, true, true, api.DeploymentPhaseFailed) - validateEvents(tc, dep, []string{api.EventReasonNew, api.EventReasonFailed}) - validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{}) - }) - - t.Run("LVM mode", func(t *testing.T) { - tc := setup(t) - d := &pmemDeployment{ - name: "test-driver-modes", - deviceMode: "lvm", - } - - dep := getDeployment(d) - - err := tc.c.Create(tc.ctx, dep) - require.NoError(t, err, "failed to create deployment") - testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) - validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{ - api.DriverDeployed: corev1.ConditionTrue, - }) - }) - - t.Run("direct mode", func(t *testing.T) { - tc := setup(t) - d := &pmemDeployment{ - name: "test-driver-modes", - deviceMode: "direct", - } - - dep := getDeployment(d) - - err := tc.c.Create(tc.ctx, dep) - require.NoError(t, err, "failed to create deployment") - testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) - validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{ - api.DriverDeployed: corev1.ConditionTrue, - }) - }) - - t.Run("provided private keys", func(t *testing.T) { - tc := setup(t) - defer teardown(tc) - // Generate private key - regKey, err := pmemtls.NewPrivateKey() - require.NoError(t, err, "Failed to generate a private key: %v", err) - - encodedKey := pmemtls.EncodeKey(regKey) - - d := &pmemDeployment{ - name: "test-deployment", - regKey: encodedKey, - } - dep := getDeployment(d) - err = tc.c.Create(tc.ctx, dep) - require.NoError(t, err, "failed to create deployment") - - // First deployment expected to be successful - testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) - validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{ - api.DriverDeployed: corev1.ConditionTrue, - }) - }) - - t.Run("provided private keys and certificates", func(t *testing.T) { - tc := setup(t) - defer teardown(tc) - ca, err := pmemtls.NewCA(nil, nil) - require.NoError(t, err, "failed to instantiate CA") - - regKey, err := pmemtls.NewPrivateKey() - require.NoError(t, err, "failed to generate a private key: %v", err) - regCert, err := ca.GenerateCertificate("pmem-registry", regKey.Public()) - require.NoError(t, err, "failed to sign registry key") - - ncKey, err := pmemtls.NewPrivateKey() - require.NoError(t, err, "failed to generate a private key: %v", err) - ncCert, err := ca.GenerateCertificate("pmem-node-controller", ncKey.Public()) - require.NoError(t, err, "failed to sign node controller key") - - d := &pmemDeployment{ - name: "test-deployment", - caCert: ca.EncodedCertificate(), - regKey: pmemtls.EncodeKey(regKey), - regCert: pmemtls.EncodeCert(regCert), - ncKey: pmemtls.EncodeKey(ncKey), - ncCert: pmemtls.EncodeCert(ncCert), - } - dep := getDeployment(d) - err = tc.c.Create(tc.ctx, dep) - require.NoError(t, err, "failed to create deployment") - - // First deployment expected to be successful - testReconcilePhase(t, tc.rc, tc.c, d.name, false, false, api.DeploymentPhaseRunning) - validateDriver(tc, dep, []string{api.EventReasonNew, api.EventReasonRunning}, false) - validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{ - api.DriverDeployed: corev1.ConditionTrue, - }) - }) - - t.Run("invalid private keys and certificates", func(t *testing.T) { - tc := setup(t) - defer teardown(tc) - ca, err := pmemtls.NewCA(nil, nil) - require.NoError(t, err, "faield to instantiate CA") - - regKey, err := pmemtls.NewPrivateKey() - require.NoError(t, err, "failed to generate a private key: %v", err) - regCert, err := ca.GenerateCertificate("invalid-registry", regKey.Public()) - require.NoError(t, err, "failed to sign registry key") - - ncKey, err := pmemtls.NewPrivateKey() - require.NoError(t, err, "failed to generate a private key: %v", err) - ncCert, err := ca.GenerateCertificate("invalid-node-controller", ncKey.Public()) - require.NoError(t, err, "failed to sign node key") - - d := &pmemDeployment{ - name: "test-deployment-cert-invalid", - caCert: ca.EncodedCertificate(), - regKey: pmemtls.EncodeKey(regKey), - regCert: pmemtls.EncodeCert(regCert), - ncKey: pmemtls.EncodeKey(ncKey), - ncCert: pmemtls.EncodeCert(ncCert), - } - dep := getDeployment(d) - err = tc.c.Create(tc.ctx, dep) - require.NoError(t, err, "failed to create deployment") - - testReconcilePhase(t, tc.rc, tc.c, d.name, true, true, api.DeploymentPhaseFailed) - validateEvents(tc, dep, []string{api.EventReasonNew, api.EventReasonFailed}) - validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{ - api.DriverDeployed: corev1.ConditionFalse, - }) - }) - - t.Run("expired certificates", func(t *testing.T) { - tc := setup(t) - defer teardown(tc) - oneDayAgo := time.Now().Add(-24 * time.Hour) - oneMinuteAgo := time.Now().Add(-1 * time.Minute) - - ca, err := pmemtls.NewCA(nil, nil) - require.NoError(t, err, "faield to instantiate CA") - - regKey, err := pmemtls.NewPrivateKey() - require.NoError(t, err, "failed to generate a private key: %v", err) - regCert, err := ca.GenerateCertificateWithDuration("pmem-registry", oneDayAgo, oneMinuteAgo, regKey.Public()) - require.NoError(t, err, "failed to registry sign key") - - ncKey, err := pmemtls.NewPrivateKey() - require.NoError(t, err, "failed to generate a private key: %v", err) - ncCert, err := ca.GenerateCertificateWithDuration("pmem-node-controller", oneDayAgo, oneMinuteAgo, ncKey.Public()) - require.NoError(t, err, "failed to sign node controller key") - - d := &pmemDeployment{ - name: "test-deployment-cert-expired", - caCert: ca.EncodedCertificate(), - regKey: pmemtls.EncodeKey(regKey), - regCert: pmemtls.EncodeCert(regCert), - ncKey: pmemtls.EncodeKey(ncKey), - ncCert: pmemtls.EncodeCert(ncCert), - } - dep := getDeployment(d) - err = tc.c.Create(tc.ctx, dep) - require.NoError(t, err, "failed to create deployment") - - testReconcilePhase(t, tc.rc, tc.c, d.name, true, true, api.DeploymentPhaseFailed) - validateEvents(tc, dep, []string{api.EventReasonNew, api.EventReasonFailed}) - validateConditions(tc, d.name, map[api.DeploymentConditionType]corev1.ConditionStatus{ - api.DriverDeployed: corev1.ConditionFalse, - }) - }) - t.Run("modified deployment under reconcile", func(t *testing.T) { tc := setup(t) defer teardown(tc) diff --git a/pkg/pmem-csi-operator/pmem-tls/tls.go b/pkg/pmem-csi-operator/pmem-tls/tls.go deleted file mode 100644 index 1e694b66f2..0000000000 --- a/pkg/pmem-csi-operator/pmem-tls/tls.go +++ /dev/null @@ -1,251 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -SPDX-License-Identifier: Apache-2.0 -*/ - -package pmemtls - -import ( - "crypto" - "errors" - "math" - "runtime" - "time" - - "crypto/rand" - "crypto/rsa" - "crypto/tls" - "crypto/x509" - "encoding/pem" - "math/big" -) - -const ( - rasKeySize = 3072 -) - -// NewPrivateKey generate an rsa private key -func NewPrivateKey() (*rsa.PrivateKey, error) { - key, err := rsa.GenerateKey(rand.Reader, rasKeySize) - if err != nil { - return nil, err - } - - runtime.SetFinalizer(key, func(k *rsa.PrivateKey) { - // Zero key after usage - *k = rsa.PrivateKey{} - }) - return key, nil -} - -// EncodeKey returns PEM encoding of give private key -func EncodeKey(key *rsa.PrivateKey) []byte { - if key == nil { - return []byte{} - } - return pem.EncodeToMemory(&pem.Block{ - Type: "RSA PRIVATE KEY", - Bytes: x509.MarshalPKCS1PrivateKey(key), - }) -} - -// DecodeKey returns the decoded private key of given encodedKey -func DecodeKey(encodedKey []byte) (*rsa.PrivateKey, error) { - block, _ := pem.Decode(encodedKey) - - key, err := x509.ParsePKCS1PrivateKey(block.Bytes) - wipe(block.Bytes) - if err != nil { - return nil, err - } - - runtime.SetFinalizer(key, func(k *rsa.PrivateKey) { - // Zero key after usage - *k = rsa.PrivateKey{} - }) - - return key, nil -} - -// EncodeCert returns PEM encoding of given cert -func EncodeCert(cert *x509.Certificate) []byte { - if cert == nil { - return []byte{} - } - return pem.EncodeToMemory(&pem.Block{ - Type: "CERTIFICATE", - Bytes: cert.Raw, - }) -} - -// DecodeCert return the decoded certificate of given encodedCert -func DecodeCert(encodedCert []byte) (*x509.Certificate, error) { - block, _ := pem.Decode(encodedCert) - - cert, err := x509.ParseCertificate(block.Bytes) - wipe(block.Bytes) - - if err != nil { - return nil, err - } - runtime.SetFinalizer(cert, func(c *x509.Certificate) { - *c = x509.Certificate{} - }) - - return cert, nil -} - -func wipe(arr []byte) { - for i := range arr { - arr[i] = 0 - } -} - -// CA type representation for a self-signed certificate authority -type CA struct { - prKey *rsa.PrivateKey - cert *x509.Certificate -} - -// NewCA creates a new CA object for given CA certificate and private key. -// If both of caCert and key are nil, generates a new private key and -// a self-signed certificate -func NewCA(caCert *x509.Certificate, key *rsa.PrivateKey) (*CA, error) { - var err error - prKey := key - cert := caCert - if cert == nil { - if prKey == nil { - prKey, err = NewPrivateKey() - if err != nil { - return nil, err - } - } - - cert, err = NewCACertificate(prKey) - if err != nil { - return nil, err - } - } else if prKey == nil { - return nil, errors.New("certificate is provided but not the associated private key is missing") - } else { - requiredKeyUsages := x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign - if cert.KeyUsage&requiredKeyUsages != requiredKeyUsages { - return nil, errors.New("provided certificates can not be used as CA certificate as" + - " is not usable for encrypting or signing other keys") - } - - if cert.IsCA != true { - return nil, errors.New("provided certificate is not a ca certificate") - } - } - - ca := &CA{ - prKey: prKey, - cert: cert, - } - return ca, nil -} - -// PrivateKey returns private key used -func (ca *CA) PrivateKey() *rsa.PrivateKey { - return ca.prKey -} - -// Certificate returns root ca certificate used -func (ca *CA) Certificate() *x509.Certificate { - return ca.cert -} - -// EncodedKey returns encoded private key used -func (ca *CA) EncodedKey() []byte { - return EncodeKey(ca.prKey) -} - -// EncodedCertificate returns encoded root ca certificate used -func (ca *CA) EncodedCertificate() []byte { - return EncodeCert(ca.cert) -} - -// GenerateCertificate returns a new certificate signed for given public key. -func (ca *CA) GenerateCertificate(cn string, key crypto.PublicKey) (*x509.Certificate, error) { - return ca.generateCertificate(cn, ca.cert.NotBefore, time.Now().Add(time.Hour*24*365), key) -} - -// GenerateCertificateWithDuration returns a new certificate signed for given public key. -// The duration of this certificate is with in the given notBefore and notAfter bounds. -// Intended use of this API is only by tests -func (ca *CA) GenerateCertificateWithDuration(cn string, notBefore, notAfter time.Time, key crypto.PublicKey) (*x509.Certificate, error) { - return ca.generateCertificate(cn, notAfter, notAfter, key) -} - -// NewCACertificate returns a self-signed certificate used as certificate authority -func NewCACertificate(key *rsa.PrivateKey) (*x509.Certificate, error) { - max := new(big.Int).SetInt64(math.MaxInt64) - serial, err := rand.Int(rand.Reader, max) - if err != nil { - return nil, err - } - tmpl := &x509.Certificate{ - Version: tls.VersionTLS12, - SerialNumber: serial, - NotBefore: time.Now(), - NotAfter: time.Now().Add(time.Hour * 24 * 365).UTC(), - KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign, - IsCA: true, - BasicConstraintsValid: true, - DNSNames: []string{"pmem-csi", "ca"}, - } - certBytes, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, key.Public(), key) - *tmpl = x509.Certificate{} - if err != nil { - return nil, err - } - - cert, err := x509.ParseCertificate(certBytes) - if err != nil { - return nil, err - } - - runtime.SetFinalizer(cert, func(c *x509.Certificate) { - *c = x509.Certificate{} - }) - - return cert, nil -} - -func (ca *CA) generateCertificate(cn string, notBefore, notAfter time.Time, key crypto.PublicKey) (*x509.Certificate, error) { - max := new(big.Int).SetInt64(math.MaxInt64) - serial, err := rand.Int(rand.Reader, max) - if err != nil { - return nil, err - } - - tmpl := &x509.Certificate{ - Version: tls.VersionTLS12, - SerialNumber: serial, - NotBefore: notBefore, - NotAfter: notAfter, - KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, - ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth}, - DNSNames: []string{cn}, - } - - certBytes, err := x509.CreateCertificate(rand.Reader, tmpl, ca.cert, key, ca.prKey) - *tmpl = x509.Certificate{} - if err != nil { - return nil, err - } - - cert, err := x509.ParseCertificate(certBytes) - if err != nil { - return nil, err - } - - runtime.SetFinalizer(cert, func(c *x509.Certificate) { - *c = x509.Certificate{} - }) - - return cert, nil -} diff --git a/pkg/pmem-csi-operator/pmem-tls/tls_test.go b/pkg/pmem-csi-operator/pmem-tls/tls_test.go deleted file mode 100644 index 497f9ff919..0000000000 --- a/pkg/pmem-csi-operator/pmem-tls/tls_test.go +++ /dev/null @@ -1,143 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -SPDX-License-Identifier: Apache-2.0 -*/ -package pmemtls_test - -import ( - "crypto/rand" - "crypto/rsa" - "crypto/tls" - "crypto/x509" - "crypto/x509/pkix" - "math" - "math/big" - "testing" - "time" - - pmemtls "github.com/intel/pmem-csi/pkg/pmem-csi-operator/pmem-tls" - "github.com/stretchr/testify/assert" -) - -func generateSelfSignedCertificate(key *rsa.PrivateKey, keyUsage x509.KeyUsage, isCA bool) (*x509.Certificate, error) { - max := new(big.Int).SetInt64(math.MaxInt64) - serial, err := rand.Int(rand.Reader, max) - if err != nil { - return nil, err - } - tmpl := &x509.Certificate{ - Version: tls.VersionTLS12, - SerialNumber: serial, - NotBefore: time.Now(), - NotAfter: time.Now().Add(time.Hour * 24 * 365), - KeyUsage: keyUsage, - IsCA: isCA, - BasicConstraintsValid: true, - Subject: pkix.Name{ - CommonName: "test root certificate authority", - }, - } - certBytes, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, key.Public(), key) - if err != nil { - return nil, err - } - - return x509.ParseCertificate(certBytes) -} - -func TestPmemTLS(t *testing.T) { - t.Run("key", func(t *testing.T) { - // create keys - key, err := pmemtls.NewPrivateKey() - assert.Empty(t, err, "Key creation failed with error: %v", err) - assert.NotEmpty(t, key, "nil key") - - // Encode-decode - bytes := pmemtls.EncodeKey(key) - assert.NotEqual(t, 0, len(bytes), "Zero length") - decodedKey, err := pmemtls.DecodeKey(bytes) - assert.Empty(t, err, "Failed to decode key: %v", err) - assert.Equal(t, key, decodedKey, "Mismatched key after decode: %+v", decodedKey) - }) - - t.Run("ca with defaults", func(t *testing.T) { - ca, err := pmemtls.NewCA(nil, nil) - - assert.Empty(t, err, "CA creation with defaults failed", err) - assert.NotEmpty(t, ca, "nil CA") - assert.NotEmpty(t, ca.PrivateKey(), "CA: empty private key") - assert.NotEmpty(t, ca.Certificate(), "CA: empty root certificate key") - }) - - t.Run("ca with invalid arguments", func(t *testing.T) { - cakey, err := pmemtls.NewPrivateKey() - assert.Empty(t, err, "failed to create new key") - - keyUsage := x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature - cacert, err := generateSelfSignedCertificate(cakey, keyUsage, true) - - _, err = pmemtls.NewCA(cacert, cakey) - assert.NotEmpty(t, err, "expected an error when no private key provided") - - keyUsage = x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign - cacert, err = generateSelfSignedCertificate(cakey, keyUsage, false) - - _, err = pmemtls.NewCA(cacert, nil) - assert.NotEmpty(t, err, "expected an error when no private key provided") - - _, err = pmemtls.NewCA(cacert, cakey) - assert.NotEmpty(t, err, "expected an error when provided certificate is not for CA") - }) - - t.Run("ca with provided private key", func(t *testing.T) { - cakey, err := rsa.GenerateKey(rand.Reader, 1024) - assert.Empty(t, err, "failed to create new key") - - encKey := pmemtls.EncodeKey(cakey) - assert.NotEmpty(t, encKey, "Encoding key failed") - - ca, err := pmemtls.NewCA(nil, cakey) - assert.Empty(t, err, "CA creation with pre-provisioned key failed") - assert.NotEmpty(t, ca, "nil CA") - assert.Equal(t, cakey, ca.PrivateKey(), "CA: mismatched private key") - assert.Equal(t, encKey, ca.EncodedKey(), "CA: mismatched encoded key") - assert.NotEmpty(t, ca.Certificate(), "CA: empty root certificate key") - - prKey, err := pmemtls.NewPrivateKey() - assert.Empty(t, err, "Failed to create private key") - - cert, err := ca.GenerateCertificate("Some name", prKey.Public()) - assert.Empty(t, err, "Failed to sign certificate") - assert.NotEmpty(t, cert, "Generated certificate is empty") - }) - - t.Run("ca with provided certificate and key", func(t *testing.T) { - cakey, err := pmemtls.NewPrivateKey() - assert.Empty(t, err, "failed to create new key") - - keyUsage := x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign - cacert, err := generateSelfSignedCertificate(cakey, keyUsage, true) - - ca, err := pmemtls.NewCA(cacert, cakey) - assert.Empty(t, err, "CA creation with pre-provisioned key failed") - assert.NotEmpty(t, ca, "nil CA") - assert.Equal(t, cakey, ca.PrivateKey(), "CA: mismatched private key") - assert.Equal(t, cacert, ca.Certificate(), "CA: empty root certificate key") - - prKey, err := pmemtls.NewPrivateKey() - assert.Empty(t, err, "Failed to create private key") - - // CA signing truncates noano seconds - validity := time.Now().Add(time.Hour * 24 * 365).UTC().Truncate(time.Second) - - cert, err := ca.GenerateCertificate("test-cert", prKey.Public()) - assert.Empty(t, err, "Failed to sign certificate") - assert.NotEmpty(t, cert, "Generated certificate is empty") - - isValid := cert.NotAfter.Equal(validity) || cert.NotAfter.After(validity) - assert.Equal(t, isValid, true, "invalid certificate validity(%v) expected least %v", cert.NotAfter, validity) - - assert.Contains(t, cert.DNSNames, "test-cert", "mismatched common name") - }) -} diff --git a/pkg/scheduler/mutate_pod.go b/pkg/scheduler/mutate_pod.go index fea3ef3481..62d0b1f17e 100644 --- a/pkg/scheduler/mutate_pod.go +++ b/pkg/scheduler/mutate_pod.go @@ -27,8 +27,9 @@ import ( ) const ( - // Resource is the resource that will trigger the scheduler extender. - Resource = "pmem-csi.intel.com/scheduler" + // resourceSuffix is the part which gets added to the CSI driver name to + // create the extended resource name that will trigger the scheduler extender. + resourceSuffix = "/scheduler" ) // Handle implements admission.Handler interface. @@ -70,14 +71,15 @@ func (s scheduler) Handle(ctx context.Context, req admission.Request) admission. ctnr := &pod.Spec.Containers[0] quantity := resource.NewQuantity(1, resource.DecimalSI) + resource := corev1.ResourceName(s.driverName + resourceSuffix) if ctnr.Resources.Requests == nil { ctnr.Resources.Requests = corev1.ResourceList{} } - ctnr.Resources.Requests[Resource] = *quantity + ctnr.Resources.Requests[resource] = *quantity if ctnr.Resources.Limits == nil { ctnr.Resources.Limits = corev1.ResourceList{} } - ctnr.Resources.Limits[Resource] = *quantity + ctnr.Resources.Limits[resource] = *quantity marshaledPod, err := json.Marshal(pod) if err != nil { diff --git a/test/e2e/deploy/deploy.go b/test/e2e/deploy/deploy.go index 0a103206d3..28e95e8c29 100644 --- a/test/e2e/deploy/deploy.go +++ b/test/e2e/deploy/deploy.go @@ -97,9 +97,7 @@ func WaitForOLM(c *Cluster, namespace string) *v1.Pod { // - controller service is up and running // - all nodes have registered // - for testing deployments: TCP CSI endpoints are ready -// -// "name" is the common prefix used for objects of the deployment. -func WaitForPMEMDriver(c *Cluster, name string, d *Deployment) (metricsURL string) { +func WaitForPMEMDriver(c *Cluster, d *Deployment) (metricsURL string) { ticker := time.NewTicker(time.Second) defer ticker.Stop() info := time.NewTicker(time.Minute) @@ -107,6 +105,17 @@ func WaitForPMEMDriver(c *Cluster, name string, d *Deployment) (metricsURL strin deadline, cancel := context.WithTimeout(context.Background(), 10*time.Minute) defer cancel() + // "name" is the common prefix used for objects of the deployment. + // If we are testing against an older version of PMEM-CSI, then it is always "pmem-csi". + // PMEM-CSI 0.9.0 derives it from the CSI driver name. + var name string + switch d.Version { + case "0.7", "0.8": + name = "pmem-csi" + default: + name = strings.ReplaceAll(d.DriverName, ".", "-") + } + if waitForPMEMDriverTimedOut { // Abort early. skipper.Skipf("installing PMEM-CSI driver during previous test was too slow") @@ -133,58 +142,120 @@ func WaitForPMEMDriver(c *Cluster, name string, d *Deployment) (metricsURL strin deadline, cancel := context.WithTimeout(deadline, timeout) defer cancel() - // The controller service must be defined. - port, err := c.GetServicePort(deadline, name+"-metrics", d.Namespace) - if err != nil { - return fmt.Errorf("get port for service %s-metrics in namespace %s: %v", name, d.Namespace, err) - } + if d.HasController { + // The controller service must be defined. + port, err := c.GetServicePort(deadline, name+"-metrics", d.Namespace) + if err != nil { + return fmt.Errorf("get port for service %s-metrics in namespace %s: %v", name, d.Namespace, err) + } - // We can connect to it and get metrics data. - scheme := "http" - if d.Version == "0.7" { - scheme = "https" - } - metricsURL = fmt.Sprintf("%s://%s:%d/metrics", scheme, c.NodeIP(0), port) - client := &http.Client{ - Transport: &tr, - Timeout: timeout, - } - resp, err := client.Get(metricsURL) - if err != nil { - return fmt.Errorf("get controller metrics: %v", err) - } - if resp.StatusCode != 200 { - body, _ := ioutil.ReadAll(resp.Body) - suffix := "" - if len(body) > 0 { - suffix = "\n" + string(body) + // We can connect to it and get metrics data. + scheme := "http" + if d.Version == "0.7" { + scheme = "https" + } + metricsURL = fmt.Sprintf("%s://%s:%d/metrics", scheme, c.NodeIP(0), port) + client := &http.Client{ + Transport: &tr, + Timeout: timeout, + } + resp, err := client.Get(metricsURL) + if err != nil { + return fmt.Errorf("get controller metrics: %v", err) + } + if resp.StatusCode != 200 { + body, _ := ioutil.ReadAll(resp.Body) + suffix := "" + if len(body) > 0 { + suffix = "\n" + string(body) + } + return fmt.Errorf("HTTP GET %s failed: %d%s", metricsURL, resp.StatusCode, suffix) } - return fmt.Errorf("HTTP GET %s failed: %d%s", metricsURL, resp.StatusCode, suffix) - } - // Parse and check number of connected nodes. Dump the - // version number while we are at it. - parser := expfmt.TextParser{} - metrics, err := parser.TextToMetricFamilies(resp.Body) - if err != nil { - return fmt.Errorf("parse metrics response: %v", err) - } - buildInfo, ok := metrics["build_info"] - if !ok { - return fmt.Errorf("expected build_info not found in metrics: %v", metrics) - } - if len(buildInfo.Metric) != 1 { - return fmt.Errorf("expected build_info to have one metric, got: %v", buildInfo.Metric) - } - buildMetric := buildInfo.Metric[0] - if len(buildMetric.Label) != 1 { - return fmt.Errorf("expected build_info to have one label, got: %v", buildMetric.Label) + // Check metrics data. + parser := expfmt.TextParser{} + metrics, err := parser.TextToMetricFamilies(resp.Body) + if err != nil { + return fmt.Errorf("parse metrics response: %v", err) + } + buildInfo, ok := metrics["build_info"] + if !ok { + return fmt.Errorf("expected build_info not found in metrics: %v", metrics) + } + if len(buildInfo.Metric) != 1 { + return fmt.Errorf("expected build_info to have one metric, got: %v", buildInfo.Metric) + } + buildMetric := buildInfo.Metric[0] + if len(buildMetric.Label) != 1 { + return fmt.Errorf("expected build_info to have one label, got: %v", buildMetric.Label) + } + label := buildMetric.Label[0] + if *label.Name != "version" { + return fmt.Errorf("expected build_info to contain a version label, got: %s", *label.Name) + } + version = *label.Value + + // With the older, centralized provisioning we + // can also check that the controller knows + // about all nodes. + switch d.Version { + case "0.7", "0.8": + pmemNodes, ok := metrics["pmem_nodes"] + if !ok { + return fmt.Errorf("expected pmem_nodes not found in metrics: %v", metrics) + } + + if len(pmemNodes.Metric) != 1 { + return fmt.Errorf("expected pmem_nodes to have one metric, got: %v", pmemNodes.Metric) + } + nodesMetric := pmemNodes.Metric[0] + actualNodes := int(*nodesMetric.Gauge.Value) + if actualNodes != c.NumNodes()-1 { + return fmt.Errorf("only %d of %d nodes have registered", actualNodes, c.NumNodes()-1) + } + } } - label := buildMetric.Label[0] - if *label.Name != "version" { - return fmt.Errorf("expected build_info to contain a version label, got: %s", *label.Name) + + // Check status of every node driver. This is crucial for 0.9.0 + // because this is no longer covered by the controller + // metrics check that was used previously. + switch d.Version { + case "0.7", "0.8": + // No need to test and doesn't have the necessary labels. + default: + pods, err := c.cs.CoreV1().Pods(d.Namespace).List(context.Background(), + metav1.ListOptions{ + LabelSelector: labels.Set{ + "app.kubernetes.io/instance": d.DriverName, + "app.kubernetes.io/component": "node", + }.String(), + }, + ) + if err != nil { + return fmt.Errorf("list node pods: %v", err) + } + if len(pods.Items) != c.NumNodes()-1 { + return fmt.Errorf("only %d of %d node driver pods exist", len(pods.Items), c.NumNodes()-1) + } + for _, pod := range pods.Items { + if !podIsReady(pod.Status) { + return fmt.Errorf("node driver pod %s on node %s is not ready", pod.Name, pod.Spec.NodeName) + } + csiNode, err := c.cs.StorageV1().CSINodes().Get(context.Background(), + pod.Spec.NodeName, + metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("get CSINode %s: %v", pod.Spec.NodeName, err) + } + if !driverHasRegistered(*csiNode, d.DriverName) { + return fmt.Errorf("PMEM-CSI driver %s not added to CSINode %+v yet", d.DriverName, csiNode) + } + + // It would be nice to check the metrics endpoint here, but reaching it from outside + // the cluster relies on port forwarding (tricky to set up) or some way to run curl + // inside the cluster (expensive because we would need to start a pod for it). + } } - version = *label.Value // Done for normal deployments. if !d.Testing { @@ -248,6 +319,27 @@ func WaitForPMEMDriver(c *Cluster, name string, d *Deployment) (metricsURL strin } } +func podIsReady(podStatus v1.PodStatus) bool { + if podStatus.Phase != v1.PodRunning { + return false + } + for _, condition := range podStatus.Conditions { + if condition.Type == v1.ContainersReady { + return condition.Status == v1.ConditionTrue + } + } + return false +} + +func driverHasRegistered(csiNode storagev1.CSINode, driverName string) bool { + for _, driver := range csiNode.Spec.Drivers { + if driver.Name == driverName { + return true + } + } + return false +} + // https://github.com/containerd/containerd/issues/4068 var containerdTaskError = regexp.MustCompile(`failed to (start|create) containerd task`) @@ -349,9 +441,8 @@ func RemoveObjects(c *Cluster, deployment *Deployment) error { } } - // We intentionally delete statefulset last because that is - // how FindDeployment will find it again if we don't manage to - // delete the entire deployment. Here we just scale it down + // We intentionally delete statefulset last because + // findDriver checks for it. Here we just scale it down // to trigger pod deletion. if list, err := c.cs.AppsV1().StatefulSets("").List(context.Background(), filter); !failure(err) { for _, object := range list.Items { @@ -506,17 +597,22 @@ func RemoveObjects(c *Cluster, deployment *Deployment) error { // Deployment contains some information about a some deployed PMEM-CSI component(s). // Those components can be a full driver installation and/or just the operator. type Deployment struct { - // HasDriver is true if the driver itself is running. The - // driver is reacting to the usual pmem-csi.intel.com driver - // name. + // HasDriver is true if the driver itself is running. HasDriver bool + // The CSI driver name that the driver is using. Usually + // pmem-csi.intel.com. + DriverName string + // HasOperator is true if the operator is running. HasOperator bool // HasOLM is true if the OLM(OperatorLifecycleManager) is running. HasOLM bool + // HasController is true if the controller part with the webhooks is enabled. + HasController bool + // Mode is the driver mode of the deployment. Mode api.DeviceMode @@ -584,19 +680,21 @@ func FindDeployment(c *Cluster) (*Deployment, error) { if operator != nil && driver != nil && operator.Name() != driver.Name() { return nil, fmt.Errorf("found two different deployments: %s and %s", operator.Name(), driver.Name()) } - if operator != nil { - return operator, nil - } + // findDriver is able to discover some additional information, so return that result + // if we have both. if driver != nil { return driver, nil } + if operator != nil { + return operator, nil + } return nil, nil } var imageVersion = regexp.MustCompile(`pmem-csi-driver(?:-test)?:v(\d+\.\d+)`) func findDriver(c *Cluster) (*Deployment, error) { - list, err := c.cs.AppsV1().StatefulSets("").List(context.Background(), metav1.ListOptions{LabelSelector: deploymentLabel}) + list, err := c.cs.AppsV1().DaemonSets("").List(context.Background(), metav1.ListOptions{LabelSelector: deploymentLabel}) if err != nil { return nil, err } @@ -611,6 +709,21 @@ func findDriver(c *Cluster) (*Deployment, error) { } deployment.Namespace = list.Items[0].Namespace + drivers, err := c.cs.StorageV1beta1().CSIDrivers().List(context.Background(), metav1.ListOptions{LabelSelector: deploymentLabel}) + if err != nil { + return nil, err + } + if len(drivers.Items) != 1 { + return nil, fmt.Errorf("expected one CSIDriver info, got: %v", drivers) + } + deployment.DriverName = drivers.Items[0].Name + + controllers, err := c.cs.AppsV1().StatefulSets("").List(context.Background(), metav1.ListOptions{LabelSelector: deploymentLabel}) + if err != nil { + return nil, fmt.Errorf("checking for StatefulSet: %v", err) + } + deployment.HasController = len(controllers.Items) > 0 + // Derive the version from the image tag. The annotation doesn't include it. // If the version matches what we are currently testing, then we skip // the version (i.e. "current version" == "no explicit version"). @@ -672,26 +785,34 @@ var allDeployments = []string{ "lvm-production", "direct-testing", "direct-production", - // TODO: add distributed provisioning to operator - // "operator", - // "operator-lvm-production", - // "operator-direct-production", // Uses kube-system, to ensure that deployment in a namespace also works. - // "olm", // operator installed by OLM + "operator", + // Uses second.pmem-csi.intel.com as driver name. + "operator-lvm-production", + // Uses kube-system, to ensure that deployment in a namespace also works, + // and *no* controller. + "operator-direct-production", + "olm", // operator installed by OLM } var deploymentRE = regexp.MustCompile(`^(operator|olm)?-?(\w*)?-?(testing|production)?-?([0-9\.]*)$`) // Parse the deployment name and sets fields accordingly. func Parse(deploymentName string) (*Deployment, error) { deployment := &Deployment{ - Namespace: "default", + Namespace: "default", + DriverName: "pmem-csi.intel.com", + HasController: true, } - if deploymentName == "operator" { + switch deploymentName { + case "operator": // Run the operator tests in a dedicated namespace // to cover the non-default namespace usecase deployment.Namespace = "operator-test" - } - if deploymentName == "operator-direct-production" { + case "operator-direct-production": deployment.Namespace = "kube-system" + // No secret available in that namespace. + deployment.HasController = false + case "operator-lvm-production": + deployment.DriverName = "second.pmem-csi.intel.com" } matches := deploymentRE.FindStringSubmatch(deploymentName) @@ -789,7 +910,7 @@ func EnsureDeploymentNow(f *framework.Framework, deployment *Deployment) { framework.Logf("reusing existing %s PMEM-CSI components", deployment.Name()) // Do some sanity checks on the running deployment before the test. if deployment.HasDriver { - WaitForPMEMDriver(c, "pmem-csi", deployment) + WaitForPMEMDriver(c, deployment) CheckPMEMDriver(c, deployment) } if deployment.HasOperator { @@ -907,7 +1028,7 @@ func EnsureDeploymentNow(f *framework.Framework, deployment *Deployment) { // We check for a running driver the same way at the moment, by directly // looking at the driver state. Long-term we want the operator to do that // checking itself. - WaitForPMEMDriver(c, "pmem-csi", deployment) + WaitForPMEMDriver(c, deployment) CheckPMEMDriver(c, deployment) } } @@ -915,7 +1036,7 @@ func EnsureDeploymentNow(f *framework.Framework, deployment *Deployment) { // GetDriverDeployment returns the spec for the driver deployment that is used // for deployments like operator-lvm-production. func (d *Deployment) GetDriverDeployment() api.PmemCSIDeployment { - return api.PmemCSIDeployment{ + dep := api.PmemCSIDeployment{ // TypeMeta is needed because // DefaultUnstructuredConverter does not add it for us. Is there a better way? TypeMeta: metav1.TypeMeta{ @@ -923,7 +1044,7 @@ func (d *Deployment) GetDriverDeployment() api.PmemCSIDeployment { Kind: "PmemCSIDeployment", }, ObjectMeta: metav1.ObjectMeta{ - Name: "pmem-csi", + Name: d.DriverName, Labels: map[string]string{ deploymentLabel: d.Label(), }, @@ -943,6 +1064,16 @@ func (d *Deployment) GetDriverDeployment() api.PmemCSIDeployment { }, }, } + + if d.HasController { + // The controller is enabled, using a secret that must have + // been prepared beforehand. + dep.Spec.ControllerTLSSecret = strings.ReplaceAll(d.DriverName, ".", "-") + "-controller-secret" + dep.Spec.MutatePods = api.MutatePodsAlways + dep.Spec.SchedulerNodePort = 32000 // TODO: read from test-config.sh + } + + return dep } // DeleteAllPods deletes all currently running pods that belong to the deployment. diff --git a/test/e2e/driver/driver.go b/test/e2e/driver/driver.go index f626ccb14a..008ae88d0b 100644 --- a/test/e2e/driver/driver.go +++ b/test/e2e/driver/driver.go @@ -50,6 +50,11 @@ type DynamicDriver interface { WithParameters(parameters map[string]string) DynamicDriver } +// CSIDriver exposes the CSI driver name, something that is normally hidden. +type CSIDriver interface { + GetCSIDriverName(config *testsuites.PerTestConfig) string +} + func New(name, csiDriverName string, fsTypes []string, scManifests map[string]string) testsuites.TestDriver { if fsTypes == nil { fsTypes = []string{"", "ext4", "xfs"} diff --git a/test/e2e/metrics/metrics.go b/test/e2e/metrics/metrics.go index 2129d16526..b3352ef632 100644 --- a/test/e2e/metrics/metrics.go +++ b/test/e2e/metrics/metrics.go @@ -50,7 +50,7 @@ var _ = deploy.Describe("direct-testing", "direct-testing-metrics", "", func(d * BeforeEach(func() { cluster, err := deploy.NewCluster(f.ClientSet, f.DynamicClient) framework.ExpectNoError(err, "get cluster information") - metricsURL = deploy.WaitForPMEMDriver(cluster, "pmem-csi", d) + metricsURL = deploy.WaitForPMEMDriver(cluster, d) }) It("works", func() { diff --git a/test/e2e/operator/deployment_api.go b/test/e2e/operator/deployment_api.go index 18f8e79ef9..39fec2a343 100644 --- a/test/e2e/operator/deployment_api.go +++ b/test/e2e/operator/deployment_api.go @@ -21,6 +21,7 @@ import ( "github.com/intel/pmem-csi/test/e2e/deploy" "github.com/intel/pmem-csi/test/e2e/operator/validate" + admissionregistrationv1beta1 "k8s.io/api/admissionregistration/v1beta1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -295,6 +296,7 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { deployment.Spec.Image = "" deployment.Spec.PMEMPercentage = 50 deployment.Spec.LogFormat = format + deployment.Spec.ControllerTLSSecret = "pmem-csi-intel-com-controller-secret" deployment = deploy.CreateDeploymentCR(f, deployment) defer deploy.DeleteDeploymentCR(f, deployment.Name) @@ -424,43 +426,41 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { It("shall recover from conflicts", func() { deployment := getDeployment("test-recover-from-conflicts") - sec := &corev1.Secret{ - TypeMeta: metav1.TypeMeta{ - Kind: "Secret", - APIVersion: "v1", - }, + deployment.Spec.ControllerTLSSecret = "pmem-csi-intel-com-controller-secret" + se := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: deployment.GetHyphenedName() + "-registry-secrets", + Name: deployment.GetHyphenedName() + "-scheduler", Namespace: d.Namespace, }, - Type: corev1.SecretTypeTLS, - Data: map[string][]byte{ - "ca.crt": []byte("fake ca"), - "tls.key": []byte("fake key"), - "tls.crt": []byte("fake crt"), + Spec: corev1.ServiceSpec{ + Selector: map[string]string{"app": "foobar"}, + Type: corev1.ServiceTypeClusterIP, + Ports: []corev1.ServicePort{ + {Port: 433}, + }, }, } - deleteSecret := func(name string) { + deleteService := func() { Eventually(func() error { - err := f.ClientSet.CoreV1().Secrets(d.Namespace).Delete(context.Background(), name, metav1.DeleteOptions{}) - deploy.LogError(err, "Delete secret error: %v, will retry...", err) + err := f.ClientSet.CoreV1().Services(d.Namespace).Delete(context.Background(), se.Name, metav1.DeleteOptions{}) + deploy.LogError(err, "Delete service error: %v, will retry...", err) if errors.IsNotFound(err) { return nil } return err - }, "3m", "1s").ShouldNot(HaveOccurred(), "delete secret %q", name) + }, "3m", "1s").ShouldNot(HaveOccurred(), "delete service %s", se.Name) } Eventually(func() error { - _, err := f.ClientSet.CoreV1().Secrets(d.Namespace).Create(context.Background(), sec, metav1.CreateOptions{}) - deploy.LogError(err, "create secret error: %v, will retry...", err) + _, err := f.ClientSet.CoreV1().Services(d.Namespace).Create(context.Background(), se, metav1.CreateOptions{}) + deploy.LogError(err, "create service error: %v, will retry...", err) return err - }, "3m", "1s").ShouldNot(HaveOccurred(), "create secret %q", sec.Name) - defer deleteSecret(sec.Name) + }, "3m", "1s").ShouldNot(HaveOccurred(), "create service %s", se.Name) + defer deleteService() deployment = deploy.CreateDeploymentCR(f, deployment) defer deploy.DeleteDeploymentCR(f, deployment.Name) - // The deployment should fail to create required secret(s) as it already + // The deployment should fail to create the required service as it already // exists and is owned by others. Eventually(func() bool { out := deploy.GetDeploymentCR(f, deployment.Name) @@ -468,8 +468,54 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { }, "3m", "1s").Should(BeTrue(), "deployment should fail %q", deployment.Name) validateEvents(&deployment, []string{api.EventReasonNew, api.EventReasonFailed}) - // Deleting the existing secret should make the deployment succeed. - deleteSecret(sec.Name) + // Deleting the existing service should make the deployment succeed. + deleteService() + validateDriver(deployment, true) + validateEvents(&deployment, []string{api.EventReasonNew, api.EventReasonRunning}) + }) + + It("shall recover from missing secret", func() { + deployment := getDeployment("test-recover-from-missing-secret") + sec := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-controller-secret", + Namespace: d.Namespace, + }, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{ + "ca.crt": []byte("fake ca"), + "tls.key": []byte("fake key"), + "tls.crt": []byte("fake crt"), + }, + } + deployment.Spec.ControllerTLSSecret = sec.Name + deployment = deploy.CreateDeploymentCR(f, deployment) + defer deploy.DeleteDeploymentCR(f, deployment.Name) + + // The deployment should fail because the required secret is missing. + Eventually(func() bool { + out := deploy.GetDeploymentCR(f, deployment.Name) + return out.Status.Phase == api.DeploymentPhaseFailed + }, "3m", "1s").Should(BeTrue(), "deployment should fail %q", deployment.Name) + validateEvents(&deployment, []string{api.EventReasonNew, api.EventReasonFailed}) + + // Creating the secret should make the deployment succeed. + deleteSecret := func() { + Eventually(func() error { + err := f.ClientSet.CoreV1().Secrets(d.Namespace).Delete(context.Background(), sec.Name, metav1.DeleteOptions{}) + deploy.LogError(err, "Delete secret error: %v, will retry...", err) + if errors.IsNotFound(err) { + return nil + } + return err + }, "3m", "1s").ShouldNot(HaveOccurred(), "delete secret %s", sec.Name) + } + Eventually(func() error { + _, err := f.ClientSet.CoreV1().Secrets(d.Namespace).Create(context.Background(), sec, metav1.CreateOptions{}) + deploy.LogError(err, "create secret error: %v, will retry...", err) + return err + }, "3m", "1s").ShouldNot(HaveOccurred(), "create secret %s", sec.Name) + defer deleteSecret() validateDriver(deployment, true) validateEvents(&deployment, []string{api.EventReasonNew, api.EventReasonRunning}) }) @@ -585,9 +631,10 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { deployment = deploy.CreateDeploymentCR(f, deployment) defer deploy.DeleteDeploymentCR(f, deployment.Name) - deploy.WaitForPMEMDriver(c, deployment.Name, + deploy.WaitForPMEMDriver(c, &deploy.Deployment{ - Namespace: d.Namespace, + Namespace: d.Namespace, + DriverName: driverName, }) validateDriver(deployment, true) @@ -685,18 +732,6 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { Context("recover", func() { Context("deleted sub-resources", func() { tests := map[string]func(*api.PmemCSIDeployment) apiruntime.Object{ - "registry secret": func(dep *api.PmemCSIDeployment) apiruntime.Object { - return &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: dep.RegistrySecretName(), Namespace: d.Namespace, - }, - } - }, - "node secret": func(dep *api.PmemCSIDeployment) apiruntime.Object { - return &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{Name: dep.NodeSecretName(), Namespace: d.Namespace}, - } - }, "provisioner service account": func(dep *api.PmemCSIDeployment) apiruntime.Object { return &corev1.ServiceAccount{ ObjectMeta: metav1.ObjectMeta{Name: dep.ProvisionerServiceAccountName(), Namespace: d.Namespace}, @@ -707,6 +742,11 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { ObjectMeta: metav1.ObjectMeta{Name: dep.WebhooksServiceAccountName(), Namespace: d.Namespace}, } }, + "scheduler service": func(dep *api.PmemCSIDeployment) apiruntime.Object { + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: dep.SchedulerServiceName(), Namespace: d.Namespace}, + } + }, "controller service": func(dep *api.PmemCSIDeployment) apiruntime.Object { return &corev1.Service{ ObjectMeta: metav1.ObjectMeta{Name: dep.ControllerServiceName(), Namespace: d.Namespace}, @@ -737,6 +777,11 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { ObjectMeta: metav1.ObjectMeta{Name: dep.WebhooksClusterRoleBindingName()}, } }, + "mutating webhook config": func(dep *api.PmemCSIDeployment) apiruntime.Object { + return &admissionregistrationv1beta1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: dep.MutatingWebhookName()}, + } + }, "provisioner role": func(dep *api.PmemCSIDeployment) apiruntime.Object { return &rbacv1.Role{ ObjectMeta: metav1.ObjectMeta{Name: dep.ProvisionerRoleName(), Namespace: d.Namespace}, @@ -789,7 +834,10 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { for name, getter := range tests { name, getter := name, getter It(name, func() { + // Create a deployment with controller and webhook config. dep := getDeployment("recover-" + strings.ReplaceAll(name, " ", "-")) + dep.Spec.ControllerTLSSecret = "pmem-csi-intel-com-controller-secret" + dep.Spec.MutatePods = api.MutatePodsAlways deployment := deploy.CreateDeploymentCR(f, dep) defer deploy.DeleteDeploymentCR(f, dep.Name) validateDriver(deployment) @@ -872,6 +920,7 @@ var _ = deploy.DescribeForSome("API", func(d *deploy.Deployment) bool { name, mutate := name, mutate It(name, func() { dep := getDeployment("recover-" + strings.ReplaceAll(name, " ", "-")) + dep.Spec.ControllerTLSSecret = "pmem-csi-intel-com-controller-secret" deployment := deploy.CreateDeploymentCR(f, dep) defer deploy.DeleteDeploymentCR(f, dep.Name) validateDriver(deployment) @@ -1104,9 +1153,10 @@ func switchDeploymentMode(c *deploy.Cluster, f *framework.Framework, depName, ns }, "3m", "1s").Should(BeTrue(), "Pod restart '%s'", pod) } - deploy.WaitForPMEMDriver(c, depName, + deploy.WaitForPMEMDriver(c, &deploy.Deployment{ - Namespace: ns, + Namespace: ns, + DriverName: depName, }) return deployment diff --git a/test/e2e/operator/driver.go b/test/e2e/operator/driver.go index acdfb115b3..971e9f34fd 100644 --- a/test/e2e/operator/driver.go +++ b/test/e2e/operator/driver.go @@ -15,6 +15,7 @@ import ( "github.com/intel/pmem-csi/test/e2e/driver" "github.com/intel/pmem-csi/test/e2e/operator/validate" "github.com/intel/pmem-csi/test/e2e/storage/dax" + "github.com/intel/pmem-csi/test/e2e/storage/scheduler" "k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/storage/testsuites" @@ -57,6 +58,10 @@ var _ = deploy.DescribeForSome("driver", func(d *deploy.Deployment) bool { var csiTestSuites = []func() testsuites.TestSuite{ dax.InitDaxTestSuite, } + if d.HasController { + // Scheduler tests depend on the webhooks in the controller. + csiTestSuites = append(csiTestSuites, scheduler.InitSchedulerTestSuite) + } testsuites.DefineTestSuite(csiTestDriver, csiTestSuites) }) diff --git a/test/e2e/operator/validate/validate.go b/test/e2e/operator/validate/validate.go index 9c719c5b32..5bb0c38426 100644 --- a/test/e2e/operator/validate/validate.go +++ b/test/e2e/operator/validate/validate.go @@ -196,14 +196,7 @@ func DriverDeployment(client client.Client, k8sver version.Version, namespace st } } } - gvk := schema.GroupVersionKind{ - Kind: "Secret", - Version: "v1", - } - for _, expected := range append(expectedObjects, - // Content doesn't matter, we just want to be sure they exist. - createObject(gvk, deployment.GetHyphenedName()+"-registry-secrets", namespace), - createObject(gvk, deployment.GetHyphenedName()+"-node-secrets", namespace)) { + for _, expected := range expectedObjects { if findObject(objects, expected) == nil { diffs = append(diffs, fmt.Sprintf("expected object was not deployed: %v", prettyPrintObjectID(expected))) } @@ -296,6 +289,10 @@ func parseDefaultValues() map[string]interface{} { imagePullPolicy: IfNotPresent ports: protocol: TCP + env: + valueFrom: + fieldRef: + apiVersion: v1 volumes: secret: defaultMode: 420` @@ -321,6 +318,20 @@ StatefulSet:` + defaultsApps + ` CSIDriver: spec: storageCapacity: false +MutatingWebhookConfiguration: + webhooks: + clientConfig: + caBundle: ignore # content varies, correctness is validated during E2E testing + service: + port: 443 + admissionReviewVersions: + - v1beta1 + matchPolicy: Exact + reinvocationPolicy: Never + rules: + scope: "*" + sideEffects: Unknown + timeoutSeconds: 30 ` err := yaml.UnmarshalStrict([]byte(defaultsYAML), &defaults) @@ -486,7 +497,7 @@ func listAllDeployedObjects(c client.Client, deployment api.PmemCSIDeployment, n // Test client does not support differentiating cluster-scoped objects // and the query fails when fetch those object by setting the namespace- switch list.GetKind() { - case "CSIDriverList", "ClusterRoleList", "ClusterRoleBindingList": + case "CSIDriverList", "ClusterRoleList", "ClusterRoleBindingList", "MutatingWebhookConfigurationList": opts = &client.ListOptions{} } // Filtering by owner doesn't work, so we have to use brute-force and look at all diff --git a/test/e2e/storage/csi_volumes.go b/test/e2e/storage/csi_volumes.go index c0776c0495..949fc8ca64 100644 --- a/test/e2e/storage/csi_volumes.go +++ b/test/e2e/storage/csi_volumes.go @@ -48,7 +48,7 @@ var ( ) var _ = deploy.DescribeForAll("E2E", func(d *deploy.Deployment) { - csiTestDriver := driver.New(d.Name(), "pmem-csi.intel.com", nil, nil) + csiTestDriver := driver.New(d.Name(), d.DriverName, nil, nil) // List of testSuites to be added below. var csiTestSuites = []func() testsuites.TestSuite{ diff --git a/test/e2e/storage/scheduler/scheduler.go b/test/e2e/storage/scheduler/scheduler.go index a1d6e3839b..357589a65c 100644 --- a/test/e2e/storage/scheduler/scheduler.go +++ b/test/e2e/storage/scheduler/scheduler.go @@ -29,6 +29,7 @@ import ( "k8s.io/kubernetes/test/e2e/storage/testpatterns" "k8s.io/kubernetes/test/e2e/storage/testsuites" + e2edriver "github.com/intel/pmem-csi/test/e2e/driver" "github.com/intel/pmem-csi/test/e2e/ephemeral" . "github.com/onsi/ginkgo" @@ -109,12 +110,15 @@ func (p *schedulerTestSuite) DefineTests(driver testsuites.TestDriver, pattern t init() defer cleanup() - l.testSchedulerInPod(f, l.resource.Pattern.VolType, l.resource.VolSource, l.config) + driverName := driver.(e2edriver.CSIDriver).GetCSIDriverName(l.config) + + l.testSchedulerInPod(f, driverName, l.resource.Pattern.VolType, l.resource.VolSource, l.config) }) } func (l local) testSchedulerInPod( f *framework.Framework, + driverName string, volumeType testpatterns.TestVolType, source *v1.VolumeSource, config *testsuites.PerTestConfig) { @@ -163,12 +167,14 @@ func (l local) testSchedulerInPod( podClient.DeleteSync(createdPod.Name, metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout) }() + resourceName := v1.ResourceName(driverName + "/scheduler") + Expect(createdPod.Spec.Containers[0].Resources).NotTo(BeNil(), "pod resources") Expect(createdPod.Spec.Containers[0].Resources.Requests).NotTo(BeNil(), "pod resource requests") - _, ok := createdPod.Spec.Containers[0].Resources.Requests["pmem-csi.intel.com/scheduler"] + _, ok := createdPod.Spec.Containers[0].Resources.Requests[resourceName] Expect(ok).To(BeTrue(), "PMEM-CSI extended resource request") Expect(createdPod.Spec.Containers[0].Resources.Limits).NotTo(BeNil(), "pod resource requests") - _, ok = createdPod.Spec.Containers[0].Resources.Requests["pmem-csi.intel.com/scheduler"] + _, ok = createdPod.Spec.Containers[0].Resources.Requests[resourceName] Expect(ok).To(BeTrue(), "PMEM-CSI extended resource limit") podErr := e2epod.WaitForPodRunningInNamespace(f.ClientSet, createdPod) diff --git a/test/e2e/tls/tls.go b/test/e2e/tls/tls.go index f5629ce271..6de023f66b 100644 --- a/test/e2e/tls/tls.go +++ b/test/e2e/tls/tls.go @@ -18,6 +18,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + "k8s.io/kubernetes/test/e2e/framework/skipper" "github.com/intel/pmem-csi/test/e2e/deploy" pmempod "github.com/intel/pmem-csi/test/e2e/pod" @@ -45,6 +46,9 @@ var _ = deploy.DescribeForAll("TLS", func(d *deploy.Deployment) { Context("controller", func() { It("is secure", func() { + if !d.HasController { + skipper.Skipf("has no controller") + } checkTLS(f, "pmem-csi-intel-com-controller-0.pmem-csi-intel-com-controller."+d.Namespace) }) }) diff --git a/test/setup-ca-kubernetes.sh b/test/setup-ca-kubernetes.sh index 7fda520a30..d623e4afb9 100755 --- a/test/setup-ca-kubernetes.sh +++ b/test/setup-ca-kubernetes.sh @@ -27,6 +27,11 @@ read_key () { # Read certificate files and turn them into Kubernetes secrets. # +# The "registry" part in the file and variable names is historic. +# PMEM-CSI < 0.9.0 used that certificate for the node registry +# and webhooks. PMEM-CSI >= 0.9.0 only uses it for the webhooks +# and no longer has such a registry. +# # -caFile (controller and all nodes) CA=$(read_key "${TEST_CA}.pem") # -certFile (controller) diff --git a/test/setup-deployment.sh b/test/setup-deployment.sh index 8b751702bb..3701e54437 100755 --- a/test/setup-deployment.sh +++ b/test/setup-deployment.sh @@ -160,6 +160,9 @@ EOF - op: add path: /spec/ports/0/nodePort value: ${TEST_SCHEDULER_EXTENDER_NODE_PORT} +- op: add + path: /spec/type + value: NodePort EOF ;; webhook) diff --git a/test/setup-kubernetes.sh b/test/setup-kubernetes.sh index e32b8d11ad..4703f0ba5b 100755 --- a/test/setup-kubernetes.sh +++ b/test/setup-kubernetes.sh @@ -82,6 +82,14 @@ list_gates () ( # filter, the extender is only going to be called for pods which # explicitly enable it and thus other pods (including PMEM-CSI # itself!) can be scheduled without it. +# +# In order to reach the scheduler extender, a fixed node port +# is used regardless of the driver name, so only one deployment +# can be active at once. In production this has to be solved +# differently. +# +# Usually the driver name will be "pmem-csi.intel.com", but for testing +# purposed we also configure a second extender. sudo mkdir -p /var/lib/scheduler/ sudo cp ca.crt /var/lib/scheduler/ @@ -118,6 +126,18 @@ EOF "name": "pmem-csi.intel.com/scheduler", "ignoredByScheduler": true }] + }, + { + "urlPrefix": "https://127.0.0.1:${TEST_SCHEDULER_EXTENDER_NODE_PORT}", + "filterVerb": "filter", + "prioritizeVerb": "prioritize", + "nodeCacheCapable": true, + "weight": 1, + "managedResources": + [{ + "name": "second.pmem-csi.intel.com/scheduler", + "ignoredByScheduler": true + }] }] } EOF @@ -139,6 +159,14 @@ extenders: managedResources: - name: pmem-csi.intel.com/scheduler ignoredByScheduler: true +- urlPrefix: https://127.0.0.1:${TEST_SCHEDULER_EXTENDER_NODE_PORT} + filterVerb: filter + prioritizeVerb: prioritize + nodeCacheCapable: true + weight: 1 + managedResources: + - name: second.pmem-csi.intel.com/scheduler + ignoredByScheduler: true EOF ;; esac