From 006592bcc38c4a459efa89e82bbfb606c62ab79f Mon Sep 17 00:00:00 2001
From: zhanglei
Date: Thu, 4 Aug 2022 18:58:19 +0800
Subject: [PATCH] feat: Support GPU passthrough

---
 cmd/virt-prerunner/main.go                    | 25 ++++++
 ...rt.virtink.smartx.com_virtualmachines.yaml | 14 ++++
 pkg/apis/virt/v1alpha1/types.go               |  7 ++
 .../virt/v1alpha1/zz_generated.deepcopy.go    | 21 ++++++
 pkg/controller/vm_controller.go               |  4 +
 pkg/controller/vm_webhook.go                  | 48 ++++++++++++
 pkg/controller/vm_webhook_test.go             | 74 +++++++++++++++++++
 samples/ubuntu-gpu.yaml                       | 30 ++++++++
 8 files changed, 223 insertions(+)
 create mode 100644 samples/ubuntu-gpu.yaml

diff --git a/cmd/virt-prerunner/main.go b/cmd/virt-prerunner/main.go
index 5bff440..8ede15b 100644
--- a/cmd/virt-prerunner/main.go
+++ b/cmd/virt-prerunner/main.go
@@ -191,6 +191,31 @@ func buildVMConfig(ctx context.Context, vm *virtv1alpha1.VirtualMachine) (*cloud
 		}
 	}
 
+	// GPUs that share a resource name take successive entries from that
+	// resource's PCI address list, so track how many each resource has used.
+	var resourceIndexes = map[string]int{}
+	getResourceIndex := func(resourceName string) int {
+		index := resourceIndexes[resourceName]
+		resourceIndexes[resourceName] = index + 1
+		return index
+	}
+
+	for _, gpu := range vm.Spec.Instance.GPUs {
+		// The env var holds a comma-separated list of PCI addresses.
+		pciAddresses := strings.Split(os.Getenv(gpu.ResourcePCIAddressEnvVarName), ",")
+		index := getResourceIndex(gpu.ResourceName)
+		// Splitting an unset or empty env var yields [""], so an empty entry
+		// must be rejected as well as an out-of-range index.
+		if index >= len(pciAddresses) || pciAddresses[index] == "" {
+			return nil, fmt.Errorf("failed to get PCI address for %s", gpu.Name)
+		}
+		gpuDeviceConfig := cloudhypervisor.DeviceConfig{
+			Id:   gpu.Name,
+			Path: fmt.Sprintf("/sys/bus/pci/devices/%s", pciAddresses[index]),
+		}
+		vmConfig.Devices = append(vmConfig.Devices, &gpuDeviceConfig)
+	}
+
 	return &vmConfig, nil
 }
 
diff --git a/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml b/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml
index 400d6bd..f6fa326 100644
--- a/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml
+++ b/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml
@@ -883,6 +883,20 @@ spec:
                       - name
                       type: object
                     type: array
+                  gpus:
+                    items:
+                      properties:
+                        name:
+                          type: string
+                        resourceName:
+                          type: string
+                        resourcePCIAddressEnvVarName:
+                          type: string
+                      required:
+                      - name
+                      - resourceName
+                      type: object
+                    type: array
                   interfaces:
                     items:
                       properties:
diff --git a/pkg/apis/virt/v1alpha1/types.go b/pkg/apis/virt/v1alpha1/types.go
index 9403d17..5362e6e 100644
--- a/pkg/apis/virt/v1alpha1/types.go
+++ b/pkg/apis/virt/v1alpha1/types.go
@@ -57,6 +57,7 @@ type Instance struct {
 	Kernel     *Kernel     `json:"kernel,omitempty"`
 	Disks      []Disk      `json:"disks,omitempty"`
 	Interfaces []Interface `json:"interfaces,omitempty"`
+	GPUs       []GPU       `json:"gpus,omitempty"`
 }
 
 type CPU struct {
@@ -95,6 +96,12 @@ type InterfaceBridge struct {
 type InterfaceSRIOV struct {
 }
 
+type GPU struct {
+	Name                         string `json:"name"`
+	ResourceName                 string `json:"resourceName"`
+	ResourcePCIAddressEnvVarName string `json:"resourcePCIAddressEnvVarName,omitempty"`
+}
+
 type Volume struct {
 	Name         string `json:"name"`
 	VolumeSource `json:",inline"`
diff --git a/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go
index 22fb66d..05ce937 100644
--- a/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go
@@ -112,6 +112,22 @@ func (in *Disk) DeepCopy() *Disk {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *GPU) DeepCopyInto(out *GPU) {
+	*out = *in
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPU.
+func (in *GPU) DeepCopy() *GPU {
+	if in == nil {
+		return nil
+	}
+	out := new(GPU)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *Instance) DeepCopyInto(out *Instance) {
 	*out = *in
@@ -136,6 +152,11 @@ func (in *Instance) DeepCopyInto(out *Instance) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
+	if in.GPUs != nil {
+		in, out := &in.GPUs, &out.GPUs
+		*out = make([]GPU, len(*in))
+		copy(*out, *in)
+	}
 	return
 }
 
diff --git a/pkg/controller/vm_controller.go b/pkg/controller/vm_controller.go
index 3fd9d66..de13a0d 100644
--- a/pkg/controller/vm_controller.go
+++ b/pkg/controller/vm_controller.go
@@ -470,6 +470,10 @@ func (r *VMReconciler) buildVMPod(ctx context.Context, vm *virtv1alpha1.VirtualM
 		vmPod.Annotations["k8s.v1.cni.cncf.io/networks"] = string(networksJSON)
 	}
 
+	for _, gpu := range vm.Spec.Instance.GPUs {
+		incrementContainerResource(&vmPod.Spec.Containers[0], gpu.ResourceName)
+	}
+
 	return &vmPod, nil
 }
 
diff --git a/pkg/controller/vm_webhook.go b/pkg/controller/vm_webhook.go
index e03454b..4a4c4aa 100644
--- a/pkg/controller/vm_webhook.go
+++ b/pkg/controller/vm_webhook.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"net"
 	"net/http"
+	"strings"
 
 	"github.com/r3labs/diff/v2"
 	admissionv1 "k8s.io/api/admission/v1"
@@ -84,6 +85,20 @@ func MutateVM(ctx context.Context, vm *virtv1alpha1.VirtualMachine, oldVM *virtv
 			}
 		}
 	}
+
+	// Default the PCI address env var name for NVIDIA resources; other
+	// resources are left untouched and must set it explicitly.
+	for i := range vm.Spec.Instance.GPUs {
+		if vm.Spec.Instance.GPUs[i].ResourcePCIAddressEnvVarName == "" {
+			// Match on the prefix: TrimPrefix below is a no-op when "nvidia.com/"
+			// appears anywhere else in the resource name.
+			if strings.HasPrefix(vm.Spec.Instance.GPUs[i].ResourceName, "nvidia.com/") {
+				// https://github.com/NVIDIA/kubevirt-gpu-device-plugin/blob/f2f291647189859946a4fbb61d4e1812d86861f7/pkg/device_plugin/generic_device_plugin.go#L48
+				deviceName := strings.TrimPrefix(vm.Spec.Instance.GPUs[i].ResourceName, "nvidia.com/")
+				vm.Spec.Instance.GPUs[i].ResourcePCIAddressEnvVarName = fmt.Sprintf("PCI_RESOURCE_NVIDIA_COM_%s", deviceName)
+			}
+		}
+	}
 	return nil
 }
 
@@ -210,6 +225,16 @@ func ValidateInstance(ctx context.Context, instance *virtv1alpha1.Instance, fiel
 		errs = append(errs, ValidateInterface(ctx, &iface, fieldPath)...)
 	}
 
+	gpuNames := map[string]struct{}{}
+	for i, gpu := range instance.GPUs {
+		fieldPath := fieldPath.Child("gpus").Index(i)
+		if _, ok := gpuNames[gpu.Name]; ok {
+			errs = append(errs, field.Duplicate(fieldPath.Child("name"), gpu.Name))
+		}
+		gpuNames[gpu.Name] = struct{}{}
+		errs = append(errs, ValidateGPU(ctx, &gpu, fieldPath)...)
+	}
+
 	return errs
 }
 
@@ -336,6 +361,29 @@ func ValidateCIDR(cidr string, capacity int, fieldPath *field.Path) field.ErrorL
 	return errs
 }
 
+// ValidateGPU checks a single GPU entry of the instance spec and returns one
+// field.Required error for each of name, resourceName and
+// resourcePCIAddressEnvVarName that is empty. A nil gpu is reported as a
+// missing value at fieldPath.
+func ValidateGPU(ctx context.Context, gpu *virtv1alpha1.GPU, fieldPath *field.Path) field.ErrorList {
+	var errs field.ErrorList
+	if gpu == nil {
+		errs = append(errs, field.Required(fieldPath, ""))
+		return errs
+	}
+
+	if gpu.Name == "" {
+		errs = append(errs, field.Required(fieldPath.Child("name"), ""))
+	}
+	if gpu.ResourceName == "" {
+		errs = append(errs, field.Required(fieldPath.Child("resourceName"), ""))
+	}
+	if gpu.ResourcePCIAddressEnvVarName == "" {
+		errs = append(errs, field.Required(fieldPath.Child("resourcePCIAddressEnvVarName"), ""))
+	}
+	return errs
+}
+
 func ValidateVolume(ctx context.Context, volume *virtv1alpha1.Volume, fieldPath *field.Path) field.ErrorList {
 	var errs field.ErrorList
 	if volume == nil {
diff --git a/pkg/controller/vm_webhook_test.go b/pkg/controller/vm_webhook_test.go
index 1a6d5b4..3b5ab01 100644
--- a/pkg/controller/vm_webhook_test.go
+++ b/pkg/controller/vm_webhook_test.go
@@ -30,6 +30,11 @@ func TestValidateVM(t *testing.T) {
 						Bridge: &virtv1alpha1.InterfaceBridge{},
 					},
 				}},
+				GPUs: []virtv1alpha1.GPU{{
+					Name:                         "gpu-1",
+					ResourceName:                 "nvidia.com/Tesla_01",
+					ResourcePCIAddressEnvVarName: "PCI_RESOURCE_NVIDIA_COM_Tesla_01",
+				}},
 			},
 			Volumes: []virtv1alpha1.Volume{{
 				Name: "vol-1",
@@ -117,6 +122,27 @@ func TestValidateVM(t *testing.T) {
 		}(),
 		invalidFields: []string{"spec.instance.interfaces[0].sriov"},
 	}, {
+		vm: func() *virtv1alpha1.VirtualMachine {
+			vm := validVM.DeepCopy()
+			vm.Spec.Instance.GPUs[0].Name = ""
+			return vm
+		}(),
+		invalidFields: []string{"spec.instance.gpus[0].name"},
+	}, {
+		vm: func() *virtv1alpha1.VirtualMachine {
+			vm := validVM.DeepCopy()
+			vm.Spec.Instance.GPUs[0].ResourceName = ""
+			return vm
+		}(),
+		invalidFields: []string{"spec.instance.gpus[0].resourceName"},
+	}, {
+		vm: func() *virtv1alpha1.VirtualMachine {
+			vm := validVM.DeepCopy()
+			vm.Spec.Instance.GPUs[0].ResourcePCIAddressEnvVarName = ""
+			return vm
+		}(),
+		invalidFields: []string{"spec.instance.gpus[0].resourcePCIAddressEnvVarName"},
+	}, {
 		vm: func() *virtv1alpha1.VirtualMachine {
 			vm := validVM.DeepCopy()
 
@@ -168,3 +194,51 @@ func TestValidateVM(t *testing.T) {
 		}
 	}
 }
+
+func TestMutateVM(t *testing.T) {
+	vm := &virtv1alpha1.VirtualMachine{
+		Spec: virtv1alpha1.VirtualMachineSpec{
+			Instance: virtv1alpha1.Instance{
+				Interfaces: []virtv1alpha1.Interface{{
+					Name: "net-1",
+					InterfaceBindingMethod: virtv1alpha1.InterfaceBindingMethod{
+						Bridge: &virtv1alpha1.InterfaceBridge{},
+					},
+				}},
+				GPUs: []virtv1alpha1.GPU{{
+					Name:         "gpu-1",
+					ResourceName: "nvidia.com/Tesla_01",
+				}, {
+					Name:         "gpu-2",
+					ResourceName: "amd.com/S7100",
+				}},
+			},
+		},
+	}
+
+	tests := []struct {
+		vm     *virtv1alpha1.VirtualMachine
+		assert func()
+	}{{
+		vm: vm,
+		assert: func() {
+			assert.NotNil(t, vm.Spec.Instance.Interfaces[0].InterfaceBindingMethod.Bridge)
+		},
+	}, {
+		vm: vm,
+		assert: func() {
+			assert.Equal(t, "PCI_RESOURCE_NVIDIA_COM_Tesla_01", vm.Spec.Instance.GPUs[0].ResourcePCIAddressEnvVarName)
+		},
+	}, {
+		vm: vm,
+		assert: func() {
+			assert.Equal(t, "", vm.Spec.Instance.GPUs[1].ResourcePCIAddressEnvVarName)
+		},
+	}}
+
+	for _, tc := range tests {
+		err := MutateVM(context.Background(), tc.vm, nil)
+		assert.Nil(t, err)
+		tc.assert()
+	}
+}
diff --git a/samples/ubuntu-gpu.yaml b/samples/ubuntu-gpu.yaml
new file mode 100644
index 0000000..da83681
--- /dev/null
+++ b/samples/ubuntu-gpu.yaml
@@ -0,0 +1,30 @@
+apiVersion: virt.virtink.smartx.com/v1alpha1
+kind: VirtualMachine
+metadata:
+  name: ubuntu-gpu
+spec:
+  instance:
+    memory:
+      size: 1Gi
+    disks:
+      - name: ubuntu
+      - name: cloud-init
+    interfaces:
+      - name: pod
+    gpus:
+      - name: gpu1
+        resourceName: nvidia.com/XXXX # TODO: replace XXXX with the GPU resource name to request
+  volumes:
+    - name: ubuntu
+      containerDisk:
+        image: smartxworks/virtink-container-disk-ubuntu
+    - name: cloud-init
+      cloudInit:
+        userData: |-
+          #cloud-config
+          password: password
+          chpasswd: { expire: False }
+          ssh_pwauth: True
+  networks:
+    - name: pod
+      pod: {}