From c777dc08c24795a574411975ea94c84c95109e7e Mon Sep 17 00:00:00 2001 From: "wangjianyu.wjy" Date: Wed, 20 Nov 2024 17:26:32 +0800 Subject: [PATCH] koordlet: support strict gpu share Signed-off-by: wangjianyu.wjy --- apis/extension/device_share.go | 8 ++ pkg/koordlet/runtimehooks/hooks/gpu/gpu.go | 26 ++++++ .../runtimehooks/hooks/gpu/gpu_test.go | 79 +++++++++++++------ .../protocol/container_context.go | 10 ++- 4 files changed, 99 insertions(+), 24 deletions(-) diff --git a/apis/extension/device_share.go b/apis/extension/device_share.go index 8cd977ebb..b53425e71 100644 --- a/apis/extension/device_share.go +++ b/apis/extension/device_share.go @@ -58,6 +58,8 @@ const ( LabelGPUModel string = NodeDomainPrefix + "/gpu-model" LabelGPUDriverVersion string = NodeDomainPrefix + "/gpu-driver-version" LabelSecondaryDeviceWellPlanned string = NodeDomainPrefix + "/secondary-device-well-planned" + + LabelGPUIsolationProvider = "koordinator.sh/gpu-isolation-provider" ) // DeviceAllocations would be injected into Pod as form of annotation during Pre-bind stage. @@ -202,6 +204,12 @@ const ( GPUPartitionPolicyPrefer GPUPartitionPolicy = "Prefer" ) +type GPUIsolationProvider string + +const ( + GPUIsolationProviderHAMICore GPUIsolationProvider = "HAMi-core" +) + func GetDeviceAllocations(podAnnotations map[string]string) (DeviceAllocations, error) { deviceAllocations := DeviceAllocations{} data, ok := podAnnotations[AnnotationDeviceAllocated] diff --git a/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go b/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go index 65b6094b2..c9e86b64c 100644 --- a/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go +++ b/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go @@ -20,6 +20,7 @@ import ( "fmt" "strings" + "github.com/containerd/nri/pkg/api" "k8s.io/klog/v2" ext "github.com/koordinator-sh/koordinator/apis/extension" @@ -70,5 +71,30 @@ func (p *gpuPlugin) InjectContainerGPUEnv(proto protocol.HooksProtocol) error { containerCtx.Response.AddContainerEnvs = make(map[string]string) } containerCtx.Response.AddContainerEnvs[GpuAllocEnv] = strings.Join(gpuIDs, ",") + if containerReq.PodAnnotations[ext.LabelGPUIsolationProvider] == string(ext.GPUIsolationProviderHAMICore) { + gpuResources := devices[0].Resources + gpuMemoryRatio, ok := gpuResources[ext.ResourceGPUMemoryRatio] + if !ok { + return fmt.Errorf("gpu memory ratio not found in gpu resource") + } + if gpuMemoryRatio.Value() < 100 { + gpuMemory, ok := gpuResources[ext.ResourceGPUMemory] + if !ok { + return fmt.Errorf("gpu memory not found in gpu resource") + } + containerCtx.Response.AddContainerEnvs["CUDA_DEVICE_MEMORY_LIMIT"] = fmt.Sprintf("%d", gpuMemory.Value()) + gpuCore, ok := gpuResources[ext.ResourceGPUCore] + if ok { + containerCtx.Response.AddContainerEnvs["CUDA_DEVICE_SM_LIMIT"] = fmt.Sprintf("%d", gpuCore.Value()) + } + containerCtx.Response.AddContainerEnvs["LD_PRELOAD"] = "/libvgpu.so" + containerCtx.Response.AddContainerMounts = append(containerCtx.Response.AddContainerMounts, &api.Mount{ + Destination: "/libvgpu.so", + Type: "bind", + Source: "/data/bin/libvgpu.so", + }) + } + } + return nil } diff --git a/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go b/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go index cd1af7550..f1fbe31c0 100644 --- a/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go +++ b/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go @@ -19,6 +19,7 @@ package gpu import ( "testing" + "github.com/containerd/nri/pkg/api" "github.com/stretchr/testify/assert" ext "github.com/koordinator-sh/koordinator/apis/extension" @@ -31,30 +32,33 @@ func Test_InjectContainerGPUEnv(t *testing.T) { expectedAllocStr string expectedError bool proto protocol.HooksProtocol + expectedMounts []*api.Mount + expectedEnvs map[string]string }{ { - "test empty proto", - "", - true, - nil, + name: "test empty proto", + expectedAllocStr: "", + expectedError: true, + proto: nil, }, { - "test normal gpu alloc", - "0,1", - false, - &protocol.ContainerContext{ + name: "test normal gpu alloc", + expectedAllocStr: "0,1", + expectedError: false, + proto: &protocol.ContainerContext{ Request: protocol.ContainerRequest{ PodAnnotations: map[string]string{ ext.AnnotationDeviceAllocated: "{\"gpu\": [{\"minor\": 0},{\"minor\": 1}]}", }, }, }, + expectedEnvs: map[string]string{GpuAllocEnv: "0,1"}, }, { - "test empty gpu alloc", - "", - false, - &protocol.ContainerContext{ + name: "test empty gpu alloc", + expectedAllocStr: "", + expectedError: false, + proto: &protocol.ContainerContext{ Request: protocol.ContainerRequest{ PodAnnotations: map[string]string{ ext.AnnotationDeviceAllocated: "{\"fpga\": [{\"minor\": 0},{\"minor\": 1}]}", @@ -62,18 +66,49 @@ func Test_InjectContainerGPUEnv(t *testing.T) { }, }, }, + { + name: "gpu share with HAMi", + expectedAllocStr: "1", + expectedError: false, + proto: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodAnnotations: map[string]string{ + ext.AnnotationDeviceAllocated: `{"gpu":[{"minor":1,"resources":{"koordinator.sh/gpu-core":"50","koordinator.sh/gpu-memory":"16Gi","koordinator.sh/gpu-memory-ratio":"50"}}]}`, + ext.LabelGPUIsolationProvider: string(ext.GPUIsolationProviderHAMICore), + }, + }, + }, + expectedEnvs: map[string]string{ + GpuAllocEnv: "1", + "CUDA_DEVICE_MEMORY_LIMIT": "17179869184", + "CUDA_DEVICE_SM_LIMIT": "50", + "LD_PRELOAD": "/libvgpu.so", + }, + expectedMounts: []*api.Mount{ + { + Destination: "/libvgpu.so", + Type: "bind", + Source: "/data/bin/libvgpu.so", + }, + }, + }, } plugin := gpuPlugin{} for _, tt := range tests { - var containerCtx *protocol.ContainerContext - if tt.proto != nil { - containerCtx = tt.proto.(*protocol.ContainerContext) - } - err := plugin.InjectContainerGPUEnv(containerCtx) - assert.Equal(t, tt.expectedError, err != nil, tt.name) - if tt.proto != nil { - containerCtx := tt.proto.(*protocol.ContainerContext) - assert.Equal(t, containerCtx.Response.AddContainerEnvs[GpuAllocEnv], tt.expectedAllocStr, tt.name) - } + t.Run(tt.name, func(t *testing.T) { + var containerCtx *protocol.ContainerContext + if tt.proto != nil { + containerCtx = tt.proto.(*protocol.ContainerContext) + } + err := plugin.InjectContainerGPUEnv(containerCtx) + assert.Equal(t, tt.expectedError, err != nil, tt.name) + if tt.proto != nil { + containerCtx := tt.proto.(*protocol.ContainerContext) + assert.Equal(t, containerCtx.Response.AddContainerEnvs[GpuAllocEnv], tt.expectedAllocStr, tt.name) + assert.Equal(t, containerCtx.Response.AddContainerEnvs, tt.expectedEnvs, tt.name) + assert.Equal(t, containerCtx.Response.AddContainerMounts, tt.expectedMounts, tt.name) + } + }) + } } diff --git a/pkg/koordlet/runtimehooks/protocol/container_context.go b/pkg/koordlet/runtimehooks/protocol/container_context.go index a39ba04a9..311fc9bc7 100644 --- a/pkg/koordlet/runtimehooks/protocol/container_context.go +++ b/pkg/koordlet/runtimehooks/protocol/container_context.go @@ -181,8 +181,9 @@ func (c *ContainerRequest) FromReconciler(podMeta *statesinformer.PodMeta, conta } type ContainerResponse struct { - Resources Resources - AddContainerEnvs map[string]string + Resources Resources + AddContainerEnvs map[string]string + AddContainerMounts []*api.Mount } func (c *ContainerResponse) ProxyDone(resp *runtimeapi.ContainerResourceHookResponse) { @@ -278,6 +279,11 @@ func (c *ContainerContext) NriDone(executor resourceexecutor.ResourceUpdateExecu adjust.AddEnv(k, v) } } + if len(c.Response.AddContainerMounts) != 0 { + for _, m := range c.Response.AddContainerMounts { + adjust.AddMount(m) + } + } c.Update()