From be03800944d0a2663bab130f365c0ce96a79fe17 Mon Sep 17 00:00:00 2001
From: "wangjianyu.wjy"
Date: Mon, 2 Dec 2024 11:32:20 +0800
Subject: [PATCH] koordlet: support gpu share with HAMi

Let the koordlet GPU runtime hook set up HAMi-core isolation for pods
that opt in through the LabelGPUIsolationProvider pod label
(DomainPrefix + "gpu-isolation-provider") with the value "HAMi-core".

When the label matches and the first allocated GPU has a
gpu-memory-ratio below 100, InjectContainerGPUEnv additionally:

  * sets CUDA_DEVICE_MEMORY_LIMIT to the allocated gpu-memory value;
  * sets CUDA_DEVICE_SM_LIMIT to the allocated gpu-core value, when a
    gpu-core resource is present;
  * sets LD_PRELOAD to the configured HAMi-core library path;
  * bind-mounts (rbind) that library path into the container at the
    same path.

It returns an error when gpu-memory-ratio is missing, or when
gpu-memory is missing while the ratio is below 100.

To carry the mount, ContainerResponse gains AddContainerMounts, backed
by a new protocol.Mount type, and NriDone forwards every entry to the
NRI container adjustment via AddMount.

The library path is configurable with the new
--hami-core-library-directory-path flag and defaults to
/usr/local/vgpu/libvgpu.so in both host mode and DaemonSet mode.

Test_InjectContainerGPUEnv is converted to keyed struct literals and
subtests, and now also checks the full env map and the mount list.

Signed-off-by: wangjianyu.wjy
---
NOTE(review): the "gpu share with HAMi" test case expects the default
library path (/usr/local/vgpu/libvgpu.so) because the test does not
override system.Conf. This assumes system.Conf is initialised from
NewDsModeConfig/NewHostModeConfig - confirm against
pkg/koordlet/util/system/config.go. The assertions use testify's
(expected, actual) argument order so failure output is labelled
correctly.

 apis/extension/device_share.go                |  8 ++
 pkg/koordlet/runtimehooks/hooks/gpu/gpu.go    | 27 +++++++
 .../runtimehooks/hooks/gpu/gpu_test.go        | 80 ++++++++++++++-----
 .../protocol/container_context.go             | 12 +++
 .../runtimehooks/protocol/protocol.go         |  7 ++
 pkg/koordlet/util/system/config.go            | 48 ++++++-----
 pkg/koordlet/util/system/config_test.go       | 38 ++++-----
 7 files changed, 158 insertions(+), 62 deletions(-)

diff --git a/apis/extension/device_share.go b/apis/extension/device_share.go
index 5914a16e1..1014a649f 100644
--- a/apis/extension/device_share.go
+++ b/apis/extension/device_share.go
@@ -58,6 +58,8 @@ const (
 	LabelGPUModel                   string = NodeDomainPrefix + "/gpu-model"
 	LabelGPUDriverVersion           string = NodeDomainPrefix + "/gpu-driver-version"
 	LabelSecondaryDeviceWellPlanned string = NodeDomainPrefix + "/secondary-device-well-planned"
+
+	LabelGPUIsolationProvider = DomainPrefix + "gpu-isolation-provider"
 )
 
 // DeviceAllocations would be injected into Pod as form of annotation during Pre-bind stage.
@@ -220,6 +222,12 @@ const (
 	GPUPartitionPolicyPrefer GPUPartitionPolicy = "Prefer"
 )
 
+type GPUIsolationProvider string
+
+const (
+	GPUIsolationProviderHAMICore GPUIsolationProvider = "HAMi-core"
+)
+
 func GetDeviceAllocations(podAnnotations map[string]string) (DeviceAllocations, error) {
 	deviceAllocations := DeviceAllocations{}
 	data, ok := podAnnotations[AnnotationDeviceAllocated]
diff --git a/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go b/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go
index 65b6094b2..0abeb458f 100644
--- a/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go
+++ b/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go
@@ -26,6 +26,7 @@ import (
 	schedulingv1alpha1 "github.com/koordinator-sh/koordinator/apis/scheduling/v1alpha1"
 	"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks"
 	"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/protocol"
+	"github.com/koordinator-sh/koordinator/pkg/koordlet/util/system"
 	rmconfig "github.com/koordinator-sh/koordinator/pkg/runtimeproxy/config"
 )
 
@@ -70,5 +71,31 @@ func (p *gpuPlugin) InjectContainerGPUEnv(proto protocol.HooksProtocol) error {
 		containerCtx.Response.AddContainerEnvs = make(map[string]string)
 	}
 	containerCtx.Response.AddContainerEnvs[GpuAllocEnv] = strings.Join(gpuIDs, ",")
+	if containerReq.PodLabels[ext.LabelGPUIsolationProvider] == string(ext.GPUIsolationProviderHAMICore) {
+		gpuResources := devices[0].Resources
+		gpuMemoryRatio, ok := gpuResources[ext.ResourceGPUMemoryRatio]
+		if !ok {
+			return fmt.Errorf("gpu memory ratio not found in gpu resource")
+		}
+		if gpuMemoryRatio.Value() < 100 {
+			gpuMemory, ok := gpuResources[ext.ResourceGPUMemory]
+			if !ok {
+				return fmt.Errorf("gpu memory not found in gpu resource")
+			}
+			containerCtx.Response.AddContainerEnvs["CUDA_DEVICE_MEMORY_LIMIT"] = fmt.Sprintf("%d", gpuMemory.Value())
+			gpuCore, ok := gpuResources[ext.ResourceGPUCore]
+			if ok {
+				containerCtx.Response.AddContainerEnvs["CUDA_DEVICE_SM_LIMIT"] = fmt.Sprintf("%d", gpuCore.Value())
+			}
+			containerCtx.Response.AddContainerEnvs["LD_PRELOAD"] = system.Conf.HAMICoreLibraryDirectoryPath
+			containerCtx.Response.AddContainerMounts = append(containerCtx.Response.AddContainerMounts, &protocol.Mount{
+				Destination: system.Conf.HAMICoreLibraryDirectoryPath,
+				Type:        "bind",
+				Source:      system.Conf.HAMICoreLibraryDirectoryPath,
+				Options:     []string{"rbind"},
+			})
+		}
+	}
+
 	return nil
 }
diff --git a/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go b/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go
index cd1af7550..df5421abe 100644
--- a/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go
+++ b/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go
@@ -31,30 +31,33 @@ func Test_InjectContainerGPUEnv(t *testing.T) {
 		expectedAllocStr string
 		expectedError    bool
 		proto            protocol.HooksProtocol
+		expectedMounts   []*protocol.Mount
+		expectedEnvs     map[string]string
 	}{
 		{
-			"test empty proto",
-			"",
-			true,
-			nil,
+			name:             "test empty proto",
+			expectedAllocStr: "",
+			expectedError:    true,
+			proto:            nil,
 		},
 		{
-			"test normal gpu alloc",
-			"0,1",
-			false,
-			&protocol.ContainerContext{
+			name:             "test normal gpu alloc",
+			expectedAllocStr: "0,1",
+			expectedError:    false,
+			proto: &protocol.ContainerContext{
 				Request: protocol.ContainerRequest{
 					PodAnnotations: map[string]string{
 						ext.AnnotationDeviceAllocated: "{\"gpu\": [{\"minor\": 0},{\"minor\": 1}]}",
 					},
 				},
 			},
+			expectedEnvs: map[string]string{GpuAllocEnv: "0,1"},
 		},
 		{
-			"test empty gpu alloc",
-			"",
-			false,
-			&protocol.ContainerContext{
+			name:             "test empty gpu alloc",
+			expectedAllocStr: "",
+			expectedError:    false,
+			proto: &protocol.ContainerContext{
 				Request: protocol.ContainerRequest{
 					PodAnnotations: map[string]string{
 						ext.AnnotationDeviceAllocated: "{\"fpga\": [{\"minor\": 0},{\"minor\": 1}]}",
@@ -62,18 +65,51 @@ func Test_InjectContainerGPUEnv(t *testing.T) {
 				},
 			},
 		},
+		{
+			name:             "gpu share with HAMi",
+			expectedAllocStr: "1",
+			expectedError:    false,
+			proto: &protocol.ContainerContext{
+				Request: protocol.ContainerRequest{
+					PodLabels: map[string]string{
+						ext.LabelGPUIsolationProvider: string(ext.GPUIsolationProviderHAMICore),
+					},
+					PodAnnotations: map[string]string{
+						ext.AnnotationDeviceAllocated: `{"gpu":[{"minor":1,"resources":{"koordinator.sh/gpu-core":"50","koordinator.sh/gpu-memory":"16Gi","koordinator.sh/gpu-memory-ratio":"50"}}]}`,
+					},
+				},
+			},
+			expectedEnvs: map[string]string{
+				GpuAllocEnv:                "1",
+				"CUDA_DEVICE_MEMORY_LIMIT": "17179869184",
+				"CUDA_DEVICE_SM_LIMIT":     "50",
+				"LD_PRELOAD":               "/usr/local/vgpu/libvgpu.so",
+			},
+			expectedMounts: []*protocol.Mount{
+				{
+					Destination: "/usr/local/vgpu/libvgpu.so",
+					Type:        "bind",
+					Source:      "/usr/local/vgpu/libvgpu.so",
+					Options:     []string{"rbind"},
+				},
+			},
+		},
 	}
 	plugin := gpuPlugin{}
 	for _, tt := range tests {
-		var containerCtx *protocol.ContainerContext
-		if tt.proto != nil {
-			containerCtx = tt.proto.(*protocol.ContainerContext)
-		}
-		err := plugin.InjectContainerGPUEnv(containerCtx)
-		assert.Equal(t, tt.expectedError, err != nil, tt.name)
-		if tt.proto != nil {
-			containerCtx := tt.proto.(*protocol.ContainerContext)
-			assert.Equal(t, containerCtx.Response.AddContainerEnvs[GpuAllocEnv], tt.expectedAllocStr, tt.name)
-		}
+		t.Run(tt.name, func(t *testing.T) {
+			var containerCtx *protocol.ContainerContext
+			if tt.proto != nil {
+				containerCtx = tt.proto.(*protocol.ContainerContext)
+			}
+			err := plugin.InjectContainerGPUEnv(containerCtx)
+			assert.Equal(t, tt.expectedError, err != nil, tt.name)
+			if tt.proto != nil {
+				containerCtx := tt.proto.(*protocol.ContainerContext)
+				assert.Equal(t, tt.expectedAllocStr, containerCtx.Response.AddContainerEnvs[GpuAllocEnv], tt.name)
+				assert.Equal(t, tt.expectedEnvs, containerCtx.Response.AddContainerEnvs, tt.name)
+				assert.Equal(t, tt.expectedMounts, containerCtx.Response.AddContainerMounts, tt.name)
+			}
+		})
 	}
 }
diff --git a/pkg/koordlet/runtimehooks/protocol/container_context.go b/pkg/koordlet/runtimehooks/protocol/container_context.go
index 5c76bdec8..aa1afeceb 100644
--- a/pkg/koordlet/runtimehooks/protocol/container_context.go
+++ b/pkg/koordlet/runtimehooks/protocol/container_context.go
@@ -183,6 +183,7 @@ func (c *ContainerRequest) FromReconciler(podMeta *statesinformer.PodMeta, conta
 type ContainerResponse struct {
 	Resources           Resources
 	AddContainerEnvs    map[string]string
+	AddContainerMounts  []*Mount
 	AddContainerDevices []*LinuxDevice
 }
 
@@ -288,6 +289,17 @@ func (c *ContainerContext) NriDone(executor resourceexecutor.ResourceUpdateExecu
 		}
 	}
 
+	if len(c.Response.AddContainerMounts) != 0 {
+		for _, m := range c.Response.AddContainerMounts {
+			adjust.AddMount(&api.Mount{
+				Destination: m.Destination,
+				Type:        m.Type,
+				Source:      m.Source,
+				Options:     m.Options,
+			})
+		}
+	}
+
 	if len(c.Response.AddContainerDevices) != 0 {
 		for i := range c.Response.AddContainerDevices {
 			adjust.AddDevice(&api.LinuxDevice{
diff --git a/pkg/koordlet/runtimehooks/protocol/protocol.go b/pkg/koordlet/runtimehooks/protocol/protocol.go
index bf7fc2ea9..845a6ebcd 100644
--- a/pkg/koordlet/runtimehooks/protocol/protocol.go
+++ b/pkg/koordlet/runtimehooks/protocol/protocol.go
@@ -135,6 +135,13 @@ func (r *Resources) FromContainer(container *corev1.Container) {
 	}
 }
 
+type Mount struct {
+	Destination string   `protobuf:"bytes,1,opt,name=destination,proto3" json:"destination,omitempty"`
+	Type        string   `protobuf:"bytes,2,opt,name=type,proto3" json:"type,omitempty"`
+	Source      string   `protobuf:"bytes,3,opt,name=source,proto3" json:"source,omitempty"`
+	Options     []string `protobuf:"bytes,4,rep,name=options,proto3" json:"options,omitempty"`
+}
+
 func injectCPUShares(cgroupParent string, cpuShares int64, a *audit.EventHelper, e resourceexecutor.ResourceUpdateExecutor) (resourceexecutor.ResourceUpdater, error) {
 	cpuShareStr := strconv.FormatInt(cpuShares, 10)
 	updater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUSharesName, cgroupParent, cpuShareStr, a)
diff --git a/pkg/koordlet/util/system/config.go b/pkg/koordlet/util/system/config.go
index 7941775e3..82769664c 100644
--- a/pkg/koordlet/util/system/config.go
+++ b/pkg/koordlet/util/system/config.go
@@ -44,11 +44,12 @@ type Config struct {
 	RunRootDir            string
 	RuntimeHooksConfigDir string
 
-	ContainerdEndPoint string
-	PouchEndpoint      string
-	DockerEndPoint     string
-	CrioEndPoint       string
-	DefaultRuntimeType string
+	ContainerdEndPoint           string
+	PouchEndpoint                string
+	DockerEndPoint               string
+	CrioEndPoint                 string
+	DefaultRuntimeType           string
+	HAMICoreLibraryDirectoryPath string
 }
 
 func init() {
@@ -77,15 +78,16 @@ func InitSupportConfigs() {
 
 func NewHostModeConfig() *Config {
 	return &Config{
-		CgroupRootDir:         "/sys/fs/cgroup/",
-		ProcRootDir:           "/proc/",
-		SysRootDir:            "/sys/",
-		SysFSRootDir:          "/sys/fs/",
-		VarRunRootDir:         "/var/run/",
-		VarLibKubeletRootDir:  "/var/lib/kubelet/",
-		RunRootDir:            "/run/",
-		RuntimeHooksConfigDir: "/etc/runtime/hookserver.d",
-		DefaultRuntimeType:    "containerd",
+		CgroupRootDir:                "/sys/fs/cgroup/",
+		ProcRootDir:                  "/proc/",
+		SysRootDir:                   "/sys/",
+		SysFSRootDir:                 "/sys/fs/",
+		VarRunRootDir:                "/var/run/",
+		VarLibKubeletRootDir:         "/var/lib/kubelet/",
+		RunRootDir:                   "/run/",
+		RuntimeHooksConfigDir:        "/etc/runtime/hookserver.d",
+		DefaultRuntimeType:           "containerd",
+		HAMICoreLibraryDirectoryPath: "/usr/local/vgpu/libvgpu.so",
 	}
 }
 
@@ -93,14 +95,15 @@ func NewDsModeConfig() *Config {
 	return &Config{
 		CgroupRootDir: "/host-cgroup/",
 		// some dirs are not covered by ns, or unused with `hostPID` is on
-		ProcRootDir:           "/proc/",
-		SysRootDir:            "/host-sys/",
-		SysFSRootDir:          "/host-sys-fs/",
-		VarRunRootDir:         "/host-var-run/",
-		VarLibKubeletRootDir:  "/var/lib/kubelet/",
-		RunRootDir:            "/host-run/",
-		RuntimeHooksConfigDir: "/host-etc-hookserver/",
-		DefaultRuntimeType:    "containerd",
+		ProcRootDir:                  "/proc/",
+		SysRootDir:                   "/host-sys/",
+		SysFSRootDir:                 "/host-sys-fs/",
+		VarRunRootDir:                "/host-var-run/",
+		VarLibKubeletRootDir:         "/var/lib/kubelet/",
+		RunRootDir:                   "/host-run/",
+		RuntimeHooksConfigDir:        "/host-etc-hookserver/",
+		DefaultRuntimeType:           "containerd",
+		HAMICoreLibraryDirectoryPath: "/usr/local/vgpu/libvgpu.so",
 	}
 }
 
@@ -122,4 +125,5 @@ func (c *Config) InitFlags(fs *flag.FlagSet) {
 	fs.StringVar(&c.PouchEndpoint, "pouch-endpoint", c.PouchEndpoint, "pouch endPoint")
 	fs.StringVar(&c.DefaultRuntimeType, "default-runtime-type", c.DefaultRuntimeType,
 		"default runtime type during runtime hooks handle request, candidates are containerd/docker/pouch.")
+	fs.StringVar(&c.HAMICoreLibraryDirectoryPath, "hami-core-library-directory-path", c.HAMICoreLibraryDirectoryPath, "path of hami core library")
 }
diff --git a/pkg/koordlet/util/system/config_test.go b/pkg/koordlet/util/system/config_test.go
index f5acc0bd7..55eb9a158 100644
--- a/pkg/koordlet/util/system/config_test.go
+++ b/pkg/koordlet/util/system/config_test.go
@@ -24,15 +24,16 @@ import (
 
 func Test_NewDsModeConfig(t *testing.T) {
 	expectConfig := &Config{
-		CgroupRootDir:         "/host-cgroup/",
-		ProcRootDir:           "/proc/",
-		SysRootDir:            "/host-sys/",
-		SysFSRootDir:          "/host-sys-fs/",
-		VarRunRootDir:         "/host-var-run/",
-		VarLibKubeletRootDir:  "/var/lib/kubelet/",
-		RunRootDir:            "/host-run/",
-		RuntimeHooksConfigDir: "/host-etc-hookserver/",
-		DefaultRuntimeType:    "containerd",
+		CgroupRootDir:                "/host-cgroup/",
+		ProcRootDir:                  "/proc/",
+		SysRootDir:                   "/host-sys/",
+		SysFSRootDir:                 "/host-sys-fs/",
+		VarRunRootDir:                "/host-var-run/",
+		VarLibKubeletRootDir:         "/var/lib/kubelet/",
+		RunRootDir:                   "/host-run/",
+		RuntimeHooksConfigDir:        "/host-etc-hookserver/",
+		DefaultRuntimeType:           "containerd",
+		HAMICoreLibraryDirectoryPath: "/usr/local/vgpu/libvgpu.so",
 	}
 	defaultConfig := NewDsModeConfig()
 	assert.Equal(t, expectConfig, defaultConfig)
@@ -40,15 +41,16 @@ func Test_NewDsModeConfig(t *testing.T) {
 
 func Test_NewHostModeConfig(t *testing.T) {
 	expectConfig := &Config{
-		CgroupRootDir:         "/sys/fs/cgroup/",
-		ProcRootDir:           "/proc/",
-		SysRootDir:            "/sys/",
-		SysFSRootDir:          "/sys/fs/",
-		VarRunRootDir:         "/var/run/",
-		VarLibKubeletRootDir:  "/var/lib/kubelet/",
-		RunRootDir:            "/run/",
-		RuntimeHooksConfigDir: "/etc/runtime/hookserver.d",
-		DefaultRuntimeType:    "containerd",
+		CgroupRootDir:                "/sys/fs/cgroup/",
+		ProcRootDir:                  "/proc/",
+		SysRootDir:                   "/sys/",
+		SysFSRootDir:                 "/sys/fs/",
+		VarRunRootDir:                "/var/run/",
+		VarLibKubeletRootDir:         "/var/lib/kubelet/",
+		RunRootDir:                   "/run/",
+		RuntimeHooksConfigDir:        "/etc/runtime/hookserver.d",
+		DefaultRuntimeType:           "containerd",
+		HAMICoreLibraryDirectoryPath: "/usr/local/vgpu/libvgpu.so",
 	}
 	defaultConfig := NewHostModeConfig()
 	assert.Equal(t, expectConfig, defaultConfig)