From c777dc08c24795a574411975ea94c84c95109e7e Mon Sep 17 00:00:00 2001
From: "wangjianyu.wjy" <wangjianyu.wjy@alibaba-inc.com>
Date: Wed, 20 Nov 2024 17:26:32 +0800
Subject: [PATCH] koordlet: support strict gpu share

Signed-off-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
---
 apis/extension/device_share.go                |  8 ++
 pkg/koordlet/runtimehooks/hooks/gpu/gpu.go    | 26 ++++++
 .../runtimehooks/hooks/gpu/gpu_test.go        | 79 +++++++++++++------
 .../protocol/container_context.go             | 10 ++-
 4 files changed, 99 insertions(+), 24 deletions(-)

diff --git a/apis/extension/device_share.go b/apis/extension/device_share.go
index 8cd977ebb..b53425e71 100644
--- a/apis/extension/device_share.go
+++ b/apis/extension/device_share.go
@@ -58,6 +58,8 @@ const (
 	LabelGPUModel                   string = NodeDomainPrefix + "/gpu-model"
 	LabelGPUDriverVersion           string = NodeDomainPrefix + "/gpu-driver-version"
 	LabelSecondaryDeviceWellPlanned string = NodeDomainPrefix + "/secondary-device-well-planned"
+
+	LabelGPUIsolationProvider = "koordinator.sh/gpu-isolation-provider" // key under which a pod names its GPU isolation provider; values are GPUIsolationProvider constants
 )
 
 // DeviceAllocations would be injected into Pod as form of annotation during Pre-bind stage.
@@ -202,6 +204,15 @@ const (
 	GPUPartitionPolicyPrefer GPUPartitionPolicy = "Prefer"
 )
 
+// GPUIsolationProvider identifies the GPU isolation provider requested for a pod.
+// It is the value type of the LabelGPUIsolationProvider key.
+type GPUIsolationProvider string
+
+const (
+	// GPUIsolationProviderHAMICore selects HAMi-core as the GPU isolation provider.
+	GPUIsolationProviderHAMICore GPUIsolationProvider = "HAMi-core"
+)
+
 func GetDeviceAllocations(podAnnotations map[string]string) (DeviceAllocations, error) {
 	deviceAllocations := DeviceAllocations{}
 	data, ok := podAnnotations[AnnotationDeviceAllocated]
diff --git a/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go b/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go
index 65b6094b2..c9e86b64c 100644
--- a/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go
+++ b/pkg/koordlet/runtimehooks/hooks/gpu/gpu.go
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"strings"
 
+	"github.com/containerd/nri/pkg/api"
 	"k8s.io/klog/v2"
 
 	ext "github.com/koordinator-sh/koordinator/apis/extension"
@@ -70,5 +71,30 @@ func (p *gpuPlugin) InjectContainerGPUEnv(proto protocol.HooksProtocol) error {
 		containerCtx.Response.AddContainerEnvs = make(map[string]string)
 	}
 	containerCtx.Response.AddContainerEnvs[GpuAllocEnv] = strings.Join(gpuIDs, ",")
+	if containerReq.PodAnnotations[ext.LabelGPUIsolationProvider] == string(ext.GPUIsolationProviderHAMICore) { // NOTE(review): the key is named Label* but is looked up in pod annotations — confirm the intended source
+		gpuResources := devices[0].Resources // the limits below are derived from the first allocated device only
+		gpuMemoryRatio, ok := gpuResources[ext.ResourceGPUMemoryRatio]
+		if !ok {
+			return fmt.Errorf("gpu memory ratio not found in gpu resource")
+		}
+		if gpuMemoryRatio.Value() < 100 { // presumably a ratio of 100 means a whole GPU, which needs no limits — TODO confirm
+			gpuMemory, ok := gpuResources[ext.ResourceGPUMemory]
+			if !ok {
+				return fmt.Errorf("gpu memory not found in gpu resource")
+			}
+			containerCtx.Response.AddContainerEnvs["CUDA_DEVICE_MEMORY_LIMIT"] = fmt.Sprintf("%d", gpuMemory.Value())
+			gpuCore, ok := gpuResources[ext.ResourceGPUCore]
+			if ok { // the SM limit is optional: it is set only when a gpu-core resource is present
+				containerCtx.Response.AddContainerEnvs["CUDA_DEVICE_SM_LIMIT"] = fmt.Sprintf("%d", gpuCore.Value())
+			}
+			containerCtx.Response.AddContainerEnvs["LD_PRELOAD"] = "/libvgpu.so" // preload the library that is bind-mounted just below
+			containerCtx.Response.AddContainerMounts = append(containerCtx.Response.AddContainerMounts, &api.Mount{
+				Destination: "/libvgpu.so",
+				Type:        "bind",
+				Source:      "/data/bin/libvgpu.so", // host path; NOTE(review): hard-coded — confirm it exists on every node
+			})
+		}
+	}
+
 	return nil
 }
diff --git a/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go b/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go
index cd1af7550..f1fbe31c0 100644
--- a/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go
+++ b/pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go
@@ -19,6 +19,7 @@ package gpu
 import (
 	"testing"
 
+	"github.com/containerd/nri/pkg/api"
 	"github.com/stretchr/testify/assert"
 
 	ext "github.com/koordinator-sh/koordinator/apis/extension"
@@ -31,30 +32,33 @@ func Test_InjectContainerGPUEnv(t *testing.T) {
 		expectedAllocStr string
 		expectedError    bool
 		proto            protocol.HooksProtocol
+		expectedMounts   []*api.Mount
+		expectedEnvs     map[string]string
 	}{
 		{
-			"test empty proto",
-			"",
-			true,
-			nil,
+			name:             "test empty proto",
+			expectedAllocStr: "",
+			expectedError:    true,
+			proto:            nil,
 		},
 		{
-			"test normal gpu alloc",
-			"0,1",
-			false,
-			&protocol.ContainerContext{
+			name:             "test normal gpu alloc",
+			expectedAllocStr: "0,1",
+			expectedError:    false,
+			proto: &protocol.ContainerContext{
 				Request: protocol.ContainerRequest{
 					PodAnnotations: map[string]string{
 						ext.AnnotationDeviceAllocated: "{\"gpu\": [{\"minor\": 0},{\"minor\": 1}]}",
 					},
 				},
 			},
+			expectedEnvs: map[string]string{GpuAllocEnv: "0,1"},
 		},
 		{
-			"test empty gpu alloc",
-			"",
-			false,
-			&protocol.ContainerContext{
+			name:             "test empty gpu alloc",
+			expectedAllocStr: "",
+			expectedError:    false,
+			proto: &protocol.ContainerContext{
 				Request: protocol.ContainerRequest{
 					PodAnnotations: map[string]string{
 						ext.AnnotationDeviceAllocated: "{\"fpga\": [{\"minor\": 0},{\"minor\": 1}]}",
@@ -62,18 +66,49 @@ func Test_InjectContainerGPUEnv(t *testing.T) {
 				},
 			},
 		},
+		{
+			name:             "gpu share with HAMi",
+			expectedAllocStr: "1",
+			expectedError:    false,
+			proto: &protocol.ContainerContext{
+				Request: protocol.ContainerRequest{
+					PodAnnotations: map[string]string{
+						ext.AnnotationDeviceAllocated: `{"gpu":[{"minor":1,"resources":{"koordinator.sh/gpu-core":"50","koordinator.sh/gpu-memory":"16Gi","koordinator.sh/gpu-memory-ratio":"50"}}]}`,
+						ext.LabelGPUIsolationProvider: string(ext.GPUIsolationProviderHAMICore),
+					},
+				},
+			},
+			expectedEnvs: map[string]string{
+				GpuAllocEnv:                "1",
+				"CUDA_DEVICE_MEMORY_LIMIT": "17179869184",
+				"CUDA_DEVICE_SM_LIMIT":     "50",
+				"LD_PRELOAD":               "/libvgpu.so",
+			},
+			expectedMounts: []*api.Mount{
+				{
+					Destination: "/libvgpu.so",
+					Type:        "bind",
+					Source:      "/data/bin/libvgpu.so",
+				},
+			},
+		},
 	}
 	plugin := gpuPlugin{}
 	for _, tt := range tests {
-		var containerCtx *protocol.ContainerContext
-		if tt.proto != nil {
-			containerCtx = tt.proto.(*protocol.ContainerContext)
-		}
-		err := plugin.InjectContainerGPUEnv(containerCtx)
-		assert.Equal(t, tt.expectedError, err != nil, tt.name)
-		if tt.proto != nil {
-			containerCtx := tt.proto.(*protocol.ContainerContext)
-			assert.Equal(t, containerCtx.Response.AddContainerEnvs[GpuAllocEnv], tt.expectedAllocStr, tt.name)
-		}
+		t.Run(tt.name, func(t *testing.T) {
+			var containerCtx *protocol.ContainerContext
+			if tt.proto != nil {
+				containerCtx = tt.proto.(*protocol.ContainerContext)
+			}
+			err := plugin.InjectContainerGPUEnv(containerCtx)
+			assert.Equal(t, tt.expectedError, err != nil, tt.name)
+			if tt.proto != nil {
+				// assert.Equal takes (expected, actual); the order decides how a failure is labelled.
+				assert.Equal(t, tt.expectedAllocStr, containerCtx.Response.AddContainerEnvs[GpuAllocEnv], tt.name)
+				assert.Equal(t, tt.expectedEnvs, containerCtx.Response.AddContainerEnvs, tt.name)
+				assert.Equal(t, tt.expectedMounts, containerCtx.Response.AddContainerMounts, tt.name)
+			}
+		})
+
 	}
 }
diff --git a/pkg/koordlet/runtimehooks/protocol/container_context.go b/pkg/koordlet/runtimehooks/protocol/container_context.go
index a39ba04a9..311fc9bc7 100644
--- a/pkg/koordlet/runtimehooks/protocol/container_context.go
+++ b/pkg/koordlet/runtimehooks/protocol/container_context.go
@@ -181,8 +181,9 @@ func (c *ContainerRequest) FromReconciler(podMeta *statesinformer.PodMeta, conta
 }
 
 type ContainerResponse struct {
-	Resources        Resources
-	AddContainerEnvs map[string]string
+	Resources          Resources
+	AddContainerEnvs   map[string]string // env vars that hooks want added to the container
+	AddContainerMounts []*api.Mount      // mounts that hooks want added; NriDone passes each one to adjust.AddMount
 }
 
 func (c *ContainerResponse) ProxyDone(resp *runtimeapi.ContainerResourceHookResponse) {
@@ -278,6 +279,11 @@ func (c *ContainerContext) NriDone(executor resourceexecutor.ResourceUpdateExecu
 			adjust.AddEnv(k, v)
 		}
 	}
+	if len(c.Response.AddContainerMounts) != 0 { // the length check is redundant with range over an empty slice, but harmless
+		for _, m := range c.Response.AddContainerMounts {
+			adjust.AddMount(m)
+		}
+	}
 
 	c.Update()