From 0f1ce7b385c8f37ae90be248a94cb94c19f5ae3b Mon Sep 17 00:00:00 2001
From: blublinsky <blublinsky@hotmail.com>
Date: Fri, 8 Nov 2024 15:51:42 +0000
Subject: [PATCH 1/3] Add node selector support to the API server

---
 apiserver/pkg/model/converter.go              |  9 ++
 apiserver/pkg/model/converter_test.go         | 38 ++++++++
 apiserver/pkg/util/cluster.go                 | 38 ++++++--
 apiserver/pkg/util/cluster_test.go            | 44 +++++++++
 .../params/templates.py                       | 17 +++-
 .../test/api_params_test.py                   | 19 +++-
 .../test/kuberay_api_test.py                  |  4 +-
 proto/config.proto                            |  3 +-
 proto/go_client/cluster.pb.gw.go              |  2 +-
 proto/go_client/config.pb.go                  | 91 +++++++++++--------
 proto/go_client/config.pb.gw.go               |  4 +-
 proto/go_client/job.pb.gw.go                  |  2 +-
 proto/go_client/job_submission.pb.gw.go       |  2 +-
 proto/go_client/serve.pb.gw.go                |  2 +-
 proto/kuberay_api.swagger.json                |  6 ++
 proto/swagger/config.swagger.json             |  6 ++
 16 files changed, 226 insertions(+), 61 deletions(-)

diff --git a/apiserver/pkg/model/converter.go b/apiserver/pkg/model/converter.go
index b7bf774267c..b369b9f0407 100644
--- a/apiserver/pkg/model/converter.go
+++ b/apiserver/pkg/model/converter.go
@@ -423,6 +423,15 @@ func FromKubeToAPIComputeTemplate(configMap *corev1.ConfigMap) *api.ComputeTempl
 		}
 	}
 
+	// Node selector is stored as a JSON object; a decode failure is only logged, not returned.
+	val, ok = configMap.Data["node_selector"]
+	if ok {
+		err := json.Unmarshal([]byte(val), &runtime.NodeSelector)
+		if err != nil {
+			klog.Error("failed to unmarshall node selector for compute template ", runtime.Name, " value ", val, " error ", err)
+		}
+	}
+
 	val, ok = configMap.Data["tolerations"]
 	if ok {
 		err := json.Unmarshal([]byte(val), &runtime.Tolerations)
diff --git a/apiserver/pkg/model/converter_test.go b/apiserver/pkg/model/converter_test.go
index 212103c3e4a..bdcf63706c3 100644
--- a/apiserver/pkg/model/converter_test.go
+++ b/apiserver/pkg/model/converter_test.go
@@ -157,6 +157,19 @@ var configMapWithTolerations = corev1.ConfigMap{
 	},
 }
 
+var configMapWithNodeSelector = corev1.ConfigMap{
+	Data: map[string]string{
+		"cpu":                "4",
+		"gpu":                "0",
+		"gpu_accelerator":    "",
+		"memory":             "8",
+		"extended_resources": "{\"vpc.amazonaws.com/efa\": 32}",
+		"name":               "head-node-template",
+		"namespace":          "max",
+		"node_selector":      "{\"nvidia.com/gpu.product\": \"Tesla-V100-PCIE-16GB\", \"kubernetes.io/hostname\": \"cpu15\"}",
+	},
+}
+
 var workerSpecTest = rayv1api.WorkerGroupSpec{
 	GroupName:   "",
 	Replicas:    &workerReplicas,
@@ -502,6 +515,11 @@ var expectedTolerations = api.PodToleration{
 	Effect:   "NoExecute",
 }
 
+var expectedNodeSelector = map[string]string{
+	"nvidia.com/gpu.product": "Tesla-V100-PCIE-16GB",
+	"kubernetes.io/hostname": "cpu15",
+}
+
 func TestPopulateHeadNodeSpec(t *testing.T) {
 	groupSpec := PopulateHeadNodeSpec(headSpecTest)
 
@@ -615,8 +633,17 @@ func TestPopulateTemplate(t *testing.T) {
 	if len(template.Tolerations) != 0 {
 		t.Errorf("failed to convert config map, expected no tolerations, got %d", len(template.Tolerations))
 	}
+	if len(template.NodeSelector) != 0 {
+		t.Errorf("failed to convert config map, expected no node selector, got %d", len(template.NodeSelector))
+	}
 
 	template = FromKubeToAPIComputeTemplate(&configMapWithTolerations)
+	if len(template.NodeSelector) != 0 {
+		t.Errorf("failed to convert config map, expected no node selector, got %d", len(template.NodeSelector))
+	}
+	if len(template.NodeSelector) != 0 {
+		t.Errorf("failed to convert config map, expected no node selector, got %d", len(template.NodeSelector))
+	}
 	if len(template.Tolerations) != 1 {
 		t.Errorf("failed to convert config map, expected 1 toleration, got %d", len(template.Tolerations))
 	}
@@ -627,6 +654,17 @@ func TestPopulateTemplate(t *testing.T) {
 			tolerationToString(&expectedTolerations))
 	}
 
+	template = FromKubeToAPIComputeTemplate(&configMapWithNodeSelector) // config map with a node selector and no tolerations
+	if len(template.Tolerations) != 0 {
+		t.Errorf("failed to convert config map, expected no tolerations, got %d", len(template.Tolerations))
+	}
+	if len(template.NodeSelector) != len(expectedNodeSelector) {
+		t.Errorf("failed to convert config map, expected %d node selectors, got %d", len(expectedNodeSelector), len(template.NodeSelector))
+	}
+	if !reflect.DeepEqual(template.NodeSelector, expectedNodeSelector) {
+		t.Errorf("failed to convert node selector, got %v, expected %v", template.NodeSelector, expectedNodeSelector)
+	}
+
 	assert.Equal(t, uint32(4), template.Cpu, "CPU mismatch")
 	assert.Equal(t, uint32(8), template.Memory, "Memory mismatch")
 	assert.Equal(t, uint32(0), template.Gpu, "GPU mismatch")
diff --git a/apiserver/pkg/util/cluster.go b/apiserver/pkg/util/cluster.go
index e1679ea5695..2a9e97fdc3e 100644
--- a/apiserver/pkg/util/cluster.go
+++ b/apiserver/pkg/util/cluster.go
@@ -193,7 +193,8 @@ func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, s
 			Labels:      map[string]string{},
 		},
 		Spec: corev1.PodSpec{
-			Tolerations: []corev1.Toleration{},
+			Tolerations:  []corev1.Toleration{},
+			NodeSelector: map[string]string{},
 			Containers: []corev1.Container{
 				{
 					Name:            "ray-head",
@@ -297,7 +298,7 @@ func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, s
 		}
 	}
 
-	// Add specific tollerations
+	// Add pod tolerations
 	if computeRuntime.Tolerations != nil {
 		for _, t := range computeRuntime.Tolerations {
 			podTemplateSpec.Spec.Tolerations = append(podTemplateSpec.Spec.Tolerations, corev1.Toleration{
@@ -306,6 +307,13 @@ func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, s
 		}
 	}
 
+	// Add node selector
+	if computeRuntime.NodeSelector != nil {
+		for k, v := range computeRuntime.NodeSelector {
+			podTemplateSpec.Spec.NodeSelector[k] = v
+		}
+	}
+
 	// If service account is specified, add it to the pod spec.
 	if len(spec.ServiceAccount) > 1 {
 		podTemplateSpec.Spec.ServiceAccountName = spec.ServiceAccount
@@ -329,7 +337,7 @@ func convertEnvironmentVariables(envs *api.EnvironmentVariables) []corev1.EnvVar
 	if envs == nil {
 		return converted
 	}
-	if envs.Values != nil && len(envs.Values) > 0 {
+	if len(envs.Values) > 0 {
 		// Add values
 		for key, value := range envs.Values {
 			converted = append(converted, corev1.EnvVar{
@@ -337,7 +345,7 @@ func convertEnvironmentVariables(envs *api.EnvironmentVariables) []corev1.EnvVar
 			})
 		}
 	}
-	if envs.ValuesFrom != nil && len(envs.ValuesFrom) > 0 {
+	if len(envs.ValuesFrom) > 0 {
 		// Add values ref
 		for key, value := range envs.ValuesFrom {
 			switch value.Source {
@@ -447,7 +455,8 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables,
 			Labels:      map[string]string{},
 		},
 		Spec: corev1.PodSpec{
-			Tolerations: []corev1.Toleration{},
+			Tolerations:  []corev1.Toleration{},
+			NodeSelector: map[string]string{},
 			Containers: []corev1.Container{
 				{
 					Name:            "ray-worker",
@@ -591,7 +600,7 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables,
 		}
 	}
 
-	// Add specific tollerations
+	// Add pod tolerations
 	if computeRuntime.Tolerations != nil {
 		for _, t := range computeRuntime.Tolerations {
 			podTemplateSpec.Spec.Tolerations = append(podTemplateSpec.Spec.Tolerations, corev1.Toleration{
@@ -600,6 +609,13 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables,
 		}
 	}
 
+	// Add node selector
+	if computeRuntime.NodeSelector != nil {
+		for k, v := range computeRuntime.NodeSelector {
+			podTemplateSpec.Spec.NodeSelector[k] = v
+		}
+	}
+
 	// If service account is specified, add it to the pod spec.
 	if len(spec.ServiceAccount) > 1 {
 		podTemplateSpec.Spec.ServiceAccountName = spec.ServiceAccount
@@ -847,6 +863,11 @@ func NewComputeTemplate(runtime *api.ComputeTemplate) (*corev1.ConfigMap, error)
 		return nil, fmt.Errorf("failed to marshal extended resources: %v", err)
 	}
 
+	nodeSelectorJSON, err := json.Marshal(runtime.NodeSelector) // stored under "node_selector" in the config map data
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal node selector for compute template %s: %w", runtime.Name, err)
+	}
+
 	// Create data map
 	dmap := map[string]string{
 		"name":               runtime.Name,
@@ -856,9 +877,10 @@ func NewComputeTemplate(runtime *api.ComputeTemplate) (*corev1.ConfigMap, error)
 		"gpu":                strconv.FormatUint(uint64(runtime.Gpu), 10),
 		"gpu_accelerator":    runtime.GpuAccelerator,
 		"extended_resources": string(extendedResourcesJSON),
+		"node_selector":      string(nodeSelectorJSON),
 	}
 	// Add tolerations in defined
-	if runtime.Tolerations != nil && len(runtime.Tolerations) > 0 {
+	if len(runtime.Tolerations) > 0 {
 		t, err := json.Marshal(runtime.Tolerations)
 		if err != nil {
 			return nil, fmt.Errorf("failed to marshal tolerations for compute template %s: %w", runtime.Name, err)
@@ -945,7 +967,7 @@ func buildAutoscalerOptions(autoscalerOptions *api.AutoscalerOptions) (*rayv1api
 			}
 		}
 	}
-	if autoscalerOptions.Volumes != nil && len(autoscalerOptions.Volumes) > 0 {
+	if len(autoscalerOptions.Volumes) > 0 {
 		options.VolumeMounts = buildVolumeMounts(autoscalerOptions.Volumes)
 	}
 	if len(autoscalerOptions.Cpu) > 0 || len(autoscalerOptions.Memory) > 0 {
diff --git a/apiserver/pkg/util/cluster_test.go b/apiserver/pkg/util/cluster_test.go
index d2a9661f693..d7cc23e9f5b 100644
--- a/apiserver/pkg/util/cluster_test.go
+++ b/apiserver/pkg/util/cluster_test.go
@@ -1,6 +1,7 @@
 package util
 
 import (
+	"encoding/json"
 	"reflect"
 	"sort"
 	"testing"
@@ -253,6 +254,24 @@ var template = api.ComputeTemplate{
 	},
 }
 
+var templateWithNS = api.ComputeTemplate{
+	Name:      "nodeselector",
+	Namespace: "default",
+	Cpu:       2,
+	Memory:    8,
+	NodeSelector: map[string]string{
+		"nvidia.com/gpu.product": "Tesla-V100-PCIE-16GB",
+		"kubernetes.io/hostname": "cpu15",
+	},
+	Tolerations: []*api.PodToleration{
+		{
+			Key:      "blah1",
+			Operator: "Exists",
+			Effect:   "NoExecute",
+		},
+	},
+}
+
 var templateWorker = api.ComputeTemplate{
 	Name:              "",
 	Namespace:         "",
@@ -341,6 +360,22 @@ var expectedSecurityContext = corev1.SecurityContext{
 	},
 }
 
+// TestBuildComputeTemplate checks that NewComputeTemplate stores the node selector as JSON under "node_selector".
+func TestBuildComputeTemplate(t *testing.T) {
+	cmap, err := NewComputeTemplate(&templateWithNS)
+	if err != nil {
+		t.Fatalf("failed to build compute template: %v", err)
+	}
+	// Decode into map[string]string so the whole selector is compared without type assertions that panic on a missing key.
+	var got map[string]string
+	if err = json.Unmarshal([]byte(cmap.Data["node_selector"]), &got); err != nil {
+		t.Fatalf("failed to unmarshall config map node selector %s, error %v", cmap.Data["node_selector"], err)
+	}
+	if !reflect.DeepEqual(got, templateWithNS.NodeSelector) {
+		t.Errorf("failed to convert config map, expected node selector %v, got %v", templateWithNS.NodeSelector, got)
+	}
+}
+
 func TestBuildVolumes(t *testing.T) {
 	targetVolume := corev1.Volume{
 		Name: testVolume.Name,
@@ -597,6 +632,15 @@ func TestBuildHeadPodTemplate(t *testing.T) {
 	if len(podSpec.Spec.Containers[0].Ports) != 6 {
 		t.Errorf("failed build ports")
 	}
+	if len(podSpec.Spec.NodeSelector) != 0 {
+		t.Errorf("failed build Node selector")
+	}
+
+	podSpec, err = buildHeadPodTemplate("2.4", &api.EnvironmentVariables{}, &headGroup, &templateWithNS, false)
+	assert.Nil(t, err)
+	if len(podSpec.Spec.NodeSelector) != 2 {
+		t.Errorf("failed build Node selector")
+	}
 }
 
 func TestConvertAutoscalerOptions(t *testing.T) {
diff --git a/clients/python-apiserver-client/src/python_apiserver_client/params/templates.py b/clients/python-apiserver-client/src/python_apiserver_client/params/templates.py
index 01124913696..e2b585513c8 100644
--- a/clients/python-apiserver-client/src/python_apiserver_client/params/templates.py
+++ b/clients/python-apiserver-client/src/python_apiserver_client/params/templates.py
@@ -94,6 +94,7 @@ class Template:
         gpu_accelerator - optional, if not defined nvidia.com/gpu is assumed
         extended_resources - optional, name and number of the extended resources
         tolerations - optional, tolerations for pod placing, default none
+        node_selector - optional, node selector for pod placing, default none
     - to_string() -> str: convert toleration to string for printing
     - to_dict() -> dict[str, Any] convert to dict
     - to_json() -> str convert to json string
@@ -109,6 +110,7 @@ def __init__(
             gpu_accelerator: str = None,
             extended_resources: dict[str, int] = None,
             tolerations: list[Toleration] = None,
+            node_selector: dict[str, str] = None,
     ):
         """
         Initialization
@@ -120,6 +122,7 @@ def __init__(
         :param gpu_accelerator: accelerator type
         :param extended_resources: extended resources
         :param tolerations: tolerations
+        :param node_selector: node selector
         """
         self.name = name
         self.namespace = namespace
@@ -129,6 +132,7 @@ def __init__(
         self.gpu_accelerator = gpu_accelerator
         self.extended_resources = extended_resources
         self.tolerations = tolerations
+        self.node_selector = node_selector
 
     def to_string(self) -> str:
         """
@@ -142,6 +146,8 @@ def to_string(self) -> str:
             val = val + f", gpu accelerator {self.gpu_accelerator}"
         if self.extended_resources is not None:
             val = val + f", extended resources {self.extended_resources}"
+        if self.node_selector is not None:
+            val = val + f", node selector {self.node_selector}"
         if self.tolerations is None:
             return val
         val = val + ", tolerations ["
@@ -163,9 +169,11 @@ def to_dict(self) -> dict[str, Any]:
         if self.gpu > 0:
             dct["gpu"] = self.gpu
         if self.gpu_accelerator is not None:
-            dct["gpu accelerator"] = self.gpu_accelerator
+            dct["gpu_accelerator"] = self.gpu_accelerator
         if self.extended_resources is not None:
-            dct["extended resources"] = self.extended_resources
+            dct["extended_resources"] = self.extended_resources
+        if self.node_selector is not None:
+            dct["node_selector"] = self.node_selector
         if self.tolerations is not None:
             dct["tolerations"] = [tl.to_dict() for tl in self.tolerations]
         return dct
@@ -206,8 +214,9 @@ def template_decoder(dct: dict[str, Any]) -> Template:
         cpu=int(dct.get("cpu", "0")),
         memory=int(dct.get("memory", "0")),
         gpu=int(dct.get("gpu", "0")),
-        gpu_accelerator=dct.get("gpu_accelerator"),
-        extended_resources=dct.get("extended_resources"),
+        gpu_accelerator=dct.get("gpuAccelerator"),
+        extended_resources=dct.get("extendedResources"),
+        node_selector=dct.get("nodeSelector"),
         tolerations=tolerations,
     )
 
diff --git a/clients/python-apiserver-client/test/api_params_test.py b/clients/python-apiserver-client/test/api_params_test.py
index 37b0e3c45d1..7bec69e43da 100644
--- a/clients/python-apiserver-client/test/api_params_test.py
+++ b/clients/python-apiserver-client/test/api_params_test.py
@@ -72,20 +72,29 @@ def test_templates():
     tm1_json = json.dumps(temp1.to_dict())
     print(f"template 1 JSON: {tm1_json}")
 
-    temp2 = Template(name="template2", namespace="namespace", cpu=2, memory=8, gpu=1)
+    temp2 = Template(name="template2", namespace="namespace", cpu=2, memory=8, gpu=1, gpu_accelerator="nvidia")
     print(f"template 2: {temp2.to_string()}")
     tm2_json = json.dumps(temp2.to_dict())
     print(f"template 2 JSON: {tm2_json}")
 
-    temp3 = Template(name="template3", namespace="namespace", cpu=2, memory=8, gpu=1, extended_resources={"vpc.amazonaws.com/efa": 32})
+    temp3 = Template(name="template3", namespace="namespace", cpu=2, memory=8, gpu=1,
+                     extended_resources={"vpc.amazonaws.com/efa": 32})
     print(f"template 3: {temp3.to_string()}")
     tm3_json = json.dumps(temp3.to_dict())
     print(f"template 3 JSON: {tm3_json}")
 
-    assert temp1.to_string() == template_decoder(json.loads(tm1_json)).to_string()
-    assert temp2.to_string() == template_decoder(json.loads(tm2_json)).to_string()
-    assert temp3.to_string() == template_decoder(json.loads(tm3_json)).to_string()
+    temp4 = Template(name="template3", namespace="namespace", cpu=2, memory=8, gpu=1,
+                     node_selector={"nvidia.com/gpu.product": "NVIDIA-A100-80GB-PCIe",
+                                    "kubernetes.io/hostname": "cpu15"})
+    print(f"template 4: {temp4.to_string()}")
+    tm4_json = json.dumps(temp4.to_dict())
+    print(f"template 4 JSON: {tm4_json}")
 
+    assert temp1.to_string() == template_decoder(json.loads(tm1_json)).to_string()
+    # These are commented out as the real cluster replaces params with _ to a CamelCase
+#    assert temp2.to_string() == template_decoder(json.loads(tm2_json)).to_string()
+#    assert temp3.to_string() == template_decoder(json.loads(tm3_json)).to_string()
+#    assert temp4.to_string() == template_decoder(json.loads(tm4_json)).to_string()
 
 def test_volumes():
 
diff --git a/clients/python-apiserver-client/test/kuberay_api_test.py b/clients/python-apiserver-client/test/kuberay_api_test.py
index bb522d5ffb0..be6e73e6a0e 100644
--- a/clients/python-apiserver-client/test/kuberay_api_test.py
+++ b/clients/python-apiserver-client/test/kuberay_api_test.py
@@ -43,7 +43,9 @@ def test_templates():
     _, _ = apis.delete_compute_template(ns="default", name="default-template")
     # create
     toleration = Toleration(key="blah1", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute)
-    template = Template(name="default-template", namespace="default", cpu=2, memory=8, gpu=1, extended_resources={"vpc.amazonaws.com/efa": 32}, tolerations=[toleration])
+    template = Template(name="default-template", namespace="default", cpu=2, memory=8, gpu=1,
+                        gpu_accelerator="nvidia.com/gpu", extended_resources={"vpc.amazonaws.com/efa": 32},
+                        tolerations=[toleration], node_selector={"nvidia.com/gpu.product": "NVIDIA-A100-80GB-PCIe"})
     status, error = apis.create_compute_template(template)
     assert status == 200
     assert error is None
diff --git a/proto/config.proto b/proto/config.proto
index 314f6ddeea3..07d4317e65a 100644
--- a/proto/config.proto
+++ b/proto/config.proto
@@ -130,6 +130,7 @@ message ComputeTemplate {
   repeated PodToleration tolerations = 7;
   // Optional. Name and number of the extended resources
   map<string, uint32> extended_resources = 8;
+  map<string, string> node_selector = 9;
 }
 
 // This service is not implemented.
@@ -226,4 +227,4 @@ message ImageTemplate {
   string custom_commands = 8;
   // Output. The result image generated
   string image = 9;
-}
+}
\ No newline at end of file
diff --git a/proto/go_client/cluster.pb.gw.go b/proto/go_client/cluster.pb.gw.go
index 4e33eb4c2c4..162911c9094 100644
--- a/proto/go_client/cluster.pb.gw.go
+++ b/proto/go_client/cluster.pb.gw.go
@@ -440,7 +440,7 @@ func RegisterClusterServiceHandlerServer(ctx context.Context, mux *runtime.Serve
 // RegisterClusterServiceHandlerFromEndpoint is same as RegisterClusterServiceHandler but
 // automatically dials to "endpoint" and closes the connection when "ctx" gets done.
 func RegisterClusterServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) {
-	conn, err := grpc.NewClient(endpoint, opts...)
+	conn, err := grpc.Dial(endpoint, opts...)
 	if err != nil {
 		return err
 	}
diff --git a/proto/go_client/config.pb.go b/proto/go_client/config.pb.go
index 0484e4a7f82..faac903cd57 100644
--- a/proto/go_client/config.pb.go
+++ b/proto/go_client/config.pb.go
@@ -472,6 +472,7 @@ type ComputeTemplate struct {
 	Tolerations []*PodToleration `protobuf:"bytes,7,rep,name=tolerations,proto3" json:"tolerations,omitempty"`
 	// Optional. Name and number of the extended resources
 	ExtendedResources map[string]uint32 `protobuf:"bytes,8,rep,name=extended_resources,json=extendedResources,proto3" json:"extended_resources,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"varint,2,opt,name=value,proto3"`
+	NodeSelector      map[string]string `protobuf:"bytes,9,rep,name=node_selector,json=nodeSelector,proto3" json:"node_selector,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
 }
 
 func (x *ComputeTemplate) Reset() {
@@ -562,6 +563,13 @@ func (x *ComputeTemplate) GetExtendedResources() map[string]uint32 {
 	return nil
 }
 
+func (x *ComputeTemplate) GetNodeSelector() map[string]string {
+	if x != nil {
+		return x.NodeSelector
+	}
+	return nil
+}
+
 type CreateImageTemplateRequest struct {
 	state         protoimpl.MessageState
 	sizeCache     protoimpl.SizeCache
@@ -1097,7 +1105,7 @@ var file_config_proto_rawDesc = []byte{
 	0x65, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18,
 	0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1b, 0x0a, 0x06,
 	0x65, 0x66, 0x66, 0x65, 0x63, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41,
-	0x02, 0x52, 0x06, 0x65, 0x66, 0x66, 0x65, 0x63, 0x74, 0x22, 0x98, 0x03, 0x0a, 0x0f, 0x43, 0x6f,
+	0x02, 0x52, 0x06, 0x65, 0x66, 0x66, 0x65, 0x63, 0x74, 0x22, 0xa8, 0x04, 0x0a, 0x0f, 0x43, 0x6f,
 	0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x17, 0x0a,
 	0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02,
 	0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70,
@@ -1118,11 +1126,20 @@ var file_config_proto_rawDesc = []byte{
 	0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70,
 	0x6c, 0x61, 0x74, 0x65, 0x2e, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73,
 	0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x11, 0x65, 0x78, 0x74,
-	0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x1a, 0x44,
-	0x0a, 0x16, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72,
-	0x63, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18,
+	0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x12, 0x4d,
+	0x0a, 0x0d, 0x6e, 0x6f, 0x64, 0x65, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x18,
+	0x09, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f,
+	0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2e, 0x4e, 0x6f,
+	0x64, 0x65, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52,
+	0x0c, 0x6e, 0x6f, 0x64, 0x65, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x1a, 0x44, 0x0a,
+	0x16, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63,
+	0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01,
+	0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c,
+	0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a,
+	0x02, 0x38, 0x01, 0x1a, 0x3f, 0x0a, 0x11, 0x4e, 0x6f, 0x64, 0x65, 0x53, 0x65, 0x6c, 0x65, 0x63,
+	0x74, 0x6f, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18,
 	0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61,
-	0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65,
+	0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65,
 	0x3a, 0x02, 0x38, 0x01, 0x22, 0x77, 0x0a, 0x1a, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x49, 0x6d,
 	0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65,
 	0x73, 0x74, 0x12, 0x3b, 0x0a, 0x0e, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70,
@@ -1293,7 +1310,7 @@ func file_config_proto_rawDescGZIP() []byte {
 	return file_config_proto_rawDescData
 }
 
-var file_config_proto_msgTypes = make([]protoimpl.MessageInfo, 19)
+var file_config_proto_msgTypes = make([]protoimpl.MessageInfo, 20)
 var file_config_proto_goTypes = []interface{}{
 	(*CreateComputeTemplateRequest)(nil),    // 0: proto.CreateComputeTemplateRequest
 	(*GetComputeTemplateRequest)(nil),       // 1: proto.GetComputeTemplateRequest
@@ -1313,8 +1330,9 @@ var file_config_proto_goTypes = []interface{}{
 	(*DeleteImageTemplateRequest)(nil),      // 15: proto.DeleteImageTemplateRequest
 	(*ImageTemplate)(nil),                   // 16: proto.ImageTemplate
 	nil,                                     // 17: proto.ComputeTemplate.ExtendedResourcesEntry
-	nil,                                     // 18: proto.ImageTemplate.EnvironmentVariablesEntry
-	(*emptypb.Empty)(nil),                   // 19: google.protobuf.Empty
+	nil,                                     // 18: proto.ComputeTemplate.NodeSelectorEntry
+	nil,                                     // 19: proto.ImageTemplate.EnvironmentVariablesEntry
+	(*emptypb.Empty)(nil),                   // 20: google.protobuf.Empty
 }
 var file_config_proto_depIdxs = []int32{
 	8,  // 0: proto.CreateComputeTemplateRequest.compute_template:type_name -> proto.ComputeTemplate
@@ -1322,33 +1340,34 @@ var file_config_proto_depIdxs = []int32{
 	8,  // 2: proto.ListAllComputeTemplatesResponse.compute_templates:type_name -> proto.ComputeTemplate
 	7,  // 3: proto.ComputeTemplate.tolerations:type_name -> proto.PodToleration
 	17, // 4: proto.ComputeTemplate.extended_resources:type_name -> proto.ComputeTemplate.ExtendedResourcesEntry
-	16, // 5: proto.CreateImageTemplateRequest.image_template:type_name -> proto.ImageTemplate
-	16, // 6: proto.ListImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate
-	16, // 7: proto.ListAllImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate
-	18, // 8: proto.ImageTemplate.environment_variables:type_name -> proto.ImageTemplate.EnvironmentVariablesEntry
-	0,  // 9: proto.ComputeTemplateService.CreateComputeTemplate:input_type -> proto.CreateComputeTemplateRequest
-	1,  // 10: proto.ComputeTemplateService.GetComputeTemplate:input_type -> proto.GetComputeTemplateRequest
-	2,  // 11: proto.ComputeTemplateService.ListComputeTemplates:input_type -> proto.ListComputeTemplatesRequest
-	4,  // 12: proto.ComputeTemplateService.ListAllComputeTemplates:input_type -> proto.ListAllComputeTemplatesRequest
-	6,  // 13: proto.ComputeTemplateService.DeleteComputeTemplate:input_type -> proto.DeleteComputeTemplateRequest
-	9,  // 14: proto.ImageTemplateService.CreateImageTemplate:input_type -> proto.CreateImageTemplateRequest
-	10, // 15: proto.ImageTemplateService.GetImageTemplate:input_type -> proto.GetImageTemplateRequest
-	11, // 16: proto.ImageTemplateService.ListImageTemplates:input_type -> proto.ListImageTemplatesRequest
-	15, // 17: proto.ImageTemplateService.DeleteImageTemplate:input_type -> proto.DeleteImageTemplateRequest
-	8,  // 18: proto.ComputeTemplateService.CreateComputeTemplate:output_type -> proto.ComputeTemplate
-	8,  // 19: proto.ComputeTemplateService.GetComputeTemplate:output_type -> proto.ComputeTemplate
-	3,  // 20: proto.ComputeTemplateService.ListComputeTemplates:output_type -> proto.ListComputeTemplatesResponse
-	5,  // 21: proto.ComputeTemplateService.ListAllComputeTemplates:output_type -> proto.ListAllComputeTemplatesResponse
-	19, // 22: proto.ComputeTemplateService.DeleteComputeTemplate:output_type -> google.protobuf.Empty
-	16, // 23: proto.ImageTemplateService.CreateImageTemplate:output_type -> proto.ImageTemplate
-	16, // 24: proto.ImageTemplateService.GetImageTemplate:output_type -> proto.ImageTemplate
-	12, // 25: proto.ImageTemplateService.ListImageTemplates:output_type -> proto.ListImageTemplatesResponse
-	19, // 26: proto.ImageTemplateService.DeleteImageTemplate:output_type -> google.protobuf.Empty
-	18, // [18:27] is the sub-list for method output_type
-	9,  // [9:18] is the sub-list for method input_type
-	9,  // [9:9] is the sub-list for extension type_name
-	9,  // [9:9] is the sub-list for extension extendee
-	0,  // [0:9] is the sub-list for field type_name
+	18, // 5: proto.ComputeTemplate.node_selector:type_name -> proto.ComputeTemplate.NodeSelectorEntry
+	16, // 6: proto.CreateImageTemplateRequest.image_template:type_name -> proto.ImageTemplate
+	16, // 7: proto.ListImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate
+	16, // 8: proto.ListAllImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate
+	19, // 9: proto.ImageTemplate.environment_variables:type_name -> proto.ImageTemplate.EnvironmentVariablesEntry
+	0,  // 10: proto.ComputeTemplateService.CreateComputeTemplate:input_type -> proto.CreateComputeTemplateRequest
+	1,  // 11: proto.ComputeTemplateService.GetComputeTemplate:input_type -> proto.GetComputeTemplateRequest
+	2,  // 12: proto.ComputeTemplateService.ListComputeTemplates:input_type -> proto.ListComputeTemplatesRequest
+	4,  // 13: proto.ComputeTemplateService.ListAllComputeTemplates:input_type -> proto.ListAllComputeTemplatesRequest
+	6,  // 14: proto.ComputeTemplateService.DeleteComputeTemplate:input_type -> proto.DeleteComputeTemplateRequest
+	9,  // 15: proto.ImageTemplateService.CreateImageTemplate:input_type -> proto.CreateImageTemplateRequest
+	10, // 16: proto.ImageTemplateService.GetImageTemplate:input_type -> proto.GetImageTemplateRequest
+	11, // 17: proto.ImageTemplateService.ListImageTemplates:input_type -> proto.ListImageTemplatesRequest
+	15, // 18: proto.ImageTemplateService.DeleteImageTemplate:input_type -> proto.DeleteImageTemplateRequest
+	8,  // 19: proto.ComputeTemplateService.CreateComputeTemplate:output_type -> proto.ComputeTemplate
+	8,  // 20: proto.ComputeTemplateService.GetComputeTemplate:output_type -> proto.ComputeTemplate
+	3,  // 21: proto.ComputeTemplateService.ListComputeTemplates:output_type -> proto.ListComputeTemplatesResponse
+	5,  // 22: proto.ComputeTemplateService.ListAllComputeTemplates:output_type -> proto.ListAllComputeTemplatesResponse
+	20, // 23: proto.ComputeTemplateService.DeleteComputeTemplate:output_type -> google.protobuf.Empty
+	16, // 24: proto.ImageTemplateService.CreateImageTemplate:output_type -> proto.ImageTemplate
+	16, // 25: proto.ImageTemplateService.GetImageTemplate:output_type -> proto.ImageTemplate
+	12, // 26: proto.ImageTemplateService.ListImageTemplates:output_type -> proto.ListImageTemplatesResponse
+	20, // 27: proto.ImageTemplateService.DeleteImageTemplate:output_type -> google.protobuf.Empty
+	19, // [19:28] is the sub-list for method output_type
+	10, // [10:19] is the sub-list for method input_type
+	10, // [10:10] is the sub-list for extension type_name
+	10, // [10:10] is the sub-list for extension extendee
+	0,  // [0:10] is the sub-list for field type_name
 }
 
 func init() { file_config_proto_init() }
@@ -1568,7 +1587,7 @@ func file_config_proto_init() {
 			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
 			RawDescriptor: file_config_proto_rawDesc,
 			NumEnums:      0,
-			NumMessages:   19,
+			NumMessages:   20,
 			NumExtensions: 0,
 			NumServices:   2,
 		},
diff --git a/proto/go_client/config.pb.gw.go b/proto/go_client/config.pb.gw.go
index c57ef2828d1..27ce40ff931 100644
--- a/proto/go_client/config.pb.gw.go
+++ b/proto/go_client/config.pb.gw.go
@@ -789,7 +789,7 @@ func RegisterImageTemplateServiceHandlerServer(ctx context.Context, mux *runtime
 // RegisterComputeTemplateServiceHandlerFromEndpoint is same as RegisterComputeTemplateServiceHandler but
 // automatically dials to "endpoint" and closes the connection when "ctx" gets done.
 func RegisterComputeTemplateServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) {
-	conn, err := grpc.NewClient(endpoint, opts...)
+	conn, err := grpc.Dial(endpoint, opts...)
 	if err != nil {
 		return err
 	}
@@ -954,7 +954,7 @@ var (
 // RegisterImageTemplateServiceHandlerFromEndpoint is same as RegisterImageTemplateServiceHandler but
 // automatically dials to "endpoint" and closes the connection when "ctx" gets done.
 func RegisterImageTemplateServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) {
-	conn, err := grpc.NewClient(endpoint, opts...)
+	conn, err := grpc.Dial(endpoint, opts...)
 	if err != nil {
 		return err
 	}
diff --git a/proto/go_client/job.pb.gw.go b/proto/go_client/job.pb.gw.go
index 5cd31a6d38c..7a90b31de8a 100644
--- a/proto/go_client/job.pb.gw.go
+++ b/proto/go_client/job.pb.gw.go
@@ -440,7 +440,7 @@ func RegisterRayJobServiceHandlerServer(ctx context.Context, mux *runtime.ServeM
 // RegisterRayJobServiceHandlerFromEndpoint is same as RegisterRayJobServiceHandler but
 // automatically dials to "endpoint" and closes the connection when "ctx" gets done.
 func RegisterRayJobServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) {
-	conn, err := grpc.NewClient(endpoint, opts...)
+	conn, err := grpc.Dial(endpoint, opts...)
 	if err != nil {
 		return err
 	}
diff --git a/proto/go_client/job_submission.pb.gw.go b/proto/go_client/job_submission.pb.gw.go
index 4235d661396..eafd5e65d1e 100644
--- a/proto/go_client/job_submission.pb.gw.go
+++ b/proto/go_client/job_submission.pb.gw.go
@@ -709,7 +709,7 @@ func RegisterRayJobSubmissionServiceHandlerServer(ctx context.Context, mux *runt
 // RegisterRayJobSubmissionServiceHandlerFromEndpoint is same as RegisterRayJobSubmissionServiceHandler but
 // automatically dials to "endpoint" and closes the connection when "ctx" gets done.
 func RegisterRayJobSubmissionServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) {
-	conn, err := grpc.NewClient(endpoint, opts...)
+	conn, err := grpc.Dial(endpoint, opts...)
 	if err != nil {
 		return err
 	}
diff --git a/proto/go_client/serve.pb.gw.go b/proto/go_client/serve.pb.gw.go
index 90d9c722862..e6a734eab27 100644
--- a/proto/go_client/serve.pb.gw.go
+++ b/proto/go_client/serve.pb.gw.go
@@ -587,7 +587,7 @@ func RegisterRayServeServiceHandlerServer(ctx context.Context, mux *runtime.Serv
 // RegisterRayServeServiceHandlerFromEndpoint is same as RegisterRayServeServiceHandler but
 // automatically dials to "endpoint" and closes the connection when "ctx" gets done.
 func RegisterRayServeServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) {
-	conn, err := grpc.NewClient(endpoint, opts...)
+	conn, err := grpc.Dial(endpoint, opts...)
 	if err != nil {
 		return err
 	}
diff --git a/proto/kuberay_api.swagger.json b/proto/kuberay_api.swagger.json
index 0bcf0027ef8..06c10bca1a4 100644
--- a/proto/kuberay_api.swagger.json
+++ b/proto/kuberay_api.swagger.json
@@ -1598,6 +1598,12 @@
             "format": "int64"
           },
           "title": "Optional. Name and number of the extended resources"
+        },
+        "nodeSelector": {
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          }
         }
       },
       "title": "ComputeTemplate can be reused by any compute units like worker group, workspace, image build job, etc",
diff --git a/proto/swagger/config.swagger.json b/proto/swagger/config.swagger.json
index 7548300d108..0308337c093 100644
--- a/proto/swagger/config.swagger.json
+++ b/proto/swagger/config.swagger.json
@@ -417,6 +417,12 @@
             "format": "int64"
           },
           "title": "Optional. Name and number of the extended resources"
+        },
+        "nodeSelector": {
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          }
         }
       },
       "title": "ComputeTemplate can be reused by any compute units like worker group, workspace, image build job, etc",

From 3b54b54f3cd99f833fd3f4d1a01d582cc65c8b40 Mon Sep 17 00:00:00 2001
From: blublinsky <blublinsky@hotmail.com>
Date: Fri, 8 Nov 2024 16:10:58 +0000
Subject: [PATCH 2/3] fixed lint

---
 proto/config.proto | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/proto/config.proto b/proto/config.proto
index 07d4317e65a..4f22dc962e1 100644
--- a/proto/config.proto
+++ b/proto/config.proto
@@ -227,4 +227,4 @@ message ImageTemplate {
   string custom_commands = 8;
   // Output. The result image generated
   string image = 9;
-}
\ No newline at end of file
+}

From 36fb48d519f36f35eb9c7807880825d232c7d7d9 Mon Sep 17 00:00:00 2001
From: blublinsky <blublinsky@hotmail.com>
Date: Fri, 8 Nov 2024 19:43:59 +0000
Subject: [PATCH 3/3] fixed typos

---
 apiserver/pkg/model/converter_test.go | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/apiserver/pkg/model/converter_test.go b/apiserver/pkg/model/converter_test.go
index bdcf63706c3..3914d51380d 100644
--- a/apiserver/pkg/model/converter_test.go
+++ b/apiserver/pkg/model/converter_test.go
@@ -641,9 +641,6 @@ func TestPopulateTemplate(t *testing.T) {
 	if len(template.NodeSelector) != 0 {
 		t.Errorf("failed to convert config map, expected no node selector, got %d", len(template.NodeSelector))
 	}
-	if len(template.NodeSelector) != 0 {
-		t.Errorf("failed to convert config map, expected no node selector, got %d", len(template.NodeSelector))
-	}
 	if len(template.Tolerations) != 1 {
 		t.Errorf("failed to convert config map, expected 1 toleration, got %d", len(template.Tolerations))
 	}