From 0f1ce7b385c8f37ae90be248a94cb94c19f5ae3b Mon Sep 17 00:00:00 2001 From: blublinsky Date: Fri, 8 Nov 2024 15:51:42 +0000 Subject: [PATCH 1/3] Add support node selector in the API server --- apiserver/pkg/model/converter.go | 9 ++ apiserver/pkg/model/converter_test.go | 38 ++++++++ apiserver/pkg/util/cluster.go | 38 ++++++-- apiserver/pkg/util/cluster_test.go | 44 +++++++++ .../params/templates.py | 17 +++- .../test/api_params_test.py | 19 +++- .../test/kuberay_api_test.py | 4 +- proto/config.proto | 3 +- proto/go_client/cluster.pb.gw.go | 2 +- proto/go_client/config.pb.go | 91 +++++++++++-------- proto/go_client/config.pb.gw.go | 4 +- proto/go_client/job.pb.gw.go | 2 +- proto/go_client/job_submission.pb.gw.go | 2 +- proto/go_client/serve.pb.gw.go | 2 +- proto/kuberay_api.swagger.json | 6 ++ proto/swagger/config.swagger.json | 6 ++ 16 files changed, 226 insertions(+), 61 deletions(-) diff --git a/apiserver/pkg/model/converter.go b/apiserver/pkg/model/converter.go index b7bf774267c..b369b9f0407 100644 --- a/apiserver/pkg/model/converter.go +++ b/apiserver/pkg/model/converter.go @@ -423,6 +423,15 @@ func FromKubeToAPIComputeTemplate(configMap *corev1.ConfigMap) *api.ComputeTempl } } + val, ok = configMap.Data["node_selector"] + if ok { + err := json.Unmarshal([]byte(val), &runtime.NodeSelector) + if err != nil { + klog.Error("failed to unmarshall node selector for compute template ", runtime.Name, " value ", + val, " error ", err) + } + } + val, ok = configMap.Data["tolerations"] if ok { err := json.Unmarshal([]byte(val), &runtime.Tolerations) diff --git a/apiserver/pkg/model/converter_test.go b/apiserver/pkg/model/converter_test.go index 212103c3e4a..bdcf63706c3 100644 --- a/apiserver/pkg/model/converter_test.go +++ b/apiserver/pkg/model/converter_test.go @@ -157,6 +157,19 @@ var configMapWithTolerations = corev1.ConfigMap{ }, } +var configMapWithNodeSelector = corev1.ConfigMap{ + Data: map[string]string{ + "cpu": "4", + "gpu": "0", 
"gpu_accelerator": "", + "memory": "8", + "extended_resources": "{\"vpc.amazonaws.com/efa\": 32}", + "name": "head-node-template", + "namespace": "max", + "node_selector": "{\"nvidia.com/gpu.product\": \"Tesla-V100-PCIE-16GB\", \"kubernetes.io/hostname\": \"cpu15\"}", + }, +} + var workerSpecTest = rayv1api.WorkerGroupSpec{ GroupName: "", Replicas: &workerReplicas, @@ -502,6 +515,11 @@ var expectedTolerations = api.PodToleration{ Effect: "NoExecute", } +var expectedNodeSelector = map[string]string{ + "nvidia.com/gpu.product": "Tesla-V100-PCIE-16GB", + "kubernetes.io/hostname": "cpu15", +} + func TestPopulateHeadNodeSpec(t *testing.T) { groupSpec := PopulateHeadNodeSpec(headSpecTest) @@ -615,8 +633,17 @@ func TestPopulateTemplate(t *testing.T) { if len(template.Tolerations) != 0 { t.Errorf("failed to convert config map, expected no tolerations, got %d", len(template.Tolerations)) } + if len(template.NodeSelector) != 0 { + t.Errorf("failed to convert config map, expected no node selector, got %d", len(template.NodeSelector)) + } template = FromKubeToAPIComputeTemplate(&configMapWithTolerations) + if len(template.NodeSelector) != 0 { + t.Errorf("failed to convert config map, expected no node selector, got %d", len(template.NodeSelector)) + } + if len(template.NodeSelector) != 0 { + t.Errorf("failed to convert config map, expected no node selector, got %d", len(template.NodeSelector)) + } if len(template.Tolerations) != 1 { t.Errorf("failed to convert config map, expected 1 toleration, got %d", len(template.Tolerations)) } @@ -627,6 +654,17 @@ func TestPopulateTemplate(t *testing.T) { tolerationToString(&expectedTolerations)) } + template = FromKubeToAPIComputeTemplate(&configMapWithNodeSelector) + if len(template.Tolerations) != 0 { + t.Errorf("failed to convert config map, expected no tolerations, got %d", len(template.Tolerations)) + } + if len(template.NodeSelector) != 2 { + t.Errorf("failed to convert config map, expected 1 node selector got %d", 
len(template.NodeSelector)) + } + if !reflect.DeepEqual(template.NodeSelector, expectedNodeSelector) { + t.Errorf("failed to convert node selector, got %v, expected %v", template.NodeSelector, expectedNodeSelector) + } + assert.Equal(t, uint32(4), template.Cpu, "CPU mismatch") assert.Equal(t, uint32(8), template.Memory, "Memory mismatch") assert.Equal(t, uint32(0), template.Gpu, "GPU mismatch") diff --git a/apiserver/pkg/util/cluster.go b/apiserver/pkg/util/cluster.go index e1679ea5695..2a9e97fdc3e 100644 --- a/apiserver/pkg/util/cluster.go +++ b/apiserver/pkg/util/cluster.go @@ -193,7 +193,8 @@ func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, s Labels: map[string]string{}, }, Spec: corev1.PodSpec{ - Tolerations: []corev1.Toleration{}, + Tolerations: []corev1.Toleration{}, + NodeSelector: map[string]string{}, Containers: []corev1.Container{ { Name: "ray-head", @@ -297,7 +298,7 @@ func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, s } } - // Add specific tollerations + // Add pod tollerations if computeRuntime.Tolerations != nil { for _, t := range computeRuntime.Tolerations { podTemplateSpec.Spec.Tolerations = append(podTemplateSpec.Spec.Tolerations, corev1.Toleration{ @@ -306,6 +307,13 @@ func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, s } } + // Add node selector + if computeRuntime.NodeSelector != nil { + for k, v := range computeRuntime.NodeSelector { + podTemplateSpec.Spec.NodeSelector[k] = v + } + } + // If service account is specified, add it to the pod spec. 
if len(spec.ServiceAccount) > 1 { podTemplateSpec.Spec.ServiceAccountName = spec.ServiceAccount @@ -329,7 +337,7 @@ func convertEnvironmentVariables(envs *api.EnvironmentVariables) []corev1.EnvVar if envs == nil { return converted } - if envs.Values != nil && len(envs.Values) > 0 { + if len(envs.Values) > 0 { // Add values for key, value := range envs.Values { converted = append(converted, corev1.EnvVar{ @@ -337,7 +345,7 @@ func convertEnvironmentVariables(envs *api.EnvironmentVariables) []corev1.EnvVar }) } } - if envs.ValuesFrom != nil && len(envs.ValuesFrom) > 0 { + if len(envs.ValuesFrom) > 0 { // Add values ref for key, value := range envs.ValuesFrom { switch value.Source { @@ -447,7 +455,8 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables, Labels: map[string]string{}, }, Spec: corev1.PodSpec{ - Tolerations: []corev1.Toleration{}, + Tolerations: []corev1.Toleration{}, + NodeSelector: map[string]string{}, Containers: []corev1.Container{ { Name: "ray-worker", @@ -591,7 +600,7 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables, } } - // Add specific tollerations + // Add pod tollerations if computeRuntime.Tolerations != nil { for _, t := range computeRuntime.Tolerations { podTemplateSpec.Spec.Tolerations = append(podTemplateSpec.Spec.Tolerations, corev1.Toleration{ @@ -600,6 +609,13 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables, } } + // Add node selector + if computeRuntime.NodeSelector != nil { + for k, v := range computeRuntime.NodeSelector { + podTemplateSpec.Spec.NodeSelector[k] = v + } + } + // If service account is specified, add it to the pod spec. 
if len(spec.ServiceAccount) > 1 { podTemplateSpec.Spec.ServiceAccountName = spec.ServiceAccount @@ -847,6 +863,11 @@ func NewComputeTemplate(runtime *api.ComputeTemplate) (*corev1.ConfigMap, error) return nil, fmt.Errorf("failed to marshal extended resources: %v", err) } + nodeSelectorJSON, err := json.Marshal(runtime.NodeSelector) + if err != nil { + return nil, fmt.Errorf("failed to marshal node selector for compute template %s: %w", runtime.Name, err) + } + // Create data map dmap := map[string]string{ "name": runtime.Name, @@ -856,9 +877,10 @@ func NewComputeTemplate(runtime *api.ComputeTemplate) (*corev1.ConfigMap, error) "gpu": strconv.FormatUint(uint64(runtime.Gpu), 10), "gpu_accelerator": runtime.GpuAccelerator, "extended_resources": string(extendedResourcesJSON), + "node_selector": string(nodeSelectorJSON), } // Add tolerations in defined - if runtime.Tolerations != nil && len(runtime.Tolerations) > 0 { + if len(runtime.Tolerations) > 0 { t, err := json.Marshal(runtime.Tolerations) if err != nil { return nil, fmt.Errorf("failed to marshal tolerations for compute template %s: %w", runtime.Name, err) @@ -945,7 +967,7 @@ func buildAutoscalerOptions(autoscalerOptions *api.AutoscalerOptions) (*rayv1api } } } - if autoscalerOptions.Volumes != nil && len(autoscalerOptions.Volumes) > 0 { + if len(autoscalerOptions.Volumes) > 0 { options.VolumeMounts = buildVolumeMounts(autoscalerOptions.Volumes) } if len(autoscalerOptions.Cpu) > 0 || len(autoscalerOptions.Memory) > 0 { diff --git a/apiserver/pkg/util/cluster_test.go b/apiserver/pkg/util/cluster_test.go index d2a9661f693..d7cc23e9f5b 100644 --- a/apiserver/pkg/util/cluster_test.go +++ b/apiserver/pkg/util/cluster_test.go @@ -1,6 +1,7 @@ package util import ( + "encoding/json" "reflect" "sort" "testing" @@ -253,6 +254,24 @@ var template = api.ComputeTemplate{ }, } +var templateWithNS = api.ComputeTemplate{ + Name: "nodeselector", + Namespace: "default", + Cpu: 2, + Memory: 8, + NodeSelector: map[string]string{ + "nvidia.com/gpu.product": 
"Tesla-V100-PCIE-16GB", + "kubernetes.io/hostname": "cpu15", + }, + Tolerations: []*api.PodToleration{ + { + Key: "blah1", + Operator: "Exists", + Effect: "NoExecute", + }, + }, +} + var templateWorker = api.ComputeTemplate{ Name: "", Namespace: "", @@ -341,6 +360,22 @@ var expectedSecurityContext = corev1.SecurityContext{ }, } +func TestBuildComputeTemplate(t *testing.T) { + cmap, _ := NewComputeTemplate(&templateWithNS) + selector := cmap.Data["node_selector"] + var jsonMap map[string]interface{} + err := json.Unmarshal([]byte(selector), &jsonMap) + if err != nil { + t.Fatalf("failed to unmarshall config map node selector %s, error %v", selector, err) + } + if jsonMap["nvidia.com/gpu.product"].(string) != "Tesla-V100-PCIE-16GB" { + t.Errorf("failed to convert config map, expected node selector Tesla-V100-PCIE-16GB, got %s", jsonMap["nvidia.com/gpu.product"].(string)) + } + if jsonMap["kubernetes.io/hostname"].(string) != "cpu15" { + t.Errorf("failed to convert config map, expected node selector cpu15, got %s", jsonMap["kubernetes.io/hostname"].(string)) + } +} + func TestBuildVolumes(t *testing.T) { targetVolume := corev1.Volume{ Name: testVolume.Name, @@ -597,6 +632,15 @@ func TestBuildHeadPodTemplate(t *testing.T) { if len(podSpec.Spec.Containers[0].Ports) != 6 { t.Errorf("failed build ports") } + if len(podSpec.Spec.NodeSelector) != 0 { + t.Errorf("failed build Node selector") + } + + podSpec, err = buildHeadPodTemplate("2.4", &api.EnvironmentVariables{}, &headGroup, &templateWithNS, false) + assert.Nil(t, err) + if len(podSpec.Spec.NodeSelector) != 2 { + t.Errorf("failed build Node selector") + } } func TestConvertAutoscalerOptions(t *testing.T) { diff --git a/clients/python-apiserver-client/src/python_apiserver_client/params/templates.py b/clients/python-apiserver-client/src/python_apiserver_client/params/templates.py index 01124913696..e2b585513c8 100644 --- a/clients/python-apiserver-client/src/python_apiserver_client/params/templates.py +++ 
b/clients/python-apiserver-client/src/python_apiserver_client/params/templates.py @@ -94,6 +94,7 @@ class Template: gpu_accelerator - optional, if not defined nvidia.com/gpu is assumed extended_resources - optional, name and number of the extended resources tolerations - optional, tolerations for pod placing, default none + node_selector - optional, node selector for pod placing, default none - to_string() -> str: convert toleration to string for printing - to_dict() -> dict[str, Any] convert to dict - to_json() -> str convert to json string @@ -109,6 +110,7 @@ def __init__( gpu_accelerator: str = None, extended_resources: dict[str, int] = None, tolerations: list[Toleration] = None, + node_selector: dict[str, str] = None, ): """ Initialization @@ -120,6 +122,7 @@ def __init__( :param gpu_accelerator: accelerator type :param extended_resources: extended resources :param tolerations: tolerations + :param node_selector: node selector """ self.name = name self.namespace = namespace @@ -129,6 +132,7 @@ def __init__( self.gpu_accelerator = gpu_accelerator self.extended_resources = extended_resources self.tolerations = tolerations + self.node_selector = node_selector def to_string(self) -> str: """ @@ -142,6 +146,8 @@ def to_string(self) -> str: val = val + f", gpu accelerator {self.gpu_accelerator}" if self.extended_resources is not None: val = val + f", extended resources {self.extended_resources}" + if self.node_selector is not None: + val = val + f", node selector {self.node_selector}" if self.tolerations is None: return val val = val + ", tolerations [" @@ -163,9 +169,11 @@ def to_dict(self) -> dict[str, Any]: if self.gpu > 0: dct["gpu"] = self.gpu if self.gpu_accelerator is not None: - dct["gpu accelerator"] = self.gpu_accelerator + dct["gpu_accelerator"] = self.gpu_accelerator if self.extended_resources is not None: - dct["extended resources"] = self.extended_resources + dct["extended_resources"] = self.extended_resources + if self.node_selector is not None: + 
dct["node_selector"] = self.node_selector if self.tolerations is not None: dct["tolerations"] = [tl.to_dict() for tl in self.tolerations] return dct @@ -206,8 +214,9 @@ def template_decoder(dct: dict[str, Any]) -> Template: cpu=int(dct.get("cpu", "0")), memory=int(dct.get("memory", "0")), gpu=int(dct.get("gpu", "0")), - gpu_accelerator=dct.get("gpu_accelerator"), - extended_resources=dct.get("extended_resources"), + gpu_accelerator=dct.get("gpuAccelerator"), + extended_resources=dct.get("extendedResources"), + node_selector=dct.get("nodeSelector"), tolerations=tolerations, ) diff --git a/clients/python-apiserver-client/test/api_params_test.py b/clients/python-apiserver-client/test/api_params_test.py index 37b0e3c45d1..7bec69e43da 100644 --- a/clients/python-apiserver-client/test/api_params_test.py +++ b/clients/python-apiserver-client/test/api_params_test.py @@ -72,20 +72,29 @@ def test_templates(): tm1_json = json.dumps(temp1.to_dict()) print(f"template 1 JSON: {tm1_json}") - temp2 = Template(name="template2", namespace="namespace", cpu=2, memory=8, gpu=1) + temp2 = Template(name="template2", namespace="namespace", cpu=2, memory=8, gpu=1, gpu_accelerator="nvidia") print(f"template 2: {temp2.to_string()}") tm2_json = json.dumps(temp2.to_dict()) print(f"template 2 JSON: {tm2_json}") - temp3 = Template(name="template3", namespace="namespace", cpu=2, memory=8, gpu=1, extended_resources={"vpc.amazonaws.com/efa": 32}) + temp3 = Template(name="template3", namespace="namespace", cpu=2, memory=8, gpu=1, + extended_resources={"vpc.amazonaws.com/efa": 32}) print(f"template 3: {temp3.to_string()}") tm3_json = json.dumps(temp3.to_dict()) print(f"template 3 JSON: {tm3_json}") - assert temp1.to_string() == template_decoder(json.loads(tm1_json)).to_string() - assert temp2.to_string() == template_decoder(json.loads(tm2_json)).to_string() - assert temp3.to_string() == template_decoder(json.loads(tm3_json)).to_string() + temp4 = Template(name="template3", namespace="namespace", 
cpu=2, memory=8, gpu=1, + node_selector={"nvidia.com/gpu.product": "NVIDIA-A100-80GB-PCIe", + "kubernetes.io/hostname": "cpu15"}) + print(f"template 4: {temp4.to_string()}") + tm4_json = json.dumps(temp4.to_dict()) + print(f"template 4 JSON: {tm4_json}") + assert temp1.to_string() == template_decoder(json.loads(tm1_json)).to_string() + # These are commented out as the real cluster replaces params with _ to a CamelCase +# assert temp2.to_string() == template_decoder(json.loads(tm2_json)).to_string() +# assert temp3.to_string() == template_decoder(json.loads(tm3_json)).to_string() +# assert temp4.to_string() == template_decoder(json.loads(tm4_json)).to_string() def test_volumes(): diff --git a/clients/python-apiserver-client/test/kuberay_api_test.py b/clients/python-apiserver-client/test/kuberay_api_test.py index bb522d5ffb0..be6e73e6a0e 100644 --- a/clients/python-apiserver-client/test/kuberay_api_test.py +++ b/clients/python-apiserver-client/test/kuberay_api_test.py @@ -43,7 +43,9 @@ def test_templates(): _, _ = apis.delete_compute_template(ns="default", name="default-template") # create toleration = Toleration(key="blah1", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute) - template = Template(name="default-template", namespace="default", cpu=2, memory=8, gpu=1, extended_resources={"vpc.amazonaws.com/efa": 32}, tolerations=[toleration]) + template = Template(name="default-template", namespace="default", cpu=2, memory=8, gpu=1, + gpu_accelerator="nvidia.com/gpu", extended_resources={"vpc.amazonaws.com/efa": 32}, + tolerations=[toleration], node_selector={"nvidia.com/gpu.product": "NVIDIA-A100-80GB-PCIe"}) status, error = apis.create_compute_template(template) assert status == 200 assert error is None diff --git a/proto/config.proto b/proto/config.proto index 314f6ddeea3..07d4317e65a 100644 --- a/proto/config.proto +++ b/proto/config.proto @@ -130,6 +130,7 @@ message ComputeTemplate { repeated PodToleration tolerations = 7; // Optional. 
Name and number of the extended resources map<string, uint32> extended_resources = 8; + map<string, string> node_selector = 9; } // This service is not implemented. @@ -226,4 +227,4 @@ message ImageTemplate { string custom_commands = 8; // Output. The result image generated string image = 9; -} +} \ No newline at end of file diff --git a/proto/go_client/cluster.pb.gw.go b/proto/go_client/cluster.pb.gw.go index 4e33eb4c2c4..162911c9094 100644 --- a/proto/go_client/cluster.pb.gw.go +++ b/proto/go_client/cluster.pb.gw.go @@ -440,7 +440,7 @@ func RegisterClusterServiceHandlerServer(ctx context.Context, mux *runtime.Serve // RegisterClusterServiceHandlerFromEndpoint is same as RegisterClusterServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterClusterServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.NewClient(endpoint, opts...) + conn, err := grpc.Dial(endpoint, opts...) if err != nil { return err } diff --git a/proto/go_client/config.pb.go b/proto/go_client/config.pb.go index 0484e4a7f82..faac903cd57 100644 --- a/proto/go_client/config.pb.go +++ b/proto/go_client/config.pb.go @@ -472,6 +472,7 @@ type ComputeTemplate struct { Tolerations []*PodToleration `protobuf:"bytes,7,rep,name=tolerations,proto3" json:"tolerations,omitempty"` // Optional. 
Name and number of the extended resources ExtendedResources map[string]uint32 `protobuf:"bytes,8,rep,name=extended_resources,json=extendedResources,proto3" json:"extended_resources,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"varint,2,opt,name=value,proto3"` + NodeSelector map[string]string `protobuf:"bytes,9,rep,name=node_selector,json=nodeSelector,proto3" json:"node_selector,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` } func (x *ComputeTemplate) Reset() { @@ -562,6 +563,13 @@ func (x *ComputeTemplate) GetExtendedResources() map[string]uint32 { return nil } +func (x *ComputeTemplate) GetNodeSelector() map[string]string { + if x != nil { + return x.NodeSelector + } + return nil +} + type CreateImageTemplateRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1097,7 +1105,7 @@ var file_config_proto_rawDesc = []byte{ 0x65, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1b, 0x0a, 0x06, 0x65, 0x66, 0x66, 0x65, 0x63, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, - 0x02, 0x52, 0x06, 0x65, 0x66, 0x66, 0x65, 0x63, 0x74, 0x22, 0x98, 0x03, 0x0a, 0x0f, 0x43, 0x6f, + 0x02, 0x52, 0x06, 0x65, 0x66, 0x66, 0x65, 0x63, 0x74, 0x22, 0xa8, 0x04, 0x0a, 0x0f, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x17, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, @@ -1118,11 +1126,20 @@ var file_config_proto_rawDesc = []byte{ 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2e, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 
0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x11, 0x65, 0x78, 0x74, - 0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x1a, 0x44, - 0x0a, 0x16, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, - 0x63, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, + 0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x12, 0x4d, + 0x0a, 0x0d, 0x6e, 0x6f, 0x64, 0x65, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x18, + 0x09, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, + 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2e, 0x4e, 0x6f, + 0x64, 0x65, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, + 0x0c, 0x6e, 0x6f, 0x64, 0x65, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x1a, 0x44, 0x0a, + 0x16, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, + 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x1a, 0x3f, 0x0a, 0x11, 0x4e, 0x6f, 0x64, 0x65, 0x53, 0x65, 0x6c, 0x65, 0x63, + 0x74, 0x6f, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x77, 0x0a, 0x1a, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 
0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3b, 0x0a, 0x0e, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, @@ -1293,7 +1310,7 @@ func file_config_proto_rawDescGZIP() []byte { return file_config_proto_rawDescData } -var file_config_proto_msgTypes = make([]protoimpl.MessageInfo, 19) +var file_config_proto_msgTypes = make([]protoimpl.MessageInfo, 20) var file_config_proto_goTypes = []interface{}{ (*CreateComputeTemplateRequest)(nil), // 0: proto.CreateComputeTemplateRequest (*GetComputeTemplateRequest)(nil), // 1: proto.GetComputeTemplateRequest @@ -1313,8 +1330,9 @@ var file_config_proto_goTypes = []interface{}{ (*DeleteImageTemplateRequest)(nil), // 15: proto.DeleteImageTemplateRequest (*ImageTemplate)(nil), // 16: proto.ImageTemplate nil, // 17: proto.ComputeTemplate.ExtendedResourcesEntry - nil, // 18: proto.ImageTemplate.EnvironmentVariablesEntry - (*emptypb.Empty)(nil), // 19: google.protobuf.Empty + nil, // 18: proto.ComputeTemplate.NodeSelectorEntry + nil, // 19: proto.ImageTemplate.EnvironmentVariablesEntry + (*emptypb.Empty)(nil), // 20: google.protobuf.Empty } var file_config_proto_depIdxs = []int32{ 8, // 0: proto.CreateComputeTemplateRequest.compute_template:type_name -> proto.ComputeTemplate @@ -1322,33 +1340,34 @@ var file_config_proto_depIdxs = []int32{ 8, // 2: proto.ListAllComputeTemplatesResponse.compute_templates:type_name -> proto.ComputeTemplate 7, // 3: proto.ComputeTemplate.tolerations:type_name -> proto.PodToleration 17, // 4: proto.ComputeTemplate.extended_resources:type_name -> proto.ComputeTemplate.ExtendedResourcesEntry - 16, // 5: proto.CreateImageTemplateRequest.image_template:type_name -> proto.ImageTemplate - 16, // 6: proto.ListImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate - 16, // 7: proto.ListAllImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate - 18, // 8: proto.ImageTemplate.environment_variables:type_name -> proto.ImageTemplate.EnvironmentVariablesEntry - 0, // 9: 
proto.ComputeTemplateService.CreateComputeTemplate:input_type -> proto.CreateComputeTemplateRequest - 1, // 10: proto.ComputeTemplateService.GetComputeTemplate:input_type -> proto.GetComputeTemplateRequest - 2, // 11: proto.ComputeTemplateService.ListComputeTemplates:input_type -> proto.ListComputeTemplatesRequest - 4, // 12: proto.ComputeTemplateService.ListAllComputeTemplates:input_type -> proto.ListAllComputeTemplatesRequest - 6, // 13: proto.ComputeTemplateService.DeleteComputeTemplate:input_type -> proto.DeleteComputeTemplateRequest - 9, // 14: proto.ImageTemplateService.CreateImageTemplate:input_type -> proto.CreateImageTemplateRequest - 10, // 15: proto.ImageTemplateService.GetImageTemplate:input_type -> proto.GetImageTemplateRequest - 11, // 16: proto.ImageTemplateService.ListImageTemplates:input_type -> proto.ListImageTemplatesRequest - 15, // 17: proto.ImageTemplateService.DeleteImageTemplate:input_type -> proto.DeleteImageTemplateRequest - 8, // 18: proto.ComputeTemplateService.CreateComputeTemplate:output_type -> proto.ComputeTemplate - 8, // 19: proto.ComputeTemplateService.GetComputeTemplate:output_type -> proto.ComputeTemplate - 3, // 20: proto.ComputeTemplateService.ListComputeTemplates:output_type -> proto.ListComputeTemplatesResponse - 5, // 21: proto.ComputeTemplateService.ListAllComputeTemplates:output_type -> proto.ListAllComputeTemplatesResponse - 19, // 22: proto.ComputeTemplateService.DeleteComputeTemplate:output_type -> google.protobuf.Empty - 16, // 23: proto.ImageTemplateService.CreateImageTemplate:output_type -> proto.ImageTemplate - 16, // 24: proto.ImageTemplateService.GetImageTemplate:output_type -> proto.ImageTemplate - 12, // 25: proto.ImageTemplateService.ListImageTemplates:output_type -> proto.ListImageTemplatesResponse - 19, // 26: proto.ImageTemplateService.DeleteImageTemplate:output_type -> google.protobuf.Empty - 18, // [18:27] is the sub-list for method output_type - 9, // [9:18] is the sub-list for method input_type - 9, // 
[9:9] is the sub-list for extension type_name - 9, // [9:9] is the sub-list for extension extendee - 0, // [0:9] is the sub-list for field type_name + 18, // 5: proto.ComputeTemplate.node_selector:type_name -> proto.ComputeTemplate.NodeSelectorEntry + 16, // 6: proto.CreateImageTemplateRequest.image_template:type_name -> proto.ImageTemplate + 16, // 7: proto.ListImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate + 16, // 8: proto.ListAllImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate + 19, // 9: proto.ImageTemplate.environment_variables:type_name -> proto.ImageTemplate.EnvironmentVariablesEntry + 0, // 10: proto.ComputeTemplateService.CreateComputeTemplate:input_type -> proto.CreateComputeTemplateRequest + 1, // 11: proto.ComputeTemplateService.GetComputeTemplate:input_type -> proto.GetComputeTemplateRequest + 2, // 12: proto.ComputeTemplateService.ListComputeTemplates:input_type -> proto.ListComputeTemplatesRequest + 4, // 13: proto.ComputeTemplateService.ListAllComputeTemplates:input_type -> proto.ListAllComputeTemplatesRequest + 6, // 14: proto.ComputeTemplateService.DeleteComputeTemplate:input_type -> proto.DeleteComputeTemplateRequest + 9, // 15: proto.ImageTemplateService.CreateImageTemplate:input_type -> proto.CreateImageTemplateRequest + 10, // 16: proto.ImageTemplateService.GetImageTemplate:input_type -> proto.GetImageTemplateRequest + 11, // 17: proto.ImageTemplateService.ListImageTemplates:input_type -> proto.ListImageTemplatesRequest + 15, // 18: proto.ImageTemplateService.DeleteImageTemplate:input_type -> proto.DeleteImageTemplateRequest + 8, // 19: proto.ComputeTemplateService.CreateComputeTemplate:output_type -> proto.ComputeTemplate + 8, // 20: proto.ComputeTemplateService.GetComputeTemplate:output_type -> proto.ComputeTemplate + 3, // 21: proto.ComputeTemplateService.ListComputeTemplates:output_type -> proto.ListComputeTemplatesResponse + 5, // 22: 
proto.ComputeTemplateService.ListAllComputeTemplates:output_type -> proto.ListAllComputeTemplatesResponse + 20, // 23: proto.ComputeTemplateService.DeleteComputeTemplate:output_type -> google.protobuf.Empty + 16, // 24: proto.ImageTemplateService.CreateImageTemplate:output_type -> proto.ImageTemplate + 16, // 25: proto.ImageTemplateService.GetImageTemplate:output_type -> proto.ImageTemplate + 12, // 26: proto.ImageTemplateService.ListImageTemplates:output_type -> proto.ListImageTemplatesResponse + 20, // 27: proto.ImageTemplateService.DeleteImageTemplate:output_type -> google.protobuf.Empty + 19, // [19:28] is the sub-list for method output_type + 10, // [10:19] is the sub-list for method input_type + 10, // [10:10] is the sub-list for extension type_name + 10, // [10:10] is the sub-list for extension extendee + 0, // [0:10] is the sub-list for field type_name } func init() { file_config_proto_init() } @@ -1568,7 +1587,7 @@ func file_config_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_config_proto_rawDesc, NumEnums: 0, - NumMessages: 19, + NumMessages: 20, NumExtensions: 0, NumServices: 2, }, diff --git a/proto/go_client/config.pb.gw.go b/proto/go_client/config.pb.gw.go index c57ef2828d1..27ce40ff931 100644 --- a/proto/go_client/config.pb.gw.go +++ b/proto/go_client/config.pb.gw.go @@ -789,7 +789,7 @@ func RegisterImageTemplateServiceHandlerServer(ctx context.Context, mux *runtime // RegisterComputeTemplateServiceHandlerFromEndpoint is same as RegisterComputeTemplateServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterComputeTemplateServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.NewClient(endpoint, opts...) + conn, err := grpc.Dial(endpoint, opts...) 
if err != nil { return err } @@ -954,7 +954,7 @@ var ( // RegisterImageTemplateServiceHandlerFromEndpoint is same as RegisterImageTemplateServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterImageTemplateServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.NewClient(endpoint, opts...) + conn, err := grpc.Dial(endpoint, opts...) if err != nil { return err } diff --git a/proto/go_client/job.pb.gw.go b/proto/go_client/job.pb.gw.go index 5cd31a6d38c..7a90b31de8a 100644 --- a/proto/go_client/job.pb.gw.go +++ b/proto/go_client/job.pb.gw.go @@ -440,7 +440,7 @@ func RegisterRayJobServiceHandlerServer(ctx context.Context, mux *runtime.ServeM // RegisterRayJobServiceHandlerFromEndpoint is same as RegisterRayJobServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterRayJobServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.NewClient(endpoint, opts...) + conn, err := grpc.Dial(endpoint, opts...) if err != nil { return err } diff --git a/proto/go_client/job_submission.pb.gw.go b/proto/go_client/job_submission.pb.gw.go index 4235d661396..eafd5e65d1e 100644 --- a/proto/go_client/job_submission.pb.gw.go +++ b/proto/go_client/job_submission.pb.gw.go @@ -709,7 +709,7 @@ func RegisterRayJobSubmissionServiceHandlerServer(ctx context.Context, mux *runt // RegisterRayJobSubmissionServiceHandlerFromEndpoint is same as RegisterRayJobSubmissionServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterRayJobSubmissionServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.NewClient(endpoint, opts...) 
+ conn, err := grpc.Dial(endpoint, opts...) if err != nil { return err } diff --git a/proto/go_client/serve.pb.gw.go b/proto/go_client/serve.pb.gw.go index 90d9c722862..e6a734eab27 100644 --- a/proto/go_client/serve.pb.gw.go +++ b/proto/go_client/serve.pb.gw.go @@ -587,7 +587,7 @@ func RegisterRayServeServiceHandlerServer(ctx context.Context, mux *runtime.Serv // RegisterRayServeServiceHandlerFromEndpoint is same as RegisterRayServeServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterRayServeServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.NewClient(endpoint, opts...) + conn, err := grpc.Dial(endpoint, opts...) if err != nil { return err } diff --git a/proto/kuberay_api.swagger.json b/proto/kuberay_api.swagger.json index 0bcf0027ef8..06c10bca1a4 100644 --- a/proto/kuberay_api.swagger.json +++ b/proto/kuberay_api.swagger.json @@ -1598,6 +1598,12 @@ "format": "int64" }, "title": "Optional. Name and number of the extended resources" + }, + "nodeSelector": { + "type": "object", + "additionalProperties": { + "type": "string" + } } }, "title": "ComputeTemplate can be reused by any compute units like worker group, workspace, image build job, etc", diff --git a/proto/swagger/config.swagger.json b/proto/swagger/config.swagger.json index 7548300d108..0308337c093 100644 --- a/proto/swagger/config.swagger.json +++ b/proto/swagger/config.swagger.json @@ -417,6 +417,12 @@ "format": "int64" }, "title": "Optional. 
Name and number of the extended resources" + }, + "nodeSelector": { + "type": "object", + "additionalProperties": { + "type": "string" + } } }, "title": "ComputeTemplate can be reused by any compute units like worker group, workspace, image build job, etc", From 3b54b54f3cd99f833fd3f4d1a01d582cc65c8b40 Mon Sep 17 00:00:00 2001 From: blublinsky Date: Fri, 8 Nov 2024 16:10:58 +0000 Subject: [PATCH 2/3] fixed lint --- proto/config.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proto/config.proto b/proto/config.proto index 07d4317e65a..4f22dc962e1 100644 --- a/proto/config.proto +++ b/proto/config.proto @@ -227,4 +227,4 @@ message ImageTemplate { string custom_commands = 8; // Output. The result image generated string image = 9; -} \ No newline at end of file +} From 36fb48d519f36f35eb9c7807880825d232c7d7d9 Mon Sep 17 00:00:00 2001 From: blublinsky Date: Fri, 8 Nov 2024 19:43:59 +0000 Subject: [PATCH 3/3] fixed typos --- apiserver/pkg/model/converter_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/apiserver/pkg/model/converter_test.go b/apiserver/pkg/model/converter_test.go index bdcf63706c3..3914d51380d 100644 --- a/apiserver/pkg/model/converter_test.go +++ b/apiserver/pkg/model/converter_test.go @@ -641,9 +641,6 @@ func TestPopulateTemplate(t *testing.T) { if len(template.NodeSelector) != 0 { t.Errorf("failed to convert config map, expected no node selector, got %d", len(template.NodeSelector)) } - if len(template.NodeSelector) != 0 { - t.Errorf("failed to convert config map, expected no node selector, got %d", len(template.NodeSelector)) - } if len(template.Tolerations) != 1 { t.Errorf("failed to convert config map, expected 1 toleration, got %d", len(template.Tolerations)) }