From 626fb5f2e066685d461ed49a175967d9cdb1fe1e Mon Sep 17 00:00:00 2001
From: Akihiko Kuroda
Date: Thu, 9 Nov 2023 10:37:48 -0500
Subject: [PATCH] review comments

---
Reviewer notes (this area is discarded by `git am`):
 * gateway/api/ray.py: the two warning-message fragments that contain
   placeholders are f-strings, so the requested version and the list of
   supported versions are interpolated instead of being logged as literal
   "{...}" text.
 * Leading whitespace and blank context lines were rebuilt from the hunk
   line counts; verify them against the target tree before applying.

 .../charts/gateway/templates/deployment.yaml |  6 ++++
 .../charts/gateway/values.yaml               |  3 ++
 .../migrations/0011_jobconfig_job_config.py  | 30 ++++++++-----------
 gateway/api/models.py                        | 15 +++++++++-
 gateway/api/ray.py                           | 19 +++++++-----
 gateway/api/serializers.py                   |  4 ++-
 gateway/main/settings.py                     |  6 +++-
 7 files changed, 56 insertions(+), 27 deletions(-)

diff --git a/charts/quantum-serverless/charts/gateway/templates/deployment.yaml b/charts/quantum-serverless/charts/gateway/templates/deployment.yaml
index 3d1b97bc8..af6c248cf 100644
--- a/charts/quantum-serverless/charts/gateway/templates/deployment.yaml
+++ b/charts/quantum-serverless/charts/gateway/templates/deployment.yaml
@@ -292,6 +292,12 @@ spec:
               value: {{ .Release.Namespace }}
             - name: RAY_NODE_IMAGE
               value: {{ .Values.application.ray.nodeImage | quote }}
+            - name: RAY_NODE_IMAGE_PY38
+              value: {{ .Values.application.ray.nodeImage_py38 | quote }}
+            - name: RAY_NODE_IMAGE_PY39
+              value: {{ .Values.application.ray.nodeImage_py39 | quote }}
+            - name: RAY_NODE_IMAGE_PY310
+              value: {{ .Values.application.ray.nodeImage_py310 | quote }}
             - name: LIMITS_JOBS_PER_USER
               value: {{ .Values.application.limits.maxJobsPerUser | quote }}
             - name: LIMITS_MAX_CLUSTERS
diff --git a/charts/quantum-serverless/charts/gateway/values.yaml b/charts/quantum-serverless/charts/gateway/values.yaml
index 86693f81e..08702d8ee 100644
--- a/charts/quantum-serverless/charts/gateway/values.yaml
+++ b/charts/quantum-serverless/charts/gateway/values.yaml
@@ -17,6 +17,9 @@ application:
     enable: true
   ray:
     nodeImage: "icr.io/quantum-public/quantum-serverless-ray-node:0.7.1-py39"
+    nodeImage_py38: "icr.io/quantum-public/quantum-serverless-ray-node:0.7.1-py38"
+    nodeImage_py39: "icr.io/quantum-public/quantum-serverless-ray-node:0.7.1-py39"
+    nodeImage_py310: "icr.io/quantum-public/quantum-serverless-ray-node:0.7.1-py310"
     cpu: 2
     memory: 2
     replicas: 1
diff --git a/gateway/api/migrations/0011_jobconfig_job_config.py b/gateway/api/migrations/0011_jobconfig_job_config.py
index 7d2504f39..8fc020766 100644
--- a/gateway/api/migrations/0011_jobconfig_job_config.py
+++ b/gateway/api/migrations/0011_jobconfig_job_config.py
@@ -1,6 +1,5 @@
-# Generated by Django 4.2.2 on 2023-11-03 17:54
+# Generated by Django 4.2.2 on 2023-11-09 13:06
 
-import django.core.validators
 from django.db import migrations, models
 import django.db.models.deletion
 import uuid
@@ -27,25 +26,22 @@ class Migration(migrations.Migration):
                 ),
                 ("created", models.DateTimeField(auto_now_add=True)),
                 ("auto_scaling", models.BooleanField(default=False, null=True)),
+                ("workers", models.IntegerField(null=True)),
+                ("min_workers", models.IntegerField(null=True)),
+                ("max_workers", models.IntegerField(null=True)),
                 (
-                    "workers",
-                    models.IntegerField(
+                    "python_version",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("py38", "Version 3.8"),
+                            ("py39", "Version 3.9"),
+                            ("py310", "Version 3.10"),
+                        ],
+                        max_length=6,
                         null=True,
                     ),
                 ),
-                (
-                    "min_workers",
-                    models.IntegerField(
-                        null=True,
-                    ),
-                ),
-                (
-                    "max_workers",
-                    models.IntegerField(
-                        null=True,
-                    ),
-                ),
-                ("python_version", models.TextField(blank=True, null=True)),
             ],
         ),
         migrations.AddField(
diff --git a/gateway/api/models.py b/gateway/api/models.py
index b6c78072c..65673d3c4 100644
--- a/gateway/api/models.py
+++ b/gateway/api/models.py
@@ -31,7 +31,20 @@ class JobConfig(models.Model):
     max_workers = models.IntegerField(
         null=True,
     )
-    python_version = models.TextField(null=True, blank=True)
+    PYTHON_V3_8 = "py38"
+    PYTHON_V3_9 = "py39"
+    PYTHON_V3_10 = "py310"
+    PYTHON_VERSIONS = [
+        (PYTHON_V3_8, "Version 3.8"),
+        (PYTHON_V3_9, "Version 3.9"),
+        (PYTHON_V3_10, "Version 3.10"),
+    ]
+    python_version = models.CharField(
+        max_length=6,
+        choices=PYTHON_VERSIONS,
+        null=True,
+        blank=True,
+    )
 
     def __str__(self):
         return self.id
diff --git a/gateway/api/ray.py b/gateway/api/ray.py
index f1aa897b7..f60631fc6 100644
--- a/gateway/api/ray.py
+++ b/gateway/api/ray.py
@@ -188,14 +188,19 @@ def create_ray_cluster(
         if not job_config.auto_scaling:
             job_config.auto_scaling = settings.RAY_CLUSTER_WORKER_AUTO_SCALING
         if not job_config.python_version:
-            job_config.python_version = settings.RAY_PYTHON_VERSION
+            job_config.python_version = "default"
 
-        py_version = job_config.python_version
-        node_image = (
-            settings.RAY_NODE_IMAGE[: settings.RAY_NODE_IMAGE.rindex("-")]
-            + "-"
-            + py_version
-        )
+        if job_config.python_version in settings.RAY_NODE_IMAGES_MAP:
+            node_image = settings.RAY_NODE_IMAGES_MAP[job_config.python_version]
+        else:
+            message = (
+                f"Specified python version {job_config.python_version} "
+                "not in a list of supported python versions "
+                f"{list(settings.RAY_NODE_IMAGES_MAP.keys())}. "
+                "Default image will be used instead."
+            )
+            logger.warning(message)
+            node_image = settings.RAY_NODE_IMAGE
         cluster = get_template("rayclustertemplate.yaml")
         manifest = cluster.render(
             {
diff --git a/gateway/api/serializers.py b/gateway/api/serializers.py
index 14f03a58b..ccd23a6b3 100644
--- a/gateway/api/serializers.py
+++ b/gateway/api/serializers.py
@@ -44,7 +44,9 @@ class Meta:
     auto_scaling = serializers.BooleanField(
         default=False, required=False, allow_null=True
     )
-    python_version = serializers.CharField(required=False, allow_null=True)
+    python_version = serializers.CharField(
+        required=False, allow_null=True, allow_blank=True
+    )
 
 
 class ProgramSerializer(serializers.ModelSerializer):
diff --git a/gateway/main/settings.py b/gateway/main/settings.py
index 3dec968ff..88cfc20ab 100644
--- a/gateway/main/settings.py
+++ b/gateway/main/settings.py
@@ -303,7 +303,11 @@ RAY_NODE_IMAGE = os.environ.get(
     "RAY_NODE_IMAGE", "icr.io/quantum-public/quantum-serverless-ray-node:0.6.6-py39"
 )
 
-RAY_PYTHON_VERSION = RAY_NODE_IMAGE[RAY_NODE_IMAGE.rindex("-") + 1 :]
+RAY_NODE_IMAGES_MAP = {
+    "py38": os.environ.get("RAY_NODE_IMAGE_PY38", RAY_NODE_IMAGE),
+    "py39": os.environ.get("RAY_NODE_IMAGE_PY39", RAY_NODE_IMAGE),
+    "py310": os.environ.get("RAY_NODE_IMAGE_PY310", RAY_NODE_IMAGE),
+}
 RAY_CLUSTER_WORKER_REPLICAS = int(os.environ.get("RAY_CLUSTER_WORKER_REPLICAS", "1"))
 RAY_CLUSTER_WORKER_REPLICAS_MAX = int(
     os.environ.get("RAY_CLUSTER_WORKER_REPLICAS_MAX", "5")