From d5c35c271fd372be944f581b0802f4abe8db5ed0 Mon Sep 17 00:00:00 2001 From: Nikolaos Triantafyllis Date: Mon, 12 Feb 2024 20:18:10 +0200 Subject: [PATCH 1/2] Make ml optional --- etc/oar/admission_rules.d/15_check_types.py | 2 +- etc/oar/admission_rules.d/25_ml_model.py | 31 ++++++++++----------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/etc/oar/admission_rules.d/15_check_types.py b/etc/oar/admission_rules.d/15_check_types.py index 7442a7f8..a8391458 100644 --- a/etc/oar/admission_rules.d/15_check_types.py +++ b/etc/oar/admission_rules.d/15_check_types.py @@ -9,7 +9,7 @@ r8 = "^allowed=\\w+$" r9 = "^inner=\\w+$" r10 = "^timesharing=(?:(?:\\*|user),(?:\\*|name)|(?:\\*|name),(?:\\*|user))$" - r11 = "^(?:compact|spread|no_pref)$" + r11 = "^(?:compact|spread|no_pref|ml)$" all_re = re.compile( "(%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s)" % (r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11) diff --git a/etc/oar/admission_rules.d/25_ml_model.py b/etc/oar/admission_rules.d/25_ml_model.py index c4b9cc5b..cee0433f 100644 --- a/etc/oar/admission_rules.d/25_ml_model.py +++ b/etc/oar/admission_rules.d/25_ml_model.py @@ -199,19 +199,18 @@ def estimate_job_nb_resources_in_spread( return type_from_ml -# check if user wants specifically a particular tag -# if yes, let it be -# if no, use ml model to define it -# this is: if no -if ( - ("find=compact" not in types) - and ("find=spread" not in types) - and ("find=no_pref" not in types) - and ("compact" not in types) - and ("spread" not in types) - and ("no_pref" not in types) -): - - # type_from_ml can be: "find=compact" or "find=spread" or "find=no_pref" - type_from_ml = model(session, config, resource_request, properties, name) - types.append(type_from_ml) +# make sure that user specifies "ml" without +# "compact", "spread", or "no_pref" tag +if "ml" in types: + if ( + ("find=compact" not in types) + and ("find=spread" not in types) + and ("find=no_pref" not in types) + and ("compact" not in types) + and ("spread" not in 
types) + and ("no_pref" not in types) + ): + + # type_from_ml can be: "find=compact" or "find=spread" or "find=no_pref" + type_from_ml = model(session, config, resource_request, properties, name) + types.append(type_from_ml) From e9d6c7fc5e9ac62947b355f85d9afd8555736eb4 Mon Sep 17 00:00:00 2001 From: nikosT Date: Thu, 22 Feb 2024 00:21:10 +0200 Subject: [PATCH 2/2] Adding new policy r_spread, which works exactly as spread but starts from less free cores allocation --- etc/oar/admission_rules.d/15_check_types.py | 2 +- etc/oar/admission_rules.d/26_r_spread.py | 121 ++++++++++++++++++++ oar/kao/custom_scheduling.py | 21 +++- pyproject.toml | 1 + 4 files changed, 142 insertions(+), 3 deletions(-) create mode 100644 etc/oar/admission_rules.d/26_r_spread.py diff --git a/etc/oar/admission_rules.d/15_check_types.py b/etc/oar/admission_rules.d/15_check_types.py index a8391458..85a97f7a 100644 --- a/etc/oar/admission_rules.d/15_check_types.py +++ b/etc/oar/admission_rules.d/15_check_types.py @@ -9,7 +9,7 @@ r8 = "^allowed=\\w+$" r9 = "^inner=\\w+$" r10 = "^timesharing=(?:(?:\\*|user),(?:\\*|name)|(?:\\*|name),(?:\\*|user))$" - r11 = "^(?:compact|spread|no_pref|ml)$" + r11 = "^(?:compact|spread|r_spread|no_pref|ml)$" all_re = re.compile( "(%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s)" % (r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11) diff --git a/etc/oar/admission_rules.d/26_r_spread.py b/etc/oar/admission_rules.d/26_r_spread.py new file mode 100644 index 00000000..57745636 --- /dev/null +++ b/etc/oar/admission_rules.d/26_r_spread.py @@ -0,0 +1,121 @@ +def estimate_job_nb_resources_in_spread( + session, config, resource_request, j_properties +): + """Estimate, for each moldable resource request, how many resources the job could obtain; returns a 3-tuple: + ((error_code, error_msg), is_resource_available, [(estimated_nb_res, walltime), ...]), or ((error_code, error_msg), None, None) with a negative error_code on failure. + """ + from sqlalchemy import exc + + # estimate_job_nb_resources + estimated_nb_resources = [] + is_resource_available = False + resource_set = ResourceSet(session, config) +
resources_itvs = resource_set.roid_itvs + + for mld_idx, mld_resource_request in enumerate(resource_request): + resource_desc, walltime = mld_resource_request + + if not walltime: + walltime = str(config["DEFAULT_JOB_WALLTIME"]) + + estimated_nb_res = 0 + + for prop_res in resource_desc: + jrg_grp_property = prop_res["property"] + resource_value_lst = prop_res["resources"] + + # + # determine resource constraints + # + if (not j_properties) and ( + not jrg_grp_property or (jrg_grp_property == "type='default'") + ): # TODO change to re.match + # copy itvs + constraints = copy.copy(resource_set.roid_itvs) + else: + and_sql = "" + if j_properties and jrg_grp_property: + and_sql = " AND " + if j_properties is None: + j_properties = "" + if jrg_grp_property is None: + jrg_grp_property = "" + + sql_constraints = j_properties + and_sql + jrg_grp_property + + try: + request_constraints = ( + session.query(Resource.id).filter(text(sql_constraints)).all() + ) + except exc.SQLAlchemyError as err: + error_code = -5 + error_msg = ( + "Bad resource SQL constraints request:" + + sql_constraints + + "\n" + + "SQLAlchemyError: " + + str(err) + ) + error = (error_code, error_msg) + return (error, None, None) + + roids = [resource_set.rid_i2o[int(y[0])] for y in request_constraints] + constraints = ProcSet(*roids) + + hy_levels = [] + hy_nbs = [] + for resource_value in resource_value_lst: + res_name = resource_value["resource"] + if res_name not in resource_set.hierarchy: + possible_options = ", ".join(resource_set.hierarchy.keys()) + error_code = -3 + error_msg = ( + f"Bad resources name: {res_name} is not a valid resources name. "
+ f"Valid resource names are: {possible_options}" + ) + error = (error_code, error_msg) + return (error, None, None) + + value = resource_value["value"] + hy_levels.append(resource_set.hierarchy[res_name]) + hy_nbs.append(int(value)) + + cts_resources_itvs = constraints & resources_itvs + + for soc in resource_set.hierarchy["cpu"]: + avail_cores = soc & cts_resources_itvs + cts_resources_itvs -= ProcSet( + *avail_cores[int(len(soc) / 2) : len(soc)] + ) + + res_itvs = find_resource_hierarchies_scattered( + cts_resources_itvs, hy_levels, hy_nbs + ) + if res_itvs: + estimated_nb_res += len(res_itvs) + # break + + if estimated_nb_res > 0: + is_resource_available = True + + estimated_nb_resources.append((estimated_nb_res, walltime)) + + if not is_resource_available: + error = (-5, "There are not enough resources for your request") + return (error, None, None) + + return ((0, ""), is_resource_available, estimated_nb_resources) + + +if "r_spread" in types: + types = ["find=r_spread" if t == "r_spread" else t for t in types] + + if ( + estimate_job_nb_resources_in_spread( + session, config, resource_request, properties + )[0][0] + < 0 + ): + raise Exception( + "# ADMISSION RULE> There are not enough resources for your request using the spread method" + ) diff --git a/oar/kao/custom_scheduling.py b/oar/kao/custom_scheduling.py index d22efa8a..d345a550 100644 --- a/oar/kao/custom_scheduling.py +++ b/oar/kao/custom_scheduling.py @@ -71,7 +71,7 @@ def compact(itvs_slots, hy_res_rqts, hy, beginning_slotset, reverse=True): return result -def spread(itvs_slots, hy_res_rqts, hy, beginning_slotset): +def spread(itvs_slots, hy_res_rqts, hy, beginning_slotset, reverse=False): """ Given a job resource request and a set of resources this function tries to find a matching allocation.
@@ -112,7 +112,7 @@ def spread(itvs_slots, hy_res_rqts, hy, beginning_slotset): key=lambda i: [ len(prev & itvs_cts_slots2) for prev in path(i, hy) ], - reverse=True, + reverse=not reverse, ), hy_levels, ) @@ -130,6 +130,23 @@ def spread(itvs_slots, hy_res_rqts, hy, beginning_slotset): return result +def r_spread(itvs_slots, hy_res_rqts, hy, beginning_slotset): + """ + Given a job resource request and a set of resources this function tries to find a matching allocation by calling :func:`spread` with ``reverse=True``. + + .. note:: + This can be overridden with the oar `extension <../admin/extensions.html#functions-assign-and-find>`_ mechanism. + + :param itvs_slots: A procset of the resources available for the allocation + :type itvs_slots: :class:`procset.ProcSet` + :param hy_res_rqts: The job's request + :param hy: The definition of the resources hierarchy + :return [ProcSet]: \ + The allocation if found, otherwise an empty :class:`procset.ProcSet` + """ + return spread(itvs_slots, hy_res_rqts, hy, beginning_slotset, reverse=True) + + def no_pref(itvs_slots, hy_res_rqts, hy, beginning_slotset): """ Given a job resource request and a set of resources this function tries to find a matching allocation. diff --git a/pyproject.toml b/pyproject.toml index 3e597a55..8d818b1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -125,4 +125,5 @@ build-backend = "poetry.masonry.api" [tool.poetry.plugins."oar.find_func"] compact = "oar.kao.custom_scheduling:compact" spread = "oar.kao.custom_scheduling:spread" +r_spread = "oar.kao.custom_scheduling:r_spread" no_pref = "oar.kao.custom_scheduling:no_pref"