Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow assigning resources to nodes with finite negative scores #3802

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Next Next commit
Refactor: scheduler: convert boolean arguments to flags
... in pcmk__node_available(), to make it easier to add new conditions.

I went through the callers to see if any others should reject guest
nodes with unrunnable guests, and it appears not.
kgaillot committed Jan 14, 2025
commit ae46153035bbc4ae86344a2efd5ace3ff53be656
20 changes: 17 additions & 3 deletions lib/pacemaker/libpacemaker_private.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2021-2024 the Pacemaker project contributors
* Copyright 2021-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -994,9 +994,23 @@ xmlNode *pcmk__inject_action_result(xmlNode *cib_resource,

// Nodes (pcmk_sched_nodes.c)

//! Flags selecting which conditions make a node count as unavailable
enum pcmk__node_availability {
    //! Reject nodes that are offline or unclean. This check is always implied:
    //! the value is zero, so OR-ing it with other flags changes nothing, and
    //! it only makes the intent explicit where flag groups are built.
    pcmk__node_alive                = 0x00,

    //! Reject nodes that are shutting down, in standby, or in maintenance mode
    pcmk__node_usable               = 0x01,     // bit 0

    //! Reject nodes whose score is negative
    pcmk__node_no_negative          = 0x04,     // bit 2

    //! Reject guest nodes whose guest resource is unrunnable
    pcmk__node_no_unrunnable_guest  = 0x10,     // bit 4
};

G_GNUC_INTERNAL
bool pcmk__node_available(const pcmk_node_t *node, bool consider_score,
bool consider_guest);
bool pcmk__node_available(const pcmk_node_t *node, uint32_t flags);

G_GNUC_INTERNAL
bool pcmk__any_node_available(GHashTable *nodes);
2 changes: 1 addition & 1 deletion lib/pacemaker/pcmk_sched_actions.c
Original file line number Diff line number Diff line change
@@ -1940,7 +1940,7 @@ pcmk__handle_rsc_config_changes(pcmk_scheduler_t *scheduler)
* cancel any existing recurring monitors.
*/
if (node->details->maintenance
|| pcmk__node_available(node, false, false)) {
|| pcmk__node_available(node, pcmk__node_alive|pcmk__node_usable)) {

char *xpath = NULL;
xmlNode *history = NULL;
4 changes: 2 additions & 2 deletions lib/pacemaker/pcmk_sched_colocation.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -1578,7 +1578,7 @@ best_node_score_matching_attr(const pcmk__colocation_t *colocation,
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {

if ((node->assign->score > best_score)
&& pcmk__node_available(node, false, false)
&& pcmk__node_available(node, pcmk__node_alive|pcmk__node_usable)
&& pcmk__str_eq(value, pcmk__colocation_node_attr(node, attr, rsc),
pcmk__str_casei)) {

18 changes: 11 additions & 7 deletions lib/pacemaker/pcmk_sched_instances.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -38,7 +38,7 @@ can_run_instance(const pcmk_resource_t *instance, const pcmk_node_t *node,
return false;
}

if (!pcmk__node_available(node, false, false)) {
if (!pcmk__node_available(node, pcmk__node_alive|pcmk__node_usable)) {
pcmk__rsc_trace(instance,
"%s cannot run on %s: node cannot run resources",
instance->id, pcmk__node_name(node));
@@ -434,8 +434,8 @@ pcmk__cmp_instance(gconstpointer a, gconstpointer b)
}

// Prefer instance whose current node can run resources
can1 = pcmk__node_available(node1, false, false);
can2 = pcmk__node_available(node2, false, false);
can1 = pcmk__node_available(node1, pcmk__node_alive|pcmk__node_usable);
can2 = pcmk__node_available(node2, pcmk__node_alive|pcmk__node_usable);
if (can1 && !can2) {
crm_trace("Assign %s before %s: current node can run resources",
instance1->id, instance2->id);
@@ -613,7 +613,9 @@ assign_instance_early(const pcmk_resource_t *rsc, pcmk_resource_t *instance,

allowed_node = g_hash_table_lookup(instance->priv->allowed_nodes,
current->priv->id);
if (!pcmk__node_available(allowed_node, true, false)) {
if (!pcmk__node_available(allowed_node, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)) {
pcmk__rsc_info(instance,
"Not assigning %s to current node %s: unavailable",
instance->id, pcmk__node_name(current));
@@ -726,7 +728,7 @@ reset_allowed_node_counts(pcmk_resource_t *rsc)
g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
node->assign->count = 0;
if (pcmk__node_available(node, false, false)) {
if (pcmk__node_available(node, pcmk__node_alive|pcmk__node_usable)) {
available_nodes++;
}
}
@@ -757,7 +759,9 @@ preferred_node(const pcmk_resource_t *instance, int optimal_per_node)

// Check whether instance's current node can run resources
node = pcmk__current_node(instance);
if (!pcmk__node_available(node, true, false)) {
if (!pcmk__node_available(node, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)) {
pcmk__rsc_trace(instance, "Not assigning %s to %s early (unavailable)",
instance->id, pcmk__node_name(node));
return NULL;
41 changes: 23 additions & 18 deletions lib/pacemaker/pcmk_sched_nodes.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -18,31 +18,34 @@
* \internal
* \brief Check whether a node is available to run resources
*
* \param[in] node Node to check
* \param[in] consider_score If true, consider a negative score unavailable
* \param[in] consider_guest If true, consider a guest node unavailable whose
* resource will not be active
* \param[in] node Node to check
* \param[in] flags Group of enum pcmk__node_availability flags
*
* \return true if node is online and not shutting down, unclean, or in standby
* or maintenance mode, otherwise false
* \return true if node is available per flags, otherwise false
*/
bool
pcmk__node_available(const pcmk_node_t *node, bool consider_score,
bool consider_guest)
pcmk__node_available(const pcmk_node_t *node, uint32_t flags)
{
if ((node == NULL) || (node->details == NULL) || !node->details->online
|| node->details->shutdown || node->details->unclean
// pcmk__node_alive is implicit
if ((node == NULL) || (node->details == NULL)
|| !node->details->online || node->details->unclean) {
return false;
}

if (pcmk_is_set(flags, pcmk__node_usable)
&& (node->details->shutdown
|| pcmk_is_set(node->priv->flags, pcmk__node_standby)
|| node->details->maintenance) {
|| node->details->maintenance)) {
return false;
}

if (consider_score && (node->assign->score < 0)) {
if (pcmk_is_set(flags, pcmk__node_no_negative)
&& (node->assign->score < 0)) {
return false;
}

// @TODO Go through all callers to see which should set consider_guest
if (consider_guest && pcmk__is_guest_or_bundle_node(node)) {
if (pcmk_is_set(flags, pcmk__node_no_unrunnable_guest)
&& pcmk__is_guest_or_bundle_node(node)) {
pcmk_resource_t *guest = node->priv->remote->priv->launcher;

if (guest->priv->fns->location(guest, NULL,
@@ -226,10 +229,10 @@ compare_nodes(gconstpointer a, gconstpointer b, gpointer data)

// Compare node scores

if (pcmk__node_available(node1, false, false)) {
if (pcmk__node_available(node1, pcmk__node_alive|pcmk__node_usable)) {
node1_score = node1->assign->score;
}
if (pcmk__node_available(node2, false, false)) {
if (pcmk__node_available(node2, pcmk__node_alive|pcmk__node_usable)) {
node2_score = node2->assign->score;
}

@@ -351,7 +354,9 @@ pcmk__any_node_available(GHashTable *nodes)
}
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (pcmk__node_available(node, true, false)) {
if (pcmk__node_available(node, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)) {
return true;
}
}
13 changes: 9 additions & 4 deletions lib/pacemaker/pcmk_sched_primitive.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -206,7 +206,9 @@ assign_best_node(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
pcmk__node_name(chosen), rsc->id);
chosen = NULL;

} else if (!pcmk__node_available(chosen, true, false)) {
} else if (!pcmk__node_available(chosen, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)) {
pcmk__rsc_trace(rsc, "Preferred node %s for %s was unavailable",
pcmk__node_name(chosen), rsc->id);
chosen = NULL;
@@ -229,7 +231,8 @@ assign_best_node(pcmk_resource_t *rsc, const pcmk_node_t *prefer,

if (!pcmk__is_unique_clone(rsc->priv->parent)
&& (chosen->assign->score > 0) // Zero not acceptable
&& pcmk__node_available(chosen, false, false)) {
&& pcmk__node_available(chosen,
pcmk__node_alive|pcmk__node_usable)) {
/* If the resource is already running on a node, prefer that node if
* it is just as good as the chosen node.
*
@@ -244,7 +247,9 @@ assign_best_node(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
if (running == NULL) {
// Nothing to do

} else if (!pcmk__node_available(running, true, false)) {
} else if (!pcmk__node_available(running, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)) {
pcmk__rsc_trace(rsc,
"Current node for %s (%s) can't run resources",
rsc->id, pcmk__node_name(running));
9 changes: 6 additions & 3 deletions lib/pacemaker/pcmk_sched_promotable.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -206,7 +206,9 @@ node_to_be_promoted_on(const pcmk_resource_t *rsc)
rsc->id, rsc->priv->priority);
return NULL;

} else if (!pcmk__node_available(node, false, true)) {
} else if (!pcmk__node_available(node, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_unrunnable_guest)) {
pcmk__rsc_trace(rsc,
"%s can't be promoted because %s can't run resources",
rsc->id, pcmk__node_name(node));
@@ -801,7 +803,8 @@ pcmk__add_promotion_scores(pcmk_resource_t *rsc)

g_hash_table_iter_init(&iter, child_rsc->priv->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (!pcmk__node_available(node, false, false)) {
if (!pcmk__node_available(node,
pcmk__node_alive|pcmk__node_usable)) {
/* This node will never be promoted, so don't apply the
* promotion score, as that may lead to clone shuffling.
*/
37 changes: 21 additions & 16 deletions lib/pacemaker/pcmk_sched_resource.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2014-2024 the Pacemaker project contributors
* Copyright 2014-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -436,23 +436,28 @@ pcmk__assign_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool force,

// Assigning a primitive

if (!force && (node != NULL)
&& ((node->assign->score < 0)
if (!force && (node != NULL)) {
bool available = pcmk__node_available(node, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative);

if ((node->assign->score < 0)
// Allow graph to assume that guest node connections will come up
|| (!pcmk__node_available(node, true, false)
&& !pcmk__is_guest_or_bundle_node(node)))) {

pcmk__rsc_debug(rsc,
"All nodes for resource %s are unavailable, unclean or "
"shutting down (%s can%s run resources, with score %s)",
rsc->id, pcmk__node_name(node),
(pcmk__node_available(node, true, false)? "" : "not"),
pcmk_readable_score(node->assign->score));

if (stop_if_fail) {
pe__set_next_role(rsc, pcmk_role_stopped, "node availability");
|| (!available && !pcmk__is_guest_or_bundle_node(node))) {

pcmk__rsc_debug(rsc,
"All nodes for resource %s are unavailable, "
"unclean or shutting down (%s can%s run "
"resources, with score %s)",
rsc->id, pcmk__node_name(node),
(available? "" : "not"),
pcmk_readable_score(node->assign->score));

if (stop_if_fail) {
pe__set_next_role(rsc, pcmk_role_stopped, "node availability");
}
node = NULL;
}
node = NULL;
}

if (rsc->priv->assigned_node != NULL) {
14 changes: 10 additions & 4 deletions lib/pacemaker/pcmk_sched_utilization.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2014-2024 the Pacemaker project contributors
* Copyright 2014-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -336,7 +336,9 @@ pcmk__ban_insufficient_capacity(pcmk_resource_t *rsc)
// Check whether any node has enough capacity for all the resources
g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (!pcmk__node_available(node, true, false)) {
if (!pcmk__node_available(node, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)) {
continue;
}

@@ -355,7 +357,9 @@ pcmk__ban_insufficient_capacity(pcmk_resource_t *rsc)
// If so, ban resource from any node with insufficient capacity
g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (pcmk__node_available(node, true, false)
if (pcmk__node_available(node, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)
&& !have_enough_capacity(node, rscs_id,
unassigned_utilization)) {
pcmk__rsc_debug(rsc, "%s does not have enough capacity for %s",
@@ -371,7 +375,9 @@ pcmk__ban_insufficient_capacity(pcmk_resource_t *rsc)
// Otherwise, ban from nodes with insufficient capacity for rsc alone
g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
if (pcmk__node_available(node, true, false)
if (pcmk__node_available(node, pcmk__node_alive
|pcmk__node_usable
|pcmk__node_no_negative)
&& !have_enough_capacity(node, rsc->id,
rsc->priv->utilization)) {
pcmk__rsc_debug(rsc, "%s does not have enough capacity for %s",