root/lib/pacemaker/pcmk_sched_resource.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. pcmk__rsc_agent_changed
  2. add_rsc_if_matching
  3. pcmk__rscs_matching_id
  4. set_allocation_methods_for_rsc
  5. pcmk__set_allocation_methods
  6. pcmk__colocated_resources
  7. pcmk__noop_add_graph_meta
  8. pcmk__output_resource_actions
  9. pcmk__finalize_assignment
  10. pcmk__assign_resource
  11. pcmk__unassign_resource
  12. pcmk__threshold_reached
  13. convert_const_pointer
  14. get_node_weight
  15. cmp_resources
  16. pcmk__sort_resources
  17. new_node_table
  18. apply_parent_colocations
  19. cmp_instance_by_colocation
  20. did_fail
  21. node_is_allowed
  22. pcmk__cmp_instance_number
  23. pcmk__cmp_instance

   1 /*
   2  * Copyright 2014-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <crm/msg_xml.h>
  15 #include <pacemaker-internal.h>
  16 
  17 #include "libpacemaker_private.h"
  18 
   19 // Resource allocation methods that vary by resource variant
   20 static resource_alloc_functions_t allocation_methods[] = {
       /* Indexed by resource variant — set_allocation_methods_for_rsc() selects
        * allocation_methods[rsc->variant] — so entry order below must match the
        * variant enumeration's order: primitive, group, clone, bundle.
        */
       // Primitive (native) resources
   21     {
   22         pcmk__primitive_assign,
   23         pcmk__primitive_create_actions,
   24         pcmk__probe_rsc_on_node,
   25         pcmk__primitive_internal_constraints,
   26         pcmk__primitive_apply_coloc_score,
   27         pcmk__colocated_resources,
   28         pcmk__apply_location,
   29         pcmk__primitive_action_flags,
   30         pcmk__update_ordered_actions,
   31         pcmk__output_resource_actions,
   32         pcmk__add_rsc_actions_to_graph,
   33         pcmk__primitive_add_graph_meta,
   34         pcmk__primitive_add_utilization,
   35         pcmk__primitive_shutdown_lock,
   36     },
       // Resource groups
   37     {
   38         pcmk__group_assign,
   39         pcmk__group_create_actions,
   40         pcmk__probe_rsc_on_node,
   41         pcmk__group_internal_constraints,
   42         pcmk__group_apply_coloc_score,
   43         pcmk__group_colocated_resources,
   44         pcmk__group_apply_location,
   45         pcmk__group_action_flags,
   46         pcmk__group_update_ordered_actions,
   47         pcmk__output_resource_actions,
   48         pcmk__add_rsc_actions_to_graph,
   49         pcmk__noop_add_graph_meta,
   50         pcmk__group_add_utilization,
   51         pcmk__group_shutdown_lock,
   52     },
       // Clones
   53     {
   54         pcmk__clone_allocate,
   55         clone_create_actions,
   56         clone_create_probe,
   57         clone_internal_constraints,
   58         pcmk__clone_apply_coloc_score,
   59         pcmk__colocated_resources,
   60         clone_rsc_location,
   61         clone_action_flags,
   62         pcmk__multi_update_actions,
   63         pcmk__output_resource_actions,
   64         clone_expand,
   65         clone_append_meta,
   66         pcmk__clone_add_utilization,
   67         pcmk__clone_shutdown_lock,
   68     },
       // Bundles
   69     {
   70         pcmk__bundle_allocate,
   71         pcmk__bundle_create_actions,
   72         pcmk__bundle_create_probe,
   73         pcmk__bundle_internal_constraints,
   74         pcmk__bundle_apply_coloc_score,
   75         pcmk__colocated_resources,
   76         pcmk__bundle_rsc_location,
   77         pcmk__bundle_action_flags,
   78         pcmk__multi_update_actions,
   79         pcmk__output_bundle_actions,
   80         pcmk__bundle_expand,
   81         pcmk__noop_add_graph_meta,
   82         pcmk__bundle_add_utilization,
   83         pcmk__bundle_shutdown_lock,
   84     }
   85 };
  86 
  87 /*!
  88  * \internal
  89  * \brief Check whether a resource's agent standard, provider, or type changed
  90  *
  91  * \param[in] rsc             Resource to check
  92  * \param[in] node            Node needing unfencing/restart if agent changed
  93  * \param[in] rsc_entry       XML with previously known agent information
  94  * \param[in] active_on_node  Whether \p rsc is active on \p node
  95  *
  96  * \return true if agent for \p rsc changed, otherwise false
  97  */
  98 bool
  99 pcmk__rsc_agent_changed(pe_resource_t *rsc, pe_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
 100                         const xmlNode *rsc_entry, bool active_on_node)
 101 {
 102     bool changed = false;
 103     const char *attr_list[] = {
 104         XML_ATTR_TYPE,
 105         XML_AGENT_ATTR_CLASS,
 106         XML_AGENT_ATTR_PROVIDER
 107     };
 108 
 109     for (int i = 0; i < PCMK__NELEM(attr_list); i++) {
 110         const char *value = crm_element_value(rsc->xml, attr_list[i]);
 111         const char *old_value = crm_element_value(rsc_entry, attr_list[i]);
 112 
 113         if (!pcmk__str_eq(value, old_value, pcmk__str_none)) {
 114             changed = true;
 115             trigger_unfencing(rsc, node, "Device definition changed", NULL,
 116                               rsc->cluster);
 117             if (active_on_node) {
 118                 crm_notice("Forcing restart of %s on %s "
 119                            "because %s changed from '%s' to '%s'",
 120                            rsc->id, pe__node_name(node), attr_list[i],
 121                            pcmk__s(old_value, ""), pcmk__s(value, ""));
 122             }
 123         }
 124     }
 125     if (changed && active_on_node) {
 126         // Make sure the resource is restarted
 127         custom_action(rsc, stop_key(rsc), CRMD_ACTION_STOP, node, FALSE, TRUE,
 128                       rsc->cluster);
 129         pe__set_resource_flags(rsc, pe_rsc_start_pending);
 130     }
 131     return changed;
 132 }
 133 
 134 /*!
 135  * \internal
 136  * \brief Add resource (and any matching children) to list if it matches ID
 137  *
 138  * \param[in] result  List to add resource to
 139  * \param[in] rsc     Resource to check
 140  * \param[in] id      ID to match
 141  *
 142  * \return (Possibly new) head of list
 143  */
 144 static GList *
 145 add_rsc_if_matching(GList *result, pe_resource_t *rsc, const char *id)
     /* [previous][next][first][last][top][bottom][index][help] */
 146 {
 147     if ((strcmp(rsc->id, id) == 0)
 148         || ((rsc->clone_name != NULL) && (strcmp(rsc->clone_name, id) == 0))) {
 149         result = g_list_prepend(result, rsc);
 150     }
 151     for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
 152         pe_resource_t *child = (pe_resource_t *) iter->data;
 153 
 154         result = add_rsc_if_matching(result, child, id);
 155     }
 156     return result;
 157 }
 158 
 159 /*!
 160  * \internal
 161  * \brief Find all resources matching a given ID by either ID or clone name
 162  *
 163  * \param[in] id        Resource ID to check
 164  * \param[in] data_set  Cluster working set
 165  *
 166  * \return List of all resources that match \p id
 167  * \note The caller is responsible for freeing the return value with
 168  *       g_list_free().
 169  */
 170 GList *
 171 pcmk__rscs_matching_id(const char *id, pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 172 {
 173     GList *result = NULL;
 174 
 175     CRM_CHECK((id != NULL) && (data_set != NULL), return NULL);
 176     for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
 177         result = add_rsc_if_matching(result, (pe_resource_t *) iter->data, id);
 178     }
 179     return result;
 180 }
 181 
 182 /*!
 183  * \internal
 184  * \brief Set the variant-appropriate allocation methods for a resource
 185  *
 186  * \param[in] rsc      Resource to set allocation methods for
 187  * \param[in] ignored  Only here so function can be used with g_list_foreach()
 188  */
 189 static void
 190 set_allocation_methods_for_rsc(pe_resource_t *rsc, void *ignored)
     /* [previous][next][first][last][top][bottom][index][help] */
 191 {
 192     rsc->cmds = &allocation_methods[rsc->variant];
 193     g_list_foreach(rsc->children, (GFunc) set_allocation_methods_for_rsc, NULL);
 194 }
 195 
 196 /*!
 197  * \internal
 198  * \brief Set the variant-appropriate allocation methods for all resources
 199  *
 200  * \param[in] data_set  Cluster working set
 201  */
 202 void
 203 pcmk__set_allocation_methods(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 204 {
 205     g_list_foreach(data_set->resources, (GFunc) set_allocation_methods_for_rsc,
 206                    NULL);
 207 }
 208 
 209 // Shared implementation of resource_alloc_functions_t:colocated_resources()
 210 GList *
 211 pcmk__colocated_resources(pe_resource_t *rsc, pe_resource_t *orig_rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
 212                           GList *colocated_rscs)
 213 {
 214     GList *gIter = NULL;
 215 
 216     if (orig_rsc == NULL) {
 217         orig_rsc = rsc;
 218     }
 219 
 220     if ((rsc == NULL) || (g_list_find(colocated_rscs, rsc) != NULL)) {
 221         return colocated_rscs;
 222     }
 223 
 224     pe_rsc_trace(orig_rsc, "%s is in colocation chain with %s",
 225                  rsc->id, orig_rsc->id);
 226     colocated_rscs = g_list_append(colocated_rscs, rsc);
 227 
 228     // Follow colocations where this resource is the dependent resource
 229     for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
 230         pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
 231         pe_resource_t *primary = constraint->primary;
 232 
 233         if (primary == orig_rsc) {
 234             continue; // Break colocation loop
 235         }
 236 
 237         if ((constraint->score == INFINITY) &&
 238             (pcmk__colocation_affects(rsc, primary, constraint,
 239                                       true) == pcmk__coloc_affects_location)) {
 240 
 241             colocated_rscs = primary->cmds->colocated_resources(primary,
 242                                                                 orig_rsc,
 243                                                                 colocated_rscs);
 244         }
 245     }
 246 
 247     // Follow colocations where this resource is the primary resource
 248     for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
 249         pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
 250         pe_resource_t *dependent = constraint->dependent;
 251 
 252         if (dependent == orig_rsc) {
 253             continue; // Break colocation loop
 254         }
 255 
 256         if (pe_rsc_is_clone(rsc) && !pe_rsc_is_clone(dependent)) {
 257             continue; // We can't be sure whether dependent will be colocated
 258         }
 259 
 260         if ((constraint->score == INFINITY) &&
 261             (pcmk__colocation_affects(dependent, rsc, constraint,
 262                                       true) == pcmk__coloc_affects_location)) {
 263 
 264             colocated_rscs = dependent->cmds->colocated_resources(dependent,
 265                                                                   orig_rsc,
 266                                                                   colocated_rscs);
 267         }
 268     }
 269 
 270     return colocated_rscs;
 271 }
 272 
 273 // No-op function for variants that don't need to implement add_graph_meta()
 274 void
 275 pcmk__noop_add_graph_meta(pe_resource_t *rsc, xmlNode *xml)
     /* [previous][next][first][last][top][bottom][index][help] */
 276 {
 277 }
 278 
 279 void
 280 pcmk__output_resource_actions(pe_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 281 {
 282     pcmk__output_t *out = rsc->cluster->priv;
 283 
 284     pe_node_t *next = NULL;
 285     pe_node_t *current = NULL;
 286 
 287     if (rsc->children != NULL) {
 288         for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
 289             pe_resource_t *child = (pe_resource_t *) iter->data;
 290 
 291             child->cmds->output_actions(child);
 292         }
 293         return;
 294     }
 295 
 296     next = rsc->allocated_to;
 297     if (rsc->running_on) {
 298         current = pe__current_node(rsc);
 299         if (rsc->role == RSC_ROLE_STOPPED) {
 300             /* This can occur when resources are being recovered because
 301              * the current role can change in pcmk__primitive_create_actions()
 302              */
 303             rsc->role = RSC_ROLE_STARTED;
 304         }
 305     }
 306 
 307     if ((current == NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
 308         /* Don't log stopped orphans */
 309         return;
 310     }
 311 
 312     out->message(out, "rsc-action", rsc, current, next);
 313 }
 314 
  315 /*!
  316  * \internal
  317  * \brief Assign a specified primitive resource to a node
  318  *
  319  * Assign a specified primitive resource to a specified node, if the node can
  320  * run the resource (or unconditionally, if \p force is true). Mark the resource
  321  * as no longer provisional. If the primitive can't be assigned (or \p chosen is
  322  * NULL), unassign any previous assignment for it, set its next role to stopped,
  323  * and update any existing actions scheduled for it. This is not done
  324  * recursively for children, so it should be called only for primitives.
  325  *
  326  * \param[in] rsc     Resource to assign
  327  * \param[in] chosen  Node to assign \p rsc to
  328  * \param[in] force   If true, assign to \p chosen even if unavailable
  329  *
  330  * \return true if \p rsc could be assigned, otherwise false
  331  *
  332  * \note Assigning a resource to the NULL node using this function is different
  333  *       from calling pcmk__unassign_resource(), in that it will also update any
  334  *       actions created for the resource.
  335  */
  336 bool
  337 pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen, bool force)
      /* [previous][next][first][last][top][bottom][index][help] */
  338 {
  339     pcmk__output_t *out = rsc->cluster->priv;
  340 
  341     CRM_ASSERT(rsc->variant == pe_native);
  342 
       // Unless forced, treat an unusable chosen node the same as no node at all
  343     if (!force && (chosen != NULL)) {
  344         if ((chosen->weight < 0)
  345             // Allow the graph to assume that guest node connections will come up
  346             || (!pcmk__node_available(chosen, true, false)
  347                 && !pe__is_guest_node(chosen))) {
  348 
  349             crm_debug("All nodes for resource %s are unavailable, unclean or "
  350                       "shutting down (%s can%s run resources, with weight %d)",
  351                       rsc->id, pe__node_name(chosen),
  352                       (pcmk__node_available(chosen, true, false)? "" : "not"),
  353                       chosen->weight);
  354             pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability");
  355             chosen = NULL;
  356         }
  357     }
  358 
       // Drop any previous assignment (this also releases the old node's capacity)
  359     pcmk__unassign_resource(rsc);
  360     pe__clear_resource_flags(rsc, pe_rsc_provisional);
  361 
  362     if (chosen == NULL) {
  363         crm_debug("Could not allocate a node for %s", rsc->id);
  364         pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate");
  365 
       /* With nowhere to run: stops become mandatory, starts become unrunnable,
        * and recurring actions (other than stopped-state monitors) are made
        * unrunnable as well
        */
  366         for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
  367             pe_action_t *op = (pe_action_t *) iter->data;
  368 
  369             crm_debug("Updating %s for allocation failure", op->uuid);
  370 
  371             if (pcmk__str_eq(op->task, RSC_STOP, pcmk__str_casei)) {
  372                 pe__clear_action_flags(op, pe_action_optional);
  373 
  374             } else if (pcmk__str_eq(op->task, RSC_START, pcmk__str_casei)) {
  375                 pe__clear_action_flags(op, pe_action_runnable);
  376                 //pe__set_resource_flags(rsc, pe_rsc_block);
  377 
  378             } else {
  379                 // Cancel recurring actions, unless for stopped state
  380                 const char *interval_ms_s = NULL;
  381                 const char *target_rc_s = NULL;
  382                 char *rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING);
  383 
  384                 interval_ms_s = g_hash_table_lookup(op->meta,
  385                                                     XML_LRM_ATTR_INTERVAL_MS);
  386                 target_rc_s = g_hash_table_lookup(op->meta,
  387                                                   XML_ATTR_TE_TARGET_RC);
       /* Recurring = nonzero interval; a target rc of "not running" indicates
        * a stopped-state monitor, which may stay runnable
        */
  388                 if ((interval_ms_s != NULL)
  389                     && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_none)
  390                     && !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) {
  391                     pe__clear_action_flags(op, pe_action_runnable);
  392                 }
  393                 free(rc_stopped);
  394             }
  395         }
  396         return false;
  397     }
  398 
  399     crm_debug("Assigning %s to %s", rsc->id, pe__node_name(chosen));
  400     rsc->allocated_to = pe__copy_node(chosen);
  401 
       // Record the assignment on the node side and consume the node's capacity
  402     chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc,
  403                                                     rsc);
  404     chosen->details->num_resources++;
  405     chosen->count++;
  406     pcmk__consume_node_capacity(chosen->details->utilization, rsc);
  407 
  408     if (pcmk_is_set(rsc->cluster->flags, pe_flag_show_utilization)) {
  409         out->message(out, "resource-util", rsc, chosen, __func__);
  410     }
  411     return true;
  412 }
 413 
 414 /*!
 415  * \internal
 416  * \brief Assign a specified resource (of any variant) to a node
 417  *
 418  * Assign a specified resource and its children (if any) to a specified node, if
 419  * the node can run the resource (or unconditionally, if \p force is true). Mark
 420  * the resources as no longer provisional. If the resources can't be assigned
 421  * (or \p chosen is NULL), unassign any previous assignments, set next role to
 422  * stopped, and update any existing actions scheduled for them.
 423  *
 424  * \param[in] rsc     Resource to assign
 425  * \param[in] chosen  Node to assign \p rsc to
 426  * \param[in] force   If true, assign to \p chosen even if unavailable
 427  *
 428  * \return true if \p rsc could be assigned, otherwise false
 429  *
 430  * \note Assigning a resource to the NULL node using this function is different
 431  *       from calling pcmk__unassign_resource(), in that it will also update any
 432  *       actions created for the resource.
 433  */
 434 bool
 435 pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force)
     /* [previous][next][first][last][top][bottom][index][help] */
 436 {
 437     bool changed = false;
 438 
 439     if (rsc->children == NULL) {
 440         if (rsc->allocated_to != NULL) {
 441             changed = true;
 442         }
 443         pcmk__finalize_assignment(rsc, node, force);
 444 
 445     } else {
 446         for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
 447             pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
 448 
 449             changed |= pcmk__assign_resource(child_rsc, node, force);
 450         }
 451     }
 452     return changed;
 453 }
 454 
 455 /*!
 456  * \internal
 457  * \brief Remove any assignment of a specified resource to a node
 458  *
 459  * If a specified resource has been assigned to a node, remove that assignment
 460  * and mark the resource as provisional again. This is not done recursively for
 461  * children, so it should be called only for primitives.
 462  *
 463  * \param[in] rsc  Resource to unassign
 464  */
 465 void
 466 pcmk__unassign_resource(pe_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 467 {
 468     pe_node_t *old = rsc->allocated_to;
 469 
 470     if (old == NULL) {
 471         return;
 472     }
 473 
 474     crm_info("Unassigning %s from %s", rsc->id, pe__node_name(old));
 475     pe__set_resource_flags(rsc, pe_rsc_provisional);
 476     rsc->allocated_to = NULL;
 477 
 478     /* We're going to free the pe_node_t, but its details member is shared and
 479      * will remain, so update that appropriately first.
 480      */
 481     old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc,
 482                                                 rsc);
 483     old->details->num_resources--;
 484     pcmk__release_node_capacity(old->details->utilization, rsc);
 485     free(old);
 486 }
 487 
 488 /*!
 489  * \internal
 490  * \brief Check whether a resource has reached its migration threshold on a node
 491  *
 492  * \param[in]  rsc       Resource to check
 493  * \param[in]  node      Node to check
 494  * \param[out] failed    If the threshold has been reached, this will be set to
 495  *                       the resource that failed (possibly a parent of \p rsc)
 496  *
 497  * \return true if the migration threshold has been reached, false otherwise
 498  */
 499 bool
 500 pcmk__threshold_reached(pe_resource_t *rsc, pe_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
 501                         pe_resource_t **failed)
 502 {
 503     int fail_count, remaining_tries;
 504     pe_resource_t *rsc_to_ban = rsc;
 505 
 506     // Migration threshold of 0 means never force away
 507     if (rsc->migration_threshold == 0) {
 508         return false;
 509     }
 510 
 511     // If we're ignoring failures, also ignore the migration threshold
 512     if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
 513         return false;
 514     }
 515 
 516     // If there are no failures, there's no need to force away
 517     fail_count = pe_get_failcount(node, rsc, NULL,
 518                                   pe_fc_effective|pe_fc_fillers, NULL,
 519                                   rsc->cluster);
 520     if (fail_count <= 0) {
 521         return false;
 522     }
 523 
 524     // If failed resource is anonymous clone instance, we'll force clone away
 525     if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
 526         rsc_to_ban = uber_parent(rsc);
 527     }
 528 
 529     // How many more times recovery will be tried on this node
 530     remaining_tries = rsc->migration_threshold - fail_count;
 531 
 532     if (remaining_tries <= 0) {
 533         crm_warn("%s cannot run on %s due to reaching migration threshold "
 534                  "(clean up resource to allow again)"
 535                  CRM_XS " failures=%d migration-threshold=%d",
 536                  rsc_to_ban->id, pe__node_name(node), fail_count,
 537                  rsc->migration_threshold);
 538         if (failed != NULL) {
 539             *failed = rsc_to_ban;
 540         }
 541         return true;
 542     }
 543 
 544     crm_info("%s can fail %d more time%s on "
 545              "%s before reaching migration threshold (%d)",
 546              rsc_to_ban->id, remaining_tries, pcmk__plural_s(remaining_tries),
 547              pe__node_name(node), rsc->migration_threshold);
 548     return false;
 549 }
 550 
 551 static void *
 552 convert_const_pointer(const void *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
 553 {
 554     /* Worst function ever */
 555     return (void *)ptr;
 556 }
 557 
 558 /*!
 559  * \internal
 560  * \brief Get a node's weight
 561  *
 562  * \param[in] node     Unweighted node to check (for node ID)
 563  * \param[in] nodes    List of weighted nodes to look for \p node in
 564  *
 565  * \return Node's weight, or -INFINITY if not found
 566  */
 567 static int
 568 get_node_weight(pe_node_t *node, GHashTable *nodes)
     /* [previous][next][first][last][top][bottom][index][help] */
 569 {
 570     pe_node_t *weighted_node = NULL;
 571 
 572     if ((node != NULL) && (nodes != NULL)) {
 573         weighted_node = g_hash_table_lookup(nodes, node->details->id);
 574     }
 575     return (weighted_node == NULL)? -INFINITY : weighted_node->weight;
 576 }
 577 
  578 /*!
  579  * \internal
  580  * \brief Compare two resources according to which should be allocated first
  581  *
  582  * \param[in] a     First resource to compare
  583  * \param[in] b     Second resource to compare
  584  * \param[in] data  Sorted list of all nodes in cluster
  585  *
  586  * \return -1 if \p a should be allocated before \p b, 0 if they are equal,
  587  *         or +1 if \p a should be allocated after \p b
  588  */
  589 static gint
  590 cmp_resources(gconstpointer a, gconstpointer b, gpointer data)
      /* [previous][next][first][last][top][bottom][index][help] */
  591 {
  592     const pe_resource_t *resource1 = a;
  593     const pe_resource_t *resource2 = b;
  594     GList *nodes = (GList *) data;
  595 
  596     int rc = 0;
  597     int r1_weight = -INFINITY;
  598     int r2_weight = -INFINITY;
  599     pe_node_t *r1_node = NULL;
  600     pe_node_t *r2_node = NULL;
  601     GHashTable *r1_nodes = NULL;
  602     GHashTable *r2_nodes = NULL;
  603     const char *reason = NULL;
  604 
  605     // Resources with highest priority should be allocated first
  606     reason = "priority";
  607     r1_weight = resource1->priority;
  608     r2_weight = resource2->priority;
  609     if (r1_weight > r2_weight) {
  610         rc = -1;
  611         goto done;
  612     }
  613     if (r1_weight < r2_weight) {
  614         rc = 1;
  615         goto done;
  616     }
  617 
  618     // We need nodes to make any other useful comparisons
  619     reason = "no node list";
  620     if (nodes == NULL) {
  621         goto done;
  622     }
  623 
  624     // Calculate and log node weights
       // (r1_nodes/r2_nodes are populated here and freed after the done label)
  625     pcmk__add_colocated_node_scores(convert_const_pointer(resource1),
  626                                     resource1->id, &r1_nodes, NULL, 1,
  627                                     pcmk__coloc_select_this_with);
  628     pcmk__add_colocated_node_scores(convert_const_pointer(resource2),
  629                                     resource2->id, &r2_nodes, NULL, 1,
  630                                     pcmk__coloc_select_this_with);
  631     pe__show_node_weights(true, NULL, resource1->id, r1_nodes,
  632                           resource1->cluster);
  633     pe__show_node_weights(true, NULL, resource2->id, r2_nodes,
  634                           resource2->cluster);
  635 
  636     // The resource with highest score on its current node goes first
  637     reason = "current location";
  638     if (resource1->running_on != NULL) {
  639         r1_node = pe__current_node(resource1);
  640     }
  641     if (resource2->running_on != NULL) {
  642         r2_node = pe__current_node(resource2);
  643     }
  644     r1_weight = get_node_weight(r1_node, r1_nodes);
  645     r2_weight = get_node_weight(r2_node, r2_nodes);
  646     if (r1_weight > r2_weight) {
  647         rc = -1;
  648         goto done;
  649     }
  650     if (r1_weight < r2_weight) {
  651         rc = 1;
  652         goto done;
  653     }
  654 
  655     // Otherwise a higher weight on any node will do
       // (nodes is pre-sorted, so earlier nodes are the tie-breakers)
  656     reason = "score";
  657     for (GList *iter = nodes; iter != NULL; iter = iter->next) {
  658         pe_node_t *node = (pe_node_t *) iter->data;
  659 
  660         r1_weight = get_node_weight(node, r1_nodes);
  661         r2_weight = get_node_weight(node, r2_nodes);
  662         if (r1_weight > r2_weight) {
  663             rc = -1;
  664             goto done;
  665         }
  666         if (r1_weight < r2_weight) {
  667             rc = 1;
  668             goto done;
  669         }
  670     }
  671 
  672 done:
       // Log the deciding comparison (last reason and weights evaluated above)
  673     crm_trace("%s (%d)%s%s %c %s (%d)%s%s: %s",
  674               resource1->id, r1_weight,
  675               ((r1_node == NULL)? "" : " on "),
  676               ((r1_node == NULL)? "" : r1_node->details->id),
  677               ((rc < 0)? '>' : ((rc > 0)? '<' : '=')),
  678               resource2->id, r2_weight,
  679               ((r2_node == NULL)? "" : " on "),
  680               ((r2_node == NULL)? "" : r2_node->details->id),
  681               reason);
  682     if (r1_nodes != NULL) {
  683         g_hash_table_destroy(r1_nodes);
  684     }
  685     if (r2_nodes != NULL) {
  686         g_hash_table_destroy(r2_nodes);
  687     }
  688     return rc;
  689 }
 690 
 691 /*!
 692  * \internal
 693  * \brief Sort resources in the order they should be allocated to nodes
 694  *
 695  * \param[in] data_set  Cluster working set
 696  */
 697 void
 698 pcmk__sort_resources(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 699 {
 700     GList *nodes = g_list_copy(data_set->nodes);
 701 
 702     nodes = pcmk__sort_nodes(nodes, NULL);
 703     data_set->resources = g_list_sort_with_data(data_set->resources,
 704                                                 cmp_resources, nodes);
 705     g_list_free(nodes);
 706 }
 707 
 708 /*!
 709  * \internal
 710  * \brief Create a hash table with a single node in it
 711  *
 712  * \param[in] node  Node to copy into new table
 713  *
 714  * \return Newly created hash table containing a copy of \p node
 715  * \note The caller is responsible for freeing the result with
 716  *       g_hash_table_destroy().
 717  */
 718 static GHashTable *
 719 new_node_table(pe_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 720 {
 721     GHashTable *table = pcmk__strkey_table(NULL, free);
 722 
 723     node = pe__copy_node(node);
 724     g_hash_table_insert(table, (gpointer) node->details->id, node);
 725     return table;
 726 }
 727 
 728 /*!
 729  * \internal
 730  * \brief Apply a resource's parent's colocation scores to a node table
 731  *
 732  * \param[in]     rsc    Resource whose colocations should be applied
 733  * \param[in,out] nodes  Node table to apply colocations to
 734  */
 735 static void
 736 apply_parent_colocations(const pe_resource_t *rsc, GHashTable **nodes)
     /* [previous][next][first][last][top][bottom][index][help] */
 737 {
 738     GList *iter = NULL;
 739     pcmk__colocation_t *colocation = NULL;
 740 
 741     for (iter = rsc->parent->rsc_cons; iter != NULL; iter = iter->next) {
 742         colocation = (pcmk__colocation_t *) iter->data;
 743         pcmk__add_colocated_node_scores(colocation->primary, rsc->id, nodes,
 744                                         colocation->node_attribute,
 745                                         colocation->score / (float) INFINITY,
 746                                         pcmk__coloc_select_default);
 747     }
 748     for (iter = rsc->parent->rsc_cons_lhs; iter != NULL; iter = iter->next) {
 749         colocation = (pcmk__colocation_t *) iter->data;
 750         if (!pcmk__colocation_has_influence(colocation, rsc)) {
 751             continue;
 752         }
 753         pcmk__add_colocated_node_scores(colocation->dependent, rsc->id, nodes,
 754                                         colocation->node_attribute,
 755                                         colocation->score / (float) INFINITY,
 756                                         pcmk__coloc_select_nonnegative);
 757     }
 758 }
 759 
 760 /*!
 761  * \internal
 762  * \brief Compare clone or bundle instances based on colocation scores
 763  *
 764  * Determine the relative order in which two clone or bundle instances should be
 765  * assigned to nodes, considering the scores of colocation constraints directly
 766  * or indirectly involving them.
 767  *
 768  * \param[in] instance1  First instance to compare
 769  * \param[in] instance2  Second instance to compare
 770  *
 771  * \return A negative number if \p instance1 should be assigned first,
 772  *         a positive number if \p instance2 should be assigned first,
 773  *         or 0 if assignment order doesn't matter
 774  */
 775 static int
 776 cmp_instance_by_colocation(const pe_resource_t *instance1,
     /* [previous][next][first][last][top][bottom][index][help] */
 777                            const pe_resource_t *instance2)
 778 {
 779     int rc = 0;
 780     pe_node_t *node1 = NULL;
 781     pe_node_t *node2 = NULL;
 782     pe_node_t *current_node1 = pe__current_node(instance1);
 783     pe_node_t *current_node2 = pe__current_node(instance2);
 784     GHashTable *colocated_scores1 = NULL;
 785     GHashTable *colocated_scores2 = NULL;
 786 
 787     CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL)
 788                && (instance2 != NULL) && (instance2->parent != NULL)
 789                && (current_node1 != NULL) && (current_node2 != NULL));
 790 
 791     // Create node tables initialized with each node
 792     colocated_scores1 = new_node_table(current_node1);
 793     colocated_scores2 = new_node_table(current_node2);
 794 
 795     // Apply parental colocations
 796     apply_parent_colocations(instance1, &colocated_scores1);
 797     apply_parent_colocations(instance2, &colocated_scores2);
 798 
 799     // Find original nodes again, with scores updated for colocations
 800     node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
 801     node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
 802 
 803     // Compare nodes by updated scores
 804     if (node1->weight < node2->weight) {
 805         crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
 806                   instance1->id, node1->weight, pe__node_name(node1),
 807                   instance2->id, node2->weight, pe__node_name(node2));
 808         rc = 1;
 809 
 810     } else if (node1->weight > node2->weight) {
 811         crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
 812                   instance1->id, node1->weight, pe__node_name(node1),
 813                   instance2->id, node2->weight, pe__node_name(node2));
 814         rc = -1;
 815     }
 816 
 817     g_hash_table_destroy(colocated_scores1);
 818     g_hash_table_destroy(colocated_scores2);
 819     return rc;
 820 }
 821 
 822 /*!
 823  * \internal
 824  * \brief Check whether a resource or any of its children are failed
 825  *
 826  * \param[in] rsc  Resource to check
 827  *
 828  * \return true if \p rsc or any of its children are failed, otherwise false
 829  */
 830 static bool
 831 did_fail(const pe_resource_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 832 {
 833     if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
 834         return true;
 835     }
 836     for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
 837         if (did_fail((pe_resource_t *) iter->data)) {
 838             return true;
 839         }
 840     }
 841     return false;
 842 }
 843 
 844 /*!
 845  * \internal
 846  * \brief Check whether a node is allowed to run a resource
 847  *
 848  * \param[in]     rsc   Resource to check
 849  * \param[in,out] node  Node to check (will be set NULL if not allowed)
 850  *
 851  * \return true if *node is either NULL or allowed for \p rsc, otherwise false
 852  */
 853 static bool
 854 node_is_allowed(const pe_resource_t *rsc, pe_node_t **node)
     /* [previous][next][first][last][top][bottom][index][help] */
 855 {
 856     if (*node != NULL) {
 857         pe_node_t *allowed = pe_hash_table_lookup(rsc->allowed_nodes,
 858                                                   (*node)->details->id);
 859         if ((allowed == NULL) || (allowed->weight < 0)) {
 860             pe_rsc_trace(rsc, "%s: current location (%s) is unavailable",
 861                          rsc->id, pe__node_name(*node));
 862             *node = NULL;
 863             return false;
 864         }
 865     }
 866     return true;
 867 }
 868 
 869 /*!
 870  * \internal
 871  * \brief Compare two clone or bundle instances' instance numbers
 872  *
 873  * \param[in] a  First instance to compare
 874  * \param[in] b  Second instance to compare
 875  *
 876  * \return A negative number if \p a's instance number is lower,
 877  *         a positive number if \p b's instance number is lower,
 878  *         or 0 if their instance numbers are the same
 879  */
 880 gint
 881 pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
 882 {
 883     const pe_resource_t *instance1 = (const pe_resource_t *) a;
 884     const pe_resource_t *instance2 = (const pe_resource_t *) b;
 885     char *div1 = NULL;
 886     char *div2 = NULL;
 887 
 888     CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
 889 
 890     // Clone numbers are after a colon, bundle numbers after a dash
 891     div1 = strrchr(instance1->id, ':');
 892     if (div1 == NULL) {
 893         div1 = strrchr(instance1->id, '-');
 894     }
 895     div2 = strrchr(instance2->id, ':');
 896     if (div2 == NULL) {
 897         div2 = strrchr(instance2->id, '-');
 898     }
 899     CRM_ASSERT((div1 != NULL) && (div2 != NULL));
 900 
 901     return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
 902 }
 903 
 904 /*!
 905  * \internal
 906  * \brief Compare clone or bundle instances according to assignment order
 907  *
 908  * Compare two clone or bundle instances according to the order they should be
 909  * assigned to nodes, preferring (in order):
 910  *
 911  *  - Active instance that is less multiply active
 912  *  - Instance that is not active on a disallowed node
 913  *  - Instance with higher configured priority
 914  *  - Active instance whose current node can run resources
 915  *  - Active instance whose parent is allowed on current node
 916  *  - Active instance whose current node has fewer other instances
 917  *  - Active instance
 918  *  - Failed instance
 919  *  - Instance whose colocations result in higher score on current node
 920  *  - Instance with lower ID in lexicographic order
 921  *
 922  * \param[in] a          First instance to compare
 923  * \param[in] b          Second instance to compare
 924  *
 925  * \return A negative number if \p a should be assigned first,
 926  *         a positive number if \p b should be assigned first,
 927  *         or 0 if assignment order doesn't matter
 928  */
gint
pcmk__cmp_instance(gconstpointer a, gconstpointer b)
{
    int rc = 0;
    // node1/node2 hold each instance's current node, and are later replaced
    // by the parent's corresponding allowed-node entries
    pe_node_t *node1 = NULL;
    pe_node_t *node2 = NULL;
    unsigned int nnodes1 = 0;
    unsigned int nnodes2 = 0;

    // can1/can2 are reused for successive yes/no checks (allowed node,
    // node availability, failure status)
    bool can1 = true;
    bool can2 = true;

    const pe_resource_t *instance1 = (const pe_resource_t *) a;
    const pe_resource_t *instance2 = (const pe_resource_t *) b;

    CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));

    node1 = pe__find_active_on(instance1, &nnodes1, NULL);
    node2 = pe__find_active_on(instance2, &nnodes2, NULL);

    /* If both instances are running and at least one is multiply
     * active, prefer instance that's running on fewer nodes.
     */
    if ((nnodes1 > 0) && (nnodes2 > 0)) {
        if (nnodes1 < nnodes2) {
            crm_trace("Assign %s (active on %d) before %s (active on %d): "
                      "less multiply active",
                      instance1->id, nnodes1, instance2->id, nnodes2);
            return -1;

        } else if (nnodes1 > nnodes2) {
            crm_trace("Assign %s (active on %d) after %s (active on %d): "
                      "more multiply active",
                      instance1->id, nnodes1, instance2->id, nnodes2);
            return 1;
        }
    }

    /* An instance that is either inactive or active on an allowed node is
     * preferred over an instance that is active on a no-longer-allowed node.
     * (node_is_allowed() also NULLs node1/node2 if disallowed.)
     */
    can1 = node_is_allowed(instance1, &node1);
    can2 = node_is_allowed(instance2, &node2);
    if (can1 && !can2) {
        crm_trace("Assign %s before %s: not active on a disallowed node",
                  instance1->id, instance2->id);
        return -1;

    } else if (!can1 && can2) {
        crm_trace("Assign %s after %s: active on a disallowed node",
                  instance1->id, instance2->id);
        return 1;
    }

    // Prefer instance with higher configured priority
    if (instance1->priority > instance2->priority) {
        crm_trace("Assign %s before %s: priority (%d > %d)",
                  instance1->id, instance2->id,
                  instance1->priority, instance2->priority);
        return -1;

    } else if (instance1->priority < instance2->priority) {
        crm_trace("Assign %s after %s: priority (%d < %d)",
                  instance1->id, instance2->id,
                  instance1->priority, instance2->priority);
        return 1;
    }

    // Prefer active instance (a NULL node here means inactive or disallowed)
    if ((node1 == NULL) && (node2 == NULL)) {
        crm_trace("No assignment preference for %s vs. %s: inactive",
                  instance1->id, instance2->id);
        return 0;

    } else if (node1 == NULL) {
        crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
        return 1;

    } else if (node2 == NULL) {
        crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
        return -1;
    }

    // Prefer instance whose current node can run resources
    can1 = pcmk__node_available(node1, false, false);
    can2 = pcmk__node_available(node2, false, false);
    if (can1 && !can2) {
        crm_trace("Assign %s before %s: current node can run resources",
                  instance1->id, instance2->id);
        return -1;

    } else if (!can1 && can2) {
        crm_trace("Assign %s after %s: current node can't run resources",
                  instance1->id, instance2->id);
        return 1;
    }

    // Prefer instance whose parent is allowed to run on instance's current node
    // (node1/node2 now become the parent's allowed-node entries, or NULL)
    node1 = pcmk__top_allowed_node(instance1, node1);
    node2 = pcmk__top_allowed_node(instance2, node2);
    if ((node1 == NULL) && (node2 == NULL)) {
        crm_trace("No assignment preference for %s vs. %s: "
                  "parent not allowed on either instance's current node",
                  instance1->id, instance2->id);
        return 0;

    } else if (node1 == NULL) {
        crm_trace("Assign %s after %s: parent not allowed on current node",
                  instance1->id, instance2->id);
        return 1;

    } else if (node2 == NULL) {
        crm_trace("Assign %s before %s: parent allowed on current node",
                  instance1->id, instance2->id);
        return -1;
    }

    // Prefer instance whose current node is running fewer other instances
    if (node1->count < node2->count) {
        crm_trace("Assign %s before %s: fewer active instances on current node",
                  instance1->id, instance2->id);
        return -1;

    } else if (node1->count > node2->count) {
        crm_trace("Assign %s after %s: more active instances on current node",
                  instance1->id, instance2->id);
        return 1;
    }

    // Prefer failed instance (so it can be recovered on its current node first)
    can1 = did_fail(instance1);
    can2 = did_fail(instance2);
    if (!can1 && can2) {
        crm_trace("Assign %s before %s: failed", instance1->id, instance2->id);
        return -1;
    } else if (can1 && !can2) {
        crm_trace("Assign %s after %s: not failed",
                  instance1->id, instance2->id);
        return 1;
    }

    // Prefer instance with higher cumulative colocation score on current node
    rc = cmp_instance_by_colocation(instance1, instance2);
    if (rc != 0) {
        return rc;
    }

    // Prefer instance with lower instance number (final tie-breaker)
    rc = pcmk__cmp_instance_number(instance1, instance2);
    if (rc < 0) {
        crm_trace("Assign %s before %s: instance number",
                  instance1->id, instance2->id);
    } else if (rc > 0) {
        crm_trace("Assign %s after %s: instance number",
                  instance1->id, instance2->id);
    } else {
        crm_trace("No assignment preference for %s vs. %s",
                  instance1->id, instance2->id);
    }
    return rc;
}

/* [previous][next][first][last][top][bottom][index][help] */