root/lib/pacemaker/pcmk_sched_primitive.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sorted_allowed_nodes
  2. assign_best_node
  3. apply_this_with
  4. remote_connection_assigned
  5. pcmk__primitive_assign
  6. schedule_restart_actions
  7. set_default_next_role
  8. create_pending_start
  9. schedule_role_transition_actions
  10. pcmk__primitive_create_actions
  11. rsc_avoids_remote_nodes
  12. allowed_nodes_as_list
  13. pcmk__primitive_internal_constraints
  14. pcmk__primitive_apply_coloc_score
  15. pcmk__with_primitive_colocations
  16. pcmk__primitive_with_colocations
  17. pcmk__primitive_action_flags
  18. is_expected_node
  19. stop_resource
  20. start_resource
  21. promote_resource
  22. demote_resource
  23. assert_role_error
  24. pcmk__schedule_cleanup
  25. pcmk__primitive_add_graph_meta
  26. pcmk__primitive_add_utilization
  27. shutdown_time
  28. ban_if_not_locked
  29. pcmk__primitive_shutdown_lock

   1 /*
   2  * Copyright 2004-2023 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <stdbool.h>
  13 #include <stdint.h>                 // uint8_t, uint32_t
  14 
  15 #include <crm/msg_xml.h>
  16 #include <pacemaker-internal.h>
  17 
  18 #include "libpacemaker_private.h"
  19 
  20 static void stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
  21                           bool optional);
  22 static void start_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
  23                            bool optional);
  24 static void demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
  25                             bool optional);
  26 static void promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
  27                              bool optional);
  28 static void assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node,
  29                               bool optional);
  30 
  31 #define RSC_ROLE_MAX    (pcmk_role_promoted + 1)
  32 
static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
    /* This array lists the immediate next role when transitioning from one role
     * to a target role. For example, when going from Stopped to Promoted, the
     * next role is Unpromoted, because the resource must be started before it
     * can be promoted. The current state then becomes Started, which is fed
     * into this array again, giving a next role of Promoted.
     *
     * Indexing convention: rsc_state_matrix[current role][target role].
     * Rows and columns are ordered per enum rsc_role_e
     * (Unknown, Stopped, Started, Unpromoted, Promoted).
     *
     * Current role       Immediate next role   Final target role
     * ------------       -------------------   -----------------
     */
    /* Unknown */       { pcmk_role_unknown,    /* Unknown */
                          pcmk_role_stopped,    /* Stopped */
                          pcmk_role_stopped,    /* Started */
                          pcmk_role_stopped,    /* Unpromoted */
                          pcmk_role_stopped,    /* Promoted */
                        },
    /* Stopped */       { pcmk_role_stopped,    /* Unknown */
                          pcmk_role_stopped,    /* Stopped */
                          pcmk_role_started,    /* Started */
                          pcmk_role_unpromoted, /* Unpromoted */
                          pcmk_role_unpromoted, /* Promoted */
                        },
    /* Started */       { pcmk_role_stopped,    /* Unknown */
                          pcmk_role_stopped,    /* Stopped */
                          pcmk_role_started,    /* Started */
                          pcmk_role_unpromoted, /* Unpromoted */
                          pcmk_role_promoted,   /* Promoted */
                        },
    /* Unpromoted */    { pcmk_role_stopped,    /* Unknown */
                          pcmk_role_stopped,    /* Stopped */
                          pcmk_role_stopped,    /* Started */
                          pcmk_role_unpromoted, /* Unpromoted */
                          pcmk_role_promoted,   /* Promoted */
                        },
    /* Promoted  */     { pcmk_role_stopped,    /* Unknown */
                          pcmk_role_unpromoted, /* Stopped */
                          pcmk_role_unpromoted, /* Started */
                          pcmk_role_unpromoted, /* Unpromoted */
                          pcmk_role_promoted,   /* Promoted */
                        },
};
  74 
/*!
 * \internal
 * \brief Function to schedule actions needed for a role change
 *
 * \param[in,out] rsc       Resource whose role is changing
 * \param[in,out] node      Node where resource will be in its next role
 * \param[in]     optional  Whether scheduled actions should be optional
 */
typedef void (*rsc_transition_fn)(pcmk_resource_t *rsc, pcmk_node_t *node,
                                  bool optional);

static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
    /* This array lists the function needed to transition directly from one role
     * to another. NULL indicates that nothing is needed.
     *
     * Indexing convention: rsc_action_matrix[current role][next role].
     * Transitions the scheduler should never request map to
     * assert_role_error.
     *
     * Current role         Transition function             Next role
     * ------------         -------------------             ----------
     */
    /* Unknown */       {   assert_role_error,              /* Unknown */
                            stop_resource,                  /* Stopped */
                            assert_role_error,              /* Started */
                            assert_role_error,              /* Unpromoted */
                            assert_role_error,              /* Promoted */
                        },
    /* Stopped */       {   assert_role_error,              /* Unknown */
                            NULL,                           /* Stopped */
                            start_resource,                 /* Started */
                            start_resource,                 /* Unpromoted */
                            assert_role_error,              /* Promoted */
                        },
    /* Started */       {   assert_role_error,              /* Unknown */
                            stop_resource,                  /* Stopped */
                            NULL,                           /* Started */
                            NULL,                           /* Unpromoted */
                            promote_resource,               /* Promoted */
                        },
    /* Unpromoted */    {   assert_role_error,              /* Unknown */
                            stop_resource,                  /* Stopped */
                            stop_resource,                  /* Started */
                            NULL,                           /* Unpromoted */
                            promote_resource,               /* Promoted */
                        },
    /* Promoted  */     {   assert_role_error,              /* Unknown */
                            demote_resource,                /* Stopped */
                            demote_resource,                /* Started */
                            demote_resource,                /* Unpromoted */
                            NULL,                           /* Promoted */
                        },
};
 124 
 125 /*!
 126  * \internal
 127  * \brief Get a list of a resource's allowed nodes sorted by node score
 128  *
 129  * \param[in] rsc  Resource to check
 130  *
 131  * \return List of allowed nodes sorted by node score
 132  */
 133 static GList *
 134 sorted_allowed_nodes(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 135 {
 136     if (rsc->allowed_nodes != NULL) {
 137         GList *nodes = g_hash_table_get_values(rsc->allowed_nodes);
 138 
 139         if (nodes != NULL) {
 140             return pcmk__sort_nodes(nodes, pe__current_node(rsc));
 141         }
 142     }
 143     return NULL;
 144 }
 145 
/*!
 * \internal
 * \brief Assign a resource to its best allowed node, if possible
 *
 * \param[in,out] rsc           Resource to choose a node for
 * \param[in]     prefer        If not \c NULL, prefer this node when all else
 *                              equal
 * \param[in]     stop_if_fail  If \c true and \p rsc can't be assigned to a
 *                              node, set next role to stopped and update
 *                              existing actions
 *
 * \return true if \p rsc could be assigned to a node, otherwise false
 *
 * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
 *       completely undo the assignment. A successful assignment can be either
 *       undone or left alone as final. A failed assignment has the same effect
 *       as calling pcmk__unassign_resource(); there are no side effects on
 *       roles or actions.
 */
static bool
assign_best_node(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
                 bool stop_if_fail)
{
    GList *nodes = NULL;
    pcmk_node_t *chosen = NULL;
    pcmk_node_t *best = NULL;
    // Ban nodes without capacity; remember the node with the most free capacity
    const pcmk_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);

    if (prefer == NULL) {
        // With no explicit preference, prefer the node with most free capacity
        prefer = most_free_node;
    }

    if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
        // We've already finished assignment of resources to nodes
        return rsc->allocated_to != NULL;
    }

    // Sort allowed nodes by score
    nodes = sorted_allowed_nodes(rsc);
    if (nodes != NULL) {
        best = (pcmk_node_t *) nodes->data; // First node has best score
    }

    if ((prefer != NULL) && (nodes != NULL)) {
        // Get the allowed node version of prefer
        chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);

        if (chosen == NULL) {
            pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
                         pe__node_name(prefer), rsc->id);

        /* Favor the preferred node as long as its score is at least as good as
         * the best allowed node's.
         *
         * An alternative would be to favor the preferred node even if the best
         * node is better, when the best node's score is less than INFINITY.
         */
        } else if (chosen->weight < best->weight) {
            pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
                         pe__node_name(chosen), rsc->id);
            chosen = NULL;

        } else if (!pcmk__node_available(chosen, true, false)) {
            pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
                         pe__node_name(chosen), rsc->id);
            chosen = NULL;

        } else {
            pe_rsc_trace(rsc,
                         "Chose preferred node %s for %s "
                         "(ignoring %d candidates)",
                         pe__node_name(chosen), rsc->id, g_list_length(nodes));
        }
    }

    if ((chosen == NULL) && (best != NULL)) {
        /* Either there is no preferred node, or the preferred node is not
         * suitable, but another node is allowed to run the resource.
         */

        chosen = best;

        if (!pe_rsc_is_unique_clone(rsc->parent)
            && (chosen->weight > 0) // Zero not acceptable
            && pcmk__node_available(chosen, false, false)) {
            /* If the resource is already running on a node, prefer that node if
             * it is just as good as the chosen node.
             *
             * We don't do this for unique clone instances, because
             * pcmk__assign_instances() has already assigned instances to their
             * running nodes when appropriate, and if we get here, we don't want
             * remaining unassigned instances to prefer a node that's already
             * running another instance.
             */
            pcmk_node_t *running = pe__current_node(rsc);

            if (running == NULL) {
                // Nothing to do

            } else if (!pcmk__node_available(running, true, false)) {
                pe_rsc_trace(rsc,
                             "Current node for %s (%s) can't run resources",
                             rsc->id, pe__node_name(running));

            } else {
                int nodes_with_best_score = 1;

                // Scan the remaining nodes tied with the best score
                for (GList *iter = nodes->next; iter; iter = iter->next) {
                    pcmk_node_t *allowed = (pcmk_node_t *) iter->data;

                    if (allowed->weight != chosen->weight) {
                        // The nodes are sorted by score, so no more are equal
                        break;
                    }
                    if (pe__same_node(allowed, running)) {
                        // Scores are equal, so prefer the current node
                        chosen = allowed;
                    }
                    nodes_with_best_score++;
                }

                if (nodes_with_best_score > 1) {
                    // An arbitrary choice among tied nodes is worth logging
                    uint8_t log_level = LOG_INFO;

                    if (chosen->weight >= INFINITY) {
                        log_level = LOG_WARNING;
                    }
                    do_crm_log(log_level,
                               "Chose %s for %s from %d nodes with score %s",
                               pe__node_name(chosen), rsc->id,
                               nodes_with_best_score,
                               pcmk_readable_score(chosen->weight));
                }
            }
        }

        pe_rsc_trace(rsc, "Chose %s for %s from %d candidates",
                     pe__node_name(chosen), rsc->id, g_list_length(nodes));
    }

    // chosen may still be NULL here, in which case the assignment fails
    pcmk__assign_resource(rsc, chosen, false, stop_if_fail);
    g_list_free(nodes);
    return rsc->allocated_to != NULL;
}
 290 
 291 /*!
 292  * \internal
 293  * \brief Apply a "this with" colocation to a node's allowed node scores
 294  *
 295  * \param[in,out] colocation  Colocation to apply
 296  * \param[in,out] rsc         Resource being assigned
 297  */
 298 static void
 299 apply_this_with(pcmk__colocation_t *colocation, pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 300 {
 301     GHashTable *archive = NULL;
 302     pcmk_resource_t *other = colocation->primary;
 303 
 304     // In certain cases, we will need to revert the node scores
 305     if ((colocation->dependent_role >= pcmk_role_promoted)
 306         || ((colocation->score < 0) && (colocation->score > -INFINITY))) {
 307         archive = pcmk__copy_node_table(rsc->allowed_nodes);
 308     }
 309 
 310     if (pcmk_is_set(other->flags, pcmk_rsc_unassigned)) {
 311         pe_rsc_trace(rsc,
 312                      "%s: Assigning colocation %s primary %s first"
 313                      "(score=%d role=%s)",
 314                      rsc->id, colocation->id, other->id,
 315                      colocation->score, role2text(colocation->dependent_role));
 316         other->cmds->assign(other, NULL, true);
 317     }
 318 
 319     // Apply the colocation score to this resource's allowed node scores
 320     rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
 321     if ((archive != NULL)
 322         && !pcmk__any_node_available(rsc->allowed_nodes)) {
 323         pe_rsc_info(rsc,
 324                     "%s: Reverting scores from colocation with %s "
 325                     "because no nodes allowed",
 326                     rsc->id, other->id);
 327         g_hash_table_destroy(rsc->allowed_nodes);
 328         rsc->allowed_nodes = archive;
 329         archive = NULL;
 330     }
 331     if (archive != NULL) {
 332         g_hash_table_destroy(archive);
 333     }
 334 }
 335 
 336 /*!
 337  * \internal
 338  * \brief Update a Pacemaker Remote node once its connection has been assigned
 339  *
 340  * \param[in] connection  Connection resource that has been assigned
 341  */
 342 static void
 343 remote_connection_assigned(const pcmk_resource_t *connection)
     /* [previous][next][first][last][top][bottom][index][help] */
 344 {
 345     pcmk_node_t *remote_node = pe_find_node(connection->cluster->nodes,
 346                                             connection->id);
 347 
 348     CRM_CHECK(remote_node != NULL, return);
 349 
 350     if ((connection->allocated_to != NULL)
 351         && (connection->next_role != pcmk_role_stopped)) {
 352 
 353         crm_trace("Pacemaker Remote node %s will be online",
 354                   remote_node->details->id);
 355         remote_node->details->online = TRUE;
 356         if (remote_node->details->unseen) {
 357             // Avoid unnecessary fence, since we will attempt connection
 358             remote_node->details->unclean = FALSE;
 359         }
 360 
 361     } else {
 362         crm_trace("Pacemaker Remote node %s will be shut down "
 363                   "(%sassigned connection's next role is %s)",
 364                   remote_node->details->id,
 365                   ((connection->allocated_to == NULL)? "un" : ""),
 366                   role2text(connection->next_role));
 367         remote_node->details->shutdown = TRUE;
 368     }
 369 }
 370 
/*!
 * \internal
 * \brief Assign a primitive resource to a node
 *
 * \param[in,out] rsc           Resource to assign to a node
 * \param[in]     prefer        Node to prefer, if all else is equal
 * \param[in]     stop_if_fail  If \c true and \p rsc can't be assigned to a
 *                              node, set next role to stopped and update
 *                              existing actions
 *
 * \return Node that \p rsc is assigned to, if assigned entirely to one node
 *
 * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
 *       completely undo the assignment. A successful assignment can be either
 *       undone or left alone as final. A failed assignment has the same effect
 *       as calling pcmk__unassign_resource(); there are no side effects on
 *       roles or actions.
 */
pcmk_node_t *
pcmk__primitive_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
                       bool stop_if_fail)
{
    GList *this_with_colocations = NULL;
    GList *with_this_colocations = NULL;
    GList *iter = NULL;
    pcmk__colocation_t *colocation = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));

    // Never assign a child without parent being assigned first
    if ((rsc->parent != NULL)
        && !pcmk_is_set(rsc->parent->flags, pcmk_rsc_assigning)) {
        pe_rsc_debug(rsc, "%s: Assigning parent %s first",
                     rsc->id, rsc->parent->id);
        rsc->parent->cmds->assign(rsc->parent, prefer, stop_if_fail);
    }

    if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
        // Assignment has already been done
        const char *node_name = "no node";

        if (rsc->allocated_to != NULL) {
            node_name = pe__node_name(rsc->allocated_to);
        }
        pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, node_name);
        return rsc->allocated_to;
    }

    // Ensure we detect assignment loops
    if (pcmk_is_set(rsc->flags, pcmk_rsc_assigning)) {
        pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
        return NULL;
    }
    pe__set_resource_flags(rsc, pcmk_rsc_assigning);

    pe__show_node_scores(true, rsc, "Pre-assignment", rsc->allowed_nodes,
                         rsc->cluster);

    this_with_colocations = pcmk__this_with_colocations(rsc);
    with_this_colocations = pcmk__with_this_colocations(rsc);

    // Apply mandatory colocations first, to satisfy as many as possible
    for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
        colocation = iter->data;

        // Mandatory means the score is +/- infinity
        if ((colocation->score <= -CRM_SCORE_INFINITY)
            || (colocation->score >= CRM_SCORE_INFINITY)) {
            apply_this_with(colocation, rsc);
        }
    }
    for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
        colocation = iter->data;

        if ((colocation->score <= -CRM_SCORE_INFINITY)
            || (colocation->score >= CRM_SCORE_INFINITY)) {
            pcmk__add_dependent_scores(colocation, rsc);
        }
    }

    pe__show_node_scores(true, rsc, "Mandatory-colocations",
                         rsc->allowed_nodes, rsc->cluster);

    // Then apply optional colocations (finite scores)
    for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
        colocation = iter->data;

        if ((colocation->score > -CRM_SCORE_INFINITY)
            && (colocation->score < CRM_SCORE_INFINITY)) {
            apply_this_with(colocation, rsc);
        }
    }
    for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
        colocation = iter->data;

        if ((colocation->score > -CRM_SCORE_INFINITY)
            && (colocation->score < CRM_SCORE_INFINITY)) {
            pcmk__add_dependent_scores(colocation, rsc);
        }
    }

    g_list_free(this_with_colocations);
    g_list_free(with_this_colocations);

    if (rsc->next_role == pcmk_role_stopped) {
        pe_rsc_trace(rsc,
                     "Banning %s from all nodes because it will be stopped",
                     rsc->id);
        resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE,
                          rsc->cluster);

    } else if ((rsc->next_role > rsc->role)
               && !pcmk_is_set(rsc->cluster->flags, pcmk_sched_quorate)
               && (rsc->cluster->no_quorum_policy == pcmk_no_quorum_freeze)) {
        // Without quorum and with freeze policy, keep the current role
        crm_notice("Resource %s cannot be elevated from %s to %s due to "
                   "no-quorum-policy=freeze",
                   rsc->id, role2text(rsc->role), role2text(rsc->next_role));
        pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze");
    }

    pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags,
                                      pcmk_sched_output_scores),
                         rsc, __func__, rsc->allowed_nodes, rsc->cluster);

    // Unmanage resource if fencing is enabled but no device is configured
    if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
        && !pcmk_is_set(rsc->cluster->flags, pcmk_sched_have_fencing)) {
        pe__clear_resource_flags(rsc, pcmk_rsc_managed);
    }

    if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
        // Unmanaged resources stay on their current node
        const char *reason = NULL;
        pcmk_node_t *assign_to = NULL;

        pe__set_next_role(rsc, rsc->role, "unmanaged");
        assign_to = pe__current_node(rsc);
        if (assign_to == NULL) {
            reason = "inactive";
        } else if (rsc->role == pcmk_role_promoted) {
            reason = "promoted";
        } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
            reason = "failed";
        } else {
            reason = "active";
        }
        pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
                    (assign_to? assign_to->details->uname : "no node"), reason);
        pcmk__assign_resource(rsc, assign_to, true, stop_if_fail);

    } else if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_stop_all)) {
        // Must stop at some point, but be consistent with stop_if_fail
        if (stop_if_fail) {
            pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources",
                         rsc->id);
        }
        pcmk__assign_resource(rsc, NULL, true, stop_if_fail);

    } else if (!assign_best_node(rsc, prefer, stop_if_fail)) {
        // Assignment failed
        if (!pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
            pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
        } else if ((rsc->running_on != NULL) && stop_if_fail) {
            pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
        }
    }

    pe__clear_resource_flags(rsc, pcmk_rsc_assigning);

    if (rsc->is_remote_node) {
        // Keep the corresponding remote node's state in sync
        remote_connection_assigned(rsc);
    }

    return rsc->allocated_to;
}
 545 
/*!
 * \internal
 * \brief Schedule actions to bring resource down and back to current role
 *
 * \param[in,out] rsc           Resource to restart
 * \param[in,out] current       Node that resource should be brought down on
 * \param[in]     need_stop     Whether the resource must be stopped
 * \param[in]     need_promote  Whether the resource must be promoted
 */
static void
schedule_restart_actions(pcmk_resource_t *rsc, pcmk_node_t *current,
                         bool need_stop, bool need_promote)
{
    enum rsc_role_e role = rsc->role;
    enum rsc_role_e next_role;
    rsc_transition_fn fn = NULL;

    pe__set_resource_flags(rsc, pcmk_rsc_restarting);

    // Bring resource down to a stop on its current node
    while (role != pcmk_role_stopped) {
        next_role = rsc_state_matrix[role][pcmk_role_stopped];
        pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
                     (need_stop? "required" : "optional"), rsc->id,
                     role2text(role), role2text(next_role));
        fn = rsc_action_matrix[role][next_role];
        if (fn == NULL) {
            break;
        }
        // Downward actions are optional unless a stop is required
        fn(rsc, current, !need_stop);
        role = next_role;
    }

    // Bring resource up to its next role on its next node
    while ((rsc->role <= rsc->next_role) && (role != rsc->role)
           && !pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
        bool required = need_stop;

        next_role = rsc_state_matrix[role][rsc->role];
        if ((next_role == pcmk_role_promoted) && need_promote) {
            // A required promotion makes the promote action mandatory
            required = true;
        }
        pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
                     (required? "required" : "optional"), rsc->id,
                     role2text(role), role2text(next_role));
        fn = rsc_action_matrix[role][next_role];
        if (fn == NULL) {
            break;
        }
        fn(rsc, rsc->allocated_to, !required);
        role = next_role;
    }

    pe__clear_resource_flags(rsc, pcmk_rsc_restarting);
}
 603 
 604 /*!
 605  * \internal
 606  * \brief If a resource's next role is not explicitly specified, set a default
 607  *
 608  * \param[in,out] rsc  Resource to set next role for
 609  *
 610  * \return "explicit" if next role was explicitly set, otherwise "implicit"
 611  */
 612 static const char *
 613 set_default_next_role(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 614 {
 615     if (rsc->next_role != pcmk_role_unknown) {
 616         return "explicit";
 617     }
 618 
 619     if (rsc->allocated_to == NULL) {
 620         pe__set_next_role(rsc, pcmk_role_stopped, "assignment");
 621     } else {
 622         pe__set_next_role(rsc, pcmk_role_started, "assignment");
 623     }
 624     return "implicit";
 625 }
 626 
 627 /*!
 628  * \internal
 629  * \brief Create an action to represent an already pending start
 630  *
 631  * \param[in,out] rsc  Resource to create start action for
 632  */
 633 static void
 634 create_pending_start(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 635 {
 636     pcmk_action_t *start = NULL;
 637 
 638     pe_rsc_trace(rsc,
 639                  "Creating action for %s to represent already pending start",
 640                  rsc->id);
 641     start = start_action(rsc, rsc->allocated_to, TRUE);
 642     pe__set_action_flags(start, pcmk_action_always_in_graph);
 643 }
 644 
 645 /*!
 646  * \internal
 647  * \brief Schedule actions needed to take a resource to its next role
 648  *
 649  * \param[in,out] rsc  Resource to schedule actions for
 650  */
 651 static void
 652 schedule_role_transition_actions(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 653 {
 654     enum rsc_role_e role = rsc->role;
 655 
 656     while (role != rsc->next_role) {
 657         enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role];
 658         rsc_transition_fn fn = NULL;
 659 
 660         pe_rsc_trace(rsc,
 661                      "Creating action to take %s from %s to %s (ending at %s)",
 662                      rsc->id, role2text(role), role2text(next_role),
 663                      role2text(rsc->next_role));
 664         fn = rsc_action_matrix[role][next_role];
 665         if (fn == NULL) {
 666             break;
 667         }
 668         fn(rsc, rsc->allocated_to, false);
 669         role = next_role;
 670     }
 671 }
 672 
/*!
 * \internal
 * \brief Create all actions needed for a given primitive resource
 *
 * Based on the resource's current and assigned nodes, partial migration
 * state, failure flags, and recovery policy, decide whether the resource
 * needs a stop, start, promote, and/or demote, then schedule those actions
 * along with recurring monitors and (when possible) live migration actions.
 *
 * \param[in,out] rsc  Primitive resource to create actions for
 */
void
pcmk__primitive_create_actions(pcmk_resource_t *rsc)
{
    bool need_stop = false;         // Whether resource must be stopped first
    bool need_promote = false;      // Whether resource must be promoted
    bool is_moving = false;         // Whether resource changes nodes
    bool allow_migrate = false;     // Whether a live migration is possible
    bool multiply_active = false;   // Whether active on too many nodes

    pcmk_node_t *current = NULL;    // Preferred node among active nodes
    unsigned int num_all_active = 0;
    unsigned int num_clean_active = 0;
    const char *next_role_source = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));

    next_role_source = set_default_next_role(rsc);
    pe_rsc_trace(rsc,
                 "Creating all actions for %s transition from %s to %s "
                 "(%s) on %s",
                 rsc->id, role2text(rsc->role), role2text(rsc->next_role),
                 next_role_source, pe__node_name(rsc->allocated_to));

    current = rsc->fns->active_node(rsc, &num_all_active, &num_clean_active);

    // Schedule cleanup of any migrations that left stale state behind
    g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration,
                   rsc);

    // Resource is moving if active somewhere other than its assigned node
    if ((current != NULL) && (rsc->allocated_to != NULL)
        && !pe__same_node(current, rsc->allocated_to)
        && (rsc->next_role >= pcmk_role_started)) {

        pe_rsc_trace(rsc, "Moving %s from %s to %s",
                     rsc->id, pe__node_name(current),
                     pe__node_name(rsc->allocated_to));
        is_moving = true;
        allow_migrate = pcmk__rsc_can_migrate(rsc, current);

        // This is needed even if migrating (though I'm not sure why ...)
        need_stop = true;
    }

    // Check whether resource is partially migrated and/or multiply active
    if ((rsc->partial_migration_source != NULL)
        && (rsc->partial_migration_target != NULL)
        && allow_migrate && (num_all_active == 2)
        && pe__same_node(current, rsc->partial_migration_source)
        && pe__same_node(rsc->allocated_to, rsc->partial_migration_target)) {
        /* A partial migration is in progress, and the migration target remains
         * the same as when the migration began.
         */
        pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue",
                     rsc->id, pe__node_name(rsc->partial_migration_source),
                     pe__node_name(rsc->partial_migration_target));

    } else if ((rsc->partial_migration_source != NULL)
               || (rsc->partial_migration_target != NULL)) {
        // A partial migration is in progress but can't be continued

        if (num_all_active > 2) {
            // The resource is migrating *and* multiply active!
            crm_notice("Forcing recovery of %s because it is migrating "
                       "from %s to %s and possibly active elsewhere",
                       rsc->id, pe__node_name(rsc->partial_migration_source),
                       pe__node_name(rsc->partial_migration_target));
        } else {
            // The migration source or target isn't available
            crm_notice("Forcing recovery of %s because it can no longer "
                       "migrate from %s to %s",
                       rsc->id, pe__node_name(rsc->partial_migration_source),
                       pe__node_name(rsc->partial_migration_target));
        }
        // Abandon the partial migration and recover via full stop/start
        need_stop = true;
        rsc->partial_migration_source = rsc->partial_migration_target = NULL;
        allow_migrate = false;

    } else if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)) {
        multiply_active = (num_all_active > 1);
    } else {
        /* If a resource has "requires" set to nothing or quorum, don't consider
         * it active on unclean nodes (similar to how all resources behave when
         * stonith-enabled is false). We can start such resources elsewhere
         * before fencing completes, and if we considered the resource active on
         * the failed node, we would attempt recovery for being active on
         * multiple nodes.
         */
        multiply_active = (num_clean_active > 1);
    }

    if (multiply_active) {
        const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);

        // Resource was (possibly) incorrectly multiply active
        pe_proc_err("%s resource %s might be active on %u nodes (%s)",
                    pcmk__s(class, "Untyped"), rsc->id, num_all_active,
                    recovery2text(rsc->recovery_type));
        crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ"
                   "#Resource_is_Too_Active for more information");

        // Apply the configured multiple-active recovery policy
        switch (rsc->recovery_type) {
            case pcmk_multiply_active_restart:
                need_stop = true;
                break;
            case pcmk_multiply_active_unexpected:
                need_stop = true; // stop_resource() will skip expected node
                pe__set_resource_flags(rsc, pcmk_rsc_stop_unexpected);
                break;
            default:
                break;
        }

    } else {
        pe__clear_resource_flags(rsc, pcmk_rsc_stop_unexpected);
    }

    if (pcmk_is_set(rsc->flags, pcmk_rsc_start_pending)) {
        create_pending_start(rsc);
    }

    if (is_moving) {
        // Remaining tests are only for resources staying where they are

    } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
        if (pcmk_is_set(rsc->flags, pcmk_rsc_stop_if_failed)) {
            need_stop = true;
            pe_rsc_trace(rsc, "Recovering %s", rsc->id);
        } else {
            pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
            if (rsc->next_role == pcmk_role_promoted) {
                need_promote = true;
            }
        }

    } else if (pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
        pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
        need_stop = true;

    } else if ((rsc->role > pcmk_role_started) && (current != NULL)
               && (rsc->allocated_to != NULL)) {
        // Resource is promoted and staying put: check whether start is optional
        pcmk_action_t *start = NULL;

        pe_rsc_trace(rsc, "Creating start action for promoted resource %s",
                     rsc->id);
        start = start_action(rsc, rsc->allocated_to, TRUE);
        if (!pcmk_is_set(start->flags, pcmk_action_optional)) {
            // Recovery of a promoted resource
            pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
            need_stop = true;
        }
    }

    // Create any actions needed to bring resource down and back up to same role
    schedule_restart_actions(rsc, current, need_stop, need_promote);

    // Create any actions needed to take resource from this role to the next
    schedule_role_transition_actions(rsc);

    pcmk__create_recurring_actions(rsc);

    if (allow_migrate) {
        pcmk__create_migration_actions(rsc, current);
    }
}
 842 
 843 /*!
 844  * \internal
 845  * \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
 846  *
 847  * \param[in] rsc  Resource to check
 848  */
 849 static void
 850 rsc_avoids_remote_nodes(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 851 {
 852     GHashTableIter iter;
 853     pcmk_node_t *node = NULL;
 854 
 855     g_hash_table_iter_init(&iter, rsc->allowed_nodes);
 856     while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
 857         if (node->details->remote_rsc != NULL) {
 858             node->weight = -INFINITY;
 859         }
 860     }
 861 }
 862 
 863 /*!
 864  * \internal
 865  * \brief Return allowed nodes as (possibly sorted) list
 866  *
 867  * Convert a resource's hash table of allowed nodes to a list. If printing to
 868  * stdout, sort the list, to keep action ID numbers consistent for regression
 869  * test output (while avoiding the performance hit on a live cluster).
 870  *
 871  * \param[in] rsc       Resource to check for allowed nodes
 872  *
 873  * \return List of resource's allowed nodes
 874  * \note Callers should take care not to rely on the list being sorted.
 875  */
 876 static GList *
 877 allowed_nodes_as_list(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 878 {
 879     GList *allowed_nodes = NULL;
 880 
 881     if (rsc->allowed_nodes) {
 882         allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
 883     }
 884 
 885     if (!pcmk__is_daemon) {
 886         allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
 887     }
 888 
 889     return allowed_nodes;
 890 }
 891 
/*!
 * \internal
 * \brief Create implicit constraints needed for a primitive resource
 *
 * Creates the standard intra-resource orderings (stop before start, demote
 * before stop, start before promote, probe before history clean-up), plus
 * unfencing, utilization, and container-related constraints as applicable.
 * Unmanaged resources get no implicit constraints.
 *
 * \param[in,out] rsc  Primitive resource to create implicit constraints for
 */
void
pcmk__primitive_internal_constraints(pcmk_resource_t *rsc)
{
    GList *allowed_nodes = NULL;
    bool check_unfencing = false;
    bool check_utilization = false;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));

    if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
        pe_rsc_trace(rsc,
                     "Skipping implicit constraints for unmanaged resource %s",
                     rsc->id);
        return;
    }

    // Whether resource requires unfencing
    check_unfencing = !pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)
                      && pcmk_is_set(rsc->cluster->flags,
                                     pcmk_sched_enable_unfencing)
                      && pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing);

    // Whether a non-default placement strategy is used
    check_utilization = (g_hash_table_size(rsc->utilization) > 0)
                         && !pcmk__str_eq(rsc->cluster->placement_strategy,
                                          "default", pcmk__str_casei);

    // Order stops before starts (i.e. restart)
    pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL,
                       rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL,
                       pcmk__ar_ordered
                       |pcmk__ar_first_implies_then
                       |pcmk__ar_intermediate_stop,
                       rsc->cluster);

    // Promotable ordering: demote before stop, start before promote
    if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
                    pcmk_rsc_promotable)
        || (rsc->role > pcmk_role_unpromoted)) {

        pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_DEMOTE, 0),
                           NULL,
                           rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
                           NULL,
                           pcmk__ar_promoted_then_implies_first, rsc->cluster);

        pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
                           NULL,
                           rsc, pcmk__op_key(rsc->id, PCMK_ACTION_PROMOTE, 0),
                           NULL,
                           pcmk__ar_unrunnable_first_blocks, rsc->cluster);
    }

    // Don't clear resource history if probing on same node
    pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0),
                       NULL, rsc,
                       pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0),
                       NULL,
                       pcmk__ar_if_on_same_node|pcmk__ar_then_cancels_first,
                       rsc->cluster);

    // Certain checks need allowed nodes
    if (check_unfencing || check_utilization || (rsc->container != NULL)) {
        allowed_nodes = allowed_nodes_as_list(rsc);
    }

    if (check_unfencing) {
        g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
    }

    if (check_utilization) {
        pcmk__create_utilization_constraints(rsc, allowed_nodes);
    }

    if (rsc->container != NULL) {
        pcmk_resource_t *remote_rsc = NULL;

        if (rsc->is_remote_node) {
            // rsc is the implicit remote connection for a guest or bundle node

            /* Guest resources are not allowed to run on Pacemaker Remote nodes,
             * to avoid nesting remotes. However, bundles are allowed.
             */
            if (!pcmk_is_set(rsc->flags, pcmk_rsc_remote_nesting_allowed)) {
                rsc_avoids_remote_nodes(rsc->container);
            }

            /* If someone cleans up a guest or bundle node's container, we will
             * likely schedule a (re-)probe of the container and recovery of the
             * connection. Order the connection stop after the container probe,
             * so that if we detect the container running, we will trigger a new
             * transition and avoid the unnecessary recovery.
             */
            pcmk__order_resource_actions(rsc->container, PCMK_ACTION_MONITOR,
                                         rsc, PCMK_ACTION_STOP,
                                         pcmk__ar_ordered);

        /* A user can specify that a resource must start on a Pacemaker Remote
         * node by explicitly configuring it with the container=NODENAME
         * meta-attribute. This is of questionable merit, since location
         * constraints can accomplish the same thing. But we support it, so here
         * we check whether a resource (that is not itself a remote connection)
         * has container set to a remote node or guest node resource.
         */
        } else if (rsc->container->is_remote_node) {
            remote_rsc = rsc->container;
        } else  {
            remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
                                                          rsc->container);
        }

        if (remote_rsc != NULL) {
            /* Force the resource on the Pacemaker Remote node instead of
             * colocating the resource with the container resource.
             */
            for (GList *item = allowed_nodes; item; item = item->next) {
                pcmk_node_t *node = item->data;

                // Ban every node other than the one hosting the remote
                if (node->details->remote_rsc != remote_rsc) {
                    node->weight = -INFINITY;
                }
            }

        } else {
            /* This resource is either a filler for a container that does NOT
             * represent a Pacemaker Remote node, or a Pacemaker Remote
             * connection resource for a guest node or bundle.
             */
            int score;

            crm_trace("Order and colocate %s relative to its container %s",
                      rsc->id, rsc->container->id);

            // Start the container before this resource ...
            pcmk__new_ordering(rsc->container,
                               pcmk__op_key(rsc->container->id,
                                            PCMK_ACTION_START, 0),
                               NULL, rsc,
                               pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
                               NULL,
                               pcmk__ar_first_implies_then
                               |pcmk__ar_unrunnable_first_blocks,
                               rsc->cluster);

            // ... and stop this resource before the container
            pcmk__new_ordering(rsc,
                               pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
                               NULL,
                               rsc->container,
                               pcmk__op_key(rsc->container->id,
                                            PCMK_ACTION_STOP, 0),
                               NULL, pcmk__ar_then_implies_first, rsc->cluster);

            if (pcmk_is_set(rsc->flags, pcmk_rsc_remote_nesting_allowed)) {
                score = 10000;    /* Highly preferred but not essential */
            } else {
                score = INFINITY; /* Force them to run on the same host */
            }
            pcmk__new_colocation("#resource-with-container", NULL, score, rsc,
                                 rsc->container, NULL, NULL,
                                 pcmk__coloc_influence);
        }
    }

    if (rsc->is_remote_node
        || pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)) {
        /* Remote connections and fencing devices are not allowed to run on
         * Pacemaker Remote nodes
         */
        rsc_avoids_remote_nodes(rsc);
    }
    g_list_free(allowed_nodes);
}
1069 
1070 /*!
1071  * \internal
1072  * \brief Apply a colocation's score to node scores or resource priority
1073  *
1074  * Given a colocation constraint, apply its score to the dependent's
1075  * allowed node scores (if we are still placing resources) or priority (if
1076  * we are choosing promotable clone instance roles).
1077  *
1078  * \param[in,out] dependent      Dependent resource in colocation
1079  * \param[in]     primary        Primary resource in colocation
1080  * \param[in]     colocation     Colocation constraint to apply
1081  * \param[in]     for_dependent  true if called on behalf of dependent
1082  */
1083 void
1084 pcmk__primitive_apply_coloc_score(pcmk_resource_t *dependent,
     /* [previous][next][first][last][top][bottom][index][help] */
1085                                   const pcmk_resource_t *primary,
1086                                   const pcmk__colocation_t *colocation,
1087                                   bool for_dependent)
1088 {
1089     enum pcmk__coloc_affects filter_results;
1090 
1091     CRM_ASSERT((dependent != NULL) && (primary != NULL)
1092                && (colocation != NULL));
1093 
1094     if (for_dependent) {
1095         // Always process on behalf of primary resource
1096         primary->cmds->apply_coloc_score(dependent, primary, colocation, false);
1097         return;
1098     }
1099 
1100     filter_results = pcmk__colocation_affects(dependent, primary, colocation,
1101                                               false);
1102     pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
1103                  ((colocation->score > 0)? "Colocating" : "Anti-colocating"),
1104                  dependent->id, primary->id, colocation->id, colocation->score,
1105                  filter_results);
1106 
1107     switch (filter_results) {
1108         case pcmk__coloc_affects_role:
1109             pcmk__apply_coloc_to_priority(dependent, primary, colocation);
1110             break;
1111         case pcmk__coloc_affects_location:
1112             pcmk__apply_coloc_to_scores(dependent, primary, colocation);
1113             break;
1114         default: // pcmk__coloc_affects_nothing
1115             return;
1116     }
1117 }
1118 
1119 /* Primitive implementation of
1120  * pcmk_assignment_methods_t:with_this_colocations()
1121  */
1122 void
1123 pcmk__with_primitive_colocations(const pcmk_resource_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1124                                  const pcmk_resource_t *orig_rsc, GList **list)
1125 {
1126     CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
1127                && (list != NULL));
1128 
1129     if (rsc == orig_rsc) {
1130         /* For the resource itself, add all of its own colocations and relevant
1131          * colocations from its parent (if any).
1132          */
1133         pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
1134         if (rsc->parent != NULL) {
1135             rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc, list);
1136         }
1137     } else {
1138         // For an ancestor, add only explicitly configured constraints
1139         for (GList *iter = rsc->rsc_cons_lhs; iter != NULL; iter = iter->next) {
1140             pcmk__colocation_t *colocation = iter->data;
1141 
1142             if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
1143                 pcmk__add_with_this(list, colocation, orig_rsc);
1144             }
1145         }
1146     }
1147 }
1148 
1149 /* Primitive implementation of
1150  * pcmk_assignment_methods_t:this_with_colocations()
1151  */
1152 void
1153 pcmk__primitive_with_colocations(const pcmk_resource_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1154                                  const pcmk_resource_t *orig_rsc, GList **list)
1155 {
1156     CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
1157                && (list != NULL));
1158 
1159     if (rsc == orig_rsc) {
1160         /* For the resource itself, add all of its own colocations and relevant
1161          * colocations from its parent (if any).
1162          */
1163         pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
1164         if (rsc->parent != NULL) {
1165             rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc, list);
1166         }
1167     } else {
1168         // For an ancestor, add only explicitly configured constraints
1169         for (GList *iter = rsc->rsc_cons; iter != NULL; iter = iter->next) {
1170             pcmk__colocation_t *colocation = iter->data;
1171 
1172             if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
1173                 pcmk__add_this_with(list, colocation, orig_rsc);
1174             }
1175         }
1176     }
1177 }
1178 
/*!
 * \internal
 * \brief Return action flags for a given primitive resource action
 *
 * \param[in,out] action  Action to get flags for
 * \param[in]     node    If not NULL, limit effects to this node (ignored)
 *
 * \return Flags appropriate to \p action on \p node
 */
uint32_t
pcmk__primitive_action_flags(pcmk_action_t *action, const pcmk_node_t *node)
{
    CRM_ASSERT(action != NULL);
    // Primitive action flags do not vary by node, so node is unused here
    return (uint32_t) action->flags;
}
1194 
1195 /*!
1196  * \internal
1197  * \brief Check whether a node is a multiply active resource's expected node
1198  *
1199  * \param[in] rsc  Resource to check
1200  * \param[in] node  Node to check
1201  *
1202  * \return true if \p rsc is multiply active with multiple-active set to
1203  *         stop_unexpected, and \p node is the node where it will remain active
1204  * \note This assumes that the resource's next role cannot be changed to stopped
1205  *       after this is called, which should be reasonable if status has already
1206  *       been unpacked and resources have been assigned to nodes.
1207  */
1208 static bool
1209 is_expected_node(const pcmk_resource_t *rsc, const pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
1210 {
1211     return pcmk_all_flags_set(rsc->flags,
1212                               pcmk_rsc_stop_unexpected|pcmk_rsc_restarting)
1213            && (rsc->next_role > pcmk_role_stopped)
1214            && pe__same_node(rsc->allocated_to, node);
1215 }
1216 
/*!
 * \internal
 * \brief Schedule actions needed to stop a resource wherever it is active
 *
 * Iterates over every node where the resource is active, scheduling a stop on
 * each except the "expected" node of a stop_unexpected recovery and the
 * target of a continuing partial migration. Also schedules related clean-up
 * and unfencing constraints where applicable.
 *
 * \param[in,out] rsc       Resource being stopped
 * \param[in]     node      Node where resource is being stopped (ignored)
 * \param[in]     optional  Whether actions should be optional
 */
static void
stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
{
    for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
        pcmk_node_t *current = (pcmk_node_t *) iter->data;
        pcmk_action_t *stop = NULL;

        if (is_expected_node(rsc, current)) {
            /* We are scheduling restart actions for a multiply active resource
             * with multiple-active=stop_unexpected, and this is where it should
             * not be stopped.
             */
            pe_rsc_trace(rsc,
                         "Skipping stop of multiply active resource %s "
                         "on expected node %s",
                         rsc->id, pe__node_name(current));
            continue;
        }

        if (rsc->partial_migration_target != NULL) {
            // Continue migration if node originally was and remains target
            if (pe__same_node(current, rsc->partial_migration_target)
                && pe__same_node(current, rsc->allocated_to)) {
                pe_rsc_trace(rsc,
                             "Skipping stop of %s on %s "
                             "because partial migration there will continue",
                             rsc->id, pe__node_name(current));
                continue;
            } else {
                pe_rsc_trace(rsc,
                             "Forcing stop of %s on %s "
                             "because migration target changed",
                             rsc->id, pe__node_name(current));
                optional = false;
            }
        }

        pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
                     rsc->id, pe__node_name(current));
        stop = stop_action(rsc, current, optional);

        // Attach a human-readable reason to the stop where one is known
        if (rsc->allocated_to == NULL) {
            pe_action_set_reason(stop, "node availability", true);
        } else if (pcmk_all_flags_set(rsc->flags, pcmk_rsc_restarting
                                                  |pcmk_rsc_stop_unexpected)) {
            /* We are stopping a multiply active resource on a node that is
             * not its expected node, and we are still scheduling restart
             * actions, so the stop is for being multiply active.
             */
            pe_action_set_reason(stop, "being multiply active", true);
        }

        // Unmanaged resources get the action scheduled but never runnable
        if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
            pe__clear_action_flags(stop, pcmk_action_runnable);
        }

        if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_remove_after_stop)) {
            pcmk__schedule_cleanup(rsc, current, optional);
        }

        if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing)) {
            pcmk_action_t *unfence = pe_fence_op(current, PCMK_ACTION_ON, true,
                                                 NULL, false, rsc->cluster);

            // A required unfence makes the stop required as well
            order_actions(stop, unfence, pcmk__ar_then_implies_first);
            if (!pcmk__node_unfenced(current)) {
                pe_proc_err("Stopping %s until %s can be unfenced",
                            rsc->id, pe__node_name(current));
            }
        }
    }
}
1297 
1298 /*!
1299  * \internal
1300  * \brief Schedule actions needed to start a resource on a node
1301  *
1302  * \param[in,out] rsc       Resource being started
1303  * \param[in,out] node      Node where resource should be started
1304  * \param[in]     optional  Whether actions should be optional
1305  */
1306 static void
1307 start_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
     /* [previous][next][first][last][top][bottom][index][help] */
1308 {
1309     pcmk_action_t *start = NULL;
1310 
1311     CRM_ASSERT(node != NULL);
1312 
1313     pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
1314                  (optional? "optional" : "required"), rsc->id,
1315                  pe__node_name(node), node->weight);
1316     start = start_action(rsc, node, TRUE);
1317 
1318     pcmk__order_vs_unfence(rsc, node, start, pcmk__ar_first_implies_then);
1319 
1320     if (pcmk_is_set(start->flags, pcmk_action_runnable) && !optional) {
1321         pe__clear_action_flags(start, pcmk_action_optional);
1322     }
1323 
1324     if (is_expected_node(rsc, node)) {
1325         /* This could be a problem if the start becomes necessary for other
1326          * reasons later.
1327          */
1328         pe_rsc_trace(rsc,
1329                      "Start of multiply active resouce %s "
1330                      "on expected node %s will be a pseudo-action",
1331                      rsc->id, pe__node_name(node));
1332         pe__set_action_flags(start, pcmk_action_pseudo);
1333     }
1334 }
1335 
1336 /*!
1337  * \internal
1338  * \brief Schedule actions needed to promote a resource on a node
1339  *
1340  * \param[in,out] rsc       Resource being promoted
1341  * \param[in]     node      Node where resource should be promoted
1342  * \param[in]     optional  Whether actions should be optional
1343  */
1344 static void
1345 promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
     /* [previous][next][first][last][top][bottom][index][help] */
1346 {
1347     GList *iter = NULL;
1348     GList *action_list = NULL;
1349     bool runnable = true;
1350 
1351     CRM_ASSERT(node != NULL);
1352 
1353     // Any start must be runnable for promotion to be runnable
1354     action_list = pe__resource_actions(rsc, node, PCMK_ACTION_START, true);
1355     for (iter = action_list; iter != NULL; iter = iter->next) {
1356         pcmk_action_t *start = (pcmk_action_t *) iter->data;
1357 
1358         if (!pcmk_is_set(start->flags, pcmk_action_runnable)) {
1359             runnable = false;
1360         }
1361     }
1362     g_list_free(action_list);
1363 
1364     if (runnable) {
1365         pcmk_action_t *promote = promote_action(rsc, node, optional);
1366 
1367         pe_rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
1368                      (optional? "optional" : "required"), rsc->id,
1369                      pe__node_name(node));
1370 
1371         if (is_expected_node(rsc, node)) {
1372             /* This could be a problem if the promote becomes necessary for
1373              * other reasons later.
1374              */
1375             pe_rsc_trace(rsc,
1376                          "Promotion of multiply active resouce %s "
1377                          "on expected node %s will be a pseudo-action",
1378                          rsc->id, pe__node_name(node));
1379             pe__set_action_flags(promote, pcmk_action_pseudo);
1380         }
1381     } else {
1382         pe_rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
1383                      rsc->id, pe__node_name(node));
1384         action_list = pe__resource_actions(rsc, node, PCMK_ACTION_PROMOTE,
1385                                            true);
1386         for (iter = action_list; iter != NULL; iter = iter->next) {
1387             pcmk_action_t *promote = (pcmk_action_t *) iter->data;
1388 
1389             pe__clear_action_flags(promote, pcmk_action_runnable);
1390         }
1391         g_list_free(action_list);
1392     }
1393 }
1394 
1395 /*!
1396  * \internal
1397  * \brief Schedule actions needed to demote a resource wherever it is active
1398  *
1399  * \param[in,out] rsc       Resource being demoted
1400  * \param[in]     node      Node where resource should be demoted (ignored)
1401  * \param[in]     optional  Whether actions should be optional
1402  */
1403 static void
1404 demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
     /* [previous][next][first][last][top][bottom][index][help] */
1405 {
1406     /* Since this will only be called for a primitive (possibly as an instance
1407      * of a collective resource), the resource is multiply active if it is
1408      * running on more than one node, so we want to demote on all of them as
1409      * part of recovery, regardless of which one is the desired node.
1410      */
1411     for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
1412         pcmk_node_t *current = (pcmk_node_t *) iter->data;
1413 
1414         if (is_expected_node(rsc, current)) {
1415             pe_rsc_trace(rsc,
1416                          "Skipping demote of multiply active resource %s "
1417                          "on expected node %s",
1418                          rsc->id, pe__node_name(current));
1419         } else {
1420             pe_rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
1421                          (optional? "optional" : "required"), rsc->id,
1422                          pe__node_name(current));
1423             demote_action(rsc, current, optional);
1424         }
1425     }
1426 }
1427 
/*!
 * \internal
 * \brief Abort when called, as a guard against invalid role transitions
 *
 * NOTE(review): based on its name and signature, this appears to serve as the
 * scheduling callback for role transitions that should never occur — confirm
 * against the role transition table that references it.
 *
 * \param[in,out] rsc       Resource (unused)
 * \param[in]     node      Node (unused)
 * \param[in]     optional  Whether actions should be optional (unused)
 */
static void
assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
{
    CRM_ASSERT(false);
}
1433 
1434 /*!
1435  * \internal
1436  * \brief Schedule cleanup of a resource
1437  *
1438  * \param[in,out] rsc       Resource to clean up
1439  * \param[in]     node      Node to clean up on
1440  * \param[in]     optional  Whether clean-up should be optional
1441  */
1442 void
1443 pcmk__schedule_cleanup(pcmk_resource_t *rsc, const pcmk_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
1444                        bool optional)
1445 {
1446     /* If the cleanup is required, its orderings are optional, because they're
1447      * relevant only if both actions are required. Conversely, if the cleanup is
1448      * optional, the orderings make the then action required if the first action
1449      * becomes required.
1450      */
1451     uint32_t flag = optional? pcmk__ar_first_implies_then : pcmk__ar_ordered;
1452 
1453     CRM_CHECK((rsc != NULL) && (node != NULL), return);
1454 
1455     if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1456         pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
1457                      rsc->id, pe__node_name(node));
1458         return;
1459     }
1460 
1461     if (node->details->unclean || !node->details->online) {
1462         pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
1463                      rsc->id, pe__node_name(node));
1464         return;
1465     }
1466 
1467     crm_notice("Scheduling clean-up of %s on %s", rsc->id, pe__node_name(node));
1468     delete_action(rsc, node, optional);
1469 
1470     // stop -> clean-up -> start
1471     pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP,
1472                                  rsc, PCMK_ACTION_DELETE, flag);
1473     pcmk__order_resource_actions(rsc, PCMK_ACTION_DELETE,
1474                                  rsc, PCMK_ACTION_START, flag);
1475 }
1476 
1477 /*!
1478  * \internal
1479  * \brief Add primitive meta-attributes relevant to graph actions to XML
1480  *
1481  * \param[in]     rsc  Primitive resource whose meta-attributes should be added
1482  * \param[in,out] xml  Transition graph action attributes XML to add to
1483  */
1484 void
1485 pcmk__primitive_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml)
     /* [previous][next][first][last][top][bottom][index][help] */
1486 {
1487     char *name = NULL;
1488     char *value = NULL;
1489     const pcmk_resource_t *parent = NULL;
1490 
1491     CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
1492                && (xml != NULL));
1493 
1494     /* Clone instance numbers get set internally as meta-attributes, and are
1495      * needed in the transition graph (for example, to tell unique clone
1496      * instances apart).
1497      */
1498     value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
1499     if (value != NULL) {
1500         name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
1501         crm_xml_add(xml, name, value);
1502         free(name);
1503     }
1504 
1505     // Not sure if this one is really needed ...
1506     value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
1507     if (value != NULL) {
1508         name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
1509         crm_xml_add(xml, name, value);
1510         free(name);
1511     }
1512 
1513     /* The container meta-attribute can be set on the primitive itself or one of
1514      * its parents (for example, a group inside a container resource), so check
1515      * them all, and keep the highest one found.
1516      */
1517     for (parent = rsc; parent != NULL; parent = parent->parent) {
1518         if (parent->container != NULL) {
1519             crm_xml_add(xml, CRM_META "_" XML_RSC_ATTR_CONTAINER,
1520                         parent->container->id);
1521         }
1522     }
1523 
1524     /* Bundle replica children will get their external-ip set internally as a
1525      * meta-attribute. The graph action needs it, but under a different naming
1526      * convention than other meta-attributes.
1527      */
1528     value = g_hash_table_lookup(rsc->meta, "external-ip");
1529     if (value != NULL) {
1530         crm_xml_add(xml, "pcmk_external_ip", value);
1531     }
1532 }
1533 
1534 // Primitive implementation of pcmk_assignment_methods_t:add_utilization()
1535 void
1536 pcmk__primitive_add_utilization(const pcmk_resource_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1537                                 const pcmk_resource_t *orig_rsc,
1538                                 GList *all_rscs, GHashTable *utilization)
1539 {
1540     CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)
1541                && (orig_rsc != NULL) && (utilization != NULL));
1542 
1543     if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
1544         return;
1545     }
1546 
1547     pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization",
1548                  orig_rsc->id, rsc->id);
1549     pcmk__release_node_capacity(utilization, rsc);
1550 }
1551 
1552 /*!
1553  * \internal
1554  * \brief Get epoch time of node's shutdown attribute (or now if none)
1555  *
1556  * \param[in,out] node  Node to check
1557  *
1558  * \return Epoch time corresponding to shutdown attribute if set or now if not
1559  */
1560 static time_t
1561 shutdown_time(pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
1562 {
1563     const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
1564     time_t result = 0;
1565 
1566     if (shutdown != NULL) {
1567         long long result_ll;
1568 
1569         if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
1570             result = (time_t) result_ll;
1571         }
1572     }
1573     return (result == 0)? get_effective_time(node->details->data_set) : result;
1574 }
1575 
1576 /*!
1577  * \internal
1578  * \brief Ban a resource from a node if it's not locked to the node
1579  *
1580  * \param[in]     data       Node to check
1581  * \param[in,out] user_data  Resource to check
1582  */
1583 static void
1584 ban_if_not_locked(gpointer data, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1585 {
1586     const pcmk_node_t *node = (const pcmk_node_t *) data;
1587     pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
1588 
1589     if (strcmp(node->details->uname, rsc->lock_node->details->uname) != 0) {
1590         resource_location(rsc, node, -CRM_SCORE_INFINITY,
1591                           XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster);
1592     }
1593 }
1594 
/* Primitive implementation of pcmk_assignment_methods_t:shutdown_lock()
 *
 * Check whether this resource should be locked to (or is already locked to)
 * the node it is running on during that node's clean shutdown, and if so,
 * ban it from every other node.
 */
void
pcmk__primitive_shutdown_lock(pcmk_resource_t *rsc)
{
    const char *class = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive));

    class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);

    // Fence devices and remote connections can't be locked
    if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
        || pe__resource_is_remote_conn(rsc)) {
        return;
    }

    if (rsc->lock_node != NULL) {
        // The lock was obtained from resource history

        if (rsc->running_on != NULL) {
            /* The resource was started elsewhere even though it is now
             * considered locked. This shouldn't be possible, but as a
             * failsafe, we don't want to disturb the resource now.
             */
            pe_rsc_info(rsc,
                        "Cancelling shutdown lock because %s is already active",
                        rsc->id);
            // Clearing the history drops the stale lock from future runs
            pe__clear_resource_history(rsc, rsc->lock_node);
            rsc->lock_node = NULL;
            rsc->lock_time = 0;
        }

    // Only a resource active on exactly one node can be locked
    } else if (pcmk__list_of_1(rsc->running_on)) {
        pcmk_node_t *node = rsc->running_on->data;

        if (node->details->shutdown) {
            if (node->details->unclean) {
                pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
                             rsc->id, pe__node_name(node));
            } else {
                // Node is cleanly shutting down: lock the resource to it
                rsc->lock_node = node;
                rsc->lock_time = shutdown_time(node);
            }
        }
    }

    if (rsc->lock_node == NULL) {
        // No lock needed
        return;
    }

    if (rsc->cluster->shutdown_lock > 0) {
        time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;

        pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
                    rsc->id, pe__node_name(rsc->lock_node),
                    (long long) lock_expiration);
        /* Schedule a recheck one second past expiration (hence the
         * pre-increment) so the expired lock can be acted on then
         */
        pe__update_recheck_time(++lock_expiration, rsc->cluster,
                                "shutdown lock expiration");
    } else {
        // No lock limit configured, so the lock has no expiration
        pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
                    rsc->id, pe__node_name(rsc->lock_node));
    }

    // If resource is locked to one node, ban it from all other nodes
    g_list_foreach(rsc->cluster->nodes, ban_if_not_locked, rsc);
}

/* [previous][next][first][last][top][bottom][index][help] */