root/lib/pacemaker/pcmk_sched_probes.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. add_expected_result
  2. pcmk__probe_resource_list
  3. probe_then_start
  4. guest_resource_will_stop
  5. probe_action
  6. pcmk__probe_rsc_on_node
  7. probe_needed_before_action
  8. add_probe_orderings_for_stops
  9. add_start_orderings_for_probe
  10. add_restart_orderings_for_probe
  11. clear_actions_tracking_flag
  12. add_start_restart_orderings_for_rsc
  13. order_then_probes
  14. pcmk__order_probes
  15. pcmk__schedule_probes

   1 /*
   2  * Copyright 2004-2023 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <glib.h>
  13 
  14 #include <crm/crm.h>
  15 #include <crm/pengine/status.h>
  16 #include <pacemaker-internal.h>
  17 #include "libpacemaker_private.h"
  18 
  19 /*!
  20  * \internal
  21  * \brief Add the expected result to a newly created probe
  22  *
  23  * \param[in,out] probe  Probe action to add expected result to
  24  * \param[in]     rsc    Resource that probe is for
  25  * \param[in]     node   Node that probe will run on
  26  */
  27 static void
  28 add_expected_result(pe_action_t *probe, const pe_resource_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
  29                     const pe_node_t *node)
  30 {
  31     // Check whether resource is currently active on node
  32     pe_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
  33 
  34     // The expected result is what we think the resource's current state is
  35     if (running == NULL) {
  36         pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING);
  37 
  38     } else if (rsc->role == RSC_ROLE_PROMOTED) {
  39         pe__add_action_expected_result(probe, CRM_EX_PROMOTED);
  40     }
  41 }
  42 
  43 /*!
  44  * \internal
  45  * \brief Create any needed robes on a node for a list of resources
  46  *
  47  * \param[in,out] rscs  List of resources to create probes for
  48  * \param[in,out] node  Node to create probes on
  49  *
  50  * \return true if any probe was created, otherwise false
  51  */
  52 bool
  53 pcmk__probe_resource_list(GList *rscs, pe_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
  54 {
  55     bool any_created = false;
  56 
  57     for (GList *iter = rscs; iter != NULL; iter = iter->next) {
  58         pe_resource_t *rsc = (pe_resource_t *) iter->data;
  59 
  60         if (rsc->cmds->create_probe(rsc, node)) {
  61             any_created = true;
  62         }
  63     }
  64     return any_created;
  65 }
  66 
  67 /*!
  68  * \internal
  69  * \brief Order one resource's start after another's start-up probe
  70  *
  71  * \param[in,out] rsc1  Resource that might get start-up probe
  72  * \param[in]     rsc2  Resource that might be started
  73  */
  74 static void
  75 probe_then_start(pe_resource_t *rsc1, pe_resource_t *rsc2)
     /* [previous][next][first][last][top][bottom][index][help] */
  76 {
  77     if ((rsc1->allocated_to != NULL)
  78         && (g_hash_table_lookup(rsc1->known_on,
  79                                 rsc1->allocated_to->details->id) == NULL)) {
  80 
  81         pcmk__new_ordering(rsc1, pcmk__op_key(rsc1->id, RSC_STATUS, 0), NULL,
  82                            rsc2, pcmk__op_key(rsc2->id, RSC_START, 0), NULL,
  83                            pe_order_optional, rsc1->cluster);
  84     }
  85 }
  86 
  87 /*!
  88  * \internal
  89  * \brief Check whether a guest resource will stop
  90  *
  91  * \param[in] node  Guest node to check
  92  *
  93  * \return true if guest resource will likely stop, otherwise false
  94  */
  95 static bool
  96 guest_resource_will_stop(const pe_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
  97 {
  98     const pe_resource_t *guest_rsc = node->details->remote_rsc->container;
  99 
 100     /* Ideally, we'd check whether the guest has a required stop, but that
 101      * information doesn't exist yet, so approximate it ...
 102      */
 103     return node->details->remote_requires_reset
 104            || node->details->unclean
 105            || pcmk_is_set(guest_rsc->flags, pe_rsc_failed)
 106            || (guest_rsc->next_role == RSC_ROLE_STOPPED)
 107 
 108            // Guest is moving
 109            || ((guest_rsc->role > RSC_ROLE_STOPPED)
 110                && (guest_rsc->allocated_to != NULL)
 111                && (pe_find_node(guest_rsc->running_on,
 112                    guest_rsc->allocated_to->details->uname) == NULL));
 113 }
 114 
 115 /*!
 116  * \internal
 117  * \brief Create a probe action for a resource on a node
 118  *
 119  * \param[in,out] rsc   Resource to create probe for
 120  * \param[in,out] node  Node to create probe on
 121  *
 122  * \return Newly created probe action
 123  */
 124 static pe_action_t *
 125 probe_action(pe_resource_t *rsc, pe_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 126 {
 127     pe_action_t *probe = NULL;
 128     char *key = pcmk__op_key(rsc->id, RSC_STATUS, 0);
 129 
 130     crm_debug("Scheduling probe of %s %s on %s",
 131               role2text(rsc->role), rsc->id, pe__node_name(node));
 132 
 133     probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE,
 134                           rsc->cluster);
 135     pe__clear_action_flags(probe, pe_action_optional);
 136 
 137     pcmk__order_vs_unfence(rsc, node, probe, pe_order_optional);
 138     add_expected_result(probe, rsc, node);
 139     return probe;
 140 }
 141 
 142 /*!
 143  * \internal
 144  * \brief Create probes for a resource on a node, if needed
 145  *
 146  * \brief Schedule any probes needed for a resource on a node
 147  *
 148  * \param[in,out] rsc   Resource to create probe for
 149  * \param[in,out] node  Node to create probe on
 150  *
 151  * \return true if any probe was created, otherwise false
 152  */
 153 bool
 154 pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 155 {
 156     uint32_t flags = pe_order_optional;
 157     pe_action_t *probe = NULL;
 158     pe_node_t *allowed = NULL;
 159     pe_resource_t *top = uber_parent(rsc);
 160     const char *reason = NULL;
 161 
 162     CRM_CHECK((rsc != NULL) && (node != NULL), return false);
 163 
 164     if (!pcmk_is_set(rsc->cluster->flags, pe_flag_startup_probes)) {
 165         reason = "start-up probes are disabled";
 166         goto no_probe;
 167     }
 168 
 169     if (pe__is_guest_or_remote_node(node)) {
 170         const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
 171 
 172         if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
 173             reason = "Pacemaker Remote nodes cannot run stonith agents";
 174             goto no_probe;
 175 
 176         } else if (pe__is_guest_node(node)
 177                    && pe__resource_contains_guest_node(rsc->cluster, rsc)) {
 178             reason = "guest nodes cannot run resources containing guest nodes";
 179             goto no_probe;
 180 
 181         } else if (rsc->is_remote_node) {
 182             reason = "Pacemaker Remote nodes cannot host remote connections";
 183             goto no_probe;
 184         }
 185     }
 186 
 187     // If this is a collective resource, probes are created for its children
 188     if (rsc->children != NULL) {
 189         return pcmk__probe_resource_list(rsc->children, node);
 190     }
 191 
 192     if ((rsc->container != NULL) && !rsc->is_remote_node) {
 193         reason = "resource is inside a container";
 194         goto no_probe;
 195 
 196     } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
 197         reason = "resource is orphaned";
 198         goto no_probe;
 199 
 200     } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
 201         reason = "resource state is already known";
 202         goto no_probe;
 203     }
 204 
 205     allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
 206 
 207     if (rsc->exclusive_discover || top->exclusive_discover) {
 208         // Exclusive discovery is enabled ...
 209 
 210         if (allowed == NULL) {
 211             // ... but this node is not allowed to run the resource
 212             reason = "resource has exclusive discovery but is not allowed "
 213                      "on node";
 214             goto no_probe;
 215 
 216         } else if (allowed->rsc_discover_mode != pe_discover_exclusive) {
 217             // ... but no constraint marks this node for discovery of resource
 218             reason = "resource has exclusive discovery but is not enabled "
 219                      "on node";
 220             goto no_probe;
 221         }
 222     }
 223 
 224     if (allowed == NULL) {
 225         allowed = node;
 226     }
 227     if (allowed->rsc_discover_mode == pe_discover_never) {
 228         reason = "node has discovery disabled";
 229         goto no_probe;
 230     }
 231 
 232     if (pe__is_guest_node(node)) {
 233         pe_resource_t *guest = node->details->remote_rsc->container;
 234 
 235         if (guest->role == RSC_ROLE_STOPPED) {
 236             // The guest is stopped, so we know no resource is active there
 237             reason = "node's guest is stopped";
 238             probe_then_start(guest, top);
 239             goto no_probe;
 240 
 241         } else if (guest_resource_will_stop(node)) {
 242             reason = "node's guest will stop";
 243 
 244             // Order resource start after guest stop (in case it's restarting)
 245             pcmk__new_ordering(guest, pcmk__op_key(guest->id, RSC_STOP, 0),
 246                                NULL, top, pcmk__op_key(top->id, RSC_START, 0),
 247                                NULL, pe_order_optional, rsc->cluster);
 248             goto no_probe;
 249         }
 250     }
 251 
 252     // We've eliminated all cases where a probe is not needed, so now it is
 253     probe = probe_action(rsc, node);
 254 
 255     /* Below, we will order the probe relative to start or reload. If this is a
 256      * clone instance, the start or reload is for the entire clone rather than
 257      * just the instance. Otherwise, the start or reload is for the resource
 258      * itself.
 259      */
 260     if (!pe_rsc_is_clone(top)) {
 261         top = rsc;
 262     }
 263 
 264     /* Prevent a start if the resource can't be probed, but don't cause the
 265      * resource or entire clone to stop if already active.
 266      */
 267     if (!pcmk_is_set(probe->flags, pe_action_runnable)
 268         && (top->running_on == NULL)) {
 269         pe__set_order_flags(flags, pe_order_runnable_left);
 270     }
 271 
 272     // Start or reload after probing the resource
 273     pcmk__new_ordering(rsc, NULL, probe,
 274                        top, pcmk__op_key(top->id, RSC_START, 0), NULL,
 275                        flags, rsc->cluster);
 276     pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
 277                        pe_order_optional, rsc->cluster);
 278 
 279     return true;
 280 
 281 no_probe:
 282     pe_rsc_trace(rsc,
 283                  "Skipping probe for %s on %s because %s",
 284                  rsc->id, node->details->id, reason);
 285     return false;
 286 }
 287 
 288 /*!
 289  * \internal
 290  * \brief Check whether a probe should be ordered before another action
 291  *
 292  * \param[in] probe  Probe action to check
 293  * \param[in] then   Other action to check
 294  *
 295  * \return true if \p probe should be ordered before \p then, otherwise false
 296  */
 297 static bool
 298 probe_needed_before_action(const pe_action_t *probe, const pe_action_t *then)
     /* [previous][next][first][last][top][bottom][index][help] */
 299 {
 300     // Probes on a node are performed after unfencing it, not before
 301     if (pcmk__str_eq(then->task, CRM_OP_FENCE, pcmk__str_casei)
 302          && (probe->node != NULL) && (then->node != NULL)
 303          && (probe->node->details == then->node->details)) {
 304         const char *op = g_hash_table_lookup(then->meta, "stonith_action");
 305 
 306         if (pcmk__str_eq(op, "on", pcmk__str_casei)) {
 307             return false;
 308         }
 309     }
 310 
 311     // Probes should be done on a node before shutting it down
 312     if (pcmk__str_eq(then->task, CRM_OP_SHUTDOWN, pcmk__str_none)
 313         && (probe->node != NULL) && (then->node != NULL)
 314         && (probe->node->details != then->node->details)) {
 315         return false;
 316     }
 317 
 318     // Otherwise probes should always be done before any other action
 319     return true;
 320 }
 321 
 322 /*!
 323  * \internal
 324  * \brief Add implicit "probe then X" orderings for "stop then X" orderings
 325  *
 326  * If the state of a resource is not known yet, a probe will be scheduled,
 327  * expecting a "not running" result. If the probe fails, a stop will not be
 328  * scheduled until the next transition. Thus, if there are ordering constraints
 329  * like "stop this resource then do something else that's not for the same
 330  * resource", add implicit "probe this resource then do something" equivalents
 331  * so the relation is upheld until we know whether a stop is needed.
 332  *
 333  * \param[in,out] data_set  Cluster working set
 334  */
 335 static void
 336 add_probe_orderings_for_stops(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 337 {
 338     for (GList *iter = data_set->ordering_constraints; iter != NULL;
 339          iter = iter->next) {
 340 
 341         pe__ordering_t *order = iter->data;
 342         uint32_t order_flags = pe_order_optional;
 343         GList *probes = NULL;
 344         GList *then_actions = NULL;
 345 
 346         // Skip disabled orderings
 347         if (order->flags == pe_order_none) {
 348             continue;
 349         }
 350 
 351         // Skip non-resource orderings, and orderings for the same resource
 352         if ((order->lh_rsc == NULL) || (order->lh_rsc == order->rh_rsc)) {
 353             continue;
 354         }
 355 
 356         // Skip invalid orderings (shouldn't be possible)
 357         if (((order->lh_action == NULL) && (order->lh_action_task == NULL)) ||
 358             ((order->rh_action == NULL) && (order->rh_action_task == NULL))) {
 359             continue;
 360         }
 361 
 362         // Skip orderings for first actions other than stop
 363         if ((order->lh_action != NULL)
 364             && !pcmk__str_eq(order->lh_action->task, RSC_STOP, pcmk__str_none)) {
 365             continue;
 366         } else if ((order->lh_action == NULL)
 367                    && !pcmk__ends_with(order->lh_action_task, "_" RSC_STOP "_0")) {
 368             continue;
 369         }
 370 
 371         /* Do not imply a probe ordering for a resource inside of a stopping
 372          * container. Otherwise, it might introduce a transition loop, since a
 373          * probe could be scheduled after the container starts again.
 374          */
 375         if ((order->rh_rsc != NULL)
 376             && (order->lh_rsc->container == order->rh_rsc)) {
 377 
 378             if ((order->rh_action != NULL)
 379                 && pcmk__str_eq(order->rh_action->task, RSC_STOP,
 380                                 pcmk__str_none)) {
 381                 continue;
 382             } else if ((order->rh_action == NULL)
 383                        && pcmk__ends_with(order->rh_action_task,
 384                                           "_" RSC_STOP "_0")) {
 385                 continue;
 386             }
 387         }
 388 
 389         // Preserve certain order options for future filtering
 390         if (pcmk_is_set(order->flags, pe_order_apply_first_non_migratable)) {
 391             pe__set_order_flags(order_flags,
 392                                 pe_order_apply_first_non_migratable);
 393         }
 394         if (pcmk_is_set(order->flags, pe_order_same_node)) {
 395             pe__set_order_flags(order_flags, pe_order_same_node);
 396         }
 397 
 398         // Preserve certain order types for future filtering
 399         if ((order->flags == pe_order_anti_colocation)
 400             || (order->flags == pe_order_load)) {
 401             order_flags = order->flags;
 402         }
 403 
 404         // List all scheduled probes for the first resource
 405         probes = pe__resource_actions(order->lh_rsc, NULL, RSC_STATUS, FALSE);
 406         if (probes == NULL) { // There aren't any
 407             continue;
 408         }
 409 
 410         // List all relevant "then" actions
 411         if (order->rh_action != NULL) {
 412             then_actions = g_list_prepend(NULL, order->rh_action);
 413 
 414         } else if (order->rh_rsc != NULL) {
 415             then_actions = find_actions(order->rh_rsc->actions,
 416                                         order->rh_action_task, NULL);
 417             if (then_actions == NULL) { // There aren't any
 418                 g_list_free(probes);
 419                 continue;
 420             }
 421         }
 422 
 423         crm_trace("Implying 'probe then' orderings for '%s then %s' "
 424                   "(id=%d, type=%.6x)",
 425                   order->lh_action? order->lh_action->uuid : order->lh_action_task,
 426                   order->rh_action? order->rh_action->uuid : order->rh_action_task,
 427                   order->id, order->flags);
 428 
 429         for (GList *probe_iter = probes; probe_iter != NULL;
 430              probe_iter = probe_iter->next) {
 431 
 432             pe_action_t *probe = (pe_action_t *) probe_iter->data;
 433 
 434             for (GList *then_iter = then_actions; then_iter != NULL;
 435                  then_iter = then_iter->next) {
 436 
 437                 pe_action_t *then = (pe_action_t *) then_iter->data;
 438 
 439                 if (probe_needed_before_action(probe, then)) {
 440                     order_actions(probe, then, order_flags);
 441                 }
 442             }
 443         }
 444 
 445         g_list_free(then_actions);
 446         g_list_free(probes);
 447     }
 448 }
 449 
 450 /*!
 451  * \internal
 452  * \brief Add necessary orderings between probe and starts of clone instances
 453  *
 454  * , in additon to the ordering with the parent resource added upon creating
 455  * the probe.
 456  *
 457  * \param[in,out] probe     Probe as 'first' action in an ordering
 458  * \param[in,out] after     'then' action wrapper in the ordering
 459  */
 460 static void
 461 add_start_orderings_for_probe(pe_action_t *probe, pe_action_wrapper_t *after)
     /* [previous][next][first][last][top][bottom][index][help] */
 462 {
 463     uint32_t flags = pe_order_optional|pe_order_runnable_left;
 464 
 465     /* Although the ordering between the probe of the clone instance and the
 466      * start of its parent has been added in pcmk__probe_rsc_on_node(), we
 467      * avoided enforcing `pe_order_runnable_left` order type for that as long as
 468      * any of the clone instances are running to prevent them from being
 469      * unexpectedly stopped.
 470      *
 471      * On the other hand, we still need to prevent any inactive instances from
 472      * starting unless the probe is runnable so that we don't risk starting too
 473      * many instances before we know the state on all nodes.
 474      */
 475     if (after->action->rsc->variant <= pe_group
 476         || pcmk_is_set(probe->flags, pe_action_runnable)
 477         // The order type is already enforced for its parent.
 478         || pcmk_is_set(after->type, pe_order_runnable_left)
 479         || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
 480         || !pcmk__str_eq(after->action->task, RSC_START, pcmk__str_none)) {
 481         return;
 482     }
 483 
 484     crm_trace("Adding probe start orderings for '%s@%s (%s) "
 485               "then instances of %s@%s'",
 486               probe->uuid, pe__node_name(probe->node),
 487               pcmk_is_set(probe->flags, pe_action_runnable)? "runnable" : "unrunnable",
 488               after->action->uuid, pe__node_name(after->action->node));
 489 
 490     for (GList *then_iter = after->action->actions_after; then_iter != NULL;
 491          then_iter = then_iter->next) {
 492 
 493         pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
 494 
 495         if (then->action->rsc->running_on
 496             || (pe__const_top_resource(then->action->rsc, false)
 497                 != after->action->rsc)
 498             || !pcmk__str_eq(then->action->task, RSC_START, pcmk__str_none)) {
 499             continue;
 500         }
 501 
 502         crm_trace("Adding probe start ordering for '%s@%s (%s) "
 503                   "then %s@%s' (type=%#.6x)",
 504                   probe->uuid, pe__node_name(probe->node),
 505                   pcmk_is_set(probe->flags, pe_action_runnable)? "runnable" : "unrunnable",
 506                   then->action->uuid, pe__node_name(then->action->node),
 507                   flags);
 508 
 509         /* Prevent the instance from starting if the instance can't, but don't
 510          * cause any other intances to stop if already active.
 511          */
 512         order_actions(probe, then->action, flags);
 513     }
 514 
 515     return;
 516 }
 517 
 518 /*!
 519  * \internal
 520  * \brief Order probes before restarts and re-promotes
 521  *
 522  * If a given ordering is a "probe then start" or "probe then promote" ordering,
 523  * add an implicit "probe then stop/demote" ordering in case the action is part
 524  * of a restart/re-promote, and do the same recursively for all actions ordered
 525  * after the "then" action.
 526  *
 527  * \param[in,out] probe     Probe as 'first' action in an ordering
 528  * \param[in,out] after     'then' action in the ordering
 529  * \param[in,out] data_set  Cluster working set
 530  */
 531 static void
 532 add_restart_orderings_for_probe(pe_action_t *probe, pe_action_t *after,
     /* [previous][next][first][last][top][bottom][index][help] */
 533                                 pe_working_set_t *data_set)
 534 {
 535     GList *iter = NULL;
 536     bool interleave = false;
 537     pe_resource_t *compatible_rsc = NULL;
 538 
 539     // Validate that this is a resource probe followed by some action
 540     if ((after == NULL) || (probe == NULL) || (probe->rsc == NULL)
 541         || (probe->rsc->variant != pe_native)
 542         || !pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) {
 543         return;
 544     }
 545 
 546     // Avoid running into any possible loop
 547     if (pcmk_is_set(after->flags, pe_action_tracking)) {
 548         return;
 549     }
 550     pe__set_action_flags(after, pe_action_tracking);
 551 
 552     crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
 553               probe->uuid, pe__node_name(probe->node),
 554               after->uuid, pe__node_name(after->node));
 555 
 556     /* Add restart orderings if "then" is for a different primitive.
 557      * Orderings for collective resources will be added later.
 558      */
 559     if ((after->rsc != NULL) && (after->rsc->variant == pe_native)
 560         && (probe->rsc != after->rsc)) {
 561 
 562             GList *then_actions = NULL;
 563 
 564             if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) {
 565                 then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP,
 566                                                     FALSE);
 567 
 568             } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) {
 569                 then_actions = pe__resource_actions(after->rsc, NULL,
 570                                                     RSC_DEMOTE, FALSE);
 571             }
 572 
 573             for (iter = then_actions; iter != NULL; iter = iter->next) {
 574                 pe_action_t *then = (pe_action_t *) iter->data;
 575 
 576                 // Skip pseudo-actions (for example, those implied by fencing)
 577                 if (!pcmk_is_set(then->flags, pe_action_pseudo)) {
 578                     order_actions(probe, then, pe_order_optional);
 579                 }
 580             }
 581             g_list_free(then_actions);
 582     }
 583 
 584     /* Detect whether "then" is an interleaved clone action. For these, we want
 585      * to add orderings only for the relevant instance.
 586      */
 587     if ((after->rsc != NULL)
 588         && (after->rsc->variant > pe_group)) {
 589         const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
 590                                                        XML_RSC_ATTR_INTERLEAVE);
 591 
 592         interleave = crm_is_true(interleave_s);
 593         if (interleave) {
 594             compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
 595                                                             after->rsc,
 596                                                             RSC_ROLE_UNKNOWN,
 597                                                             false);
 598         }
 599     }
 600 
 601     /* Now recursively do the same for all actions ordered after "then". This
 602      * also handles collective resources since the collective action will be
 603      * ordered before its individual instances' actions.
 604      */
 605     for (iter = after->actions_after; iter != NULL; iter = iter->next) {
 606         pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) iter->data;
 607 
 608         /* pe_order_implies_then is the reason why a required A.start
 609          * implies/enforces B.start to be required too, which is the cause of
 610          * B.restart/re-promote.
 611          *
 612          * Not sure about pe_order_implies_then_on_node though. It's now only
 613          * used for unfencing case, which tends to introduce transition
 614          * loops...
 615          */
 616         if (!pcmk_is_set(after_wrapper->type, pe_order_implies_then)) {
 617             /* The order type between a group/clone and its child such as
 618              * B.start-> B_child.start is:
 619              * pe_order_implies_first_printed | pe_order_runnable_left
 620              *
 621              * Proceed through the ordering chain and build dependencies with
 622              * its children.
 623              */
 624             if ((after->rsc == NULL)
 625                 || (after->rsc->variant < pe_group)
 626                 || (probe->rsc->parent == after->rsc)
 627                 || (after_wrapper->action->rsc == NULL)
 628                 || (after_wrapper->action->rsc->variant > pe_group)
 629                 || (after->rsc != after_wrapper->action->rsc->parent)) {
 630                 continue;
 631             }
 632 
 633             /* Proceed to the children of a group or a non-interleaved clone.
 634              * For an interleaved clone, proceed only to the relevant child.
 635              */
 636             if ((after->rsc->variant > pe_group) && interleave
 637                 && ((compatible_rsc == NULL)
 638                     || (compatible_rsc != after_wrapper->action->rsc))) {
 639                 continue;
 640             }
 641         }
 642 
 643         crm_trace("Recursively adding probe restart orderings for "
 644                   "'%s@%s then %s@%s' (type=%#.6x)",
 645                   after->uuid, pe__node_name(after->node),
 646                   after_wrapper->action->uuid,
 647                   pe__node_name(after_wrapper->action->node),
 648                   after_wrapper->type);
 649 
 650         add_restart_orderings_for_probe(probe, after_wrapper->action, data_set);
 651     }
 652 }
 653 
 654 /*!
 655  * \internal
 656  * \brief Clear the tracking flag on all scheduled actions
 657  *
 658  * \param[in,out] data_set  Cluster working set
 659  */
 660 static void
 661 clear_actions_tracking_flag(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 662 {
 663     GList *gIter = NULL;
 664 
 665     for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
 666         pe_action_t *action = (pe_action_t *) gIter->data;
 667 
 668         pe__clear_action_flags(action, pe_action_tracking);
 669     }
 670 }
 671 
 672 /*!
 673  * \internal
 674  * \brief Add start and restart orderings for probes scheduled for a resource
 675  *
 676  * \param[in,out] rsc       Resource whose probes should be ordered
 677  * \param[in,out] data_set  Cluster working set
 678  */
 679 static void
 680 add_start_restart_orderings_for_rsc(pe_resource_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
 681                                     pe_working_set_t *data_set)
 682 {
 683     GList *probes = NULL;
 684 
 685     // For collective resources, order each instance recursively
 686     if (rsc->variant != pe_native) {
 687         g_list_foreach(rsc->children,
 688                        (GFunc) add_start_restart_orderings_for_rsc, data_set);
 689         return;
 690     }
 691 
 692     // Find all probes for given resource
 693     probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
 694 
 695     // Add probe restart orderings for each probe found
 696     for (GList *iter = probes; iter != NULL; iter = iter->next) {
 697         pe_action_t *probe = (pe_action_t *) iter->data;
 698 
 699         for (GList *then_iter = probe->actions_after; then_iter != NULL;
 700              then_iter = then_iter->next) {
 701 
 702             pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
 703 
 704             add_start_orderings_for_probe(probe, then);
 705             add_restart_orderings_for_probe(probe, then->action, data_set);
 706             clear_actions_tracking_flag(data_set);
 707         }
 708     }
 709 
 710     g_list_free(probes);
 711 }
 712 
 713 /*!
 714  * \internal
 715  * \brief Add "A then probe B" orderings for "A then B" orderings
 716  *
 717  * \param[in,out] data_set  Cluster working set
 718  *
 719  * \note This function is currently disabled (see next comment).
 720  */
 721 static void
 722 order_then_probes(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 723 {
 724 #if 0
 725     /* Given an ordering "A then B", we would prefer to wait for A to be started
 726      * before probing B.
 727      *
 728      * For example, if A is a filesystem which B can't even run without, it
 729      * would be helpful if the author of B's agent could assume that A is
 730      * running before B.monitor will be called.
 731      *
 732      * However, we can't _only_ probe after A is running, otherwise we wouldn't
 733      * detect the state of B if A could not be started. We can't even do an
 734      * opportunistic version of this, because B may be moving:
 735      *
 736      *   A.stop -> A.start -> B.probe -> B.stop -> B.start
 737      *
 738      * and if we add B.stop -> A.stop here, we get a loop:
 739      *
 740      *   A.stop -> A.start -> B.probe -> B.stop -> A.stop
 741      *
 742      * We could kill the "B.probe -> B.stop" dependency, but that could mean
 743      * stopping B "too" soon, because B.start must wait for the probe, and
 744      * we don't want to stop B if we can't start it.
 745      *
 746      * We could add the ordering only if A is an anonymous clone with
 747      * clone-max == node-max (since we'll never be moving it). However, we could
 748      * still be stopping one instance at the same time as starting another.
 749      *
 750      * The complexity of checking for allowed conditions combined with the ever
 751      * narrowing use case suggests that this code should remain disabled until
 752      * someone gets smarter.
 753      */
 754     for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
 755         pe_resource_t *rsc = (pe_resource_t *) iter->data;
 756 
 757         pe_action_t *start = NULL;
 758         GList *actions = NULL;
 759         GList *probes = NULL;
 760 
 761         actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE);
 762 
 763         if (actions) {
 764             start = actions->data;
 765             g_list_free(actions);
 766         }
 767 
 768         if (start == NULL) {
 769             crm_err("No start action for %s", rsc->id);
 770             continue;
 771         }
 772 
 773         probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
 774 
 775         for (actions = start->actions_before; actions != NULL;
 776              actions = actions->next) {
 777 
 778             pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data;
 779 
 780             pe_action_t *first = before->action;
 781             pe_resource_t *first_rsc = first->rsc;
 782 
 783             if (first->required_runnable_before) {
 784                 for (GList *clone_actions = first->actions_before;
 785                      clone_actions != NULL;
 786                      clone_actions = clone_actions->next) {
 787 
 788                     before = (pe_action_wrapper_t *) clone_actions->data;
 789 
 790                     crm_trace("Testing '%s then %s' for %s",
 791                               first->uuid, before->action->uuid, start->uuid);
 792 
 793                     CRM_ASSERT(before->action->rsc != NULL);
 794                     first_rsc = before->action->rsc;
 795                     break;
 796                 }
 797 
 798             } else if (!pcmk__str_eq(first->task, RSC_START, pcmk__str_none)) {
 799                 crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
 800             }
 801 
 802             if (first_rsc == NULL) {
 803                 continue;
 804 
 805             } else if (pe__const_top_resource(first_rsc, false)
 806                        == pe__const_top_resource(start->rsc, false)) {
 807                 crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
 808                 continue;
 809 
 810             } else if (!pe_rsc_is_clone(pe__const_top_resource(first_rsc,
 811                                                                false))) {
 812                 crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
 813                 continue;
 814             }
 815 
 816             crm_err("Applying %s before %s %d", first->uuid, start->uuid,
 817                     pe__const_top_resource(first_rsc, false)->variant);
 818 
 819             for (GList *probe_iter = probes; probe_iter != NULL;
 820                  probe_iter = probe_iter->next) {
 821 
 822                 pe_action_t *probe = (pe_action_t *) probe_iter->data;
 823 
 824                 crm_err("Ordering %s before %s", first->uuid, probe->uuid);
 825                 order_actions(first, probe, pe_order_optional);
 826             }
 827         }
 828     }
 829 #endif
 830 }
 831 
 832 void
 833 pcmk__order_probes(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 834 {
 835     // Add orderings for "probe then X"
 836     g_list_foreach(data_set->resources,
 837                    (GFunc) add_start_restart_orderings_for_rsc, data_set);
 838     add_probe_orderings_for_stops(data_set);
 839 
 840     order_then_probes(data_set);
 841 }
 842 
 843 /*!
 844  * \internal
 845  * \brief Schedule any probes needed
 846  *
 847  * \param[in,out] data_set  Cluster working set
 848  *
 849  * \note This may also schedule fencing of failed remote nodes.
 850  */
 851 void
 852 pcmk__schedule_probes(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 853 {
 854     // Schedule probes on each node in the cluster as needed
 855     for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
 856         pe_node_t *node = (pe_node_t *) iter->data;
 857         const char *probed = NULL;
 858 
 859         if (!node->details->online) { // Don't probe offline nodes
 860             if (pcmk__is_failed_remote_node(node)) {
 861                 pe_fence_node(data_set, node,
 862                               "the connection is unrecoverable", FALSE);
 863             }
 864             continue;
 865 
 866         } else if (node->details->unclean) { // ... or nodes that need fencing
 867             continue;
 868 
 869         } else if (!node->details->rsc_discovery_enabled) {
 870             // The user requested that probes not be done on this node
 871             continue;
 872         }
 873 
 874         /* This is no longer needed for live clusters, since the probe_complete
 875          * node attribute will never be in the CIB. However this is still useful
 876          * for processing old saved CIBs (< 1.1.14), including the
 877          * reprobe-target_rc regression test.
 878          */
 879         probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
 880         if (probed != NULL && crm_is_true(probed) == FALSE) {
 881             pe_action_t *probe_op = NULL;
 882 
 883             probe_op = custom_action(NULL,
 884                                      crm_strdup_printf("%s-%s", CRM_OP_REPROBE,
 885                                                        node->details->uname),
 886                                      CRM_OP_REPROBE, node, FALSE, TRUE,
 887                                      data_set);
 888             add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT,
 889                            XML_BOOLEAN_TRUE);
 890             continue;
 891         }
 892 
 893         // Probe each resource in the cluster on this node, as needed
 894         pcmk__probe_resource_list(data_set->resources, node);
 895     }
 896 }

/* [previous][next][first][last][top][bottom][index][help] */