root/lib/cluster/election.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. election_complete
  2. election_timer_cb
  3. election_state
  4. election_init
  5. election_remove
  6. election_reset
  7. election_fini
  8. election_timeout_start
  9. election_timeout_stop
  10. election_timeout_set_period
  11. get_uptime
  12. compare_age
  13. election_vote
  14. election_check
  15. parse_election_message
  16. record_vote
  17. send_no_vote
  18. election_count_vote
  19. election_clear_dampening

   1 /*
   2  * Copyright 2004-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/time.h>
  13 #include <sys/resource.h>
  14 
  15 #include <crm/msg_xml.h>
  16 #include <crm/common/xml.h>
  17 
  18 #include <crm/common/mainloop.h>
  19 #include <crm/cluster/internal.h>
  20 #include <crm/cluster/election_internal.h>
  21 #include <crm/crm.h>
  22 
  23 #define STORM_INTERVAL   2      /* in seconds */
  24 
  25 struct election_s {
  26     enum election_result state;
  27     guint count;        // How many times local node has voted
  28     char *name;         // Descriptive name for this election
  29     char *uname;        // Local node's name
  30     GSourceFunc cb;     // Function to call if election is won
  31     GHashTable *voted;  // Key = node name, value = how node voted
  32     mainloop_timer_t *timeout; // When to abort if all votes not received
  33     int election_wins;         // Track wins, for storm detection
  34     bool wrote_blackbox;       // Write a storm blackbox at most once
  35     time_t expires;            // When storm detection period ends
  36     time_t last_election_loss; // When dampening period ends
  37 };
  38 
  39 static void
  40 election_complete(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
  41 {
  42     e->state = election_won;
  43     if (e->cb != NULL) {
  44         e->cb(e);
  45     }
  46     election_reset(e);
  47 }
  48 
  49 static gboolean
  50 election_timer_cb(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  51 {
  52     election_t *e = user_data;
  53 
  54     crm_info("%s timed out, declaring local node as winner", e->name);
  55     election_complete(e);
  56     return FALSE;
  57 }
  58 
  59 /*!
  60  * \brief Get current state of an election
  61  *
  62  * \param[in] e  Election object
  63  *
  64  * \return Current state of \e
  65  */
  66 enum election_result
  67 election_state(const election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
  68 {
  69     return (e == NULL)? election_error : e->state;
  70 }
  71 
  72 /*!
  73  * \brief Create a new election object
  74  *
  75  * Every node that wishes to participate in an election must create an election
  76  * object. Typically, this should be done once, at start-up. A caller should
  77  * only create a single election object.
  78  *
  79  * \param[in] name       Label for election (for logging)
  80  * \param[in] uname      Local node's name
  81  * \param[in] period_ms  How long to wait for all peers to vote
  82  * \param[in] cb         Function to call if local node wins election
  83  *
  84  * \return Newly allocated election object on success, NULL on error
  85  * \note The caller is responsible for freeing the returned value using
  86  *       election_fini().
  87  */
  88 election_t *
  89 election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
     /* [previous][next][first][last][top][bottom][index][help] */
  90 {
  91     election_t *e = NULL;
  92 
  93     static guint count = 0;
  94 
  95     CRM_CHECK(uname != NULL, return NULL);
  96 
  97     e = calloc(1, sizeof(election_t));
  98     if (e == NULL) {
  99         crm_perror(LOG_CRIT, "Cannot create election");
 100         return NULL;
 101     }
 102 
 103     e->uname = strdup(uname);
 104     if (e->uname == NULL) {
 105         crm_perror(LOG_CRIT, "Cannot create election");
 106         free(e);
 107         return NULL;
 108     }
 109 
 110     e->name = name? crm_strdup_printf("election-%s", name)
 111                   : crm_strdup_printf("election-%u", count++);
 112     e->cb = cb;
 113     e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
 114                                     election_timer_cb, e);
 115     crm_trace("Created %s", e->name);
 116     return e;
 117 }
 118 
 119 /*!
 120  * \brief Disregard any previous vote by specified peer
 121  *
 122  * This discards any recorded vote from a specified peer. Election users should
 123  * call this whenever a voting peer becomes inactive.
 124  *
 125  * \param[in,out] e      Election object
 126  * \param[in]     uname  Name of peer to disregard
 127  */
 128 void
 129 election_remove(election_t *e, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
 130 {
 131     if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
 132         crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
 133         g_hash_table_remove(e->voted, uname);
 134     }
 135 }
 136 
 137 /*!
 138  * \brief Stop election timer and disregard all votes
 139  *
 140  * \param[in,out] e  Election object
 141  */
 142 void
 143 election_reset(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 144 {
 145     if (e != NULL) {
 146         crm_trace("Resetting election %s", e->name);
 147         mainloop_timer_stop(e->timeout);
 148         if (e->voted) {
 149             crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
 150             g_hash_table_destroy(e->voted);
 151             e->voted = NULL;
 152         }
 153     }
 154 }
 155 
 156 /*!
 157  * \brief Free an election object
 158  *
 159  * Free all memory associated with an election object, stopping its
 160  * election timer (if running).
 161  *
 162  * \param[in,out] e  Election object
 163  */
 164 void
 165 election_fini(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 166 {
 167     if (e != NULL) {
 168         election_reset(e);
 169         crm_trace("Destroying %s", e->name);
 170         mainloop_timer_del(e->timeout);
 171         free(e->uname);
 172         free(e->name);
 173         free(e);
 174     }
 175 }
 176 
 177 static void
 178 election_timeout_start(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 179 {
 180     if (e != NULL) {
 181         mainloop_timer_start(e->timeout);
 182     }
 183 }
 184 
 185 /*!
 186  * \brief Stop an election's timer, if running
 187  *
 188  * \param[in,out] e  Election object
 189  */
 190 void
 191 election_timeout_stop(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 192 {
 193     if (e != NULL) {
 194         mainloop_timer_stop(e->timeout);
 195     }
 196 }
 197 
 198 /*!
 199  * \brief Change an election's timeout (restarting timer if running)
 200  *
 201  * \param[in,out] e       Election object
 202  * \param[in]     period  New timeout
 203  */
 204 void
 205 election_timeout_set_period(election_t *e, guint period)
     /* [previous][next][first][last][top][bottom][index][help] */
 206 {
 207     if (e != NULL) {
 208         mainloop_timer_set_period(e->timeout, period);
 209     } else {
 210         crm_err("No election defined");
 211     }
 212 }
 213 
 214 static int
 215 get_uptime(struct timeval *output)
     /* [previous][next][first][last][top][bottom][index][help] */
 216 {
 217     static time_t expires = 0;
 218     static struct rusage info;
 219 
 220     time_t tm_now = time(NULL);
 221 
 222     if (expires < tm_now) {
 223         int rc = 0;
 224 
 225         info.ru_utime.tv_sec = 0;
 226         info.ru_utime.tv_usec = 0;
 227         rc = getrusage(RUSAGE_SELF, &info);
 228 
 229         output->tv_sec = 0;
 230         output->tv_usec = 0;
 231 
 232         if (rc < 0) {
 233             crm_perror(LOG_ERR, "Could not calculate the current uptime");
 234             expires = 0;
 235             return -1;
 236         }
 237 
 238         crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
 239                   (long)info.ru_utime.tv_usec);
 240     }
 241 
 242     expires = tm_now + STORM_INTERVAL;  /* N seconds after the last _access_ */
 243     output->tv_sec = info.ru_utime.tv_sec;
 244     output->tv_usec = info.ru_utime.tv_usec;
 245 
 246     return 1;
 247 }
 248 
 249 static int
 250 compare_age(struct timeval your_age)
     /* [previous][next][first][last][top][bottom][index][help] */
 251 {
 252     struct timeval our_age;
 253 
 254     get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */
 255 
 256     if (our_age.tv_sec > your_age.tv_sec) {
 257         crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
 258         return 1;
 259     } else if (our_age.tv_sec < your_age.tv_sec) {
 260         crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
 261         return -1;
 262     } else if (our_age.tv_usec > your_age.tv_usec) {
 263         crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
 264                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
 265         return 1;
 266     } else if (our_age.tv_usec < your_age.tv_usec) {
 267         crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
 268                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
 269         return -1;
 270     }
 271 
 272     return 0;
 273 }
 274 
 275 /*!
 276  * \brief Start a new election by offering local node's candidacy
 277  *
 278  * Broadcast a "vote" election message containing the local node's ID,
 279  * (incremented) election counter, and uptime, and start the election timer.
 280  *
 281  * \param[in,out] e  Election object
 282  *
 283  * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
 284  *       all active peers do so, or if the election times out, the local node
 285  *       wins the election. (If we lose to any peer vote, we will stop the
 286  *       timer, so a timeout means we did not lose -- either some peer did not
 287  *       vote, or we did not call election_check() in time.)
 288  */
 289 void
 290 election_vote(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 291 {
 292     struct timeval age;
 293     xmlNode *vote = NULL;
 294     crm_node_t *our_node;
 295 
 296     if (e == NULL) {
 297         crm_trace("Election vote requested, but no election available");
 298         return;
 299     }
 300 
 301     our_node = crm_get_peer(0, e->uname);
 302     if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) {
 303         crm_trace("Cannot vote in %s yet: local node not connected to cluster",
 304                   e->name);
 305         return;
 306     }
 307 
 308     election_reset(e);
 309     e->state = election_in_progress;
 310     vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 311 
 312     e->count++;
 313     crm_xml_add(vote, F_CRM_ELECTION_OWNER, our_node->uuid);
 314     crm_xml_add_int(vote, F_CRM_ELECTION_ID, e->count);
 315 
 316     get_uptime(&age);
 317     crm_xml_add_timeval(vote, F_CRM_ELECTION_AGE_S, F_CRM_ELECTION_AGE_US, &age);
 318 
 319     send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
 320     free_xml(vote);
 321 
 322     crm_debug("Started %s round %d", e->name, e->count);
 323     election_timeout_start(e);
 324     return;
 325 }
 326 
 327 /*!
 328  * \brief Check whether local node has won an election
 329  *
 330  * If all known peers have sent no-vote messages, stop the election timer, set
 331  * the election state to won, and call any registered win callback.
 332  *
 333  * \param[in,out] e  Election object
 334  *
 335  * \return TRUE if local node has won, FALSE otherwise
 336  * \note If all known peers have sent no-vote messages, but the election owner
 337  *       does not call this function, the election will not be won (and the
 338  *       callback will not be called) until the election times out.
 339  * \note This should be called when election_count_vote() returns
 340  *       \c election_in_progress.
 341  */
 342 bool
 343 election_check(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 344 {
 345     int voted_size = 0;
 346     int num_members = 0;
 347 
 348     if (e == NULL) {
 349         crm_trace("Election check requested, but no election available");
 350         return FALSE;
 351     }
 352     if (e->voted == NULL) {
 353         crm_trace("%s check requested, but no votes received yet", e->name);
 354         return FALSE;
 355     }
 356 
 357     voted_size = g_hash_table_size(e->voted);
 358     num_members = crm_active_peers();
 359 
 360     /* in the case of #voted > #members, it is better to
 361      *   wait for the timeout and give the cluster time to
 362      *   stabilize
 363      */
 364     if (voted_size >= num_members) {
 365         /* we won and everyone has voted */
 366         election_timeout_stop(e);
 367         if (voted_size > num_members) {
 368             GHashTableIter gIter;
 369             const crm_node_t *node;
 370             char *key = NULL;
 371 
 372             crm_warn("Received too many votes in %s", e->name);
 373             g_hash_table_iter_init(&gIter, crm_peer_cache);
 374             while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
 375                 if (crm_is_peer_active(node)) {
 376                     crm_warn("* expected vote: %s", node->uname);
 377                 }
 378             }
 379 
 380             g_hash_table_iter_init(&gIter, e->voted);
 381             while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
 382                 crm_warn("* actual vote: %s", key);
 383             }
 384 
 385         }
 386 
 387         crm_info("%s won by local node", e->name);
 388         election_complete(e);
 389         return TRUE;
 390 
 391     } else {
 392         crm_debug("%s still waiting on %d of %d votes",
 393                   e->name, num_members - voted_size, num_members);
 394     }
 395 
 396     return FALSE;
 397 }
 398 
 399 #define LOSS_DAMPEN 2           /* in seconds */
 400 
 401 struct vote {
 402     const char *op;
 403     const char *from;
 404     const char *version;
 405     const char *election_owner;
 406     int election_id;
 407     struct timeval age;
 408 };
 409 
 410 /*!
 411  * \brief Unpack an election message
 412  *
 413  * \param[in] e        Election object (for logging only)
 414  * \param[in] message  Election message XML
 415  * \param[out] vote    Parsed fields from message
 416  *
 417  * \return TRUE if election message and election are valid, FALSE otherwise
 418  * \note The parsed struct's pointer members are valid only for the lifetime of
 419  *       the message argument.
 420  */
 421 static bool
 422 parse_election_message(const election_t *e, const xmlNode *message,
     /* [previous][next][first][last][top][bottom][index][help] */
 423                        struct vote *vote)
 424 {
 425     CRM_CHECK(message && vote, return FALSE);
 426 
 427     vote->election_id = -1;
 428     vote->age.tv_sec = -1;
 429     vote->age.tv_usec = -1;
 430 
 431     vote->op = crm_element_value(message, F_CRM_TASK);
 432     vote->from = crm_element_value(message, F_CRM_HOST_FROM);
 433     vote->version = crm_element_value(message, F_CRM_VERSION);
 434     vote->election_owner = crm_element_value(message, F_CRM_ELECTION_OWNER);
 435 
 436     crm_element_value_int(message, F_CRM_ELECTION_ID, &(vote->election_id));
 437 
 438     if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
 439         || (vote->election_owner == NULL) || (vote->election_id < 0)) {
 440 
 441         crm_warn("Invalid %s message from %s in %s ",
 442                  (vote->op? vote->op : "election"),
 443                  (vote->from? vote->from : "unspecified node"),
 444                  (e? e->name : "election"));
 445         return FALSE;
 446     }
 447 
 448     // Op-specific validation
 449 
 450     if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
 451         // Only vote ops have uptime
 452         crm_element_value_timeval(message, F_CRM_ELECTION_AGE_S,
 453                                   F_CRM_ELECTION_AGE_US, &(vote->age));
 454         if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
 455             crm_warn("Cannot count %s %s from %s because it is missing uptime",
 456                      (e? e->name : "election"), vote->op, vote->from);
 457             return FALSE;
 458         }
 459 
 460     } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
 461         crm_info("Cannot process %s message from %s because %s is not a known election op",
 462                  (e? e->name : "election"), vote->from, vote->op);
 463         return FALSE;
 464     }
 465 
 466     // Election validation
 467 
 468     if (e == NULL) {
 469         crm_info("Cannot count %s from %s because no election available",
 470                  vote->op, vote->from);
 471         return FALSE;
 472     }
 473 
 474     /* If the membership cache is NULL, we REALLY shouldn't be voting --
 475      * the question is how we managed to get here.
 476      */
 477     if (crm_peer_cache == NULL) {
 478         crm_info("Cannot count %s %s from %s because no peer information available",
 479                  e->name, vote->op, vote->from);
 480         return FALSE;
 481     }
 482     return TRUE;
 483 }
 484 
 485 static void
 486 record_vote(election_t *e, struct vote *vote)
     /* [previous][next][first][last][top][bottom][index][help] */
 487 {
 488     char *voter_copy = NULL;
 489     char *vote_copy = NULL;
 490 
 491     CRM_ASSERT(e && vote && vote->from && vote->op);
 492     if (e->voted == NULL) {
 493         e->voted = pcmk__strkey_table(free, free);
 494     }
 495 
 496     voter_copy = strdup(vote->from);
 497     vote_copy = strdup(vote->op);
 498     CRM_ASSERT(voter_copy && vote_copy);
 499 
 500     g_hash_table_replace(e->voted, voter_copy, vote_copy);
 501 }
 502 
 503 static void
 504 send_no_vote(crm_node_t *peer, struct vote *vote)
     /* [previous][next][first][last][top][bottom][index][help] */
 505 {
 506     // @TODO probably shouldn't hardcode CRM_SYSTEM_CRMD and crm_msg_crmd
 507 
 508     xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
 509                                      CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 510 
 511     crm_xml_add(novote, F_CRM_ELECTION_OWNER, vote->election_owner);
 512     crm_xml_add_int(novote, F_CRM_ELECTION_ID, vote->election_id);
 513 
 514     send_cluster_message(peer, crm_msg_crmd, novote, TRUE);
 515     free_xml(novote);
 516 }
 517 
 518 /*!
 519  * \brief Process an election message (vote or no-vote) from a peer
 520  *
 521  * \param[in,out] e        Election object
 522  * \param[in]     message  Election message XML from peer
 523  * \param[in]     can_win  Whether local node is eligible to win
 524  *
 525  * \return Election state after new vote is considered
 526  * \note If the peer message is a vote, and we prefer the peer to win, this will
 527  *       send a no-vote reply to the peer.
 528  * \note The situations "we lost to this vote" from "this is a late no-vote
 529  *       after we've already lost" both return election_lost. If a caller needs
 530  *       to distinguish them, it should save the current state before calling
 531  *       this function, and then compare the result.
 532  */
 533 enum election_result
 534 election_count_vote(election_t *e, const xmlNode *message, bool can_win)
     /* [previous][next][first][last][top][bottom][index][help] */
 535 {
 536     int log_level = LOG_INFO;
 537     gboolean done = FALSE;
 538     gboolean we_lose = FALSE;
 539     const char *reason = "unknown";
 540     bool we_are_owner = FALSE;
 541     crm_node_t *our_node = NULL, *your_node = NULL;
 542     time_t tm_now = time(NULL);
 543     struct vote vote;
 544 
 545     CRM_CHECK(message != NULL, return election_error);
 546     if (parse_election_message(e, message, &vote) == FALSE) {
 547         return election_error;
 548     }
 549 
 550     your_node = crm_get_peer(0, vote.from);
 551     our_node = crm_get_peer(0, e->uname);
 552     we_are_owner = (our_node != NULL)
 553                    && pcmk__str_eq(our_node->uuid, vote.election_owner,
 554                                    pcmk__str_none);
 555 
 556     if (!can_win) {
 557         reason = "Not eligible";
 558         we_lose = TRUE;
 559 
 560     } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
 561         reason = "We are not part of the cluster";
 562         log_level = LOG_ERR;
 563         we_lose = TRUE;
 564 
 565     } else if (we_are_owner && (vote.election_id != e->count)) {
 566         log_level = LOG_TRACE;
 567         reason = "Superseded";
 568         done = TRUE;
 569 
 570     } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
 571         /* Possibly we cached the message in the FSA queue at a point that it wasn't */
 572         reason = "Peer is not part of our cluster";
 573         log_level = LOG_WARNING;
 574         done = TRUE;
 575 
 576     } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
 577                || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
 578         /* Receiving our own broadcast vote, or a no-vote from peer, is a vote
 579          * for us to win
 580          */
 581         if (!we_are_owner) {
 582             crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
 583                      e->name, vote.election_id, vote.op, vote.from,
 584                      vote.election_owner);
 585             return election_error;
 586         }
 587         if (e->state != election_in_progress) {
 588             // Should only happen if we already lost
 589             crm_debug("Not counting %s round %d %s from %s because no election in progress",
 590                       e->name, vote.election_id, vote.op, vote.from);
 591             return e->state;
 592         }
 593         record_vote(e, &vote);
 594         reason = "Recorded";
 595         done = TRUE;
 596 
 597     } else {
 598         // A peer vote requires a comparison to determine which node is better
 599         int age_result = compare_age(vote.age);
 600         int version_result = compare_version(vote.version, CRM_FEATURE_SET);
 601 
 602         if (version_result < 0) {
 603             reason = "Version";
 604             we_lose = TRUE;
 605 
 606         } else if (version_result > 0) {
 607             reason = "Version";
 608 
 609         } else if (age_result < 0) {
 610             reason = "Uptime";
 611             we_lose = TRUE;
 612 
 613         } else if (age_result > 0) {
 614             reason = "Uptime";
 615 
 616         } else if (strcasecmp(e->uname, vote.from) > 0) {
 617             reason = "Host name";
 618             we_lose = TRUE;
 619 
 620         } else {
 621             reason = "Host name";
 622         }
 623     }
 624 
 625     if (e->expires < tm_now) {
 626         e->election_wins = 0;
 627         e->expires = tm_now + STORM_INTERVAL;
 628 
 629     } else if (done == FALSE && we_lose == FALSE) {
 630         int peers = 1 + g_hash_table_size(crm_peer_cache);
 631 
 632         /* If every node has to vote down every other node, thats N*(N-1) total elections
 633          * Allow some leeway before _really_ complaining
 634          */
 635         e->election_wins++;
 636         if (e->election_wins > (peers * peers)) {
 637             crm_warn("%s election storm detected: %d wins in %d seconds",
 638                      e->name, e->election_wins, STORM_INTERVAL);
 639             e->election_wins = 0;
 640             e->expires = tm_now + STORM_INTERVAL;
 641             if (e->wrote_blackbox == FALSE) {
 642                 /* It's questionable whether a black box (from every node in the
 643                  * cluster) would be truly helpful in diagnosing an election
 644                  * storm. It's also highly doubtful a production environment
 645                  * would get multiple election storms from distinct causes, so
 646                  * saving one blackbox per process lifetime should be
 647                  * sufficient. Alternatives would be to save a timestamp of the
 648                  * last blackbox write instead of a boolean, and write a new one
 649                  * if some amount of time has passed; or to save a storm count,
 650                  * write a blackbox on every Nth occurrence.
 651                  */
 652                 crm_write_blackbox(0, NULL);
 653                 e->wrote_blackbox = TRUE;
 654             }
 655         }
 656     }
 657 
 658     if (done) {
 659         do_crm_log(log_level + 1,
 660                    "Processed %s round %d %s (current round %d) from %s (%s)",
 661                    e->name, vote.election_id, vote.op, e->count, vote.from,
 662                    reason);
 663         return e->state;
 664 
 665     } else if (we_lose == FALSE) {
 666         /* We track the time of the last election loss to implement an election
 667          * dampening period, reducing the likelihood of an election storm. If
 668          * this node has lost within the dampening period, don't start a new
 669          * election, even if we win against a peer's vote -- the peer we lost to
 670          * should win again.
 671          *
 672          * @TODO This has a problem case: if an election winner immediately
 673          * leaves the cluster, and a new election is immediately called, all
 674          * nodes could lose, with no new winner elected. The ideal solution
 675          * would be to tie the election structure with the peer caches, which
 676          * would allow us to clear the dampening when the previous winner
 677          * leaves (and would allow other improvements as well).
 678          */
 679         if ((e->last_election_loss == 0)
 680             || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {
 681 
 682             do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
 683                        e->name, vote.election_id, vote.election_owner, vote.op,
 684                        vote.from, reason);
 685 
 686             e->last_election_loss = 0;
 687             election_timeout_stop(e);
 688 
 689             /* Start a new election by voting down this, and other, peers */
 690             e->state = election_start;
 691             return e->state;
 692         } else {
 693             char *loss_time = ctime(&e->last_election_loss);
 694 
 695             if (loss_time) {
 696                 // Show only HH:MM:SS
 697                 loss_time += 11;
 698                 loss_time[8] = '\0';
 699             }
 700             crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
 701                      e->name, vote.election_id, vote.election_owner, vote.from,
 702                      LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
 703         }
 704     }
 705 
 706     e->last_election_loss = tm_now;
 707 
 708     do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
 709                e->name, vote.election_id, vote.election_owner, vote.op,
 710                vote.from, reason);
 711 
 712     election_reset(e);
 713     send_no_vote(your_node, &vote);
 714     e->state = election_lost;
 715     return e->state;
 716 }
 717 
 718 /*!
 719  * \brief Reset any election dampening currently in effect
 720  *
 721  * \param[in,out] e        Election object to clear
 722  */
 723 void
 724 election_clear_dampening(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 725 {
 726     e->last_election_loss = 0;
 727 }

/* [previous][next][first][last][top][bottom][index][help] */