root/daemons/fenced/pacemaker-fenced.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. st_ipc_accept
  2. st_ipc_dispatch
  3. st_ipc_closed
  4. st_ipc_destroy
  5. stonith_peer_callback
  6. stonith_peer_ais_callback
  7. stonith_peer_cs_destroy
  8. do_local_reply
  9. get_stonith_flag
  10. stonith_notify_client
  11. do_stonith_async_timeout_update
  12. do_stonith_notify
  13. do_stonith_notify_config
  14. do_stonith_notify_device
  15. do_stonith_notify_level
  16. topology_remove_helper
  17. remove_cib_device
  18. handle_topology_change
  19. remove_fencing_topology
  20. register_fencing_topology
  21. fencing_topology_init
  22. our_node_allowed_for
  23. watchdog_device_update
  24. update_stonith_watchdog_timeout_ms
  25. cib_device_update
  26. cib_devices_update
  27. update_cib_stonith_devices_v2
  28. update_cib_stonith_devices_v1
  29. update_cib_stonith_devices
  30. node_has_attr
  31. node_does_watchdog_fencing
  32. update_fencing_topology
  33. update_cib_cache_cb
  34. init_cib_cache_cb
  35. stonith_shutdown
  36. cib_connection_destroy
  37. stonith_cleanup
  38. setup_cib
  39. st_peer_update_callback
  40. main

   1 /*
   2  * Copyright 2009-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/param.h>
  13 #include <stdio.h>
  14 #include <sys/types.h>
  15 #include <sys/stat.h>
  16 #include <unistd.h>
  17 #include <sys/utsname.h>
  18 
  19 #include <stdlib.h>
  20 #include <errno.h>
  21 #include <fcntl.h>
  22 #include <inttypes.h>  // PRIu32, PRIx32
  23 
  24 #include <crm/crm.h>
  25 #include <crm/msg_xml.h>
  26 #include <crm/common/ipc.h>
  27 #include <crm/common/ipc_internal.h>
  28 #include <crm/cluster/internal.h>
  29 
  30 #include <crm/stonith-ng.h>
  31 #include <crm/fencing/internal.h>
  32 #include <crm/common/xml.h>
  33 #include <crm/common/xml_internal.h>
  34 
  35 #include <crm/common/mainloop.h>
  36 
  37 #include <crm/cib/internal.h>
  38 #include <crm/pengine/status.h>
  39 #include <pacemaker-internal.h>
  40 
  41 #include <pacemaker-fenced.h>
  42 
     /* Name used for the local node: stamped into client requests as
      * F_STONITH_CLIENTNODE and compared against allowed-node unames. */
  43 char *stonith_our_uname = NULL;
     /* Watchdog fencing timeout in milliseconds, recomputed from the CIB by
      * update_stonith_watchdog_timeout_ms(); values <= 0 are treated as
      * "watchdog fencing disabled" by the code in this file. */
  44 long stonith_watchdog_timeout_ms = 0;
     /* List released with g_list_free_full(..., free) and reset to NULL on
      * every device refresh; while non-NULL it suppresses registration of the
      * implicit watchdog device. */
  45 GList *stonith_watchdog_targets = NULL;
  46 
     /* NOTE(review): presumably the daemon's GLib main loop -- not used in the
      * visible part of the file, confirm against main(). */
  47 static GMainLoop *mainloop = NULL;
  48 
  49 gboolean stand_alone = FALSE;
  50 static gboolean no_cib_connect = FALSE;
     /* While TRUE, st_ipc_accept() refuses new IPC clients with -EPERM. */
  51 static gboolean stonith_shutdown_flag = FALSE;
  52 
  53 static qb_ipcs_service_t *ipcs = NULL;
     /* CIB XML used as the XPath search root for fencing levels and as the
      * scheduler input in cib_devices_update(). */
  54 static xmlNode *local_cib = NULL;
     /* Working set reused (and reset) by each cib_devices_update() run. */
  55 static pe_working_set_t *fenced_data_set = NULL;
  56 
  57 static cib_t *cib_api = NULL;
  58 
  59 static pcmk__output_t *out = NULL;
  60 
     /* Output formats table; the all-NULL entry terminates the list. */
  61 pcmk__supported_format_t formats[] = {
  62     PCMK__SUPPORTED_FORMAT_LOG,
  63     PCMK__SUPPORTED_FORMAT_NONE,
  64     PCMK__SUPPORTED_FORMAT_TEXT,
  65     { NULL, NULL, NULL }
  66 };
  67 
     /* Forward declarations: stonith_shutdown() is called from
      * stonith_peer_cs_destroy() before its definition appears. */
  68 static void stonith_shutdown(int nsig);
  69 static void stonith_cleanup(void);
  70 
  71 static int32_t
  72 st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
     /* [previous][next][first][last][top][bottom][index][help] */
  73 {
  74     if (stonith_shutdown_flag) {
  75         crm_info("Ignoring new client [%d] during shutdown",
  76                  pcmk__client_pid(c));
  77         return -EPERM;
  78     }
  79 
  80     if (pcmk__new_client(c, uid, gid) == NULL) {
  81         return -EIO;
  82     }
  83     return 0;
  84 }
  85 
  86 /* Exit code means? */
  87 static int32_t
  88 st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
     /* [previous][next][first][last][top][bottom][index][help] */
  89 {
  90     uint32_t id = 0;
  91     uint32_t flags = 0;
  92     int call_options = 0;
  93     xmlNode *request = NULL;
  94     pcmk__client_t *c = pcmk__find_client(qbc);
  95     const char *op = NULL;
  96 
  97     if (c == NULL) {
  98         crm_info("Invalid client: %p", qbc);
  99         return 0;
 100     }
 101 
 102     request = pcmk__client_data2xml(c, data, &id, &flags);
 103     if (request == NULL) {
 104         pcmk__ipc_send_ack(c, id, flags, "nack", CRM_EX_PROTOCOL);
 105         return 0;
 106     }
 107 
 108 
 109     op = crm_element_value(request, F_CRM_TASK);
 110     if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
 111         crm_xml_add(request, F_TYPE, T_STONITH_NG);
 112         crm_xml_add(request, F_STONITH_OPERATION, op);
 113         crm_xml_add(request, F_STONITH_CLIENTID, c->id);
 114         crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
 115         crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
 116 
 117         send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
 118         free_xml(request);
 119         return 0;
 120     }
 121 
 122     if (c->name == NULL) {
 123         const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);
 124 
 125         if (value == NULL) {
 126             value = "unknown";
 127         }
 128         c->name = crm_strdup_printf("%s.%u", value, c->pid);
 129     }
 130 
 131     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
 132     crm_trace("Flags 0x%08" PRIx32 "/0x%08x for command %" PRIu32
 133               " from client %s", flags, call_options, id, pcmk__client_name(c));
 134 
 135     if (pcmk_is_set(call_options, st_opt_sync_call)) {
 136         CRM_ASSERT(flags & crm_ipc_client_response);
 137         CRM_LOG_ASSERT(c->request_id == 0);     /* This means the client has two synchronous events in-flight */
 138         c->request_id = id;     /* Reply only to the last one */
 139     }
 140 
 141     crm_xml_add(request, F_STONITH_CLIENTID, c->id);
 142     crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
 143     crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
 144 
 145     stonith_command(c, id, flags, request, NULL);
 146 
 147     free_xml(request);
 148     return 0;
 149 }
 150 
 151 /* Error code means? */
 152 static int32_t
 153 st_ipc_closed(qb_ipcs_connection_t * c)
     /* [previous][next][first][last][top][bottom][index][help] */
 154 {
 155     pcmk__client_t *client = pcmk__find_client(c);
 156 
 157     if (client == NULL) {
 158         return 0;
 159     }
 160 
 161     crm_trace("Connection %p closed", c);
 162     pcmk__free_client(client);
 163 
 164     /* 0 means: yes, go ahead and destroy the connection */
 165     return 0;
 166 }
 167 
 168 static void
 169 st_ipc_destroy(qb_ipcs_connection_t * c)
     /* [previous][next][first][last][top][bottom][index][help] */
 170 {
 171     crm_trace("Connection %p destroyed", c);
 172     st_ipc_closed(c);
 173 }
 174 
 175 static void
 176 stonith_peer_callback(xmlNode * msg, void *private_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 177 {
 178     const char *remote_peer = crm_element_value(msg, F_ORIG);
 179     const char *op = crm_element_value(msg, F_STONITH_OPERATION);
 180 
 181     if (pcmk__str_eq(op, "poke", pcmk__str_none)) {
 182         return;
 183     }
 184 
 185     crm_log_xml_trace(msg, "Peer[inbound]");
 186     stonith_command(NULL, 0, 0, msg, remote_peer);
 187 }
 188 
 189 #if SUPPORT_COROSYNC
 190 static void
 191 stonith_peer_ais_callback(cpg_handle_t handle,
     /* [previous][next][first][last][top][bottom][index][help] */
 192                           const struct cpg_name *groupName,
 193                           uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
 194 {
 195     uint32_t kind = 0;
 196     xmlNode *xml = NULL;
 197     const char *from = NULL;
 198     char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
 199 
 200     if(data == NULL) {
 201         return;
 202     }
 203     if (kind == crm_class_cluster) {
 204         xml = string2xml(data);
 205         if (xml == NULL) {
 206             crm_err("Invalid XML: '%.120s'", data);
 207             free(data);
 208             return;
 209         }
 210         crm_xml_add(xml, F_ORIG, from);
 211         /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
 212         stonith_peer_callback(xml, NULL);
 213     }
 214 
 215     free_xml(xml);
 216     free(data);
 217     return;
 218 }
 219 
 220 static void
 221 stonith_peer_cs_destroy(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 222 {
 223     crm_crit("Lost connection to cluster layer, shutting down");
 224     stonith_shutdown(0);
 225 }
 226 #endif
 227 
 228 void
 229 do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer)
     /* [previous][next][first][last][top][bottom][index][help] */
 230 {
 231     /* send callback to originating child */
 232     pcmk__client_t *client_obj = NULL;
 233     int local_rc = pcmk_rc_ok;
 234 
 235     crm_trace("Sending response");
 236     client_obj = pcmk__find_client_by_id(client_id);
 237 
 238     crm_trace("Sending callback to request originator");
 239     if (client_obj == NULL) {
 240         local_rc = EPROTO;
 241         crm_trace("No client to sent the response to.  F_STONITH_CLIENTID not set.");
 242 
 243     } else {
 244         int rid = 0;
 245 
 246         if (sync_reply) {
 247             CRM_LOG_ASSERT(client_obj->request_id);
 248 
 249             rid = client_obj->request_id;
 250             client_obj->request_id = 0;
 251 
 252             crm_trace("Sending response %d to client %s%s",
 253                       rid, pcmk__client_name(client_obj),
 254                       (from_peer? " (originator of delegated request)" : ""));
 255 
 256         } else {
 257             crm_trace("Sending an event to client %s%s",
 258                       pcmk__client_name(client_obj),
 259                       (from_peer? " (originator of delegated request)" : ""));
 260         }
 261 
 262         local_rc = pcmk__ipc_send_xml(client_obj, rid, notify_src,
 263                                       (sync_reply? crm_ipc_flags_none
 264                                        : crm_ipc_server_event));
 265     }
 266 
 267     if ((local_rc != pcmk_rc_ok) && (client_obj != NULL)) {
 268         crm_warn("%s reply to client %s failed: %s",
 269                  (sync_reply? "Synchronous" : "Asynchronous"),
 270                  pcmk__client_name(client_obj), pcmk_rc_str(local_rc));
 271     }
 272 }
 273 
 274 uint64_t
 275 get_stonith_flag(const char *name)
     /* [previous][next][first][last][top][bottom][index][help] */
 276 {
 277     if (pcmk__str_eq(name, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
 278         return st_callback_notify_fence;
 279 
 280     } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) {
 281         return st_callback_device_add;
 282 
 283     } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) {
 284         return st_callback_device_del;
 285 
 286     } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY, pcmk__str_casei)) {
 287         return st_callback_notify_history;
 288 
 289     } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk__str_casei)) {
 290         return st_callback_notify_history_synced;
 291 
 292     }
 293     return st_callback_unknown;
 294 }
 295 
 296 static void
 297 stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 298 {
 299 
 300     xmlNode *update_msg = user_data;
 301     pcmk__client_t *client = value;
 302     const char *type = NULL;
 303 
 304     CRM_CHECK(client != NULL, return);
 305     CRM_CHECK(update_msg != NULL, return);
 306 
 307     type = crm_element_value(update_msg, F_SUBTYPE);
 308     CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
 309 
 310     if (client->ipcs == NULL) {
 311         crm_trace("Skipping client with NULL channel");
 312         return;
 313     }
 314 
 315     if (pcmk_is_set(client->flags, get_stonith_flag(type))) {
 316         int rc = pcmk__ipc_send_xml(client, 0, update_msg,
 317                                     crm_ipc_server_event|crm_ipc_server_error);
 318 
 319         if (rc != pcmk_rc_ok) {
 320             crm_warn("%s notification of client %s failed: %s "
 321                      CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
 322                      pcmk_rc_str(rc), client->id, rc);
 323         } else {
 324             crm_trace("Sent %s notification to client %s",
 325                       type, pcmk__client_name(client));
 326         }
 327     }
 328 }
 329 
 330 void
 331 do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
 332 {
 333     pcmk__client_t *client = NULL;
 334     xmlNode *notify_data = NULL;
 335 
 336     if (!timeout || !call_id || !client_id) {
 337         return;
 338     }
 339 
 340     client = pcmk__find_client_by_id(client_id);
 341     if (!client) {
 342         return;
 343     }
 344 
 345     notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
 346     crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
 347     crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
 348     crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);
 349 
 350     crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
 351 
 352     if (client) {
 353         pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);
 354     }
 355 
 356     free_xml(notify_data);
 357 }
 358 
 359 void
 360 do_stonith_notify(int options, const char *type, int result, xmlNode * data)
     /* [previous][next][first][last][top][bottom][index][help] */
 361 {
 362     /* TODO: Standardize the contents of data */
 363     xmlNode *update_msg = create_xml_node(NULL, "notify");
 364 
 365     CRM_CHECK(type != NULL,;);
 366 
 367     crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
 368     crm_xml_add(update_msg, F_SUBTYPE, type);
 369     crm_xml_add(update_msg, F_STONITH_OPERATION, type);
 370     crm_xml_add_int(update_msg, F_STONITH_RC, result);
 371 
 372     if (data != NULL) {
 373         add_message_xml(update_msg, F_STONITH_CALLDATA, data);
 374     }
 375 
 376     crm_trace("Notifying clients");
 377     pcmk__foreach_ipc_client(stonith_notify_client, update_msg);
 378     free_xml(update_msg);
 379     crm_trace("Notify complete");
 380 }
 381 
 382 static void
 383 do_stonith_notify_config(int options, const char *op, int rc,
     /* [previous][next][first][last][top][bottom][index][help] */
 384                          const char *desc, int active)
 385 {
 386     xmlNode *notify_data = create_xml_node(NULL, op);
 387 
 388     CRM_CHECK(notify_data != NULL, return);
 389 
 390     crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
 391     crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
 392 
 393     do_stonith_notify(options, op, rc, notify_data);
 394     free_xml(notify_data);
 395 }
 396 
 397 void
 398 do_stonith_notify_device(int options, const char *op, int rc, const char *desc)
     /* [previous][next][first][last][top][bottom][index][help] */
 399 {
 400     do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list));
 401 }
 402 
 403 void
 404 do_stonith_notify_level(int options, const char *op, int rc, const char *desc)
     /* [previous][next][first][last][top][bottom][index][help] */
 405 {
 406     do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology));
 407 }
 408 
 409 static void
 410 topology_remove_helper(const char *node, int level)
     /* [previous][next][first][last][top][bottom][index][help] */
 411 {
 412     int rc;
 413     char *desc = NULL;
 414     xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
 415 
 416     crm_xml_add(data, F_STONITH_ORIGIN, __func__);
 417     crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
 418     crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
 419 
 420     rc = stonith_level_remove(data, &desc);
 421     do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc);
 422 
 423     free_xml(data);
 424     free(desc);
 425 }
 426 
 427 static void
 428 remove_cib_device(xmlXPathObjectPtr xpathObj)
     /* [previous][next][first][last][top][bottom][index][help] */
 429 {
 430     int max = numXpathResults(xpathObj), lpc = 0;
 431 
 432     for (lpc = 0; lpc < max; lpc++) {
 433         const char *rsc_id = NULL;
 434         const char *standard = NULL;
 435         xmlNode *match = getXpathResult(xpathObj, lpc);
 436 
 437         CRM_LOG_ASSERT(match != NULL);
 438         if(match != NULL) {
 439             standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
 440         }
 441 
 442         if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
 443             continue;
 444         }
 445 
 446         rsc_id = crm_element_value(match, XML_ATTR_ID);
 447 
 448         stonith_device_remove(rsc_id, TRUE);
 449     }
 450 }
 451 
 452 static void
 453 handle_topology_change(xmlNode *match, bool remove) 
     /* [previous][next][first][last][top][bottom][index][help] */
 454 {
 455     int rc;
 456     char *desc = NULL;
 457 
 458     CRM_CHECK(match != NULL, return);
 459     crm_trace("Updating %s", ID(match));
 460 
 461     if(remove) {
 462         int index = 0;
 463         char *key = stonith_level_key(match, -1);
 464 
 465         crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
 466         topology_remove_helper(key, index);
 467         free(key);
 468     }
 469 
 470     rc = stonith_level_register(match, &desc);
 471     do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc);
 472 
 473     free(desc);
 474 }
 475 
 476 static void
 477 remove_fencing_topology(xmlXPathObjectPtr xpathObj)
     /* [previous][next][first][last][top][bottom][index][help] */
 478 {
 479     int max = numXpathResults(xpathObj), lpc = 0;
 480 
 481     for (lpc = 0; lpc < max; lpc++) {
 482         xmlNode *match = getXpathResult(xpathObj, lpc);
 483 
 484         CRM_LOG_ASSERT(match != NULL);
 485         if (match && crm_element_value(match, XML_DIFF_MARKER)) {
 486             /* Deletion */
 487             int index = 0;
 488             char *target = stonith_level_key(match, -1);
 489 
 490             crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
 491             if (target == NULL) {
 492                 crm_err("Invalid fencing target in element %s", ID(match));
 493 
 494             } else if (index <= 0) {
 495                 crm_err("Invalid level for %s in element %s", target, ID(match));
 496 
 497             } else {
 498                 topology_remove_helper(target, index);
 499             }
 500             /* } else { Deal with modifications during the 'addition' stage */
 501         }
 502     }
 503 }
 504 
 505 static void
 506 register_fencing_topology(xmlXPathObjectPtr xpathObj)
     /* [previous][next][first][last][top][bottom][index][help] */
 507 {
 508     int max = numXpathResults(xpathObj), lpc = 0;
 509 
 510     for (lpc = 0; lpc < max; lpc++) {
 511         xmlNode *match = getXpathResult(xpathObj, lpc);
 512 
 513         handle_topology_change(match, TRUE);
 514     }
 515 }
 516 
 517 /* Fencing
 518 <diff crm_feature_set="3.0.6">
 519   <diff-removed>
 520     <fencing-topology>
 521       <fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/>
 522       <fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/>
 523       <fencing-level devices="disk,network" id="f-p2.1"/>
 524     </fencing-topology>
 525   </diff-removed>
 526   <diff-added>
 527     <fencing-topology>
 528       <fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/>
 529       <fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/>
 530       <fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/>
 531     </fencing-topology>
 532   </diff-added>
 533 </diff>
 534 */
 535 
 536 static void
 537 fencing_topology_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 538 {
 539     xmlXPathObjectPtr xpathObj = NULL;
 540     const char *xpath = "//" XML_TAG_FENCING_LEVEL;
 541 
 542     crm_trace("Full topology refresh");
 543     free_topology_list();
 544     init_topology_list();
 545 
 546     /* Grab everything */
 547     xpathObj = xpath_search(local_cib, xpath);
 548     register_fencing_topology(xpathObj);
 549 
 550     freeXpathObject(xpathObj);
 551 }
 552 
 553 #define rsc_name(x) x->clone_name?x->clone_name:x->id
 554 
 555 /*!
 556  * \internal
 557  * \brief Check whether our uname is in a resource's allowed node list
 558  *
 559  * \param[in] rsc  Resource to check
 560  *
 561  * \return Pointer to node object if found, NULL otherwise
 562  */
 563 static pe_node_t *
 564 our_node_allowed_for(pe_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 565 {
 566     GHashTableIter iter;
 567     pe_node_t *node = NULL;
 568 
 569     if (rsc && stonith_our_uname) {
 570         g_hash_table_iter_init(&iter, rsc->allowed_nodes);
 571         while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
 572             if (node && strcmp(node->details->uname, stonith_our_uname) == 0) {
 573                 break;
 574             }
 575             node = NULL;
 576         }
 577     }
 578     return node;
 579 }
 580 
 581 static void
 582 watchdog_device_update(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 583 {
 584     if (stonith_watchdog_timeout_ms > 0) {
 585         if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) &&
 586             !stonith_watchdog_targets) {
 587             /* getting here watchdog-fencing enabled, no device there yet
 588                and reason isn't stonith_watchdog_targets preventing that
 589              */
 590             int rc;
 591             xmlNode *xml;
 592 
 593             xml = create_device_registration_xml(
 594                     STONITH_WATCHDOG_ID,
 595                     st_namespace_internal,
 596                     STONITH_WATCHDOG_AGENT,
 597                     NULL, /* stonith_device_register will add our
 598                              own name as PCMK_STONITH_HOST_LIST param
 599                              so we can skip that here
 600                            */
 601                     NULL);
 602             rc = stonith_device_register(xml, NULL, TRUE);
 603             free_xml(xml);
 604             if (rc != pcmk_ok) {
 605                 crm_crit("Cannot register watchdog pseudo fence agent");
 606                 crm_exit(CRM_EX_FATAL);
 607             }
 608         }
 609 
 610     } else {
 611         /* be silent if no device - todo parameter to stonith_device_remove */
 612         if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) {
 613             stonith_device_remove(STONITH_WATCHDOG_ID, TRUE);
 614         }
 615     }
 616 }
 617 
 618 static void
 619 update_stonith_watchdog_timeout_ms(xmlNode *cib)
     /* [previous][next][first][last][top][bottom][index][help] */
 620 {
 621     xmlNode *stonith_enabled_xml = NULL;
 622     const char *stonith_enabled_s = NULL;
 623     long timeout_ms = 0;
 624 
 625     stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']",
 626                                            cib, LOG_NEVER);
 627     if (stonith_enabled_xml) {
 628         stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
 629     }
 630 
 631     if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) {
 632         xmlNode *stonith_watchdog_xml = NULL;
 633         const char *value = NULL;
 634 
 635         stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']",
 636                                                 cib, LOG_NEVER);
 637         if (stonith_watchdog_xml) {
 638             value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
 639         }
 640         if (value) {
 641             timeout_ms = crm_get_msec(value);
 642         }
 643 
 644         if (timeout_ms < 0) {
 645             timeout_ms = pcmk__auto_watchdog_timeout();
 646         }
 647     }
 648 
 649     stonith_watchdog_timeout_ms = timeout_ms;
 650 }
 651 
 652 /*!
 653  * \internal
 654  * \brief If a resource or any of its children are STONITH devices, update their
 655  *        definitions given a cluster working set.
 656  *
 657  * \param[in] rsc       Resource to check
 658  * \param[in] data_set  Cluster working set with device information
 659  */
 660 static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 661 {
 662     pe_node_t *node = NULL;
 663     const char *value = NULL;
 664     const char *rclass = NULL;
 665     pe_node_t *parent = NULL;
 666 
 667     /* If this is a complex resource, check children rather than this resource itself. */
 668     if(rsc->children) {
 669         GList *gIter = NULL;
 670         for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
 671             cib_device_update(gIter->data, data_set);
 672             if(pe_rsc_is_clone(rsc)) {
 673                 crm_trace("Only processing one copy of the clone %s", rsc->id);
 674                 break;
 675             }
 676         }
 677         return;
 678     }
 679 
 680     /* We only care about STONITH resources. */
 681     rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
 682     if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
 683         return;
 684     }
 685 
 686     /* If this STONITH resource is disabled, remove it. */
 687     if (pe__resource_is_disabled(rsc)) {
 688         crm_info("Device %s has been disabled", rsc->id);
 689         return;
 690     }
 691 
 692     /* if watchdog-fencing is disabled handle any watchdog-fence
 693        resource as if it was disabled
 694      */
 695     if ((stonith_watchdog_timeout_ms <= 0) &&
 696         pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
 697         crm_info("Watchdog-fencing disabled thus handling "
 698                  "device %s as disabled", rsc->id);
 699         return;
 700     }
 701 
 702     /* Check whether our node is allowed for this resource (and its parent if in a group) */
 703     node = our_node_allowed_for(rsc);
 704     if (rsc->parent && (rsc->parent->variant == pe_group)) {
 705         parent = our_node_allowed_for(rsc->parent);
 706     }
 707 
 708     if(node == NULL) {
 709         /* Our node is disallowed, so remove the device */
 710         GHashTableIter iter;
 711 
 712         crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
 713         g_hash_table_iter_init(&iter, rsc->allowed_nodes);
 714         while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
 715             crm_trace("Available: %s = %d", node->details->uname, node->weight);
 716         }
 717 
 718         return;
 719 
 720     } else if(node->weight < 0 || (parent && parent->weight < 0)) {
 721         /* Our node (or its group) is disallowed by score, so remove the device */
 722         char *score = score2char((node->weight < 0) ? node->weight : parent->weight);
 723 
 724         crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score);
 725         free(score);
 726 
 727         return;
 728 
 729     } else {
 730         /* Our node is allowed, so update the device information */
 731         int rc;
 732         xmlNode *data;
 733         GHashTable *rsc_params = NULL;
 734         GHashTableIter gIter;
 735         stonith_key_value_t *params = NULL;
 736 
 737         const char *name = NULL;
 738         const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
 739         const char *rsc_provides = NULL;
 740 
 741         crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
 742         rsc_params = pe_rsc_params(rsc, node, data_set);
 743         get_meta_attributes(rsc->meta, rsc, node, data_set);
 744 
 745         rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES);
 746 
 747         g_hash_table_iter_init(&gIter, rsc_params);
 748         while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) {
 749             if (!name || !value) {
 750                 continue;
 751             }
 752             params = stonith_key_value_add(params, name, value);
 753             crm_trace(" %s=%s", name, value);
 754         }
 755 
 756         data = create_device_registration_xml(rsc_name(rsc), st_namespace_any,
 757                                               agent, params, rsc_provides);
 758         stonith_key_value_freeall(params, 1, 1);
 759         rc = stonith_device_register(data, NULL, TRUE);
 760         CRM_ASSERT(rc == pcmk_ok);
 761         free_xml(data);
 762     }
 763 }
 764 
 765 /*!
 766  * \internal
 767  * \brief Update all STONITH device definitions based on current CIB
 768  */
 769 static void
 770 cib_devices_update(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 771 {
 772     GHashTableIter iter;
 773     stonith_device_t *device = NULL;
 774 
 775     crm_info("Updating devices to version %s.%s.%s",
 776              crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN),
 777              crm_element_value(local_cib, XML_ATTR_GENERATION),
 778              crm_element_value(local_cib, XML_ATTR_NUMUPDATES));
 779 
 780     CRM_ASSERT(fenced_data_set != NULL);
 781     fenced_data_set->input = local_cib;
 782     fenced_data_set->now = crm_time_new(NULL);
 783     fenced_data_set->localhost = stonith_our_uname;
 784     pe__set_working_set_flags(fenced_data_set, pe_flag_quick_location);
 785 
 786     cluster_status(fenced_data_set);
 787     pcmk__schedule_actions(fenced_data_set, NULL, NULL);
 788 
 789     g_hash_table_iter_init(&iter, device_list);
 790     while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) {
 791         if (device->cib_registered) {
 792             device->dirty = TRUE;
 793         }
 794     }
 795 
 796     /* have list repopulated if cib has a watchdog-fencing-resource
 797        TODO: keep a cached list for queries happening while we are refreshing
 798      */
 799     g_list_free_full(stonith_watchdog_targets, free);
 800     stonith_watchdog_targets = NULL;
 801     g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set);
 802 
 803     g_hash_table_iter_init(&iter, device_list);
 804     while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) {
 805         if (device->dirty) {
 806             g_hash_table_iter_remove(&iter);
 807         }
 808     }
 809 
 810     fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it
 811     pe_reset_working_set(fenced_data_set);
 812 }
 813 
 814 static void
 815 update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 816 {
 817     xmlNode *change = NULL;
 818     char *reason = NULL;
 819     bool needs_update = FALSE;
 820     xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
 821 
 822     for (change = pcmk__xml_first_child(patchset); change != NULL;
 823          change = pcmk__xml_next(change)) {
 824         const char *op = crm_element_value(change, XML_DIFF_OP);
 825         const char *xpath = crm_element_value(change, XML_DIFF_PATH);
 826         const char *shortpath = NULL;
 827 
 828         if ((op == NULL) ||
 829             (strcmp(op, "move") == 0) ||
 830             strstr(xpath, "/"XML_CIB_TAG_STATUS)) {
 831             continue;
 832         } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) {
 833             const char *rsc_id = NULL;
 834             char *search = NULL;
 835             char *mutable = NULL;
 836 
 837             if (strstr(xpath, XML_TAG_ATTR_SETS) ||
 838                 strstr(xpath, XML_TAG_META_SETS)) {
 839                 needs_update = TRUE;
 840                 reason = strdup("(meta) attribute deleted from resource");
 841                 break;
 842             } 
 843             mutable = strdup(xpath);
 844             rsc_id = strstr(mutable, "primitive[@id=\'");
 845             if (rsc_id != NULL) {
 846                 rsc_id += strlen("primitive[@id=\'");
 847                 search = strchr(rsc_id, '\'');
 848             }
 849             if (search != NULL) {
 850                 *search = 0;
 851                 stonith_device_remove(rsc_id, TRUE);
 852                 /* watchdog_device_update called afterwards
 853                    to fall back to implicit definition if needed */
 854             } else {
 855                 crm_warn("Ignoring malformed CIB update (resource deletion)");
 856             }
 857             free(mutable);
 858 
 859         } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) ||
 860                    strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) ||
 861                    strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) {
 862             shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
 863             reason = crm_strdup_printf("%s %s", op, shortpath+1);
 864             needs_update = TRUE;
 865             break;
 866         }
 867     }
 868 
 869     if(needs_update) {
 870         crm_info("Updating device list from CIB: %s", reason);
 871         cib_devices_update();
 872     } else {
 873         crm_trace("No updates for device list found in CIB");
 874     }
 875     free(reason);
 876 }
 877 
 878 
 879 static void
 880 update_cib_stonith_devices_v1(const char *event, xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 881 {
 882     const char *reason = "none";
 883     gboolean needs_update = FALSE;
 884     xmlXPathObjectPtr xpath_obj = NULL;
 885 
 886     /* process new constraints */
 887     xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
 888     if (numXpathResults(xpath_obj) > 0) {
 889         int max = numXpathResults(xpath_obj), lpc = 0;
 890 
 891         /* Safest and simplest to always recompute */
 892         needs_update = TRUE;
 893         reason = "new location constraint";
 894 
 895         for (lpc = 0; lpc < max; lpc++) {
 896             xmlNode *match = getXpathResult(xpath_obj, lpc);
 897 
 898             crm_log_xml_trace(match, "new constraint");
 899         }
 900     }
 901     freeXpathObject(xpath_obj);
 902 
 903     /* process deletions */
 904     xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
 905     if (numXpathResults(xpath_obj) > 0) {
 906         remove_cib_device(xpath_obj);
 907     }
 908     freeXpathObject(xpath_obj);
 909 
 910     /* process additions */
 911     xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
 912     if (numXpathResults(xpath_obj) > 0) {
 913         int max = numXpathResults(xpath_obj), lpc = 0;
 914 
 915         for (lpc = 0; lpc < max; lpc++) {
 916             const char *rsc_id = NULL;
 917             const char *standard = NULL;
 918             xmlNode *match = getXpathResult(xpath_obj, lpc);
 919 
 920             rsc_id = crm_element_value(match, XML_ATTR_ID);
 921             standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
 922 
 923             if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
 924                 continue;
 925             }
 926 
 927             crm_trace("Fencing resource %s was added or modified", rsc_id);
 928             reason = "new resource";
 929             needs_update = TRUE;
 930         }
 931     }
 932     freeXpathObject(xpath_obj);
 933 
 934     if(needs_update) {
 935         crm_info("Updating device list from CIB: %s", reason);
 936         cib_devices_update();
 937     }
 938 }
 939 
 940 static void
 941 update_cib_stonith_devices(const char *event, xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 942 {
 943     int format = 1;
 944     xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
 945 
 946     CRM_ASSERT(patchset);
 947     crm_element_value_int(patchset, "format", &format);
 948     switch(format) {
 949         case 1:
 950             update_cib_stonith_devices_v1(event, msg);
 951             break;
 952         case 2:
 953             update_cib_stonith_devices_v2(event, msg);
 954             break;
 955         default:
 956             crm_warn("Unknown patch format: %d", format);
 957     }
 958 }
 959 
 960 /* Needs to hold node name + attribute name + attribute value + 75 */
 961 #define XPATH_MAX 512
 962 
 963 /*!
 964  * \internal
 965  * \brief Check whether a node has a specific attribute name/value
 966  *
 967  * \param[in] node    Name of node to check
 968  * \param[in] name    Name of an attribute to look for
 969  * \param[in] value   The value the named attribute needs to be set to in order to be considered a match
 970  *
 971  * \return TRUE if the locally cached CIB has the specified node attribute
 972  */
 973 gboolean
 974 node_has_attr(const char *node, const char *name, const char *value)
     /* [previous][next][first][last][top][bottom][index][help] */
 975 {
 976     char xpath[XPATH_MAX];
 977     xmlNode *match;
 978     int n;
 979 
 980     CRM_CHECK(local_cib != NULL, return FALSE);
 981 
 982     /* Search for the node's attributes in the CIB. While the schema allows
 983      * multiple sets of instance attributes, and allows instance attributes to
 984      * use id-ref to reference values elsewhere, that is intended for resources,
 985      * so we ignore that here.
 986      */
 987     n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES
 988                  "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS
 989                  "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']",
 990                  node, name, value);
 991     match = get_xpath_object(xpath, local_cib, LOG_NEVER);
 992 
 993     CRM_CHECK(n < XPATH_MAX, return FALSE);
 994     return (match != NULL);
 995 }
 996 
 997 /*!
 998  * \internal
 999  * \brief Check whether a node does watchdog-fencing
1000  *
1001  * \param[in] node    Name of node to check
1002  *
1003  * \return TRUE if node found in stonith_watchdog_targets
1004  *         or stonith_watchdog_targets is empty indicating
1005  *         all nodes are doing watchdog-fencing
1006  */
1007 gboolean
1008 node_does_watchdog_fencing(const char *node)
     /* [previous][next][first][last][top][bottom][index][help] */
1009 {
1010     return ((stonith_watchdog_targets == NULL) ||
1011             pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei));
1012 }
1013 
1014 
1015 static void
1016 update_fencing_topology(const char *event, xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
1017 {
1018     int format = 1;
1019     const char *xpath;
1020     xmlXPathObjectPtr xpathObj = NULL;
1021     xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
1022 
1023     CRM_ASSERT(patchset);
1024     crm_element_value_int(patchset, "format", &format);
1025 
1026     if(format == 1) {
1027         /* Process deletions (only) */
1028         xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
1029         xpathObj = xpath_search(msg, xpath);
1030 
1031         remove_fencing_topology(xpathObj);
1032         freeXpathObject(xpathObj);
1033 
1034         /* Process additions and changes */
1035         xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
1036         xpathObj = xpath_search(msg, xpath);
1037 
1038         register_fencing_topology(xpathObj);
1039         freeXpathObject(xpathObj);
1040 
1041     } else if(format == 2) {
1042         xmlNode *change = NULL;
1043         int add[] = { 0, 0, 0 };
1044         int del[] = { 0, 0, 0 };
1045 
1046         xml_patch_versions(patchset, add, del);
1047 
1048         for (change = pcmk__xml_first_child(patchset); change != NULL;
1049              change = pcmk__xml_next(change)) {
1050             const char *op = crm_element_value(change, XML_DIFF_OP);
1051             const char *xpath = crm_element_value(change, XML_DIFF_PATH);
1052 
1053             if(op == NULL) {
1054                 continue;
1055 
1056             } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) {
1057                 /* Change to a specific entry */
1058 
1059                 crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath);
1060                 if(strcmp(op, "move") == 0) {
1061                     continue;
1062 
1063                 } else if(strcmp(op, "create") == 0) {
1064                     handle_topology_change(change->children, FALSE);
1065 
1066                 } else if(strcmp(op, "modify") == 0) {
1067                     xmlNode *match = first_named_child(change, XML_DIFF_RESULT);
1068 
1069                     if(match) {
1070                         handle_topology_change(match->children, TRUE);
1071                     }
1072 
1073                 } else if(strcmp(op, "delete") == 0) {
1074                     /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
1075                     crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s",
1076                              op, add[0], add[1], add[2], xpath);
1077                     fencing_topology_init();
1078                     return;
1079                 }
1080 
1081             } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) {
1082                 /* Change to the topology in general */
1083                 crm_info("Re-initializing fencing topology after top-level %s operation  %d.%d.%d for %s",
1084                          op, add[0], add[1], add[2], xpath);
1085                 fencing_topology_init();
1086                 return;
1087 
1088             } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) {
1089                 /* Changes to the whole config section, possibly including the topology as a whild */
1090                 if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) {
1091                     crm_trace("Nothing for us in %s operation %d.%d.%d for %s.",
1092                               op, add[0], add[1], add[2], xpath);
1093 
1094                 } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
1095                     crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.",
1096                              op, add[0], add[1], add[2], xpath);
1097                     fencing_topology_init();
1098                     return;
1099                 }
1100 
1101             } else {
1102                 crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
1103                           op, add[0], add[1], add[2], xpath);
1104             }
1105         }
1106 
1107     } else {
1108         crm_warn("Unknown patch format: %d", format);
1109     }
1110 }
1111 static bool have_cib_devices = FALSE;
1112 
1113 static void
1114 update_cib_cache_cb(const char *event, xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
1115 {
1116     int rc = pcmk_ok;
1117     xmlNode *stonith_enabled_xml = NULL;
1118     const char *stonith_enabled_s = NULL;
1119     static gboolean stonith_enabled_saved = TRUE;
1120     long timeout_ms_saved = stonith_watchdog_timeout_ms;
1121     gboolean need_full_refresh = FALSE;
1122 
1123     if(!have_cib_devices) {
1124         crm_trace("Skipping updates until we get a full dump");
1125         return;
1126 
1127     } else if(msg == NULL) {
1128         crm_trace("Missing %s update", event);
1129         return;
1130     }
1131 
1132     /* Maintain a local copy of the CIB so that we have full access
1133      * to device definitions, location constraints, and node attributes
1134      */
1135     if (local_cib != NULL) {
1136         int rc = pcmk_ok;
1137         xmlNode *patchset = NULL;
1138 
1139         crm_element_value_int(msg, F_CIB_RC, &rc);
1140         if (rc != pcmk_ok) {
1141             return;
1142         }
1143 
1144         patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
1145         xml_log_patchset(LOG_TRACE, "Config update", patchset);
1146         rc = xml_apply_patchset(local_cib, patchset, TRUE);
1147         switch (rc) {
1148             case pcmk_ok:
1149             case -pcmk_err_old_data:
1150                 break;
1151             case -pcmk_err_diff_resync:
1152             case -pcmk_err_diff_failed:
1153                 crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
1154                 free_xml(local_cib);
1155                 local_cib = NULL;
1156                 break;
1157             default:
1158                 crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
1159                 free_xml(local_cib);
1160                 local_cib = NULL;
1161         }
1162     }
1163 
1164     if (local_cib == NULL) {
1165         crm_trace("Re-requesting full CIB");
1166         rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
1167         if(rc != pcmk_ok) {
1168             crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
1169             return;
1170         }
1171         CRM_ASSERT(local_cib != NULL);
1172         stonith_enabled_saved = FALSE; /* Trigger a full refresh below */
1173     }
1174 
1175     pcmk__refresh_node_caches_from_cib(local_cib);
1176     update_stonith_watchdog_timeout_ms(local_cib);
1177 
1178     stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']",
1179                                            local_cib, LOG_NEVER);
1180     if (stonith_enabled_xml) {
1181         stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
1182     }
1183 
1184     if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
1185         crm_trace("Ignoring CIB updates while fencing is disabled");
1186         stonith_enabled_saved = FALSE;
1187 
1188     } else if (stonith_enabled_saved == FALSE) {
1189         crm_info("Updating fencing device and topology lists "
1190                  "now that fencing is enabled");
1191         stonith_enabled_saved = TRUE;
1192         need_full_refresh = TRUE;
1193 
1194     } else {
1195         if (timeout_ms_saved != stonith_watchdog_timeout_ms) {
1196             need_full_refresh = TRUE;
1197         } else {
1198             update_fencing_topology(event, msg);
1199             update_cib_stonith_devices(event, msg);
1200             watchdog_device_update();
1201         }
1202     }
1203 
1204     if (need_full_refresh) {
1205         fencing_topology_init();
1206         cib_devices_update();
1207         watchdog_device_update();
1208     }
1209 }
1210 
1211 static void
1212 init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1213 {
1214     crm_info("Updating device list from CIB");
1215     have_cib_devices = TRUE;
1216     local_cib = copy_xml(output);
1217 
1218     pcmk__refresh_node_caches_from_cib(local_cib);
1219     update_stonith_watchdog_timeout_ms(local_cib);
1220 
1221     fencing_topology_init();
1222     cib_devices_update();
1223     watchdog_device_update();
1224 }
1225 
1226 static void
1227 stonith_shutdown(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
1228 {
1229     crm_info("Terminating with %d clients", pcmk__ipc_client_count());
1230     stonith_shutdown_flag = TRUE;
1231     if (mainloop != NULL && g_main_loop_is_running(mainloop)) {
1232         g_main_loop_quit(mainloop);
1233     } else {
1234         stonith_cleanup();
1235         crm_exit(CRM_EX_OK);
1236     }
1237 }
1238 
1239 static void
1240 cib_connection_destroy(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1241 {
1242     if (stonith_shutdown_flag) {
1243         crm_info("Connection to the CIB manager closed");
1244         return;
1245     } else {
1246         crm_crit("Lost connection to the CIB manager, shutting down");
1247     }
1248     if (cib_api) {
1249         cib_api->cmds->signoff(cib_api);
1250     }
1251     stonith_shutdown(0);
1252 }
1253 
1254 static void
1255 stonith_cleanup(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1256 {
1257     if (cib_api) {
1258         cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb);
1259         cib_api->cmds->signoff(cib_api);
1260     }
1261 
1262     if (ipcs) {
1263         qb_ipcs_destroy(ipcs);
1264     }
1265 
1266     crm_peer_destroy();
1267     pcmk__client_cleanup();
1268     free_stonith_remote_op_list();
1269     free_topology_list();
1270     free_device_list();
1271     free_metadata_cache();
1272 
1273     free(stonith_our_uname);
1274     stonith_our_uname = NULL;
1275 
1276     free_xml(local_cib);
1277     local_cib = NULL;
1278 }
1279 
1280 static pcmk__cli_option_t long_options[] = {
1281     // long option, argument type, storage, short option, description, flags
1282     {
1283         "stand-alone", no_argument, 0, 's',
1284         NULL, pcmk__option_default
1285     },
1286     {
1287         "stand-alone-w-cpg", no_argument, 0, 'c',
1288         NULL, pcmk__option_default
1289     },
1290     {
1291         "logfile", required_argument, 0, 'l',
1292         NULL, pcmk__option_default
1293     },
1294     {
1295         "verbose", no_argument, 0, 'V',
1296         NULL, pcmk__option_default
1297     },
1298     {
1299         "version", no_argument, 0, '$',
1300         NULL, pcmk__option_default
1301     },
1302     {
1303         "help", no_argument, 0, '?',
1304         NULL, pcmk__option_default
1305     },
1306     { 0, 0, 0, 0 }
1307 };
1308 
1309 static void
1310 setup_cib(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1311 {
1312     int rc, retries = 0;
1313 
1314     cib_api = cib_new();
1315     if (cib_api == NULL) {
1316         crm_err("No connection to the CIB manager");
1317         return;
1318     }
1319 
1320     do {
1321         sleep(retries);
1322         rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command);
1323     } while (rc == -ENOTCONN && ++retries < 5);
1324 
1325     if (rc != pcmk_ok) {
1326         crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc);
1327 
1328     } else if (pcmk_ok !=
1329                cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) {
1330         crm_err("Could not set CIB notification callback");
1331 
1332     } else {
1333         rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
1334         cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
1335                                          init_cib_cache_cb);
1336         cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
1337         crm_info("Watching for fencing topology changes");
1338     }
1339 }
1340 
1341 struct qb_ipcs_service_handlers ipc_callbacks = {
1342     .connection_accept = st_ipc_accept,
1343     .connection_created = NULL,
1344     .msg_process = st_ipc_dispatch,
1345     .connection_closed = st_ipc_closed,
1346     .connection_destroyed = st_ipc_destroy
1347 };
1348 
1349 /*!
1350  * \internal
1351  * \brief Callback for peer status changes
1352  *
1353  * \param[in] type  What changed
1354  * \param[in] node  What peer had the change
1355  * \param[in] data  Previous value of what changed
1356  */
1357 static void
1358 st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
     /* [previous][next][first][last][top][bottom][index][help] */
1359 {
1360     if ((type != crm_status_processes)
1361         && !pcmk_is_set(node->flags, crm_remote_node)) {
1362         /*
1363          * This is a hack until we can send to a nodeid and/or we fix node name lookups
1364          * These messages are ignored in stonith_peer_callback()
1365          */
1366         xmlNode *query = create_xml_node(NULL, "stonith_command");
1367 
1368         crm_xml_add(query, F_XML_TAGNAME, "stonith_command");
1369         crm_xml_add(query, F_TYPE, T_STONITH_NG);
1370         crm_xml_add(query, F_STONITH_OPERATION, "poke");
1371 
1372         crm_debug("Broadcasting our uname because of node %u", node->id);
1373         send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
1374 
1375         free_xml(query);
1376     }
1377 }
1378 
1379 int
1380 main(int argc, char **argv)
     /* [previous][next][first][last][top][bottom][index][help] */
1381 {
1382     int flag;
1383     int lpc = 0;
1384     int argerr = 0;
1385     int option_index = 0;
1386     crm_cluster_t *cluster = NULL;
1387     const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" };
1388     crm_ipc_t *old_instance = NULL;
1389     int rc = pcmk_rc_ok;
1390 
1391     crm_log_preinit(NULL, argc, argv);
1392     pcmk__set_cli_options(NULL, "[options]", long_options,
1393                           "daemon for executing fencing devices in a "
1394                           "Pacemaker cluster");
1395 
1396     while (1) {
1397         flag = pcmk__next_cli_option(argc, argv, &option_index, NULL);
1398         if (flag == -1) {
1399             break;
1400         }
1401 
1402         switch (flag) {
1403             case 'V':
1404                 crm_bump_log_level(argc, argv);
1405                 break;
1406             case 'l':
1407                 {
1408                     int rc = pcmk__add_logfile(optarg);
1409 
1410                     if (rc != pcmk_rc_ok) {
1411                         /* Logging has not yet been initialized, so stderr is
1412                          * the only way to get information out
1413                          */
1414                         fprintf(stderr, "Logging to %s is disabled: %s\n",
1415                                 optarg, pcmk_rc_str(rc));
1416                     }
1417                 }
1418                 break;
1419             case 's':
1420                 stand_alone = TRUE;
1421                 break;
1422             case 'c':
1423                 stand_alone = FALSE;
1424                 no_cib_connect = TRUE;
1425                 break;
1426             case '$':
1427             case '?':
1428                 pcmk__cli_help(flag, CRM_EX_OK);
1429                 break;
1430             default:
1431                 ++argerr;
1432                 break;
1433         }
1434     }
1435 
1436     if (argc - optind == 1 && pcmk__str_eq("metadata", argv[optind], pcmk__str_casei)) {
1437         printf("<?xml version=\"1.0\"?><!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n");
1438         printf("<resource-agent name=\"pacemaker-fenced\">\n");
1439         printf(" <version>1.0</version>\n");
1440         printf(" <longdesc lang=\"en\">Instance attributes available for all \"stonith\"-class resources"
1441                                        " and used by Pacemaker's fence daemon, formerly known as stonithd</longdesc>\n");
1442         printf(" <shortdesc lang=\"en\">Instance attributes available for all \"stonith\"-class resources</shortdesc>\n");
1443         printf(" <parameters>\n");
1444 
1445 #if 0
1446         // priority is not implemented yet
1447         printf("  <parameter name=\"priority\" unique=\"0\">\n");
1448         printf("    <shortdesc lang=\"en\">Devices that are not in a topology "
1449                "are tried in order of highest to lowest integer priority</shortdesc>\n");
1450         printf("    <content type=\"integer\" default=\"0\"/>\n");
1451         printf("  </parameter>\n");
1452 #endif
1453 
1454         printf("  <parameter name=\"%s\" unique=\"0\">\n",
1455                PCMK_STONITH_HOST_ARGUMENT);
1456         printf
1457             ("    <shortdesc lang=\"en\">Advanced use only: An alternate parameter to supply instead of 'port'</shortdesc>\n");
1458         printf
1459             ("    <longdesc lang=\"en\">Some devices do not support the standard 'port' parameter or may provide additional ones.\n"
1460              "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n"
1461              "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n"
1462              "     </longdesc>\n");
1463         printf("    <content type=\"string\" default=\"port\"/>\n");
1464         printf("  </parameter>\n");
1465 
1466         printf("  <parameter name=\"%s\" unique=\"0\">\n",
1467                PCMK_STONITH_HOST_MAP);
1468         printf
1469             ("    <shortdesc lang=\"en\">A mapping of host names to ports numbers for devices that do not support host names.</shortdesc>\n");
1470         printf
1471             ("    <longdesc lang=\"en\">Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2</longdesc>\n");
1472         printf("    <content type=\"string\" default=\"\"/>\n");
1473         printf("  </parameter>\n");
1474 
1475         printf("  <parameter name=\"%s\" unique=\"0\">\n",
1476                PCMK_STONITH_HOST_LIST);
1477         printf("    <shortdesc lang=\"en\">A list of machines controlled by "
1478                "this device (Optional unless %s=static-list).</shortdesc>\n",
1479                PCMK_STONITH_HOST_CHECK);
1480         printf("    <content type=\"string\" default=\"\"/>\n");
1481         printf("  </parameter>\n");
1482 
1483         printf("  <parameter name=\"%s\" unique=\"0\">\n",
1484                PCMK_STONITH_HOST_CHECK);
1485         printf
1486             ("    <shortdesc lang=\"en\">How to determine which machines are controlled by the device.</shortdesc>\n");
1487         printf("    <longdesc lang=\"en\">Allowed values: dynamic-list "
1488                "(query the device via the 'list' command), static-list "
1489                "(check the " PCMK_STONITH_HOST_LIST " attribute), status "
1490                "(query the device via the 'status' command), none (assume "
1491                "every device can fence every machine)</longdesc>\n");
1492         printf("    <content type=\"string\" default=\"dynamic-list\"/>\n");
1493         printf("  </parameter>\n");
1494 
1495         printf("  <parameter name=\"%s\" unique=\"0\">\n",
1496                PCMK_STONITH_DELAY_MAX);
1497         printf("    <shortdesc lang=\"en\">Enable a delay of no more than the "
1498                "time specified before executing fencing actions. Pacemaker "
1499                "derives the overall delay by taking the value of "
1500                PCMK_STONITH_DELAY_BASE " and adding a random delay value such "
1501                "that the sum is kept below this maximum.</shortdesc>\n");
1502         printf("    <longdesc lang=\"en\">This prevents double fencing when "
1503                "using slow devices such as sbd.\nUse this to enable a random "
1504                "delay for fencing actions.\nThe overall delay is derived from "
1505                "this random delay value adding a static delay so that the sum "
1506                "is kept below the maximum delay.</longdesc>\n");
1507         printf("    <content type=\"time\" default=\"0s\"/>\n");
1508         printf("  </parameter>\n");
1509 
1510         printf("  <parameter name=\"%s\" unique=\"0\">\n",
1511                PCMK_STONITH_DELAY_BASE);
1512         printf("    <shortdesc lang=\"en\">Enable a base delay for "
1513                "fencing actions and specify base delay value.</shortdesc>\n");
1514         printf("    <longdesc lang=\"en\">This prevents double fencing when "
1515                "different delays are configured on the nodes.\nUse this to "
1516                "enable a static delay for fencing actions.\nThe overall delay "
1517                "is derived from a random delay value adding this static delay "
1518                "so that the sum is kept below the maximum delay.\nSet to eg. "
1519                "node1:1s;node2:5 to set different value per node.</longdesc>\n");
1520         printf("    <content type=\"time\" default=\"0s\"/>\n");
1521         printf("  </parameter>\n");
1522 
1523         printf("  <parameter name=\"%s\" unique=\"0\">\n",
1524                PCMK_STONITH_ACTION_LIMIT);
1525         printf
1526             ("    <shortdesc lang=\"en\">The maximum number of actions can be performed in parallel on this device</shortdesc>\n");
1527         printf
1528             ("    <longdesc lang=\"en\">Cluster property concurrent-fencing=true needs to be configured first.\n"
1529              "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.</longdesc>\n");
1530         printf("    <content type=\"integer\" default=\"1\"/>\n");
1531         printf("  </parameter>\n");
1532 
1533 
1534         for (lpc = 0; lpc < PCMK__NELEM(actions); lpc++) {
1535             printf("  <parameter name=\"pcmk_%s_action\" unique=\"0\">\n", actions[lpc]);
1536             printf
1537                 ("    <shortdesc lang=\"en\">Advanced use only: An alternate command to run instead of '%s'</shortdesc>\n",
1538                  actions[lpc]);
1539             printf
1540                 ("    <longdesc lang=\"en\">Some devices do not support the standard commands or may provide additional ones.\n"
1541                  "Use this to specify an alternate, device-specific, command that implements the '%s' action.</longdesc>\n",
1542                  actions[lpc]);
1543             printf("    <content type=\"string\" default=\"%s\"/>\n", actions[lpc]);
1544             printf("  </parameter>\n");
1545 
1546             printf("  <parameter name=\"pcmk_%s_timeout\" unique=\"0\">\n", actions[lpc]);
1547             printf
1548                 ("    <shortdesc lang=\"en\">Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout</shortdesc>\n",
1549                  actions[lpc]);
1550             printf
1551                 ("    <longdesc lang=\"en\">Some devices need much more/less time to complete than normal.\n"
1552                  "Use this to specify an alternate, device-specific, timeout for '%s' actions.</longdesc>\n",
1553                  actions[lpc]);
1554             printf("    <content type=\"time\" default=\"60s\"/>\n");
1555             printf("  </parameter>\n");
1556 
1557             printf("  <parameter name=\"pcmk_%s_retries\" unique=\"0\">\n", actions[lpc]);
1558             printf
1559                 ("    <shortdesc lang=\"en\">Advanced use only: The maximum number of times to retry the '%s' command within the timeout period</shortdesc>\n",
1560                  actions[lpc]);
1561             printf("    <longdesc lang=\"en\">Some devices do not support multiple connections."
1562                    " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
1563                    " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up."
1564                    "</longdesc>\n", actions[lpc]);
1565             printf("    <content type=\"integer\" default=\"2\"/>\n");
1566             printf("  </parameter>\n");
1567         }
1568 
1569         printf(" </parameters>\n");
1570         printf("</resource-agent>\n");
1571         return CRM_EX_OK;
1572     }
1573 
1574     if (optind != argc) {
1575         ++argerr;
1576     }
1577 
1578     if (argerr) {
1579         pcmk__cli_help('?', CRM_EX_USAGE);
1580     }
1581 
1582     crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
1583 
1584     crm_notice("Starting Pacemaker fencer");
1585 
1586     old_instance = crm_ipc_new("stonith-ng", 0);
1587     if (crm_ipc_connect(old_instance)) {
1588         /* IPC end-point already up */
1589         crm_ipc_close(old_instance);
1590         crm_ipc_destroy(old_instance);
1591         crm_err("pacemaker-fenced is already active, aborting startup");
1592         crm_exit(CRM_EX_OK);
1593     } else {
1594         /* not up or not authentic, we'll proceed either way */
1595         crm_ipc_destroy(old_instance);
1596         old_instance = NULL;
1597     }
1598 
1599     mainloop_add_signal(SIGTERM, stonith_shutdown);
1600 
1601     crm_peer_init();
1602 
1603     fenced_data_set = pe_new_working_set();
1604     CRM_ASSERT(fenced_data_set != NULL);
1605     pe__set_working_set_flags(fenced_data_set,
1606                               pe_flag_no_counts|pe_flag_no_compat);
1607     pe__set_working_set_flags(fenced_data_set, pe_flag_show_utilization);
1608 
1609     cluster = calloc(1, sizeof(crm_cluster_t));
1610     CRM_ASSERT(cluster != NULL);
1611 
1612     if (stand_alone == FALSE) {
1613 
1614         if (is_corosync_cluster()) {
1615 #if SUPPORT_COROSYNC
1616             cluster->destroy = stonith_peer_cs_destroy;
1617             cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback;
1618             cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
1619 #endif
1620         }
1621 
1622         crm_set_status_callback(&st_peer_update_callback);
1623 
1624         if (crm_cluster_connect(cluster) == FALSE) {
1625             crm_crit("Cannot sign in to the cluster... terminating");
1626             crm_exit(CRM_EX_FATAL);
1627         }
1628         stonith_our_uname = strdup(cluster->uname);
1629 
1630         if (no_cib_connect == FALSE) {
1631             setup_cib();
1632         }
1633 
1634     } else {
1635         stonith_our_uname = strdup("localhost");
1636     }
1637 
1638     init_device_list();
1639     init_topology_list();
1640 
1641     pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
1642 
1643     pcmk__register_formats(NULL, formats);
1644     rc = pcmk__output_new(&out, "log", NULL, argv);
1645     if ((rc != pcmk_rc_ok) || (out == NULL)) {
1646         crm_err("Can't log resource details due to internal error: %s\n",
1647                 pcmk_rc_str(rc));
1648         crm_exit(CRM_EX_FATAL);
1649     }
1650 
1651     pe__register_messages(out);
1652     pcmk__register_lib_messages(out);
1653 
1654     pcmk__output_set_log_level(out, LOG_TRACE);
1655     fenced_data_set->priv = out;
1656 
1657     /* Create the mainloop and run it... */
1658     mainloop = g_main_loop_new(NULL, FALSE);
1659     crm_notice("Pacemaker fencer successfully started and accepting connections");
1660     g_main_loop_run(mainloop);
1661 
1662     stonith_cleanup();
1663     free(cluster->uuid);
1664     free(cluster->uname);
1665     free(cluster);
1666     pe_free_working_set(fenced_data_set);
1667 
1668     pcmk__unregister_formats();
1669     out->finish(out, CRM_EX_OK, true, NULL);
1670     pcmk__output_free(out);
1671 
1672     crm_exit(CRM_EX_OK);
1673 }

/* [previous][next][first][last][top][bottom][index][help] */