pacemaker  1.1.18-7fdfbbe
Scalable High-Availability cluster resource manager
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
cluster.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include <crm_internal.h>
20 #include <dlfcn.h>
21 
22 #include <stdio.h>
23 #include <unistd.h>
24 #include <string.h>
25 #include <stdlib.h>
26 #include <time.h>
27 #include <sys/param.h>
28 #include <sys/types.h>
29 #include <sys/utsname.h>
30 
31 #include <crm/crm.h>
32 #include <crm/msg_xml.h>
33 
34 #include <crm/common/ipc.h>
35 #include <crm/cluster/internal.h>
36 
37 CRM_TRACE_INIT_DATA(cluster);
38 
/* Library handle passed by address to find_library_function() whenever
 * "ll_cluster_new" is looked up in HEARTBEAT_LIBRARY; starts out NULL and
 * only exists when heartbeat support is compiled in. */
39 #if SUPPORT_HEARTBEAT
40 void *hb_library = NULL;
41 #endif
42 
/*
 * Look up the heartbeat UUID that belongs to a node name.
 *
 * uname: node name to resolve (may be NULL).
 *
 * Returns a heap-allocated string holding the textual UUID, or NULL when
 * heartbeat support is not compiled in, there is no heartbeat connection,
 * uname is NULL, the lookup fails, memory cannot be allocated, or heartbeat
 * reports the all-zero "unknown" UUID.
 */
static char *
get_heartbeat_uuid(const char *uname)
{
    char *uuid_calc = NULL;

#if SUPPORT_HEARTBEAT
    cl_uuid_t uuid_raw;
    const char *unknown = "00000000-0000-0000-0000-000000000000";

    if (heartbeat_cluster == NULL) {
        crm_warn("No connection to heartbeat, using uuid=uname");
        return NULL;
    } else if (uname == NULL) {
        return NULL;
    }

    if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) ==
        HA_FAIL) {
        /* Nothing has been allocated yet, so there is nothing to release */
        crm_err("get_uuid_by_name() call failed for host %s", uname);
        return NULL;
    }

    /* 50 bytes leaves room for the 36-character textual form plus the NUL */
    uuid_calc = calloc(1, 50);
    if (uuid_calc == NULL) {
        crm_err("Could not allocate memory for the UUID of %s", uname);
        return NULL;
    }
    cl_uuid_unparse(&uuid_raw, uuid_calc);

    if (safe_str_eq(uuid_calc, unknown)) {
        crm_warn("Could not calculate UUID for %s", uname);
        free(uuid_calc);
        return NULL;
    }
#endif
    return uuid_calc;
}
77 
78 static gboolean
79 uname_is_uuid(void)
80 {
81  static const char *uuid_pref = NULL;
82 
83  if (uuid_pref == NULL) {
84  uuid_pref = getenv("PCMK_uname_is_uuid");
85  }
86 
87  if (uuid_pref == NULL) {
88  /* true is legacy mode */
89  uuid_pref = "false";
90  }
91 
92  return crm_is_true(uuid_pref);
93 }
94 
/*
 * Work out the numeric corosync id of a node.
 *
 * id:   id already known for the node, or 0 if none.
 * uuid: textual UUID of the node.
 *
 * A non-zero id is returned untouched. Otherwise, when names are not used as
 * UUIDs and the stack is corosync, the id is parsed from uuid (falling back
 * to "0"); in every other case the 0 that was passed in is returned.
 */
int
get_corosync_id(int id, const char *uuid)
{
    if (id != 0) {
        return id;
    }

    if (uname_is_uuid() || !is_corosync_cluster()) {
        return id;
    }

    return crm_atoi(uuid, "0");
}
104 
/*
 * Build the identifier string for a corosync peer.
 *
 * NOTE(review): this listing lost the line that carries the function name and
 * parameter list (between "char *" and "{"). The body uses a parameter called
 * node, and the symbol index at the end of the page places
 * get_corosync_uuid() at this location -- confirm against the real source.
 *
 * Returns NULL when node is NULL. When names are not used as UUIDs and the
 * stack is corosync, returns the decimal text of node->id in a calloc()ed
 * buffer (NULL if that allocation fails); if node->id is not greater than 0
 * an info message is logged and NULL is returned. In every other case a
 * strdup() copy of node->uname is returned, or NULL when uname is not set.
 */
105 char *
107 {
108  if(node == NULL) {
109  return NULL;
110 
111  } else if (!uname_is_uuid() && is_corosync_cluster()) {
112  if (node->id > 0) {
113  int len = 32;
114  char *buffer = NULL;
115 
/* One byte more than len is allocated, so the zeroed buffer stays
 * NUL-terminated whatever snprintf() writes within len bytes */
116  buffer = calloc(1, (len + 1));
117  if (buffer != NULL) {
118  snprintf(buffer, len, "%u", node->id);
119  }
120 
121  return buffer;
122 
123  } else {
124  crm_info("Node %s is not yet known by corosync", node->uname);
125  }
126 
127  } else if (node->uname != NULL) {
128  return strdup(node->uname);
129  }
130 
131  return NULL;
132 }
133 
/*
 * Return the UUID of a peer, computing it and storing it on the peer the
 * first time it is needed.
 *
 * NOTE(review): several lines are absent from this listing: the function
 * name/parameter line, the declaration of the local "type" tested by the
 * switch, and most of the case labels. The symbol index at the end of the
 * page places crm_peer_uuid() at this location -- confirm the details against
 * the real source.
 *
 * Returns NULL for a NULL peer, and peer->uuid as-is when it is already set.
 * Otherwise whatever the switch produced (possibly NULL) is stored in
 * peer->uuid and returned; the returned pointer is the peer's own field.
 */
134 const char *
136 {
137  char *uuid = NULL;
139 
140  /* avoid blocking heartbeat calls where possible */
141  if(peer == NULL) {
142  return NULL;
143 
144  } else if (peer->uuid) {
145  return peer->uuid;
146  }
147 
148  switch (type) {
/* NOTE(review): case label missing from the listing above this branch */
150  uuid = get_corosync_uuid(peer);
151  break;
152 
153  case pcmk_cluster_cman:
/* NOTE(review): one line (inner 154) is missing from the listing here */
155  if (peer->uname) {
156  uuid = strdup(peer->uname);
157  }
158  break;
159 
/* NOTE(review): case label missing from the listing above this branch */
161  uuid = get_heartbeat_uuid(peer->uname);
162  break;
163 
/* NOTE(review): label line(s) missing from the listing above this branch */
166  crm_err("Unsupported cluster type");
167  break;
168  }
169 
/* Remember the result so later calls take the early return above */
170  peer->uuid = uuid;
171  return peer->uuid;
172 }
173 
/*
 * Connect to the cluster infrastructure.
 *
 * NOTE(review): the function name/parameter line and the declaration of the
 * local "type" used in the first log message are missing from this listing.
 * The symbol index at the end of the page places
 * crm_cluster_connect(crm_cluster_t *cluster) here -- confirm against the
 * real source.
 *
 * With corosync support and an openais-family stack, crm_peer_init() is
 * called and the result of init_cs_connection(cluster) is returned.
 * With heartbeat support and a heartbeat stack, cluster->hb_conn is created
 * (or signed off if one was passed in), any other previous connection is
 * signed off, the connection is registered, and on success the send queue
 * length is raised to 1024; the registration result is returned.
 * Otherwise the unsupported stack is logged and FALSE is returned.
 */
174 gboolean
176 {
178 
179  crm_notice("Connecting to cluster infrastructure: %s", name_for_cluster_type(type));
180 #if SUPPORT_COROSYNC
181  if (is_openais_cluster()) {
182  crm_peer_init();
183  return init_cs_connection(cluster);
184  }
185 #endif
186 
187 #if SUPPORT_HEARTBEAT
188  if (is_heartbeat_cluster()) {
189  int rv;
190 
191  /* coverity[var_deref_op] False positive */
192  if (cluster->hb_conn == NULL) {
193  /* No object passed in, create a new one. */
194  ll_cluster_t *(*new_cluster) (const char *llctype) =
195  find_library_function(&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
196 
197  cluster->hb_conn = (*new_cluster) ("heartbeat");
198  /* dlclose(handle); */
199 
200  } else {
201  /* Object passed in. Disconnect first, then reconnect below. */
202  cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE);
203  }
204 
205  /* make sure we are disconnected first with the old object, if any. */
206  if (heartbeat_cluster && heartbeat_cluster != cluster->hb_conn) {
207  heartbeat_cluster->llc_ops->signoff(heartbeat_cluster, FALSE);
208  }
209 
210  CRM_ASSERT(cluster->hb_conn != NULL);
/* From here on the file-wide heartbeat_cluster pointer refers to this connection */
211  heartbeat_cluster = cluster->hb_conn;
212 
213  rv = register_heartbeat_conn(cluster);
214  if (rv) {
215  /* we'll benefit from a bigger queue length on heartbeat side.
216  * Otherwise, if peers send messages faster than we can consume
217  * them right now, heartbeat messaging layer will kick us out once
218  * its (small) default queue fills up :(
219  * If we fail to adjust the sendq length, that's not yet fatal, though.
220  */
221  if (HA_OK != heartbeat_cluster->llc_ops->set_sendq_len(heartbeat_cluster, 1024)) {
222  crm_warn("Cannot set sendq length: %s",
223  heartbeat_cluster->llc_ops->errmsg(heartbeat_cluster));
224  }
225  }
226  return rv;
227  }
228 #endif
229  crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type"));
230  return FALSE;
231 }
232 
/*
 * Disconnect from the cluster infrastructure.
 *
 * NOTE(review): the function name/parameter line, the declaration of the
 * local "type" passed to name_for_cluster_type(), and one statement inside
 * the corosync branch are missing from this listing. The symbol index at the
 * end of the page places crm_cluster_disconnect(crm_cluster_t *cluster) here
 * -- confirm against the real source.
 *
 * With corosync support and an openais-family stack, the connection is torn
 * down through terminate_cs_connection(cluster).
 * With heartbeat support and a heartbeat stack, cluster->hb_conn (if any) is
 * signed off and reset to NULL; a NULL cluster is only logged.
 * Any other stack is logged as unsupported.
 */
233 void
235 {
237  const char *type_str = name_for_cluster_type(type);
238 
239  crm_info("Disconnecting from cluster infrastructure: %s", type_str);
240 #if SUPPORT_COROSYNC
241  if (is_openais_cluster()) {
/* NOTE(review): one line (inner 242) is missing from the listing here */
243  terminate_cs_connection(cluster);
244  crm_info("Disconnected from %s", type_str);
245  return;
246  }
247 #endif
248 
249 #if SUPPORT_HEARTBEAT
250  if (is_heartbeat_cluster()) {
251  if (cluster == NULL) {
252  crm_info("No cluster connection");
253  return;
254 
255  } else if (cluster->hb_conn) {
256  cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, TRUE);
257  cluster->hb_conn = NULL;
258  crm_info("Disconnected from %s", type_str);
259  return;
260 
261  } else {
262  crm_info("No %s connection", type_str);
263  return;
264  }
265  }
266 #endif
267  crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type"));
268 }
269 
270 gboolean
271 send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service, xmlNode * data,
272  gboolean ordered)
273 {
274 
275 #if SUPPORT_COROSYNC
276  if (is_openais_cluster()) {
277  return send_cluster_message_cs(data, FALSE, node, service);
278  }
279 #endif
280 #if SUPPORT_HEARTBEAT
281  if (is_heartbeat_cluster()) {
282  return send_ha_message(heartbeat_cluster, data, node ? node->uname : NULL, ordered);
283  }
284 #endif
285  return FALSE;
286 }
287 
/*
 * Return the name of the local node.
 *
 * NOTE(review): the function name/parameter line is missing from this
 * listing; the symbol index at the end of the page places
 * get_local_node_name(void) here -- confirm against the real source.
 *
 * The name is obtained from get_node_name(0) and kept in a function-local
 * static, so once a non-NULL name has been obtained the same pointer is
 * returned on every later call. A NULL result is not remembered and the
 * lookup is repeated next time.
 */
288 const char *
290 {
291  static char *name = NULL;
292 
293  if(name) {
294  return name;
295  }
296  name = get_node_name(0);
297  return name;
298 }
299 
/*
 * Obtain the name of a node from its numeric id.
 *
 * NOTE(review): the function name/parameter line, several case labels and one
 * statement near the end are missing from this listing. The symbol index at
 * the end of the page places get_node_name(uint32_t nodeid) here -- confirm
 * the details against the real source.
 *
 * For nodeid 0, a value found in the "OCF_RESKEY_" CRM_META "_isolation_host"
 * environment variable is returned as a strdup() copy without consulting the
 * stack. Otherwise the name comes from the stack-specific lookup selected in
 * the switch. If that yields nothing and nodeid is 0, the nodename reported
 * by uname() is copied instead. Returns NULL (after logging) when no name
 * could be determined.
 */
300 char *
302 {
303  char *name = NULL;
304  const char *isolation_host = NULL;
305  enum cluster_type_e stack;
306 
307  if (nodeid == 0) {
308  isolation_host = getenv("OCF_RESKEY_"CRM_META"_isolation_host");
309  if (isolation_host) {
310  return strdup(isolation_host);
311  }
312  }
313 
314  stack = get_cluster_type();
315  switch (stack) {
/* NOTE(review): case label (inner 316) missing from the listing; this
 * branch performs no lookup */
317  break;
318 
319 #if SUPPORT_PLUGIN
/* NOTE(review): case label (inner 320) missing from the listing */
321  name = classic_node_name(nodeid);
322  break;
323 #else
324 # if SUPPORT_COROSYNC
/* NOTE(review): case label (inner 325) missing from the listing */
326  name = corosync_node_name(0, nodeid);
327  break;
328 # endif
329 #endif
330 
331 #if SUPPORT_CMAN
332  case pcmk_cluster_cman:
333  name = cman_node_name(nodeid);
334  break;
335 #endif
336 
337  default:
338  crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack);
339  }
340 
/* Only the local node (nodeid 0) may fall back to the system's own nodename */
341  if(name == NULL && nodeid == 0) {
342  struct utsname res;
343  int rc = uname(&res);
344 
345  if (rc == 0) {
346  crm_notice("Defaulting to uname -n for the local %s node name",
347  name_for_cluster_type(stack));
348  name = strdup(res.nodename);
349  }
350 
351  if (name == NULL) {
352  crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack));
/* NOTE(review): one line (inner 353) is missing from the listing here */
354  }
355  }
356 
357  if (name == NULL) {
358  crm_notice("Could not obtain a node name for %s nodeid %u",
359  name_for_cluster_type(stack), nodeid);
360  }
361  return name;
362 }
363 
374 const char *
375 crm_peer_uname(const char *uuid)
376 {
377  GHashTableIter iter;
378  crm_node_t *node = NULL;
379 
380  CRM_CHECK(uuid != NULL, return NULL);
381 
382  /* remote nodes have the same uname and uuid */
383  if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
384  return uuid;
385  }
386 
387  /* avoid blocking calls where possible */
388  g_hash_table_iter_init(&iter, crm_peer_cache);
389  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
390  if(node->uuid && strcasecmp(node->uuid, uuid) == 0) {
391  if(node->uname) {
392  return node->uname;
393  }
394  break;
395  }
396  }
397  node = NULL;
398 
399 #if SUPPORT_COROSYNC
400  if (is_openais_cluster()) {
401  if (uname_is_uuid() == FALSE && is_corosync_cluster()) {
402  uint32_t id = crm_int_helper(uuid, NULL);
403  if(id != 0) {
404  node = crm_find_peer(id, NULL);
405  } else {
406  crm_err("Invalid node id: %s", uuid);
407  }
408 
409  } else {
410  node = crm_find_peer(0, uuid);
411  }
412 
413  if (node) {
414  crm_info("Setting uuid for node %s[%u] to '%s'", node->uname, node->id, uuid);
415  node->uuid = strdup(uuid);
416  if(node->uname) {
417  return node->uname;
418  }
419  }
420  return NULL;
421  }
422 #endif
423 
424 #if SUPPORT_HEARTBEAT
425  if (is_heartbeat_cluster()) {
426  if (heartbeat_cluster != NULL) {
427  cl_uuid_t uuid_raw;
428  char *uuid_copy = strdup(uuid);
429  char *uname = malloc(MAX_NAME);
430 
431  cl_uuid_parse(uuid_copy, &uuid_raw);
432 
433  if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, uname,
434  MAX_NAME) == HA_FAIL) {
435  crm_err("Could not calculate uname for %s", uuid);
436  } else {
437  node = crm_get_peer(0, uname);
438  }
439 
440  free(uuid_copy);
441  free(uname);
442  }
443 
444  if (node) {
445  crm_info("Setting uuid for node %s to '%s'", node->uname, uuid);
446  node->uuid = strdup(uuid);
447  if(node->uname) {
448  return node->uname;
449  }
450  }
451  return NULL;
452  }
453 #endif
454 
455  return NULL;
456 }
457 
458 void
459 set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
460 {
461  const char *uuid_calc = crm_peer_uuid(node);
462 
463  crm_xml_add(xml, attr, uuid_calc);
464  return;
465 }
466 
/*
 * Map a cluster type to a human-readable name.
 *
 * NOTE(review): the function name/parameter line and all case labels except
 * pcmk_cluster_cman are missing from this listing. The symbol index at the
 * end of the page places name_for_cluster_type(enum cluster_type_e type)
 * here -- confirm which label belongs to which string against the real
 * source.
 *
 * Returns a string literal; a value not handled by the switch is logged as an
 * error and reported as "invalid".
 */
467 const char *
469 {
470  switch (type) {
472  return "classic openais (with plugin)";
473  case pcmk_cluster_cman:
474  return "cman";
476  return "corosync";
478  return "heartbeat";
480  return "unknown";
482  return "invalid";
483  }
484  crm_err("Invalid cluster type: %d", type);
485  return "invalid";
486 }
487 
488 /* Do not expose these two */
/* NOTE(review): one line (inner 489) is missing from the listing here, so
 * only one of "these two" is visible. */
/* Cluster type remembered for this process; stays pcmk_cluster_unknown until
 * it is set explicitly or detected. */
490 static enum cluster_type_e cluster_type = pcmk_cluster_unknown;
491 
/*
 * Set the remembered cluster type.
 *
 * NOTE(review): the function name/parameter line is missing from this
 * listing; the symbol index at the end of the page places
 * set_cluster_type(enum cluster_type_e type) here -- confirm against the real
 * source.
 *
 * Returns 0 when the type was stored or already had that value, and -1 (after
 * logging an error) when a different type is already set. Passing
 * pcmk_cluster_unknown always succeeds and resets the remembered type.
 */
492 int
494 {
495  if (cluster_type == pcmk_cluster_unknown) {
496  crm_info("Cluster type set to: %s", name_for_cluster_type(type));
497  cluster_type = type;
498  return 0;
499 
500  } else if (cluster_type == type) {
501  return 0;
502 
503  } else if (pcmk_cluster_unknown == type) {
/* Resetting to "unknown" is allowed even though a type was already set */
504  cluster_type = type;
505  return 0;
506  }
507 
508  crm_err("Cluster type already set to %s, ignoring %s",
509  name_for_cluster_type(cluster_type), name_for_cluster_type(type));
510  return -1;
511 }
/*
 * Determine which cluster stack is in use and remember the answer.
 *
 * NOTE(review): the function name/parameter line and one statement in the
 * "invalid" branch near the end are missing from this listing. The symbol
 * index at the end of the page places get_cluster_type(void) here -- confirm
 * against the real source.
 *
 * A type that is already known is returned immediately. Otherwise the
 * HA_cluster_type environment variable is read. When it is unset, the
 * compiled-in stacks are probed (heartbeat first, then corosync); when it is
 * set, its value is matched against the names of the compiled-in stacks and
 * anything else yields pcmk_cluster_invalid.
 */
512 enum cluster_type_e
514 {
515  bool detected = FALSE;
516  const char *cluster = NULL;
517 
518  /* Return the previous calculation, if any */
519  if (cluster_type != pcmk_cluster_unknown) {
520  return cluster_type;
521  }
522 
523  cluster = getenv("HA_cluster_type");
524 
525 #if SUPPORT_HEARTBEAT
526  /* If nothing is defined in the environment, try heartbeat (if supported) */
527  if(cluster == NULL) {
528  ll_cluster_t *hb;
529  ll_cluster_t *(*new_cluster) (const char *llctype) = find_library_function(
530  &hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
531 
532  hb = (*new_cluster) ("heartbeat");
533 
534  crm_debug("Testing with Heartbeat");
535  /*
536  * Test as "casual" client (clientid == NULL; will be replaced by
537  * current pid). We are trying to detect if we can communicate with
538  * heartbeat, not if we can register as some specific service.
539  * Otherwise all but one of several concurrent invocations will get
540  * HA_FAIL because of:
541  * WARN: duplicate client add request
542  * ERROR: api_process_registration_msg: cannot add client()
543  * and then likely fail :(
544  */
545  if (hb->llc_ops->signon(hb, NULL) == HA_OK) {
546  hb->llc_ops->signoff(hb, FALSE);
547 
548  cluster_type = pcmk_cluster_heartbeat;
549  detected = TRUE;
550  goto done;
551  }
/* NOTE(review): the probe object hb is not released on any path visible
 * here -- check whether that is intentional */
552  }
553 #endif
554 
555 #if SUPPORT_COROSYNC
556  /* If nothing is defined in the environment, try corosync (if supported) */
557  if(cluster == NULL) {
558  crm_debug("Testing with Corosync");
559  cluster_type = find_corosync_variant();
560  if (cluster_type != pcmk_cluster_unknown) {
561  detected = TRUE;
562  goto done;
563  }
564  }
565 #endif
566 
567  /* Something was defined in the environment, test it against what we support */
568  crm_info("Verifying cluster type: '%s'", cluster?cluster:"-unspecified-");
569  if (cluster == NULL) {
/* Nothing to verify: this branch is deliberately empty and leaves
 * cluster_type as it is */
570 
571 #if SUPPORT_HEARTBEAT
572  } else if (safe_str_eq(cluster, "heartbeat")) {
573  cluster_type = pcmk_cluster_heartbeat;
574 #endif
575 
576 #if SUPPORT_COROSYNC
577  } else if (safe_str_eq(cluster, "openais")
578  || safe_str_eq(cluster, "classic openais (with plugin)")) {
579  cluster_type = pcmk_cluster_classic_ais;
580 
581  } else if (safe_str_eq(cluster, "corosync")) {
582  cluster_type = pcmk_cluster_corosync;
583 #endif
584 
585 #if SUPPORT_CMAN
586  } else if (safe_str_eq(cluster, "cman")) {
587  cluster_type = pcmk_cluster_cman;
588 #endif
589 
590  } else {
591  cluster_type = pcmk_cluster_invalid;
592  goto done; /* Keep the compiler happy when no stacks are supported */
593  }
594 
595  done:
596  if (cluster_type == pcmk_cluster_unknown) {
597  crm_notice("Could not determine the current cluster type");
598 
599  } else if (cluster_type == pcmk_cluster_invalid) {
600  crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.",
601  cluster);
/* NOTE(review): one line (inner 602) is missing from the listing here; given
 * the "terminating" message it presumably ends the process -- confirm */
603 
604  } else {
605  crm_info("%s an active '%s' cluster", detected?"Detected":"Assuming", name_for_cluster_type(cluster_type));
606  }
607 
608  return cluster_type;
609 }
610 
/* NOTE(review): the name/parameter line and the whole body of this function
 * are missing from this listing. The symbol index at the end of the page
 * places is_cman_cluster(void) here; restore it from the real source. */
611 gboolean
613 {
615 }
616 
/* NOTE(review): the name/parameter line and the whole body of this function
 * are missing from this listing. The symbol index at the end of the page
 * places is_corosync_cluster(void) here; restore it from the real source. */
617 gboolean
619 {
621 }
622 
/* NOTE(review): the name/parameter line and the whole body of this function
 * are missing from this listing. The symbol index at the end of the page
 * places is_classic_ais_cluster(void) here; restore it from the real source. */
623 gboolean
625 {
627 }
628 
/*
 * Report whether the cluster stack belongs to the openais family.
 *
 * NOTE(review): the function name/parameter line and the declaration of the
 * local "type" are missing from this listing. The symbol index at the end of
 * the page places is_openais_cluster(void) here -- confirm against the real
 * source.
 *
 * Returns TRUE when type is pcmk_cluster_classic_ais, pcmk_cluster_corosync
 * or pcmk_cluster_cman, and FALSE otherwise.
 */
629 gboolean
631 {
633 
634  if (type == pcmk_cluster_classic_ais) {
635  return TRUE;
636  } else if (type == pcmk_cluster_corosync) {
637  return TRUE;
638  } else if (type == pcmk_cluster_cman) {
639  return TRUE;
640  }
641  return FALSE;
642 }
643 
/* NOTE(review): the name/parameter line and the whole body of this function
 * are missing from this listing. The symbol index at the end of the page
 * places is_heartbeat_cluster(void) here; restore it from the real source. */
644 gboolean
646 {
648 }
649 
650 gboolean
651 node_name_is_valid(const char *key, const char *name)
652 {
653  int octet;
654 
655  if (name == NULL) {
656  crm_trace("%s is empty", key);
657  return FALSE;
658 
659  } else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) {
660  crm_trace("%s contains an ipv4 address, ignoring: %s", key, name);
661  return FALSE;
662 
663  } else if (strstr(name, ":") != NULL) {
664  crm_trace("%s contains an ipv6 address, ignoring: %s", key, name);
665  return FALSE;
666  }
667  crm_trace("%s is valid", key);
668  return TRUE;
669 }
void crm_peer_destroy(void)
Definition: membership.c:431
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:164
A dumping ground.
void * find_library_function(void **handle, const char *lib, const char *fn, int fatal)
#define crm_notice(fmt, args...)
Definition: logging.h:250
gboolean is_openais_cluster(void)
Definition: cluster.c:630
crm_ais_msg_types
Definition: cluster.h:128
char * corosync_node_name(uint64_t cmap_handle, uint32_t nodeid)
Definition: corosync.c:52
uint32_t id
Definition: cluster.h:73
gboolean is_heartbeat_cluster(void)
Definition: cluster.c:645
char * uuid
Definition: cluster.h:83
char * get_corosync_uuid(crm_node_t *peer)
Definition: cluster.c:106
void terminate_cs_connection(crm_cluster_t *cluster)
Definition: corosync.c:140
const char * get_local_node_name(void)
Definition: cluster.c:289
void crm_peer_init(void)
Definition: membership.c:419
int get_corosync_id(int id, const char *uuid)
Definition: cluster.c:96
long long crm_int_helper(const char *text, char **end_text)
Definition: strings.c:80
GHashTable * crm_remote_peer_cache
Definition: membership.c:62
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Definition: membership.c:676
char * get_node_name(uint32_t nodeid)
Definition: cluster.c:301
void crm_cluster_disconnect(crm_cluster_t *cluster)
Definition: cluster.c:234
gboolean init_cs_connection(crm_cluster_t *cluster)
Definition: corosync.c:312
#define CRM_TRACE_INIT_DATA(name)
Definition: logging.h:119
char uname[MAX_NAME]
Definition: internal.h:53
#define crm_warn(fmt, args...)
Definition: logging.h:249
#define crm_atoi(text, default_text)
Definition: util.h:110
uint32_t id
Definition: internal.h:48
#define crm_debug(fmt, args...)
Definition: logging.h:253
cluster_type_e
Definition: cluster.h:210
#define crm_trace(fmt, args...)
Definition: logging.h:254
enum cluster_type_e find_corosync_variant(void)
Definition: corosync.c:440
gboolean is_cman_cluster(void)
Definition: cluster.c:612
const char * name_for_cluster_type(enum cluster_type_e type)
Definition: cluster.c:468
gboolean crm_cluster_connect(crm_cluster_t *cluster)
Definition: cluster.c:175
int set_cluster_type(enum cluster_type_e type)
Definition: cluster.c:493
#define MAX_NAME
Definition: crm.h:42
#define DAEMON_RESPAWN_STOP
Definition: crm.h:65
gboolean is_corosync_cluster(void)
Definition: cluster.c:618
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Definition: xml.c:2490
#define CRM_META
Definition: crm.h:53
gboolean node_name_is_valid(const char *key, const char *name)
Definition: cluster.c:651
#define crm_err(fmt, args...)
Definition: logging.h:248
const char * crm_peer_uname(const char *uuid)
Get the node name corresponding to a node UUID.
Definition: cluster.c:375
gboolean send_cluster_message_cs(xmlNode *msg, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest)
Definition: cpg.c:509
#define uint32_t
Definition: stdint.in.h:158
#define CRM_ASSERT(expr)
Definition: error.h:35
char data[0]
Definition: internal.h:58
int crm_exit(int rc)
Definition: utils.c:83
Wrappers for and extensions to libqb IPC.
char * uname
Definition: cluster.h:82
gboolean crm_is_true(const char *s)
Definition: strings.c:165
void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
Definition: cluster.c:459
#define safe_str_eq(a, b)
Definition: util.h:72
crm_node_t * crm_find_peer(unsigned int id, const char *uname)
Definition: membership.c:540
gboolean send_cluster_message(crm_node_t *node, enum crm_ais_msg_types service, xmlNode *data, gboolean ordered)
Definition: cluster.c:271
GHashTable * crm_peer_cache
Definition: membership.c:44
#define crm_info(fmt, args...)
Definition: logging.h:251
const char * crm_peer_uuid(crm_node_t *node)
Definition: cluster.c:135
gboolean is_classic_ais_cluster(void)
Definition: cluster.c:624
enum crm_ais_msg_types type
Definition: internal.h:51
enum cluster_type_e get_cluster_type(void)
Definition: cluster.c:513