pacemaker  2.0.3-4b1f869f0f
Scalable High-Availability cluster resource manager
unpack.c
1 /*
2  * Copyright 2004-2019 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <stdio.h>
13 #include <string.h>
14 #include <glib.h>
15 
16 #include <crm/crm.h>
17 #include <crm/services.h>
18 #include <crm/msg_xml.h>
19 #include <crm/common/xml.h>
20 
21 #include <crm/common/util.h>
22 #include <crm/pengine/rules.h>
23 #include <crm/pengine/internal.h>
25 #include <unpack.h>
26 #include <pe_status_private.h>
27 
28 CRM_TRACE_INIT_DATA(pe_status);
29 
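/* Set or clear a bit in data_set->flags according to the boolean value of the
 * named cluster option (as returned by pe_pref(), configured or defaulted) */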
30 #define set_config_flag(data_set, option, flag) do { \
31  const char *tmp = pe_pref(data_set->config_hash, option); \
32  if(tmp) { \
33  if(crm_is_true(tmp)) { \
34  set_bit(data_set->flags, flag); \
35  } else { \
36  clear_bit(data_set->flags, flag); \
37  } \
38  } \
39  } while(0)
40 
41 static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
42  xmlNode **last_failure,
43  enum action_fail_response *failed,
44  pe_working_set_t *data_set);
45 static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node);
46 static void add_node_attrs(xmlNode *attrs, pe_node_t *node, bool overwrite,
47  pe_working_set_t *data_set);
48 
49 
50 // Bitmask for warnings we only want to print once
51 uint32_t pe_wo = 0;
52 
53 static gboolean
54 is_dangling_guest_node(node_t *node)
55 {
56  /* we are looking for a remote-node that was supposed to be mapped to a
57  * container resource, but all traces of that container have disappeared
58  * from both the config and the status section. */
59  if (pe__is_guest_or_remote_node(node) &&
60  node->details->remote_rsc &&
61  node->details->remote_rsc->container == NULL &&
62  is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) {
63  return TRUE;
64  }
65 
66  return FALSE;
67 }
68 
69 
77 void
78 pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
79 {
80  CRM_CHECK(node, return);
81 
82  /* A guest node is fenced by marking its container as failed */
83  if (pe__is_guest_node(node)) {
84  resource_t *rsc = node->details->remote_rsc->container;
85 
86  if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
87  if (!is_set(rsc->flags, pe_rsc_managed)) {
88  crm_notice("Not fencing guest node %s "
89  "(otherwise would because %s): "
90  "its guest resource %s is unmanaged",
91  node->details->uname, reason, rsc->id);
92  } else {
93  crm_warn("Guest node %s will be fenced "
94  "(by recovering its guest resource %s): %s",
95  node->details->uname, rsc->id, reason);
96 
97  /* We don't mark the node as unclean because that would prevent the
98  * node from running resources. We want to allow it to run resources
99  * in this transition if the recovery succeeds.
100  */
101  node->details->remote_requires_reset = TRUE;
102  set_bit(rsc->flags, pe_rsc_failed);
103  set_bit(rsc->flags, pe_rsc_stop);
104  }
105  }
106 
107  } else if (is_dangling_guest_node(node)) {
108  crm_info("Cleaning up dangling connection for guest node %s: "
109  "fencing was already done because %s, "
110  "and guest resource no longer exists",
111  node->details->uname, reason);
112  set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
113  set_bit(node->details->remote_rsc->flags, pe_rsc_stop);
114 
115  } else if (pe__is_remote_node(node)) {
116  resource_t *rsc = node->details->remote_rsc;
117 
118  if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
119  crm_notice("Not fencing remote node %s "
120  "(otherwise would because %s): connection is unmanaged",
121  node->details->uname, reason);
122  } else if(node->details->remote_requires_reset == FALSE) {
123  node->details->remote_requires_reset = TRUE;
124  crm_warn("Remote node %s %s: %s",
125  node->details->uname,
126  pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
127  reason);
128  }
129  node->details->unclean = TRUE;
130  pe_fence_op(node, NULL, TRUE, reason, data_set);
131 
132  } else if (node->details->unclean) {
133  crm_trace("Cluster node %s %s because %s",
134  node->details->uname,
135  pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
136  reason);
137 
138  } else {
139  crm_warn("Cluster node %s %s: %s",
140  node->details->uname,
141  pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
142  reason);
143  node->details->unclean = TRUE;
144  pe_fence_op(node, NULL, TRUE, reason, data_set);
145  }
146 }
147 
148 // @TODO xpaths can't handle templates, rules, or id-refs
149 
150 // nvpair with provides or requires set to unfencing
151 #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
152  "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \
153  "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
154  "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']"
155 
156 // unfencing in rsc_defaults or any resource
157 #define XPATH_ENABLE_UNFENCING \
158  "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
159  "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
160  "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
161  "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
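/* For example, a CIB meta-attribute like the following (ids are illustrative)
 * would match, whether it appears under rsc_defaults or under a resource:
 *
 *   <meta_attributes id="rsc1-meta_attributes">
 *     <nvpair id="rsc1-requires" name="requires" value="unfencing"/>
 *   </meta_attributes>
 */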
162 
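/* Perform a one-time XPath search of the input CIB and set the given
 * working-set flag if the search returns any results (skipped when the flag
 * is already set) */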
163 static
164 void set_if_xpath(unsigned long long flag, const char *xpath,
165  pe_working_set_t *data_set)
166 {
167  xmlXPathObjectPtr result = NULL;
168 
169  if (is_not_set(data_set->flags, flag)) {
170  result = xpath_search(data_set->input, xpath);
171  if (result && (numXpathResults(result) > 0)) {
172  set_bit(data_set->flags, flag);
173  }
174  freeXpathObject(result);
175  }
176 }
177 
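/* Unpack the crm_config section into data_set->config_hash and derive the
 * working-set flags, fencing settings, no-quorum policy, node health scores,
 * and placement strategy from it */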
178 gboolean
179 unpack_config(xmlNode * config, pe_working_set_t * data_set)
180 {
181  const char *value = NULL;
182  GHashTable *config_hash = crm_str_table_new();
183 
184  data_set->config_hash = config_hash;
185 
186  pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, NULL, config_hash,
187  CIB_OPTIONS_FIRST, FALSE, data_set);
188 
189  verify_pe_options(data_set->config_hash);
190 
191  set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
192  if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
193  crm_info("Startup probes: disabled (dangerous)");
194  }
195 
196  value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
197  if (value && crm_is_true(value)) {
198  crm_notice("Watchdog will be used via SBD if fencing is required "
199  "and stonith-watchdog-timeout is nonzero");
200  set_bit(data_set->flags, pe_flag_have_stonith_resource);
201  }
202 
203  /* Set certain flags via xpath here, so they can be used before the relevant
204  * configuration sections are unpacked.
205  */
206  set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);
207 
208  value = pe_pref(data_set->config_hash, "stonith-timeout");
209  data_set->stonith_timeout = (int) crm_parse_interval_spec(value);
210  crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
211 
212  set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
213  crm_debug("STONITH of failed nodes is %s",
214  is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
215 
216  data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
217  if (!strcmp(data_set->stonith_action, "poweroff")) {
218  pe_warn_once(pe_wo_poweroff,
219  "Support for stonith-action of 'poweroff' is deprecated "
220  "and will be removed in a future release (use 'off' instead)");
221  data_set->stonith_action = "off";
222  }
223  crm_trace("STONITH will %s nodes", data_set->stonith_action);
224 
225  set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
226  crm_debug("Concurrent fencing is %s",
227  is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled");
228 
229  set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
230  crm_debug("Stop all active resources: %s",
231  is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
232 
233  set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
234  if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
235  crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
236  }
237 
238  value = pe_pref(data_set->config_hash, "no-quorum-policy");
239 
240  if (safe_str_eq(value, "ignore")) {
241  data_set->no_quorum_policy = no_quorum_ignore;
242 
243  } else if (safe_str_eq(value, "freeze")) {
244  data_set->no_quorum_policy = no_quorum_freeze;
245 
246  } else if (safe_str_eq(value, "demote")) {
247  data_set->no_quorum_policy = no_quorum_demote;
248 
249  } else if (safe_str_eq(value, "suicide")) {
250  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
251  int do_panic = 0;
252 
253  crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC,
254  &do_panic);
255  if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) {
256  data_set->no_quorum_policy = no_quorum_suicide;
257  } else {
258  crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
259  data_set->no_quorum_policy = no_quorum_stop;
260  }
261  } else {
262  crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured");
263  data_set->no_quorum_policy = no_quorum_stop;
264  }
265 
266  } else {
267  data_set->no_quorum_policy = no_quorum_stop;
268  }
269 
270  switch (data_set->no_quorum_policy) {
271  case no_quorum_freeze:
272  crm_debug("On loss of quorum: Freeze resources");
273  break;
274  case no_quorum_stop:
275  crm_debug("On loss of quorum: Stop ALL resources");
276  break;
277  case no_quorum_demote:
278  crm_debug("On loss of quorum: "
279  "Demote promotable resources and stop other resources");
280  break;
281  case no_quorum_suicide:
282  crm_notice("On loss of quorum: Fence all remaining nodes");
283  break;
284  case no_quorum_ignore:
285  crm_notice("On loss of quorum: Ignore");
286  break;
287  }
288 
289  set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
290  crm_trace("Orphan resources are %s",
291  is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
292 
293  set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
294  crm_trace("Orphan resource actions are %s",
295  is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
296 
297  set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
298  crm_trace("Stopped resources are removed from the status section: %s",
299  is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
300 
301  set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
302  crm_trace("Maintenance mode: %s",
303  is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
304 
305  set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
306  crm_trace("Start failures are %s",
307  is_set(data_set->flags,
308  pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
309 
310  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
311  set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
312  }
313  if (is_set(data_set->flags, pe_flag_startup_fencing)) {
314  crm_trace("Unseen nodes will be fenced");
315  } else {
316  pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
317  }
318 
319  node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
320  node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
321  node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
322 
323  crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
324  pe_pref(data_set->config_hash, "node-health-red"),
325  pe_pref(data_set->config_hash, "node-health-yellow"),
326  pe_pref(data_set->config_hash, "node-health-green"));
327 
328  data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
329  crm_trace("Placement strategy: %s", data_set->placement_strategy);
330 
331  return TRUE;
332 }
333 
334 static void
335 destroy_digest_cache(gpointer ptr)
336 {
337  op_digest_cache_t *data = ptr;
338 
339  free_xml(data->params_all);
340  free_xml(data->params_secure);
341  free_xml(data->params_restart);
342 
343  free(data->digest_all_calc);
344  free(data->digest_restart_calc);
345  free(data->digest_secure_calc);
346 
347  free(data);
348 }
349 
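/* Allocate and initialize a node_t from a node entry's id/uname/type/score,
 * warning if the uname duplicates an existing node, and insert it into
 * data_set->nodes sorted by uname; returns NULL on allocation failure */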
350 node_t *
351 pe_create_node(const char *id, const char *uname, const char *type,
352  const char *score, pe_working_set_t * data_set)
353 {
354  node_t *new_node = NULL;
355 
356  if (pe_find_node(data_set->nodes, uname) != NULL) {
357  crm_config_warn("Detected multiple node entries with uname=%s"
358  " - this is rarely intended", uname);
359  }
360 
361  new_node = calloc(1, sizeof(node_t));
362  if (new_node == NULL) {
363  return NULL;
364  }
365 
366  new_node->weight = char2score(score);
367  new_node->fixed = FALSE;
368  new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
369 
370  if (new_node->details == NULL) {
371  free(new_node);
372  return NULL;
373  }
374 
375  crm_trace("Creating node for entry %s/%s", uname, id);
376  new_node->details->id = id;
377  new_node->details->uname = uname;
378  new_node->details->online = FALSE;
379  new_node->details->shutdown = FALSE;
380  new_node->details->rsc_discovery_enabled = TRUE;
381  new_node->details->running_rsc = NULL;
382  new_node->details->type = node_ping;
383 
384  if (safe_str_eq(type, "remote")) {
385  new_node->details->type = node_remote;
386  set_bit(data_set->flags, pe_flag_have_remote_nodes);
387  } else if ((type == NULL) || safe_str_eq(type, "member")) {
388  new_node->details->type = node_member;
389  }
390 
391  new_node->details->attrs = crm_str_table_new();
392 
393  if (pe__is_guest_or_remote_node(new_node)) {
394  g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
395  strdup("remote"));
396  } else {
397  g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
398  strdup("cluster"));
399  }
400 
401  new_node->details->utilization = crm_str_table_new();
402 
403  new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash,
404  g_str_equal, free,
405  destroy_digest_cache);
406 
407  data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname);
408  return new_node;
409 }
410 
411 bool
412 remote_id_conflict(const char *remote_name, pe_working_set_t *data)
413 {
414  bool match = FALSE;
415 #if 1
416  match = (pe_find_resource(data->resources, remote_name) != NULL);
417 #else
418  if (data->name_check == NULL) {
419  data->name_check = g_hash_table_new(crm_str_hash, g_str_equal);
420  for (xml_rsc = __xml_first_child_element(parent); xml_rsc != NULL;
421  xml_rsc = __xml_next_element(xml_rsc)) {
422 
423  const char *id = ID(xml_rsc);
424 
425  /* avoiding heap allocation here because we know the duration of this hashtable allows us to */
426  g_hash_table_insert(data->name_check, (char *) id, (char *) id);
427  }
428  }
429  if (g_hash_table_lookup(data->name_check, remote_name)) {
430  match = TRUE;
431  }
432 #endif
433  if (match) {
434  crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name);
435  return match;
436  }
437 
438  return match;
439 }
440 
441 
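/* If a primitive's meta-attributes define a guest node (remote-node and
 * related attributes), inject a matching ocf:pacemaker:remote connection
 * primitive into the resources XML and return the guest node name, or NULL
 * if no guest node is defined or the name collides with an existing resource */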
442 static const char *
443 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
444 {
445  xmlNode *attr_set = NULL;
446  xmlNode *attr = NULL;
447 
448  const char *container_id = ID(xml_obj);
449  const char *remote_name = NULL;
450  const char *remote_server = NULL;
451  const char *remote_port = NULL;
452  const char *connect_timeout = "60s";
453  const char *remote_allow_migrate=NULL;
454  const char *is_managed = NULL;
455 
456  for (attr_set = __xml_first_child_element(xml_obj); attr_set != NULL;
457  attr_set = __xml_next_element(attr_set)) {
458  if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
459  continue;
460  }
461 
462  for (attr = __xml_first_child_element(attr_set); attr != NULL;
463  attr = __xml_next_element(attr)) {
464  const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
465  const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
466 
467  if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) {
468  remote_name = value;
469  } else if (safe_str_eq(name, "remote-addr")) {
470  remote_server = value;
471  } else if (safe_str_eq(name, "remote-port")) {
472  remote_port = value;
473  } else if (safe_str_eq(name, "remote-connect-timeout")) {
474  connect_timeout = value;
475  } else if (safe_str_eq(name, "remote-allow-migrate")) {
476  remote_allow_migrate=value;
477  } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) {
478  is_managed = value;
479  }
480  }
481  }
482 
483  if (remote_name == NULL) {
484  return NULL;
485  }
486 
487  if (remote_id_conflict(remote_name, data)) {
488  return NULL;
489  }
490 
491  pe_create_remote_xml(parent, remote_name, container_id,
492  remote_allow_migrate, is_managed,
493  connect_timeout, remote_server, remote_port);
494  return remote_name;
495 }
496 
497 static void
498 handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
499 {
500  if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
501  /* Ignore fencing for remote nodes that don't have a connection resource
502  * associated with them. This happens when remote node entries get left
503  * in the nodes section after the connection resource is removed.
504  */
505  return;
506  }
507 
508  if (is_set(data_set->flags, pe_flag_startup_fencing)) {
509  // All nodes are unclean until we've seen their status entry
510  new_node->details->unclean = TRUE;
511 
512  } else {
513  // Blind faith ...
514  new_node->details->unclean = FALSE;
515  }
516 
517  /* We need to be able to determine if a node's status section
518  * exists or not separate from whether the node is unclean. */
519  new_node->details->unseen = TRUE;
520 }
521 
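/* Create a node_t for every node entry in the configuration, applying the
 * start-up fencing policy and unpacking node attributes and utilization;
 * also creates a fake local node if data_set->localhost is set but missing
 * from the CIB */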
522 gboolean
523 unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
524 {
525  xmlNode *xml_obj = NULL;
526  node_t *new_node = NULL;
527  const char *id = NULL;
528  const char *uname = NULL;
529  const char *type = NULL;
530  const char *score = NULL;
531 
532  for (xml_obj = __xml_first_child_element(xml_nodes); xml_obj != NULL;
533  xml_obj = __xml_next_element(xml_obj)) {
534 
535  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
536  new_node = NULL;
537 
538  id = crm_element_value(xml_obj, XML_ATTR_ID);
539  uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
540  type = crm_element_value(xml_obj, XML_ATTR_TYPE);
541  score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
542  crm_trace("Processing node %s/%s", uname, id);
543 
544  if (id == NULL) {
545  crm_config_err("Must specify id tag in <node>");
546  continue;
547  }
548  new_node = pe_create_node(id, uname, type, score, data_set);
549 
550  if (new_node == NULL) {
551  return FALSE;
552  }
553 
554 /* if(data_set->have_quorum == FALSE */
555 /* && data_set->no_quorum_policy == no_quorum_stop) { */
556 /* /\* start shutting resources down *\/ */
557 /* new_node->weight = -INFINITY; */
558 /* } */
559 
560  handle_startup_fencing(data_set, new_node);
561 
562  add_node_attrs(xml_obj, new_node, FALSE, data_set);
563  pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, NULL,
564  new_node->details->utilization, NULL,
565  FALSE, data_set);
566 
567  crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
568  }
569  }
570 
571  if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
572  crm_info("Creating a fake local node");
573  pe_create_node(data_set->localhost, data_set->localhost, NULL, 0,
574  data_set);
575  }
576 
577  return TRUE;
578 }
579 
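/* Link a resource to the container named by its XML_RSC_ATTR_CONTAINER
 * meta-attribute (recursing through children first), marking the container
 * with pe_rsc_is_container and recording the resource among its fillers */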
580 static void
581 setup_container(resource_t * rsc, pe_working_set_t * data_set)
582 {
583  const char *container_id = NULL;
584 
585  if (rsc->children) {
586  GListPtr gIter = rsc->children;
587 
588  for (; gIter != NULL; gIter = gIter->next) {
589  resource_t *child_rsc = (resource_t *) gIter->data;
590 
591  setup_container(child_rsc, data_set);
592  }
593  return;
594  }
595 
596  container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
597  if (container_id && safe_str_neq(container_id, rsc->id)) {
598  resource_t *container = pe_find_resource(data_set->resources, container_id);
599 
600  if (container) {
601  rsc->container = container;
602  set_bit(container->flags, pe_rsc_is_container);
603  container->fillers = g_list_append(container->fillers, rsc);
604  pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
605  } else {
606  pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
607  }
608  }
609 }
610 
611 gboolean
612 unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
613 {
614  xmlNode *xml_obj = NULL;
615 
616  /* Create remote nodes and guest nodes from the resource configuration
617  * before unpacking resources.
618  */
619  for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL;
620  xml_obj = __xml_next_element(xml_obj)) {
621 
622  const char *new_node_id = NULL;
623 
624  /* Check for remote nodes, which are defined by ocf:pacemaker:remote
625  * primitives.
626  */
627  if (xml_contains_remote_node(xml_obj)) {
628  new_node_id = ID(xml_obj);
629  /* The "pe_find_node" check is here to make sure we don't iterate over
630  * an expanded node that has already been added to the node list. */
631  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
632  crm_trace("Found remote node %s defined by resource %s",
633  new_node_id, ID(xml_obj));
634  pe_create_node(new_node_id, new_node_id, "remote", NULL,
635  data_set);
636  }
637  continue;
638  }
639 
640  /* Check for guest nodes, which are defined by special meta-attributes
641  * of a primitive of any type (for example, VirtualDomain or Xen).
642  */
643  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) {
644  /* This will add an ocf:pacemaker:remote primitive to the
645  * configuration for the guest node's connection, to be unpacked
646  * later.
647  */
648  new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);
649  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
650  crm_trace("Found guest node %s in resource %s",
651  new_node_id, ID(xml_obj));
652  pe_create_node(new_node_id, new_node_id, "remote", NULL,
653  data_set);
654  }
655  continue;
656  }
657 
658  /* Check for guest nodes inside a group. Clones are currently not
659  * supported as guest nodes.
660  */
661  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) {
662  xmlNode *xml_obj2 = NULL;
663  for (xml_obj2 = __xml_first_child_element(xml_obj); xml_obj2 != NULL;
664  xml_obj2 = __xml_next_element(xml_obj2)) {
665 
666  new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);
667 
668  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
669  crm_trace("Found guest node %s in resource %s inside group %s",
670  new_node_id, ID(xml_obj2), ID(xml_obj));
671  pe_create_node(new_node_id, new_node_id, "remote", NULL,
672  data_set);
673  }
674  }
675  }
676  }
677  return TRUE;
678 }
679 
680 /* Call this after all the nodes and resources have been
681  * unpacked, but before the status section is read.
682  *
683  * A remote node's online status is reflected by the state
684  * of the remote node's connection resource. We need to link
685  * the remote node to this connection resource so we can have
686  * easy access to the connection resource during the PE calculations.
687  */
688 static void
689 link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
690 {
691  node_t *remote_node = NULL;
692 
693  if (new_rsc->is_remote_node == FALSE) {
694  return;
695  }
696 
697  if (is_set(data_set->flags, pe_flag_quick_location)) {
698  /* remote_nodes and remote_resources are not linked in quick location calculations */
699  return;
700  }
701 
702  remote_node = pe_find_node(data_set->nodes, new_rsc->id);
703  CRM_CHECK(remote_node != NULL, return;);
704 
705  pe_rsc_trace(new_rsc, "Linking remote connection resource %s to node %s",
706  new_rsc->id, remote_node->details->uname);
707  remote_node->details->remote_rsc = new_rsc;
708 
709  if (new_rsc->container == NULL) {
710  /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
711  * the same as is done for cluster nodes.
712  */
713  handle_startup_fencing(data_set, remote_node);
714 
715  } else {
716  /* pe_create_node() marks the new node as "remote" or "cluster"; now
717  * that we know the node is a guest node, update it correctly.
718  */
719  g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
720  strdup("container"));
721  }
722 }
723 
724 static void
725 destroy_tag(gpointer data)
726 {
727  tag_t *tag = data;
728 
729  if (tag) {
730  free(tag->id);
731  g_list_free_full(tag->refs, free);
732  free(tag);
733  }
734 }
735 
748 gboolean
749 unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
750 {
751  xmlNode *xml_obj = NULL;
752  GListPtr gIter = NULL;
753 
754  data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash,
755  g_str_equal, free,
756  destroy_tag);
757 
758  for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL;
759  xml_obj = __xml_next_element(xml_obj)) {
760 
761  resource_t *new_rsc = NULL;
762 
763  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
764  const char *template_id = ID(xml_obj);
765 
766  if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets,
767  template_id, NULL, NULL) == FALSE) {
768  /* Record the template's ID for the knowledge of its existence anyway. */
769  g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
770  }
771  continue;
772  }
773 
774  crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj));
775  if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) {
776  data_set->resources = g_list_append(data_set->resources, new_rsc);
777  pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
778 
779  } else {
780  crm_config_err("Failed unpacking %s %s",
781  crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID));
782  if (new_rsc != NULL && new_rsc->fns != NULL) {
783  new_rsc->fns->free(new_rsc);
784  }
785  }
786  }
787 
788  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
789  resource_t *rsc = (resource_t *) gIter->data;
790 
791  setup_container(rsc, data_set);
792  link_rsc2remotenode(data_set, rsc);
793  }
794 
795  data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
796  if (is_set(data_set->flags, pe_flag_quick_location)) {
797  /* Ignore */
798 
799  } else if (is_set(data_set->flags, pe_flag_stonith_enabled)
800  && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
801 
802  crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
803  crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
804  crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
805  }
806 
807  return TRUE;
808 }
809 
810 gboolean
811 unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
812 {
813  xmlNode *xml_tag = NULL;
814 
815  data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
816  destroy_tag);
817 
818  for (xml_tag = __xml_first_child_element(xml_tags); xml_tag != NULL;
819  xml_tag = __xml_next_element(xml_tag)) {
820 
821  xmlNode *xml_obj_ref = NULL;
822  const char *tag_id = ID(xml_tag);
823 
824  if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
825  continue;
826  }
827 
828  if (tag_id == NULL) {
829  crm_config_err("Failed unpacking %s: %s should be specified",
830  crm_element_name(xml_tag), XML_ATTR_ID);
831  continue;
832  }
833 
834  for (xml_obj_ref = __xml_first_child_element(xml_tag); xml_obj_ref != NULL;
835  xml_obj_ref = __xml_next_element(xml_obj_ref)) {
836 
837  const char *obj_ref = ID(xml_obj_ref);
838 
839  if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
840  continue;
841  }
842 
843  if (obj_ref == NULL) {
844  crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
845  crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID);
846  continue;
847  }
848 
849  if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
850  return FALSE;
851  }
852  }
853  }
854 
855  return TRUE;
856 }
857 
858 /* The ticket state section:
859  * "/cib/status/tickets/ticket_state" */
860 static gboolean
861 unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
862 {
863  const char *ticket_id = NULL;
864  const char *granted = NULL;
865  const char *last_granted = NULL;
866  const char *standby = NULL;
867  xmlAttrPtr xIter = NULL;
868 
869  ticket_t *ticket = NULL;
870 
871  ticket_id = ID(xml_ticket);
872  if (ticket_id == NULL || strlen(ticket_id) == 0) {
873  return FALSE;
874  }
875 
876  crm_trace("Processing ticket state for %s", ticket_id);
877 
878  ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
879  if (ticket == NULL) {
880  ticket = ticket_new(ticket_id, data_set);
881  if (ticket == NULL) {
882  return FALSE;
883  }
884  }
885 
886  for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
887  const char *prop_name = (const char *)xIter->name;
888  const char *prop_value = crm_element_value(xml_ticket, prop_name);
889 
890  if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
891  continue;
892  }
893  g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
894  }
895 
896  granted = g_hash_table_lookup(ticket->state, "granted");
897  if (granted && crm_is_true(granted)) {
898  ticket->granted = TRUE;
899  crm_info("We have ticket '%s'", ticket->id);
900  } else {
901  ticket->granted = FALSE;
902  crm_info("We do not have ticket '%s'", ticket->id);
903  }
904 
905  last_granted = g_hash_table_lookup(ticket->state, "last-granted");
906  if (last_granted) {
907  ticket->last_granted = crm_parse_int(last_granted, 0);
908  }
909 
910  standby = g_hash_table_lookup(ticket->state, "standby");
911  if (standby && crm_is_true(standby)) {
912  ticket->standby = TRUE;
913  if (ticket->granted) {
914  crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
915  }
916  } else {
917  ticket->standby = FALSE;
918  }
919 
920  crm_trace("Done with ticket state for %s", ticket_id);
921 
922  return TRUE;
923 }
924 
925 static gboolean
926 unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
927 {
928  xmlNode *xml_obj = NULL;
929 
930  for (xml_obj = __xml_first_child_element(xml_tickets); xml_obj != NULL;
931  xml_obj = __xml_next_element(xml_obj)) {
932 
933  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) {
934  continue;
935  }
936  unpack_ticket_state(xml_obj, data_set);
937  }
938 
939  return TRUE;
940 }
941 
942 static void
943 unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set)
944 {
945  const char *resource_discovery_enabled = NULL;
946  xmlNode *attrs = NULL;
947  resource_t *rsc = NULL;
948 
949  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
950  return;
951  }
952 
953  if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
954  return;
955  }
956  crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname);
957 
958  this_node->details->remote_maintenance =
959  crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0");
960 
961  rsc = this_node->details->remote_rsc;
962  if (this_node->details->remote_requires_reset == FALSE) {
963  this_node->details->unclean = FALSE;
964  this_node->details->unseen = FALSE;
965  }
966  attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
967  add_node_attrs(attrs, this_node, TRUE, data_set);
968 
969  if (pe__shutdown_requested(this_node)) {
970  crm_info("Node %s is shutting down", this_node->details->uname);
971  this_node->details->shutdown = TRUE;
972  if (rsc) {
973  rsc->next_role = RSC_ROLE_STOPPED;
974  }
975  }
976 
977  if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
978  crm_info("Node %s is in standby-mode", this_node->details->uname);
979  this_node->details->standby = TRUE;
980  }
981 
982  if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
983  (rsc && !is_set(rsc->flags, pe_rsc_managed))) {
984  crm_info("Node %s is in maintenance-mode", this_node->details->uname);
985  this_node->details->maintenance = TRUE;
986  }
987 
988  resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
989  if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
990  if (pe__is_remote_node(this_node)
991  && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
992  crm_warn("Ignoring %s attribute on remote node %s because stonith is disabled",
993  XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
994  } else {
995  /* This is either a remote node with fencing enabled, or a guest
996  * node. We don't care whether fencing is enabled when fencing guest
997  * nodes, because they are "fenced" by recovering their containing
998  * resource.
999  */
1000  crm_info("Node %s has resource discovery disabled", this_node->details->uname);
1001  this_node->details->rsc_discovery_enabled = FALSE;
1002  }
1003  }
1004 }
1005 
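/* Make one pass over the status section, unpacking LRM resource history for
 * each node state entry that is ready to be processed; remote and guest nodes
 * are deferred until their connection (and, for guests, container) resource
 * is known to be started, unless 'fence' is set. Returns true if any new node
 * was processed, so the caller can loop until nothing more changes. */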
1006 static bool
1007 unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
1008 {
1009  bool changed = false;
1010  xmlNode *lrm_rsc = NULL;
1011 
1012  for (xmlNode *state = __xml_first_child_element(status); state != NULL;
1013  state = __xml_next_element(state)) {
1014 
1015  const char *id = NULL;
1016  const char *uname = NULL;
1017  node_t *this_node = NULL;
1018  bool process = FALSE;
1019 
1020  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
1021  continue;
1022  }
1023 
1024  id = crm_element_value(state, XML_ATTR_ID);
1025  uname = crm_element_value(state, XML_ATTR_UNAME);
1026  this_node = pe_find_node_any(data_set->nodes, id, uname);
1027 
1028  if (this_node == NULL) {
1029  crm_info("Node %s is unknown", id);
1030  continue;
1031 
1032  } else if (this_node->details->unpacked) {
1033  crm_info("Node %s is already processed", id);
1034  continue;
1035 
1036  } else if (!pe__is_guest_or_remote_node(this_node)
1037  && is_set(data_set->flags, pe_flag_stonith_enabled)) {
1038  // A redundant test, but preserves the order for regression tests
1039  process = TRUE;
1040 
1041  } else if (pe__is_guest_or_remote_node(this_node)) {
1042  bool check = FALSE;
1043  resource_t *rsc = this_node->details->remote_rsc;
1044 
1045  if(fence) {
1046  check = TRUE;
1047 
1048  } else if(rsc == NULL) {
1049  /* Not ready yet */
1050 
1051  } else if (pe__is_guest_node(this_node)
1052  && rsc->role == RSC_ROLE_STARTED
1053  && rsc->container->role == RSC_ROLE_STARTED) {
1054  /* Both the connection and its containing resource need to be
1055  * known to be up before we process resources running in it.
1056  */
1057  check = TRUE;
1058  crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
1059 
1060  } else if (!pe__is_guest_node(this_node)
1061  && rsc->role == RSC_ROLE_STARTED) {
1062  check = TRUE;
1063  crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
1064  }
1065 
1066  if (check) {
1067  determine_remote_online_status(data_set, this_node);
1068  unpack_handle_remote_attrs(this_node, state, data_set);
1069  process = TRUE;
1070  }
1071 
1072  } else if (this_node->details->online) {
1073  process = TRUE;
1074 
1075  } else if (fence) {
1076  process = TRUE;
1077  }
1078 
1079  if(process) {
1080  crm_trace("Processing lrm resource entries on %shealthy%s node: %s",
1081  fence?"un":"",
1082  (pe__is_guest_or_remote_node(this_node)? " remote" : ""),
1083  this_node->details->uname);
1084  changed = TRUE;
1085  this_node->details->unpacked = TRUE;
1086 
1087  lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
1088  lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
1089  unpack_lrm_resources(this_node, lrm_rsc, data_set);
1090  }
1091  }
1092  return changed;
1093 }
1094 
1095 /* remove nodes that are down, stopping */
1096 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1097 /* anything else? */
1098 gboolean
1099 unpack_status(xmlNode * status, pe_working_set_t * data_set)
1100 {
1101  const char *id = NULL;
1102  const char *uname = NULL;
1103 
1104  xmlNode *state = NULL;
1105  node_t *this_node = NULL;
1106 
1107  crm_trace("Beginning unpack");
1108 
1109  if (data_set->tickets == NULL) {
1110  data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal,
1111  free, destroy_ticket);
1112  }
1113 
1114  for (state = __xml_first_child_element(status); state != NULL;
1115  state = __xml_next_element(state)) {
1116 
1117  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
1118  unpack_tickets_state((xmlNode *) state, data_set);
1119 
1120  } else if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
1121  xmlNode *attrs = NULL;
1122  const char *resource_discovery_enabled = NULL;
1123 
1124  id = crm_element_value(state, XML_ATTR_ID);
1125  uname = crm_element_value(state, XML_ATTR_UNAME);
1126  this_node = pe_find_node_any(data_set->nodes, id, uname);
1127 
1128  if (uname == NULL) {
1129  /* error */
1130  continue;
1131 
1132  } else if (this_node == NULL) {
1133  crm_config_warn("Node %s in status section no longer exists", uname);
1134  continue;
1135 
1136  } else if (pe__is_guest_or_remote_node(this_node)) {
1137  /* online state for remote nodes is determined by the
1138  * rsc state after all the unpacking is done. we do however
1139  * need to mark whether or not the node has been fenced as this plays
1140  * a role during unpacking cluster node resource state */
1141  this_node->details->remote_was_fenced =
1142  crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0");
1143  continue;
1144  }
1145 
1146  crm_trace("Processing node id=%s, uname=%s", id, uname);
1147 
1148  /* Mark the node as provisionally clean
1149  * - at least we have seen it in the current cluster's lifetime
1150  */
1151  this_node->details->unclean = FALSE;
1152  this_node->details->unseen = FALSE;
1153  attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
1154  add_node_attrs(attrs, this_node, TRUE, data_set);
1155 
1156  if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
1157  crm_info("Node %s is in standby-mode", this_node->details->uname);
1158  this_node->details->standby = TRUE;
1159  }
1160 
1161  if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) {
1162  crm_info("Node %s is in maintenance-mode", this_node->details->uname);
1163  this_node->details->maintenance = TRUE;
1164  }
1165 
1166  resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
1167  if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
1168  crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
1169  XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
1170  }
1171 
1172  crm_trace("determining node state");
1173  determine_online_status(state, this_node, data_set);
1174 
1175  if (is_not_set(data_set->flags, pe_flag_have_quorum)
1176  && this_node->details->online
1177  && (data_set->no_quorum_policy == no_quorum_suicide)) {
1178  /* Everything else should flow from this automatically
1179  * At least until the PE becomes able to migrate off healthy resources
1180  */
1181  pe_fence_node(data_set, this_node, "cluster does not have quorum");
1182  }
1183  }
1184  }
1185 
1186 
1187  while(unpack_node_loop(status, FALSE, data_set)) {
1188  crm_trace("Start another loop");
1189  }
1190 
1191  // Now catch any nodes we didn't see
1192  unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set);
1193 
1194  /* Now that we know where resources are, we can schedule stops of containers
1195  * with failed bundle connections
1196  */
1197  if (data_set->stop_needed != NULL) {
1198  for (GList *item = data_set->stop_needed; item; item = item->next) {
1199  pe_resource_t *container = item->data;
1200  pe_node_t *node = pe__current_node(container);
1201 
1202  if (node) {
1203  stop_action(container, node, FALSE);
1204  }
1205  }
1206  g_list_free(data_set->stop_needed);
1207  data_set->stop_needed = NULL;
1208  }
1209 
1210  for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1211  node_t *this_node = gIter->data;
1212 
1213  if (this_node == NULL) {
1214  continue;
1215  } else if (!pe__is_guest_or_remote_node(this_node)) {
1216  continue;
1217  } else if(this_node->details->unpacked) {
1218  continue;
1219  }
1220  determine_remote_online_status(data_set, this_node);
1221  }
1222 
1223  return TRUE;
1224 }
1225 
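/* Determine a cluster node's online status when fencing is disabled: the node
 * counts as online only if it is in the membership and its controller has
 * joined, while an unexpectedly down peer is still marked unclean */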
1226 static gboolean
1227 determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
1228  node_t * this_node)
1229 {
1230  gboolean online = FALSE;
1231  const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
1232  const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
1233  const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
1234  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1235 
1236  if (!crm_is_true(in_cluster)) {
1237  crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
1238 
1239  } else if (safe_str_eq(is_peer, ONLINESTATUS)) {
1240  if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
1241  online = TRUE;
1242  } else {
1243  crm_debug("Node is not ready to run resources: %s", join);
1244  }
1245 
1246  } else if (this_node->details->expected_up == FALSE) {
1247  crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster));
1248  crm_trace("\tis_peer=%s, join=%s, expected=%s",
1249  crm_str(is_peer), crm_str(join), crm_str(exp_state));
1250 
1251  } else {
1252  /* mark it unclean */
1253  pe_fence_node(data_set, this_node, "peer is unexpectedly down");
1254  crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
1255  crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
1256  }
1257  return online;
1258 }
1259 
1260 static gboolean
1261 determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
1262  node_t * this_node)
1263 {
1264  gboolean online = FALSE;
1265  gboolean do_terminate = FALSE;
1266  bool crmd_online = FALSE;
1267  const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
1268  const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
1269  const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
1270  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1271  const char *terminate = pe_node_attribute_raw(this_node, "terminate");
1272 
1273 /*
1274  - XML_NODE_IN_CLUSTER ::= true|false
1275  - XML_NODE_IS_PEER ::= online|offline
1276  - XML_NODE_JOIN_STATE ::= member|down|pending|banned
1277  - XML_NODE_EXPECTED ::= member|down
1278 */
1279 
1280  if (crm_is_true(terminate)) {
1281  do_terminate = TRUE;
1282 
1283  } else if (terminate != NULL && strlen(terminate) > 0) {
1284  /* could be a time() value */
1285  char t = terminate[0];
1286 
1287  if (t != '0' && isdigit(t)) {
1288  do_terminate = TRUE;
1289  }
1290  }
1291 
1292  crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
1293  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
1294  crm_str(join), crm_str(exp_state), do_terminate);
1295 
1296  online = crm_is_true(in_cluster);
1297  crmd_online = safe_str_eq(is_peer, ONLINESTATUS);
1298  if (exp_state == NULL) {
1299  exp_state = CRMD_JOINSTATE_DOWN;
1300  }
1301 
1302  if (this_node->details->shutdown) {
1303  crm_debug("%s is shutting down", this_node->details->uname);
1304 
1305  /* Slightly different criteria since we can't shut down a dead peer */
1306  online = crmd_online;
1307 
1308  } else if (in_cluster == NULL) {
1309  pe_fence_node(data_set, this_node, "peer has not been seen by the cluster");
1310 
1311  } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
1312  pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria");
1313 
1314  } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {
1315 
1316  if (crm_is_true(in_cluster) || crmd_online) {
1317  crm_info("- Node %s is not ready to run resources", this_node->details->uname);
1318  this_node->details->standby = TRUE;
1319  this_node->details->pending = TRUE;
1320 
1321  } else {
1322  crm_trace("%s is down or still coming up", this_node->details->uname);
1323  }
1324 
1325  } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
1326  && crm_is_true(in_cluster) == FALSE && !crmd_online) {
1327  crm_info("Node %s was just shot", this_node->details->uname);
1328  online = FALSE;
1329 
1330  } else if (crm_is_true(in_cluster) == FALSE) {
1331  pe_fence_node(data_set, this_node, "peer is no longer part of the cluster");
1332 
1333  } else if (!crmd_online) {
1334  pe_fence_node(data_set, this_node, "peer process is no longer available");
1335 
1336  /* Everything is running at this point, now check join state */
1337  } else if (do_terminate) {
1338  pe_fence_node(data_set, this_node, "termination was requested");
1339 
1340  } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
1341  crm_info("Node %s is active", this_node->details->uname);
1342 
1343  } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
1344  || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
1345  crm_info("Node %s is not ready to run resources", this_node->details->uname);
1346  this_node->details->standby = TRUE;
1347  this_node->details->pending = TRUE;
1348 
1349  } else {
1350  pe_fence_node(data_set, this_node, "peer was in an unknown state");
1351  crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
1352  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
1353  crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
1354  }
1355 
1356  return online;
1357 }
1358 
1359 static gboolean
1360 determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node)
1361 {
1362  resource_t *rsc = this_node->details->remote_rsc;
1363  resource_t *container = NULL;
1364  pe_node_t *host = NULL;
1365 
1366  /* If there is a node state entry for a (former) Pacemaker Remote node
1367  * but no resource creating that node, the node's connection resource will
1368  * be NULL. Consider it an offline remote node in that case.
1369  */
1370  if (rsc == NULL) {
1371  this_node->details->online = FALSE;
1372  goto remote_online_done;
1373  }
1374 
1375  container = rsc->container;
1376 
1377  if (container && (g_list_length(rsc->running_on) == 1)) {
1378  host = rsc->running_on->data;
1379  }
1380 
1381  /* If the resource is currently started, mark it online. */
1382  if (rsc->role == RSC_ROLE_STARTED) {
1383  crm_trace("%s node %s presumed ONLINE because connection resource is started",
1384  (container? "Guest" : "Remote"), this_node->details->id);
1385  this_node->details->online = TRUE;
1386  }
1387 
1388  /* consider this node shutting down if transitioning start->stop */
1389  if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
1390  crm_trace("%s node %s shutting down because connection resource is stopping",
1391  (container? "Guest" : "Remote"), this_node->details->id);
1392  this_node->details->shutdown = TRUE;
1393  }
1394 
1395  /* Now check all the failure conditions. */
1396  if(container && is_set(container->flags, pe_rsc_failed)) {
1397  crm_trace("Guest node %s UNCLEAN because guest resource failed",
1398  this_node->details->id);
1399  this_node->details->online = FALSE;
1400  this_node->details->remote_requires_reset = TRUE;
1401 
1402  } else if(is_set(rsc->flags, pe_rsc_failed)) {
1403  crm_trace("%s node %s OFFLINE because connection resource failed",
1404  (container? "Guest" : "Remote"), this_node->details->id);
1405  this_node->details->online = FALSE;
1406 
1407  } else if (rsc->role == RSC_ROLE_STOPPED
1408  || (container && container->role == RSC_ROLE_STOPPED)) {
1409 
1410  crm_trace("%s node %s OFFLINE because its resource is stopped",
1411  (container? "Guest" : "Remote"), this_node->details->id);
1412  this_node->details->online = FALSE;
1413  this_node->details->remote_requires_reset = FALSE;
1414 
1415  } else if (host && (host->details->online == FALSE)
1416  && host->details->unclean) {
1417  crm_trace("Guest node %s UNCLEAN because host is unclean",
1418  this_node->details->id);
1419  this_node->details->online = FALSE;
1420  this_node->details->remote_requires_reset = TRUE;
1421  }
1422 
1423 remote_online_done:
1424  crm_trace("Remote node %s online=%s",
1425  this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1426  return this_node->details->online;
1427 }
1428 
1429 gboolean
1430 determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
1431 {
1432  gboolean online = FALSE;
1433  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1434 
1435  if (this_node == NULL) {
1436  crm_config_err("No node to check");
1437  return online;
1438  }
1439 
1440  this_node->details->shutdown = FALSE;
1441  this_node->details->expected_up = FALSE;
1442 
1443  if (pe__shutdown_requested(this_node)) {
1444  this_node->details->shutdown = TRUE;
1445 
1446  } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
1447  this_node->details->expected_up = TRUE;
1448  }
1449 
1450  if (this_node->details->type == node_ping) {
1451  this_node->details->unclean = FALSE;
1452  online = FALSE; /* As far as resource management is concerned,
1453  * the node is safely offline.
1454  * Anyone caught abusing this logic will be shot
1455  */
1456 
1457  } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
1458  online = determine_online_status_no_fencing(data_set, node_state, this_node);
1459 
1460  } else {
1461  online = determine_online_status_fencing(data_set, node_state, this_node);
1462  }
1463 
1464  if (online) {
1465  this_node->details->online = TRUE;
1466 
1467  } else {
1468  /* remove node from contention */
1469  this_node->fixed = TRUE;
1470  this_node->weight = -INFINITY;
1471  }
1472 
1473  if (online && this_node->details->shutdown) {
1474  /* don't run resources here */
1475  this_node->fixed = TRUE;
1476  this_node->weight = -INFINITY;
1477  }
1478 
1479  if (this_node->details->type == node_ping) {
1480  crm_info("Node %s is not a pacemaker node", this_node->details->uname);
1481 
1482  } else if (this_node->details->unclean) {
1483  pe_proc_warn("Node %s is unclean", this_node->details->uname);
1484 
1485  } else if (this_node->details->online) {
1486  crm_info("Node %s is %s", this_node->details->uname,
1487  this_node->details->shutdown ? "shutting down" :
1488  this_node->details->pending ? "pending" :
1489  this_node->details->standby ? "standby" :
1490  this_node->details->maintenance ? "maintenance" : "online");
1491 
1492  } else {
1493  crm_trace("Node %s is offline", this_node->details->uname);
1494  }
1495 
1496  return online;
1497 }
1498 
1507 const char *
1508 pe_base_name_end(const char *id)
1509 {
1510  if (!crm_strlen_zero(id)) {
1511  const char *end = id + strlen(id) - 1;
1512 
1513  for (const char *s = end; s > id; --s) {
1514  switch (*s) {
1515  case '0':
1516  case '1':
1517  case '2':
1518  case '3':
1519  case '4':
1520  case '5':
1521  case '6':
1522  case '7':
1523  case '8':
1524  case '9':
1525  break;
1526  case ':':
1527  return (s == end)? s : (s - 1);
1528  default:
1529  return end;
1530  }
1531  }
1532  return end;
1533  }
1534  return NULL;
1535 }
1536 
1547 char *
1548 clone_strip(const char *last_rsc_id)
1549 {
1550  const char *end = pe_base_name_end(last_rsc_id);
1551  char *basename = NULL;
1552 
1553  CRM_ASSERT(end);
1554  basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1555  CRM_ASSERT(basename);
1556  return basename;
1557 }
1558 
1569 char *
1570 clone_zero(const char *last_rsc_id)
1571 {
1572  const char *end = pe_base_name_end(last_rsc_id);
1573  size_t base_name_len = end - last_rsc_id + 1;
1574  char *zero = NULL;
1575 
1576  CRM_ASSERT(end);
1577  zero = calloc(base_name_len + 3, sizeof(char));
1578  CRM_ASSERT(zero);
1579  memcpy(zero, last_rsc_id, base_name_len);
1580  zero[base_name_len] = ':';
1581  zero[base_name_len + 1] = '0';
1582  return zero;
1583 }
1584 
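/* Build an orphan resource object for an operation history entry that has no
 * matching configuration, registering any remote node it implies and adding
 * the orphan to data_set->resources */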
1585 static resource_t *
1586 create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
1587 {
1588  resource_t *rsc = NULL;
1589  xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
1590 
1591  copy_in_properties(xml_rsc, rsc_entry);
1592  crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
1593  crm_log_xml_debug(xml_rsc, "Orphan resource");
1594 
1595  if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
1596  return NULL;
1597  }
1598 
1599  if (xml_contains_remote_node(xml_rsc)) {
1600  node_t *node;
1601 
1602  crm_debug("Detected orphaned remote node %s", rsc_id);
1603  node = pe_find_node(data_set->nodes, rsc_id);
1604  if (node == NULL) {
1605  node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
1606  }
1607  link_rsc2remotenode(data_set, rsc);
1608 
1609  if (node) {
1610  crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
1611  node->details->shutdown = TRUE;
1612  }
1613  }
1614 
1615  if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
1616  /* This orphaned rsc needs to be mapped to a container. */
1617  crm_trace("Detected orphaned container filler %s", rsc_id);
1618  set_bit(rsc->flags, pe_rsc_orphan_container_filler);
1619  }
1620  set_bit(rsc->flags, pe_rsc_orphan);
1621  data_set->resources = g_list_append(data_set->resources, rsc);
1622  return rsc;
1623 }
1624 
1629 static pe_resource_t *
1630 create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
1631  pe_node_t *node, pe_working_set_t *data_set)
1632 {
1633  pe_resource_t *top = pe__create_clone_child(parent, data_set);
1634 
1635  // find_rsc() because we might be a cloned group
1636  pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
1637 
1638  pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
1639  top->id, parent->id, rsc_id, node->details->uname);
1640  return orphan;
1641 }
1642 
1657 static resource_t *
1658 find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
1659  const char *rsc_id)
1660 {
1661  GListPtr rIter = NULL;
1662  pe_resource_t *rsc = NULL;
1663  pe_resource_t *inactive_instance = NULL;
1664  gboolean skip_inactive = FALSE;
1665 
1666  CRM_ASSERT(parent != NULL);
1667  CRM_ASSERT(pe_rsc_is_clone(parent));
1668  CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
1669 
1670  // Check for active (or partially active, for cloned groups) instance
1671  pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
1672  for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
1673  GListPtr locations = NULL;
1674  resource_t *child = rIter->data;
1675 
1676  /* Check whether this instance is already known to be active or pending
1677  * anywhere, at this stage of unpacking. Because this function is called
1678  * for a resource before the resource's individual operation history
1679  * entries are unpacked, locations will generally not contain the
1680  * desired node.
1681  *
1682  * However, there are three exceptions:
1683  * (1) when child is a cloned group and we have already unpacked the
1684  * history of another member of the group on the same node;
1685  * (2) when we've already unpacked the history of another numbered
1686  * instance on the same node (which can happen if globally-unique
1687  * was flipped from true to false); and
1688  * (3) when we re-run calculations on the same data set as part of a
1689  * simulation.
1690  */
1691  child->fns->location(child, &locations, 2);
1692  if (locations) {
1693  /* We should never associate the same numbered anonymous clone
1694  * instance with multiple nodes, and clone instances can't migrate,
1695  * so there must be only one location, regardless of history.
1696  */
1697  CRM_LOG_ASSERT(locations->next == NULL);
1698 
1699  if (((pe_node_t *)locations->data)->details == node->details) {
1700  /* This child instance is active on the requested node, so check
1701  * for a corresponding configured resource. We use find_rsc()
1702  * instead of child because child may be a cloned group, and we
1703  * need the particular member corresponding to rsc_id.
1704  *
1705  * If the history entry is orphaned, rsc will be NULL.
1706  */
1707  rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
1708  if (rsc) {
1709  /* If there are multiple instance history entries for an
1710  * anonymous clone in a single node's history (which can
1711  * happen if globally-unique is switched from true to
1712  * false), we want to consider the instances beyond the
1713  * first as orphans, even if there are inactive instance
1714  * numbers available.
1715  */
1716  if (rsc->running_on) {
1717  crm_notice("Active (now-)anonymous clone %s has "
1718  "multiple (orphan) instance histories on %s",
1719  parent->id, node->details->uname);
1720  skip_inactive = TRUE;
1721  rsc = NULL;
1722  } else {
1723  pe_rsc_trace(parent, "Resource %s, active", rsc->id);
1724  }
1725  }
1726  }
1727  g_list_free(locations);
1728 
1729  } else {
1730  pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
1731  if (!skip_inactive && !inactive_instance
1732  && is_not_set(child->flags, pe_rsc_block)) {
1733  // Remember one inactive instance in case we don't find active
1734  inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
1735  pe_find_clone);
1736 
1737  /* ... but don't use it if it was already associated with a
1738  * pending action on another node
1739  */
1740  if (inactive_instance && inactive_instance->pending_node
1741  && (inactive_instance->pending_node->details != node->details)) {
1742  inactive_instance = NULL;
1743  }
1744  }
1745  }
1746  }
1747 
1748  if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
1749  pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
1750  rsc = inactive_instance;
1751  }
1752 
1753  /* If the resource has "requires" set to "quorum" or "nothing", and we don't
1754  * have a clone instance for every node, we don't want to consume a valid
1755  * instance number for unclean nodes. Such instances may appear to be active
1756  * according to the history, but should be considered inactive, so we can
1757  * start an instance elsewhere. Treat such instances as orphans.
1758  *
1759  * An exception is instances running on guest nodes -- since guest node
1760  * "fencing" is actually just a resource stop, requires shouldn't apply.
1761  *
1762  * @TODO Ideally, we'd use an inactive instance number if it is not needed
1763  * for any clean instances. However, we don't know that at this point.
1764  */
1765  if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing)
1766  && (!node->details->online || node->details->unclean)
1767  && !pe__is_guest_node(node)
1768  && !pe__is_universal_clone(parent, data_set)) {
1769 
1770  rsc = NULL;
1771  }
1772 
1773  if (rsc == NULL) {
1774  rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
1775  pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
1776  }
1777  return rsc;
1778 }
1779 
1780 static resource_t *
1781 unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
1782  xmlNode * rsc_entry)
1783 {
1784  resource_t *rsc = NULL;
1785  resource_t *parent = NULL;
1786 
1787  crm_trace("looking for %s", rsc_id);
1788  rsc = pe_find_resource(data_set->resources, rsc_id);
1789 
1790  if (rsc == NULL) {
1791  /* If we didn't find the resource by its name in the operation history,
1792  * check it again as a clone instance. Even when clone-max=0, we create
1793  * a single :0 orphan to match against here.
1794  */
1795  char *clone0_id = clone_zero(rsc_id);
1796  resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id);
1797 
1798  if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) {
1799  rsc = clone0;
1800  parent = uber_parent(clone0);
1801  crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
1802  } else {
1803  crm_trace("%s is not known as %s either (orphan)",
1804  rsc_id, clone0_id);
1805  }
1806  free(clone0_id);
1807 
1808  } else if (rsc->variant > pe_native) {
1809  crm_trace("Resource history for %s is orphaned because it is no longer primitive",
1810  rsc_id);
1811  return NULL;
1812 
1813  } else {
1814  parent = uber_parent(rsc);
1815  }
1816 
1817  if (pe_rsc_is_anon_clone(parent)) {
1818 
1819  if (pe_rsc_is_bundled(parent)) {
1820  rsc = pe__find_bundle_replica(parent->parent, node);
1821  } else {
1822  char *base = clone_strip(rsc_id);
1823 
1824  rsc = find_anonymous_clone(data_set, node, parent, base);
1825  free(base);
1826  CRM_ASSERT(rsc != NULL);
1827  }
1828  }
1829 
1830  if (rsc && safe_str_neq(rsc_id, rsc->id)
1831  && safe_str_neq(rsc_id, rsc->clone_name)) {
1832 
1833  free(rsc->clone_name);
1834  rsc->clone_name = strdup(rsc_id);
1835  pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
1836  rsc_id, node->details->uname, rsc->id,
1837  (is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
1838  }
1839  return rsc;
1840 }
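/* Editorial illustration, not part of the original source: how the lookups
 * above typically resolve a history ID, assuming Pacemaker's usual
 * ":<instance>" suffix for clone instances (resource names below are
 * hypothetical).
 *
 *   rsc_id in history   lookup performed
 *   "dummy"             pe_find_resource("dummy"); if absent, try
 *                       clone_zero() -> "dummy:0" for an anonymous clone
 *   "dummy:3"           pe_find_resource("dummy:3"); for an anonymous clone
 *                       parent, clone_strip() -> "dummy" is passed to
 *                       find_anonymous_clone() to pick or create an instance
 */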
1841 
1842 static resource_t *
1843 process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
1844 {
1845  resource_t *rsc = NULL;
1846  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
1847 
1848  crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
1849  rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
1850 
1851  if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
1852  clear_bit(rsc->flags, pe_rsc_managed);
1853 
1854  } else {
1855  CRM_CHECK(rsc != NULL, return NULL);
1856  pe_rsc_trace(rsc, "Added orphan %s", rsc->id);
1857  resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", data_set);
1858  }
1859  return rsc;
1860 }
1861 
1862 static void
1863 process_rsc_state(resource_t * rsc, node_t * node,
1864  enum action_fail_response on_fail,
1865  xmlNode * migrate_op, pe_working_set_t * data_set)
1866 {
1867  node_t *tmpnode = NULL;
1868  char *reason = NULL;
1869 
1870  CRM_ASSERT(rsc);
1871  pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
1872  rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
1873 
1874  /* process current state */
1875  if (rsc->role != RSC_ROLE_UNKNOWN) {
1876  resource_t *iter = rsc;
1877 
1878  while (iter) {
1879  if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
1880  node_t *n = node_copy(node);
1881 
1882  pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
1883  n->details->uname);
1884  g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
1885  }
1886  if (is_set(iter->flags, pe_rsc_unique)) {
1887  break;
1888  }
1889  iter = iter->parent;
1890  }
1891  }
1892 
1893  /* If a managed resource is believed to be running, but node is down ... */
1894  if (rsc->role > RSC_ROLE_STOPPED
1895  && node->details->online == FALSE
1896  && node->details->maintenance == FALSE
1897  && is_set(rsc->flags, pe_rsc_managed)) {
1898 
1899  gboolean should_fence = FALSE;
1900 
1901  /* If this is a guest node, fence it (regardless of whether fencing is
1902  * enabled, because guest node fencing is done by recovery of the
1903  * container resource rather than by the fencer). Mark the resource
1904  * we're processing as failed. When the guest comes back up, its
1905  * operation history in the CIB will be cleared, freeing the affected
1906  * resource to run again once we are sure we know its state.
1907  */
1908  if (pe__is_guest_node(node)) {
1909  set_bit(rsc->flags, pe_rsc_failed);
1910  set_bit(rsc->flags, pe_rsc_stop);
1911  should_fence = TRUE;
1912 
1913  } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
1914  if (pe__is_remote_node(node) && node->details->remote_rsc
1915  && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
1916 
1917  /* Setting unseen means that fencing of the remote node will
1918  * occur only if the connection resource is not going to start
1919  * somewhere. This allows connection resources on a failed
1920  * cluster node to move to another node without requiring the
1921  * remote nodes to be fenced as well.
1922  */
1923  node->details->unseen = TRUE;
1924  reason = crm_strdup_printf("%s is active there (fencing will be"
1925  " revoked if remote connection can "
1926  "be re-established elsewhere)",
1927  rsc->id);
1928  }
1929  should_fence = TRUE;
1930  }
1931 
1932  if (should_fence) {
1933  if (reason == NULL) {
1934  reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
1935  }
1936  pe_fence_node(data_set, node, reason);
1937  }
1938  free(reason);
1939  }
1940 
1941  if (node->details->unclean) {
1942  /* No extra processing needed
1943  * Also allows resources to be started again after a node is shot
1944  */
1945  on_fail = action_fail_ignore;
1946  }
1947 
1948  switch (on_fail) {
1949  case action_fail_ignore:
1950  /* nothing to do */
1951  break;
1952 
1953  case action_fail_demote:
1954  set_bit(rsc->flags, pe_rsc_failed);
1955  demote_action(rsc, node, FALSE);
1956  break;
1957 
1958  case action_fail_fence:
1959  /* treat it as if it is still running
1960  * but also mark the node as unclean
1961  */
1962  reason = crm_strdup_printf("%s failed there", rsc->id);
1963  pe_fence_node(data_set, node, reason);
1964  free(reason);
1965  break;
1966 
1967  case action_fail_standby:
1968  node->details->standby = TRUE;
1969  node->details->standby_onfail = TRUE;
1970  break;
1971 
1972  case action_fail_block:
1973  /* is_managed == FALSE will prevent any
1974  * actions being sent for the resource
1975  */
1976  clear_bit(rsc->flags, pe_rsc_managed);
1977  set_bit(rsc->flags, pe_rsc_block);
1978  break;
1979 
1980  case action_fail_migrate:
1981  /* make sure it comes up somewhere else
1982  * or not at all
1983  */
1984  resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
1985  break;
1986 
1987  case action_fail_stop:
1988  rsc->next_role = RSC_ROLE_STOPPED;
1989  break;
1990 
1991  case action_fail_recover:
1992  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
1993  set_bit(rsc->flags, pe_rsc_failed);
1994  set_bit(rsc->flags, pe_rsc_stop);
1995  stop_action(rsc, node, FALSE);
1996  }
1997  break;
1998 
1999  case action_fail_restart_container:
2000  set_bit(rsc->flags, pe_rsc_failed);
2001  set_bit(rsc->flags, pe_rsc_stop);
2002 
2003  if (rsc->container && pe_rsc_is_bundled(rsc)) {
2004  /* A bundle's remote connection can run on a different node than
2005  * the bundle's container. We don't necessarily know where the
2006  * container is running yet, so remember it and add a stop
2007  * action for it later.
2008  */
2009  data_set->stop_needed = g_list_prepend(data_set->stop_needed,
2010  rsc->container);
2011  } else if (rsc->container) {
2012  stop_action(rsc->container, node, FALSE);
2013  } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2014  stop_action(rsc, node, FALSE);
2015  }
2016  break;
2017 
2018  case action_fail_reset_remote:
2019  set_bit(rsc->flags, pe_rsc_failed);
2020  set_bit(rsc->flags, pe_rsc_stop);
2021  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
2022  tmpnode = NULL;
2023  if (rsc->is_remote_node) {
2024  tmpnode = pe_find_node(data_set->nodes, rsc->id);
2025  }
2026  if (tmpnode &&
2027  pe__is_remote_node(tmpnode) &&
2028  tmpnode->details->remote_was_fenced == 0) {
2029 
2030  /* The remote connection resource failed in a way that
2031  * should result in fencing the remote node.
2032  */
2033  pe_fence_node(data_set, tmpnode,
2034  "remote connection is unrecoverable");
2035  }
2036  }
2037 
2038  /* Require the stop action regardless of whether fencing is occurring. */
2039  if (rsc->role > RSC_ROLE_STOPPED) {
2040  stop_action(rsc, node, FALSE);
2041  }
2042 
2043  /* if reconnect delay is in use, prevent the connection from exiting the
2044  * "STOPPED" role until the failure is cleared by the delay timeout. */
2045  if (rsc->remote_reconnect_ms) {
2046  rsc->next_role = RSC_ROLE_STOPPED;
2047  }
2048  break;
2049  }
2050 
2051  /* Ensure a remote-node connection failure forces an unclean remote node
2052  * to be fenced. By setting unseen = FALSE, the remote-node failure will
2053  * result in a fencing operation regardless of whether we attempt to
2054  * reconnect to the remote node in this transition. */
2055  if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
2056  tmpnode = pe_find_node(data_set->nodes, rsc->id);
2057  if (tmpnode && tmpnode->details->unclean) {
2058  tmpnode->details->unseen = FALSE;
2059  }
2060  }
2061 
2062  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2063  if (is_set(rsc->flags, pe_rsc_orphan)) {
2064  if (is_set(rsc->flags, pe_rsc_managed)) {
2065  crm_config_warn("Detected active orphan %s running on %s",
2066  rsc->id, node->details->uname);
2067  } else {
2068  crm_config_warn("Cluster configured not to stop active orphans."
2069  " %s must be stopped manually on %s",
2070  rsc->id, node->details->uname);
2071  }
2072  }
2073 
2074  native_add_running(rsc, node, data_set);
2075  switch (on_fail) {
2076  case action_fail_ignore:
2077  break;
2078  case action_fail_demote:
2079  case action_fail_block:
2080  set_bit(rsc->flags, pe_rsc_failed);
2081  break;
2082  default:
2083  set_bit(rsc->flags, pe_rsc_failed);
2084  set_bit(rsc->flags, pe_rsc_stop);
2085  break;
2086  }
2087 
2088  } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2089  /* Only do this for older status sections that included instance numbers
2090  * Otherwise stopped instances will appear as orphans
2091  */
2092  pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
2093  free(rsc->clone_name);
2094  rsc->clone_name = NULL;
2095 
2096  } else {
2097  GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP,
2098  FALSE);
2099  GListPtr gIter = possible_matches;
2100 
2101  for (; gIter != NULL; gIter = gIter->next) {
2102  action_t *stop = (action_t *) gIter->data;
2103 
2104  stop->flags |= pe_action_optional;
2105  }
2106 
2107  g_list_free(possible_matches);
2108  }
2109 }
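/* Editorial sketch, not part of the original source: the on_fail values
 * handled above come from the "on-fail" operation meta-attribute in the CIB.
 * An illustrative (hypothetical) operation definition such as
 *
 *   <op id="rsc1-monitor-10s" name="monitor" interval="10s"
 *       on-fail="standby"/>
 *
 * would surface here as action_fail_standby when that monitor fails; the
 * other configurable values ("ignore", "block", "demote", "stop", "restart",
 * "standby", "fence") map to the corresponding action_fail_* cases.
 */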
2110 
2111 /* create active recurring operations as optional */
2112 static void
2113 process_recurring(node_t * node, resource_t * rsc,
2114  int start_index, int stop_index,
2115  GListPtr sorted_op_list, pe_working_set_t * data_set)
2116 {
2117  int counter = -1;
2118  const char *task = NULL;
2119  const char *status = NULL;
2120  GListPtr gIter = sorted_op_list;
2121 
2122  CRM_ASSERT(rsc);
2123  pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
2124 
2125  for (; gIter != NULL; gIter = gIter->next) {
2126  xmlNode *rsc_op = (xmlNode *) gIter->data;
2127 
2128  guint interval_ms = 0;
2129  char *key = NULL;
2130  const char *id = ID(rsc_op);
2131  const char *interval_ms_s = NULL;
2132 
2133  counter++;
2134 
2135  if (node->details->online == FALSE) {
2136  pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
2137  break;
2138 
2139  /* Need to check if there's a monitor for role="Stopped" */
2140  } else if (start_index < stop_index && counter <= stop_index) {
2141  pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname);
2142  continue;
2143 
2144  } else if (counter < start_index) {
2145  pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter);
2146  continue;
2147  }
2148 
2149  interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS);
2150  interval_ms = crm_parse_ms(interval_ms_s);
2151  if (interval_ms == 0) {
2152  pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname);
2153  continue;
2154  }
2155 
2156  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2157  if (safe_str_eq(status, "-1")) {
2158  pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname);
2159  continue;
2160  }
2161  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2162  /* create the action */
2163  key = generate_op_key(rsc->id, task, interval_ms);
2164  pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname);
2165  custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
2166  }
2167 }
2168 
2169 void
2170 calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
2171 {
2172  int counter = -1;
2173  int implied_monitor_start = -1;
2174  int implied_clone_start = -1;
2175  const char *task = NULL;
2176  const char *status = NULL;
2177  GListPtr gIter = sorted_op_list;
2178 
2179  *stop_index = -1;
2180  *start_index = -1;
2181 
2182  for (; gIter != NULL; gIter = gIter->next) {
2183  xmlNode *rsc_op = (xmlNode *) gIter->data;
2184 
2185  counter++;
2186 
2187  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2188  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2189 
2190  if (safe_str_eq(task, CRMD_ACTION_STOP)
2191  && safe_str_eq(status, "0")) {
2192  *stop_index = counter;
2193 
2194  } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2195  *start_index = counter;
2196 
2197  } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2198  const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
2199 
2200  if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
2201  implied_monitor_start = counter;
2202  }
2203  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
2204  implied_clone_start = counter;
2205  }
2206  }
2207 
2208  if (*start_index == -1) {
2209  if (implied_clone_start != -1) {
2210  *start_index = implied_clone_start;
2211  } else if (implied_monitor_start != -1) {
2212  *start_index = implied_monitor_start;
2213  }
2214  }
2215 }
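/* Editorial worked example, not part of the original source. Assuming a
 * sorted history for one resource on one node of:
 *
 *   index 0: stop    (op-status 0)
 *   index 1: start   (rc 0)
 *   index 2: monitor (interval 10000, rc 0)
 *
 * calculate_active_ops() sets *stop_index = 0 and *start_index = 1, and
 * process_recurring() then recreates only the recurring monitor at index 2
 * as an optional action. If no explicit start were present, a successful
 * monitor (rc 0 or 8) or a promote/demote would imply the start index
 * instead.
 */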
2216 
2217 static resource_t *
2218 unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
2219 {
2220  GListPtr gIter = NULL;
2221  int stop_index = -1;
2222  int start_index = -1;
2223  enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
2224 
2225  const char *task = NULL;
2226  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2227 
2228  resource_t *rsc = NULL;
2229  GListPtr op_list = NULL;
2230  GListPtr sorted_op_list = NULL;
2231 
2232  xmlNode *migrate_op = NULL;
2233  xmlNode *rsc_op = NULL;
2234  xmlNode *last_failure = NULL;
2235 
2236  enum action_fail_response on_fail = action_fail_ignore;
2237  enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
2238 
2239  crm_trace("[%s] Processing %s on %s",
2240  crm_element_name(rsc_entry), rsc_id, node->details->uname);
2241 
2242  /* extract operations */
2243  op_list = NULL;
2244  sorted_op_list = NULL;
2245 
2246  for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
2247  rsc_op = __xml_next_element(rsc_op)) {
2248  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
2249  op_list = g_list_prepend(op_list, rsc_op);
2250  }
2251  }
2252 
2253  if (op_list == NULL) {
2254  /* if there are no operations, there is nothing to do */
2255  return NULL;
2256  }
2257 
2258  /* find the resource */
2259  rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
2260  if (rsc == NULL) {
2261  rsc = process_orphan_resource(rsc_entry, node, data_set);
2262  }
2263  CRM_ASSERT(rsc != NULL);
2264 
2265  /* process operations */
2266  saved_role = rsc->role;
2267  rsc->role = RSC_ROLE_UNKNOWN;
2268  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2269 
2270  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2271  xmlNode *rsc_op = (xmlNode *) gIter->data;
2272 
2273  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2274  if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2275  migrate_op = rsc_op;
2276  }
2277 
2278  unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
2279  }
2280 
2281  /* create active recurring operations as optional */
2282  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2283  process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
2284 
2285  /* no need to free the contents */
2286  g_list_free(sorted_op_list);
2287 
2288  process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
2289 
2290  if (get_target_role(rsc, &req_role)) {
2291  if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
2292  pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
2293  " with requested next role %s",
2294  rsc->id, role2text(rsc->next_role), role2text(req_role));
2295  rsc->next_role = req_role;
2296 
2297  } else if (req_role > rsc->next_role) {
2298  pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
2299  " with requested next role %s",
2300  rsc->id, role2text(rsc->next_role), role2text(req_role));
2301  }
2302  }
2303 
2304  if (saved_role > rsc->role) {
2305  rsc->role = saved_role;
2306  }
2307 
2308  return rsc;
2309 }
2310 
2311 static void
2312 handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2313 {
2314  xmlNode *rsc_entry = NULL;
2315  for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL;
2316  rsc_entry = __xml_next_element(rsc_entry)) {
2317 
2318  resource_t *rsc;
2319  resource_t *container;
2320  const char *rsc_id;
2321  const char *container_id;
2322 
2323  if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) {
2324  continue;
2325  }
2326 
2327  container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
2328  rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2329  if (container_id == NULL || rsc_id == NULL) {
2330  continue;
2331  }
2332 
2333  container = pe_find_resource(data_set->resources, container_id);
2334  if (container == NULL) {
2335  continue;
2336  }
2337 
2338  rsc = pe_find_resource(data_set->resources, rsc_id);
2339  if (rsc == NULL ||
2340  is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE ||
2341  rsc->container != NULL) {
2342  continue;
2343  }
2344 
2345  pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2346  rsc->id, container_id);
2347  rsc->container = container;
2348  container->fillers = g_list_append(container->fillers, rsc);
2349  }
2350 }
2351 
2352 gboolean
2353 unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2354 {
2355  xmlNode *rsc_entry = NULL;
2356  gboolean found_orphaned_container_filler = FALSE;
2357 
2358  CRM_CHECK(node != NULL, return FALSE);
2359 
2360  crm_trace("Unpacking resources on %s", node->details->uname);
2361 
2362  for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL;
2363  rsc_entry = __xml_next_element(rsc_entry)) {
2364 
2365  if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
2366  resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
2367  if (!rsc) {
2368  continue;
2369  }
2370  if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
2371  found_orphaned_container_filler = TRUE;
2372  }
2373  }
2374  }
2375 
2376  /* now that all the resource state has been unpacked for this node
2377  * we have to go back and map any orphaned container fillers to their
2378  * container resource */
2379  if (found_orphaned_container_filler) {
2380  handle_orphaned_container_fillers(lrm_rsc_list, data_set);
2381  }
2382  return TRUE;
2383 }
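/* Editorial sketch, not part of the original source: the lrm_rsc_list
 * unpacked above is the CIB status section for one node, roughly shaped
 * like the following (attributes abbreviated, IDs hypothetical):
 *
 *   <lrm_resources>
 *     <lrm_resource id="rsc1" class="ocf" provider="heartbeat" type="Dummy">
 *       <lrm_rsc_op id="rsc1_last_0" operation="start" call-id="12"
 *                   rc-code="0" op-status="0" ... />
 *       <lrm_rsc_op id="rsc1_monitor_10000" operation="monitor"
 *                   interval="10000" call-id="13" rc-code="0" ... />
 *     </lrm_resource>
 *   </lrm_resources>
 *
 * Each <lrm_resource> entry is handled by unpack_lrm_rsc_state(), and each
 * <lrm_rsc_op> within it by unpack_rsc_op().
 */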
2384 
2385 static void
2386 set_active(resource_t * rsc)
2387 {
2388  resource_t *top = uber_parent(rsc);
2389 
2390  if (top && is_set(top->flags, pe_rsc_promotable)) {
2391  rsc->role = RSC_ROLE_SLAVE;
2392  } else {
2393  rsc->role = RSC_ROLE_STARTED;
2394  }
2395 }
2396 
2397 static void
2398 set_node_score(gpointer key, gpointer value, gpointer user_data)
2399 {
2400  node_t *node = value;
2401  int *score = user_data;
2402 
2403  node->weight = *score;
2404 }
2405 
2406 #define STATUS_PATH_MAX 1024
2407 static xmlNode *
2408 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2409  bool success_only, pe_working_set_t *data_set)
2410 {
2411  int offset = 0;
2412  char xpath[STATUS_PATH_MAX];
2413  xmlNode *xml = NULL;
2414 
2415  offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node);
2416  offset +=
2417  snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
2418  resource);
2419 
2420  /* Need to check against transition_magic too? */
2421  if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
2422  offset +=
2423  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2424  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
2425  source);
2426  } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
2427  offset +=
2428  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2429  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
2430  source);
2431  } else {
2432  offset +=
2433  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2434  "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
2435  }
2436 
2437  CRM_LOG_ASSERT(offset > 0);
2438  xml = get_xpath_object(xpath, data_set->input, LOG_DEBUG);
2439 
2440  if (xml && success_only) {
2441  int rc = PCMK_OCF_UNKNOWN_ERROR;
2442  int status = PCMK_LRM_OP_ERROR;
2443 
2444  crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc);
2445  crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status);
2446  if ((rc != PCMK_OCF_OK) || (status != PCMK_LRM_OP_DONE)) {
2447  return NULL;
2448  }
2449  }
2450  return xml;
2451 }
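/* Editorial example, not part of the original source: for a plain (non-
 * migration) lookup such as
 *
 *   find_lrm_op("rsc1", CRMD_ACTION_STOP, "node1", NULL, TRUE, data_set)
 *
 * the XPath built above is roughly (names hypothetical, wrapped for
 * readability):
 *
 *   //node_state[@uname='node1']//lrm_resource[@id='rsc1']
 *       /lrm_rsc_op[@operation='stop']
 *
 * and with success_only == TRUE the match is discarded unless its rc-code
 * is 0 (PCMK_OCF_OK) and its op-status is 0 (PCMK_LRM_OP_DONE).
 */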
2452 
2453 static int
2454 pe__call_id(xmlNode *op_xml)
2455 {
2456  int id = 0;
2457 
2458  if (op_xml) {
2459  crm_element_value_int(op_xml, XML_LRM_ATTR_CALLID, &id);
2460  }
2461  return id;
2462 }
2463 
2480 static bool
2481 stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2482  pe_working_set_t *data_set)
2483 {
2484  xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP,
2485  node->details->uname, NULL, TRUE, data_set);
2486 
2487  return (stop_op && (pe__call_id(stop_op) > pe__call_id(xml_op)));
2488 }
2489 
2490 static void
2491 unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2492  pe_working_set_t *data_set)
2493 {
2494  /* A successful migration sequence is:
2495  * migrate_to on source node
2496  * migrate_from on target node
2497  * stop on source node
2498  *
2499  * If a migrate_to is followed by a stop, the entire migration (successful
2500  * or failed) is complete, and we don't care what happened on the target.
2501  *
2502  * If no migrate_from has happened, the migration is considered to be
2503  * "partial". If the migrate_from failed, make sure the resource gets
2504  * stopped on both source and target (if up).
2505  *
2506  * If the migrate_to and migrate_from both succeeded (which also implies the
2507  * resource is no longer running on the source), but there is no stop, the
2508  * migration is considered to be "dangling". Schedule a stop on the source
2509  * in this case.
2510  */
2511  int from_rc = 0;
2512  int from_status = 0;
2513  pe_node_t *target_node = NULL;
2514  pe_node_t *source_node = NULL;
2515  xmlNode *migrate_from = NULL;
2516  const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2517  const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2518 
2519  // Sanity check
2520  CRM_CHECK(source && target && !strcmp(source, node->details->uname), return);
2521 
2522  if (stop_happened_after(rsc, node, xml_op, data_set)) {
2523  return;
2524  }
2525 
2526  // Clones are not allowed to migrate, so role can't be master
2527  rsc->role = RSC_ROLE_STARTED;
2528 
2529  target_node = pe_find_node(data_set->nodes, target);
2530  source_node = pe_find_node(data_set->nodes, source);
2531 
2532  // Check whether there was a migrate_from action on the target
2533  migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target,
2534  source, FALSE, data_set);
2535  if (migrate_from) {
2536  crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
2537  crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
2538  pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
2539  ID(migrate_from), target, from_status, from_rc);
2540  }
2541 
2542  if (migrate_from && from_rc == PCMK_OCF_OK
2543  && from_status == PCMK_LRM_OP_DONE) {
2544  /* The migrate_to and migrate_from both succeeded, so mark the migration
2545  * as "dangling". This will be used to schedule a stop action on the
2546  * source without affecting the target.
2547  */
2548  pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
2549  source);
2550  rsc->role = RSC_ROLE_STOPPED;
2551  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
2552 
2553  } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed
2554  if (target_node && target_node->details->online) {
2555  pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node,
2556  target_node->details->online);
2557  native_add_running(rsc, target_node, data_set);
2558  }
2559 
2560  } else { // Pending, or complete but erased
2561  if (target_node && target_node->details->online) {
2562  pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node,
2563  target_node->details->online);
2564 
2565  native_add_running(rsc, target_node, data_set);
2566  if (source_node && source_node->details->online) {
2567  /* This is a partial migration: the migrate_to completed
2568  * successfully on the source, but the migrate_from has not
2569  * completed. Remember the source and target; if the newly
2570  * chosen target remains the same when we schedule actions
2571  * later, we may continue with the migration.
2572  */
2573  rsc->partial_migration_target = target_node;
2574  rsc->partial_migration_source = source_node;
2575  }
2576  } else {
2577  /* Consider it failed here - forces a restart, prevents migration */
2578  set_bit(rsc->flags, pe_rsc_failed);
2579  set_bit(rsc->flags, pe_rsc_stop);
2580  clear_bit(rsc->flags, pe_rsc_allow_migrate);
2581  }
2582  }
2583 }
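/* Editorial summary, not part of the original source, of how the branches
 * above classify a successful migrate_to (call-ids below are illustrative):
 *
 *   later stop on source?    migrate_from on target    outcome
 *   yes (call-id > migrate)  (ignored)                 migration already
 *                                                      complete; return early
 *   no                       succeeded (rc 0, done)    "dangling": source is
 *                                                      remembered for a stop
 *   no                       failed (not pending)      resource counted as
 *                                                      active on target
 *   no                       none / still pending      partial migration;
 *                                                      source and target
 *                                                      recorded for later
 */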
2584 
2585 static void
2586 unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2587  pe_working_set_t *data_set)
2588 {
2589  int target_stop_id = 0;
2590  int target_migrate_from_id = 0;
2591  xmlNode *target_stop = NULL;
2592  xmlNode *target_migrate_from = NULL;
2593  const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2594  const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2595 
2596  // Sanity check
2597  CRM_CHECK(source && target && !strcmp(source, node->details->uname), return);
2598 
2599  /* If a migration failed, we have to assume the resource is active. Clones
2600  * are not allowed to migrate, so role can't be master.
2601  */
2602  rsc->role = RSC_ROLE_STARTED;
2603 
2604  // Check for stop on the target
2605  target_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, target, NULL,
2606  TRUE, data_set);
2607  target_stop_id = pe__call_id(target_stop);
2608 
2609  // Check for migrate_from on the target
2610  target_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target,
2611  source, TRUE, data_set);
2612  target_migrate_from_id = pe__call_id(target_migrate_from);
2613 
2614  if ((target_stop == NULL) || (target_stop_id < target_migrate_from_id)) {
2615  /* There was no stop on the target, or a stop that happened before a
2616  * migrate_from, so assume the resource is still active on the target
2617  * (if it is up).
2618  */
2619  node_t *target_node = pe_find_node(data_set->nodes, target);
2620 
2621  pe_rsc_trace(rsc, "stop (%d) + migrate_from (%d)",
2622  target_stop_id, target_migrate_from_id);
2623  if (target_node && target_node->details->online) {
2624  native_add_running(rsc, target_node, data_set);
2625  }
2626 
2627  } else if (target_migrate_from == NULL) {
2628  /* We know there was a stop on the target, but there may not have been a
2629  * migrate_from (the stop could have happened before migrate_from was
2630  * scheduled or attempted).
2631  *
2632  * That means this could be a "dangling" migration. But first, check
2633  * whether there is a newer migrate_from or start on the source node --
2634  * it's possible the failed migration was followed by a successful
2635  * full restart or migration in the reverse direction, in which case we
2636  * don't want to force it to stop.
2637  */
2638  xmlNode *source_migrate_from = NULL;
2639  xmlNode *source_start = NULL;
2640  int source_migrate_to_id = pe__call_id(xml_op);
2641 
2642  source_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, source,
2643  NULL, TRUE, data_set);
2644  if (pe__call_id(source_migrate_from) > source_migrate_to_id) {
2645  return;
2646  }
2647 
2648  source_start = find_lrm_op(rsc->id, CRMD_ACTION_START, source, NULL,
2649  TRUE, data_set);
2650  if (pe__call_id(source_start) > source_migrate_to_id) {
2651  return;
2652  }
2653 
2654  // Mark node as having dangling migration so we can force a stop later
2655  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
2656  }
2657 }
2658 
2659 static void
2660 unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node,
2661  xmlNode *xml_op, pe_working_set_t *data_set)
2662 {
2663  xmlNode *source_stop = NULL;
2664  xmlNode *source_migrate_to = NULL;
2665  const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2666  const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2667 
2668  // Sanity check
2669  CRM_CHECK(source && target && !strcmp(target, node->details->uname), return);
2670 
2671  /* If a migration failed, we have to assume the resource is active. Clones
2672  * are not allowed to migrate, so role can't be master.
2673  */
2674  rsc->role = RSC_ROLE_STARTED;
2675 
2676  // Check for a stop on the source
2677  source_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, source, NULL,
2678  TRUE, data_set);
2679 
2680  // Check for a migrate_to on the source
2681  source_migrate_to = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE,
2682  source, target, TRUE, data_set);
2683 
2684  if ((source_stop == NULL)
2685  || (pe__call_id(source_stop) < pe__call_id(source_migrate_to))) {
2686  /* There was no stop on the source, or a stop that happened before
2687  * migrate_to, so assume the resource is still active on the source (if
2688  * it is up).
2689  */
2690  pe_node_t *source_node = pe_find_node(data_set->nodes, source);
2691 
2692  if (source_node && source_node->details->online) {
2693  native_add_running(rsc, source_node, data_set);
2694  }
2695  }
2696 }
2697 
2698 static void
2699 record_failed_op(xmlNode *op, const pe_node_t *node,
2700  const pe_resource_t *rsc, pe_working_set_t *data_set)
2701 {
2702  xmlNode *xIter = NULL;
2703  const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
2704 
2705  if (node->details->online == FALSE) {
2706  return;
2707  }
2708 
2709  for (xIter = data_set->failed->children; xIter; xIter = xIter->next) {
2710  const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY);
2711  const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
2712 
2713  if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) {
2714  crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname);
2715  return;
2716  }
2717  }
2718 
2719  crm_trace("Adding entry %s on %s", op_key, node->details->uname);
2720  crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
2721  crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id);
2722  add_node_copy(data_set->failed, op);
2723 }
2724 
2725 static const char *get_op_key(xmlNode *xml_op)
2726 {
2727  const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
2728  if(key == NULL) {
2729  key = ID(xml_op);
2730  }
2731  return key;
2732 }
2733 
2734 static const char *
2735 last_change_str(xmlNode *xml_op)
2736 {
2737  time_t when;
2738  const char *when_s = NULL;
2739 
2740  if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
2741  &when) == pcmk_ok) {
2742  when_s = crm_now_string(&when);
2743  if (when_s) {
2744  // Skip day of week to make message shorter
2745  when_s = strchr(when_s, ' ');
2746  if (when_s) {
2747  ++when_s;
2748  }
2749  }
2750  }
2751  return ((when_s && *when_s)? when_s : "unknown time");
2752 }
2753 
2766 static int
2767 cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
2768 {
2769  switch (first) {
2770  case action_fail_demote:
2771  switch (second) {
2772  case action_fail_ignore:
2773  return 1;
2774  case action_fail_demote:
2775  return 0;
2776  default:
2777  return -1;
2778  }
2779  break;
2780 
2781  case action_fail_reset_remote:
2782  switch (second) {
2783  case action_fail_ignore:
2784  case action_fail_demote:
2785  case action_fail_recover:
2786  return 1;
2787  case action_fail_reset_remote:
2788  return 0;
2789  default:
2790  return -1;
2791  }
2792  break;
2793 
2794  case action_fail_restart_container:
2795  switch (second) {
2796  case action_fail_ignore:
2797  case action_fail_demote:
2798  case action_fail_recover:
2799  case action_fail_reset_remote:
2800  return 1;
2801  case action_fail_restart_container:
2802  return 0;
2803  default:
2804  return -1;
2805  }
2806  break;
2807 
2808  default:
2809  break;
2810  }
2811  switch (second) {
2812  case action_fail_demote:
2813  return (first == action_fail_ignore)? -1 : 1;
2814 
2815  case action_fail_reset_remote:
2816  switch (first) {
2817  case action_fail_ignore:
2818  case action_fail_demote:
2819  case action_fail_recover:
2820  return -1;
2821  default:
2822  return 1;
2823  }
2824  break;
2825 
2826  case action_fail_restart_container:
2827  switch (first) {
2828  case action_fail_ignore:
2829  case action_fail_demote:
2830  case action_fail_recover:
2831  case action_fail_reset_remote:
2832  return -1;
2833  default:
2834  return 1;
2835  }
2836  break;
2837 
2838  default:
2839  break;
2840  }
2841  return first - second;
2842 }
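/* Editorial usage sketch, not part of the original source: cmp_on_fail()
 * gives a severity ordering so callers can escalate but never downgrade the
 * pending failure response, as unpack_rsc_op_failure() does below:
 *
 *   if (cmp_on_fail(*on_fail, action->on_fail) < 0) {
 *       *on_fail = action->on_fail;   // keep the more severe policy
 *   }
 *
 * action_fail_ignore always compares as the least severe value, while the
 * special cases above keep demote, reset_remote and restart_container
 * correctly ranked even though their enum positions do not reflect severity.
 */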
2843 
2844 static void
2845 unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
2846  enum action_fail_response * on_fail, pe_working_set_t * data_set)
2847 {
2848  guint interval_ms = 0;
2849  bool is_probe = false;
2850  action_t *action = NULL;
2851 
2852  const char *key = get_op_key(xml_op);
2853  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2854  const char *exit_reason = crm_element_value(xml_op,
2855  XML_LRM_ATTR_EXIT_REASON);
2856 
2857  CRM_ASSERT(rsc);
2858  CRM_CHECK(task != NULL, return);
2859 
2860  *last_failure = xml_op;
2861 
2862  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
2863  if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) {
2864  is_probe = true;
2865  }
2866 
2867  if (exit_reason == NULL) {
2868  exit_reason = "";
2869  }
2870 
2871  if (is_not_set(data_set->flags, pe_flag_symmetric_cluster)
2872  && (rc == PCMK_OCF_NOT_INSTALLED)) {
2873  crm_trace("Unexpected result (%s%s%s) was recorded for "
2874  "%s of %s on %s at %s " CRM_XS " rc=%d id=%s",
2875  services_ocf_exitcode_str(rc),
2876  (*exit_reason? ": " : ""), exit_reason,
2877  (is_probe? "probe" : task), rsc->id, node->details->uname,
2878  last_change_str(xml_op), rc, ID(xml_op));
2879  } else {
2880  crm_warn("Unexpected result (%s%s%s) was recorded for "
2881  "%s of %s on %s at %s " CRM_XS " rc=%d id=%s",
2882  services_ocf_exitcode_str(rc),
2883  (*exit_reason? ": " : ""), exit_reason,
2884  (is_probe? "probe" : task), rsc->id, node->details->uname,
2885  last_change_str(xml_op), rc, ID(xml_op));
2886 
2887  if (is_probe && (rc != PCMK_OCF_OK)
2888  && (rc != PCMK_OCF_NOT_RUNNING)
2889  && (rc != PCMK_OCF_RUNNING_MASTER)) {
2890 
2891  /* A failed (not just unexpected) probe result could mean the user
2892  * didn't know resources will be probed even where they can't run.
2893  */
2894  crm_notice("If it is not possible for %s to run on %s, see "
2895  "the resource-discovery option for location constraints",
2896  rsc->id, node->details->uname);
2897  }
2898 
2899  record_failed_op(xml_op, node, rsc, data_set);
2900  }
2901 
2902  action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
2903  if (cmp_on_fail(*on_fail, action->on_fail) < 0) {
2904  pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
2905  fail2text(action->on_fail), action->uuid, key);
2906  *on_fail = action->on_fail;
2907  }
2908 
2909  if (!strcmp(task, CRMD_ACTION_STOP)) {
2910  resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
2911 
2912  } else if (!strcmp(task, CRMD_ACTION_MIGRATE)) {
2913  unpack_migrate_to_failure(rsc, node, xml_op, data_set);
2914 
2915  } else if (!strcmp(task, CRMD_ACTION_MIGRATED)) {
2916  unpack_migrate_from_failure(rsc, node, xml_op, data_set);
2917 
2918  } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) {
2919  rsc->role = RSC_ROLE_MASTER;
2920 
2921  } else if (!strcmp(task, CRMD_ACTION_DEMOTE)) {
2922  if (action->on_fail == action_fail_block) {
2923  rsc->role = RSC_ROLE_MASTER;
2924  rsc->next_role = RSC_ROLE_STOPPED;
2925 
2926  } else if(rc == PCMK_OCF_NOT_RUNNING) {
2927  rsc->role = RSC_ROLE_STOPPED;
2928 
2929  } else {
2930  /*
2931  * Staying in master role would put the PE/TE into a loop. Setting
2932  * slave role is not dangerous because the resource will be stopped
2933  * as part of recovery, and any master promotion will be ordered
2934  * after that stop.
2935  */
2936  rsc->role = RSC_ROLE_SLAVE;
2937  }
2938  }
2939 
2940  if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
2941  /* leave stopped */
2942  pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
2943  rsc->role = RSC_ROLE_STOPPED;
2944 
2945  } else if (rsc->role < RSC_ROLE_STARTED) {
2946  pe_rsc_trace(rsc, "Setting %s active", rsc->id);
2947  set_active(rsc);
2948  }
2949 
2950  pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
2951  rsc->id, role2text(rsc->role),
2952  node->details->unclean ? "true" : "false",
2953  fail2text(action->on_fail), role2text(action->fail_role));
2954 
2955  if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
2956  rsc->next_role = action->fail_role;
2957  }
2958 
2959  if (action->fail_role == RSC_ROLE_STOPPED) {
2960  int score = -INFINITY;
2961 
2962  resource_t *fail_rsc = rsc;
2963 
2964  if (fail_rsc->parent) {
2965  resource_t *parent = uber_parent(fail_rsc);
2966 
2967  if (pe_rsc_is_clone(parent)
2968  && is_not_set(parent->flags, pe_rsc_unique)) {
2969  /* For clone resources, if a child fails on an operation
2970  * with on-fail = stop, all the resources fail. Do this by preventing
2971  * the parent from coming up again. */
2972  fail_rsc = parent;
2973  }
2974  }
2975  crm_notice("%s will not be started under current conditions",
2976  fail_rsc->id);
2977  /* make sure it doesn't come up again */
2978  if (fail_rsc->allowed_nodes != NULL) {
2979  g_hash_table_destroy(fail_rsc->allowed_nodes);
2980  }
2981  fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
2982  g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
2983  }
2984 
2985  pe_free_action(action);
2986 }
2987 
3007 static int
3008 determine_op_status(
3009  resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set)
3010 {
3011  guint interval_ms = 0;
3012  bool is_probe = false;
3013  int result = PCMK_LRM_OP_DONE;
3014  const char *key = get_op_key(xml_op);
3015  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
3016  const char *exit_reason = crm_element_value(xml_op,
3018 
3019  CRM_ASSERT(rsc);
3020  CRM_CHECK(task != NULL, return PCMK_LRM_OP_ERROR);
3021 
3022  if (exit_reason == NULL) {
3023  exit_reason = "";
3024  }
3025 
3026  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
3027  if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) {
3028  is_probe = true;
3029  task = "probe";
3030  }
3031 
3032  if (target_rc < 0) {
3033  /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3034  * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3035  * target_rc in the transition key, which (along with the similar case
3036  * of a corrupted transition key in the CIB) will be reported to this
3037  * function as -1. Pacemaker 2.0+ does not support rolling upgrades from
3038  * those versions or processing of saved CIB files from those versions,
3039  * so we do not need to care much about this case.
3040  */
3041  result = PCMK_LRM_OP_ERROR;
3042  crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)",
3043  key, node->details->uname);
3044 
3045  } else if (target_rc != rc) {
3046  result = PCMK_LRM_OP_ERROR;
3047  pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)",
3048  key, node->details->uname,
3049  target_rc, services_ocf_exitcode_str(target_rc),
3050  rc, services_ocf_exitcode_str(rc),
3051  (*exit_reason? ": " : ""), exit_reason);
3052  }
3053 
3054  switch (rc) {
3055  case PCMK_OCF_OK:
3056  if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) {
3057  result = PCMK_LRM_OP_DONE;
3058  pe_rsc_info(rsc, "Probe found %s active on %s at %s",
3059  rsc->id, node->details->uname,
3060  last_change_str(xml_op));
3061  }
3062  break;
3063 
3064  case PCMK_OCF_NOT_RUNNING:
3065  if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) {
3066  result = PCMK_LRM_OP_DONE;
3067  rsc->role = RSC_ROLE_STOPPED;
3068 
3069  /* clear any previous failure actions */
3070  *on_fail = action_fail_ignore;
3071  rsc->next_role = RSC_ROLE_UNKNOWN;
3072  }
3073  break;
3074 
3075  case PCMK_OCF_RUNNING_MASTER:
3076  if (is_probe && (rc != target_rc)) {
3077  result = PCMK_LRM_OP_DONE;
3078  pe_rsc_info(rsc,
3079  "Probe found %s active and promoted on %s at %s",
3080  rsc->id, node->details->uname,
3081  last_change_str(xml_op));
3082  }
3083  rsc->role = RSC_ROLE_MASTER;
3084  break;
3085 
3086  case PCMK_OCF_DEGRADED_MASTER:
3087  case PCMK_OCF_FAILED_MASTER:
3088  rsc->role = RSC_ROLE_MASTER;
3089  result = PCMK_LRM_OP_ERROR;
3090  break;
3091 
3092  case PCMK_OCF_NOT_CONFIGURED:
3093  result = PCMK_LRM_OP_ERROR_FATAL;
3094  break;
3095 
3096  case PCMK_OCF_UNIMPLEMENT_FEATURE:
3097  if (interval_ms > 0) {
3098  result = PCMK_LRM_OP_NOTSUPPORTED;
3099  break;
3100  }
3101  // fall through
3102  case PCMK_OCF_NOT_INSTALLED:
3103  case PCMK_OCF_INVALID_PARAM:
3104  case PCMK_OCF_INSUFFICIENT_PRIV:
3105  if (!pe_can_fence(data_set, node)
3106  && !strcmp(task, CRMD_ACTION_STOP)) {
3107  /* If a stop fails and we can't fence, there's nothing else we can do */
3108  pe_proc_err("No further recovery can be attempted for %s "
3109  "because %s on %s failed (%s%s%s) at %s "
3110  CRM_XS " rc=%d id=%s", rsc->id, task,
3111  node->details->uname, services_ocf_exitcode_str(rc),
3112  (*exit_reason? ": " : ""), exit_reason,
3113  last_change_str(xml_op), rc, ID(xml_op));
3114  clear_bit(rsc->flags, pe_rsc_managed);
3115  set_bit(rsc->flags, pe_rsc_block);
3116  }
3117  result = PCMK_LRM_OP_ERROR_HARD;
3118  break;
3119 
3120  default:
3121  if (result == PCMK_LRM_OP_DONE) {
3122  crm_info("Treating unknown exit status %d from %s of %s "
3123  "on %s at %s as failure",
3124  rc, task, rsc->id, node->details->uname,
3125  last_change_str(xml_op));
3126  result = PCMK_LRM_OP_ERROR;
3127  }
3128  break;
3129  }
3130  return result;
3131 }
3132 
3133 // Return TRUE if this is a start or monitor last failure whose resource parameters have since changed
3134 static bool
3135 should_clear_for_param_change(xmlNode *xml_op, const char *task,
3136  pe_resource_t *rsc, pe_node_t *node,
3137  pe_working_set_t *data_set)
3138 {
3139  if (!strcmp(task, "start") || !strcmp(task, "monitor")) {
3140 
3141  if (pe__bundle_needs_remote_name(rsc)) {
3142  /* We haven't allocated resources yet, so we can't reliably
3143  * substitute addr parameters for the REMOTE_CONTAINER_HACK.
3144  * When that's needed, defer the check until later.
3145  */
3146  pe__add_param_check(xml_op, rsc, node, pe_check_last_failure,
3147  data_set);
3148 
3149  } else {
3150  op_digest_cache_t *digest_data = NULL;
3151 
3152  digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
3153  switch (digest_data->rc) {
3154  case RSC_DIGEST_UNKNOWN:
3155  crm_trace("Resource %s history entry %s on %s"
3156  " has no digest to compare",
3157  rsc->id, get_op_key(xml_op), node->details->id);
3158  break;
3159  case RSC_DIGEST_MATCH:
3160  break;
3161  default:
3162  return TRUE;
3163  }
3164  }
3165  }
3166  return FALSE;
3167 }
3168 
3169 // Order action after fencing of remote node, given connection rsc
3170 static void
3171 order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn,
3172  pe_working_set_t *data_set)
3173 {
3174  pe_node_t *remote_node = pe_find_node(data_set->nodes, remote_conn->id);
3175 
3176  if (remote_node) {
3177  pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
3178  data_set);
3179 
3180  order_actions(fence, action, pe_order_implies_then);
3181  }
3182 }
3183 
3184 static bool
3185 should_ignore_failure_timeout(pe_resource_t *rsc, xmlNode *xml_op,
3186  const char *task, guint interval_ms,
3187  bool is_last_failure, pe_working_set_t *data_set)
3188 {
3189  /* Clearing failures of recurring monitors has special concerns. The
3190  * executor reports only changes in the monitor result, so if the
3191  * monitor is still active and still getting the same failure result,
3192  * that will go undetected after the failure is cleared.
3193  *
3194  * Also, the operation history will have the time when the recurring
3195  * monitor result changed to the given code, not the time when the
3196  * result last happened.
3197  *
3198  * @TODO We probably should clear such failures only when the failure
3199  * timeout has passed since the last occurrence of the failed result.
3200  * However we don't record that information. We could maybe approximate
3201  * that by clearing only if there is a more recent successful monitor or
3202  * stop result, but we don't even have that information at this point
3203  * since we are still unpacking the resource's operation history.
3204  *
3205  * This is especially important for remote connection resources with a
3206  * reconnect interval, so in that case, we skip clearing failures
3207  * if the remote node hasn't been fenced.
3208  */
3209  if (rsc->remote_reconnect_ms
3210  && is_set(data_set->flags, pe_flag_stonith_enabled)
3211  && (interval_ms != 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3212 
3213  pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
3214 
3215  if (remote_node && !remote_node->details->remote_was_fenced) {
3216  if (is_last_failure) {
3217  crm_info("Waiting to clear monitor failure for remote node %s"
3218  " until fencing has occurred", rsc->id);
3219  }
3220  return TRUE;
3221  }
3222  }
3223  return FALSE;
3224 }
3225 
3248 static bool
3249 check_operation_expiry(pe_resource_t *rsc, pe_node_t *node, int rc,
3250  xmlNode *xml_op, pe_working_set_t *data_set)
3251 {
3252  bool expired = FALSE;
3253  bool is_last_failure = crm_ends_with(ID(xml_op), "_last_failure_0");
3254  time_t last_run = 0;
3255  guint interval_ms = 0;
3256  int unexpired_fail_count = 0;
3257  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
3258  const char *clear_reason = NULL;
3259 
3260  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
3261 
3262  if ((rsc->failure_timeout > 0)
3263  && (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
3264  &last_run) == 0)) {
3265 
3266  // Resource has a failure-timeout, and history entry has a timestamp
3267 
3268  time_t now = get_effective_time(data_set);
3269  time_t last_failure = 0;
3270 
3271  // Is this particular operation history older than the failure timeout?
3272  if ((now >= (last_run + rsc->failure_timeout))
3273  && !should_ignore_failure_timeout(rsc, xml_op, task, interval_ms,
3274  is_last_failure, data_set)) {
3275  expired = TRUE;
3276  }
3277 
3278  // Does the resource as a whole have an unexpired fail count?
3279  unexpired_fail_count = pe_get_failcount(node, rsc, &last_failure,
3280  pe_fc_effective, xml_op,
3281  data_set);
3282 
3283  // Update scheduler recheck time according to *last* failure
3284  crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
3285  " last-failure@%lld",
3286  ID(xml_op), (long long) last_run, (expired? "" : "not "),
3287  (long long) now, unexpired_fail_count, rsc->failure_timeout,
3288  (long long) last_failure);
3289  last_failure += rsc->failure_timeout + 1;
3290  if (unexpired_fail_count && (now < last_failure)) {
3291  pe__update_recheck_time(last_failure, data_set);
3292  }
3293  }
3294 
3295  if (expired) {
3296  if (pe_get_failcount(node, rsc, NULL, pe_fc_default, xml_op, data_set)) {
3297 
3298  // There is a fail count ignoring timeout
3299 
3300  if (unexpired_fail_count == 0) {
3301  // There is no fail count considering timeout
3302  clear_reason = "it expired";
3303 
3304  } else {
3305  /* This operation is old, but there is an unexpired fail count.
3306  * In a properly functioning cluster, this should only be
3307  * possible if this operation is not a failure (otherwise the
3308  * fail count should be expired too), so this is really just a
3309  * failsafe.
3310  */
3311  expired = FALSE;
3312  }
3313 
3314  } else if (is_last_failure && rsc->remote_reconnect_ms) {
3315  /* Clear any expired last failure when reconnect interval is set,
3316  * even if there is no fail count.
3317  */
3318  clear_reason = "reconnect interval is set";
3319  }
3320  }
3321 
3322  if (!expired && is_last_failure
3323  && should_clear_for_param_change(xml_op, task, rsc, node, data_set)) {
3324  clear_reason = "resource parameters have changed";
3325  }
3326 
3327  if (clear_reason != NULL) {
3328  // Schedule clearing of the fail count
3329  pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason,
3330  data_set);
3331 
3332  if (is_set(data_set->flags, pe_flag_stonith_enabled)
3333  && rsc->remote_reconnect_ms) {
3334  /* If we're clearing a remote connection due to a reconnect
3335  * interval, we want to wait until any scheduled fencing
3336  * completes.
3337  *
3338  * We could limit this to remote_node->details->unclean, but at
3339  * this point, that's always true (it won't be reliable until
3340  * after unpack_node_loop() is done).
3341  */
3342  crm_info("Clearing %s failure will wait until any scheduled "
3343  "fencing of %s completes", task, rsc->id);
3344  order_after_remote_fencing(clear_op, rsc, data_set);
3345  }
3346  }
3347 
3348  if (expired && (interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3349  switch(rc) {
3350  case PCMK_OCF_OK:
3351  case PCMK_OCF_NOT_RUNNING:
3352  case PCMK_OCF_RUNNING_MASTER:
3353  case PCMK_OCF_DEGRADED:
3354  case PCMK_OCF_DEGRADED_MASTER:
3355  // Don't expire probes that return these values
3356  expired = FALSE;
3357  break;
3358  }
3359  }
3360 
3361  return expired;
3362 }
3363 
3364 int pe__target_rc_from_xml(xmlNode *xml_op)
3365 {
3366  int target_rc = 0;
3367  const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
3368 
3369  if (key == NULL) {
3370  return -1;
3371  }
3372  decode_transition_key(key, NULL, NULL, NULL, &target_rc);
3373  return target_rc;
3374 }
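/* Editorial example, not part of the original source: the transition key
 * read above is the lrm_rsc_op "transition-key" attribute. A hypothetical
 * key such as
 *
 *   "3:17:0:abcdef01-2345-6789-abcd-ef0123456789"
 *
 * encodes action 3 of transition 17, an expected rc of 0 (the third field,
 * returned here through &target_rc), and the DC's UUID.
 */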
3375 
3376 static enum action_fail_response
3377 get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
3378 {
3379  enum action_fail_response result = action_fail_recover;
3380  action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
3381 
3382  result = action->on_fail;
3383  pe_free_action(action);
3384 
3385  return result;
3386 }
3387 
3388 static void
3389 update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc,
3390  xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set)
3391 {
3392  gboolean clear_past_failure = FALSE;
3393 
3394  CRM_ASSERT(rsc);
3395  CRM_ASSERT(xml_op);
3396 
3397  if (rc == PCMK_OCF_NOT_RUNNING) {
3398  clear_past_failure = TRUE;
3399 
3400  } else if (rc == PCMK_OCF_NOT_INSTALLED) {
3401  rsc->role = RSC_ROLE_STOPPED;
3402 
3403  } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
3404  if (last_failure) {
3405  const char *op_key = get_op_key(xml_op);
3406  const char *last_failure_key = get_op_key(last_failure);
3407 
3408  if (safe_str_eq(op_key, last_failure_key)) {
3409  clear_past_failure = TRUE;
3410  }
3411  }
3412 
3413  if (rsc->role < RSC_ROLE_STARTED) {
3414  set_active(rsc);
3415  }
3416 
3417  } else if (safe_str_eq(task, CRMD_ACTION_START)) {
3418  rsc->role = RSC_ROLE_STARTED;
3419  clear_past_failure = TRUE;
3420 
3421  } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
3422  rsc->role = RSC_ROLE_STOPPED;
3423  clear_past_failure = TRUE;
3424 
3425  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
3426  rsc->role = RSC_ROLE_MASTER;
3427  clear_past_failure = TRUE;
3428 
3429  } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
3430 
3431  if (*on_fail == action_fail_demote) {
3432  // Demote clears an error only if on-fail=demote
3433  clear_past_failure = TRUE;
3434  }
3435  rsc->role = RSC_ROLE_SLAVE;
3436 
3437  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
3438  rsc->role = RSC_ROLE_STARTED;
3439  clear_past_failure = TRUE;
3440 
3441  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
3442  unpack_migrate_to_success(rsc, node, xml_op, data_set);
3443 
3444  } else if (rsc->role < RSC_ROLE_STARTED) {
3445  pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
3446  set_active(rsc);
3447  }
3448 
3449  /* clear any previous failure actions */
3450  if (clear_past_failure) {
3451  switch (*on_fail) {
3452  case action_fail_stop:
3453  case action_fail_fence:
3454  case action_fail_migrate:
3455  case action_fail_standby:
3456  pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
3457  rsc->id, fail2text(*on_fail));
3458  break;
3459 
3460  case action_fail_block:
3461  case action_fail_ignore:
3462  case action_fail_demote:
3463  case action_fail_recover:
3464  case action_fail_restart_container:
3465  *on_fail = action_fail_ignore;
3466  rsc->next_role = RSC_ROLE_UNKNOWN;
3467  break;
3468  case action_fail_reset_remote:
3469  if (rsc->remote_reconnect_ms == 0) {
3470  /* With no reconnect interval, the connection is allowed to
3471  * start again after the remote node is fenced and
3472  * completely stopped. (With a reconnect interval, we wait
3473  * for the failure to be cleared entirely before attempting
3474  * to reconnect.)
3475  */
3476  *on_fail = action_fail_ignore;
3477  rsc->next_role = RSC_ROLE_UNKNOWN;
3478  }
3479  break;
3480  }
3481  }
3482 }
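Illustrative fragment, not part of unpack.c: how unpack_rsc_op() records a successful start via update_resource_state(), assuming the variables already in scope there.

/* Hedged example: after this call rsc->role is RSC_ROLE_STARTED and a soft
 * on-fail response carried in *on_fail is reset to action_fail_ignore. */
update_resource_state(rsc, node, xml_op, CRMD_ACTION_START, PCMK_OCF_OK,
                      *last_failure, on_fail, data_set);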
3483 
3504 static int
3505 remap_monitor_rc(int rc, xmlNode *xml_op, const pe_node_t *node,
3506  const pe_resource_t *rsc, pe_working_set_t *data_set)
3507 {
3508  int remapped_rc = rc;
3509 
3510  switch (rc) {
3511  case PCMK_OCF_DEGRADED:
3512  remapped_rc = PCMK_OCF_OK;
3513  break;
3514 
3515  case PCMK_OCF_DEGRADED_MASTER:
3516  remapped_rc = PCMK_OCF_RUNNING_MASTER;
3517  break;
3518 
3519  default:
3520  break;
3521  }
3522 
3523  if (rc != remapped_rc) {
3524  crm_trace("Remapping monitor result %d to %d", rc, remapped_rc);
3525  if (!node->details->shutdown || node->details->online) {
3526  record_failed_op(xml_op, node, rsc, data_set);
3527  }
3528  }
3529  return remapped_rc;
3530 }
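Illustrative fragment, assuming the variables available inside unpack_rsc_op(): a "degraded" monitor result is downgraded to plain success for scheduling, while the raw result is still recorded as a failed op so the degradation remains visible.

/* Hedged example of the remapping behavior shown above. */
rc = remap_monitor_rc(PCMK_OCF_DEGRADED, xml_op, node, rsc, data_set);
/* rc is now PCMK_OCF_OK; PCMK_OCF_DEGRADED_MASTER would likewise become
 * PCMK_OCF_RUNNING_MASTER */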
3531 
3532 static void
3533 unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
3534  xmlNode **last_failure, enum action_fail_response *on_fail,
3535  pe_working_set_t *data_set)
3536 {
3537  int rc = 0;
3538  int task_id = 0;
3539  int target_rc = 0;
3540  int status = PCMK_LRM_OP_UNKNOWN;
3541  guint interval_ms = 0;
3542  const char *task = NULL;
3543  const char *task_key = NULL;
3544  const char *exit_reason = NULL;
3545  bool expired = FALSE;
3546  resource_t *parent = rsc;
3547  enum action_fail_response failure_strategy = action_fail_recover;
3548 
3549  CRM_CHECK(rsc && node && xml_op, return);
3550 
3551  target_rc = pe__target_rc_from_xml(xml_op);
3552  task_key = get_op_key(xml_op);
3553  task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
3554  exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON);
3555  if (exit_reason == NULL) {
3556  exit_reason = "";
3557  }
3558 
3559  crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
3560  crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
3561  crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
3562  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
3563 
3564  CRM_CHECK(task != NULL, return);
3565  CRM_CHECK(status <= PCMK_LRM_OP_INVALID, return);
3566  CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return);
3567 
3568  if (!strcmp(task, CRMD_ACTION_NOTIFY) ||
3569  !strcmp(task, CRMD_ACTION_METADATA)) {
3570  /* safe to ignore these */
3571  return;
3572  }
3573 
3574  if (is_not_set(rsc->flags, pe_rsc_unique)) {
3575  parent = uber_parent(rsc);
3576  }
3577 
3578  pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
3579  task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
3580 
3581  if (node->details->unclean) {
3582  pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
3583  " Further action depends on the value of the stop's on-fail attribute",
3584  node->details->uname, rsc->id);
3585  }
3586 
3587  /* It should be possible to call remap_monitor_rc() first then call
3588  * check_operation_expiry() only if rc != target_rc, because there should
3589  * never be a fail count without at least one unexpected result in the
3590  * resource history. That would be more efficient by avoiding having to call
3591  * check_operation_expiry() for expected results.
3592  *
3593  * However, we do have such configurations in the scheduler regression
3594  * tests, even if it shouldn't be possible with the current code. It's
3595  * probably a good idea anyway, but that would require updating the test
3596  * inputs to something currently possible.
3597  */
3598 
3599  if ((status != PCMK_LRM_OP_NOT_INSTALLED)
3600  && check_operation_expiry(rsc, node, rc, xml_op, data_set)) {
3601  expired = TRUE;
3602  }
3603 
3604  if (!strcmp(task, CRMD_ACTION_STATUS)) {
3605  rc = remap_monitor_rc(rc, xml_op, node, rsc, data_set);
3606  }
3607 
3608  if (expired && (rc != target_rc)) {
3609  const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
3610 
3611  if (interval_ms == 0) {
3612  crm_notice("Ignoring expired %s failure on %s "
3613  CRM_XS " actual=%d expected=%d magic=%s",
3614  task_key, node->details->uname, rc, target_rc, magic);
3615  goto done;
3616 
3617  } else if(node->details->online && node->details->unclean == FALSE) {
3618  /* Reschedule the recurring monitor. CancelXmlOp() won't work at
3619  * this stage, so as a hacky workaround, forcibly change the restart
3620  * digest so check_action_definition() does what we want later.
3621  *
3622  * @TODO We should skip this if there is a newer successful monitor.
3623  * Also, this causes rescheduling only if the history entry
3624  * has an op-digest (which the expire-non-blocked-failure
3625  * scheduler regression test doesn't, but that may not be a
3626  * realistic scenario in production).
3627  */
3628  crm_notice("Rescheduling %s after failure expired on %s "
3629  CRM_XS " actual=%d expected=%d magic=%s",
3630  task_key, node->details->uname, rc, target_rc, magic);
3631  crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
3632  goto done;
3633  }
3634  }
3635 
3636  /* If the executor reported an operation status of anything but done or
3637  * error, consider that final. But for done or error, we know better whether
3638  * it should be treated as a failure or not, because we know the expected
3639  * result.
3640  */
3641  if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
3642  status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
3643  pe_rsc_trace(rsc, "Remapped %s status to %d", task_key, status);
3644  }
3645 
3646  switch (status) {
3647  case PCMK_LRM_OP_CANCELLED:
3648  // Should never happen
3649  pe_err("Resource history contains cancellation '%s' "
3650  "(%s of %s on %s at %s)",
3651  ID(xml_op), task, rsc->id, node->details->uname,
3652  last_change_str(xml_op));
3653  break;
3654 
3655  case PCMK_LRM_OP_PENDING:
3656  if (!strcmp(task, CRMD_ACTION_START)) {
3657  set_bit(rsc->flags, pe_rsc_start_pending);
3658  set_active(rsc);
3659 
3660  } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) {
3661  rsc->role = RSC_ROLE_MASTER;
3662 
3663  } else if (!strcmp(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
3664  /* If a pending migrate_to action is out on an unclean node,
3665  * we have to force the stop action on the target. */
3666  const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
3667  node_t *target = pe_find_node(data_set->nodes, migrate_target);
3668  if (target) {
3669  stop_action(rsc, target, FALSE);
3670  }
3671  }
3672 
3673  if (rsc->pending_task == NULL) {
3674  if ((interval_ms != 0) || strcmp(task, CRMD_ACTION_STATUS)) {
3675  rsc->pending_task = strdup(task);
3676  rsc->pending_node = node;
3677  } else {
3678  /* Pending probes are not printed, even if pending
3679  * operations are requested. If someone ever requests that
3680  * behavior, enable the below and the corresponding part of
3681  * native.c:native_pending_task().
3682  */
3683 #if 0
3684  rsc->pending_task = strdup("probe");
3685  rsc->pending_node = node;
3686 #endif
3687  }
3688  }
3689  break;
3690 
3691  case PCMK_LRM_OP_DONE:
3692  pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s",
3693  task, rsc->id, node->details->uname,
3694  last_change_str(xml_op), ID(xml_op));
3695  update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
3696  break;
3697 
3698  case PCMK_LRM_OP_NOT_INSTALLED:
3699  failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
3700  if (failure_strategy == action_fail_ignore) {
3701  crm_warn("Cannot ignore failed %s of %s on %s: "
3702  "Resource agent doesn't exist "
3703  CRM_XS " status=%d rc=%d id=%s",
3704  task, rsc->id, node->details->uname, status, rc,
3705  ID(xml_op));
3706  /* Also mark it as pe_rsc_failed later so it is printed as "FAILED" */
3707  *on_fail = action_fail_migrate;
3708  }
3709  resource_location(parent, node, -INFINITY, "hard-error", data_set);
3710  unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3711  break;
3712 
3713  case PCMK_LRM_OP_NOT_CONNECTED:
3714  if (pe__is_guest_or_remote_node(node)
3715  && is_set(node->details->remote_rsc->flags, pe_rsc_managed)) {
3716  /* We should never get into a situation where a managed remote
3717  * connection resource is considered OK but a resource action
3718  * behind the connection gets a "not connected" status. But as a
3719  * fail-safe in case a bug or unusual circumstances do lead to
3720  * that, ensure the remote connection is considered failed.
3721  */
3722  set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
3723  set_bit(node->details->remote_rsc->flags, pe_rsc_stop);
3724  }
3725 
3726  // fall through
3727 
3728  case PCMK_LRM_OP_ERROR:
3729  case PCMK_LRM_OP_ERROR_HARD:
3730  case PCMK_LRM_OP_ERROR_FATAL:
3731  case PCMK_LRM_OP_TIMEOUT:
3732  case PCMK_LRM_OP_NOTSUPPORTED:
3733  case PCMK_LRM_OP_INVALID:
3734 
3735  failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
3736  if ((failure_strategy == action_fail_ignore)
3737  || (failure_strategy == action_fail_restart_container
3738  && !strcmp(task, CRMD_ACTION_STOP))) {
3739 
3740  crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s "
3741  "succeeded " CRM_XS " rc=%d id=%s",
3742  task, services_ocf_exitcode_str(rc),
3743  (*exit_reason? ": " : ""), exit_reason, rsc->id,
3744  node->details->uname, last_change_str(xml_op), rc,
3745  ID(xml_op));
3746 
3747  update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set);
3748  crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
3749  set_bit(rsc->flags, pe_rsc_failure_ignored);
3750 
3751  record_failed_op(xml_op, node, rsc, data_set);
3752 
3753  if ((failure_strategy == action_fail_restart_container)
3754  && cmp_on_fail(*on_fail, action_fail_recover) <= 0) {
3755  *on_fail = failure_strategy;
3756  }
3757 
3758  } else {
3759  unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3760 
3761  if(status == PCMK_LRM_OP_ERROR_HARD) {
3762  do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
3763  "Preventing %s from restarting on %s because "
3764  "of hard failure (%s%s%s)" CRM_XS " rc=%d id=%s",
3765  parent->id, node->details->uname,
3766  services_ocf_exitcode_str(rc),
3767  (*exit_reason? ": " : ""), exit_reason,
3768  rc, ID(xml_op));
3769  resource_location(parent, node, -INFINITY, "hard-error", data_set);
3770 
3771  } else if(status == PCMK_LRM_OP_ERROR_FATAL) {
3772  crm_err("Preventing %s from restarting anywhere because "
3773  "of fatal failure (%s%s%s) " CRM_XS " rc=%d id=%s",
3774  parent->id, services_ocf_exitcode_str(rc),
3775  (*exit_reason? ": " : ""), exit_reason,
3776  rc, ID(xml_op));
3777  resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
3778  }
3779  }
3780  break;
3781  }
3782 
3783  done:
3784  pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s",
3785  rsc->id, task, role2text(rsc->role),
3786  role2text(rsc->next_role));
3787 }
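Illustrative sketch, not part of unpack.c: roughly how a per-resource driver loop (the real one is in unpack_lrm_resources() and its helpers earlier in this file) feeds sorted history entries to unpack_rsc_op(), carrying the last failure and on-fail response across calls. `sorted_op_list` is a hypothetical list of this resource's <lrm_rsc_op> entries sorted by call ID.

/* Hedged example of driving unpack_rsc_op() over a resource's history. */
xmlNode *last_failure = NULL;
enum action_fail_response on_fail = action_fail_ignore;
GListPtr iter = NULL;

for (iter = sorted_op_list; iter != NULL; iter = iter->next) {
    xmlNode *rsc_op = (xmlNode *) iter->data;

    unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
}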
3788 
3789 static void
3790 add_node_attrs(xmlNode *xml_obj, pe_node_t *node, bool overwrite,
3791  pe_working_set_t *data_set)
3792 {
3793  const char *cluster_name = NULL;
3794 
3795  g_hash_table_insert(node->details->attrs,
3796  strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
3797 
3798  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
3799  strdup(node->details->id));
3800  if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
3801  data_set->dc_node = node;
3802  node->details->is_dc = TRUE;
3803  g_hash_table_insert(node->details->attrs,
3804  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
3805  } else {
3806  g_hash_table_insert(node->details->attrs,
3807  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
3808  }
3809 
3810  cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
3811  if (cluster_name) {
3812  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
3813  strdup(cluster_name));
3814  }
3815 
3816  pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, NULL,
3817  node->details->attrs, NULL, overwrite, data_set);
3818 
3819  if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
3820  const char *site_name = pe_node_attribute_raw(node, "site-name");
3821 
3822  if (site_name) {
3823  g_hash_table_insert(node->details->attrs,
3824  strdup(CRM_ATTR_SITE_NAME),
3825  strdup(site_name));
3826 
3827  } else if (cluster_name) {
3828  /* Default to cluster-name if unset */
3829  g_hash_table_insert(node->details->attrs,
3830  strdup(CRM_ATTR_SITE_NAME),
3831  strdup(cluster_name));
3832  }
3833  }
3834 }
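Illustrative fragment: once add_node_attrs() has populated node->details->attrs, built-in attributes (CRM_ATTR_UNAME, CRM_ATTR_ID, CRM_ATTR_IS_DC, ...) and user-defined ones can be read back the same way, assuming `node` is a node that has been unpacked.

/* Hedged example of reading node attributes back. */
const char *name = pe_node_attribute_raw(node, CRM_ATTR_UNAME);
const char *site = pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME);  /* may be NULL */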
3835 
3836 static GListPtr
3837 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
3838 {
3839  int counter = -1;
3840  int stop_index = -1;
3841  int start_index = -1;
3842 
3843  xmlNode *rsc_op = NULL;
3844 
3845  GListPtr gIter = NULL;
3846  GListPtr op_list = NULL;
3847  GListPtr sorted_op_list = NULL;
3848 
3849  /* extract operations */
3850  op_list = NULL;
3851  sorted_op_list = NULL;
3852 
3853  for (rsc_op = __xml_first_child_element(rsc_entry);
3854  rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
3855  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
3856  crm_xml_add(rsc_op, "resource", rsc);
3857  crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
3858  op_list = g_list_prepend(op_list, rsc_op);
3859  }
3860  }
3861 
3862  if (op_list == NULL) {
3863  /* if there are no operations, there is nothing to do */
3864  return NULL;
3865  }
3866 
3867  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
3868 
3869  /* create active recurring operations as optional */
3870  if (active_filter == FALSE) {
3871  return sorted_op_list;
3872  }
3873 
3874  op_list = NULL;
3875 
3876  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
3877 
3878  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
3879  xmlNode *rsc_op = (xmlNode *) gIter->data;
3880 
3881  counter++;
3882 
3883  if (start_index < stop_index) {
3884  crm_trace("Skipping %s: not active", ID(rsc_entry));
3885  break;
3886 
3887  } else if (counter < start_index) {
3888  crm_trace("Skipping %s: old", ID(rsc_op));
3889  continue;
3890  }
3891  op_list = g_list_append(op_list, rsc_op);
3892  }
3893 
3894  g_list_free(sorted_op_list);
3895  return op_list;
3896 }
3897 
3898 GListPtr
3899 find_operations(const char *rsc, const char *node, gboolean active_filter,
3900  pe_working_set_t * data_set)
3901 {
3902  GListPtr output = NULL;
3903  GListPtr intermediate = NULL;
3904 
3905  xmlNode *tmp = NULL;
3906  xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
3907 
3908  node_t *this_node = NULL;
3909 
3910  xmlNode *node_state = NULL;
3911 
3912  for (node_state = __xml_first_child_element(status); node_state != NULL;
3913  node_state = __xml_next_element(node_state)) {
3914 
3915  if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
3916  const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
3917 
3918  if (node != NULL && safe_str_neq(uname, node)) {
3919  continue;
3920  }
3921 
3922  this_node = pe_find_node(data_set->nodes, uname);
3923  if(this_node == NULL) {
3924  CRM_LOG_ASSERT(this_node != NULL);
3925  continue;
3926 
3927  } else if (pe__is_guest_or_remote_node(this_node)) {
3928  determine_remote_online_status(data_set, this_node);
3929 
3930  } else {
3931  determine_online_status(node_state, this_node, data_set);
3932  }
3933 
3934  if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
3935  /* offline nodes run no resources...
3936  * unless stonith is enabled in which case we need to
3937  * make sure rsc start events happen after the stonith
3938  */
3939  xmlNode *lrm_rsc = NULL;
3940 
3941  tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
3942  tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
3943 
3944  for (lrm_rsc = __xml_first_child_element(tmp); lrm_rsc != NULL;
3945  lrm_rsc = __xml_next_element(lrm_rsc)) {
3946  if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
3947 
3948  const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
3949 
3950  if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
3951  continue;
3952  }
3953 
3954  intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
3955  output = g_list_concat(output, intermediate);
3956  }
3957  }
3958  }
3959  }
3960  }
3961 
3962  return output;
3963 }
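Illustrative only, not part of unpack.c: a small sketch of using find_operations() to collect the active history of one resource on one node; the resource and node names are placeholders.

/* Hedged example: log each active history entry, then free the list
 * (the XML nodes themselves still belong to data_set->input). */
static void
log_rsc_history_example(pe_working_set_t *data_set)
{
    GListPtr iter = NULL;
    GListPtr ops = find_operations("my-rsc", "node1", TRUE, data_set);

    for (iter = ops; iter != NULL; iter = iter->next) {
        xmlNode *op = (xmlNode *) iter->data;

        crm_debug("History entry %s", ID(op));
    }
    g_list_free(ops);
}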
PCMK_LRM_OP_CANCELLED
@ PCMK_LRM_OP_CANCELLED
Definition: services.h:122
pe_resource_s::known_on
GHashTable * known_on
Definition: pe_types.h:340
CRM_ATTR_CLUSTER_NAME
#define CRM_ATTR_CLUSTER_NAME
Definition: crm.h:116
pe_resource_s::children
GListPtr children
Definition: pe_types.h:350
pe_resource_s::id
char * id
Definition: pe_types.h:294
stop_action
#define stop_action(rsc, node, optional)
Definition: internal.h:230
XML_NODE_IN_CLUSTER
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:240
rsc_role_e
rsc_role_e
Definition: common.h:98
pe_working_set_s::stonith_timeout
int stonith_timeout
Definition: pe_types.h:130
unpack_tags
gboolean unpack_tags(xmlNode *xml_tags, pe_working_set_t *data_set)
Definition: unpack.c:811
pe_node_shared_s::running_rsc
GListPtr running_rsc
Definition: pe_types.h:209
get_xpath_object
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
Definition: xpath.c:220
sort_rsc_priority
gint sort_rsc_priority(gconstpointer a, gconstpointer b)
Definition: utils.c:428
pe_fc_effective
@ pe_fc_effective
Definition: internal.h:149
PCMK_LRM_OP_NOT_CONNECTED
@ PCMK_LRM_OP_NOT_CONNECTED
Definition: services.h:129
XML_CIB_TAG_STATE
#define XML_CIB_TAG_STATE
Definition: msg_xml.h:158
XML_RULE_ATTR_SCORE
#define XML_RULE_ATTR_SCORE
Definition: msg_xml.h:296
CRM_CHECK
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:157
pe_node_s::weight
int weight
Definition: pe_types.h:218
CRMD_JOINSTATE_MEMBER
#define CRMD_JOINSTATE_MEMBER
Definition: crm.h:162
crm_parse_ms
guint crm_parse_ms(const char *text)
Definition: strings.c:147
clear_bit
#define clear_bit(word, bit)
Definition: crm_internal.h:168
pe_pref
const char * pe_pref(GHashTable *options, const char *name)
Definition: common.c:187
XML_CIB_TAG_LRM
#define XML_CIB_TAG_LRM
Definition: msg_xml.h:225
CRM_ATTR_UNAME
#define CRM_ATTR_UNAME
Definition: crm.h:111
pe_node_shared_s::digest_cache
GHashTable * digest_cache
cache of calculated resource digests
Definition: pe_types.h:214
pe_node_s::details
struct pe_node_shared_s * details
Definition: pe_types.h:221
XML_CIB_TAG_TICKET_STATE
#define XML_CIB_TAG_TICKET_STATE
Definition: msg_xml.h:386
pe_working_set_s::stonith_action
const char * stonith_action
Definition: pe_types.h:125
custom_action
action_t * custom_action(resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set)
Definition: utils.c:480
crm_notice
#define crm_notice(fmt, args...)
Definition: logging.h:243
PCMK_LRM_OP_INVALID
@ PCMK_LRM_OP_INVALID
Definition: services.h:130
action_fail_recover
@ action_fail_recover
Definition: common.h:44
pe_node_shared_s::id
const char * id
Definition: pe_types.h:186
pe_working_set_s::dc_node
pe_node_t * dc_node
Definition: pe_types.h:124
type
enum crm_ais_msg_types type
Definition: internal.h:5
crm_err
#define crm_err(fmt, args...)
Definition: logging.h:241
pe_can_fence
bool pe_can_fence(pe_working_set_t *data_set, node_t *node)
Definition: utils.c:89
internal.h
pe__update_recheck_time
void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set)
Definition: utils.c:2602
XML_NODE_ATTR_RSC_DISCOVERY
#define XML_NODE_ATTR_RSC_DISCOVERY
Definition: msg_xml.h:342
CRMD_JOINSTATE_PENDING
#define CRMD_JOINSTATE_PENDING
Definition: crm.h:161
pe_resource_s::pending_node
pe_node_t * pending_node
Definition: pe_types.h:356
pe_working_set_s::stop_needed
GList * stop_needed
Definition: pe_types.h:168
crm_str_hash
#define crm_str_hash
Definition: util.h:62
crm_element_value_ms
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition: nvpair.c:611
RSC_DIGEST_MATCH
@ RSC_DIGEST_MATCH
Definition: internal.h:318
pe_flag_stop_action_orphans
#define pe_flag_stop_action_orphans
Definition: pe_types.h:100
crm_trace
#define crm_trace(fmt, args...)
Definition: logging.h:247
pe_working_set_s::dc_uuid
char * dc_uuid
Definition: pe_types.h:123
pe_resource_s::meta
GHashTable * meta
Definition: pe_types.h:346
unpack_remote_nodes
gboolean unpack_remote_nodes(xmlNode *xml_resources, pe_working_set_t *data_set)
Definition: unpack.c:612
safe_str_eq
#define safe_str_eq(a, b)
Definition: util.h:61
XML_NVPAIR_ATTR_VALUE
#define XML_NVPAIR_ATTR_VALUE
Definition: msg_xml.h:340
pe_node_shared_s::pending
gboolean pending
Definition: pe_types.h:194
node_remote
@ node_remote
Definition: pe_types.h:71
XML_TAG_ATTR_SETS
#define XML_TAG_ATTR_SETS
Definition: msg_xml.h:163
uber_parent
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:765
ONLINESTATUS
#define ONLINESTATUS
Definition: util.h:36
pe__is_guest_or_remote_node
gboolean pe__is_guest_or_remote_node(pe_node_t *node)
Definition: remote.c:58
XML_ATTR_UNAME
#define XML_ATTR_UNAME
Definition: msg_xml.h:118
pe_ticket_s
Definition: pe_types.h:421
crm_warn
#define crm_warn(fmt, args...)
Definition: logging.h:242
pe_action_s::flags
enum pe_action_flags flags
Definition: pe_types.h:384
free_xml
void free_xml(xmlNode *child)
Definition: xml.c:2130
XML_TAG_TRANSIENT_NODEATTRS
#define XML_TAG_TRANSIENT_NODEATTRS
Definition: msg_xml.h:362
pe_resource_s::fillers
GListPtr fillers
Definition: pe_types.h:354
find_operations
GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set)
Definition: unpack.c:3899
pe_resource_s::running_on
GListPtr running_on
Definition: pe_types.h:339
pe_resource_s::partial_migration_target
pe_node_t * partial_migration_target
Definition: pe_types.h:337
pe_node_shared_s::utilization
GHashTable * utilization
Definition: pe_types.h:213
pe_rsc_is_container
#define pe_rsc_is_container
Definition: pe_types.h:253
xpath_search
xmlXPathObjectPtr xpath_search(xmlNode *xml_top, const char *path)
Definition: xpath.c:145
common_unpack
gboolean common_unpack(xmlNode *xml_obj, resource_t **rsc, resource_t *parent, pe_working_set_t *data_set)
Definition: complex.c:368
pe_node_shared_s::standby_onfail
gboolean standby_onfail
Definition: pe_types.h:193
pe_get_failcount
int pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set)
Definition: failcounts.c:251
pe_flag_quick_location
#define pe_flag_quick_location
Definition: pe_types.h:111
xml.h
Wrappers for and extensions to libxml2.
XML_LRM_ATTR_RC
#define XML_LRM_ATTR_RC
Definition: msg_xml.h:271
pe_node_shared_s::is_dc
gboolean is_dc
Definition: pe_types.h:199
clone_strip
char * clone_strip(const char *last_rsc_id)
Definition: unpack.c:1548
crm_is_true
gboolean crm_is_true(const char *s)
Definition: strings.c:176
xml_contains_remote_node
gboolean xml_contains_remote_node(xmlNode *xml)
Definition: remote.c:92
XML_NODE_EXPECTED
#define XML_NODE_EXPECTED
Definition: msg_xml.h:239
pe_working_set_s::placement_strategy
const char * placement_strategy
Definition: pe_types.h:126
XML_CIB_TAG_STATUS
#define XML_CIB_TAG_STATUS
Definition: msg_xml.h:139
PCMK_OCF_DEGRADED
@ PCMK_OCF_DEGRADED
Definition: services.h:105
PCMK_LRM_OP_TIMEOUT
@ PCMK_LRM_OP_TIMEOUT
Definition: services.h:123
clone_zero
char * clone_zero(const char *last_rsc_id)
Definition: unpack.c:1570
pe_node_shared_s::remote_requires_reset
gboolean remote_requires_reset
Definition: pe_types.h:202
set_bit
#define set_bit(word, bit)
Definition: crm_internal.h:167
resource_object_functions_s::free
void(* free)(pe_resource_t *)
Definition: pe_types.h:54
CRM_ATTR_ID
#define CRM_ATTR_ID
Definition: crm.h:112
pe_fence_node
void pe_fence_node(pe_working_set_t *data_set, node_t *node, const char *reason)
Schedule a fence action for a node.
Definition: unpack.c:78
pe_working_set_s::tags
GHashTable * tags
Definition: pe_types.h:162
calculate_active_ops
void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
Definition: unpack.c:2170
XML_ATTR_ID
#define XML_ATTR_ID
Definition: msg_xml.h:96
pe_action_s::uuid
char * uuid
Definition: pe_types.h:380
XML_TAG_UTILIZATION
#define XML_TAG_UTILIZATION
Definition: msg_xml.h:171
ID
#define ID(x)
Definition: msg_xml.h:415
XML_CIB_TAG_RESOURCE
#define XML_CIB_TAG_RESOURCE
Definition: msg_xml.h:174
pe_create_node
node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t *data_set)
Definition: unpack.c:351
PCMK_LRM_OP_NOTSUPPORTED
@ PCMK_LRM_OP_NOTSUPPORTED
Definition: services.h:124
pe_wo_poweroff
@ pe_wo_poweroff
Definition: unpack.h:39
pe_ticket_s::id
char * id
Definition: pe_types.h:422
pe_err
#define pe_err(fmt...)
Definition: internal.h:21
action_fail_block
@ action_fail_block
Definition: common.h:48
RSC_ROLE_SLAVE
@ RSC_ROLE_SLAVE
Definition: common.h:102
pe_action_s
Definition: pe_types.h:371
pe_node_shared_s::shutdown
gboolean shutdown
Definition: pe_types.h:197
rsc_action_digest_cmp
op_digest_cache_t * rsc_action_digest_cmp(resource_t *rsc, xmlNode *xml_op, node_t *node, pe_working_set_t *data_set)
Definition: utils.c:2108
pe__create_clone_child
pe_resource_t * pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set)
Definition: clone.c:60
pe_check_last_failure
@ pe_check_last_failure
Definition: pe_types.h:177
CRMD_JOINSTATE_NACK
#define CRMD_JOINSTATE_NACK
Definition: crm.h:163
crm_info
#define crm_info(fmt, args...)
Definition: logging.h:244
action_fail_fence
@ action_fail_fence
Definition: common.h:51
pe_flag_startup_fencing
#define pe_flag_startup_fencing
Definition: pe_types.h:105
CRM_LOG_ASSERT
#define CRM_LOG_ASSERT(expr)
Definition: logging.h:143
XML_CIB_TAG_PROPSET
#define XML_CIB_TAG_PROPSET
Definition: msg_xml.h:162
pe__is_remote_node
gboolean pe__is_remote_node(pe_node_t *node)
Definition: remote.c:36
pe__find_bundle_replica
pe_resource_t * pe__find_bundle_replica(const pe_resource_t *bundle, const pe_node_t *node)
Definition: bundle.c:1411
PCMK_OCF_FAILED_MASTER
@ PCMK_OCF_FAILED_MASTER
Definition: services.h:99
CRMD_ACTION_MIGRATED
#define CRMD_ACTION_MIGRATED
Definition: crm.h:170
XML_LRM_ATTR_RSCID
#define XML_LRM_ATTR_RSCID
Definition: msg_xml.h:269
CRM_ATTR_IS_DC
#define CRM_ATTR_IS_DC
Definition: crm.h:115
CRM_XS
#define CRM_XS
Definition: logging.h:34
uname
char uname[MAX_NAME]
Definition: internal.h:7
remote_id_conflict
bool remote_id_conflict(const char *remote_name, pe_working_set_t *data)
Definition: unpack.c:412
XML_LRM_ATTR_TASK
#define XML_LRM_ATTR_TASK
Definition: msg_xml.h:260
CIB_OPTIONS_FIRST
#define CIB_OPTIONS_FIRST
Definition: msg_xml.h:49
id
uint32_t id
Definition: internal.h:2
pe_rsc_orphan_container_filler
#define pe_rsc_orphan_container_filler
Definition: pe_types.h:228
pe_working_set_s::localhost
const char * localhost
Definition: pe_types.h:161
pe_node_s::fixed
gboolean fixed
Definition: pe_types.h:219
action_fail_migrate
@ action_fail_migrate
Definition: common.h:47
demote_action
#define demote_action(rsc, node, optional)
Definition: internal.h:256
XML_RSC_ATTR_MANAGED
#define XML_RSC_ATTR_MANAGED
Definition: msg_xml.h:195
STATUS_PATH_MAX
#define STATUS_PATH_MAX
Definition: unpack.c:2406
action_fail_reset_remote
@ action_fail_reset_remote
Definition: common.h:63
pe_working_set_s::config_hash
GHashTable * config_hash
Definition: pe_types.h:133
role2text
const char * role2text(enum rsc_role_e role)
Definition: common.c:338
crm_strdup_printf
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__
pe_flag_stop_everything
#define pe_flag_stop_everything
Definition: pe_types.h:101
PCMK_LRM_OP_PENDING
@ PCMK_LRM_OP_PENDING
Definition: services.h:120
PCMK_OCF_INVALID_PARAM
@ PCMK_OCF_INVALID_PARAM
Definition: services.h:92
CRMD_ACTION_START
#define CRMD_ACTION_START
Definition: crm.h:172
RSC_ROLE_UNKNOWN
@ RSC_ROLE_UNKNOWN
Definition: common.h:99
crm_debug
#define crm_debug(fmt, args...)
Definition: logging.h:246
PCMK_OCF_INSUFFICIENT_PRIV
@ PCMK_OCF_INSUFFICIENT_PRIV
Definition: services.h:94
CRMD_ACTION_MIGRATE
#define CRMD_ACTION_MIGRATE
Definition: crm.h:169
XML_CIB_TAG_TAG
#define XML_CIB_TAG_TAG
Definition: msg_xml.h:389
CRMD_ACTION_STOP
#define CRMD_ACTION_STOP
Definition: crm.h:175
pe_node_shared_s::standby
gboolean standby
Definition: pe_types.h:192
native_add_running
void native_add_running(resource_t *rsc, node_t *node, pe_working_set_t *data_set)
Definition: native.c:39
sort_op_by_callid
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
Definition: utils.c:1668
pe_action_optional
@ pe_action_optional
Definition: pe_types.h:270
PCMK_LRM_OP_NOT_INSTALLED
@ PCMK_LRM_OP_NOT_INSTALLED
Definition: services.h:128
pe_resource_s::partial_migration_source
pe_node_t * partial_migration_source
Definition: pe_types.h:338
XML_RSC_OP_LAST_CHANGE
#define XML_RSC_OP_LAST_CHANGE
Definition: msg_xml.h:280
pe_tag_s
Definition: pe_types.h:429
determine_online_status
gboolean determine_online_status(xmlNode *node_state, node_t *this_node, pe_working_set_t *data_set)
Definition: unpack.c:1430
get_effective_time
time_t get_effective_time(pe_working_set_t *data_set)
Definition: utils.c:1785
pe_wo_blind
@ pe_wo_blind
Definition: unpack.h:36
no_quorum_demote
@ no_quorum_demote
Definition: pe_types.h:65
do_crm_log
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:122
RSC_STOP
#define RSC_STOP
Definition: crm.h:200
pe_find_clone
@ pe_find_clone
match only clone instances
Definition: pe_types.h:84
crm_xml_add
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition: nvpair.c:313
pe_warn_once
#define pe_warn_once(pe_wo_bit, fmt...)
Definition: unpack.h:47
action_fail_restart_container
@ action_fail_restart_container
Definition: common.h:55
PCMK_OCF_UNKNOWN_ERROR
@ PCMK_OCF_UNKNOWN_ERROR
Definition: services.h:91
pe_create_remote_xml
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
Definition: remote.c:151
XML_LRM_TAG_RESOURCES
#define XML_LRM_TAG_RESOURCES
Definition: msg_xml.h:226
pe_working_set_s
Definition: pe_types.h:118
XML_LRM_TAG_RSC_OP
#define XML_LRM_TAG_RSC_OP
Definition: msg_xml.h:228
pe__shutdown_requested
bool pe__shutdown_requested(pe_node_t *node)
Definition: utils.c:2587
node_member
@ node_member
Definition: pe_types.h:70
crm_element_value
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:519
XML_NODE_IS_FENCED
#define XML_NODE_IS_FENCED
Definition: msg_xml.h:243
node_copy
node_t * node_copy(const node_t *this_node)
Definition: utils.c:132
no_quorum_ignore
@ no_quorum_ignore
Definition: pe_types.h:63
pe_status_private.h
PCMK_LRM_OP_UNKNOWN
@ PCMK_LRM_OP_UNKNOWN
Definition: services.h:119
crm_now_string
const char * crm_now_string(time_t *when)
Definition: iso8601.c:1701
node_hash_from_list
GHashTable * node_hash_from_list(GListPtr list)
Definition: utils.c:188
CRM_TRACE_INIT_DATA
CRM_TRACE_INIT_DATA(pe_status)
XML_CIB_TAG_GROUP
#define XML_CIB_TAG_GROUP
Definition: msg_xml.h:175
sort_node_uname
gint sort_node_uname(gconstpointer a, gconstpointer b)
Definition: utils.c:231
XML_TAG_META_SETS
#define XML_TAG_META_SETS
Definition: msg_xml.h:164
pe_tag_s::refs
GListPtr refs
Definition: pe_types.h:431
crm_log_xml_debug
#define crm_log_xml_debug(xml, text)
Definition: logging.h:254
pe_working_set_s::template_rsc_sets
GHashTable * template_rsc_sets
Definition: pe_types.h:160
pe_wo
uint32_t pe_wo
Definition: unpack.c:51
ticket_new
ticket_t * ticket_new(const char *ticket_id, pe_working_set_t *data_set)
Definition: utils.c:1917
pe_ticket_s::state
GHashTable * state
Definition: pe_types.h:426
pe_base_name_end
const char * pe_base_name_end(const char *id)
Definition: unpack.c:1508
PCMK_LRM_OP_ERROR_FATAL
@ PCMK_LRM_OP_ERROR_FATAL
Definition: services.h:127
rules.h
PCMK_OCF_UNIMPLEMENT_FEATURE
@ PCMK_OCF_UNIMPLEMENT_FEATURE
Definition: services.h:93
add_tag_ref
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
Definition: utils.c:2463
pe_resource_s::container
pe_resource_t * container
Definition: pe_types.h:353
pe__target_rc_from_xml
int pe__target_rc_from_xml(xmlNode *xml_op)
Definition: unpack.c:3364
pe_fence_op
action_t * pe_fence_op(node_t *node, const char *op, bool optional, const char *reason, pe_working_set_t *data_set)
Definition: utils.c:2347
pe_rsc_needs_fencing
#define pe_rsc_needs_fencing
Definition: pe_types.h:256
pe_flag_have_stonith_resource
#define pe_flag_have_stonith_resource
Definition: pe_types.h:95
pe_rsc_unique
#define pe_rsc_unique
Definition: pe_types.h:231
crm_parse_int
int crm_parse_int(const char *text, const char *default_text)
Parse an integer value from a string.
Definition: strings.c:114
pe_order_implies_then
@ pe_order_implies_then
Definition: pe_types.h:454
resource_location
void resource_location(resource_t *rsc, node_t *node, int score, const char *tag, pe_working_set_t *data_set)
Definition: utils.c:1624
XML_LRM_ATTR_EXIT_REASON
#define XML_LRM_ATTR_EXIT_REASON
Definition: msg_xml.h:278
XML_BOOLEAN_FALSE
#define XML_BOOLEAN_FALSE
Definition: msg_xml.h:108
PCMK_LRM_OP_ERROR_HARD
@ PCMK_LRM_OP_ERROR_HARD
Definition: services.h:126
host
AIS_Host host
Definition: internal.h:6
decode_transition_key
gboolean decode_transition_key(const char *key, char **uuid, int *action, int *transition_id, int *target_rc)
Parse a transition key into its constituent parts.
Definition: operations.c:215
pe_ticket_s::standby
gboolean standby
Definition: pe_types.h:425
XML_CIB_TAG_RSC_TEMPLATE
#define XML_CIB_TAG_RSC_TEMPLATE
Definition: msg_xml.h:180
pe_resource_s::clone_name
char * clone_name
Definition: pe_types.h:295
safe_str_neq
gboolean safe_str_neq(const char *a, const char *b)
Definition: strings.c:161
verify_pe_options
void verify_pe_options(GHashTable *options)
Definition: common.c:181
PCMK_OCF_NOT_CONFIGURED
@ PCMK_OCF_NOT_CONFIGURED
Definition: services.h:96
crm_config_warn
#define crm_config_warn(fmt...)
Definition: crm_internal.h:180
PCMK_LRM_OP_DONE
@ PCMK_LRM_OP_DONE
Definition: services.h:121
XPATH_ENABLE_UNFENCING
#define XPATH_ENABLE_UNFENCING
Definition: unpack.c:157
pe_resource_s::parent
pe_resource_t * parent
Definition: pe_types.h:301
crm_parse_interval_spec
guint crm_parse_interval_spec(const char *input)
Definition: utils.c:545
XML_LRM_ATTR_MIGRATE_TARGET
#define XML_LRM_ATTR_MIGRATE_TARGET
Definition: msg_xml.h:286
crm_str
#define crm_str(x)
Definition: logging.h:267
char2score
int char2score(const char *score)
Definition: utils.c:202
XML_LRM_ATTR_RESTART_DIGEST
#define XML_LRM_ATTR_RESTART_DIGEST
Definition: msg_xml.h:276
services.h
Services API.
pe_flag_concurrent_fencing
#define pe_flag_concurrent_fencing
Definition: pe_types.h:97
pe_resource_s::flags
unsigned long long flags
Definition: pe_types.h:321
pe_flag_maintenance_mode
#define pe_flag_maintenance_mode
Definition: pe_types.h:92
pe_resource_s::remote_reconnect_ms
guint remote_reconnect_ms
Definition: pe_types.h:318
pe_flag_enable_unfencing
#define pe_flag_enable_unfencing
Definition: pe_types.h:96
pe_flag_remove_after_stop
#define pe_flag_remove_after_stop
Definition: pe_types.h:104
unpack_nodes
gboolean unpack_nodes(xmlNode *xml_nodes, pe_working_set_t *data_set)
Definition: unpack.c:523
PCMK_OCF_RUNNING_MASTER
@ PCMK_OCF_RUNNING_MASTER
Definition: services.h:98
pe_resource_s::role
enum rsc_role_e role
Definition: pe_types.h:343
pe_rsc_trace
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:19
PCMK_OCF_OK
@ PCMK_OCF_OK
Definition: services.h:90
XML_ATTR_TRANSITION_MAGIC
#define XML_ATTR_TRANSITION_MAGIC
Definition: msg_xml.h:357
XML_NODE_IS_PEER
#define XML_NODE_IS_PEER
Definition: msg_xml.h:241
pe_node_shared_s::remote_was_fenced
gboolean remote_was_fenced
Definition: pe_types.h:203
pe_flag_startup_probes
#define pe_flag_startup_probes
Definition: pe_types.h:107
crm_ends_with
gboolean crm_ends_with(const char *s, const char *match)
Definition: strings.c:313
pe__add_param_check
void pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set)
Definition: remote.c:215
CRM_ASSERT
#define CRM_ASSERT(expr)
Definition: results.h:42
find_xml_node
xmlNode * find_xml_node(xmlNode *cib, const char *node_path, gboolean must_find)
Definition: xml.c:1758
pe__bundle_needs_remote_name
bool pe__bundle_needs_remote_name(pe_resource_t *rsc)
Definition: bundle.c:955
pe__resource_actions
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: utils.c:1576
pe_rsc_start_pending
#define pe_rsc_start_pending
Definition: pe_types.h:245
XML_ATTR_QUORUM_PANIC
#define XML_ATTR_QUORUM_PANIC
Definition: msg_xml.h:84
pe_working_set_s::tickets
GHashTable * tickets
Definition: pe_types.h:134
pe_node_shared_s
Definition: pe_types.h:185
pe_flag_start_failure_fatal
#define pe_flag_start_failure_fatal
Definition: pe_types.h:103
pe__clear_failcount
pe_action_t * pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_working_set_t *data_set)
Schedule a controller operation to clear a fail count.
Definition: failcounts.c:360
CRMD_ACTION_DEMOTE
#define CRMD_ACTION_DEMOTE
Definition: crm.h:180
set_config_flag
#define set_config_flag(data_set, option, flag)
Definition: unpack.c:30
pe_rsc_managed
#define pe_rsc_managed
Definition: pe_types.h:226
pe_action_s::fail_role
enum rsc_role_e fail_role
Definition: pe_types.h:387
action_fail_ignore
@ action_fail_ignore
Definition: common.h:42
iso8601_internal.h
CRMD_ACTION_STATUS
#define CRMD_ACTION_STATUS
Definition: crm.h:186
pe_node_shared_s::unpacked
gboolean unpacked
Definition: pe_types.h:205
pe_resource_s::pending_task
char * pending_task
Definition: pe_types.h:319
unpack.h
XML_LRM_ATTR_INTERVAL_MS
#define XML_LRM_ATTR_INTERVAL_MS
Definition: msg_xml.h:258
unpack_resources
gboolean unpack_resources(xmlNode *xml_resources, pe_working_set_t *data_set)
Definition: unpack.c:749
no_quorum_suicide
@ no_quorum_suicide
Definition: pe_types.h:64
crm_atoi
#define crm_atoi(text, default_text)
Definition: util.h:110
PCMK_OCF_NOT_RUNNING
@ PCMK_OCF_NOT_RUNNING
Definition: services.h:97
node_score_red
int node_score_red
Definition: utils.c:63
pe_node_shared_s::remote_maintenance
gboolean remote_maintenance
Definition: pe_types.h:204
pe_node_attribute_raw
const char * pe_node_attribute_raw(pe_node_t *node, const char *name)
Definition: common.c:471
XML_ATTR_TYPE
#define XML_ATTR_TYPE
Definition: msg_xml.h:99
RSC_DIGEST_UNKNOWN
@ RSC_DIGEST_UNKNOWN
Definition: internal.h:325
pe_rsc_promotable
#define pe_rsc_promotable
Definition: pe_types.h:233
copy_in_properties
void copy_in_properties(xmlNode *target, xmlNode *src)
Definition: xml.c:1830
pe_flag_stonith_enabled
#define pe_flag_stonith_enabled
Definition: pe_types.h:94
pe_node_shared_s::maintenance
gboolean maintenance
Definition: pe_types.h:200
pe__is_guest_node
gboolean pe__is_guest_node(pe_node_t *node)
Definition: remote.c:47
pe_resource_s
Definition: pe_types.h:293
pe_resource_s::allowed_nodes
GHashTable * allowed_nodes
Definition: pe_types.h:341
pe_working_set_s::flags
unsigned long long flags
Definition: pe_types.h:128
pe_node_shared_s::unclean
gboolean unclean
Definition: pe_types.h:195
XML_ATTR_HAVE_WATCHDOG
#define XML_ATTR_HAVE_WATCHDOG
Definition: msg_xml.h:86
unpack_config
gboolean unpack_config(xmlNode *config, pe_working_set_t *data_set)
Definition: unpack.c:179
pe_working_set_s::failed
xmlNode * failed
Definition: pe_types.h:147
RSC_ROLE_STARTED
@ RSC_ROLE_STARTED
Definition: common.h:101
pe_working_set_s::no_quorum_policy
enum pe_quorum_policy no_quorum_policy
Definition: pe_types.h:131
crm_element_value_epoch
int crm_element_value_epoch(const xmlNode *xml, const char *name, time_t *dest)
Retrieve the seconds-since-epoch value of an XML attribute.
Definition: nvpair.c:633
pe_flag_symmetric_cluster
#define pe_flag_symmetric_cluster
Definition: pe_types.h:91
generate_op_key
char * generate_op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key.
Definition: operations.c:39
XML_LRM_ATTR_CALLID
#define XML_LRM_ATTR_CALLID
Definition: msg_xml.h:272
pe_resource_s::failure_timeout
int failure_timeout
Definition: pe_types.h:316
CRM_ATTR_SITE_NAME
#define CRM_ATTR_SITE_NAME
Definition: crm.h:117
XML_CIB_TAG_NODE
#define XML_CIB_TAG_NODE
Definition: msg_xml.h:159
pe_node_shared_s::type
enum node_type type
Definition: pe_types.h:188
pe_node_shared_s::online
gboolean online
Definition: pe_types.h:191
pe_node_shared_s::uname
const char * uname
Definition: pe_types.h:187
no_quorum_stop
@ no_quorum_stop
Definition: pe_types.h:62
pe_free_action
void pe_free_action(action_t *action)
Definition: utils.c:1372
strndup
char * strndup(const char *str, size_t len)
destroy_ticket
void destroy_ticket(gpointer data)
Definition: utils.c:1905
XML_NODE_IS_MAINTENANCE
#define XML_NODE_IS_MAINTENANCE
Definition: msg_xml.h:244
resource_object_functions_s::find_rsc
pe_resource_t *(* find_rsc)(pe_resource_t *parent, const char *search, const pe_node_t *node, int flags)
Definition: pe_types.h:45
pe_rsc_stop
#define pe_rsc_stop
Definition: pe_types.h:239
PCMK_OCF_NOT_INSTALLED
@ PCMK_OCF_NOT_INSTALLED
Definition: services.h:95
pe_flag_stop_rsc_orphans
#define pe_flag_stop_rsc_orphans
Definition: pe_types.h:99
add_node_copy
xmlNode * add_node_copy(xmlNode *new_parent, xmlNode *xml_node)
Definition: xml.c:1948
pe_resource_s::is_remote_node
gboolean is_remote_node
Definition: pe_types.h:324
crm_internal.h
util.h
Utility functions.
pe_node_s
Definition: pe_types.h:217
pe__is_universal_clone
bool pe__is_universal_clone(pe_resource_t *rsc, pe_working_set_t *data_set)
Definition: clone.c:1032
freeXpathObject
void freeXpathObject(xmlXPathObjectPtr xpathObj)
Definition: xpath.c:45
pe_flag_have_quorum
#define pe_flag_have_quorum
Definition: pe_types.h:90
XML_CIB_TAG_OBJ_REF
#define XML_CIB_TAG_OBJ_REF
Definition: msg_xml.h:390
crm.h
A dumping ground.
node_score_green
int node_score_green
Definition: utils.c:64
CRMD_ACTION_PROMOTE
#define CRMD_ACTION_PROMOTE
Definition: crm.h:178
XML_RSC_ATTR_CONTAINER
#define XML_RSC_ATTR_CONTAINER
Definition: msg_xml.h:205
XML_NVPAIR_ATTR_NAME
#define XML_NVPAIR_ATTR_NAME
Definition: msg_xml.h:339
pe_rsc_block
#define pe_rsc_block
Definition: pe_types.h:227
fail2text
const char * fail2text(enum action_fail_response fail)
Definition: common.c:193
XML_BOOLEAN_TRUE
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:107
pe_rsc_failure_ignored
#define pe_rsc_failure_ignored
Definition: pe_types.h:251
pe_resource_s::fns
resource_object_functions_t * fns
Definition: pe_types.h:305
pcmk_ok
#define pcmk_ok
Definition: results.h:57
pe_flag_have_remote_nodes
#define pe_flag_have_remote_nodes
Definition: pe_types.h:109
unpack_status
gboolean unpack_status(xmlNode *status, pe_working_set_t *data_set)
Definition: unpack.c:1099
node_ping
@ node_ping
Definition: pe_types.h:69
pe_node_shared_s::attrs
GHashTable * attrs
Definition: pe_types.h:212
crm_config_err
#define crm_config_err(fmt...)
Definition: crm_internal.h:179
op_digest_cache_s
Definition: internal.h:328
pe_proc_err
#define pe_proc_err(fmt...)
Definition: internal.h:23
pe_rsc_failed
#define pe_rsc_failed
Definition: pe_types.h:243