diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c index aa2e3b9f3b4..bee282cb0f8 100644 --- a/daemons/attrd/attrd_attributes.c +++ b/daemons/attrd/attrd_attributes.c @@ -282,3 +282,195 @@ attrd_nvpair_id(const attribute_t *attr, const char *node_state_id) pcmk__xml_sanitize_id(nvpair_id); return nvpair_id; } + +/*! + * \internal + * \brief Check whether an attribute is one that must be written to the CIB + * + * \param[in] a Attribute to check + * + * \return false if in standalone mode or \p a is NULL or private, otherwise true + */ +bool +attrd_for_cib(const attribute_t *a) +{ + return !stand_alone && (a != NULL) + && !pcmk__is_set(a->flags, attrd_attr_is_private); +} + +/*! + * \internal + * \brief Drop NULL attribute values as indicated by given function + * + * Drop all NULL node attribute values that a given function indicates should + * be, based on the XML ID of an element that was removed from the CIB. + * + * \param[in] cib_id ID of XML element that was removed from CIB + * (a name/value pair, an attribute set, or a node state) + * \param[in] set_type If not NULL, drop only attributes with this set type + * \param[in] peer_attrd_ver Source peer's protocol version (-1 if unknown) + * \param[in] func Function to call for every attribute/value combination + */ +static void +drop_removed_values(const char *cib_id, const char *set_type, int peer_attrd_ver, + bool (*func)(const attribute_t *, const char *, + const char *)) +{ + int our_attrd_ver; + bool drop_immediately = false; + attribute_t *a = NULL; + GHashTableIter attr_iter; + const char *entry_type = pcmk__s(set_type, "status entry"); // for log + + CRM_CHECK((cib_id != NULL) && (func != NULL), return); + + pcmk__scan_min_int(ATTRD_PROTOCOL_VERSION, &our_attrd_ver, -1); + drop_immediately = (peer_attrd_ver != -1) + && ATTRD_SUPPORTS_CLEARING_CIB(our_attrd_ver) + && !ATTRD_SUPPORTS_CLEARING_CIB(peer_attrd_ver); + + // Check every attribute ... 
+ g_hash_table_iter_init(&attr_iter, attributes); + while (g_hash_table_iter_next(&attr_iter, NULL, (gpointer *) &a)) { + attribute_value_t *v = NULL; + GHashTableIter value_iter; + + if (!attrd_for_cib(a) + || !pcmk__str_eq(a->set_type, set_type, pcmk__str_null_matches)) { + continue; + } + + // Check every value of the attribute ... + g_hash_table_iter_init(&value_iter, a->values); + while (g_hash_table_iter_next(&value_iter, NULL, (gpointer *) &v)) { + const char *id = NULL; + + if ((v->current != NULL) && !drop_immediately) { + continue; + } + + id = attrd_get_node_xml_id(v->nodename); + if (id == NULL) { + /* This shouldn't be a significant issue, since we will know the + * XML ID if *any* attribute for the node has ever been written. + */ + crm_trace("Ignoring %s[%s] after CIB erasure of %s %s because " + "its node XML ID is unknown (possibly attribute was " + "never written to CIB)", + a->id, v->nodename, entry_type, cib_id); + continue; + } + + if (!func(a, id, cib_id)) { + crm_trace("%s != %s", id, cib_id); + continue; + } + + if (drop_immediately) { + crm_debug("Dropping %s[%s] immediately", a->id, v->nodename); + } else { + crm_debug("Dropping %s[%s] after CIB erasure of %s %s", + a->id, v->nodename, entry_type, cib_id); + } + + g_hash_table_iter_remove(&value_iter); + } + } +} + +/*! + * \internal + * \brief Check whether an attribute value has a given XML ID + * + * \param[in] a Attribute being checked + * \param[in] xml_id XML ID of node state that attribute value is for + * \param[in] cib_id ID of name/value pair element that was removed from CIB + * + * \return \c true if value matches XML ID, otherwise \c false + */ +static bool +nvpair_matches(const attribute_t *a, const char *xml_id, const char *cib_id) +{ + char *id = attrd_nvpair_id(a, xml_id); + bool rc = pcmk__str_eq(id, cib_id, pcmk__str_none); + + free(id); + return rc; +} + +/*! 
+ * \internal + * \brief Drop attribute value corresponding to given removed CIB entry + * + * \param[in] cib_id ID of name/value pair element that was removed from CIB + */ +void +attrd_drop_removed_value(const char *set_type, const char *cib_id) +{ + drop_removed_values(cib_id, set_type, -1, nvpair_matches); +} + +/*! + * \internal + * \brief Check whether an attribute value has a given attribute set ID + * + * \param[in] a Attribute being checked + * \param[in] xml_id XML ID of node state that attribute value is for + * \param[in] cib_id ID of attribute set that was removed from CIB + * + * \return \c true if value matches XML ID, otherwise \c false + */ +static bool +set_id_matches(const attribute_t *a, const char *xml_id, const char *cib_id) +{ + char *id = attrd_set_id(a, xml_id); + bool rc = false; + + if (pcmk__str_eq(id, cib_id, pcmk__str_none)) { + rc = true; + } + + free(id); + return rc; +} + +/*! + * \internal + * \brief Drop all removed attribute values for an attribute set + * + * \param[in] set_type XML element name of set that was removed + * \param[in] cib_id ID of attribute set that was removed from CIB + */ +void +attrd_drop_removed_set(const char *set_type, const char *cib_id) +{ + drop_removed_values(cib_id, set_type, -1, set_id_matches); +} + +/*! + * \internal + * \brief Check whether an attribute value has a given node state XML ID + * + * \param[in] a Attribute being checked + * \param[in] xml_id XML ID of node state that attribute value is for + * \param[in] cib_id ID of node state that was removed from CIB + * + * \return \c true if value matches XML ID, otherwise \c false + */ +static bool +node_matches(const attribute_t *a, const char *xml_id, const char *cib_id) +{ + return pcmk__str_eq(cib_id, xml_id, pcmk__str_none); +} + +/*! 
+ * \internal + * \brief Drop all removed attribute values for a node + * + * \param[in] cib_id ID of node state that was removed from CIB + */ +void +attrd_drop_removed_values(const char *cib_id, int peer_attrd_ver) +{ + drop_removed_values(cib_id, NULL, peer_attrd_ver, node_matches); +} diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index a92bfe223bf..0bbcf0d60bc 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -34,7 +34,7 @@ attrd_cib_destroy_cb(gpointer user_data) cib->cmds->signoff(cib); - if (attrd_shutting_down(false)) { + if (attrd_shutting_down()) { crm_info("Disconnected from the CIB manager"); } else { @@ -45,6 +45,87 @@ attrd_cib_destroy_cb(gpointer user_data) } } +/*! + * \internal + * \brief Check a patchset change for deletion of node attribute values + * + * \param[in] xml Patchset change element + * \param[in] data Pointer to int: source peer's attrd protocol version + * + * \return pcmk_rc_ok (to always continue to next patchset change) + */ +static int +drop_values_in_deletion(xmlNode *xml, void *data) +{ + const char *value = NULL; + GRegex *regex = NULL; + GMatchInfo *match_info = NULL; + gchar *node_id = NULL; + gchar *set_type = NULL; + gchar *set_id = NULL; + gchar *attr_id = NULL; + + // Skip this change if it does not look like a deletion + value = pcmk__xe_get(xml, PCMK_XA_OPERATION); + if (!pcmk__str_eq(value, "delete", pcmk__str_none)) { + return pcmk_rc_ok; + } + + value = pcmk__xe_get(xml, PCMK_XA_PATH); + if (value == NULL) { + crm_warn("Ignoring malformed deletion in " + "CIB change notification: No " PCMK_XA_PATH); + return pcmk_rc_ok; + } + + regex = g_regex_new("^/" PCMK_XE_CIB + "/" PCMK_XE_STATUS + "/" PCMK__XE_NODE_STATE + "\\[@" PCMK_XA_ID "='(?<NODE_ID>[^']+)'\\]" + "(?:" + "/" PCMK__XE_TRANSIENT_ATTRIBUTES + "\\[@" PCMK_XA_ID "='[^']+'\\]" + "(?:" + "/(?<SET_TYPE>[^[/]+)" + "\\[@" PCMK_XA_ID "='(?<SET_ID>[^']+)'\\]" + "(?:" + "/" PCMK_XE_NVPAIR + "\\[@" PCMK_XA_ID "='(?<ATTR_ID>[^']+)'\\]" + ")?" + ")?" 
+ ")?$", + 0, 0, NULL); + + if (!g_regex_match(regex, value, 0, &match_info)) { + goto done; + } + + node_id = g_match_info_fetch_named(match_info, "NODE_ID"); + set_type = g_match_info_fetch_named(match_info, "SET_TYPE"); + set_id = g_match_info_fetch_named(match_info, "SET_ID"); + attr_id = g_match_info_fetch_named(match_info, "ATTR_ID"); + + if (!pcmk__str_empty(attr_id)) { + attrd_drop_removed_value(set_type, attr_id); + + } else if (!pcmk__str_empty(set_type)) { + CRM_CHECK(!pcmk__str_empty(set_id), goto done); + attrd_drop_removed_set(set_type, set_id); + + } else if (!pcmk__str_empty(node_id)) { + attrd_drop_removed_values(node_id, *(int *) data); + } + +done: + g_free(node_id); + g_free(set_type); + g_free(set_id); + g_free(attr_id); + g_match_info_free(match_info); + g_regex_unref(regex); + return pcmk_rc_ok; +} + static void attrd_cib_updated_cb(const char *event, xmlNode *msg) { @@ -57,7 +138,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg) } if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_ALERTS)) { - if (attrd_shutting_down(true)) { + if (attrd_shutting_down()) { crm_debug("Ignoring alerts change in CIB during shutdown"); } else { mainloop_set_trigger(attrd_config_read); @@ -68,9 +149,26 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg) client_name = pcmk__xe_get(msg, PCMK__XA_CIB_CLIENTNAME); if (!cib__client_triggers_refresh(client_name)) { + const char *peer = pcmk__xe_get(msg, PCMK__XA_SRC); + int peer_attrd_ver = attrd_get_peer_protocol_ver(peer); + /* This change came from a source that ensured the CIB is consistent * with our attributes table, so we don't need to write anything out. + * If a removed attribute has been erased, we can forget it now. 
+ */ + int format = 1; + + if ((pcmk__xe_get_int(patchset, PCMK_XA_FORMAT, &format) != pcmk_rc_ok) + || (format != 2)) { + crm_warn("Can't handle CIB patch format %d", format); + return; + } + + /* This won't modify patchset, but we need to break const to match the + * function signature. */ + pcmk__xe_foreach_child((xmlNode *) patchset, PCMK_XE_CHANGE, + drop_values_in_deletion, &peer_attrd_ver); return; } @@ -82,7 +180,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg) if (status_changed || pcmk__cib_element_in_patchset(patchset, PCMK_XE_NODES)) { - if (attrd_shutting_down(true)) { + if (attrd_shutting_down()) { crm_debug("Ignoring node change in CIB during shutdown"); return; } @@ -216,8 +314,8 @@ void attrd_cib_init(void) { /* We have no attribute values in memory, so wipe the CIB to match. This is - * normally done by the DC's controller when this node leaves the cluster, but - * this handles the case where the node restarted so quickly that the + * normally done by the writer when this node leaves the cluster, but this + * handles the case where the node restarted so quickly that the * cluster layer didn't notice. * * \todo If the attribute manager respawns after crashing (see @@ -494,18 +592,12 @@ write_attribute(attribute_t *a, bool ignore_delay) GHashTableIter iter; GHashTable *alert_attribute_value = NULL; int rc = pcmk_ok; - bool should_write = true; + bool should_write = attrd_for_cib(a); if (a == NULL) { return; } - // Private attributes (or any in standalone mode) are not written to the CIB - if (stand_alone || pcmk__is_set(a->flags, attrd_attr_is_private)) { - should_write = false; - } - - /* If this attribute will be written to the CIB ... 
*/ if (should_write) { /* Defer the write if now's not a good time */ if (a->update && (a->update < last_cib_op_done)) { @@ -557,6 +649,7 @@ write_attribute(attribute_t *a, bool ignore_delay) while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { const char *node_xml_id = NULL; const char *prev_xml_id = NULL; + pcmk__node_status_t *peer = NULL; if (!should_write) { private_updates++; @@ -575,12 +668,23 @@ write_attribute(attribute_t *a, bool ignore_delay) // A Pacemaker Remote node's XML ID is the same as its name node_xml_id = v->nodename; + } else if (v->current == NULL) { + /* If a value was removed, check the caches for the node XML ID, + * but don't create a new cache entry. We don't want to re-create a + * purged node. + */ + peer = pcmk__search_node_caches(0, v->nodename, prev_xml_id, + pcmk__node_search_any + |pcmk__node_search_cluster_cib); + node_xml_id = pcmk__cluster_get_xml_id(peer); + if (node_xml_id == NULL) { + node_xml_id = prev_xml_id; + } + } else { // This creates a cluster node cache entry if none exists - pcmk__node_status_t *peer = pcmk__get_node(0, v->nodename, - prev_xml_id, - pcmk__node_search_any); - + peer = pcmk__get_node(0, v->nodename, prev_xml_id, + pcmk__node_search_any); node_xml_id = pcmk__cluster_get_xml_id(peer); if (node_xml_id == NULL) { node_xml_id = prev_xml_id; @@ -608,8 +712,8 @@ write_attribute(attribute_t *a, bool ignore_delay) if (rc != pcmk_rc_ok) { crm_err("Couldn't add %s[%s]='%s' to CIB transaction: %s " QB_XS " node XML ID %s", - a->id, v->nodename, v->current, pcmk_rc_str(rc), - node_xml_id); + a->id, v->nodename, pcmk__s(v->current, "(unset)"), + pcmk_rc_str(rc), node_xml_id); continue; } @@ -645,6 +749,7 @@ write_attribute(attribute_t *a, bool ignore_delay) "attrd_cib_callback", attrd_cib_callback, free)) { // Transmit alert of the attribute + // @TODO Do this in callback only if write was successful send_alert_attributes_value(a, alert_attribute_value); } } diff --git a/daemons/attrd/attrd_corosync.c 
b/daemons/attrd/attrd_corosync.c index 26ccdc90f3a..3e50fc57654 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -46,7 +46,7 @@ attrd_peer_message(pcmk__node_status_t *peer, xmlNode *xml) return; } - if (attrd_shutting_down(false)) { + if (attrd_shutting_down()) { /* If we're shutting down, we want to continue responding to election * ops as long as we're a cluster member (because our vote may be * needed). Ignore all other messages. @@ -129,7 +129,7 @@ attrd_cpg_dispatch(cpg_handle_t handle, static void attrd_cpg_destroy(gpointer unused) { - if (attrd_shutting_down(false)) { + if (attrd_shutting_down()) { crm_info("Disconnected from Corosync process group"); } else { @@ -287,17 +287,6 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer, pcmk__str_update(&v->current, value); attrd_set_attr_flags(a, attrd_attr_changed); - if (pcmk__str_eq(host, attrd_cluster->priv->node_name, pcmk__str_casei) - && pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) { - - if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) { - attrd_set_requesting_shutdown(); - - } else { - attrd_clear_requesting_shutdown(); - } - } - // Write out new value or start dampening timer if (a->timeout_ms && a->timer) { crm_trace("Delaying write of %s %s for dampening", @@ -549,12 +538,35 @@ attrd_peer_remove(const char *host, bool uncache, const char *source) host, source, (uncache? "and" : "without")); g_hash_table_iter_init(&aIter, attributes); - while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { - if(g_hash_table_remove(a->values, host)) { - crm_debug("Removed %s[%s] for peer %s", a->id, host, source); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) &a)) { + /* If the attribute won't be written to the CIB, we can drop the value + * now. 
Otherwise we need to set it NULL and wait for a notification + * that it was erased, because if there's no writer or the current + * writer fails to write it then leaves, we may become the writer and + * need to do it. + */ + if (attrd_for_cib(a)) { + attribute_value_t *v = g_hash_table_lookup(a->values, host); + + if ((v != NULL) && (v->current != NULL)) { + crm_debug("Removed %s[%s] (by setting NULL) for %s", + a->id, host, source); + pcmk__str_update(&(v->current), NULL); + attrd_set_attr_flags(a, attrd_attr_changed); + } + } else if (g_hash_table_remove(a->values, host)) { + crm_debug("Removed %s[%s] immediately for %s", + a->id, host, source); } } + if (attrd_election_won()) { + attrd_cib_erase_transient_attrs(host); // Wipe from CIB + } else { + attrd_start_election_if_needed(); // Make sure CIB gets updated + } + + // Remove node from caches if requested if (uncache) { pcmk__purge_node_from_cache(host, 0); attrd_forget_node_xml_id(host); diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c index 74e57ed377f..729130c934c 100644 --- a/daemons/attrd/attrd_elections.c +++ b/daemons/attrd/attrd_elections.c @@ -41,7 +41,7 @@ attrd_start_election_if_needed(void) { if ((peer_writer == NULL) && (election_state(attrd_cluster) != election_in_progress) - && !attrd_shutting_down(false)) { + && !attrd_shutting_down()) { crm_info("Starting an election to determine the writer"); election_vote(attrd_cluster); @@ -63,7 +63,7 @@ attrd_handle_election_op(const pcmk__node_status_t *peer, xmlNode *xml) pcmk__xe_set(xml, PCMK__XA_SRC, peer->name); // Don't become writer if we're shutting down - rc = election_count_vote(attrd_cluster, xml, !attrd_shutting_down(false)); + rc = election_count_vote(attrd_cluster, xml, !attrd_shutting_down()); switch(rc) { case election_start: diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c index 4e32f4dc9a1..f3ff2f6c101 100644 --- a/daemons/attrd/attrd_ipc.c +++ b/daemons/attrd/attrd_ipc.c @@ -491,7 +491,7 
@@ static int32_t attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("New client connection %p", c); - if (attrd_shutting_down(false)) { + if (attrd_shutting_down()) { crm_info("Ignoring new connection from pid %d during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c index 8500e974a35..109592b153f 100644 --- a/daemons/attrd/attrd_utils.c +++ b/daemons/attrd/attrd_utils.c @@ -25,7 +25,6 @@ cib_t *the_cib = NULL; -static bool requesting_shutdown = false; static bool shutting_down = false; static GMainLoop *mloop = NULL; @@ -34,45 +33,17 @@ static GMainLoop *mloop = NULL; */ GHashTable *peer_protocol_vers = NULL; -/*! - * \internal - * \brief Set requesting_shutdown state - */ -void -attrd_set_requesting_shutdown(void) -{ - requesting_shutdown = true; -} - -/*! - * \internal - * \brief Clear requesting_shutdown state - */ -void -attrd_clear_requesting_shutdown(void) -{ - requesting_shutdown = false; -} - /*! * \internal * \brief Check whether local attribute manager is shutting down * - * \param[in] if_requested If \c true, also consider presence of - * \c PCMK__NODE_ATTR_SHUTDOWN attribute - * - * \return \c true if local attribute manager has begun shutdown sequence - * or (if \p if_requested is \c true) whether local node has a nonzero - * \c PCMK__NODE_ATTR_SHUTDOWN attribute set, otherwise \c false - * \note Most callers should pass \c false for \p if_requested, because the - * attribute manager needs to continue performing while the controller is - * shutting down, and even needs to be eligible for election in case all - * nodes are shutting down. + * \return \c true if local attribute manager has begun shutdown sequence, + * otherwise \c false */ bool -attrd_shutting_down(bool if_requested) +attrd_shutting_down(void) { - return shutting_down || (if_requested && requesting_shutdown); + return shutting_down; } /*! 
@@ -267,6 +238,23 @@ attrd_remove_peer_protocol_ver(const char *host) } } +int +attrd_get_peer_protocol_ver(const char *host) +{ + gpointer key; + + if (peer_protocol_vers == NULL) { + return -1; + } + + key = g_hash_table_lookup(peer_protocol_vers, host); + if (key == NULL) { + return -1; + } + + return GPOINTER_TO_INT(key); +} + /*! * \internal * \brief When a peer node broadcasts a message with its protocol version, keep diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h index d9423c8915e..4e48f087adb 100644 --- a/daemons/attrd/pacemaker-attrd.h +++ b/daemons/attrd/pacemaker-attrd.h @@ -43,11 +43,13 @@ * 5 2.1.5 Peers can request confirmation of a sent message * 6 2.1.7 PCMK__ATTRD_CMD_PEER_REMOVE supports PCMK__XA_REAP * 7 3.0.0 "flush" support dropped + * 8 3.0.2 attrd clears transient attributes from the CIB as well */ -#define ATTRD_PROTOCOL_VERSION "7" +#define ATTRD_PROTOCOL_VERSION "8" #define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) #define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) +#define ATTRD_SUPPORTS_CLEARING_CIB(x) ((x) >= 8) #define attrd_send_ack(client, id, flags) \ pcmk__ipc_send_ack((client), (id), (flags), PCMK__XE_ACK, \ @@ -56,10 +58,8 @@ void attrd_init_mainloop(void); void attrd_run_mainloop(void); -void attrd_set_requesting_shutdown(void); -void attrd_clear_requesting_shutdown(void); void attrd_free_waitlist(void); -bool attrd_shutting_down(bool if_requested); +bool attrd_shutting_down(void); void attrd_shutdown(int nsig); void attrd_init_ipc(void); void attrd_ipc_fini(void); @@ -211,6 +211,10 @@ void attrd_free_attribute_value(gpointer data); attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); char *attrd_set_id(const attribute_t *attr, const char *node_state_id); char *attrd_nvpair_id(const attribute_t *attr, const char *node_state_id); +bool attrd_for_cib(const attribute_t *a); +void attrd_drop_removed_value(const char *set_type, const char *cib_id); +void attrd_drop_removed_set(const 
char *set_type, const char *cib_id); +void attrd_drop_removed_values(const char *cib_id, int peer_attrd_ver); enum attrd_write_options { attrd_write_changed = 0, @@ -224,6 +228,7 @@ void attrd_write_or_elect_attribute(attribute_t *a); extern int minimum_protocol_version; void attrd_remove_peer_protocol_ver(const char *host); void attrd_update_minimum_protocol_ver(const char *host, const char *value); +int attrd_get_peer_protocol_ver(const char *host); mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c index ea2bbf80aea..9d6daba4bb6 100644 --- a/daemons/controld/controld_attrd.c +++ b/daemons/controld/controld_attrd.c @@ -106,8 +106,15 @@ update_attrd_list(GList *attrs, uint32_t opts) } } +/*! + * \internal + * \brief Ask attribute manager to purge a node and its transient attributes + * + * \param[in] node_name Node to purge + * \param[in] from_cache If true, purge from node caches as well + */ void -update_attrd_remote_node_removed(const char *host, const char *user_name) +controld_purge_node_attrs(const char *node_name, bool from_cache) { int rc = pcmk_rc_ok; @@ -115,14 +122,15 @@ update_attrd_remote_node_removed(const char *host, const char *user_name) rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); } if (rc == pcmk_rc_ok) { - crm_trace("Asking attribute manager to purge Pacemaker Remote node %s", - host); - rc = pcmk__attrd_api_purge(attrd_api, host, true); + crm_debug("Asking %s to purge transient attributes%s for %s", + pcmk_ipc_name(attrd_api, true), + (from_cache? 
" and node cache" : ""), node_name); + rc = pcmk__attrd_api_purge(attrd_api, node_name, from_cache); } if (rc != pcmk_rc_ok) { - crm_err("Could not purge Pacemaker Remote node %s " - "in attribute manager%s: %s " QB_XS " rc=%d", - host, when(), pcmk_rc_str(rc), rc); + crm_err("Could not purge node %s from %s%s: %s " + QB_XS " rc=%d", node_name, pcmk_ipc_name(attrd_api, true), + when(), pcmk_rc_str(rc), rc); } } diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c index acde403cb7e..ad46e710a15 100644 --- a/daemons/controld/controld_callbacks.c +++ b/daemons/controld/controld_callbacks.c @@ -234,20 +234,10 @@ peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, pcmk__str_casei) && !pcmk__cluster_is_node_active(node)) { - /* The DC has left, so delete its transient attributes and - * trigger a new election. - * - * A DC sends its shutdown request to all peers, who update the - * DC's expected state to down. This avoids fencing upon - * deletion of its transient attributes. 
- */ + // The DC has left, so trigger a new election crm_notice("Our peer on the DC (%s) is dead", controld_globals.dc_name); - register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); - controld_delete_node_state(node->name, controld_section_attrs, - cib_none); - } else if (AM_I_DC || pcmk__is_set(controld_globals.flags, controld_dc_left) || (controld_globals.dc_name == NULL)) { @@ -257,10 +247,6 @@ peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, */ if (appeared) { controld_trigger_fencing_history_sync(false); - } else { - controld_delete_node_state(node->name, - controld_section_attrs, - cib_none); } } break; diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c index b2e3ed28c78..75a377c4657 100644 --- a/daemons/controld/controld_cib.c +++ b/daemons/controld/controld_cib.c @@ -279,30 +279,18 @@ cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, "[not(@" PCMK_OPT_SHUTDOWN_LOCK ") " \ "or " PCMK_OPT_SHUTDOWN_LOCK "<%lld]" -// Node's PCMK__XE_TRANSIENT_ATTRIBUTES section (name 1x) -#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" PCMK__XE_TRANSIENT_ATTRIBUTES - -// Everything under PCMK__XE_NODE_STATE (name 1x) -#define XPATH_NODE_ALL XPATH_NODE_STATE "/*" - -/* Unlocked history + transient attributes - * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x, name 1x) - */ -#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS - /*! 
* \internal - * \brief Get the XPath and description of a node state section to be deleted + * \brief Get the XPath and description of resource history to be deleted * - * \param[in] uname Desired node - * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to be deleted - * \param[out] xpath Where to store XPath of \p section - * \param[out] desc If not \c NULL, where to store description of \p section + * \param[in] uname Name of node to delete resource history for + * \param[in] unlocked_only If true, delete history of only unlocked resources + * \param[out] xpath Where to store XPath for history deletion + * \param[out] desc If not NULL, where to store loggable description */ void -controld_node_state_deletion_strings(const char *uname, - enum controld_section_e section, - char **xpath, char **desc) +controld_node_history_deletion_strings(const char *uname, bool unlocked_only, + char **xpath, char **desc) { const char *desc_pre = NULL; @@ -310,33 +298,13 @@ controld_node_state_deletion_strings(const char *uname, long long expire = (long long) time(NULL) - controld_globals.shutdown_lock_limit; - switch (section) { - case controld_section_lrm: - *xpath = pcmk__assert_asprintf(XPATH_NODE_LRM, uname); - desc_pre = "resource history"; - break; - case controld_section_lrm_unlocked: - *xpath = pcmk__assert_asprintf(XPATH_NODE_LRM_UNLOCKED, uname, - uname, expire); - desc_pre = "resource history (other than shutdown locks)"; - break; - case controld_section_attrs: - *xpath = pcmk__assert_asprintf(XPATH_NODE_ATTRS, uname); - desc_pre = "transient attributes"; - break; - case controld_section_all: - *xpath = pcmk__assert_asprintf(XPATH_NODE_ALL, uname); - desc_pre = "all state"; - break; - case controld_section_all_unlocked: - *xpath = pcmk__assert_asprintf(XPATH_NODE_ALL_UNLOCKED, uname, - uname, expire, uname); - desc_pre = "all state (other than shutdown locks)"; - break; - default: - // We called this function incorrectly - pcmk__assert(false); - break; + if 
(unlocked_only) { + *xpath = pcmk__assert_asprintf(XPATH_NODE_LRM_UNLOCKED, + uname, uname, expire); + desc_pre = "resource history (other than shutdown locks)"; + } else { + *xpath = pcmk__assert_asprintf(XPATH_NODE_LRM, uname); + desc_pre = "resource history"; } if (desc != NULL) { @@ -346,15 +314,14 @@ controld_node_state_deletion_strings(const char *uname, /*! * \internal - * \brief Delete subsection of a node's CIB \c PCMK__XE_NODE_STATE + * \brief Delete a node's resource history from the CIB * - * \param[in] uname Desired node - * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to delete - * \param[in] options CIB call options to use + * \param[in] uname Name of node to delete resource history for + * \param[in] unlocked_only If true, delete history of only unlocked resources + * \param[in] options CIB call options to use */ void -controld_delete_node_state(const char *uname, enum controld_section_e section, - int options) +controld_delete_node_history(const char *uname, bool unlocked_only, int options) { cib_t *cib = controld_globals.cib_conn; char *xpath = NULL; @@ -363,8 +330,7 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, pcmk__assert((uname != NULL) && (cib != NULL)); - controld_node_state_deletion_strings(uname, section, &xpath, &desc); - + controld_node_history_deletion_strings(uname, unlocked_only, &xpath, &desc); cib__set_call_options(options, "node state deletion", cib_xpath|cib_multiple); cib_rc = cib->cmds->remove(cib, xpath, NULL, options); diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h index b8622d52280..116db64924f 100644 --- a/daemons/controld/controld_cib.h +++ b/daemons/controld/controld_cib.h @@ -1,5 +1,5 @@ /* - * Copyright 2004-2024 the Pacemaker project contributors + * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -46,20 +46,11 @@ int controld_update_cib(const char *section, xmlNode *data, int options, void *)); unsigned int cib_op_timeout(void); -// Subsections of PCMK__XE_NODE_STATE -enum controld_section_e { - controld_section_lrm, - controld_section_lrm_unlocked, - controld_section_attrs, - controld_section_all, - controld_section_all_unlocked -}; - -void controld_node_state_deletion_strings(const char *uname, - enum controld_section_e section, - char **xpath, char **desc); -void controld_delete_node_state(const char *uname, - enum controld_section_e section, int options); +void controld_node_history_deletion_strings(const char *uname, + bool unlocked_only, + char **xpath, char **desc); +void controld_delete_node_history(const char *uname, bool unlocked_only, + int options); int controld_delete_resource_history(const char *rsc_id, const char *node, const char *user_name, int call_options); diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 0671e6b9c7c..5dc12a65a7e 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1074,8 +1074,7 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys, } /* Now delete the copy in the CIB */ - controld_delete_node_state(lrm_state->node_name, controld_section_lrm, - cib_none); + controld_delete_node_history(lrm_state->node_name, false, cib_none); } /*! 
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index a39bd3b355b..af0bfb7bdb2 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -247,7 +247,13 @@ update_node_state_after_fencing(const char *target, const char *target_xml_id) crm_debug("Updating node state for %s after fencing (call %d)", target, rc); fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated); - controld_delete_node_state(peer->name, controld_section_all, cib_none); + // Delete node's resource history from CIB + controld_delete_node_history(peer->name, false, cib_none); + + // Ask attribute manager to delete node's transient attributes + // @TODO: This is the only call to controld_purge_node_attrs that doesn't + // want to also purge the node from the caches. Why? + controld_purge_node_attrs(peer->name, false); } /*! diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index cb1a3d7270a..8aceecb2358 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -771,7 +771,8 @@ do_dc_join_ack(long long action, pcmk__node_status_t *peer = NULL; enum controld_join_phase phase = controld_join_none; - enum controld_section_e section = controld_section_lrm; + const bool unlocked_only = pcmk__is_set(controld_globals.flags, + controld_shutdown_lock_enabled); char *xpath = NULL; xmlNode *state = join_ack->xml; xmlNode *execd_state = NULL; @@ -833,10 +834,8 @@ do_dc_join_ack(long long action, } // Delete relevant parts of node's current executor state from CIB - if (pcmk__is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { - section = controld_section_lrm_unlocked; - } - controld_node_state_deletion_strings(join_from, section, &xpath, NULL); + controld_node_history_deletion_strings(join_from, unlocked_only, &xpath, + NULL); rc = cib->cmds->remove(cib, xpath, NULL, cib_xpath|cib_multiple|cib_transaction); diff --git 
a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c index 2061a7c48be..bae45297fff 100644 --- a/daemons/controld/controld_remote_ra.c +++ b/daemons/controld/controld_remote_ra.c @@ -200,36 +200,18 @@ should_purge_attributes(pcmk__node_status_t *node) return true; } -static enum controld_section_e -section_to_delete(bool purge) -{ - if (pcmk__is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { - if (purge) { - return controld_section_all_unlocked; - } else { - return controld_section_lrm_unlocked; - } - } else { - if (purge) { - return controld_section_all; - } else { - return controld_section_lrm; - } - } -} - static void purge_remote_node_attrs(int call_opt, pcmk__node_status_t *node) { - bool purge = should_purge_attributes(node); - enum controld_section_e section = section_to_delete(purge); + const bool unlocked_only = pcmk__is_set(controld_globals.flags, + controld_shutdown_lock_enabled); - /* Purge node from attrd's memory */ - if (purge) { - update_attrd_remote_node_removed(node->name, NULL); + // Purge node's transient attributes (from attribute manager and CIB) + if (should_purge_attributes(node)) { + controld_purge_node_attrs(node->name, true); } - controld_delete_node_state(node->name, section, call_opt); + controld_delete_node_history(node->name, unlocked_only, call_opt); } /*! @@ -311,37 +293,29 @@ remote_node_up(const char *node_name) pcmk__xml_free(update); } -enum down_opts { - DOWN_KEEP_LRM, - DOWN_ERASE_LRM -}; - /*! 
* \internal * \brief Handle cluster communication related to pacemaker_remote node leaving * * \param[in] node_name Name of lost node - * \param[in] opts Whether to keep or erase LRM history + * \param[in] erase_lrm If \c true, erase the LRM history */ static void -remote_node_down(const char *node_name, const enum down_opts opts) +remote_node_down(const char *node_name, bool erase_lrm) { xmlNode *update; int call_opt = crmd_cib_smart_opt(); pcmk__node_status_t *node = NULL; - /* Purge node from attrd's memory */ - update_attrd_remote_node_removed(node_name, NULL); + // Purge node's transient attributes (from attribute manager and CIB) + controld_purge_node_attrs(node_name, true); - /* Normally, only node attributes should be erased, and the resource history - * should be kept until the node comes back up. However, after a successful - * fence, we want to clear the history as well, so we don't think resources - * are still running on the node. + /* Normally, the resource history should be kept until the node comes back + * up. However, after a successful fence, clear the history so we don't + * think resources are still running on the node. 
*/ - if (opts == DOWN_ERASE_LRM) { - controld_delete_node_state(node_name, controld_section_all, call_opt); - } else { - controld_delete_node_state(node_name, controld_section_attrs, call_opt); + if (erase_lrm) { + controld_delete_node_history(node_name, false, call_opt); } /* Ensure node is in the remote peer cache with lost state */ @@ -400,7 +374,7 @@ check_remote_node_state(const remote_ra_cmd_t *cmd) if (ra_data) { if (!pcmk__is_set(ra_data->status, takeover_complete)) { /* Stop means down if we didn't successfully migrate elsewhere */ - remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM); + remote_node_down(cmd->rsc_id, false); } else if (AM_I_DC == FALSE) { /* Only the connection host and DC track node state, * so if the connection migrated elsewhere and we aren't DC, @@ -678,7 +652,7 @@ remote_lrm_op_callback(lrmd_event_data_t * op) lrm_state->node_name); /* Do roughly what a 'stop' on the remote-resource would do */ handle_remote_ra_stop(lrm_state, NULL); - remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM); + remote_node_down(lrm_state->node_name, false); /* now fake the reply of a successful 'stop' */ synthesize_lrmd_success(NULL, lrm_state->node_name, PCMK_ACTION_STOP); @@ -1353,11 +1327,11 @@ remote_ra_process_pseudo(xmlNode *xml) * peer cache state will be incorrect unless and until the guest is * recovered. 
*/ - if (result) { + if (result != NULL) { const char *remote = pcmk__xe_id(result); - if (remote) { - remote_node_down(remote, DOWN_ERASE_LRM); + if (remote != NULL) { + remote_node_down(remote, true); } } } diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h index e6338882e76..262e0d1f39b 100644 --- a/daemons/controld/controld_utils.h +++ b/daemons/controld/controld_utils.h @@ -69,7 +69,7 @@ void crm_update_quorum(gboolean quorum, gboolean force_update); void controld_close_attrd_ipc(void); void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node); void update_attrd_list(GList *attrs, uint32_t opts); -void update_attrd_remote_node_removed(const char *host, const char *user_name); +void controld_purge_node_attrs(const char *node_name, bool from_cache); void update_attrd_clear_failures(const char *host, const char *rsc, const char *op, const char *interval_spec, gboolean is_remote_node);