Skip to content

Reconcile QQ node dead during delete and redeclare #14241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions deps/rabbit/src/amqqueue.erl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
% exclusive_owner
get_exclusive_owner/1,
get_leader_node/1,
get_nodes/1,
% name (#resource)
get_name/1,
set_name/2,
Expand Down Expand Up @@ -394,15 +393,6 @@ get_leader_node(#amqqueue{pid = {_, Leader}}) -> Leader;
get_leader_node(#amqqueue{pid = none}) -> none;
get_leader_node(#amqqueue{pid = Pid}) -> node(Pid).

-spec get_nodes(amqqueue_v2()) -> [node(),...].

get_nodes(Q) ->
case amqqueue:get_type_state(Q) of
#{nodes := Nodes} ->
Nodes;
_ ->
[get_leader_node(Q)]
end.

% operator_policy

Expand Down
2 changes: 1 addition & 1 deletion deps/rabbit/src/rabbit_amqp_management.erl
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ encode_queue(Q, NumMsgs, NumConsumers) ->
{Leader :: node() | none, Replicas :: [node(),...]}.
queue_topology(Q) ->
Leader = amqqueue:get_leader_node(Q),
Replicas = amqqueue:get_nodes(Q),
Replicas = rabbit_amqqueue:get_nodes(Q),
{Leader, Replicas}.

decode_exchange({map, KVList}) ->
Expand Down
14 changes: 8 additions & 6 deletions deps/rabbit/src/rabbit_amqqueue.erl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
-export([list/0, list_durable/0, list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2,
emit_info_all/5, list_local/1, info_local/1,
emit_info_local/4, emit_info_down/4]).
-export([get_nodes/1]).
-export([count/0]).
-export([list_down/1, list_down/2, list_all/1,
count/1, list_names/0, list_names/1, list_local_names/0,
Expand Down Expand Up @@ -1233,6 +1234,12 @@ list() ->
count() ->
rabbit_db_queue:count().

-spec get_nodes(amqqueue:amqqueue_v2()) -> [node(),...].

get_nodes(Q) ->
[{members, Nodes}] = info(Q, [members]),
Nodes.

-spec list_names() -> [name()].

list_names() ->
Expand Down Expand Up @@ -2042,12 +2049,7 @@ pseudo_queue(#resource{kind = queue} = QueueName, Pid, Durable)
).

get_quorum_nodes(Q) ->
case amqqueue:get_type_state(Q) of
#{nodes := Nodes} ->
Nodes;
_ ->
[]
end.
rabbit_amqqueue:get_nodes(Q).

-spec prepend_extra_bcc(Qs) ->
Qs when Qs :: [amqqueue:amqqueue() |
Expand Down
7 changes: 7 additions & 0 deletions deps/rabbit/src/rabbit_core_ff.erl
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,10 @@
stability => stable,
depends_on => ['rabbitmq_4.0.0']
}}).

-rabbit_feature_flag(
{'track_qq_members_uids',
#{desc => "Track queue members UIDs in the metadata store",
stability => stable,
depends_on => []
}}).
17 changes: 2 additions & 15 deletions deps/rabbit/src/rabbit_jms_selector_parser.erl
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
-file("rabbit_jms_selector_parser.yrl", 0).
-module(rabbit_jms_selector_parser).
-file("rabbit_jms_selector_parser.erl", 3).
-export([parse/1, parse_and_scan/1, format_error/1]).
-file("rabbit_jms_selector_parser.yrl", 122).

Expand All @@ -26,9 +24,7 @@ process_escape_char({string, Line, Value}) ->
%%
%% %CopyrightBegin%
%%
%% SPDX-License-Identifier: Apache-2.0
%%
%% Copyright Ericsson AB 1996-2025. All Rights Reserved.
%% Copyright Ericsson AB 1996-2021. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
Expand All @@ -50,16 +46,10 @@ process_escape_char({string, Line, Value}) ->

-type yecc_ret() :: {'error', _} | {'ok', _}.

-ifdef (YECC_PARSE_DOC).
-doc ?YECC_PARSE_DOC.
-endif.
-spec parse(Tokens :: list()) -> yecc_ret().
parse(Tokens) ->
yeccpars0(Tokens, {no_func, no_location}, 0, [], []).

-ifdef (YECC_PARSE_AND_SCAN_DOC).
-doc ?YECC_PARSE_AND_SCAN_DOC.
-endif.
-spec parse_and_scan({function() | {atom(), atom()}, [_]}
| {atom(), atom(), [_]}) -> yecc_ret().
parse_and_scan({F, A}) ->
Expand All @@ -68,9 +58,6 @@ parse_and_scan({M, F, A}) ->
Arity = length(A),
yeccpars0([], {{fun M:F/Arity, A}, no_location}, 0, [], []).

-ifdef (YECC_FORMAT_ERROR_DOC).
-doc ?YECC_FORMAT_ERROR_DOC.
-endif.
-spec format_error(any()) -> [char() | list()].
format_error(Message) ->
case io_lib:deep_char_list(Message) of
Expand Down Expand Up @@ -212,7 +199,7 @@ yecctoken2string1(Other) ->



-file("rabbit_jms_selector_parser.erl", 215).
-file("rabbit_jms_selector_parser.erl", 202).

-dialyzer({nowarn_function, yeccpars2/7}).
-compile({nowarn_unused_function, yeccpars2/7}).
Expand Down
2 changes: 1 addition & 1 deletion deps/rabbit/src/rabbit_queue_location.erl
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ select_members(Size, _, AllNodes, RunningNodes, _, _, GetQueues) ->
Counters0 = maps:from_list([{N, 0} || N <- lists:delete(?MODULE:node(), AllNodes)]),
Queues = GetQueues(),
Counters = lists:foldl(fun(Q, Acc) ->
#{nodes := Nodes} = amqqueue:get_type_state(Q),
Nodes = rabbit_amqqueue:get_nodes(Q),
lists:foldl(fun(N, A)
when is_map_key(N, A) ->
maps:update_with(N, fun(C) -> C+1 end, A);
Expand Down
131 changes: 104 additions & 27 deletions deps/rabbit/src/rabbit_quorum_queue.erl
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,17 @@ start_cluster(Q) ->
{LeaderNode, FollowerNodes} =
rabbit_queue_location:select_leader_and_followers(Q, QuorumSize),
LeaderId = {RaName, LeaderNode},
UIDs = maps:from_list([{Node, ra:new_uid(ra_lib:to_binary(RaName))}
|| Node <- [LeaderNode | FollowerNodes]]),
NewQ0 = amqqueue:set_pid(Q, LeaderId),
NewQ1 = amqqueue:set_type_state(NewQ0,
#{nodes => [LeaderNode | FollowerNodes]}),
NewQ1 = case rabbit_feature_flags:is_enabled(track_qq_members_uids) of
false ->
amqqueue:set_type_state(NewQ0,
#{nodes => [LeaderNode | FollowerNodes]});
true ->
amqqueue:set_type_state(NewQ0,
#{nodes => UIDs})
end,

Versions = [V || {ok, V} <- erpc:multicall(FollowerNodes,
rabbit_fifo, version, [],
Expand Down Expand Up @@ -722,7 +730,7 @@ repair_amqqueue_nodes(Q0) ->
{Name, _} = amqqueue:get_pid(Q0),
Members = ra_leaderboard:lookup_members(Name),
RaNodes = [N || {_, N} <- Members],
#{nodes := Nodes} = amqqueue:get_type_state(Q0),
Nodes = get_nodes(Q0),
case lists:sort(RaNodes) =:= lists:sort(Nodes) of
true ->
%% up to date
Expand All @@ -731,7 +739,18 @@ repair_amqqueue_nodes(Q0) ->
%% update amqqueue record
Fun = fun (Q) ->
TS0 = amqqueue:get_type_state(Q),
TS = TS0#{nodes => RaNodes},
TS = case rabbit_feature_flags:is_enabled(track_qq_members_uids)
andalso has_uuid_tracking(TS0)
of
false ->
TS0#{nodes => RaNodes};
true ->
RaUids = maps:from_list([{N, erpc:call(N, ra_directory, uid_of,
[?RA_SYSTEM, Name],
?RPC_TIMEOUT)}
|| N <- RaNodes]),
TS0#{nodes => RaUids}
end,
amqqueue:set_type_state(Q, TS)
end,
_ = rabbit_amqqueue:update(QName, Fun),
Expand Down Expand Up @@ -791,6 +810,23 @@ recover(_Vhost, Queues) ->
ServerId = {Name, node()},
QName = amqqueue:get_name(Q0),
MutConf = make_mutable_config(Q0),
RaUId = ra_directory:uid_of(?RA_SYSTEM, Name),
Copy link
Contributor Author

@LoisSotoLopez LoisSotoLopez Aug 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Figured out what I was doing wrong but I'm not sure what's the best approach to fix it. Let me recap and raise some questions.

So when a QQ is declared I am setting the #{node() => ra_uid()} map in the Queue Type State. I'll refer to those values in the map as NodeRaUids, as opposed to the UId associated to each QQ as a ra cluster ( the one generated by calling rabbit_quorum_queue:make_ra_conf), which I'll refer to as ClusterRaUid.

When a queue is deleted and re-declared the Queue Type state gets re-generated (no doubt about that because it's a new queue). Therefore between queue reincarnations the Queue Type state will change. However, the ClusterRaUid will not change for those nodes that were dead during the delete+redeclare.

I was checking, on member recovery, whether the ClusterRaUid, as retrieved with ra_directory:uid_of, was associated to the current node in the map of NodeRaUids. That's not right.

My current approach is storing the NodeRaUids map in disk, as it is done for ClusterRaId, and on recover compare that in-disk map with the one in the QueueTypeState.

My questions for you guys are: does that last paragraph sound right? and if it does, what API should I use to store that in-disk copy of the NodeRaUids map? Is there any ra_ module that I could use?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we already store ClusterRaId that way, I guess we can persist more metadata that the Ra members won't otherwise have/preserve.

Copy link
Contributor Author

@LoisSotoLopez LoisSotoLopez Aug 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ClusterRaUid gets persisted through ra_directory.erl which seems designed to strictly storing Ra cluster's specific pieces of related information. Maybe we just need a qq_nodes_uids dets for the sole purpose of storing those NodeRaUids. Or using Rabbit's metadata storage.

Edited: forget about this, found the error that led to this confusion.

#{nodes := Nodes} = QTypeState0 = amqqueue:get_type_state(Q0),
QTypeState = case Nodes of
List when is_list(List) ->
%% Queue is not aware of node to uid mapping, do nothing
QTypeState0;
#{node() := RaUId} ->
%% Queue is aware and uid for current node is correct, do nothing
QTypeState0;
_ ->
%% Queue is aware but either current node has no UId or it
%% does not match the one returned by ra_directory, regen uid
maybe_delete_data_dir(RaUId),
NewRaUId = ra:new_uid(ra_lib:to_binary(Name)),
QTypeState0#{nodes := Nodes#{node() => NewRaUId}}
end,
Q = amqqueue:set_type_state(Q0, QTypeState),
Res = case ra:restart_server(?RA_SYSTEM, ServerId, MutConf) of
ok ->
% queue was restarted, good
Expand All @@ -803,7 +839,7 @@ recover(_Vhost, Queues) ->
[rabbit_misc:rs(QName), Err1]),
% queue was never started on this node
% so needs to be started from scratch.
case start_server(make_ra_conf(Q0, ServerId)) of
case start_server(make_ra_conf(Q, ServerId)) of
ok -> ok;
Err2 ->
rabbit_log:warning("recover: quorum queue ~w could not"
Expand All @@ -825,8 +861,7 @@ recover(_Vhost, Queues) ->
%% present in the rabbit_queue table and not just in
%% rabbit_durable_queue
%% So many code paths are dependent on this.
ok = rabbit_db_queue:set_dirty(Q0),
Q = Q0,
ok = rabbit_db_queue:set_dirty(Q),
case Res of
ok ->
{[Q | R0], F0};
Expand Down Expand Up @@ -1207,12 +1242,17 @@ cleanup_data_dir() ->
maybe_delete_data_dir(UId) ->
_ = ra_directory:unregister_name(?RA_SYSTEM, UId),
Dir = ra_env:server_data_dir(?RA_SYSTEM, UId),
{ok, Config} = ra_log:read_config(Dir),
case maps:get(machine, Config) of
{module, rabbit_fifo, _} ->
ra_lib:recursive_delete(Dir);
_ ->
ok
case filelib:is_dir(Dir) of
false ->
ok;
true ->
{ok, Config} = ra_log:read_config(Dir),
case maps:get(machine, Config) of
{module, rabbit_fifo, _} ->
ra_lib:recursive_delete(Dir);
_ ->
ok
end
end.

policy_changed(Q) ->
Expand Down Expand Up @@ -1378,16 +1418,29 @@ add_member(Q, Node, Membership) ->
do_add_member(Q, Node, Membership, ?MEMBER_CHANGE_TIMEOUT).


do_add_member(Q, Node, Membership, Timeout)
when ?is_amqqueue(Q) andalso
?amqqueue_is_quorum(Q) andalso
do_add_member(Q0, Node, Membership, Timeout)
when ?is_amqqueue(Q0) andalso
?amqqueue_is_quorum(Q0) andalso
is_atom(Node) ->
{RaName, _} = amqqueue:get_pid(Q),
QName = amqqueue:get_name(Q),
{RaName, _} = amqqueue:get_pid(Q0),
QName = amqqueue:get_name(Q0),
%% TODO parallel calls might crash this, or add a duplicate in quorum_nodes
ServerId = {RaName, Node},
Members = members(Q),

Members = members(Q0),
QTypeState0 = #{nodes := Nodes} = amqqueue:get_type_state(Q0),
NewRaUId = ra:new_uid(ra_lib:to_binary(RaName)),
QTypeState = case Nodes of
L when is_list(L) ->
%% Queue is not aware of node to uid mapping, just add the new node
QTypeState0#{nodes => lists:usort([Node | Nodes])};
#{Node := _} ->
%% Queue is aware and uid for targeted node exists, do nothing
QTypeState0;
_ ->
%% Queue is aware but current node has no UId, regen uid
QTypeState0#{nodes => Nodes#{Node => NewRaUId}}
end,
Q = amqqueue:set_type_state(Q0, QTypeState),
MachineVersion = erpc_call(Node, rabbit_fifo, version, [], infinity),
Conf = make_ra_conf(Q, ServerId, Membership, MachineVersion),
case ra:start_server(?RA_SYSTEM, Conf) of
Expand All @@ -1403,8 +1456,12 @@ do_add_member(Q, Node, Membership, Timeout)
{ok, {RaIndex, RaTerm}, Leader} ->
Fun = fun(Q1) ->
Q2 = update_type_state(
Q1, fun(#{nodes := Nodes} = Ts) ->
Ts#{nodes => lists:usort([Node | Nodes])}
Q1, fun(#{nodes := NodesList} = Ts) when is_list(NodesList) ->
Ts#{nodes => lists:usort([Node | NodesList])};
(#{nodes := #{Node := _}} = Ts) ->
Ts;
(#{nodes := NodesMap} = Ts) when is_map(NodesMap) ->
Ts#{nodes => maps:put(Node, NewRaUId, NodesMap)}
end),
amqqueue:set_pid(Q2, Leader)
end,
Expand Down Expand Up @@ -1477,8 +1534,10 @@ delete_member(Q, Node) when ?amqqueue_is_quorum(Q) ->
Fun = fun(Q1) ->
update_type_state(
Q1,
fun(#{nodes := Nodes} = Ts) ->
Ts#{nodes => lists:delete(Node, Nodes)}
fun(#{nodes := Nodes} = Ts) when is_list(Nodes) ->
Ts#{nodes => lists:delete(Node, Nodes)};
(#{nodes := Nodes} = Ts) when is_map(Nodes) ->
Ts#{nodes => maps:remove(Node, Nodes)}
end)
end,
_ = rabbit_amqqueue:update(QName, Fun),
Expand Down Expand Up @@ -1986,7 +2045,15 @@ make_ra_conf(Q, ServerId, TickTimeout,
QName = amqqueue:get_name(Q),
RaMachine = ra_machine(Q),
[{ClusterName, _} | _] = Members = members(Q),
UId = ra:new_uid(ra_lib:to_binary(ClusterName)),
{_, Node} = ServerId,
UId = case amqqueue:get_type_state(Q) of
#{nodes := #{Node := Id}} ->
Id;
_ ->
%% Queue was declared on an older version of RabbitMQ
%% and does not have the node to uid mappings
ra:new_uid(ra_lib:to_binary(ClusterName))
end,
FName = rabbit_misc:rs(QName),
Formatter = {?MODULE, format_ra_event, [QName]},
LogCfg = #{uid => UId,
Expand Down Expand Up @@ -2016,7 +2083,12 @@ make_mutable_config(Q) ->

get_nodes(Q) when ?is_amqqueue(Q) ->
#{nodes := Nodes} = amqqueue:get_type_state(Q),
Nodes.
case Nodes of
List when is_list(List) ->
List;
Map when is_map(Map) ->
maps:keys(Map)
end.

get_connected_nodes(Q) when ?is_amqqueue(Q) ->
ErlangNodes = [node() | nodes()],
Expand Down Expand Up @@ -2125,7 +2197,7 @@ force_checkpoint_on_queue(QName) ->
{ok, Q} when ?amqqueue_is_quorum(Q) ->
{RaName, _} = amqqueue:get_pid(Q),
rabbit_log:debug("Sending command to force ~ts to take a checkpoint", [QNameFmt]),
Nodes = amqqueue:get_nodes(Q),
Nodes = rabbit_amqqueue:get_nodes(Q),
_ = [ra:cast_aux_command({RaName, Node}, force_checkpoint)
|| Node <- Nodes],
ok;
Expand Down Expand Up @@ -2372,3 +2444,8 @@ queue_vm_stats_sups() ->
queue_vm_ets() ->
{[quorum_ets],
[[ra_log_ets]]}.

has_uuid_tracking(#{nodes := Nodes}) when is_map(Nodes) ->
true;
has_uuid_tracking(_QTypeState) ->
false.
4 changes: 2 additions & 2 deletions deps/rabbit/src/rabbit_stream_coordinator.erl
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ stop() ->

new_stream(Q, LeaderNode)
when ?is_amqqueue(Q) andalso is_atom(LeaderNode) ->
#{name := StreamId,
nodes := Nodes} = amqqueue:get_type_state(Q),
#{name := StreamId} = amqqueue:get_type_state(Q),
Nodes = rabbit_amqqueue:get_nodes(Q),
%% assertion leader is in nodes configuration
true = lists:member(LeaderNode, Nodes),
process_command({new_stream, StreamId,
Expand Down
Loading