Skip to content

Commit c253e7e

Browse files
authored
Merge pull request #10397 from Icinga/activation-priority-10179
Checkable#ProcessCheckResult(): discard🗑️ CR or delay its producers shutdown
2 parents 23ecc98 + 36743f3 commit c253e7e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+360
-214
lines changed

lib/base/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ set(base_SOURCES
8787
unixsocket.cpp unixsocket.hpp
8888
utility.cpp utility.hpp
8989
value.cpp value.hpp value-operators.cpp
90+
wait-group.cpp wait-group.hpp
9091
win32.hpp
9192
workqueue.cpp workqueue.hpp
9293
)

lib/base/wait-group.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
2+
3+
#include "base/wait-group.hpp"
4+
5+
using namespace icinga;
6+
7+
bool StoppableWaitGroup::try_lock_shared()
8+
{
9+
std::unique_lock lock (m_Mutex);
10+
11+
if (m_Stopped) {
12+
return false;
13+
}
14+
15+
++m_SharedLocks;
16+
return true;
17+
}
18+
19+
void StoppableWaitGroup::unlock_shared()
20+
{
21+
std::unique_lock lock (m_Mutex);
22+
23+
if (!--m_SharedLocks && m_Stopped) {
24+
lock.unlock();
25+
m_CV.notify_all();
26+
}
27+
}
28+
29+
/**
30+
* Disallow new shared locks, wait for all existing ones.
31+
*/
32+
void StoppableWaitGroup::Join()
33+
{
34+
std::unique_lock lock (m_Mutex);
35+
36+
m_Stopped = true;
37+
m_CV.wait(lock, [this] { return !m_SharedLocks; });
38+
}

lib/base/wait-group.hpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
2+
3+
#pragma once
4+
5+
#include "base/object.hpp"
6+
#include <condition_variable>
7+
#include <cstdint>
8+
#include <mutex>
9+
10+
namespace icinga
11+
{
12+
13+
/**
14+
* A synchronization interface that allows concurrent shared locking.
15+
*
16+
* @ingroup base
17+
*/
18+
class WaitGroup : public Object
19+
{
20+
public:
21+
DECLARE_PTR_TYPEDEFS(WaitGroup);
22+
23+
virtual bool try_lock_shared() = 0;
24+
virtual void unlock_shared() = 0;
25+
};
26+
27+
/**
28+
* A thread-safe wait group that can be stopped to prevent further shared locking.
29+
*
30+
* @ingroup base
31+
*/
32+
class StoppableWaitGroup : public WaitGroup
33+
{
34+
public:
35+
DECLARE_PTR_TYPEDEFS(StoppableWaitGroup);
36+
37+
StoppableWaitGroup() = default;
38+
StoppableWaitGroup(const StoppableWaitGroup&) = delete;
39+
StoppableWaitGroup(StoppableWaitGroup&&) = delete;
40+
StoppableWaitGroup& operator=(const StoppableWaitGroup&) = delete;
41+
StoppableWaitGroup& operator=(StoppableWaitGroup&&) = delete;
42+
43+
bool try_lock_shared() override;
44+
void unlock_shared() override;
45+
void Join();
46+
47+
private:
48+
std::mutex m_Mutex;
49+
std::condition_variable m_CV;
50+
uint_fast32_t m_SharedLocks = 0;
51+
bool m_Stopped = false;
52+
};
53+
54+
}

lib/checker/checkercomponent.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ void CheckerComponent::Stop(bool runtimeRemoved)
8181
m_CV.notify_all();
8282
}
8383

84+
m_WaitGroup->Join();
8485
m_ResultTimer->Stop(true);
8586
m_Thread.join();
8687

@@ -248,7 +249,7 @@ void CheckerComponent::CheckThreadProc()
248249
void CheckerComponent::ExecuteCheckHelper(const Checkable::Ptr& checkable)
249250
{
250251
try {
251-
checkable->ExecuteCheck();
252+
checkable->ExecuteCheck(m_WaitGroup);
252253
} catch (const std::exception& ex) {
253254
CheckResult::Ptr cr = new CheckResult();
254255
cr->SetState(ServiceUnknown);
@@ -262,7 +263,7 @@ void CheckerComponent::ExecuteCheckHelper(const Checkable::Ptr& checkable)
262263
cr->SetExecutionStart(now);
263264
cr->SetExecutionEnd(now);
264265

265-
checkable->ProcessCheckResult(cr);
266+
checkable->ProcessCheckResult(cr, m_WaitGroup);
266267

267268
Log(LogCritical, "checker", output);
268269
}

lib/checker/checkercomponent.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "base/configobject.hpp"
99
#include "base/timer.hpp"
1010
#include "base/utility.hpp"
11+
#include "base/wait-group.hpp"
1112
#include <boost/multi_index_container.hpp>
1213
#include <boost/multi_index/ordered_index.hpp>
1314
#include <boost/multi_index/key_extractors.hpp>
@@ -77,6 +78,7 @@ class CheckerComponent final : public ObjectImpl<CheckerComponent>
7778
CheckableSet m_IdleCheckables;
7879
CheckableSet m_PendingCheckables;
7980

81+
StoppableWaitGroup::Ptr m_WaitGroup = new StoppableWaitGroup();
8082
Timer::Ptr m_ResultTimer;
8183

8284
void CheckThreadProc();

lib/compat/externalcommandlistener.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ void ExternalCommandListener::Start(bool runtimeCreated)
5050
*/
5151
void ExternalCommandListener::Stop(bool runtimeRemoved)
5252
{
53+
m_WaitGroup->Join();
54+
5355
Log(LogInformation, "ExternalCommandListener")
5456
<< "'" << GetName() << "' stopped.";
5557

@@ -136,7 +138,7 @@ void ExternalCommandListener::CommandPipeThread(const String& commandPath)
136138
Log(LogInformation, "ExternalCommandListener")
137139
<< "Executing external command: " << command;
138140

139-
ExternalCommandProcessor::Execute(command);
141+
ExternalCommandProcessor::Execute(m_WaitGroup, command);
140142
} catch (const std::exception& ex) {
141143
Log(LogWarning, "ExternalCommandListener")
142144
<< "External command failed: " << DiagnosticInformation(ex, false);

lib/compat/externalcommandlistener.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include "compat/externalcommandlistener-ti.hpp"
77
#include "base/objectlock.hpp"
8+
#include "base/wait-group.hpp"
89
#include "base/timer.hpp"
910
#include "base/utility.hpp"
1011
#include <thread>
@@ -29,6 +30,8 @@ class ExternalCommandListener final : public ObjectImpl<ExternalCommandListener>
2930
void Stop(bool runtimeRemoved) override;
3031

3132
private:
33+
StoppableWaitGroup::Ptr m_WaitGroup = new StoppableWaitGroup();
34+
3235
#ifndef _WIN32
3336
std::thread m_CommandThread;
3437

lib/db_ido/idochecktask.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616

1717
using namespace icinga;
1818

19-
REGISTER_FUNCTION_NONCONST(Internal, IdoCheck, &IdoCheckTask::ScriptFunc, "checkable:cr:resolvedMacros:useResolvedMacros");
19+
REGISTER_FUNCTION_NONCONST(Internal, IdoCheck, &IdoCheckTask::ScriptFunc, "checkable:cr:producer:resolvedMacros:useResolvedMacros");
2020

2121
static void ReportIdoCheck(
22-
const Checkable::Ptr& checkable, const CheckCommand::Ptr& commandObj,
23-
const CheckResult::Ptr& cr, String output, ServiceState state = ServiceUnknown
22+
const Checkable::Ptr& checkable, const CheckCommand::Ptr& commandObj, const CheckResult::Ptr& cr,
23+
const WaitGroup::Ptr& producer, String output, ServiceState state = ServiceUnknown
2424
)
2525
{
2626
if (Checkable::ExecuteCommandProcessFinishedHandler) {
@@ -36,12 +36,12 @@ static void ReportIdoCheck(
3636
} else {
3737
cr->SetState(state);
3838
cr->SetOutput(output);
39-
checkable->ProcessCheckResult(cr);
39+
checkable->ProcessCheckResult(cr, producer);
4040
}
4141
}
4242

4343
void IdoCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr,
44-
const Dictionary::Ptr& resolvedMacros, bool useResolvedMacros)
44+
const WaitGroup::Ptr& producer, const Dictionary::Ptr& resolvedMacros, bool useResolvedMacros)
4545
{
4646
ServiceState state;
4747
CheckCommand::Ptr commandObj = CheckCommand::ExecuteOverride ? CheckCommand::ExecuteOverride : checkable->GetCheckCommand();
@@ -88,19 +88,19 @@ void IdoCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckResult
8888
return;
8989

9090
if (idoType.IsEmpty()) {
91-
ReportIdoCheck(checkable, commandObj, cr, "Attribute 'ido_type' must be set.");
91+
ReportIdoCheck(checkable, commandObj, cr, producer, "Attribute 'ido_type' must be set.");
9292
return;
9393
}
9494

9595
if (idoName.IsEmpty()) {
96-
ReportIdoCheck(checkable, commandObj, cr, "Attribute 'ido_name' must be set.");
96+
ReportIdoCheck(checkable, commandObj, cr, producer, "Attribute 'ido_name' must be set.");
9797
return;
9898
}
9999

100100
Type::Ptr type = Type::GetByName(idoType);
101101

102102
if (!type || !DbConnection::TypeInstance->IsAssignableFrom(type)) {
103-
ReportIdoCheck(checkable, commandObj, cr, "DB IDO type '" + idoType + "' is invalid.");
103+
ReportIdoCheck(checkable, commandObj, cr, producer, "DB IDO type '" + idoType + "' is invalid.");
104104
return;
105105
}
106106

@@ -110,25 +110,25 @@ void IdoCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckResult
110110
DbConnection::Ptr conn = static_pointer_cast<DbConnection>(dtype->GetObject(idoName));
111111

112112
if (!conn) {
113-
ReportIdoCheck(checkable, commandObj, cr, "DB IDO connection '" + idoName + "' does not exist.");
113+
ReportIdoCheck(checkable, commandObj, cr, producer, "DB IDO connection '" + idoName + "' does not exist.");
114114
return;
115115
}
116116

117117
double qps = conn->GetQueryCount(60) / 60.0;
118118

119119
if (conn->IsPaused()) {
120-
ReportIdoCheck(checkable, commandObj, cr, "DB IDO connection is temporarily disabled on this cluster instance.", ServiceOK);
120+
ReportIdoCheck(checkable, commandObj, cr, producer, "DB IDO connection is temporarily disabled on this cluster instance.", ServiceOK);
121121
return;
122122
}
123123

124124
double pendingQueries = conn->GetPendingQueryCount();
125125

126126
if (!conn->GetConnected()) {
127127
if (conn->GetShouldConnect()) {
128-
ReportIdoCheck(checkable, commandObj, cr, "Could not connect to the database server.", ServiceCritical);
128+
ReportIdoCheck(checkable, commandObj, cr, producer, "Could not connect to the database server.", ServiceCritical);
129129
} else {
130130
ReportIdoCheck(
131-
checkable, commandObj, cr,
131+
checkable, commandObj, cr, producer,
132132
"Not currently enabled: Another cluster instance is responsible for the IDO database.", ServiceOK
133133
);
134134
}
@@ -193,5 +193,5 @@ void IdoCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckResult
193193
{ new PerfdataValue("pending_queries", pendingQueries, false, "", pendingQueriesWarning, pendingQueriesCritical) }
194194
}));
195195

196-
ReportIdoCheck(checkable, commandObj, cr, msgbuf.str(), state);
196+
ReportIdoCheck(checkable, commandObj, cr, producer, msgbuf.str(), state);
197197
}

lib/db_ido/idochecktask.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class IdoCheckTask
1818
{
1919
public:
2020
static void ScriptFunc(const Checkable::Ptr& service, const CheckResult::Ptr& cr,
21-
const Dictionary::Ptr& resolvedMacros, bool useResolvedMacros);
21+
const WaitGroup::Ptr& producer, const Dictionary::Ptr& resolvedMacros, bool useResolvedMacros);
2222

2323
private:
2424
IdoCheckTask();

lib/icinga/apiactions.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@ Dictionary::Ptr ApiActions::ProcessCheckResult(const ConfigObject::Ptr& object,
126126
if (params->Contains("ttl"))
127127
cr->SetTtl(HttpUtility::GetLastParameter(params, "ttl"));
128128

129-
Result result = checkable->ProcessCheckResult(cr);
129+
Result result = checkable->ProcessCheckResult(cr, ApiListener::GetInstance()->GetWaitGroup());
130+
130131
switch (result) {
131132
case Result::Ok:
132133
return ApiActions::CreateResult(200, "Successfully processed check result for object '" + checkable->GetName() + "'.");
@@ -787,7 +788,7 @@ Dictionary::Ptr ApiActions::ExecuteCommand(const ConfigObject::Ptr& object, cons
787788
Defer resetCheckCommandOverride([]() {
788789
CheckCommand::ExecuteOverride = nullptr;
789790
});
790-
cmd->Execute(checkable, cr, execMacros, false);
791+
cmd->Execute(checkable, cr, listener->GetWaitGroup(), execMacros, false);
791792
}
792793
} else if (command_type == "EventCommand") {
793794
EventCommand::Ptr cmd = GetSingleObjectByNameUsingPermissions(EventCommand::GetTypeName(), resolved_command, ActionsHandler::AuthenticatedApiUser);

0 commit comments

Comments
 (0)