
Commit fba56f0

Merge pull request #10254 from Icinga/Timeout-Cancel
Timeout: use less resources, clean them up better and make cancellation deterministic
2 parents: 9bc9d14 + 8f72891

9 files changed: +252 −67 lines

lib/base/io-engine.cpp

Lines changed: 6 additions & 1 deletion
@@ -146,9 +146,14 @@ void AsioConditionVariable::Wait(boost::asio::yield_context yc)
 	m_Timer.async_wait(yc[ec]);
 }
 
+/**
+ * Cancels any pending timeout callback.
+ *
+ * Must be called in the strand in which the callback was scheduled!
+ */
 void Timeout::Cancel()
 {
-	m_Cancelled.store(true);
+	m_Cancelled->store(true);
 
 	boost::system::error_code ec;
 	m_Timer.cancel(ec);

lib/base/io-engine.hpp

Lines changed: 63 additions & 32 deletions
@@ -3,10 +3,12 @@
 #ifndef IO_ENGINE_H
 #define IO_ENGINE_H
 
+#include "base/atomic.hpp"
+#include "base/debug.hpp"
 #include "base/exception.hpp"
 #include "base/lazy-init.hpp"
 #include "base/logger.hpp"
-#include "base/shared-object.hpp"
+#include "base/shared.hpp"
 #include <atomic>
 #include <exception>
 #include <memory>
@@ -163,51 +165,80 @@ class AsioConditionVariable
 /**
  * I/O timeout emulator
  *
+ * This class provides a workaround for Boost.ASIO's lack of built-in timeout support.
+ * While Boost.ASIO handles asynchronous operations, it does not natively support timeouts for these operations.
+ * This class uses a boost::asio::deadline_timer to emulate a timeout by scheduling a callback to be triggered
+ * after a specified duration, effectively adding timeout behavior where none exists.
+ * The callback is executed within the provided strand, ensuring thread-safety.
+ *
+ * The constructor returns immediately after scheduling the timeout callback.
+ * The callback itself is invoked asynchronously when the timeout occurs.
+ * This allows the caller to continue execution while the timeout is running in the background.
+ *
+ * The class provides a Cancel() method to unschedule any pending callback. If the callback has already been run,
+ * calling Cancel() has no effect. This method can be used to abort the timeout early if the monitored operation
+ * completes before the callback has been run. The Timeout destructor also automatically cancels any pending callback.
+ * A callback is considered pending even if the timeout has already expired,
+ * but the callback has not been executed yet due to a busy strand.
+ *
  * @ingroup base
  */
-class Timeout : public SharedObject
+class Timeout
 {
 public:
-	DECLARE_PTR_TYPEDEFS(Timeout);
-
-	template<class Executor, class TimeoutFromNow, class OnTimeout>
-	Timeout(boost::asio::io_context& io, Executor& executor, TimeoutFromNow timeoutFromNow, OnTimeout onTimeout)
-		: m_Timer(io)
+	using Timer = boost::asio::deadline_timer;
+
+	/**
+	 * Schedules onTimeout to be triggered after timeoutFromNow on strand.
+	 *
+	 * @param strand The strand in which the callback will be executed.
+	 * The caller must also run in this strand, as well as Cancel() and the destructor!
+	 * @param timeoutFromNow The duration after which the timeout callback will be triggered.
+	 * @param onTimeout The callback to invoke when the timeout occurs.
	 */
+	template<class OnTimeout>
+	Timeout(boost::asio::io_context::strand& strand, const Timer::duration_type& timeoutFromNow, OnTimeout onTimeout)
+		: m_Timer(strand.context(), timeoutFromNow), m_Cancelled(Shared<Atomic<bool>>::Make(false))
 	{
-		Ptr keepAlive (this);
-
-		m_Cancelled.store(false);
-		m_Timer.expires_from_now(std::move(timeoutFromNow));
-
-		IoEngine::SpawnCoroutine(executor, [this, keepAlive, onTimeout](boost::asio::yield_context yc) {
-			if (m_Cancelled.load()) {
-				return;
-			}
+		VERIFY(strand.running_in_this_thread());
 
-			{
-				boost::system::error_code ec;
-
-				m_Timer.async_wait(yc[ec]);
-
-				if (ec) {
-					return;
+		m_Timer.async_wait(boost::asio::bind_executor(
+			strand, [cancelled = m_Cancelled, onTimeout = std::move(onTimeout)](boost::system::error_code ec) {
+				if (!ec && !cancelled->load()) {
+					onTimeout();
 				}
 			}
+		));
+	}
 
-			if (m_Cancelled.load()) {
-				return;
-			}
-
-			auto f (onTimeout);
-			f(std::move(yc));
-		});
+	Timeout(const Timeout&) = delete;
+	Timeout(Timeout&&) = delete;
+	Timeout& operator=(const Timeout&) = delete;
+	Timeout& operator=(Timeout&&) = delete;
+
+	/**
+	 * Cancels any pending timeout callback.
+	 *
+	 * Must be called in the strand in which the callback was scheduled!
+	 */
+	~Timeout()
+	{
+		Cancel();
 	}
 
 	void Cancel();
 
 private:
-	boost::asio::deadline_timer m_Timer;
-	std::atomic<bool> m_Cancelled;
+	Timer m_Timer;
+
+	/**
+	 * Indicates whether the Timeout has been cancelled.
+	 *
+	 * This must be Shared<> between the lambda in the constructor and Cancel() for the case
+	 * the destructor calls Cancel() while the lambda is already queued in the strand.
+	 * The whole Timeout instance can't be kept alive by the lambda because this would delay the destructor.
+	 */
+	Shared<Atomic<bool>>::Ptr m_Cancelled;
 };
 
 }
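Taken together, the io-engine changes replace the old heap-allocated, coroutine-based Timeout (a Timeout::Ptr plus a Defer calling Cancel()) with a stack-allocated object that posts a single timer handler to the strand and cancels it from its destructor. A minimal usage sketch of the new API, assuming the caller already runs inside the strand as the constructor's VERIFY() demands (the Example() function and the 10-second duration are illustrative, not part of the commit):

// Sketch only: RAII-style use of the Timeout class declared above.
void Example(boost::asio::io_context::strand& strand, boost::asio::yield_context yc)
{
	// Schedule the lambda to run on `strand` after 10 seconds.
	Timeout timeout (strand, boost::posix_time::seconds(10), [] {
		// Runs on the strand unless the Timeout is cancelled or destroyed first.
	});

	// ... await the monitored async operation with yc ...

	// No Defer/Cancel() needed: ~Timeout() calls Cancel() on scope exit.
	// The Shared<Atomic<bool>> flag also suppresses the callback when the
	// deadline has already expired but the handler is still queued on a
	// busy strand.
}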

lib/base/tlsstream.cpp

Lines changed: 3 additions & 6 deletions
@@ -140,15 +140,12 @@ void AsioTlsStream::GracefulDisconnect(boost::asio::io_context::strand& strand,
 	}
 
 	{
-		Timeout::Ptr shutdownTimeout(new Timeout(strand.context(), strand, boost::posix_time::seconds(10),
-			[this](boost::asio::yield_context yc) {
+		Timeout shutdownTimeout (strand, boost::posix_time::seconds(10),
+			[this] {
 				// Forcefully terminate the connection if async_shutdown() blocked more than 10 seconds.
 				ForceDisconnect();
 			}
-		));
-		Defer cancelTimeout ([&shutdownTimeout]() {
-			shutdownTimeout->Cancel();
-		});
+		);
 
 		// Close the TLS connection, effectively uses SSL_shutdown() to send a close_notify shutdown alert to the peer.
 		boost::system::error_code ec;

lib/icingadb/redisconnection.cpp

Lines changed: 0 additions & 3 deletions
@@ -318,7 +318,6 @@ void RedisConnection::Connect(asio::yield_context& yc)
 		auto conn (Shared<AsioTlsStream>::Make(m_Strand.context(), *m_TLSContext, m_Host));
 		auto& tlsConn (conn->next_layer());
 		auto connectTimeout (MakeTimeout(conn));
-		Defer cancelTimeout ([&connectTimeout]() { connectTimeout->Cancel(); });
 
 		icinga::Connect(conn->lowest_layer(), m_Host, Convert::ToString(m_Port), yc);
 		tlsConn.async_handshake(tlsConn.client, yc);
@@ -348,7 +347,6 @@ void RedisConnection::Connect(asio::yield_context& yc)
 
 		auto conn (Shared<TcpConn>::Make(m_Strand.context()));
 		auto connectTimeout (MakeTimeout(conn));
-		Defer cancelTimeout ([&connectTimeout]() { connectTimeout->Cancel(); });
 
 		icinga::Connect(conn->next_layer(), m_Host, Convert::ToString(m_Port), yc);
 		Handshake(conn, yc);
@@ -361,7 +359,6 @@ void RedisConnection::Connect(asio::yield_context& yc)
 
 		auto conn (Shared<UnixConn>::Make(m_Strand.context()));
 		auto connectTimeout (MakeTimeout(conn));
-		Defer cancelTimeout ([&connectTimeout]() { connectTimeout->Cancel(); });
 
 		conn->next_layer().async_connect(Unix::endpoint(m_Path.CStr()), yc);
 		Handshake(conn, yc);

lib/icingadb/redisconnection.hpp

Lines changed: 4 additions & 7 deletions
@@ -222,7 +222,7 @@ namespace icinga
 	void Handshake(StreamPtr& stream, boost::asio::yield_context& yc);
 
 	template<class StreamPtr>
-	Timeout::Ptr MakeTimeout(StreamPtr& stream);
+	Timeout MakeTimeout(StreamPtr& stream);
 
 	String m_Path;
 	String m_Host;
@@ -512,15 +512,12 @@ void RedisConnection::Handshake(StreamPtr& strm, boost::asio::yield_context& yc)
  * @param stream Redis server connection
  */
 template<class StreamPtr>
-Timeout::Ptr RedisConnection::MakeTimeout(StreamPtr& stream)
+Timeout RedisConnection::MakeTimeout(StreamPtr& stream)
 {
-	Ptr keepAlive (this);
-
-	return new Timeout(
-		m_Strand.context(),
+	return Timeout(
 		m_Strand,
 		boost::posix_time::microseconds(intmax_t(m_ConnectTimeout * 1000000)),
-		[keepAlive, stream](boost::asio::yield_context yc) {
+		[stream] {
 			boost::system::error_code ec;
 			stream->lowest_layer().cancel(ec);
 		}
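One detail worth noting: MakeTimeout() now returns Timeout by value even though the class deletes its copy and move constructors. This compiles because, since C++17, the returned prvalue is materialized directly in the caller's variable (guaranteed copy elision), so no copy or move is ever performed. A sketch of the resulting call-site pattern, mirroring the redisconnection.cpp hunks above (surrounding code omitted):

	// The prvalue returned by MakeTimeout() is constructed directly into
	// connectTimeout; no copy or move constructor is involved.
	auto connectTimeout (MakeTimeout(conn));

	icinga::Connect(conn->lowest_layer(), m_Host, Convert::ToString(m_Port), yc);
	tlsConn.async_handshake(tlsConn.client, yc);

	// ~Timeout() cancels the pending callback when connectTimeout leaves scope,
	// which is why the explicit Defer cancelTimeout lines could be dropped.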

lib/methods/ifwapichecktask.cpp

Lines changed: 2 additions & 4 deletions
@@ -456,8 +456,8 @@ void IfwApiCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
 	IoEngine::SpawnCoroutine(
 		*strand,
 		[strand, checkable, cr, psCommand, psHost, expectedSan, psPort, conn, req, checkTimeout, reportResult = std::move(reportResult)](asio::yield_context yc) {
-			Timeout::Ptr timeout = new Timeout(strand->context(), *strand, boost::posix_time::microseconds(int64_t(checkTimeout * 1e6)),
-				[&conn, &checkable](boost::asio::yield_context yc) {
+			Timeout timeout (*strand, boost::posix_time::microseconds(int64_t(checkTimeout * 1e6)),
+				[&conn, &checkable] {
 					Log(LogNotice, "IfwApiCheckTask")
 						<< "Timeout while checking " << checkable->GetReflectionType()->GetName()
 						<< " '" << checkable->GetName() << "', cancelling attempt";
@@ -467,8 +467,6 @@ void IfwApiCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
 				}
 			);
 
-			Defer cancelTimeout ([&timeout]() { timeout->Cancel(); });
-
 			DoIfwNetIo(yc, cr, psCommand, psHost, expectedSan, psPort, *conn, *req);
 
 			cr->SetExecutionEnd(Utility::GetTime());

lib/remote/apilistener.cpp

Lines changed: 9 additions & 14 deletions
@@ -534,16 +534,15 @@ void ApiListener::ListenerCoroutineProc(boost::asio::yield_context yc, const Sha
 		auto strand (Shared<asio::io_context::strand>::Make(io));
 
 		IoEngine::SpawnCoroutine(*strand, [this, strand, sslConn, remoteEndpoint](asio::yield_context yc) {
-			Timeout::Ptr timeout(new Timeout(strand->context(), *strand, boost::posix_time::microseconds(int64_t(GetConnectTimeout() * 1e6)),
-				[sslConn, remoteEndpoint](asio::yield_context yc) {
+			Timeout timeout (*strand, boost::posix_time::microseconds(int64_t(GetConnectTimeout() * 1e6)),
+				[sslConn, remoteEndpoint] {
 					Log(LogWarning, "ApiListener")
 						<< "Timeout while processing incoming connection from " << remoteEndpoint;
 
 					boost::system::error_code ec;
 					sslConn->lowest_layer().cancel(ec);
 				}
-			));
-			Defer cancelTimeout([timeout]() { timeout->Cancel(); });
+			);
 
 			NewClientHandler(yc, strand, sslConn, String(), RoleServer);
 		});
@@ -585,17 +584,16 @@ void ApiListener::AddConnection(const Endpoint::Ptr& endpoint)
 
 			lock.unlock();
 
-			Timeout::Ptr timeout(new Timeout(strand->context(), *strand, boost::posix_time::microseconds(int64_t(GetConnectTimeout() * 1e6)),
-				[sslConn, endpoint, host, port](asio::yield_context yc) {
+			Timeout timeout (*strand, boost::posix_time::microseconds(int64_t(GetConnectTimeout() * 1e6)),
+				[sslConn, endpoint, host, port] {
 					Log(LogCritical, "ApiListener")
 						<< "Timeout while reconnecting to endpoint '" << endpoint->GetName() << "' via host '" << host
 						<< "' and port '" << port << "', cancelling attempt";
 
 					boost::system::error_code ec;
 					sslConn->lowest_layer().cancel(ec);
 				}
-			));
-			Defer cancelTimeout([&timeout]() { timeout->Cancel(); });
+			);
 
 			Connect(sslConn->lowest_layer(), host, port, yc);
 
@@ -683,19 +681,16 @@ void ApiListener::NewClientHandlerInternal(
 	boost::system::error_code ec;
 
 	{
-		Timeout::Ptr handshakeTimeout (new Timeout(
-			strand->context(),
+		Timeout handshakeTimeout (
 			*strand,
 			boost::posix_time::microseconds(intmax_t(Configuration::TlsHandshakeTimeout * 1000000)),
-			[strand, client](asio::yield_context yc) {
+			[client] {
 				boost::system::error_code ec;
 				client->lowest_layer().cancel(ec);
 			}
-		));
+		);
 
 		sslConn.async_handshake(role == RoleClient ? sslConn.client : sslConn.server, yc[ec]);
-
-		handshakeTimeout->Cancel();
 	}
 
 	if (ec) {

test/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
@@ -62,6 +62,7 @@ set(base_test_SOURCES
   base-convert.cpp
   base-dictionary.cpp
   base-fifo.cpp
+  base-io-engine.cpp
   base-json.cpp
   base-match.cpp
   base-netstring.cpp
@@ -128,6 +129,11 @@ add_boost_test(base
   base_dictionary/keys_ordered
   base_fifo/construct
   base_fifo/io
+  base_io_engine/timeout_run
+  base_io_engine/timeout_cancelled
+  base_io_engine/timeout_scope
+  base_io_engine/timeout_due_cancelled
+  base_io_engine/timeout_due_scope
   base_json/encode
   base_json/decode
   base_json/invalid1
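The new base-io-engine.cpp test file itself is not shown in this commit view. As a rough illustration of what a case like base_io_engine/timeout_run might verify, here is a hypothetical sketch; the test name, durations, and structure below are assumptions, not the actual test code:

// Hypothetical sketch, not the contents of test/base-io-engine.cpp.
#include "base/io-engine.hpp"
#include <boost/asio/deadline_timer.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/asio/io_context_strand.hpp>
#include <boost/asio/spawn.hpp>
#include <boost/test/unit_test.hpp>

using namespace icinga;

BOOST_AUTO_TEST_CASE(timeout_run_sketch)
{
	boost::asio::io_context io;
	boost::asio::io_context::strand strand (io);
	int called = 0;

	boost::asio::spawn(strand, [&](boost::asio::yield_context yc) {
		// The constructor must run inside the strand (see the VERIFY() in io-engine.hpp).
		Timeout timeout (strand, boost::posix_time::milliseconds(100), [&called] { ++called; });

		// Suspend the coroutine past the deadline so the callback can run on the strand.
		boost::asio::deadline_timer sleeper (io, boost::posix_time::milliseconds(300));
		sleeper.async_wait(yc);
	});

	io.run();
	BOOST_CHECK_EQUAL(called, 1);
}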
