From 690e0e0221804b99cac9ced2b775dc445829eef2 Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Tue, 4 Mar 2025 16:40:52 +0000 Subject: [PATCH 1/2] Take lock over Snapshotter state during callback --- src/node/snapshotter.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/node/snapshotter.h b/src/node/snapshotter.h index f9a4e32ac946..cfdb54024d6f 100644 --- a/src/node/snapshotter.h +++ b/src/node/snapshotter.h @@ -114,6 +114,11 @@ namespace ccf std::unique_ptr snapshot, uint32_t generation_count) { + auto snapshot_version = snapshot->get_version(); + + // Note this is NOT held for the entire function body, only to protect + // accesses to pending_snapshots + std::unique_lock guard(lock); if (pending_snapshots.size() >= max_pending_snapshots_count) { LOG_FAIL_FMT( @@ -123,7 +128,14 @@ namespace ccf return; } - auto snapshot_version = snapshot->get_version(); + // It is possible that the signature following the snapshot evidence is + // scheduled by another thread while the below snapshot evidence + // transaction is committed. To allow for such scenario, the evidence + // seqno is recorded via `record_snapshot_evidence_idx()` on a hook rather + // than here. + pending_snapshots[generation_count] = {}; + pending_snapshots[generation_count].version = snapshot_version; + guard.unlock(); auto serialised_snapshot = store->serialise_snapshot(std::move(snapshot)); auto serialised_snapshot_size = serialised_snapshot.size(); @@ -147,14 +159,6 @@ namespace ccf commit_evidence = commit_evidence_; }; - // It is possible that the signature following the snapshot evidence is - // scheduled by another thread while the below snapshot evidence - // transaction is committed. To allow for such scenario, the evidence - // seqno is recorded via `record_snapshot_evidence_idx()` on a hook rather - // than here. - pending_snapshots[generation_count] = {}; - pending_snapshots[generation_count].version = snapshot_version; - auto rc = tx.commit(cd, false, nullptr, capture_ws_digest_and_commit_evidence); if (rc != ccf::kv::CommitResult::SUCCESS) @@ -168,11 +172,13 @@ namespace ccf auto evidence_version = tx.commit_version(); + guard.lock(); pending_snapshots[generation_count].commit_evidence = commit_evidence; pending_snapshots[generation_count].write_set_digest = ws_digest; pending_snapshots[generation_count].snapshot_digest = cd.value(); pending_snapshots[generation_count].serialised_snapshot = std::move(serialised_snapshot); + guard.unlock(); auto to_host = writer_factory.create_writer_to_outside(); RINGBUFFER_WRITE_MESSAGE( From 776829352efd403b750e31a9b436baf73d58e81f Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Wed, 5 Mar 2025 10:02:01 +0000 Subject: [PATCH 2/2] Scoped locks --- src/node/snapshotter.h | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/node/snapshotter.h b/src/node/snapshotter.h index cfdb54024d6f..851a621588a4 100644 --- a/src/node/snapshotter.h +++ b/src/node/snapshotter.h @@ -116,26 +116,25 @@ namespace ccf { auto snapshot_version = snapshot->get_version(); - // Note this is NOT held for the entire function body, only to protect - // accesses to pending_snapshots - std::unique_lock guard(lock); - if (pending_snapshots.size() >= max_pending_snapshots_count) { - LOG_FAIL_FMT( - "Skipping new snapshot generation as {} snapshots are already " - "pending", - pending_snapshots.size()); - return; - } + std::unique_lock guard(lock); + if (pending_snapshots.size() >= max_pending_snapshots_count) + { + LOG_FAIL_FMT( + "Skipping new snapshot generation as {} snapshots are already " + "pending", + pending_snapshots.size()); + return; + } - // It is possible that the signature following the snapshot evidence is - // scheduled by another thread while the below snapshot evidence - // transaction is committed. To allow for such scenario, the evidence - // seqno is recorded via `record_snapshot_evidence_idx()` on a hook rather - // than here. - pending_snapshots[generation_count] = {}; - pending_snapshots[generation_count].version = snapshot_version; - guard.unlock(); + // It is possible that the signature following the snapshot evidence is + // scheduled by another thread while the below snapshot evidence + // transaction is committed. To allow for such scenario, the evidence + // seqno is recorded via `record_snapshot_evidence_idx()` on a hook + // rather than here. + pending_snapshots[generation_count] = {}; + pending_snapshots[generation_count].version = snapshot_version; + } auto serialised_snapshot = store->serialise_snapshot(std::move(snapshot)); auto serialised_snapshot_size = serialised_snapshot.size(); @@ -172,13 +171,14 @@ namespace ccf auto evidence_version = tx.commit_version(); - guard.lock(); - pending_snapshots[generation_count].commit_evidence = commit_evidence; - pending_snapshots[generation_count].write_set_digest = ws_digest; - pending_snapshots[generation_count].snapshot_digest = cd.value(); - pending_snapshots[generation_count].serialised_snapshot = - std::move(serialised_snapshot); - guard.unlock(); + { + std::unique_lock guard(lock); + pending_snapshots[generation_count].commit_evidence = commit_evidence; + pending_snapshots[generation_count].write_set_digest = ws_digest; + pending_snapshots[generation_count].snapshot_digest = cd.value(); + pending_snapshots[generation_count].serialised_snapshot = + std::move(serialised_snapshot); + } auto to_host = writer_factory.create_writer_to_outside(); RINGBUFFER_WRITE_MESSAGE(