From dd7c88e67d0845dc6a88d58497258e53a12a7bc0 Mon Sep 17 00:00:00 2001 From: Victor Lyuboslavsky <2685025+getvictor@users.noreply.github.com> Date: Fri, 10 Oct 2025 16:04:26 -0500 Subject: [PATCH] Fixed MySQL deadlocks when multiple hosts are updating their certificates in host vitals at the same time. --- changes/34116-cert-source-deadlocks | 1 + server/datastore/mysql/host_certificates.go | 52 +++++++++++++++++---- 2 files changed, 44 insertions(+), 9 deletions(-) create mode 100644 changes/34116-cert-source-deadlocks diff --git a/changes/34116-cert-source-deadlocks b/changes/34116-cert-source-deadlocks new file mode 100644 index 000000000000..73407b33621e --- /dev/null +++ b/changes/34116-cert-source-deadlocks @@ -0,0 +1 @@ +Fixed MySQL deadlocks when multiple hosts are updating their certificates in host vitals at the same time. diff --git a/server/datastore/mysql/host_certificates.go b/server/datastore/mysql/host_certificates.go index 4ab5256ecc79..9ec323a97053 100644 --- a/server/datastore/mysql/host_certificates.go +++ b/server/datastore/mysql/host_certificates.go @@ -340,22 +340,56 @@ func replaceHostCertsSourcesDB(ctx context.Context, tx sqlx.ExtContext, toReplac return nil } + // Sort by host_certificate_id to ensure consistent lock ordering and prevent deadlocks + slices.SortFunc(toReplaceSources, func(a, b *fleet.HostCertificateRecord) int { + if a.ID != b.ID { + if a.ID < b.ID { + return -1 + } + return 1 + } + // Secondary sort by source/username for determinism + if a.Source != b.Source { + return strings.Compare(string(a.Source), string(b.Source)) + } + return strings.Compare(a.Username, b.Username) + }) + + // Build unique certificate IDs for deletion (already sorted from above) certIDs := make([]uint, 0, len(toReplaceSources)) - for _, source := range toReplaceSources { - certIDs = append(certIDs, source.ID) + var lastID uint + for i, source := range toReplaceSources { + // Deduplicate: only add if this ID is different from the last one + if i == 0 || source.ID != lastID { + certIDs = append(certIDs, source.ID) + lastID = source.ID + } } - // delete existing sources - stmtDelete := `DELETE FROM host_certificate_sources WHERE host_certificate_id IN (?)` - stmtDelete, args, err := sqlx.In(stmtDelete, certIDs) + // Check if any sources exist before deleting to avoid unnecessary gap locks + stmtCheck := `SELECT EXISTS(SELECT 1 FROM host_certificate_sources WHERE host_certificate_id IN (?))` + stmtCheck, args, err := sqlx.In(stmtCheck, certIDs) if err != nil { - return ctxerr.Wrap(ctx, err, "building delete host cert sources query") + return ctxerr.Wrap(ctx, err, "building check host cert sources query") + } + var exists bool + if err := sqlx.GetContext(ctx, tx, &exists, stmtCheck, args...); err != nil { + return ctxerr.Wrap(ctx, err, "checking if host cert sources exist") } - if _, err := tx.ExecContext(ctx, stmtDelete, args...); err != nil { - return ctxerr.Wrap(ctx, err, "deleting host cert sources") + + // Only delete if sources exist + if exists { + stmtDelete := `DELETE FROM host_certificate_sources WHERE host_certificate_id IN (?)` + stmtDelete, args, err := sqlx.In(stmtDelete, certIDs) + if err != nil { + return ctxerr.Wrap(ctx, err, "building delete host cert sources query") + } + if _, err := tx.ExecContext(ctx, stmtDelete, args...); err != nil { + return ctxerr.Wrap(ctx, err, "deleting host cert sources") + } } - // create incoming sources + // Insert new sources stmtInsert := ` INSERT INTO host_certificate_sources ( host_certificate_id,