From f30057365f90d07ef07080635cd91f792a179c1c Mon Sep 17 00:00:00 2001 From: Yuqi Yan Date: Fri, 17 Oct 2025 13:50:16 -0700 Subject: [PATCH 1/5] GCInspector should use different thresholds on GC events --- conf/cassandra.yaml | 16 +++ conf/cassandra_latest.yaml | 16 +++ .../org/apache/cassandra/config/Config.java | 2 + .../cassandra/config/DatabaseDescriptor.java | 21 ++++ .../apache/cassandra/service/GCInspector.java | 82 ++++++++++++-- .../cassandra/service/GCInspectorMXBean.java | 4 + .../cassandra/service/GCInspectorTest.java | 104 +++++++++++++++--- 7 files changed, 219 insertions(+), 26 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index d000f6be36ef..3f050776c421 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2048,6 +2048,11 @@ batch_size_fail_threshold: 50KiB # Log WARN on any batches not of type LOGGED than span across more partitions than this limit unlogged_batch_across_partitions_warn_threshold: 10 +# GCInspector configs: +# For GC like ShenandoahGC/ZGC etc., there are GC events that do not have STW pauses (Concurrent phases) +# Operator might find it reasonable to use lower thresholds for events require STW pauses and higher thresholds +# for concurrent phases. +# # GC Pauses greater than 200 ms will be logged at INFO level # This threshold can be adjusted to minimize logging if necessary # Min unit: ms @@ -2059,6 +2064,17 @@ unlogged_batch_across_partitions_warn_threshold: 10 # Min unit: ms # gc_warn_threshold: 1000ms +# GC Concurrent phase greater than 200 ms will be logged at INFO level +# This threshold can be adjusted to minimize logging if necessary +# Min unit: ms +# gc_concurrent_phase_log_threshold: 1000ms + +# GC Concurrent phase than gc_concurrent_phase_warn_threshold will be logged at WARN level +# Adjust the threshold based on your application throughput requirement. Setting to 0 +# will deactivate the feature. +# Min unit: ms +# gc_concurrent_phase_warn_threshold: 2000ms + # Maximum size of any value in SSTables. Safety measure to detect SSTable corruption # early. Any value size larger than this threshold will result into marking an SSTable # as corrupted. This should be positive and less than 2GiB. diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 6d3f7b7a49d7..4f2480c8ebc9 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -1913,6 +1913,11 @@ batch_size_fail_threshold: 50KiB # Log WARN on any batches not of type LOGGED than span across more partitions than this limit unlogged_batch_across_partitions_warn_threshold: 10 +# GCInspector configs: +# For GC like ShenandoahGC/ZGC etc., there are GC events that do not have STW pauses. +# Such events are called Concurrent phases. Operator might find it reasonable to use lower thresholds +# for events require STW pauses and higher thresholds for concurrent phases. +# # GC Pauses greater than 200 ms will be logged at INFO level # This threshold can be adjusted to minimize logging if necessary # Min unit: ms @@ -1924,6 +1929,17 @@ unlogged_batch_across_partitions_warn_threshold: 10 # Min unit: ms # gc_warn_threshold: 1000ms +# GC Concurrent phase greater than 200 ms will be logged at INFO level +# This threshold can be adjusted to minimize logging if necessary +# Min unit: ms +# gc_concurrent_phase_log_threshold: 1000ms + +# GC Concurrent phase than gc_concurrent_phase_warn_threshold will be logged at WARN level +# Adjust the threshold based on your application throughput requirement. Setting to 0 +# will deactivate the feature. +# Min unit: ms +# gc_concurrent_phase_warn_threshold: 2000ms + # Maximum size of any value in SSTables. Safety measure to detect SSTable corruption # early. Any value size larger than this threshold will result into marking an SSTable # as corrupted. This should be positive and less than 2GiB. diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 33a0a25283a4..7ea67ebc4c17 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -593,6 +593,8 @@ public static class SSTableConfig public volatile DurationSpec.IntMillisecondsBound gc_log_threshold = new DurationSpec.IntMillisecondsBound("200ms"); @Replaces(oldName = "gc_warn_threshold_in_ms", converter = Converters.MILLIS_DURATION_INT, deprecated = true) public volatile DurationSpec.IntMillisecondsBound gc_warn_threshold = new DurationSpec.IntMillisecondsBound("1s"); + public volatile DurationSpec.IntMillisecondsBound gc_concurrent_phase_log_threshold = new DurationSpec.IntMillisecondsBound("1s"); + public volatile DurationSpec.IntMillisecondsBound gc_concurrent_phase_warn_threshold = new DurationSpec.IntMillisecondsBound("2s"); // TTL for different types of trace events. @Replaces(oldName = "tracetype_query_ttl", converter = Converters.SECONDS_DURATION, deprecated=true) diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 753a7413eafe..15975ef5b94f 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -4739,6 +4739,27 @@ public static void setGCWarnThreshold(int threshold) conf.gc_warn_threshold = new DurationSpec.IntMillisecondsBound(threshold); } + public static int getGCConcurrentPhaseLogThreshold() + { + return conf.gc_concurrent_phase_log_threshold.toMilliseconds(); + } + + public static void setGCConcurrentPhaseLogThreshold(int threshold) + { + conf.gc_concurrent_phase_log_threshold = new DurationSpec.IntMillisecondsBound(threshold); + } + + + public static int getGCConcurrentPhaseWarnThreshold() + { + return conf.gc_concurrent_phase_warn_threshold.toMilliseconds(); + } + + public static void setGCConcurrentPhaseWarnThreshold(int threshold) + { + conf.gc_concurrent_phase_warn_threshold = new DurationSpec.IntMillisecondsBound(threshold); + } + public static boolean isCDCEnabled() { return conf.cdc_enabled; diff --git a/src/java/org/apache/cassandra/service/GCInspector.java b/src/java/org/apache/cassandra/service/GCInspector.java index 4bdc3a05bade..f121f6c5487b 100644 --- a/src/java/org/apache/cassandra/service/GCInspector.java +++ b/src/java/org/apache/cassandra/service/GCInspector.java @@ -287,16 +287,28 @@ public void handleNotification(final Notification notification, final Object han if (state.compareAndSet(prev, new State(duration, bytes, prev))) break; } - - if (getGcWarnThresholdInMs() != 0 && duration > getGcWarnThresholdInMs()) - logger.warn(sb.toString()); - else if (duration > getGcLogThresholdInMs()) - logger.info(sb.toString()); - else if (logger.isTraceEnabled()) - logger.trace(sb.toString()); - if (duration > this.getStatusThresholdInMs()) - StatusLogger.log(); + if (isConcurrentPhase(info.getGcCause(), info.getGcName())) + { + if (getGcWarnThresholdInMs() != 0 && duration > getGcWarnThresholdInMs()) + logger.warn(sb.toString()); + else if (duration > getGcLogThresholdInMs()) + logger.info(sb.toString()); + else if (logger.isTraceEnabled()) + logger.trace(sb.toString()); + // TODO: trigger StatusLogger if concurrent phases take too long? + } + else + { + if (getGcConcurrentPhaseWarnThresholdInMs() != 0 && duration > getGcConcurrentPhaseWarnThresholdInMs()) + logger.warn(sb.toString()); + else if (duration > getGcConcurrentPhaseLogThresholdInMs()) + logger.info(sb.toString()); + else if (logger.isTraceEnabled()) + logger.trace(sb.toString()); + if (duration > this.getStatusThresholdInMs()) + StatusLogger.log(); + } // if we just finished an old gen collection and we're still using a lot of memory, try to reduce the pressure if (gcState.assumeGCIsOldGen) @@ -304,6 +316,24 @@ else if (logger.isTraceEnabled()) } } + static boolean isConcurrentPhase(String cause, String name) { + // Mostly taken from: https://github.com/Netflix/spectator/blob/v1.7.x/spectator-ext-gc/src/main/java/com/netflix/spectator/gc/GcLogger.java + // So far the only indicator known is that the cause will be reported as "No GC" + // when using CMS. + // + // For ZGC, behavior was changed in JDK17: https://bugs.openjdk.java.net/browse/JDK-8265136 + // For ZGC in older versions, there is no way to accurately get the amount of time + // in STW pauses. + // + // For G1, a new bean is added in JDK20 to indicate time spent in concurrent phases: + // https://bugs.openjdk.org/browse/JDK-8297247 + + return "No GC".equals(cause) // CMS + || "G1 Concurrent GC".equals(name) // G1 in JDK20+ + || name.endsWith(" Cycles"); // Shenandoah, ZGC + } + + public State getTotalSinceLastCheck() { return state.getAndSet(new State()); @@ -407,6 +437,40 @@ public void setGcLogThresholdInMs(long threshold) DatabaseDescriptor.setGCLogThreshold((int) threshold); } + public int getGcConcurrentPhaseWarnThresholdInMs() + { + return DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold(); + } + + public void setGcConcurrentPhaseWarnThresholdInMs(int threshold) + { + long gcConcurrentPhaseLogThresholdInMs = getGcConcurrentPhaseLogThresholdInMs(); + if (threshold < 0) + throw new IllegalArgumentException("Threshold must be greater than or equal to 0"); + if (threshold != 0 && threshold <= gcConcurrentPhaseLogThresholdInMs) + throw new IllegalArgumentException("Threshold must be greater than gcConcurrentPhaseLogThresholdInMs which is currently " + + gcConcurrentPhaseLogThresholdInMs); + DatabaseDescriptor.setGCConcurrentPhaseWarnThreshold(threshold); + } + + public int getGcConcurrentPhaseLogThresholdInMs() + { + return DatabaseDescriptor.getGCConcurrentPhaseLogThreshold(); + } + + public void setGcConcurrentPhaseLogThresholdInMs(int threshold) + { + if (threshold <= 0) + throw new IllegalArgumentException("Threshold must be greater than 0"); + + long gcConcurrentPhaseWarnThresholdInMs = getGcConcurrentPhaseWarnThresholdInMs(); + if (gcConcurrentPhaseWarnThresholdInMs != 0 && threshold > gcConcurrentPhaseWarnThresholdInMs) + throw new IllegalArgumentException("Threshold must be less than gcConcurrentPhaseWarnThresholdInMs which is currently " + + gcConcurrentPhaseWarnThresholdInMs); + + DatabaseDescriptor.setGCConcurrentPhaseLogThreshold(threshold); + } + public long getGcLogThresholdInMs() { return DatabaseDescriptor.getGCLogThreshold(); diff --git a/src/java/org/apache/cassandra/service/GCInspectorMXBean.java b/src/java/org/apache/cassandra/service/GCInspectorMXBean.java index 08795ed269de..9765e9813c3c 100644 --- a/src/java/org/apache/cassandra/service/GCInspectorMXBean.java +++ b/src/java/org/apache/cassandra/service/GCInspectorMXBean.java @@ -27,6 +27,10 @@ public interface GCInspectorMXBean void setGcWarnThresholdInMs(long threshold); long getGcWarnThresholdInMs(); void setGcLogThresholdInMs(long threshold); + int getGcConcurrentPhaseLogThresholdInMs(); + void setGcConcurrentPhaseWarnThresholdInMs(int threshold); + int getGcConcurrentPhaseWarnThresholdInMs(); + void setGcConcurrentPhaseLogThresholdInMs(int threshold); long getGcLogThresholdInMs(); long getStatusThresholdInMs(); } diff --git a/test/unit/org/apache/cassandra/service/GCInspectorTest.java b/test/unit/org/apache/cassandra/service/GCInspectorTest.java index bcdf023294ad..5ccaf5e2e93b 100644 --- a/test/unit/org/apache/cassandra/service/GCInspectorTest.java +++ b/test/unit/org/apache/cassandra/service/GCInspectorTest.java @@ -18,11 +18,16 @@ package org.apache.cassandra.service; import org.apache.cassandra.config.DatabaseDescriptor; -import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + + public class GCInspectorTest { @@ -43,38 +48,81 @@ public void before() @Test public void ensureStaticFieldsHydrateFromConfig() { - Assert.assertEquals(DatabaseDescriptor.getGCLogThreshold(), gcInspector.getGcLogThresholdInMs()); - Assert.assertEquals(DatabaseDescriptor.getGCWarnThreshold(), gcInspector.getGcWarnThresholdInMs()); + assertEquals(DatabaseDescriptor.getGCLogThreshold(), gcInspector.getGcLogThresholdInMs()); + assertEquals(DatabaseDescriptor.getGCWarnThreshold(), gcInspector.getGcWarnThresholdInMs()); + assertEquals(DatabaseDescriptor.getGCConcurrentPhaseLogThreshold(), gcInspector.getGcConcurrentPhaseLogThresholdInMs()); + assertEquals(DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold(), gcInspector.getGcConcurrentPhaseWarnThresholdInMs()); } @Test public void ensureStatusIsCalculated() { - Assert.assertTrue(gcInspector.getStatusThresholdInMs() > 0); + assertTrue(gcInspector.getStatusThresholdInMs() > 0); } - @Test(expected=IllegalArgumentException.class) + @Test public void ensureWarnGreaterThanLog() { - gcInspector.setGcWarnThresholdInMs(gcInspector.getGcLogThresholdInMs()); + try + { + gcInspector.setGcWarnThresholdInMs(gcInspector.getGcLogThresholdInMs()); + fail("Expect IllegalArgumentException"); + } + catch (IllegalArgumentException e) + { + // expected + } + try + { + gcInspector.setGcConcurrentPhaseWarnThresholdInMs(gcInspector.getGcConcurrentPhaseLogThresholdInMs()); + fail("Expect IllegalArgumentException"); + } + catch (IllegalArgumentException e) + { + // expected + } } @Test public void ensureZeroIsOk() { gcInspector.setGcWarnThresholdInMs(0); - Assert.assertEquals(gcInspector.getStatusThresholdInMs(), gcInspector.getGcLogThresholdInMs()); - Assert.assertEquals(0, DatabaseDescriptor.getGCWarnThreshold()); - Assert.assertEquals(200, DatabaseDescriptor.getGCLogThreshold()); + assertEquals(gcInspector.getStatusThresholdInMs(), gcInspector.getGcLogThresholdInMs()); + assertEquals(0, DatabaseDescriptor.getGCWarnThreshold()); + assertEquals(200, DatabaseDescriptor.getGCLogThreshold()); + + gcInspector.setGcConcurrentPhaseWarnThresholdInMs(0); + assertEquals(0, DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold()); + assertEquals(1000, DatabaseDescriptor.getGCConcurrentPhaseLogThreshold()); } - @Test(expected=IllegalArgumentException.class) + @Test public void ensureLogLessThanWarn() { - Assert.assertEquals(200, gcInspector.getGcLogThresholdInMs()); + assertEquals(200, gcInspector.getGcLogThresholdInMs()); gcInspector.setGcWarnThresholdInMs(1000); - Assert.assertEquals(1000, gcInspector.getGcWarnThresholdInMs()); - gcInspector.setGcLogThresholdInMs(gcInspector.getGcWarnThresholdInMs() + 1); + assertEquals(1000, gcInspector.getGcWarnThresholdInMs()); + try + { + gcInspector.setGcLogThresholdInMs(gcInspector.getGcWarnThresholdInMs() + 1); + fail("Expect IllegalArgumentException"); + } + catch (IllegalArgumentException e) + { + // expected + } + assertEquals(1000, gcInspector.getGcConcurrentPhaseLogThresholdInMs()); + gcInspector.setGcConcurrentPhaseWarnThresholdInMs(2000); + assertEquals(2000, gcInspector.getGcConcurrentPhaseWarnThresholdInMs()); + try + { + gcInspector.setGcConcurrentPhaseLogThresholdInMs(gcInspector.getGcConcurrentPhaseWarnThresholdInMs() + 1); + fail("Expect IllegalArgumentException"); + } + catch (IllegalArgumentException e) + { + // expected + } } @Test @@ -82,10 +130,16 @@ public void testDefaults() { gcInspector.setGcLogThresholdInMs(200); gcInspector.setGcWarnThresholdInMs(1000); - Assert.assertEquals(200, DatabaseDescriptor.getGCLogThreshold()); - Assert.assertEquals(200, gcInspector.getGcLogThresholdInMs()); - Assert.assertEquals(1000, DatabaseDescriptor.getGCWarnThreshold()); - Assert.assertEquals(1000, gcInspector.getGcWarnThresholdInMs()); + gcInspector.setGcConcurrentPhaseLogThresholdInMs(1000); + gcInspector.setGcConcurrentPhaseWarnThresholdInMs(2000); + assertEquals(200, DatabaseDescriptor.getGCLogThreshold()); + assertEquals(200, gcInspector.getGcLogThresholdInMs()); + assertEquals(1000, DatabaseDescriptor.getGCWarnThreshold()); + assertEquals(1000, gcInspector.getGcWarnThresholdInMs()); + assertEquals(1000, DatabaseDescriptor.getGCConcurrentPhaseLogThreshold()); + assertEquals(1000, gcInspector.getGcConcurrentPhaseLogThresholdInMs()); + assertEquals(2000, DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold()); + assertEquals(2000, gcInspector.getGcConcurrentPhaseWarnThresholdInMs()); } @Test(expected=IllegalArgumentException.class) @@ -94,4 +148,20 @@ public void testMaxValue() gcInspector.setGcLogThresholdInMs(200); gcInspector.setGcWarnThresholdInMs(Integer.MAX_VALUE+1L); } + + @Test + public void testIsConcurrentPhase() + { + assertTrue("No GC cause should be considered concurrent", + GCInspector.isConcurrentPhase("No GC", "SomeGCName")); + assertTrue("Shenandoah Cycles should be considered concurrent", + GCInspector.isConcurrentPhase("SomeCause", "Shenandoah Cycles")); + assertTrue("ZGC Cycles should be considered concurrent", + GCInspector.isConcurrentPhase("SomeCause", "ZGC Cycles")); + + assertFalse("ParallelGC should not be considered concurrent", + GCInspector.isConcurrentPhase("Allocation Failure", "PS Scavenge")); + assertFalse("G1 Young Generation should not be considered concurrent", + GCInspector.isConcurrentPhase("G1 Evacuation Pause", "G1 Young Generation")); + } } From a97ecb70d399900d95348f26a031840f71c73809 Mon Sep 17 00:00:00 2001 From: Yuqi Yan Date: Mon, 20 Oct 2025 08:10:17 -0700 Subject: [PATCH 2/5] upstream fix --- conf/cassandra.yaml | 2 +- conf/cassandra_latest.yaml | 2 +- src/java/org/apache/cassandra/service/GCInspector.java | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 3f050776c421..20476ee54769 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2064,7 +2064,7 @@ unlogged_batch_across_partitions_warn_threshold: 10 # Min unit: ms # gc_warn_threshold: 1000ms -# GC Concurrent phase greater than 200 ms will be logged at INFO level +# GC Concurrent phase greater than 1000 ms will be logged at INFO level # This threshold can be adjusted to minimize logging if necessary # Min unit: ms # gc_concurrent_phase_log_threshold: 1000ms diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 4f2480c8ebc9..b43f6cf45cb8 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -1929,7 +1929,7 @@ unlogged_batch_across_partitions_warn_threshold: 10 # Min unit: ms # gc_warn_threshold: 1000ms -# GC Concurrent phase greater than 200 ms will be logged at INFO level +# GC Concurrent phase greater than 1000 ms will be logged at INFO level # This threshold can be adjusted to minimize logging if necessary # Min unit: ms # gc_concurrent_phase_log_threshold: 1000ms diff --git a/src/java/org/apache/cassandra/service/GCInspector.java b/src/java/org/apache/cassandra/service/GCInspector.java index f121f6c5487b..8d5e2c4971cc 100644 --- a/src/java/org/apache/cassandra/service/GCInspector.java +++ b/src/java/org/apache/cassandra/service/GCInspector.java @@ -290,9 +290,9 @@ public void handleNotification(final Notification notification, final Object han if (isConcurrentPhase(info.getGcCause(), info.getGcName())) { - if (getGcWarnThresholdInMs() != 0 && duration > getGcWarnThresholdInMs()) + if (getGcConcurrentPhaseWarnThresholdInMs() != 0 && duration > getGcConcurrentPhaseWarnThresholdInMs()) logger.warn(sb.toString()); - else if (duration > getGcLogThresholdInMs()) + else if (duration > getGcConcurrentPhaseLogThresholdInMs()) logger.info(sb.toString()); else if (logger.isTraceEnabled()) logger.trace(sb.toString()); @@ -300,9 +300,9 @@ else if (logger.isTraceEnabled()) } else { - if (getGcConcurrentPhaseWarnThresholdInMs() != 0 && duration > getGcConcurrentPhaseWarnThresholdInMs()) + if (getGcWarnThresholdInMs() != 0 && duration > getGcWarnThresholdInMs()) logger.warn(sb.toString()); - else if (duration > getGcConcurrentPhaseLogThresholdInMs()) + else if (duration > getGcLogThresholdInMs()) logger.info(sb.toString()); else if (logger.isTraceEnabled()) logger.trace(sb.toString()); From f7d3e051b581e14a16ed6172b408677a6e264a01 Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Thu, 30 Oct 2025 13:41:03 +0100 Subject: [PATCH 3/5] added validation --- .../cassandra/config/DatabaseDescriptor.java | 51 ++++++++++++-- .../apache/cassandra/service/GCInspector.java | 66 +++++++++---------- .../cassandra/service/GCInspectorTest.java | 56 +++++----------- 3 files changed, 94 insertions(+), 79 deletions(-) diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 15975ef5b94f..41423eca2d5b 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -1220,6 +1220,12 @@ else if (conf.max_value_size.toMebibytes() >= 2048) // run audit logging options through sanitation and validation if (conf.audit_logging_options != null) setAuditLoggingOptions(conf.audit_logging_options); + + // just run through the validation by setting current values back to their setters, so we are sure that their values are valid + DatabaseDescriptor.setGCLogThreshold((int) DatabaseDescriptor.getGCLogThreshold()); + DatabaseDescriptor.setGCWarnThreshold((int) DatabaseDescriptor.getGCWarnThreshold()); + DatabaseDescriptor.setGCConcurrentPhaseLogThreshold(DatabaseDescriptor.getGCConcurrentPhaseLogThreshold()); + DatabaseDescriptor.setGCConcurrentPhaseWarnThreshold(DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold()); } @VisibleForTesting @@ -4719,14 +4725,17 @@ public static long getGCLogThreshold() return conf.gc_log_threshold.toMilliseconds(); } - public static void setGCLogThreshold(int gcLogThreshold) + public static void setGCLogThreshold(int threshold) { - conf.gc_log_threshold = new DurationSpec.IntMillisecondsBound(gcLogThreshold); - } + if (threshold <= 0) + throw new ConfigurationException("Threshold value for gc_log_threshold must be greater than 0"); - public static EncryptionContext getEncryptionContext() - { - return encryptionContext; + long gcWarnThresholdInMs = getGCWarnThreshold(); + if (gcWarnThresholdInMs != 0 && threshold > gcWarnThresholdInMs) + throw new ConfigurationException("Threshold value for gc_log_threshold (" + threshold + ") must be less than gc_warn_threshold which is currently " + + gcWarnThresholdInMs); + + conf.gc_log_threshold = new DurationSpec.IntMillisecondsBound(threshold); } public static long getGCWarnThreshold() @@ -4736,6 +4745,14 @@ public static long getGCWarnThreshold() public static void setGCWarnThreshold(int threshold) { + if (threshold < 0) + throw new ConfigurationException("Threshold value for gc_warn_threshold must be greater than or equal to 0"); + + long gcLogThresholdInMs = getGCLogThreshold(); + if (threshold != 0 && threshold <= gcLogThresholdInMs) + throw new ConfigurationException("Threshold value for gc_warn_threshold (" + threshold + ") must be greater than gc_log_threshold which is currently " + + gcLogThresholdInMs); + conf.gc_warn_threshold = new DurationSpec.IntMillisecondsBound(threshold); } @@ -4746,10 +4763,17 @@ public static int getGCConcurrentPhaseLogThreshold() public static void setGCConcurrentPhaseLogThreshold(int threshold) { + if (threshold <= 0) + throw new ConfigurationException("Threshold must be greater than 0"); + + long gcConcurrentPhaseWarnThresholdInMs = getGCConcurrentPhaseWarnThreshold(); + if (gcConcurrentPhaseWarnThresholdInMs != 0 && threshold > gcConcurrentPhaseWarnThresholdInMs) + throw new ConfigurationException("Threshold value for gc_concurrent_phase_log_threshold (" + threshold + ") must be less than gc_concurrent_phase_warn_threshold which is currently " + + gcConcurrentPhaseWarnThresholdInMs); + conf.gc_concurrent_phase_log_threshold = new DurationSpec.IntMillisecondsBound(threshold); } - public static int getGCConcurrentPhaseWarnThreshold() { return conf.gc_concurrent_phase_warn_threshold.toMilliseconds(); @@ -4757,9 +4781,22 @@ public static int getGCConcurrentPhaseWarnThreshold() public static void setGCConcurrentPhaseWarnThreshold(int threshold) { + if (threshold < 0) + throw new ConfigurationException("Threshold value for gc_concurrent_phase_warn_threshold must be greater than or equal to 0"); + + long gcConcurrentPhaseLogThresholdInMs = getGCConcurrentPhaseLogThreshold(); + if (threshold != 0 && threshold <= gcConcurrentPhaseLogThresholdInMs) + throw new ConfigurationException("Threshold value for gc_concurrent_phase_warn_threshold (" + threshold + ") must be greater than gc_concurrent_phase_log_threshold which is currently " + + gcConcurrentPhaseLogThresholdInMs); + conf.gc_concurrent_phase_warn_threshold = new DurationSpec.IntMillisecondsBound(threshold); } + public static EncryptionContext getEncryptionContext() + { + return encryptionContext; + } + public static boolean isCDCEnabled() { return conf.cdc_enabled; diff --git a/src/java/org/apache/cassandra/service/GCInspector.java b/src/java/org/apache/cassandra/service/GCInspector.java index 8d5e2c4971cc..5faf5b9ac61c 100644 --- a/src/java/org/apache/cassandra/service/GCInspector.java +++ b/src/java/org/apache/cassandra/service/GCInspector.java @@ -408,15 +408,14 @@ private static long getFieldValue(Field field, boolean isAtomicLong) public void setGcWarnThresholdInMs(long threshold) { - long gcLogThresholdInMs = getGcLogThresholdInMs(); - if (threshold < 0) - throw new IllegalArgumentException("Threshold must be greater than or equal to 0"); - if (threshold != 0 && threshold <= gcLogThresholdInMs) - throw new IllegalArgumentException("Threshold must be greater than gcLogThresholdInMs which is currently " - + gcLogThresholdInMs); - if (threshold > Integer.MAX_VALUE) - throw new IllegalArgumentException("Threshold must be less than Integer.MAX_VALUE"); - DatabaseDescriptor.setGCWarnThreshold((int)threshold); + try + { + DatabaseDescriptor.setGCWarnThreshold((int)threshold); + } + catch (Throwable t) + { + throw new IllegalArgumentException(t.getMessage()); + } } public long getGcWarnThresholdInMs() @@ -426,15 +425,14 @@ public long getGcWarnThresholdInMs() public void setGcLogThresholdInMs(long threshold) { - if (threshold <= 0) - throw new IllegalArgumentException("Threshold must be greater than 0"); - - long gcWarnThresholdInMs = getGcWarnThresholdInMs(); - if (gcWarnThresholdInMs != 0 && threshold > gcWarnThresholdInMs) - throw new IllegalArgumentException("Threshold must be less than gcWarnThresholdInMs which is currently " - + gcWarnThresholdInMs); - - DatabaseDescriptor.setGCLogThreshold((int) threshold); + try + { + DatabaseDescriptor.setGCLogThreshold((int) threshold); + } + catch (Throwable t) + { + throw new IllegalArgumentException(t.getMessage()); + } } public int getGcConcurrentPhaseWarnThresholdInMs() @@ -444,13 +442,14 @@ public int getGcConcurrentPhaseWarnThresholdInMs() public void setGcConcurrentPhaseWarnThresholdInMs(int threshold) { - long gcConcurrentPhaseLogThresholdInMs = getGcConcurrentPhaseLogThresholdInMs(); - if (threshold < 0) - throw new IllegalArgumentException("Threshold must be greater than or equal to 0"); - if (threshold != 0 && threshold <= gcConcurrentPhaseLogThresholdInMs) - throw new IllegalArgumentException("Threshold must be greater than gcConcurrentPhaseLogThresholdInMs which is currently " - + gcConcurrentPhaseLogThresholdInMs); - DatabaseDescriptor.setGCConcurrentPhaseWarnThreshold(threshold); + try + { + DatabaseDescriptor.setGCConcurrentPhaseWarnThreshold(threshold); + } + catch (Throwable t) + { + throw new IllegalArgumentException(t.getMessage()); + } } public int getGcConcurrentPhaseLogThresholdInMs() @@ -460,15 +459,14 @@ public int getGcConcurrentPhaseLogThresholdInMs() public void setGcConcurrentPhaseLogThresholdInMs(int threshold) { - if (threshold <= 0) - throw new IllegalArgumentException("Threshold must be greater than 0"); - - long gcConcurrentPhaseWarnThresholdInMs = getGcConcurrentPhaseWarnThresholdInMs(); - if (gcConcurrentPhaseWarnThresholdInMs != 0 && threshold > gcConcurrentPhaseWarnThresholdInMs) - throw new IllegalArgumentException("Threshold must be less than gcConcurrentPhaseWarnThresholdInMs which is currently " - + gcConcurrentPhaseWarnThresholdInMs); - - DatabaseDescriptor.setGCConcurrentPhaseLogThreshold(threshold); + try + { + DatabaseDescriptor.setGCConcurrentPhaseLogThreshold(threshold); + } + catch (Throwable t) + { + throw new IllegalArgumentException(t.getMessage()); + } } public long getGcLogThresholdInMs() diff --git a/test/unit/org/apache/cassandra/service/GCInspectorTest.java b/test/unit/org/apache/cassandra/service/GCInspectorTest.java index 5ccaf5e2e93b..51d6a22a56d3 100644 --- a/test/unit/org/apache/cassandra/service/GCInspectorTest.java +++ b/test/unit/org/apache/cassandra/service/GCInspectorTest.java @@ -18,15 +18,15 @@ package org.apache.cassandra.service; import org.apache.cassandra.config.DatabaseDescriptor; + import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - public class GCInspectorTest { @@ -63,24 +63,13 @@ public void ensureStatusIsCalculated() @Test public void ensureWarnGreaterThanLog() { - try - { - gcInspector.setGcWarnThresholdInMs(gcInspector.getGcLogThresholdInMs()); - fail("Expect IllegalArgumentException"); - } - catch (IllegalArgumentException e) - { - // expected - } - try - { - gcInspector.setGcConcurrentPhaseWarnThresholdInMs(gcInspector.getGcConcurrentPhaseLogThresholdInMs()); - fail("Expect IllegalArgumentException"); - } - catch (IllegalArgumentException e) - { - // expected - } + assertThatThrownBy(() -> gcInspector.setGcWarnThresholdInMs(gcInspector.getGcLogThresholdInMs())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Threshold value for gc_warn_threshold (200) must be greater than gc_log_threshold which is currently 200"); + + assertThatThrownBy(() -> gcInspector.setGcConcurrentPhaseWarnThresholdInMs(gcInspector.getGcConcurrentPhaseLogThresholdInMs())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Threshold value for gc_concurrent_phase_warn_threshold (1000) must be greater than gc_concurrent_phase_log_threshold which is currently 1000"); } @Test @@ -102,27 +91,18 @@ public void ensureLogLessThanWarn() assertEquals(200, gcInspector.getGcLogThresholdInMs()); gcInspector.setGcWarnThresholdInMs(1000); assertEquals(1000, gcInspector.getGcWarnThresholdInMs()); - try - { - gcInspector.setGcLogThresholdInMs(gcInspector.getGcWarnThresholdInMs() + 1); - fail("Expect IllegalArgumentException"); - } - catch (IllegalArgumentException e) - { - // expected - } + + assertThatThrownBy(() -> gcInspector.setGcLogThresholdInMs(gcInspector.getGcWarnThresholdInMs() + 1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Threshold value for gc_log_threshold (1001) must be less than gc_warn_threshold which is currently 1000"); + assertEquals(1000, gcInspector.getGcConcurrentPhaseLogThresholdInMs()); gcInspector.setGcConcurrentPhaseWarnThresholdInMs(2000); assertEquals(2000, gcInspector.getGcConcurrentPhaseWarnThresholdInMs()); - try - { - gcInspector.setGcConcurrentPhaseLogThresholdInMs(gcInspector.getGcConcurrentPhaseWarnThresholdInMs() + 1); - fail("Expect IllegalArgumentException"); - } - catch (IllegalArgumentException e) - { - // expected - } + + assertThatThrownBy(() -> gcInspector.setGcConcurrentPhaseLogThresholdInMs(gcInspector.getGcConcurrentPhaseWarnThresholdInMs() + 1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Threshold value for gc_concurrent_phase_log_threshold (2001) must be less than gc_concurrent_phase_warn_threshold which is currently 2000"); } @Test From 0b3e68805935d911df0d3116d04ed13ad135c015 Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Thu, 30 Oct 2025 13:46:50 +0100 Subject: [PATCH 4/5] trigger StatusLogger if concurrent phases take too long --- src/java/org/apache/cassandra/service/GCInspector.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/service/GCInspector.java b/src/java/org/apache/cassandra/service/GCInspector.java index 5faf5b9ac61c..1ce68df58ab9 100644 --- a/src/java/org/apache/cassandra/service/GCInspector.java +++ b/src/java/org/apache/cassandra/service/GCInspector.java @@ -296,7 +296,8 @@ else if (duration > getGcConcurrentPhaseLogThresholdInMs()) logger.info(sb.toString()); else if (logger.isTraceEnabled()) logger.trace(sb.toString()); - // TODO: trigger StatusLogger if concurrent phases take too long? + if (duration > this.getConcurrentStatusThresholdInMs()) + StatusLogger.log(); } else { @@ -479,4 +480,8 @@ public long getStatusThresholdInMs() return getGcWarnThresholdInMs() != 0 ? getGcWarnThresholdInMs() : getGcLogThresholdInMs(); } + public long getConcurrentStatusThresholdInMs() + { + return getGcConcurrentPhaseWarnThresholdInMs() != 0 ? getGcConcurrentPhaseWarnThresholdInMs() : getGcConcurrentPhaseLogThresholdInMs(); + } } From 819d94179886b9a334f879813eae7805be565394 Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Fri, 31 Oct 2025 12:08:45 +0100 Subject: [PATCH 5/5] reflected review --- .../cassandra/config/DatabaseDescriptor.java | 44 ++++++++++++------- .../apache/cassandra/service/GCInspector.java | 38 +++------------- 2 files changed, 33 insertions(+), 49 deletions(-) diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 41423eca2d5b..dec260f9b94b 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -1221,11 +1221,21 @@ else if (conf.max_value_size.toMebibytes() >= 2048) if (conf.audit_logging_options != null) setAuditLoggingOptions(conf.audit_logging_options); - // just run through the validation by setting current values back to their setters, so we are sure that their values are valid - DatabaseDescriptor.setGCLogThreshold((int) DatabaseDescriptor.getGCLogThreshold()); - DatabaseDescriptor.setGCWarnThreshold((int) DatabaseDescriptor.getGCWarnThreshold()); - DatabaseDescriptor.setGCConcurrentPhaseLogThreshold(DatabaseDescriptor.getGCConcurrentPhaseLogThreshold()); - DatabaseDescriptor.setGCConcurrentPhaseWarnThreshold(DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold()); + try + { + // Run through the validation by setting current values back to their setters, so we are sure that their values are valid. + // We are catching IllegalArgumentException and translating it to ConfigurationException to comply with + // rest of the logic in this method. These setters are also called in GCInspectorMXBean were IllegalArgumentException + // is thrown when arguments are invalid instead ConfigurationException, on purpose. + DatabaseDescriptor.setGCLogThreshold((int) DatabaseDescriptor.getGCLogThreshold()); + DatabaseDescriptor.setGCWarnThreshold((int) DatabaseDescriptor.getGCWarnThreshold()); + DatabaseDescriptor.setGCConcurrentPhaseLogThreshold(DatabaseDescriptor.getGCConcurrentPhaseLogThreshold()); + DatabaseDescriptor.setGCConcurrentPhaseWarnThreshold(DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold()); + } + catch (IllegalArgumentException ex) + { + throw new ConfigurationException(ex.getMessage()); + } } @VisibleForTesting @@ -4728,12 +4738,12 @@ public static long getGCLogThreshold() public static void setGCLogThreshold(int threshold) { if (threshold <= 0) - throw new ConfigurationException("Threshold value for gc_log_threshold must be greater than 0"); + throw new IllegalArgumentException("Threshold value for gc_log_threshold must be greater than 0"); long gcWarnThresholdInMs = getGCWarnThreshold(); if (gcWarnThresholdInMs != 0 && threshold > gcWarnThresholdInMs) - throw new ConfigurationException("Threshold value for gc_log_threshold (" + threshold + ") must be less than gc_warn_threshold which is currently " - + gcWarnThresholdInMs); + throw new IllegalArgumentException("Threshold value for gc_log_threshold (" + threshold + ") must be less than gc_warn_threshold which is currently " + + gcWarnThresholdInMs); conf.gc_log_threshold = new DurationSpec.IntMillisecondsBound(threshold); } @@ -4746,12 +4756,12 @@ public static long getGCWarnThreshold() public static void setGCWarnThreshold(int threshold) { if (threshold < 0) - throw new ConfigurationException("Threshold value for gc_warn_threshold must be greater than or equal to 0"); + throw new IllegalArgumentException("Threshold value for gc_warn_threshold must be greater than or equal to 0"); long gcLogThresholdInMs = getGCLogThreshold(); if (threshold != 0 && threshold <= gcLogThresholdInMs) - throw new ConfigurationException("Threshold value for gc_warn_threshold (" + threshold + ") must be greater than gc_log_threshold which is currently " - + gcLogThresholdInMs); + throw new IllegalArgumentException("Threshold value for gc_warn_threshold (" + threshold + ") must be greater than gc_log_threshold which is currently " + + gcLogThresholdInMs); conf.gc_warn_threshold = new DurationSpec.IntMillisecondsBound(threshold); } @@ -4764,12 +4774,12 @@ public static int getGCConcurrentPhaseLogThreshold() public static void setGCConcurrentPhaseLogThreshold(int threshold) { if (threshold <= 0) - throw new ConfigurationException("Threshold must be greater than 0"); + throw new IllegalArgumentException("Threshold must be greater than 0"); long gcConcurrentPhaseWarnThresholdInMs = getGCConcurrentPhaseWarnThreshold(); if (gcConcurrentPhaseWarnThresholdInMs != 0 && threshold > gcConcurrentPhaseWarnThresholdInMs) - throw new ConfigurationException("Threshold value for gc_concurrent_phase_log_threshold (" + threshold + ") must be less than gc_concurrent_phase_warn_threshold which is currently " - + gcConcurrentPhaseWarnThresholdInMs); + throw new IllegalArgumentException("Threshold value for gc_concurrent_phase_log_threshold (" + threshold + ") must be less than gc_concurrent_phase_warn_threshold which is currently " + + gcConcurrentPhaseWarnThresholdInMs); conf.gc_concurrent_phase_log_threshold = new DurationSpec.IntMillisecondsBound(threshold); } @@ -4782,12 +4792,12 @@ public static int getGCConcurrentPhaseWarnThreshold() public static void setGCConcurrentPhaseWarnThreshold(int threshold) { if (threshold < 0) - throw new ConfigurationException("Threshold value for gc_concurrent_phase_warn_threshold must be greater than or equal to 0"); + throw new IllegalArgumentException("Threshold value for gc_concurrent_phase_warn_threshold must be greater than or equal to 0"); long gcConcurrentPhaseLogThresholdInMs = getGCConcurrentPhaseLogThreshold(); if (threshold != 0 && threshold <= gcConcurrentPhaseLogThresholdInMs) - throw new ConfigurationException("Threshold value for gc_concurrent_phase_warn_threshold (" + threshold + ") must be greater than gc_concurrent_phase_log_threshold which is currently " - + gcConcurrentPhaseLogThresholdInMs); + throw new IllegalArgumentException("Threshold value for gc_concurrent_phase_warn_threshold (" + threshold + ") must be greater than gc_concurrent_phase_log_threshold which is currently " + + gcConcurrentPhaseLogThresholdInMs); conf.gc_concurrent_phase_warn_threshold = new DurationSpec.IntMillisecondsBound(threshold); } diff --git a/src/java/org/apache/cassandra/service/GCInspector.java b/src/java/org/apache/cassandra/service/GCInspector.java index 1ce68df58ab9..da2d81434256 100644 --- a/src/java/org/apache/cassandra/service/GCInspector.java +++ b/src/java/org/apache/cassandra/service/GCInspector.java @@ -296,6 +296,7 @@ else if (duration > getGcConcurrentPhaseLogThresholdInMs()) logger.info(sb.toString()); else if (logger.isTraceEnabled()) logger.trace(sb.toString()); + if (duration > this.getConcurrentStatusThresholdInMs()) StatusLogger.log(); } @@ -307,6 +308,7 @@ else if (duration > getGcLogThresholdInMs()) logger.info(sb.toString()); else if (logger.isTraceEnabled()) logger.trace(sb.toString()); + if (duration > this.getStatusThresholdInMs()) StatusLogger.log(); } @@ -409,14 +411,7 @@ private static long getFieldValue(Field field, boolean isAtomicLong) public void setGcWarnThresholdInMs(long threshold) { - try - { - DatabaseDescriptor.setGCWarnThreshold((int)threshold); - } - catch (Throwable t) - { - throw new IllegalArgumentException(t.getMessage()); - } + DatabaseDescriptor.setGCWarnThreshold((int)threshold); } public long getGcWarnThresholdInMs() @@ -426,14 +421,7 @@ public long getGcWarnThresholdInMs() public void setGcLogThresholdInMs(long threshold) { - try - { - DatabaseDescriptor.setGCLogThreshold((int) threshold); - } - catch (Throwable t) - { - throw new IllegalArgumentException(t.getMessage()); - } + DatabaseDescriptor.setGCLogThreshold((int) threshold); } public int getGcConcurrentPhaseWarnThresholdInMs() @@ -443,14 +431,7 @@ public int getGcConcurrentPhaseWarnThresholdInMs() public void setGcConcurrentPhaseWarnThresholdInMs(int threshold) { - try - { - DatabaseDescriptor.setGCConcurrentPhaseWarnThreshold(threshold); - } - catch (Throwable t) - { - throw new IllegalArgumentException(t.getMessage()); - } + DatabaseDescriptor.setGCConcurrentPhaseWarnThreshold(threshold); } public int getGcConcurrentPhaseLogThresholdInMs() @@ -460,14 +441,7 @@ public int getGcConcurrentPhaseLogThresholdInMs() public void setGcConcurrentPhaseLogThresholdInMs(int threshold) { - try - { - DatabaseDescriptor.setGCConcurrentPhaseLogThreshold(threshold); - } - catch (Throwable t) - { - throw new IllegalArgumentException(t.getMessage()); - } + DatabaseDescriptor.setGCConcurrentPhaseLogThreshold(threshold); } public long getGcLogThresholdInMs()