Commits (44), all authored by andygrove:
7252605  Aug 22, 2025  Access Spark configs from native code
d084cfa  Aug 22, 2025  code cleanup
4837935  Aug 22, 2025  revert
ad9c9b8  Oct 3, 2025   debug
f3bb412  Oct 3, 2025   use df release
13f14d3  Oct 3, 2025   cargo update
78f5b4f  Oct 3, 2025   [skip ci]
5a39d3b  Oct 3, 2025   merge other PR [skip-ci]
dc11515  Oct 3, 2025   save [skip-ci]
d2a1ab1  Oct 3, 2025   [skip ci]
31cdbc6  Oct 3, 2025   save [skip ci]
ffb1f71  Oct 3, 2025   Merge remote-tracking branch 'apache/main' into debug-mem
322b4c5  Oct 3, 2025   info logging
89e10ac  Oct 3, 2025   log task id [skip ci]
3b191fd  Oct 3, 2025   println
7c24836  Oct 3, 2025   revert lock file
405f5b7  Oct 3, 2025   prep for review
522238d  Oct 3, 2025   save
36565ca  Oct 3, 2025   Update spark/src/main/scala/org/apache/comet/CometExecIterator.scala
21189a6  Oct 3, 2025   info logging
dfa2c67  Oct 3, 2025   Merge branch 'debug-mem' of github.com:andygrove/datafusion-comet int…
d9817ce  Oct 3, 2025   fix
acba7bc  Oct 3, 2025   log error on try_grow fail
4051d29  Oct 3, 2025   log error on try_grow fail
df69875  Oct 3, 2025   revert
ad891a0  Oct 3, 2025   add Python script to convert log to csv
8756256  Oct 3, 2025   Python script to generate chart
7eb1bc1  Oct 3, 2025   scripts
21bd386  Oct 3, 2025   new script
ec823c2  Oct 3, 2025   show err
a66fa65  Oct 3, 2025   save
12db37f  Oct 3, 2025   Merge branch 'debug-mem' of github.com:andygrove/datafusion-comet int…
2fb336e  Oct 3, 2025   track errors
706f5e7  Oct 3, 2025   format
4faf881  Oct 3, 2025   ASF header
d91abda  Oct 3, 2025   add brief docs
f6128b5  Oct 3, 2025   docs
7d40ac2  Oct 5, 2025   fix
c495897  Oct 6, 2025   cargo fmt
06814b7  Oct 6, 2025   upmerge
e51751f  Oct 6, 2025   format
75e727f  Oct 7, 2025   upmerge
e844287  Oct 7, 2025   fix regression
2884ed3  Oct 10, 2025  upmerge
13 changes: 11 additions & 2 deletions common/src/main/scala/org/apache/comet/CometConf.scala
@@ -63,9 +63,11 @@ object CometConf extends ShimCometConf {

def conf(key: String): ConfigBuilder = ConfigBuilder(key)

val COMET_EXEC_CONFIG_PREFIX = "spark.comet.exec";
val COMET_PREFIX = "spark.comet";

val COMET_EXPR_CONFIG_PREFIX = "spark.comet.expression";
val COMET_EXEC_CONFIG_PREFIX: String = s"$COMET_PREFIX.exec";

val COMET_EXPR_CONFIG_PREFIX: String = s"$COMET_PREFIX.expression";

val COMET_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.enabled")
.doc(
@@ -454,6 +456,13 @@
.booleanConf
.createWithDefault(false)

val COMET_DEBUG_MEMORY_ENABLED: ConfigEntry[Boolean] =
conf(s"$COMET_PREFIX.debug.memory")
.doc("When enabled, log all native memory pool interactions to stdout.")
.internal()
.booleanConf
.createWithDefault(false)

val COMET_EXPLAIN_VERBOSE_ENABLED: ConfigEntry[Boolean] =
conf("spark.comet.explain.verbose.enabled")
.doc(
80 changes: 80 additions & 0 deletions dev/scripts/mem_debug_chart.py
@@ -0,0 +1,80 @@
#!/usr/bin/python
##############################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################################

import argparse
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
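
# Example usage (hypothetical file name; requires pandas and matplotlib to be installed):
#   ./mem_debug_chart.py task486.csv --title "Task 486 memory usage"
#   ./mem_debug_chart.py task486.csv --instant --bar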

def main():
ap = argparse.ArgumentParser()
ap.add_argument("csv", help="CSV with columns: name,size")
ap.add_argument("--instant", action="store_true",
help="Plot per-step stacked values (not cumulative totals).")
ap.add_argument("--bar", action="store_true",
help="Use stacked bars instead of stacked area.")
ap.add_argument("--title", default=None, help="Optional plot title.")
args = ap.parse_args()

path = Path(args.csv)
df = pd.read_csv(path)

# Validate + clean
need = {"name", "size"}
if not need.issubset(set(df.columns)):
raise SystemExit("CSV must have columns: name,size")

df["size"] = pd.to_numeric(df["size"], errors="coerce").fillna(0)

# Treat each row as the next time step: t = 1..N
df = df.reset_index(drop=True).assign(t=lambda d: d.index + 1)

# Build wide matrix: one column per name, one row per time step
# If multiple entries exist for the same (t, name), they’ll be summed.
wide = (
df.groupby(["t", "name"], as_index=False)["size"].sum()
.pivot(index="t", columns="name", values="size")
.fillna(0.0)
.sort_index()
)

# Running totals unless --instant specified
plot_data = wide if args.instant else wide.cumsum(axis=0)

# Plot
if args.bar:
ax = plot_data.plot(kind="bar", stacked=True, figsize=(12, 6), width=1.0)
else:
ax = plot_data.plot.area(stacked=True, figsize=(12, 6))

ax.set_xlabel("step")
ax.set_ylabel("size" if args.instant else "cumulative size")
ax.set_title(args.title or ("Stacked running totals by name" if not args.instant
else "Stacked per-step values by name"))
ax.legend(title="name", bbox_to_anchor=(1.02, 1), loc="upper left")
plt.tight_layout()

out = path.with_suffix(".stacked.png" if args.instant else ".stacked_cumulative.png")
plt.savefig(out, dpi=150)
print(f"Saved plot to {out}")
plt.show()

if __name__ == "__main__":
main()
69 changes: 69 additions & 0 deletions dev/scripts/mem_debug_to_csv.py
@@ -0,0 +1,69 @@
#!/usr/bin/python
##############################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################################

import argparse
import re
import sys
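
# Example usage (hypothetical file names): extract running totals for Spark task 486 from an
# executor log containing "[Task N] MemoryPool[...]" lines, then plot with mem_debug_chart.py:
#   ./mem_debug_to_csv.py --file executor.log --task 486 > task486.csv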

def main(file, task_filter):
# keep track of running total allocation per consumer
alloc = {}

# open file
with open(file) as f:
# iterate over lines in file
print("name,size")
for line in f:
# print(line, file=sys.stderr)

# example line: [Task 486] MemoryPool[HashJoinInput[6]].shrink(1000)
# parse consumer name
            re_match = re.search(r'\[Task (.*)\] MemoryPool\[(.*)\]\.(.*)\((.*)\)', line, re.IGNORECASE)
if re_match:
try:
task = int(re_match.group(1))
if task != task_filter:
continue

consumer = re_match.group(2)
method = re_match.group(3)
size = int(re_match.group(4))
if method == "try_grow":
if "Err" in line:
continue

                    # update the running total for this consumer
                    if consumer not in alloc:
                        alloc[consumer] = 0
                    if method == "grow" or method == "try_grow":
                        alloc[consumer] = alloc[consumer] + size
                    elif method == "shrink":
                        alloc[consumer] = alloc[consumer] - size
                    print(f"{consumer},{alloc[consumer]}")
                except Exception:
                    print("error parsing", line, file=sys.stderr)


if __name__ == "__main__":
ap = argparse.ArgumentParser(description="Generate CSV From memory debug output")
ap.add_argument("--task", default=None, help="Task ID.")
ap.add_argument("--file", default=None, help="Spark log containing memory debug output")
args = ap.parse_args()
main(args.file, int(args.task))
43 changes: 27 additions & 16 deletions native/core/src/execution/jni_api.rs
@@ -78,6 +78,11 @@ use crate::execution::spark_plan::SparkPlan;

use crate::execution::tracing::{log_memory_usage, trace_begin, trace_end, with_trace};

use crate::execution::memory_pools::logging_pool::LoggingPool;
use crate::execution::spark_config::{
SparkConfig, COMET_DEBUG_ENABLED, COMET_DEBUG_MEMORY, COMET_EXPLAIN_NATIVE_ENABLED,
COMET_TRACING_ENABLED,
};
use datafusion_comet_proto::spark_operator::operator::OpStruct;
use log::info;
use once_cell::sync::Lazy;
@@ -167,12 +172,21 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan(
memory_limit: jlong,
memory_limit_per_task: jlong,
task_attempt_id: jlong,
debug_native: jboolean,
explain_native: jboolean,
tracing_enabled: jboolean,
Review thread on this change:
andygrove (Member, Author): Rather than adding yet another flag to this API call, I am now using the already available Spark config map in native code.
Contributor: +1. The config map should be the preferred method.
) -> jlong {
try_unwrap_or_throw(&e, |mut env| {
with_trace("createPlan", tracing_enabled != JNI_FALSE, || {
// Deserialize Spark configs
let array = unsafe { JPrimitiveArray::from_raw(serialized_spark_configs) };
let bytes = env.convert_byte_array(array)?;
let spark_configs = serde::deserialize_config(bytes.as_slice())?;
let spark_config: HashMap<String, String> = spark_configs.entries.into_iter().collect();

// Access Comet configs
let debug_native = spark_config.get_bool(COMET_DEBUG_ENABLED);
let explain_native = spark_config.get_bool(COMET_EXPLAIN_NATIVE_ENABLED);
let tracing_enabled = spark_config.get_bool(COMET_TRACING_ENABLED);
let logging_memory_pool = spark_config.get_bool(COMET_DEBUG_MEMORY);

with_trace("createPlan", tracing_enabled, || {
// Init JVM classes
JVMClasses::init(&mut env);

@@ -183,15 +197,6 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan(
let bytes = env.convert_byte_array(array)?;
let spark_plan = serde::deserialize_op(bytes.as_slice())?;

// Deserialize Spark configs
let array = unsafe { JPrimitiveArray::from_raw(serialized_spark_configs) };
let bytes = env.convert_byte_array(array)?;
let spark_configs = serde::deserialize_config(bytes.as_slice())?;

// Convert Spark configs to HashMap
let _spark_config_map: HashMap<String, String> =
spark_configs.entries.into_iter().collect();

let metrics = Arc::new(jni_new_global_ref!(env, metrics_node)?);

// Get the global references of input sources
@@ -218,6 +223,12 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan(
let memory_pool =
create_memory_pool(&memory_pool_config, task_memory_manager, task_attempt_id);

let memory_pool = if logging_memory_pool {
Arc::new(LoggingPool::new(task_attempt_id as u64, memory_pool))
} else {
memory_pool
};

// Get local directories for storing spill files
let local_dirs_array = JObjectArray::from_raw(local_dirs);
let num_local_dirs = env.get_array_length(&local_dirs_array)?;
@@ -256,10 +267,10 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan(
metrics_last_update_time: Instant::now(),
plan_creation_time,
session_ctx: Arc::new(session),
debug_native: debug_native == 1,
explain_native: explain_native == 1,
debug_native,
explain_native,
memory_pool_config,
tracing_enabled: tracing_enabled != JNI_FALSE,
tracing_enabled,
});

Ok(Box::into_raw(exec_context) as i64)
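The new spark_config module referenced by the imports above is not part of the hunks shown in this diff. As a rough sketch of how the pieces could fit together, the trait might look like the following; the trait and constant names come from the imports in jni_api.rs, while the key strings (other than spark.comet.debug.memory, which is defined in CometConf.scala above) and the method body are assumptions rather than the actual implementation.

// Hypothetical sketch of native/core/src/execution/spark_config.rs (not shown in this diff).
use std::collections::HashMap;

// spark.comet.debug.memory matches the new entry in CometConf.scala; the other key strings
// here are assumptions for illustration.
pub(crate) const COMET_DEBUG_ENABLED: &str = "spark.comet.debug.enabled";
pub(crate) const COMET_DEBUG_MEMORY: &str = "spark.comet.debug.memory";
pub(crate) const COMET_EXPLAIN_NATIVE_ENABLED: &str = "spark.comet.explain.native.enabled";
pub(crate) const COMET_TRACING_ENABLED: &str = "spark.comet.tracing.enabled";

/// Convenience accessors over the Spark config map that the JVM serializes into createPlan.
pub(crate) trait SparkConfig {
    /// Returns the boolean value for `name`, or false when the key is missing or unparsable.
    fn get_bool(&self, name: &str) -> bool;
}

impl SparkConfig for HashMap<String, String> {
    fn get_bool(&self, name: &str) -> bool {
        self.get(name)
            .map(|v| v.trim().eq_ignore_ascii_case("true"))
            .unwrap_or(false)
    }
}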
88 changes: 88 additions & 0 deletions native/core/src/execution/memory_pools/logging_pool.rs
@@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use datafusion::execution::memory_pool::{MemoryPool, MemoryReservation};
use log::info;
use std::sync::Arc;

#[derive(Debug)]
pub(crate) struct LoggingPool {
task_attempt_id: u64,
pool: Arc<dyn MemoryPool>,
}

impl LoggingPool {
pub fn new(task_attempt_id: u64, pool: Arc<dyn MemoryPool>) -> Self {
Self {
task_attempt_id,
pool,
}
}
}

impl MemoryPool for LoggingPool {
fn grow(&self, reservation: &MemoryReservation, additional: usize) {
info!(
Review thread on this change:
Contributor: Would it be useful to add a debug! log message which has the backtrace of where this was requested from?
andygrove (Member, Author): Good idea. I updated try_grow to log the Err if it fails. This should contain the backtrace if the backtrace feature is enabled, but I need to test this out locally.
Contributor (@parthchandra, Oct 3, 2025): I was thinking that we do this for every call (not just for the error) so we can trace the precise origins of the allocations. Probably should be a trace message (not a debug) though. This is merely a suggestion though, I'll leave it to you to decide if it is useful. Logging the backtrace on error is definitely useful.
"[Task {}] MemoryPool[{}].grow({})",
self.task_attempt_id,
reservation.consumer().name(),
additional
);
self.pool.grow(reservation, additional);
}

fn shrink(&self, reservation: &MemoryReservation, shrink: usize) {
info!(
"[Task {}] MemoryPool[{}].shrink({})",
self.task_attempt_id,
reservation.consumer().name(),
shrink
);
self.pool.shrink(reservation, shrink);
}

fn try_grow(
&self,
reservation: &MemoryReservation,
additional: usize,
) -> datafusion::common::Result<()> {
match self.pool.try_grow(reservation, additional) {
Ok(_) => {
info!(
"[Task {}] MemoryPool[{}].try_grow({}) returning Ok",
self.task_attempt_id,
reservation.consumer().name(),
additional
);
Ok(())
}
Err(e) => {
info!(
"[Task {}] MemoryPool[{}].try_grow({}) returning Err: {e:?}",
self.task_attempt_id,
reservation.consumer().name(),
additional
);
Err(e)
}
}
}

fn reserved(&self) -> usize {
self.pool.reserved()
}
}
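
For context, the wrapper can be exercised against any DataFusion memory pool. The sketch below is illustrative only: the task id, pool size, and consumer name are made-up values, and the lines it emits (via log::info!) are the ones mem_debug_to_csv.py expects to parse.

// Illustrative only: wrap a stock DataFusion pool in LoggingPool and drive a reservation.
use crate::execution::memory_pools::logging_pool::LoggingPool;
use datafusion::common::Result;
use datafusion::execution::memory_pool::{GreedyMemoryPool, MemoryConsumer, MemoryPool};
use std::sync::Arc;

fn demo_logging_pool() -> Result<()> {
    // Arbitrary 64 MiB pool and task id 42 for this example.
    let inner: Arc<dyn MemoryPool> = Arc::new(GreedyMemoryPool::new(64 * 1024 * 1024));
    let pool: Arc<dyn MemoryPool> = Arc::new(LoggingPool::new(42, inner));

    let mut reservation = MemoryConsumer::new("HashJoinInput[6]").register(&pool);
    // Logs: [Task 42] MemoryPool[HashJoinInput[6]].try_grow(1024) returning Ok
    reservation.try_grow(1024)?;
    // Logs: [Task 42] MemoryPool[HashJoinInput[6]].shrink(512)
    reservation.shrink(512);
    Ok(())
}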
1 change: 1 addition & 0 deletions native/core/src/execution/memory_pools/mod.rs
@@ -17,6 +17,7 @@

mod config;
mod fair_pool;
pub mod logging_pool;
mod task_shared;
mod unified_pool;

1 change: 1 addition & 0 deletions native/core/src/execution/mod.rs
@@ -27,6 +27,7 @@ pub(crate) mod sort;
pub(crate) mod spark_plan;
pub use datafusion_comet_spark_expr::timezone;
mod memory_pools;
pub(crate) mod spark_config;
pub(crate) mod tracing;
pub(crate) mod utils;

Expand Down
Loading
Loading