From f09e4922f22ecdf53914da4db5a7b8db150ee64a Mon Sep 17 00:00:00 2001 From: Yoav Gross Date: Thu, 21 Aug 2025 16:02:44 +0300 Subject: [PATCH] committer: add cli for committer benchmark --- Cargo.lock | 37 ++++ Cargo.toml | 3 + commitlint.config.js | 1 + .../src/block_committer/input.rs | 2 +- .../block_committer/state_diff_generator.rs | 2 +- .../src/forest/filled_forest.rs | 16 +- .../src/tracing_utils.rs | 16 ++ crates/starknet_committer_cli/Cargo.toml | 23 +++ crates/starknet_committer_cli/src/commands.rs | 190 ++++++++++++++++++ crates/starknet_committer_cli/src/lib.rs | 1 + crates/starknet_committer_cli/src/main.rs | 52 +++++ workspace_tests/package_integrity_test.rs | 3 +- 12 files changed, 338 insertions(+), 8 deletions(-) create mode 100644 crates/starknet_committer_cli/Cargo.toml create mode 100644 crates/starknet_committer_cli/src/commands.rs create mode 100644 crates/starknet_committer_cli/src/lib.rs create mode 100644 crates/starknet_committer_cli/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 9f66c7cfacf..aa2a02b32d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4836,6 +4836,27 @@ dependencies = [ "typenum", ] +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +dependencies = [ + "memchr", +] + [[package]] name = "ctr" version = "0.9.2" @@ -12262,6 +12283,22 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "starknet_committer_cli" +version = "0.0.0" +dependencies = [ + "clap", + "csv", + "rand 0.8.5", + "starknet_committer", + "starknet_committer_and_os_cli", + "starknet_patricia", + "starknet_patricia_storage", + "tokio", + 
"tracing", + "tracing-subscriber", +] + [[package]] name = "starknet_os" version = "0.0.0" diff --git a/Cargo.toml b/Cargo.toml index 22be944024e..2cff2837bd7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,6 +81,7 @@ members = [ "crates/starknet_api", "crates/starknet_committer", "crates/starknet_committer_and_os_cli", + "crates/starknet_committer_cli", "crates/starknet_os", "crates/starknet_os_flow_tests", "crates/starknet_patricia", @@ -201,6 +202,7 @@ clap = "4.5.4" colored = "3" const_format = "0.2.30" criterion = "0.5.1" +csv = "1.3.1" dashmap = "6.1.0" deadqueue = "0.2.4" defaultmap = "0.5.0" @@ -311,6 +313,7 @@ starknet-types-core = "0.1.8" starknet_api = { path = "crates/starknet_api", version = "0.0.0" } starknet_committer.path = "crates/starknet_committer" starknet_committer_and_os_cli.path = "crates/starknet_committer_and_os_cli" +starknet_committer_cli.path = "crates/starknet_committer_cli" starknet_os.path = "crates/starknet_os" starknet_os_flow_tests.path = "crates/starknet_os_flow_tests" starknet_patricia.path = "crates/starknet_patricia" diff --git a/commitlint.config.js b/commitlint.config.js index ca72c4035bb..b2a131a2fc9 100644 --- a/commitlint.config.js +++ b/commitlint.config.js @@ -80,6 +80,7 @@ const AllowedScopes = ['apollo_batcher', 'signature', 'starknet_api', 'starknet_committer', + 'starknet_committer_cli', 'starknet_committer_and_os_cli', 'starknet_os', 'starknet_os_flow_tests', diff --git a/crates/starknet_committer/src/block_committer/input.rs b/crates/starknet_committer/src/block_committer/input.rs index 94aefe73aaa..b94b44d6b97 100644 --- a/crates/starknet_committer/src/block_committer/input.rs +++ b/crates/starknet_committer/src/block_committer/input.rs @@ -101,7 +101,7 @@ impl Default for ConfigImpl { } } -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Default, Eq, PartialEq)] pub struct Input { /// All relevant information for the state diff commitment. 
pub state_diff: StateDiff, diff --git a/crates/starknet_committer/src/block_committer/state_diff_generator.rs b/crates/starknet_committer/src/block_committer/state_diff_generator.rs index fcb8facb362..404cb61080e 100644 --- a/crates/starknet_committer/src/block_committer/state_diff_generator.rs +++ b/crates/starknet_committer/src/block_committer/state_diff_generator.rs @@ -11,7 +11,7 @@ use crate::block_committer::random_structs::RandomValue; #[path = "state_diff_generator_test.rs"] pub mod state_diff_generator_test; -pub(crate) const RANDOM_STATE_DIFF_CONTRACT_ADDRESS: u32 = 500_u32; +pub const RANDOM_STATE_DIFF_CONTRACT_ADDRESS: u32 = 500_u32; pub(crate) const N_STORAGE_UPDATES: usize = 1000_usize; pub fn generate_random_state_diff(rng: &mut R) -> StateDiff { diff --git a/crates/starknet_committer/src/forest/filled_forest.rs b/crates/starknet_committer/src/forest/filled_forest.rs index aee958db88c..ee6afec2baf 100644 --- a/crates/starknet_committer/src/forest/filled_forest.rs +++ b/crates/starknet_committer/src/forest/filled_forest.rs @@ -6,7 +6,7 @@ use starknet_patricia::patricia_merkle_tree::filled_tree::tree::FilledTree; use starknet_patricia::patricia_merkle_tree::node_data::leaf::LeafModifications; use starknet_patricia::patricia_merkle_tree::types::NodeIndex; use starknet_patricia::patricia_merkle_tree::updated_skeleton_tree::tree::UpdatedSkeletonTreeImpl; -use starknet_patricia_storage::storage_trait::Storage; +use starknet_patricia_storage::storage_trait::{DbHashMap, Storage}; use tracing::info; use crate::block_committer::input::{ @@ -32,9 +32,11 @@ pub struct FilledForest { } impl FilledForest { - pub fn write_to_storage(&self, storage: &mut impl Storage) { + /// Writes the node serialization of the filled trees to storage. Returns the number of new + /// objects written to storage. + pub fn write_to_storage(&self, storage: &mut impl Storage) -> usize { // Serialize all trees to one hash map. 
- let new_db_objects = self + let new_db_objects: DbHashMap = self .storage_tries .values() .flat_map(|tree| tree.serialize().into_iter()) @@ -42,8 +44,12 @@ impl FilledForest { .chain(self.classes_trie.serialize()) .collect(); - // Store the new hash map - storage.mset(new_db_objects).expect("Write to storage failed"); + // Store the new hash map. + let n_new_facts = new_db_objects.len(); + storage + .mset(new_db_objects) + .unwrap_or_else(|_| panic!("Write of {n_new_facts} new facts to storage failed")); + n_new_facts } pub fn get_contract_root_hash(&self) -> HashOutput { diff --git a/crates/starknet_committer_and_os_cli/src/tracing_utils.rs b/crates/starknet_committer_and_os_cli/src/tracing_utils.rs index 1c2f5b4ff45..104ace35aab 100644 --- a/crates/starknet_committer_and_os_cli/src/tracing_utils.rs +++ b/crates/starknet_committer_and_os_cli/src/tracing_utils.rs @@ -1,4 +1,5 @@ use tracing::level_filters::LevelFilter; +use tracing::Level; use tracing_subscriber::prelude::*; use tracing_subscriber::reload::Handle; use tracing_subscriber::{filter, fmt, reload, Registry}; @@ -15,3 +16,18 @@ pub fn configure_tracing() -> Handle { tracing_subscriber::registry().with(global_filter).with(layer).init(); global_filter_handle } + +/// Change the given log handle to the given log level. 
+pub fn modify_log_level(log_level: String, log_filter_handle: Handle) { + let level = match log_level.to_lowercase().as_str() { + "error" => Level::ERROR, + "warn" => Level::WARN, + "info" => Level::INFO, + "debug" => Level::DEBUG, + "trace" => Level::TRACE, + _ => Level::INFO, + }; + log_filter_handle + .modify(|filter| *filter = level.into()) + .expect("Failed to set the log level."); +} diff --git a/crates/starknet_committer_cli/Cargo.toml b/crates/starknet_committer_cli/Cargo.toml new file mode 100644 index 00000000000..f5e848f46da --- /dev/null +++ b/crates/starknet_committer_cli/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "starknet_committer_cli" +version.workspace = true +edition.workspace = true +repository.workspace = true +license.workspace = true +description = "CLI for the committer storage benchmark." + +[lints] +workspace = true + +[dependencies] +clap = { workspace = true, features = ["cargo", "derive"] } +csv.workspace = true +rand.workspace = true +starknet_committer = { workspace = true, features = ["testing"] } +# TODO(Tzahi): Remove once tracing is moved to a common location. 
+starknet_committer_and_os_cli.workspace = true +starknet_patricia.workspace = true +starknet_patricia_storage.workspace = true +tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } +tracing.workspace = true +tracing-subscriber.workspace = true diff --git a/crates/starknet_committer_cli/src/commands.rs b/crates/starknet_committer_cli/src/commands.rs new file mode 100644 index 00000000000..0e05fdb034b --- /dev/null +++ b/crates/starknet_committer_cli/src/commands.rs @@ -0,0 +1,190 @@ +use std::fs::{self, File}; +use std::time::Instant; + +use csv::Writer; +use rand::rngs::SmallRng; +use rand::SeedableRng; +use starknet_committer::block_committer::commit::commit_block; +use starknet_committer::block_committer::input::{ConfigImpl, Input}; +use starknet_committer::block_committer::state_diff_generator::generate_random_state_diff; +use starknet_patricia::hash::hash_trait::HashOutput; +use starknet_patricia_storage::map_storage::MapStorage; +use tracing::info; + +pub type InputImpl = Input; + +struct TimeMeasurement { + timer: Option, + total_time: u128, // Total duration of all blocks (milliseconds). + per_fact_durations: Vec, // Average duration (microseconds) per new fact in a block. + n_facts: Vec, + block_durations: Vec, // Duration of a block (milliseconds). + facts_in_db: Vec, // Number of facts in the DB prior to the current block. 
+ total_facts: usize, +} + +impl TimeMeasurement { + fn new(n_iterations: usize) -> Self { + Self { + timer: None, + total_time: 0, + per_fact_durations: Vec::with_capacity(n_iterations), + n_facts: Vec::with_capacity(n_iterations), + block_durations: Vec::with_capacity(n_iterations), + facts_in_db: Vec::with_capacity(n_iterations), + total_facts: 0, + } + } + + fn start_measurement(&mut self) { + self.timer = Some(Instant::now()); + } + + fn stop_measurement(&mut self, facts_count: usize) { + let duration = + self.timer.expect("stop_measurement called before start_measurement").elapsed(); + info!( + "Time elapsed for iteration {}: {} milliseconds", + self.n_results(), + duration.as_millis() + ); + let millis = duration.as_millis(); + self.total_time += millis; + #[allow(clippy::as_conversions)] + self.per_fact_durations + .push(duration.div_f32(facts_count as f32).as_micros().try_into().unwrap()); + self.block_durations.push(millis.try_into().unwrap()); + self.n_facts.push(facts_count); + self.facts_in_db.push(self.total_facts); + self.total_facts += facts_count; + } + + fn n_results(&self) -> usize { + self.block_durations.len() + } + + /// Returns the average time per block (milliseconds). + fn block_average_time(&self) -> f64 { + #[allow(clippy::as_conversions)] + { + self.total_time as f64 / self.n_results() as f64 + } + } + + /// Returns the average time per fact over a window of `window_size` blocks (microseconds). + fn average_window_time(&self, window_size: usize) -> Vec { + let mut averages = Vec::new(); // In milliseconds. + // Takes only the full windows, so if the last window is smaller than `window_size`, it is + // ignored. 
+ let n_windows = self.n_results() / window_size; + for i in 0..n_windows { + let window_start = i * window_size; + let sum: u64 = + self.block_durations[window_start..window_start + window_size].iter().sum(); + let sum_of_facts: usize = + self.n_facts[window_start..window_start + window_size].iter().sum(); + #[allow(clippy::as_conversions)] + averages.push(1000.0 * sum as f64 / sum_of_facts as f64); + } + averages + } + + fn pretty_print(&self, window_size: usize) { + if self.n_results() == 0 { + println!("No measurements were taken."); + return; + } + + println!( + "Total time: {} milliseconds for {} iterations.", + self.total_time, + self.n_results() + ); + println!( + "Average block time: {:.2} milliseconds. + ", + self.block_average_time() + ); + + println!("Average time per window of {window_size} iterations:"); + let means = self.average_window_time(window_size); + let max = means.iter().cloned().fold(f64::MIN, f64::max); + // Print a graph visualization of block times. + for (i, fact_duration) in means.iter().enumerate() { + let norm = fact_duration / max; + #[allow(clippy::as_conversions)] + let width = (norm * 40.0).round() as usize; // up to 40 characters wide + let bar = "█".repeat(width.max(1)); + println!("win {i:>4}: {fact_duration:>8.4} microsecond / fact | {bar}"); + } + } + + fn to_csv(&self, path: &str, output_dir: &str) { + fs::create_dir_all(output_dir).expect("Failed to create output directory."); + let file = + File::create(format!("{output_dir}/{path}")).expect("Failed to create CSV file."); + let mut wtr = Writer::from_writer(file); + wtr.write_record([ + "block_number", + "n_facts", + "facts_in_db", + "time_per_fact_micros", + "block_duration_millis", + ]) + .expect("Failed to write CSV header."); + for (i, (((&per_fact, &n_facts), &duration), &facts_in_db)) in self + .per_fact_durations + .iter() + .zip(self.n_facts.iter()) + .zip(self.block_durations.iter()) + .zip(self.facts_in_db.iter()) + .enumerate() + { + wtr.write_record(&[
i.to_string(), + n_facts.to_string(), + facts_in_db.to_string(), + per_fact.to_string(), + duration.to_string(), + ]) + .expect("Failed to write CSV record."); + } + wtr.flush().expect("Failed to flush CSV writer."); + } +} + +/// Runs the committer on n_iterations random generated blocks. +/// Prints the time measurement to the console and saves statistics to a CSV file in the given +/// output directory. +pub async fn run_storage_benchmark(seed: u64, n_iterations: usize, output_dir: &str) { + let mut rng = SmallRng::seed_from_u64(seed); + let mut time_measurement = TimeMeasurement::new(n_iterations); + + let mut storage = MapStorage::default(); + let mut contracts_trie_root_hash = HashOutput::default(); + let mut classes_trie_root_hash = HashOutput::default(); + + for i in 0..n_iterations { + info!("Committer storage benchmark iteration {}/{}", i + 1, n_iterations); + let input = InputImpl { + state_diff: generate_random_state_diff(&mut rng), + contracts_trie_root_hash, + classes_trie_root_hash, + config: ConfigImpl::default(), + }; + + time_measurement.start_measurement(); + let filled_forest = + commit_block(input, &mut storage).await.expect("Failed to commit the given block."); + // TODO(Tzahi): measure the computation above and the fact writing below separately. 
+ let n_new_facts = filled_forest.write_to_storage(&mut storage); + + time_measurement.stop_measurement(n_new_facts); + + contracts_trie_root_hash = filled_forest.get_contract_root_hash(); + classes_trie_root_hash = filled_forest.get_compiled_class_root_hash(); + } + + time_measurement.pretty_print(50); + time_measurement.to_csv(&format!("{n_iterations}.csv"), output_dir); +} diff --git a/crates/starknet_committer_cli/src/lib.rs b/crates/starknet_committer_cli/src/lib.rs new file mode 100644 index 00000000000..82b6da3c0a8 --- /dev/null +++ b/crates/starknet_committer_cli/src/lib.rs @@ -0,0 +1 @@ +pub mod commands; diff --git a/crates/starknet_committer_cli/src/main.rs b/crates/starknet_committer_cli/src/main.rs new file mode 100644 index 00000000000..feb25201f4b --- /dev/null +++ b/crates/starknet_committer_cli/src/main.rs @@ -0,0 +1,52 @@ +use clap::{Args, Parser, Subcommand}; +use starknet_committer_and_os_cli::tracing_utils::{configure_tracing, modify_log_level}; +use starknet_committer_cli::commands::run_storage_benchmark; +use tracing::info; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::reload::Handle; +use tracing_subscriber::Registry; + +#[derive(Parser, Debug)] +pub struct CommitterCliCommand { + #[clap(subcommand)] + command: Command, +} + +#[derive(Debug, Args)] +struct StorageArgs { + /// Seed for the random number generator. + #[clap(short = 's', long, default_value = "42")] + seed: u64, + /// Number of iterations to run the benchmark. 
+ #[clap(default_value = "1000")] + n_iterations: usize, + #[clap(long, default_value = "warn")] + log_level: String, + #[clap(long, default_value = "/tmp/committer_storage_benchmark")] + output_dir: String, +} + +#[derive(Debug, Subcommand)] +enum Command { + StorageBenchmark(StorageArgs), +} + +pub async fn run_committer_cli( + committer_command: CommitterCliCommand, + log_filter_handle: Handle, +) { + info!("Starting committer-cli with command: \n{:?}", committer_command); + match committer_command.command { + Command::StorageBenchmark(StorageArgs { seed, n_iterations, log_level, output_dir }) => { + modify_log_level(log_level, log_filter_handle); + run_storage_benchmark(seed, n_iterations, &output_dir).await; + } + } +} + +#[tokio::main] +async fn main() { + let log_filter_handle = configure_tracing(); + let committer_command = CommitterCliCommand::parse(); + run_committer_cli(committer_command, log_filter_handle).await; +} diff --git a/workspace_tests/package_integrity_test.rs b/workspace_tests/package_integrity_test.rs index 246118ac4ca..12f0eae6328 100644 --- a/workspace_tests/package_integrity_test.rs +++ b/workspace_tests/package_integrity_test.rs @@ -4,7 +4,7 @@ use toml_test_utils::{DependencyValue, PackageEntryValue, MEMBER_TOMLS}; /// Hard-coded list of crates that are allowed to use test code in their (non-dev) dependencies. /// Should only contain test-related crates. -static CRATES_ALLOWED_TO_USE_TESTING_FEATURE: [&str; 6] = [ +static CRATES_ALLOWED_TO_USE_TESTING_FEATURE: [&str; 7] = [ "apollo_integration_tests", "apollo_test_utils", "blockifier_test_utils", @@ -14,6 +14,7 @@ static CRATES_ALLOWED_TO_USE_TESTING_FEATURE: [&str; 6] = [ // TODO(Dori): Consider splitting the build of the CLI crate to a test build and a production // build. "starknet_committer_and_os_cli", + "starknet_committer_cli", ]; /// Tests that no member crate has itself in it's dependency tree - a crate cannot be published