diff --git a/jolt-core/benches/commit.rs b/jolt-core/benches/commit.rs index 84bbc480c..046935f9f 100644 --- a/jolt-core/benches/commit.rs +++ b/jolt-core/benches/commit.rs @@ -9,7 +9,7 @@ use rand_core::{RngCore, SeedableRng}; // use rayon::prelude::*; fn benchmark_dory_dense(c: &mut Criterion, name: &str, k: usize, t: usize) { - let globals = DoryGlobals::initialize_context(k, t, DoryContext::Main); + let globals = DoryGlobals::initialize_context(k, t, DoryContext::Main, None); let setup = ::setup_prover(k.log_2() + t.log_2()); let mut rng = ChaCha20Rng::seed_from_u64(111111u64); @@ -26,7 +26,7 @@ fn benchmark_dory_dense(c: &mut Criterion, name: &str, k: usize, t: usize) { } fn benchmark_dory_one_hot_batch(c: &mut Criterion, name: &str, k: usize, t: usize) { - let globals = DoryGlobals::initialize_context(k, t, DoryContext::Main); + let globals = DoryGlobals::initialize_context(k, t, DoryContext::Main, None); let setup = ::setup_prover(k.log_2() + t.log_2()); let mut rng = ChaCha20Rng::seed_from_u64(111111u64); @@ -52,7 +52,7 @@ fn benchmark_dory_one_hot_batch(c: &mut Criterion, name: &str, k: usize, t: usiz } fn benchmark_dory_mixed_batch(c: &mut Criterion, name: &str, k: usize, t: usize) { - let globals = DoryGlobals::initialize_context(k, t, DoryContext::Main); + let globals = DoryGlobals::initialize_context(k, t, DoryContext::Main, None); let setup = ::setup_prover(k.log_2() + t.log_2()); let mut rng = ChaCha20Rng::seed_from_u64(111111u64); diff --git a/jolt-core/src/poly/commitment/dory/commitment_scheme.rs b/jolt-core/src/poly/commitment/dory/commitment_scheme.rs index 9b1c04e20..d9b890c6e 100644 --- a/jolt-core/src/poly/commitment/dory/commitment_scheme.rs +++ b/jolt-core/src/poly/commitment/dory/commitment_scheme.rs @@ -1,6 +1,6 @@ //! Dory polynomial commitment scheme implementation -use super::dory_globals::DoryGlobals; +use super::dory_globals::{DoryGlobals, DoryLayout}; use super::jolt_dory_routines::{JoltG1Routines, JoltG2Routines}; use super::wrappers::{ jolt_to_ark, ArkDoryProof, ArkFr, ArkG1, ArkGT, ArkworksProverSetup, ArkworksVerifierSetup, @@ -118,10 +118,12 @@ impl CommitmentScheme for DoryCommitmentScheme { let sigma = num_cols.log_2(); let nu = num_rows.log_2(); + let reordered_point = reorder_opening_point_for_layout::(opening_point); + // Dory uses the opposite endian-ness as Jolt - let ark_point: Vec = opening_point + let ark_point: Vec = reordered_point .iter() - .rev() // Reverse the order for Dory + .rev() // Reverse the order for Dory .map(|p| { let f_val: ark_bn254::Fr = (*p).into(); jolt_to_ark(&f_val) @@ -152,10 +154,12 @@ impl CommitmentScheme for DoryCommitmentScheme { ) -> Result<(), ProofVerifyError> { let _span = trace_span!("DoryCommitmentScheme::verify").entered(); + let reordered_point = reorder_opening_point_for_layout::(opening_point); + // Dory uses the opposite endian-ness as Jolt - let ark_point: Vec = opening_point + let ark_point: Vec = reordered_point .iter() - .rev() // Reverse the order for Dory + .rev() .map(|p| { let f_val: ark_bn254::Fr = (*p).into(); jolt_to_ark(&f_val) @@ -341,3 +345,24 @@ impl StreamingCommitmentScheme for DoryCommitmentScheme { } } } + +/// Reorders opening_point for AddressMajor layout. +/// +/// For AddressMajor layout, reorders opening_point from [r_address, r_cycle] to [r_cycle, r_address]. +/// This ensures that after Dory's reversal and splitting: +/// - Column (right) vector gets address variables (matching AddressMajor column indexing) +/// - Row (left) vector gets cycle variables (matching AddressMajor row indexing) +/// +/// For CycleMajor layout, returns the point unchanged. +fn reorder_opening_point_for_layout( + opening_point: &[F::Challenge], +) -> Vec { + if DoryGlobals::get_layout() == DoryLayout::AddressMajor { + let log_T = DoryGlobals::get_T().log_2(); + let log_K = opening_point.len().saturating_sub(log_T); + let (r_address, r_cycle) = opening_point.split_at(log_K); + [r_cycle, r_address].concat() + } else { + opening_point.to_vec() + } +} diff --git a/jolt-core/src/poly/commitment/dory/dory_globals.rs b/jolt-core/src/poly/commitment/dory/dory_globals.rs index 4e142025c..c4c2ebe42 100644 --- a/jolt-core/src/poly/commitment/dory/dory_globals.rs +++ b/jolt-core/src/poly/commitment/dory/dory_globals.rs @@ -1,12 +1,141 @@ //! Global state management for Dory parameters use crate::utils::math::Math; +use allocative::Allocative; use dory::backends::arkworks::{init_cache, is_cached, ArkG1, ArkG2}; use std::sync::{ atomic::{AtomicU8, Ordering}, OnceLock, }; +/// Dory matrix layout for OneHot polynomials. +/// +/// This enum controls how polynomial coefficients (indexed by address k and cycle t) +/// are mapped to matrix positions for Dory commitment. +/// +/// For a OneHot polynomial with K addresses and T cycles: +/// - Total coefficients = K * T +/// - The Dory matrix shape is chosen by [`DoryGlobals::calculate_dimensions`] as either: +/// - square: `num_rows == num_cols` when `log2(K*T)` is even, or +/// - almost-square: `num_cols == 2*num_rows` when `log2(K*T)` is odd. +/// +/// The layout determines the mapping from (address, cycle) to matrix (row, col). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Allocative)] +pub enum DoryLayout { + /// Cycle-major layout + /// + /// Coefficients are ordered by address first, then by cycle within each address: + /// ```text + /// Memory: [a0_t0, a0_t1, ..., a0_tT-1, a1_t0, a1_t1, ..., a1_tT-1, ...] + /// └──── address 0 cycles ────┘ └──── address 1 cycles ────┘ + /// + /// global_index = address * T + cycle + /// ``` + /// + /// Matrix layout (K=4 addresses, T=4 cycles): + /// ```text + /// col0 col1 col2 col3 + /// ┌────────┬────────┬────────┬────────┐ + /// row0 │ a0,t0 │ a0,t1 │ a0,t2 │ a0,t3 │ ← All of address 0 + /// ├────────┼────────┼────────┼────────┤ + /// row1 │ a1,t0 │ a1,t1 │ a1,t2 │ a1,t3 │ ← All of address 1 + /// ├────────┼────────┼────────┼────────┤ + /// row2 │ a2,t0 │ a2,t1 │ a2,t2 │ a2,t3 │ ← All of address 2 + /// ├────────┼────────┼────────┼────────┤ + /// row3 │ a3,t0 │ a3,t1 │ a3,t2 │ a3,t3 │ ← All of address 3 + /// └────────┴────────┴────────┴────────┘ + /// ``` + #[default] + CycleMajor, + + /// Address-major layout + /// + /// Coefficients are ordered by cycle first, then by address within each cycle: + /// ```text + /// Memory: [t0_a0, t0_a1, ..., t0_aK-1, t1_a0, t1_a1, ..., t1_aK-1, ...] + /// └──── cycle 0 addresses ───┘ └──── cycle 1 addresses ───┘ + /// + /// global_index = cycle * K + address + /// ``` + /// + /// Matrix layout (K=4 addresses, T=4 cycles): + /// ```text + /// col0 col1 col2 col3 + /// ┌────────┬────────┬────────┬────────┐ + /// row0 │ a0,t0 │ a1,t0 │ a2,t0 │ a3,t0 │ ← All of cycle 0 + /// ├────────┼────────┼────────┼────────┤ + /// row1 │ a0,t1 │ a1,t1 │ a2,t1 │ a3,t1 │ ← All of cycle 1 + /// ├────────┼────────┼────────┼────────┤ + /// row2 │ a0,t2 │ a1,t2 │ a2,t2 │ a3,t2 │ ← All of cycle 2 + /// ├────────┼────────┼────────┼────────┤ + /// row3 │ a0,t3 │ a1,t3 │ a2,t3 │ a3,t3 │ ← All of cycle 3 + /// └────────┴────────┴────────┴────────┘ + /// ``` + AddressMajor, +} + +impl DoryLayout { + /// Convert a (address, cycle) pair to a coefficient index. + /// + /// # Arguments + /// * `address` - The address index (0 to K-1) + /// * `cycle` - The cycle index (0 to T-1) + /// * `K` - Total number of addresses + /// * `T` - Total number of cycles + pub fn address_cycle_to_index( + &self, + address: usize, + cycle: usize, + K: usize, + T: usize, + ) -> usize { + match self { + DoryLayout::CycleMajor => address * T + cycle, + DoryLayout::AddressMajor => cycle * K + address, + } + } + + /// Convert a coefficient index to a (address, cycle) pair. + /// + /// # Arguments + /// * `index` - The linear coefficient index + /// * `K` - Total number of addresses + /// * `T` - Total number of cycles + pub fn index_to_address_cycle(&self, index: usize, K: usize, T: usize) -> (usize, usize) { + match self { + DoryLayout::CycleMajor => { + let address = index / T; + let cycle = index % T; + (address, cycle) + } + DoryLayout::AddressMajor => { + let cycle = index / K; + let address = index % K; + (address, cycle) + } + } + } +} + +impl From for DoryLayout { + fn from(value: u8) -> Self { + match value { + 0 => DoryLayout::CycleMajor, + 1 => DoryLayout::AddressMajor, + _ => panic!("Invalid DoryLayout value: {value}"), + } + } +} + +impl From for u8 { + fn from(layout: DoryLayout) -> Self { + match layout { + DoryLayout::CycleMajor => 0, + DoryLayout::AddressMajor => 1, + } + } +} + // Main polynomial globals static mut GLOBAL_T: OnceLock = OnceLock::new(); static mut MAX_NUM_ROWS: OnceLock = OnceLock::new(); @@ -25,6 +154,9 @@ static mut UNTRUSTED_ADVICE_NUM_COLUMNS: OnceLock = OnceLock::new(); // Context tracking: 0=Main, 1=TrustedAdvice, 2=UntrustedAdvice static CURRENT_CONTEXT: AtomicU8 = AtomicU8::new(0); +// Layout tracking: 0=CycleMajor, 1=AddressMajor +static CURRENT_LAYOUT: AtomicU8 = AtomicU8::new(0); + /// Dory commitment context - determines which set of global parameters to use #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum DoryContext { @@ -111,6 +243,55 @@ impl DoryGlobals { } } + /// Get the current Dory matrix layout + pub fn get_layout() -> DoryLayout { + CURRENT_LAYOUT.load(Ordering::SeqCst).into() + } + + /// Set the Dory matrix layout directly (test-only). + /// + /// In production code, prefer passing the layout to `initialize_context` instead. + #[cfg(test)] + pub fn set_layout(layout: DoryLayout) { + CURRENT_LAYOUT.store(layout as u8, Ordering::SeqCst); + } + + /// Returns the configured Dory matrix shape `(num_rows, num_cols)` for the current context. + pub fn matrix_shape() -> (usize, usize) { + (Self::get_max_num_rows(), Self::get_num_columns()) + } + + /// Returns the "K" used to initialize the *main* Dory matrix for OneHot polynomials. + /// + /// This is derived from the identity: + /// `K * T == num_rows * num_cols` (all values are powers of two in our usage). + pub fn k_from_matrix_shape() -> usize { + let (num_rows, num_cols) = Self::matrix_shape(); + let t = Self::get_T(); + debug_assert_eq!( + (num_rows * num_cols) % t, + 0, + "Invalid DoryGlobals: num_rows*num_cols must be divisible by T" + ); + (num_rows * num_cols) / t + } + + /// For `AddressMajor`, each Dory matrix row corresponds to this many cycles. + /// + /// Equivalent to `T / num_rows` and to `num_cols / K`. + pub fn address_major_cycles_per_row() -> usize { + let (num_rows, num_cols) = Self::matrix_shape(); + let k = Self::k_from_matrix_shape(); + debug_assert!(k > 0); + debug_assert_eq!(num_cols % k, 0, "Expected num_cols to be divisible by K"); + debug_assert_eq!( + Self::get_T() % num_rows, + 0, + "Expected T to be divisible by num_rows" + ); + num_cols / k + } + fn set_max_num_rows_for_context(max_num_rows: usize, context: DoryContext) { #[allow(static_mut_refs)] unsafe { @@ -234,18 +415,29 @@ impl DoryGlobals { /// * `K` - Maximum address space size (K in OneHot polynomials) /// * `T` - Maximum trace length (cycle count) /// * `context` - The Dory context to initialize (Main, TrustedAdvice, or UntrustedAdvice) + /// * `layout` - Optional layout for the Dory matrix. Only applies to Main context. + /// If `Some(layout)`, sets the layout. If `None`, leaves the existing layout + /// unchanged (defaults to `CycleMajor` after `reset()`). Ignored for advice contexts. /// /// The matrix dimensions are calculated to minimize padding: /// - If log2(K*T) is even: creates a square matrix /// - If log2(K*T) is odd: creates an almost-square matrix (columns = 2*rows) - pub fn initialize_context(K: usize, T: usize, context: DoryContext) -> Option<()> { + pub fn initialize_context( + K: usize, + T: usize, + context: DoryContext, + layout: Option, + ) -> Option<()> { let (num_columns, num_rows, t) = Self::calculate_dimensions(K, T); Self::set_num_columns_for_context(num_columns, context); Self::set_T_for_context(t, context); Self::set_max_num_rows_for_context(num_rows, context); - // For Main context, ensure subsequent uses of `get_*` read from it by default + // For Main context, set layout (if provided) and ensure subsequent uses of `get_*` read from it if context == DoryContext::Main { + if let Some(l) = layout { + CURRENT_LAYOUT.store(l as u8, Ordering::SeqCst); + } CURRENT_CONTEXT.store(DoryContext::Main as u8, Ordering::SeqCst); } @@ -262,6 +454,9 @@ impl DoryGlobals { let _ = MAX_NUM_ROWS.take(); let _ = NUM_COLUMNS.take(); + // Reset layout to default (CycleMajor) + CURRENT_LAYOUT.store(0, Ordering::SeqCst); + // Reset trusted advice globals let _ = TRUSTED_ADVICE_T.take(); let _ = TRUSTED_ADVICE_MAX_NUM_ROWS.take(); diff --git a/jolt-core/src/poly/commitment/dory/mod.rs b/jolt-core/src/poly/commitment/dory/mod.rs index 5846efa37..a9b785bbb 100644 --- a/jolt-core/src/poly/commitment/dory/mod.rs +++ b/jolt-core/src/poly/commitment/dory/mod.rs @@ -12,7 +12,7 @@ mod wrappers; mod tests; pub use commitment_scheme::DoryCommitmentScheme; -pub use dory_globals::{DoryContext, DoryGlobals}; +pub use dory_globals::{DoryContext, DoryGlobals, DoryLayout}; pub use jolt_dory_routines::{JoltG1Routines, JoltG2Routines}; pub use wrappers::{ ArkDoryProof, ArkFr, ArkG1, ArkG2, ArkGT, ArkworksProverSetup, ArkworksVerifierSetup, diff --git a/jolt-core/src/poly/commitment/dory/tests.rs b/jolt-core/src/poly/commitment/dory/tests.rs index 3553c1d86..c3cf91e89 100644 --- a/jolt-core/src/poly/commitment/dory/tests.rs +++ b/jolt-core/src/poly/commitment/dory/tests.rs @@ -65,7 +65,7 @@ mod tests { let num_coeffs = 1 << num_vars; // Dense polynomial: K = 1, T = num_coeffs - let _guard = DoryGlobals::initialize_context(1, num_coeffs, DoryContext::Main); + let _guard = DoryGlobals::initialize_context(1, num_coeffs, DoryContext::Main, None); let prover_setup = DoryCommitmentScheme::setup_prover(num_vars); let verifier_setup = DoryCommitmentScheme::setup_verifier(&prover_setup); @@ -242,7 +242,7 @@ mod tests { let num_coeffs = 1 << num_vars; // Dense polynomial: K = 1, T = num_coeffs - let _guard = DoryGlobals::initialize_context(1, num_coeffs, DoryContext::Main); + let _guard = DoryGlobals::initialize_context(1, num_coeffs, DoryContext::Main, None); let mut rng = thread_rng(); let coeffs: Vec = (0..num_coeffs).map(|_| Fr::rand(&mut rng)).collect(); @@ -383,10 +383,12 @@ mod tests { DoryGlobals::reset(); - let K = 8; - let T = 8; + // Use K=32, T=32 to ensure the test exercises both row and column variables + // in the Dory matrix (log2(32*32) = 10 variables, split as sigma=5, nu=5) + let K = 32; + let T = 32; - let _guard = DoryGlobals::initialize_context(K, T, DoryContext::Main); + let _guard = DoryGlobals::initialize_context(K, T, DoryContext::Main, None); let mut rng = thread_rng(); let nonzero_indices: Vec> = (0..T) @@ -447,11 +449,11 @@ mod tests { fn test_dory_homomorphic_combination() { DoryGlobals::reset(); - let num_vars = 8; + let num_vars = 10; let num_coeffs = 1 << num_vars; let num_polys = 5; - let _guard = DoryGlobals::initialize_context(1, num_coeffs, DoryContext::Main); + let _guard = DoryGlobals::initialize_context(1, num_coeffs, DoryContext::Main, None); let mut rng = thread_rng(); @@ -531,11 +533,11 @@ mod tests { fn test_dory_batch_commit_e2e() { DoryGlobals::reset(); - let num_vars = 8; + let num_vars = 10; let num_coeffs = 1 << num_vars; let num_polys = 5; - let _guard = DoryGlobals::initialize_context(1, num_coeffs, DoryContext::Main); + let _guard = DoryGlobals::initialize_context(1, num_coeffs, DoryContext::Main, None); let mut rng = thread_rng(); @@ -642,4 +644,298 @@ mod tests { "Verification should also succeed with direct commitment: {result2:?}" ); } + + #[test] + fn test_dory_layout_address_cycle_conversions() { + let K = 4; // 4 addresses + let T = 8; // 8 cycles + + // Test CycleMajor layout: index = address * T + cycle + let cycle_major = DoryLayout::CycleMajor; + + // Address 0: indices 0-7, Address 1: indices 8-15, etc. + assert_eq!(cycle_major.address_cycle_to_index(0, 0, K, T), 0); // addr 0, cycle 0 + assert_eq!(cycle_major.address_cycle_to_index(0, 1, K, T), 1); // addr 0, cycle 1 + assert_eq!(cycle_major.address_cycle_to_index(0, 7, K, T), 7); // addr 0, cycle 7 + assert_eq!(cycle_major.address_cycle_to_index(1, 0, K, T), 8); // addr 1, cycle 0 + assert_eq!(cycle_major.address_cycle_to_index(1, 1, K, T), 9); // addr 1, cycle 1 + assert_eq!(cycle_major.address_cycle_to_index(3, 7, K, T), 31); // addr 3, cycle 7 + + // Test reverse: index_to_address_cycle + assert_eq!(cycle_major.index_to_address_cycle(0, K, T), (0, 0)); + assert_eq!(cycle_major.index_to_address_cycle(1, K, T), (0, 1)); + assert_eq!(cycle_major.index_to_address_cycle(8, K, T), (1, 0)); + assert_eq!(cycle_major.index_to_address_cycle(31, K, T), (3, 7)); + + // Test AddressMajor layout: index = cycle * K + address + let addr_major = DoryLayout::AddressMajor; + + // Cycle 0: indices 0-3, Cycle 1: indices 4-7, etc. + assert_eq!(addr_major.address_cycle_to_index(0, 0, K, T), 0); // addr 0, cycle 0 + assert_eq!(addr_major.address_cycle_to_index(1, 0, K, T), 1); // addr 1, cycle 0 + assert_eq!(addr_major.address_cycle_to_index(3, 0, K, T), 3); // addr 3, cycle 0 + assert_eq!(addr_major.address_cycle_to_index(0, 1, K, T), 4); // addr 0, cycle 1 + assert_eq!(addr_major.address_cycle_to_index(1, 1, K, T), 5); // addr 1, cycle 1 + assert_eq!(addr_major.address_cycle_to_index(3, 7, K, T), 31); // addr 3, cycle 7 + + // Test reverse: index_to_address_cycle + assert_eq!(addr_major.index_to_address_cycle(0, K, T), (0, 0)); + assert_eq!(addr_major.index_to_address_cycle(1, K, T), (1, 0)); + assert_eq!(addr_major.index_to_address_cycle(4, K, T), (0, 1)); + assert_eq!(addr_major.index_to_address_cycle(31, K, T), (3, 7)); + + // Verify round-trip for both layouts + for addr in 0..K { + for cycle in 0..T { + let idx = cycle_major.address_cycle_to_index(addr, cycle, K, T); + assert_eq!(cycle_major.index_to_address_cycle(idx, K, T), (addr, cycle)); + + let idx = addr_major.address_cycle_to_index(addr, cycle, K, T); + assert_eq!(addr_major.index_to_address_cycle(idx, K, T), (addr, cycle)); + } + } + } + + #[test] + #[serial] + fn test_dory_layout_global_state() { + DoryGlobals::reset(); + + // Default should be CycleMajor + assert_eq!(DoryGlobals::get_layout(), DoryLayout::CycleMajor); + + // Set to AddressMajor + DoryGlobals::set_layout(DoryLayout::AddressMajor); + assert_eq!(DoryGlobals::get_layout(), DoryLayout::AddressMajor); + + // Set back to CycleMajor + DoryGlobals::set_layout(DoryLayout::CycleMajor); + assert_eq!(DoryGlobals::get_layout(), DoryLayout::CycleMajor); + } + + /// Dense polynomials are treated as k=1, so `AddressMajor` and `CycleMajor` + /// degenerate to same computation for Dory commitments + /// Hence, we expect them to produce the same commitment. + #[test] + #[serial] + fn test_dory_layout_dense_polynomials_same_commitment() { + DoryGlobals::reset(); + + let num_vars = 10; + let num_coeffs = 1 << num_vars; + + let _ = DoryGlobals::initialize_context(1, num_coeffs, DoryContext::Main, None); + + let mut rng = thread_rng(); + let coeffs: Vec = (0..num_coeffs).map(|_| Fr::rand(&mut rng)).collect(); + + let prover_setup = DoryCommitmentScheme::setup_prover(num_vars); + + DoryGlobals::set_layout(DoryLayout::CycleMajor); + let poly1 = MultilinearPolynomial::LargeScalars(DensePolynomial::new(coeffs.clone())); + let (commitment_cycle_major, _) = DoryCommitmentScheme::commit(&poly1, &prover_setup); + + DoryGlobals::set_layout(DoryLayout::AddressMajor); + let poly2 = MultilinearPolynomial::LargeScalars(DensePolynomial::new(coeffs)); + let (commitment_addr_major, _) = DoryCommitmentScheme::commit(&poly2, &prover_setup); + + assert_eq!( + commitment_cycle_major, commitment_addr_major, + "Dense polynomials should produce the same commitment with any layout" + ); + DoryGlobals::set_layout(DoryLayout::CycleMajor); + } + + #[test] + fn test_dory_layout_enum_methods() { + let K = 8; // addresses + let T = 16; // cycles + + let cycle_major = DoryLayout::CycleMajor; + let addr_major = DoryLayout::AddressMajor; + + let addr = 3; + let cycle = 7; + + let idx_cycle = cycle_major.address_cycle_to_index(addr, cycle, K, T); + let idx_addr = addr_major.address_cycle_to_index(addr, cycle, K, T); + + // CycleMajor: index = addr * T + cycle = 3 * 16 + 7 = 55 + assert_eq!(idx_cycle, 55); + + // AddressMajor: index = cycle * K + addr = 7 * 8 + 3 = 59 + assert_eq!(idx_addr, 59); + + assert_eq!( + cycle_major.index_to_address_cycle(idx_cycle, K, T), + (addr, cycle) + ); + assert_eq!( + addr_major.index_to_address_cycle(idx_addr, K, T), + (addr, cycle) + ); + } + + /// Test that AddressMajor one-hot polynomial proof/verify works correctly. + #[test] + #[serial] + fn test_dory_one_hot_address_major() { + use crate::poly::one_hot_polynomial::OneHotPolynomial; + + DoryGlobals::reset(); + + let K = 32; + let T = 32; + + let _guard = DoryGlobals::initialize_context( + K, + T, + DoryContext::Main, + Some(DoryLayout::AddressMajor), + ); + + let mut rng = thread_rng(); + let nonzero_indices: Vec> = (0..T) + .map(|_| { + if rng.gen::() { + Some(rng.gen::() % K as u8) + } else { + None + } + }) + .collect(); + + let one_hot_poly = OneHotPolynomial::from_indices(nonzero_indices, K); + let num_vars = one_hot_poly.get_num_vars(); + let poly = MultilinearPolynomial::OneHot(one_hot_poly); + + let opening_point: Vec<::Challenge> = (0..num_vars) + .map(|_| ::Challenge::random(&mut rng)) + .collect(); + + let prover_setup = DoryCommitmentScheme::setup_prover(num_vars); + let verifier_setup = DoryCommitmentScheme::setup_verifier(&prover_setup); + + let (commitment, row_commitments) = DoryCommitmentScheme::commit(&poly, &prover_setup); + + let evaluation = as PolynomialEvaluation>::evaluate( + &poly, + &opening_point, + ); + + let mut prove_transcript = Blake2bTranscript::new(b"dory_test"); + let proof = DoryCommitmentScheme::prove( + &prover_setup, + &poly, + &opening_point, + Some(row_commitments), + &mut prove_transcript, + ); + + let mut verify_transcript = Blake2bTranscript::new(b"dory_test"); + let verification_result = DoryCommitmentScheme::verify( + &proof, + &verifier_setup, + &mut verify_transcript, + &opening_point, + &evaluation, + &commitment, + ); + + assert!( + verification_result.is_ok(), + "Dory verification failed for AddressMajor OneHot: {verification_result:?}" + ); + } + + /// Test VMP correctness for AddressMajor layout with RLC polynomial (dense + one-hot). + #[test] + #[serial] + fn test_vmp_address_major_rlc() { + use crate::poly::one_hot_polynomial::OneHotPolynomial; + use crate::poly::rlc_polynomial::RLCPolynomial; + + DoryGlobals::reset(); + + let K = 16usize; + let T = 64usize; + + let _guard = DoryGlobals::initialize_context( + K, + T, + DoryContext::Main, + Some(DoryLayout::AddressMajor), + ); + + let num_columns = DoryGlobals::get_num_columns(); + let num_rows = DoryGlobals::get_max_num_rows(); + + let mut rng = thread_rng(); + + let dense_coeffs: Vec = (0..T).map(|_| Fr::rand(&mut rng)).collect(); + + let nonzero_indices: Vec> = (0..T) + .map(|_| { + if rng.gen::() { + Some(rng.gen::() % K as u8) + } else { + None + } + }) + .collect(); + let one_hot_poly = OneHotPolynomial::::from_indices(nonzero_indices.clone(), K); + + let dense_rlc_coeff: Fr = Fr::rand(&mut rng); + let one_hot_rlc_coeff: Fr = Fr::rand(&mut rng); + + let rlc_dense: Vec = dense_coeffs.iter().map(|c| *c * dense_rlc_coeff).collect(); + let rlc_poly = RLCPolynomial { + dense_rlc: rlc_dense.clone(), + one_hot_rlc: vec![( + one_hot_rlc_coeff, + std::sync::Arc::new(MultilinearPolynomial::OneHot(one_hot_poly.clone())), + )], + streaming_context: None, + }; + + let left_vec: Vec = (0..num_rows).map(|_| Fr::rand(&mut rng)).collect(); + + let vmp_result = rlc_poly.vector_matrix_product(&left_vec); + + let mut expected = vec![Fr::zero(); num_columns]; + let cycles_per_row = DoryGlobals::address_major_cycles_per_row(); + + // Dense contribution for AddressMajor layout: + // Dense coefficients occupy evenly-spaced columns (every K-th column). + // Coefficient i maps to: row = i / cycles_per_row, col = (i % cycles_per_row) * K + for (i, &coeff) in rlc_dense.iter().enumerate() { + let row = i / cycles_per_row; + let col = (i % cycles_per_row) * K; + if row < num_rows && col < num_columns { + expected[col] += left_vec[row] * coeff; + } + } + + // One-hot contribution: uses AddressMajor layout + for (cycle, k_opt) in nonzero_indices.iter().enumerate() { + if let Some(k) = k_opt { + let k = *k as usize; + // AddressMajor: global_index = cycle * K + address + let global_index = DoryLayout::AddressMajor.address_cycle_to_index(k, cycle, K, T); + let row = global_index / num_columns; + let col = global_index % num_columns; + if row < num_rows && col < num_columns { + expected[col] += left_vec[row] * one_hot_rlc_coeff; + } + } + } + + // Compare results + for (col, (actual, exp)) in vmp_result.iter().zip(expected.iter()).enumerate() { + assert_eq!( + *actual, *exp, + "VMP mismatch at column {col}: actual={actual:?}, expected={exp:?}" + ); + } + } } diff --git a/jolt-core/src/poly/commitment/dory/wrappers.rs b/jolt-core/src/poly/commitment/dory/wrappers.rs index 181ebb160..22f2c848d 100644 --- a/jolt-core/src/poly/commitment/dory/wrappers.rs +++ b/jolt-core/src/poly/commitment/dory/wrappers.rs @@ -3,10 +3,13 @@ use crate::{ field::JoltField, msm::VariableBaseMSM, - poly::multilinear_polynomial::{MultilinearPolynomial, PolynomialEvaluation}, + poly::{ + commitment::dory::{DoryGlobals, DoryLayout}, + multilinear_polynomial::{MultilinearPolynomial, PolynomialEvaluation}, + }, transcripts::{AppendToTranscript, Transcript}, }; -use ark_bn254::{Fr, G1Affine}; +use ark_bn254::Fr; use ark_ec::CurveGroup; use ark_ff::Zero; use dory::{ @@ -19,7 +22,6 @@ use dory::{ }, setup::ProverSetup, }; -use num_traits::One; use rayon::prelude::*; pub use dory::backends::arkworks::{ @@ -107,77 +109,97 @@ impl DoryPolynomial for MultilinearPolynomial { impl MultilinearLagrange for MultilinearPolynomial { fn vector_matrix_product(&self, left_vec: &[ArkFr], nu: usize, sigma: usize) -> Vec { use crate::utils::small_scalar::SmallScalar; + use ark_ff::One; - let num_cols = 1 << sigma; - let num_rows = 1 << nu; + let num_cols = 1usize << sigma; + let num_rows = 1usize << nu; let wrapped_left_side: Vec = left_vec.iter().map(ark_to_jolt).collect(); - macro_rules! compute_vector_matrix_product { - ($poly:expr, $field_mul_method:ident) => { - (0..num_cols) - .into_par_iter() - .map(|col_idx| { + // Helper for dense scalar vectors stored row-major as coeffs[row*num_cols + col] + macro_rules! vmp_row_major { + ($coeffs:expr, $mul:expr) => {{ + let coeffs = $coeffs; + let mut result = vec![Fr::zero(); num_cols]; + result + .par_iter_mut() + .enumerate() + .for_each(|(col_idx, dest)| { let mut sum = Fr::zero(); for row_idx in 0..num_rows.min(wrapped_left_side.len()) { let coeff_idx = row_idx * num_cols + col_idx; - if coeff_idx < $poly.len() { - sum += - $poly[coeff_idx].$field_mul_method(wrapped_left_side[row_idx]); + if coeff_idx < coeffs.len() { + sum += $mul(&coeffs[coeff_idx], wrapped_left_side[row_idx]); } } - jolt_to_ark(&sum) - }) - .collect() - }; + *dest = sum; + }); + result + .into_iter() + .map(|v| jolt_to_ark(&v)) + .collect::>() + }}; } match self { - MultilinearPolynomial::LargeScalars(poly) => (0..num_cols) - .into_par_iter() - .map(|col_idx| { - let mut sum = Fr::zero(); - for row_idx in 0..num_rows.min(wrapped_left_side.len()) { - let coeff_idx = row_idx * num_cols + col_idx; - if coeff_idx < poly.Z.len() { - sum += poly.Z[coeff_idx] * wrapped_left_side[row_idx]; + MultilinearPolynomial::LargeScalars(poly) => { + let coeffs = &poly.Z; + let mut result = vec![Fr::zero(); num_cols]; + result + .par_iter_mut() + .enumerate() + .for_each(|(col_idx, dest)| { + let mut sum = Fr::zero(); + for row_idx in 0..num_rows.min(wrapped_left_side.len()) { + let coeff_idx = row_idx * num_cols + col_idx; + if coeff_idx < coeffs.len() { + sum += coeffs[coeff_idx] * wrapped_left_side[row_idx]; + } } + *dest = sum; + }); + result.into_iter().map(|v| jolt_to_ark(&v)).collect() + } + MultilinearPolynomial::BoolScalars(poly) => { + vmp_row_major!(&poly.coeffs, |b: &bool, l: Fr| { + if *b { + l + } else { + Fr::zero() } - jolt_to_ark(&sum) }) - .collect(), + } MultilinearPolynomial::U8Scalars(poly) => { - compute_vector_matrix_product!(&poly.coeffs, field_mul) + vmp_row_major!(&poly.coeffs, |s: &u8, l: Fr| s.field_mul(l)) } MultilinearPolynomial::U16Scalars(poly) => { - compute_vector_matrix_product!(&poly.coeffs, field_mul) + vmp_row_major!(&poly.coeffs, |s: &u16, l: Fr| s.field_mul(l)) } MultilinearPolynomial::U32Scalars(poly) => { - compute_vector_matrix_product!(&poly.coeffs, field_mul) + vmp_row_major!(&poly.coeffs, |s: &u32, l: Fr| s.field_mul(l)) } MultilinearPolynomial::U64Scalars(poly) => { - compute_vector_matrix_product!(&poly.coeffs, field_mul) + vmp_row_major!(&poly.coeffs, |s: &u64, l: Fr| s.field_mul(l)) + } + MultilinearPolynomial::U128Scalars(poly) => { + vmp_row_major!(&poly.coeffs, |s: &u128, l: Fr| s.field_mul(l)) } MultilinearPolynomial::I64Scalars(poly) => { - compute_vector_matrix_product!(&poly.coeffs, field_mul) + vmp_row_major!(&poly.coeffs, |s: &i64, l: Fr| s.field_mul(l)) } MultilinearPolynomial::I128Scalars(poly) => { - compute_vector_matrix_product!(&poly.coeffs, field_mul) - } - MultilinearPolynomial::U128Scalars(poly) => { - compute_vector_matrix_product!(&poly.coeffs, field_mul) + vmp_row_major!(&poly.coeffs, |s: &i128, l: Fr| s.field_mul(l)) } MultilinearPolynomial::S128Scalars(poly) => { - compute_vector_matrix_product!(&poly.coeffs, field_mul) - } - MultilinearPolynomial::BoolScalars(poly) => { - compute_vector_matrix_product!(&poly.coeffs, field_mul) + vmp_row_major!(&poly.coeffs, |s: &ark_ff::biginteger::S128, l: Fr| s + .field_mul(l)) } MultilinearPolynomial::OneHot(poly) => { let mut result = vec![Fr::zero(); num_cols]; poly.vector_matrix_product(&wrapped_left_side, Fr::one(), &mut result); result.into_iter().map(|v| jolt_to_ark(&v)).collect() } + // In Jolt, we always perform the Dory opening proof using an RLCPolynomial MultilinearPolynomial::RLC(poly) => poly .vector_matrix_product(&wrapped_left_side) .into_iter() @@ -201,52 +223,126 @@ where std::slice::from_raw_parts(g1_generators.as_ptr() as *const ArkG1, g1_generators.len()) }; - // All polynomial types should use row_len bases (number of columns). - // The globals are sized to be >= what any polynomial needs. - let bases: Vec = g1_slice - .iter() - .take(row_len) - .map(|g| g.0.into_affine()) - .collect(); - - macro_rules! compute_msm { - ($coeffs:expr, $msm_method:ident) => { - $coeffs - .par_chunks(row_len) - .map(|row| ArkG1(VariableBaseMSM::$msm_method(&bases[..row.len()], row).unwrap())) - .collect() - }; - } + let dory_layout = DoryGlobals::get_layout(); + + // Dense polynomials (all scalar variants except OneHot/RLC) are committed row-wise. + // Under AddressMajor, dense coefficients occupy evenly-spaced columns, so each row + // commitment uses `cycles_per_row` bases (one per occupied column). + let (dense_affine_bases, dense_chunk_size): (Vec<_>, usize) = match dory_layout { + DoryLayout::CycleMajor => ( + g1_slice + .par_iter() + .take(row_len) + .map(|g| g.0.into_affine()) + .collect(), + row_len, + ), + DoryLayout::AddressMajor => { + let cycles_per_row = DoryGlobals::address_major_cycles_per_row(); + let bases: Vec<_> = g1_slice + .par_iter() + .take(row_len) + .step_by(row_len / cycles_per_row) + .map(|g| g.0.into_affine()) + .collect(); + (bases, cycles_per_row) + } + }; let result: Vec = match poly { - MultilinearPolynomial::LargeScalars(poly) => { - compute_msm!(&poly.Z, msm_field_elements) - } - MultilinearPolynomial::U8Scalars(poly) => compute_msm!(&poly.coeffs, msm_u8), - MultilinearPolynomial::U16Scalars(poly) => compute_msm!(&poly.coeffs, msm_u16), - MultilinearPolynomial::U32Scalars(poly) => compute_msm!(&poly.coeffs, msm_u32), - MultilinearPolynomial::U64Scalars(poly) => compute_msm!(&poly.coeffs, msm_u64), - MultilinearPolynomial::I64Scalars(poly) => compute_msm!(&poly.coeffs, msm_i64), - MultilinearPolynomial::I128Scalars(poly) => compute_msm!(&poly.coeffs, msm_i128), - MultilinearPolynomial::U128Scalars(poly) => compute_msm!(&poly.coeffs, msm_u128), - MultilinearPolynomial::S128Scalars(poly) => compute_msm!(&poly.coeffs, msm_s128), + MultilinearPolynomial::LargeScalars(poly) => poly + .Z + .par_chunks(dense_chunk_size) + .map(|row| { + ArkG1( + VariableBaseMSM::msm_field_elements(&dense_affine_bases[..row.len()], row) + .unwrap(), + ) + }) + .collect(), MultilinearPolynomial::BoolScalars(poly) => poly .coeffs - .par_chunks(row_len) + .par_chunks(dense_chunk_size) .map(|row| { let result = row .iter() - .zip(&bases[..row.len()]) + .zip(&dense_affine_bases[..row.len()]) .filter_map(|(&b, base)| if b { Some(*base) } else { None }) .sum(); ArkG1(result) }) .collect(), + MultilinearPolynomial::U8Scalars(poly) => poly + .coeffs + .par_chunks(dense_chunk_size) + .map(|row| { + ArkG1(VariableBaseMSM::msm_u8(&dense_affine_bases[..row.len()], row).unwrap()) + }) + .collect(), + MultilinearPolynomial::U16Scalars(poly) => poly + .coeffs + .par_chunks(dense_chunk_size) + .map(|row| { + ArkG1(VariableBaseMSM::msm_u16(&dense_affine_bases[..row.len()], row).unwrap()) + }) + .collect(), + MultilinearPolynomial::U32Scalars(poly) => poly + .coeffs + .par_chunks(dense_chunk_size) + .map(|row| { + ArkG1(VariableBaseMSM::msm_u32(&dense_affine_bases[..row.len()], row).unwrap()) + }) + .collect(), + MultilinearPolynomial::U64Scalars(poly) => poly + .coeffs + .par_chunks(dense_chunk_size) + .map(|row| { + ArkG1(VariableBaseMSM::msm_u64(&dense_affine_bases[..row.len()], row).unwrap()) + }) + .collect(), + MultilinearPolynomial::U128Scalars(poly) => poly + .coeffs + .par_chunks(dense_chunk_size) + .map(|row| { + ArkG1(VariableBaseMSM::msm_u128(&dense_affine_bases[..row.len()], row).unwrap()) + }) + .collect(), + MultilinearPolynomial::I64Scalars(poly) => poly + .coeffs + .par_chunks(dense_chunk_size) + .map(|row| { + ArkG1(VariableBaseMSM::msm_i64(&dense_affine_bases[..row.len()], row).unwrap()) + }) + .collect(), + MultilinearPolynomial::I128Scalars(poly) => poly + .coeffs + .par_chunks(dense_chunk_size) + .map(|row| { + ArkG1(VariableBaseMSM::msm_i128(&dense_affine_bases[..row.len()], row).unwrap()) + }) + .collect(), + MultilinearPolynomial::S128Scalars(poly) => poly + .coeffs + .par_chunks(dense_chunk_size) + .map(|row| { + ArkG1(VariableBaseMSM::msm_s128(&dense_affine_bases[..row.len()], row).unwrap()) + }) + .collect(), + // OneHot polynomials have their own commit_rows implementations + // that respect the DoryLayout setting (CycleMajor vs AddressMajor) MultilinearPolynomial::OneHot(poly) => { - poly.commit_rows(&bases).into_iter().map(ArkG1).collect() + let affine_bases: Vec<_> = g1_slice + .par_iter() + .take(row_len) + .map(|g| g.0.into_affine()) + .collect(); + poly.commit_rows(&affine_bases) + .into_iter() + .map(ArkG1) + .collect() } - MultilinearPolynomial::RLC(poly) => { - poly.commit_rows(&bases).into_iter().map(ArkG1).collect() + MultilinearPolynomial::RLC(_) => { + panic!("RLC polynomials should not be committed directly via commit_tier_1") } }; diff --git a/jolt-core/src/poly/one_hot_polynomial.rs b/jolt-core/src/poly/one_hot_polynomial.rs index 008f142c8..5a807446f 100644 --- a/jolt-core/src/poly/one_hot_polynomial.rs +++ b/jolt-core/src/poly/one_hot_polynomial.rs @@ -4,10 +4,9 @@ use crate::field::JoltField; use crate::msm::VariableBaseMSM; -use crate::poly::commitment::dory::DoryGlobals; +use crate::poly::commitment::dory::{DoryGlobals, DoryLayout}; use crate::poly::eq_poly::EqPolynomial; use crate::utils::math::Math; -use crate::utils::thread::unsafe_allocate_zero_vec; use allocative::Allocative; use ark_bn254::G1Affine; use ark_ec::CurveGroup; @@ -53,11 +52,15 @@ impl Default for OneHotPolynomial { impl OneHotPolynomial { /// The number of rows in the coefficient matrix used to - /// commit to this polynomial using Dory + /// commit to this polynomial using Dory. + /// + /// Note: the Dory matrix may be square or almost-square depending on `log2(K*T)`. pub fn num_rows(&self) -> usize { - let T = self.nonzero_indices.len() as u128; - let row_length = DoryGlobals::get_num_columns() as u128; - (T * self.K as u128 / row_length) as usize + let t = self.nonzero_indices.len(); + match DoryGlobals::get_layout() { + DoryLayout::AddressMajor => t.div_ceil(DoryGlobals::address_major_cycles_per_row()), + DoryLayout::CycleMajor => (t * self.K).div_ceil(DoryGlobals::get_num_columns()), + } } pub fn get_num_vars(&self) -> usize { @@ -85,16 +88,18 @@ impl OneHotPolynomial { C: Copy + Send + Sync + Into, F: std::ops::Mul + std::ops::SubAssign, { - assert_eq!(r.len(), self.get_num_vars()); - let (r_left, r_right) = r.split_at(self.num_rows().log_2()); - let eq_left = EqPolynomial::::evals(r_left); - let eq_right = EqPolynomial::::evals(r_right); - let mut left_product = unsafe_allocate_zero_vec(eq_right.len()); - self.vector_matrix_product(&eq_left, F::one(), &mut left_product); - left_product - .into_par_iter() - .zip_eq(eq_right.par_iter()) - .map(|(l, r)| l * r) + let log_t = self.nonzero_indices.len().log_2(); + let (r_address, r_cycle) = r.split_at(r.len() - log_t); + let eq_r_address = EqPolynomial::::evals(r_address); + let eq_r_cycle = EqPolynomial::::evals(r_cycle); + + self.nonzero_indices + .par_iter() + .zip(eq_r_cycle.par_iter()) + .map(|(k, eq_cycle)| match k { + Some(k) => eq_r_address[*k as usize] * eq_cycle, + None => F::zero(), + }) .sum() } @@ -114,275 +119,130 @@ impl OneHotPolynomial { &self, bases: &[G::Affine], ) -> Vec { + let layout = DoryGlobals::get_layout(); let num_rows = self.num_rows(); - tracing::debug!("Committing to one-hot polynomial with {num_rows} rows"); let row_len = DoryGlobals::get_num_columns(); - let T = DoryGlobals::get_T(); + let t = self.nonzero_indices.len(); + + debug_assert!( + bases.len() >= row_len, + "Expected at least row_len bases for Dory row commitments" + ); - let rows_per_k = T / row_len; - if rows_per_k >= rayon::current_num_threads() { - // This is the typical case (T >> K) + // Safety: This function is only called with G1Affine + let g1_bases = unsafe { std::mem::transmute::<&[G::Affine], &[G1Affine]>(bases) }; - let chunk_commitments: Vec> = self + // CycleMajor optimization for T >> K: process by cycle chunks, group by address + let rows_per_k = t / row_len; + if layout == DoryLayout::CycleMajor && rows_per_k >= rayon::current_num_threads() { + let chunk_commitments: Vec> = self .nonzero_indices .par_chunks(row_len) .map(|chunk| { - // Collect indices for each k let mut indices_per_k: Vec> = vec![Vec::new(); self.K]; - for (col_index, k) in chunk.iter().enumerate() { if let Some(k) = k { indices_per_k[*k as usize].push(col_index); } } - - // Safety: This function is only called with G1Affine - let g1_bases = - unsafe { std::mem::transmute::<&[G::Affine], &[G1Affine]>(bases) }; - - // Vectorized batch addition for all k values at once let results = jolt_optimizations::batch_g1_additions_multi(g1_bases, &indices_per_k); - - // Convert results to row_commitments let mut row_commitments = vec![G::zero(); self.K]; - for (k, result) in results.into_iter().enumerate() { + for (k, batch_result) in results.into_iter().enumerate() { if !indices_per_k[k].is_empty() { - // Convert G1Affine to G1Projective, then cast to G - let projective = ark_bn254::G1Projective::from(result); - // Safety: We know G is G1Projective in practice when called from dory + let projective = ark_bn254::G1Projective::from(batch_result); row_commitments[k] = unsafe { std::mem::transmute_copy(&projective) }; } } - row_commitments }) .collect(); + let mut result = vec![G::zero(); num_rows]; for (chunk_index, commitments) in chunk_commitments.iter().enumerate() { result .par_iter_mut() .skip(chunk_index) .step_by(rows_per_k) - .zip(commitments.into_par_iter()) + .zip(commitments.par_iter()) .for_each(|(dest, src)| *dest = *src); } + return result; + } - result - } else { - let num_chunks = rayon::current_num_threads().next_power_of_two(); - let chunk_size = std::cmp::max(1, num_rows / num_chunks); - // row_len is always a power of two (from DoryGlobals::calculate_dimensions) - let log_row_len = row_len.trailing_zeros(); - let row_len_mask = (row_len - 1) as u64; - - // Iterate over chunks of contiguous rows in parallel - let mut result: Vec = vec![G::zero(); num_rows]; - - // First, collect indices for each row - let mut row_indices: Vec> = vec![Vec::new(); num_rows]; - - for (t, k) in self.nonzero_indices.iter().enumerate() { - if let Some(k) = k { - let global_index = *k as u64 * T as u64 + t as u64; - let row_index = (global_index >> log_row_len) as usize; - let col_index = (global_index & row_len_mask) as usize; + // General path: collect column indices for each row based on layout + let mut row_indices: Vec> = vec![Vec::new(); num_rows]; + for (cycle, k) in self.nonzero_indices.iter().enumerate() { + if let Some(k) = k { + let global_index = layout.address_cycle_to_index(*k as usize, cycle, self.K, t); + let row_index = global_index / row_len; + let col_index = global_index % row_len; + if row_index < num_rows { row_indices[row_index].push(col_index); } } - - // Process rows in parallel chunks - // Safety: This function is only called with G1Affine - let g1_bases = unsafe { std::mem::transmute::<&[G::Affine], &[G1Affine]>(bases) }; - - result - .par_chunks_mut(chunk_size) - .zip(row_indices.par_chunks(chunk_size)) - .for_each(|(result_chunk, indices_chunk)| { - let results = - jolt_optimizations::batch_g1_additions_multi(g1_bases, indices_chunk); - - for (row_result, (indices, result)) in result_chunk - .iter_mut() - .zip(indices_chunk.iter().zip(results.into_iter())) - { - if !indices.is_empty() { - // Convert G1Affine to G1Projective, then cast to G - let projective = ark_bn254::G1Projective::from(result); - // Safety: We know G is G1Projective in practice when called from dory - *row_result = unsafe { std::mem::transmute_copy(&projective) }; - } - } - }); - result - } - } - - #[tracing::instrument(skip_all, name = "OneHotPolynomial::commit_one_hot_batch")] - pub fn commit_one_hot_batch + VariableBaseMSM>( - one_hot_polys: &[U], - bases: &[G::Affine], - ) -> Vec> - where - U: std::borrow::Borrow> + Sync, - { - let row_len = DoryGlobals::get_num_columns(); - let T = DoryGlobals::get_T(); - let rows_per_k = T / row_len; - - // Phase 1: Collect all chunks from all polynomials - #[derive(Clone)] - struct ChunkWork { - poly_idx: usize, - chunk_idx: usize, - chunk_start: usize, - chunk_len: usize, - K: usize, } - let all_chunks: Vec = one_hot_polys - .iter() - .enumerate() - .flat_map(|(poly_idx, poly)| { - let poly = poly.borrow(); - let num_chunks = poly.nonzero_indices.len().div_ceil(row_len); - (0..num_chunks).map(move |chunk_idx| { - let chunk_start = chunk_idx * row_len; - let chunk_len = - std::cmp::min(row_len, poly.nonzero_indices.len() - chunk_start); - ChunkWork { - poly_idx, - chunk_idx, - chunk_start, - chunk_len, - K: poly.K, - } - }) - }) - .collect(); - - // Phase 2: Process all chunks in parallel (flat parallelism) - let chunk_results: Vec<_> = all_chunks - .par_iter() - .map(|work| { - let poly = one_hot_polys[work.poly_idx].borrow(); - let chunk = - &poly.nonzero_indices[work.chunk_start..work.chunk_start + work.chunk_len]; - - // Collect indices for each k - let mut indices_per_k: Vec> = vec![Vec::new(); work.K]; - for (col_index, k) in chunk.iter().enumerate() { - if let Some(k) = k { - indices_per_k[*k as usize].push(col_index); - } - } - - // Safety: This function is only called with G1Affine - let g1_bases = unsafe { std::mem::transmute::<&[G::Affine], &[G1Affine]>(bases) }; - - // Vectorized batch addition for all k values at once - let results = - jolt_optimizations::batch_g1_additions_multi(g1_bases, &indices_per_k); - - // Convert results to row_commitments - let mut row_commitments = vec![G::zero(); work.K]; - for (k, result) in results.into_iter().enumerate() { - if !indices_per_k[k].is_empty() { - // Convert G1Affine to G1Projective, then cast to G - let projective = ark_bn254::G1Projective::from(result); - // Safety: We know G is G1Projective in practice when called from dory - row_commitments[k] = unsafe { std::mem::transmute_copy(&projective) }; - } - } - - (work.poly_idx, work.chunk_idx, row_commitments) - }) - .collect(); - - // Phase 3: Reassemble results by polynomial - let mut poly_results: Vec> = one_hot_polys - .iter() - .map(|poly| vec![G::zero(); poly.borrow().num_rows()]) - .collect(); - - // Group results by polynomial - let mut results_by_poly: Vec> = vec![Vec::new(); one_hot_polys.len()]; - for (poly_idx, chunk_idx, commitments) in chunk_results { - results_by_poly[poly_idx].push((chunk_idx, commitments)); - } - - // Scatter into final results (can be done in parallel per polynomial) - poly_results - .par_iter_mut() - .enumerate() - .for_each(|(poly_idx, result)| { - let poly = &one_hot_polys[poly_idx]; - let num_rows = poly.borrow().num_rows(); - - for (chunk_idx, commitments) in &results_by_poly[poly_idx] { - // Scatter this chunk's results into the output - for (k, commitment) in commitments.iter().enumerate() { - let row_idx = k * rows_per_k + chunk_idx; - if row_idx < num_rows { - result[row_idx] = *commitment; - } + // Process rows using batch additions + let num_chunks = rayon::current_num_threads().next_power_of_two(); + let chunk_size = num_rows.div_ceil(num_chunks).max(1); + let mut result: Vec = vec![G::zero(); num_rows]; + + result + .par_chunks_mut(chunk_size) + .zip(row_indices.par_chunks(chunk_size)) + .for_each(|(result_chunk, indices_chunk)| { + let results = jolt_optimizations::batch_g1_additions_multi(g1_bases, indices_chunk); + for (row_result, (indices, batch_result)) in result_chunk + .iter_mut() + .zip(indices_chunk.iter().zip(results.into_iter())) + { + if !indices.is_empty() { + let projective = ark_bn254::G1Projective::from(batch_result); + *row_result = unsafe { std::mem::transmute_copy(&projective) }; } } }); - poly_results + result } #[tracing::instrument(skip_all, name = "OneHotPolynomial::vector_matrix_product")] pub fn vector_matrix_product(&self, left_vec: &[F], coeff: F, result: &mut [F]) { - let T = DoryGlobals::get_T(); + let layout = DoryGlobals::get_layout(); + let t = self.nonzero_indices.len(); let num_columns = DoryGlobals::get_num_columns(); debug_assert_eq!(result.len(), num_columns); - let row_len = num_columns; - if T >= row_len { - // This is the typical case (T >= K) - let rows_per_k = T / row_len; + // CycleMajor optimization for T >= row_len (typical case where T >= K) + if layout == DoryLayout::CycleMajor && t >= num_columns { + let rows_per_k = t / num_columns; result .par_iter_mut() .enumerate() .for_each(|(col_index, dest)| { let mut col_dot_product = F::zero(); - for (row_offset, t) in (col_index..T).step_by(row_len).enumerate() { - if let Some(k) = self.nonzero_indices[t] { + for (row_offset, cycle) in (col_index..t).step_by(num_columns).enumerate() { + if let Some(k) = self.nonzero_indices[cycle] { let row_index = k as usize * rows_per_k + row_offset; col_dot_product += left_vec[row_index]; } } *dest += coeff * col_dot_product; }); - } else { - let num_chunks = rayon::current_num_threads().next_power_of_two(); - let chunk_size = std::cmp::max(1, num_columns / num_chunks); - // row_len and chunk_size are powers of two (from DoryGlobals and next_power_of_two) - let log_row_len = row_len.trailing_zeros(); - let row_len_mask = (row_len - 1) as u128; - let chunk_size_mask = chunk_size - 1; + return; + } - result - .par_chunks_mut(chunk_size) - .enumerate() - .for_each(|(chunk_index, chunk)| { - let min_col_index = chunk_index * chunk_size; - let max_col_index = min_col_index + chunk_size; - for (t, k) in self.nonzero_indices.iter().enumerate() { - if let Some(k) = k { - let global_index = *k as u128 * T as u128 + t as u128; - let col_index = (global_index & row_len_mask) as usize; - // If this coefficient falls in the chunk of rows corresponding - // to `chunk_index`, compute its contribution to the result. - if col_index >= min_col_index && col_index < max_col_index { - let row_index = (global_index >> log_row_len) as usize; - chunk[col_index & chunk_size_mask] += coeff * left_vec[row_index]; - } - } - } - }); + // General path: iterate through nonzero indices and compute contributions + for (cycle, k) in self.nonzero_indices.iter().enumerate() { + if let Some(k) = k { + let global_index = layout.address_cycle_to_index(*k as usize, cycle, self.K, t); + let row_index = global_index / num_columns; + let col_index = global_index % num_columns; + if row_index < left_vec.len() && col_index < result.len() { + result[col_index] += coeff * left_vec[row_index]; + } + } } } } @@ -400,7 +260,7 @@ mod tests { let K: usize = 1 << LOG_K; let T: usize = 1 << LOG_T; DoryGlobals::reset(); - let _guard = DoryGlobals::initialize_context(K, T, DoryContext::Main); + let _guard = DoryGlobals::initialize_context(K, T, DoryContext::Main, None); let mut rng = test_rng(); diff --git a/jolt-core/src/poly/rlc_polynomial.rs b/jolt-core/src/poly/rlc_polynomial.rs index 69eaccf68..47a68c231 100644 --- a/jolt-core/src/poly/rlc_polynomial.rs +++ b/jolt-core/src/poly/rlc_polynomial.rs @@ -1,6 +1,5 @@ use crate::field::{BarrettReduce, FMAdd, JoltField}; -use crate::msm::VariableBaseMSM; -use crate::poly::commitment::dory::DoryGlobals; +use crate::poly::commitment::dory::{DoryGlobals, DoryLayout}; use crate::poly::multilinear_polynomial::MultilinearPolynomial; use crate::utils::accumulation::Acc6S; use crate::utils::math::{s64_from_diff_u64s, Math}; @@ -10,8 +9,6 @@ use crate::zkvm::instruction::LookupQuery; use crate::zkvm::ram::remap_address; use crate::zkvm::{bytecode::BytecodePreprocessing, witness::CommittedPolynomial}; use allocative::Allocative; -use ark_bn254::{Fr, G1Projective}; -use ark_ec::CurveGroup; use common::constants::XLEN; use common::jolt_device::MemoryLayout; use itertools::Itertools; @@ -20,7 +17,6 @@ use std::collections::HashMap; use std::sync::Arc; use tracer::ChunksIterator; use tracer::{instruction::Cycle, LazyTraceIterator}; -use tracing::trace_span; #[derive(Clone, Debug)] pub struct RLCStreamingData { @@ -267,78 +263,6 @@ impl RLCPolynomial { result } - /// Commits to the rows of `RLCPolynomial`, viewing its coefficients - /// as a matrix (used in Dory). - /// We do so by computing the row commitments for the individual - /// polynomials comprising the linear combination, and taking the - /// linear combination of the resulting commitments. - // TODO(moodlezoup): we should be able to cache the row commitments - // for each underlying polynomial and take a linear combination of those - #[tracing::instrument(skip_all, name = "RLCPolynomial::commit_rows")] - pub fn commit_rows + VariableBaseMSM>( - &self, - bases: &[G::Affine], - ) -> Vec { - let num_rows = DoryGlobals::get_max_num_rows(); - tracing::debug!("Committing to RLC polynomial with {num_rows} rows"); - let row_len = DoryGlobals::get_num_columns(); - - let mut row_commitments = vec![G::zero(); num_rows]; - - // Compute the row commitments for dense submatrix - self.dense_rlc - .par_chunks(row_len) - .zip(row_commitments.par_iter_mut()) - .for_each(|(dense_row, commitment)| { - let msm_result: G = - VariableBaseMSM::msm_field_elements(&bases[..dense_row.len()], dense_row) - .unwrap(); - *commitment += msm_result - }); - - // Compute the row commitments for one-hot polynomials - for (coeff, poly) in self.one_hot_rlc.iter() { - let mut new_row_commitments: Vec = match poly.as_ref() { - MultilinearPolynomial::OneHot(one_hot) => one_hot.commit_rows(bases), - _ => panic!("Expected OneHot polynomial in one_hot_rlc"), - }; - - // TODO(moodlezoup): Avoid resize - new_row_commitments.resize(num_rows, G::zero()); - - let updated_row_commitments: &mut [G1Projective] = unsafe { - std::slice::from_raw_parts_mut( - new_row_commitments.as_mut_ptr() as *mut G1Projective, - new_row_commitments.len(), - ) - }; - - let current_row_commitments: &[G1Projective] = unsafe { - std::slice::from_raw_parts( - row_commitments.as_ptr() as *const G1Projective, - row_commitments.len(), - ) - }; - - let coeff_fr = unsafe { *(&raw const *coeff as *const Fr) }; - - let _span = trace_span!("vector_scalar_mul_add_gamma_g1_online"); - let _enter = _span.enter(); - - // Scales the row commitments for the current polynomial by - // its coefficient - jolt_optimizations::vector_scalar_mul_add_gamma_g1_online( - updated_row_commitments, - coeff_fr, - current_row_commitments, - ); - - let _ = std::mem::replace(&mut row_commitments, new_row_commitments); - } - - row_commitments - } - /// Computes a vector-matrix product, viewing the coefficients of the /// polynomial as a matrix (used in Dory). /// We do so by computing the vector-matrix product for the individual @@ -353,22 +277,45 @@ impl RLCPolynomial { // Streaming mode: generate rows on-demand from trace self.streaming_vector_matrix_product(left_vec, num_columns, Arc::clone(ctx)) } else { - // Linear space mode: use pre-computed dense_rlc - (0..num_columns) - .into_par_iter() - .map(|col_index| { - self.dense_rlc - .iter() - .skip(col_index) - .step_by(num_columns) - .zip(left_vec.iter()) - .map(|(&a, &b)| -> F { a * b }) - .sum::() - }) - .collect() + let mut dense_result = vec![F::zero(); num_columns]; + match DoryGlobals::get_layout() { + DoryLayout::CycleMajor => { + dense_result + .par_iter_mut() + .enumerate() + .for_each(|(col_idx, dest)| { + *dest = self + .dense_rlc + .iter() + .skip(col_idx) + .step_by(num_columns) + .zip(left_vec.iter()) + .map(|(&a, &b)| a * b) + .sum(); + }); + } + DoryLayout::AddressMajor => { + let cycles_per_row = DoryGlobals::address_major_cycles_per_row(); + dense_result + .par_iter_mut() + .step_by(num_columns / cycles_per_row) + .enumerate() + .for_each(|(offset, dot_product_result)| { + *dot_product_result = self + .dense_rlc + .par_iter() + .skip(offset) + .step_by(cycles_per_row) + .zip(left_vec.par_iter()) + .map(|(&a, &b)| -> F { a * b }) + .sum::(); + }); + } + } + dense_result }; - // Compute the vector-matrix product for one-hot polynomials (linear space) + // Compute the **linear space** vector-matrix product for one-hot polynomials for (coeff, poly) in self.one_hot_rlc.iter() { match poly.as_ref() { MultilinearPolynomial::OneHot(one_hot) => { @@ -396,9 +343,7 @@ impl RLCPolynomial { /// /// # Complexity /// It uses O(m + a) space where m is the number of rows - /// and a is the advice size. However, this is small enough in practice (advice is typically - /// much smaller than the trace). This function is used in both streaming and - /// non-streaming contexts, and mutates `result` in place. + /// and a is the advice size, so even though it is linear it is negl space overall. fn vmp_advice_contribution( result: &mut [F], left_vec: &[F], @@ -457,14 +402,20 @@ guardrail in gen_from_trace should ensure sigma_main >= sigma_a." /// Streaming VMP implementation that generates rows on-demand from trace. /// Achieves O(sqrt(n)) space complexity by lazily generating the witness. /// Single pass through trace for both dense and one-hot polynomials. + /// Note: Streaming optimization only works for CycleMajor layout. + /// For AddressMajor, we materialize the polynomial and use regular VMP. fn streaming_vector_matrix_product( &self, left_vec: &[F], num_columns: usize, ctx: Arc>, ) -> Vec { - let T = DoryGlobals::get_T(); + // For AddressMajor layout, materialize and use regular VMP + if DoryGlobals::get_layout() == DoryLayout::AddressMajor { + return self.address_major_vector_matrix_product(left_vec, num_columns, &ctx); + } + let T = DoryGlobals::get_T(); match &ctx.trace_source { TraceSource::Materialized(trace) => { self.materialized_vector_matrix_product(left_vec, num_columns, trace, &ctx, T) @@ -479,6 +430,79 @@ guardrail in gen_from_trace should ensure sigma_main >= sigma_a." } } + /// AddressMajor VMP: materialize the RLC polynomial from trace and use regular VMP. + #[tracing::instrument(skip_all, name = "RLCPolynomial::address_major_vmp")] + fn address_major_vector_matrix_product( + &self, + left_vec: &[F], + num_columns: usize, + ctx: &StreamingRLCContext, + ) -> Vec { + let trace = match &ctx.trace_source { + TraceSource::Materialized(trace) => trace, + TraceSource::Lazy(_) => panic!("AddressMajor VMP requires materialized trace"), + }; + + // Materialize the RLC polynomial from the streaming context + let materialized = self.materialize_from_context(ctx, trace); + + // Use the regular vector_matrix_product on the materialized polynomial + let mut result = materialized.vector_matrix_product(left_vec); + + Self::vmp_advice_contribution(&mut result, left_vec, num_columns, ctx); + + result + } + + /// Materialize an RLC polynomial from a streaming context. + #[tracing::instrument(skip_all, name = "RLCPolynomial::materialize_from_context")] + fn materialize_from_context( + &self, + ctx: &StreamingRLCContext, + trace: &[Cycle], + ) -> RLCPolynomial { + let T = DoryGlobals::get_T(); + let mut dense_rlc: Vec = unsafe_allocate_zero_vec(T); + + // Materialize dense polynomials (RdInc, RamInc) into dense_rlc + for (poly_id, coeff) in ctx.dense_polys.iter() { + let poly: MultilinearPolynomial = poly_id.generate_witness( + &ctx.preprocessing.bytecode, + &ctx.preprocessing.memory_layout, + trace, + Some(&ctx.one_hot_params), + ); + + // Add coeff * poly to dense_rlc + let len = poly.original_len().min(dense_rlc.len()); + dense_rlc[..len] + .par_iter_mut() + .enumerate() + .for_each(|(i, acc)| { + let val = poly.get_coeff(i); + *acc += *coeff * val; + }); + } + + // Materialize one-hot polynomials (Ra polynomials) + let mut one_hot_rlc = Vec::new(); + for (poly_id, coeff) in ctx.onehot_polys.iter() { + let poly = poly_id.generate_witness( + &ctx.preprocessing.bytecode, + &ctx.preprocessing.memory_layout, + trace, + Some(&ctx.one_hot_params), + ); + one_hot_rlc.push((*coeff, Arc::new(poly))); + } + + RLCPolynomial { + dense_rlc, + one_hot_rlc, + streaming_context: None, + } + } + /// Single-pass VMV over materialized trace. Parallelizes by dividing rows evenly across threads. #[tracing::instrument(skip_all)] fn materialized_vector_matrix_product( diff --git a/jolt-core/src/zkvm/proof_serialization.rs b/jolt-core/src/zkvm/proof_serialization.rs index be766b5ba..9712bd771 100644 --- a/jolt-core/src/zkvm/proof_serialization.rs +++ b/jolt-core/src/zkvm/proof_serialization.rs @@ -13,7 +13,7 @@ use crate::subprotocols::univariate_skip::UniSkipFirstRoundProof; use crate::{ field::JoltField, poly::{ - commitment::commitment_scheme::CommitmentScheme, + commitment::{commitment_scheme::CommitmentScheme, dory::DoryLayout}, opening_proof::{OpeningId, OpeningPoint, Openings, SumcheckId}, }, subprotocols::sumcheck::SumcheckInstanceProof, @@ -45,6 +45,41 @@ pub struct JoltProof, FS: Transcr pub bytecode_K: usize, pub rw_config: ReadWriteConfig, pub one_hot_config: OneHotConfig, + pub dory_layout: DoryLayout, +} + +impl CanonicalSerialize for DoryLayout { + fn serialize_with_mode( + &self, + writer: W, + compress: Compress, + ) -> Result<(), SerializationError> { + u8::from(*self).serialize_with_mode(writer, compress) + } + + fn serialized_size(&self, compress: Compress) -> usize { + u8::from(*self).serialized_size(compress) + } +} + +impl Valid for DoryLayout { + fn check(&self) -> Result<(), SerializationError> { + Ok(()) + } +} + +impl CanonicalDeserialize for DoryLayout { + fn deserialize_with_mode( + reader: R, + compress: Compress, + validate: Validate, + ) -> Result { + let value = u8::deserialize_with_mode(reader, compress, validate)?; + if value > 1 { + return Err(SerializationError::InvalidData); + } + Ok(DoryLayout::from(value)) + } } pub struct Claims(pub Openings); diff --git a/jolt-core/src/zkvm/prover.rs b/jolt-core/src/zkvm/prover.rs index 32608e02f..91e17d650 100644 --- a/jolt-core/src/zkvm/prover.rs +++ b/jolt-core/src/zkvm/prover.rs @@ -23,7 +23,10 @@ use crate::{ field::JoltField, guest, poly::{ - commitment::{commitment_scheme::StreamingCommitmentScheme, dory::DoryGlobals}, + commitment::{ + commitment_scheme::StreamingCommitmentScheme, + dory::{DoryGlobals, DoryLayout}, + }, multilinear_polynomial::MultilinearPolynomial, opening_proof::{ compute_advice_lagrange_factor, DoryOpeningState, OpeningAccumulator, @@ -307,11 +310,6 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme, ProofTranscrip trusted_advice_hint: Option, final_memory_state: Memory, ) -> Self { - // Dory globals are process-wide (OnceCell). In tests we run many end-to-end proofs with - // different trace lengths in a single process, so reset before each prover construction. - #[cfg(test)] - crate::poly::commitment::dory::DoryGlobals::reset(); - // truncate trailing zeros on device outputs program_io.outputs.truncate( program_io @@ -493,6 +491,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme, ProofTranscrip bytecode_K: self.one_hot_params.bytecode_k, rw_config: self.rw_config.clone(), one_hot_config: self.one_hot_params.to_config(), + dory_layout: DoryGlobals::get_layout(), }; let prove_duration = start.elapsed(); @@ -518,67 +517,107 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme, ProofTranscrip 1 << self.one_hot_params.log_k_chunk, self.padded_trace_len, DoryContext::Main, + Some(DoryGlobals::get_layout()), ); - // Generate and commit to all witness polynomials using streaming tier1/tier2 pattern - let T = DoryGlobals::get_T(); + let polys = all_committed_polynomials(&self.one_hot_params); - let row_len = DoryGlobals::get_num_columns(); - let num_rows = T / DoryGlobals::get_max_num_rows(); - - tracing::debug!( - "Generating and committing {} witness polynomials with T={}, row_len={}, num_rows={}", - polys.len(), - T, - row_len, - num_rows - ); - - // Tier 1: Compute row commitments for each polynomial - let mut row_commitments: Vec> = vec![vec![]; num_rows]; - - self.lazy_trace - .clone() - .pad_using(T, |_| Cycle::NoOp) - .iter_chunks(row_len) - .zip(row_commitments.iter_mut()) - .par_bridge() - .for_each(|(chunk, row_tier1_commitments)| { - let res: Vec<_> = polys - .par_iter() - .map(|poly| { - poly.stream_witness_and_commit_rows::<_, PCS>( - &self.preprocessing.generators, - &self.preprocessing.shared, - &chunk, - &self.one_hot_params, - ) - }) - .collect(); - *row_tier1_commitments = res; - }); - - // Transpose: row_commitments[row][poly] -> tier1_per_poly[poly][row] - let tier1_per_poly: Vec> = (0..polys.len()) - .into_par_iter() - .map(|poly_idx| { - row_commitments - .iter() - .flat_map(|row| row.get(poly_idx).cloned()) - .collect() - }) - .collect(); - - // Tier 2: Compute final commitments from tier1 commitments - let (commitments, hints): (Vec<_>, Vec<_>) = tier1_per_poly - .into_par_iter() - .zip(&polys) - .map(|(tier1_commitments, poly)| { - let onehot_k = poly.get_onehot_k(&self.one_hot_params); - PCS::aggregate_chunks(&self.preprocessing.generators, onehot_k, &tier1_commitments) - }) - .unzip(); + let T = DoryGlobals::get_T(); + + // For AddressMajor, use non-streaming commit path since streaming assumes CycleMajor layout + let (commitments, hint_map) = if DoryGlobals::get_layout() == DoryLayout::AddressMajor { + tracing::debug!( + "Using non-streaming commit path for AddressMajor layout with {} polynomials", + polys.len() + ); + + // Materialize the trace for non-streaming commit + let trace: Vec = self + .lazy_trace + .clone() + .pad_using(T, |_| Cycle::NoOp) + .collect(); + + // Generate witnesses and commit using the regular (non-streaming) path + let (commitments, hints): (Vec<_>, Vec<_>) = polys + .par_iter() + .map(|poly_id| { + let witness: MultilinearPolynomial = poly_id.generate_witness( + &self.preprocessing.shared.bytecode, + &self.preprocessing.shared.memory_layout, + &trace, + Some(&self.one_hot_params), + ); + PCS::commit(&witness, &self.preprocessing.generators) + }) + .unzip(); + + let hint_map = HashMap::from_iter(zip_eq(polys, hints)); + (commitments, hint_map) + } else { + // CycleMajor: use streaming + let row_len = DoryGlobals::get_num_columns(); + let num_rows = T / DoryGlobals::get_max_num_rows(); + + tracing::debug!( + "Generating and committing {} witness polynomials with T={}, row_len={}, num_rows={}", + polys.len(), + T, + row_len, + num_rows + ); - let hint_map = HashMap::from_iter(zip_eq(polys, hints)); + // Tier 1: Compute row commitments for each polynomial + let mut row_commitments: Vec> = vec![vec![]; num_rows]; + + self.lazy_trace + .clone() + .pad_using(T, |_| Cycle::NoOp) + .iter_chunks(row_len) + .zip(row_commitments.iter_mut()) + .par_bridge() + .for_each(|(chunk, row_tier1_commitments)| { + let res: Vec<_> = polys + .par_iter() + .map(|poly| { + poly.stream_witness_and_commit_rows::<_, PCS>( + &self.preprocessing.generators, + &self.preprocessing.shared, + &chunk, + &self.one_hot_params, + ) + }) + .collect(); + *row_tier1_commitments = res; + }); + + // Transpose: row_commitments[row][poly] -> tier1_per_poly[poly][row] + let tier1_per_poly: Vec> = (0..polys.len()) + .into_par_iter() + .map(|poly_idx| { + row_commitments + .iter() + .flat_map(|row| row.get(poly_idx).cloned()) + .collect() + }) + .collect(); + + // Tier 2: Compute final commitments from tier1 commitments + let (commitments, hints): (Vec<_>, Vec<_>) = tier1_per_poly + .into_par_iter() + .zip(&polys) + .map(|(tier1_commitments, poly)| { + let onehot_k = poly.get_onehot_k(&self.one_hot_params); + PCS::aggregate_chunks( + &self.preprocessing.generators, + onehot_k, + &tier1_commitments, + ) + }) + .unzip(); + + let hint_map = HashMap::from_iter(zip_eq(polys, hints)); + (commitments, hint_map) + }; // Append commitments to transcript for commitment in &commitments { @@ -609,7 +648,8 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme, ProofTranscrip let poly = MultilinearPolynomial::from(untrusted_advice_vec); let advice_len = poly.len().next_power_of_two().max(1); - let _guard = DoryGlobals::initialize_context(1, advice_len, DoryContext::UntrustedAdvice); + let _guard = + DoryGlobals::initialize_context(1, advice_len, DoryContext::UntrustedAdvice, None); let _ctx = DoryGlobals::with_context(DoryContext::UntrustedAdvice); let (commitment, hint) = PCS::commit(&poly, &self.preprocessing.generators); self.transcript.append_serializable(&commitment); @@ -1281,6 +1321,7 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme, ProofTranscrip self.one_hot_params.k_chunk, self.padded_trace_len, DoryContext::Main, + Some(DoryGlobals::get_layout()), ); // Get the unified opening point from HammingWeightClaimReduction @@ -1420,7 +1461,6 @@ impl<'a, F: JoltField, PCS: StreamingCommitmentScheme, ProofTranscrip advice_polys, ); - // Dory opening proof at the unified point PCS::prove( &self.preprocessing.generators, &joint_poly, @@ -1511,10 +1551,11 @@ mod tests { use serial_test::serial; use crate::host; + use crate::poly::commitment::dory::{DoryGlobals, DoryLayout}; use crate::poly::{ commitment::{ commitment_scheme::CommitmentScheme, - dory::{DoryCommitmentScheme, DoryContext, DoryGlobals}, + dory::{DoryCommitmentScheme, DoryContext}, }, multilinear_polynomial::MultilinearPolynomial, opening_proof::{OpeningAccumulator, SumcheckId}, @@ -1548,7 +1589,8 @@ mod tests { let poly = MultilinearPolynomial::::from(trusted_advice_words); let advice_len = poly.len().next_power_of_two().max(1); - let _guard = DoryGlobals::initialize_context(1, advice_len, DoryContext::TrustedAdvice); + let _guard = + DoryGlobals::initialize_context(1, advice_len, DoryContext::TrustedAdvice, None); let (commitment, hint) = { let _ctx = DoryGlobals::with_context(DoryContext::TrustedAdvice); DoryCommitmentScheme::commit(&poly, &preprocessing.generators) @@ -1559,6 +1601,7 @@ mod tests { #[test] #[serial] fn fib_e2e_dory() { + DoryGlobals::reset(); let mut program = host::Program::new("fibonacci-guest"); let inputs = postcard::to_stdvec(&100u32).unwrap(); let (bytecode, init_memory_state, _) = program.decode(); @@ -1603,6 +1646,7 @@ mod tests { #[test] #[serial] fn small_trace_e2e_dory() { + DoryGlobals::reset(); let mut program = host::Program::new("fibonacci-guest"); let inputs = postcard::to_stdvec(&5u32).unwrap(); let (bytecode, init_memory_state, _) = program.decode(); @@ -1657,6 +1701,7 @@ mod tests { #[test] #[serial] fn sha3_e2e_dory() { + DoryGlobals::reset(); // Ensure SHA3 inline library is linked and auto-registered #[cfg(feature = "host")] use jolt_inlines_keccak256 as _; @@ -1719,6 +1764,7 @@ mod tests { #[test] #[serial] fn sha2_e2e_dory() { + DoryGlobals::reset(); // Ensure SHA2 inline library is linked and auto-registered #[cfg(feature = "host")] use jolt_inlines_sha2 as _; @@ -1779,6 +1825,7 @@ mod tests { #[test] #[serial] fn sha2_e2e_dory_with_unused_advice() { + DoryGlobals::reset(); // SHA2 guest does not consume advice, but providing both trusted and untrusted advice // should still work correctly through the full pipeline: // - Trusted: commit in preprocessing-only context, reduce in Stage 6, batch in Stage 8 @@ -1839,6 +1886,7 @@ mod tests { #[test] #[serial] fn max_advice_with_small_trace() { + DoryGlobals::reset(); // Tests that max-sized advice (4KB = 512 words) works with a minimal trace. // With balanced dims (sigma_a=5, nu_a=4 for 512 words), the minimum padded trace // (256 cycles -> total_vars=12) is sufficient to embed advice. @@ -1899,6 +1947,7 @@ mod tests { #[test] #[serial] fn advice_e2e_dory() { + DoryGlobals::reset(); // Tests a guest (merkle-tree) that actually consumes both trusted and untrusted advice. let mut program = host::Program::new("merkle-tree-guest"); let (bytecode, init_memory_state, _) = program.decode(); @@ -1958,6 +2007,7 @@ mod tests { #[test] #[serial] fn advice_opening_point_derives_from_unified_point() { + DoryGlobals::reset(); // Tests that advice opening points are correctly derived from the unified main opening // point using Dory's balanced dimension policy. // @@ -2056,6 +2106,7 @@ mod tests { #[test] #[serial] fn memory_ops_e2e_dory() { + DoryGlobals::reset(); let mut program = host::Program::new("memory-ops-guest"); let (bytecode, init_memory_state, _) = program.decode(); let (_, _, _, io_device) = program.trace(&[], &[], &[]); @@ -2100,6 +2151,7 @@ mod tests { #[test] #[serial] fn btreemap_e2e_dory() { + DoryGlobals::reset(); let mut program = host::Program::new("btreemap-guest"); let (bytecode, init_memory_state, _) = program.decode(); let inputs = postcard::to_stdvec(&50u32).unwrap(); @@ -2145,6 +2197,7 @@ mod tests { #[test] #[serial] fn muldiv_e2e_dory() { + DoryGlobals::reset(); let mut program = host::Program::new("muldiv-guest"); let (bytecode, init_memory_state, _) = program.decode(); let inputs = postcard::to_stdvec(&[9u32, 5u32, 3u32]).unwrap(); @@ -2273,4 +2326,47 @@ mod tests { JoltVerifier::new(&verifier_preprocessing, proof, program_io, None, None).unwrap(); verifier.verify().unwrap(); } + + #[test] + #[serial] + fn fib_e2e_dory_address_major() { + DoryGlobals::reset(); + DoryGlobals::set_layout(DoryLayout::AddressMajor); + + let mut program = host::Program::new("fibonacci-guest"); + let inputs = postcard::to_stdvec(&50u32).unwrap(); + let (bytecode, init_memory_state, _) = program.decode(); + let (_, _, _, io_device) = program.trace(&inputs, &[], &[]); + + let shared_preprocessing = JoltSharedPreprocessing::new( + bytecode.clone(), + io_device.memory_layout.clone(), + init_memory_state, + 1 << 16, + ); + let prover_preprocessing = JoltProverPreprocessing::new(shared_preprocessing.clone()); + let elf_contents = program.get_elf_contents().expect("elf contents is None"); + let prover = RV64IMACProver::gen_from_elf( + &prover_preprocessing, + &elf_contents, + &inputs, + &[], + &[], + None, + None, + ); + let io_device = prover.program_io.clone(); + let (proof, debug_info) = prover.prove(); + + let verifier_preprocessing = JoltVerifierPreprocessing::new( + shared_preprocessing, + prover_preprocessing.generators.to_verifier_setup(), + ); + + // DoryGlobals is now initialized inside the verifier's verify_stage8 + RV64IMACVerifier::new(&verifier_preprocessing, proof, io_device, None, debug_info) + .expect("verifier creation failed") + .verify() + .expect("verification failed"); + } } diff --git a/jolt-core/src/zkvm/verifier.rs b/jolt-core/src/zkvm/verifier.rs index 449e5fafc..26bea2050 100644 --- a/jolt-core/src/zkvm/verifier.rs +++ b/jolt-core/src/zkvm/verifier.rs @@ -5,6 +5,7 @@ use std::path::Path; use std::sync::Arc; use crate::poly::commitment::commitment_scheme::CommitmentScheme; +use crate::poly::commitment::dory::{DoryContext, DoryGlobals}; use crate::subprotocols::sumcheck::BatchedSumcheck; use crate::zkvm::bytecode::BytecodePreprocessing; use crate::zkvm::claim_reductions::RegistersClaimReductionSumcheckVerifier; @@ -174,7 +175,6 @@ impl<'a, F: JoltField, PCS: CommitmentScheme, ProofTranscript: Transc #[tracing::instrument(skip_all)] pub fn verify(mut self) -> Result<(), anyhow::Error> { let _pprof_verify = pprof_scope!("verify"); - // Parameters are computed from trace length as needed fiat_shamir_preamble( &self.program_io, @@ -550,6 +550,15 @@ impl<'a, F: JoltField, PCS: CommitmentScheme, ProofTranscript: Transc /// Stage 8: Dory batch opening verification. fn verify_stage8(&mut self) -> Result<(), anyhow::Error> { + // Initialize DoryGlobals with the layout from the proof + // This ensures the verifier uses the same layout as the prover + let _guard = DoryGlobals::initialize_context( + 1 << self.one_hot_params.log_k_chunk, + self.proof.trace_length.next_power_of_two(), + DoryContext::Main, + Some(self.proof.dory_layout), + ); + // Get the unified opening point from HammingWeightClaimReduction // This contains (r_address_stage7 || r_cycle_stage6) in big-endian let (opening_point, _) = self.opening_accumulator.get_committed_polynomial_opening( diff --git a/jolt-sdk/macros/src/lib.rs b/jolt-sdk/macros/src/lib.rs index e34367e32..58ab22c7e 100644 --- a/jolt-sdk/macros/src/lib.rs +++ b/jolt-sdk/macros/src/lib.rs @@ -604,7 +604,7 @@ impl MacroBuilder { let num_rows = 1usize << nu_a; let num_cols = 1usize << sigma_a; - let _guard = jolt::DoryGlobals::initialize_context(num_rows, num_cols, jolt::DoryContext::TrustedAdvice); + let _guard = jolt::DoryGlobals::initialize_context(num_rows, num_cols, jolt::DoryContext::TrustedAdvice, None); let _ctx = jolt::DoryGlobals::with_context(jolt::DoryContext::TrustedAdvice); let poly = MultilinearPolynomial::::from(trusted_advice_vec);