scroll-tech · hero78119 · Sep 11, 2025 · Sep 11, 2025 · Sep 16, 2025 · Sep 17, 2025
diff --git a/ceno_zkvm/src/scheme/gpu/mod.rs b/ceno_zkvm/src/scheme/gpu/mod.rs
@@ -45,6 +45,7 @@ use gkr_iop::gpu::gpu_prover::*;
 
 pub struct GpuTowerProver;
 
+use crate::scheme::constants::NUM_FANIN;
 use gkr_iop::gpu::{ArcMultilinearExtensionGpu, MultilinearExtensionGpu};
 
 // Extract out_evals from GPU-built tower witnesses
@@ -59,7 +60,7 @@ fn extract_out_evals_from_gpu_towers<E: ff_ext::ExtensionField>(
     let mut w_out_evals = Vec::new();
     for (i, gpu_spec) in prod_gpu.iter().enumerate() {
         let first_layer_evals: Vec<E> = gpu_spec
-            .get_final_evals(0)
+            .get_output_evals()
             .expect("Failed to extract final evals from GPU product tower");
 
         // Product tower first layer should have 2 MLEs
@@ -81,7 +82,7 @@ fn extract_out_evals_from_gpu_towers<E: ff_ext::ExtensionField>(
     let mut lk_out_evals = Vec::new();
     for gpu_spec in logup_gpu.iter() {
         let first_layer_evals: Vec<E> = gpu_spec
-            .get_final_evals(0)
+            .get_output_evals()
             .expect("Failed to extract final evals from GPU logup tower");
 
         // Logup tower first layer should have 4 MLEs
@@ -481,7 +482,7 @@ impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> TowerProver<GpuBacke
         let span = entered_span!("prove_tower_relation", profiling_2 = true);
         let (point_gl, proof_gpu) = cuda_hal
             .tower
-            .create_proof(&input, basic_tr)
+            .create_proof(&cuda_hal, &input, NUM_FANIN, basic_tr)
             .expect("gpu tower create_proof failed");
         exit_span!(span);
 

diff --git a/gkr_iop/src/cpu/mod.rs b/gkr_iop/src/cpu/mod.rs
@@ -62,6 +62,10 @@ impl<'a, E: ExtensionField> MultilinearPolynomial<E> for MultilinearExtension<'a
     fn evaluations_len(&self) -> usize {
         self.evaluations.len()
     }
+
+    fn bh_signature(&self) -> E {
+        self.bh_signature() // NOTE(review): must hit the inherent method, else infinite recursion
+    }
 }
 
 impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> ProverBackend for CpuBackend<E, PCS> {

diff --git a/gkr_iop/src/gpu/mod.rs b/gkr_iop/src/gpu/mod.rs
@@ -71,8 +71,8 @@ pub use gpu_prover::*;
 /// Stores a multilinear polynomial in dense evaluation form.
 pub struct MultilinearExtensionGpu<'a, E: ExtensionField> {
     /// GPU polynomial data, supporting both base field and extension field
-    pub mle: GpuFieldType,
-    _phantom: PhantomData<&'a E>,
+    pub mle: GpuFieldType<'a>,
+    _phantom: PhantomData<E>,
 }
 
 impl<'a, E: ExtensionField> Default for MultilinearExtensionGpu<'a, E> {
@@ -122,6 +122,26 @@ impl<'a, E: ExtensionField> MultilinearPolynomial<E> for MultilinearExtensionGpu
     fn evaluations_len(&self) -> usize {
         self.mle.evaluations_len()
     }
+
+    fn bh_signature(&self) -> E {
+        if std::any::TypeId::of::<E::BaseField>()
+            != std::any::TypeId::of::<p3::goldilocks::Goldilocks>()
+        {
+            panic!("GPU backend only supports Goldilocks");
+        }
+
+        // Bit-cast the signature to `E` without transmuting a `Vec` (its layout is unspecified).
+        fn cast<S, D: Copy>(src: S) -> D {
+            assert_eq!(std::mem::size_of::<S>(), std::mem::size_of::<D>(), "size mismatch");
+            // SAFETY: sizes are equal (asserted); assumes `S` is bit-compatible with `D` — confirm.
+            unsafe { std::mem::transmute_copy(&src) }
+        }
+        match &self.mle {
+            GpuFieldType::Base(poly) => cast(poly.bh_signature()),
+            GpuFieldType::Ext(poly) => cast(poly.bh_signature()),
+            GpuFieldType::Unreachable => unreachable!(),
+        }
+    }
 }
 
 impl<'a, E: ExtensionField> MultilinearExtensionGpu<'a, E> {
@@ -190,23 +210,23 @@ impl<'a, E: ExtensionField> MultilinearExtensionGpu<'a, E> {
     }
 
     /// Create from base field GpuPolynomial
-    pub fn from_ceno_gpu_base(mle_gpu: GpuPolynomial) -> Self {
+    pub fn from_ceno_gpu_base(mle_gpu: GpuPolynomial<'a>) -> Self {
         Self {
             mle: GpuFieldType::Base(mle_gpu),
             _phantom: PhantomData,
         }
     }
 
     /// Create from extension field GpuPolynomialExt
-    pub fn from_ceno_gpu_ext(mle_gpu: GpuPolynomialExt) -> Self {
+    pub fn from_ceno_gpu_ext(mle_gpu: GpuPolynomialExt<'a>) -> Self {
         Self {
             mle: GpuFieldType::Ext(mle_gpu),
             _phantom: PhantomData,
         }
     }
 
     /// Method for backward compatibility
-    pub fn from_ceno_gpu(mle_gpu: GpuPolynomial) -> Self {
+    pub fn from_ceno_gpu(mle_gpu: GpuPolynomial<'a>) -> Self {
         Self::from_ceno_gpu_base(mle_gpu)
     }
 
@@ -266,7 +286,8 @@ impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> ProverBackend for Gp
     type MultilinearPoly<'a> = MultilinearExtensionGpu<'a, E>;
     type Matrix = RowMajorMatrix<E::BaseField>;
     #[cfg(feature = "gpu")]
-    type PcsData = BasefoldCommitmentWithWitnessGpu<E::BaseField, BufferImpl<E::BaseField>>;
+    type PcsData =
+        BasefoldCommitmentWithWitnessGpu<E::BaseField, BufferImpl<'static, E::BaseField>>;
     #[cfg(not(feature = "gpu"))]
     type PcsData = <PCS as PolynomialCommitmentScheme<E>>::CommitmentWithWitness;
 

diff --git a/gkr_iop/src/hal.rs b/gkr_iop/src/hal.rs
@@ -1,19 +1,22 @@
-use ff_ext::ExtensionField;
-use mpcs::PolynomialCommitmentScheme;
-use multilinear_extensions::mle::Point;
-use std::{fmt::Debug, sync::Arc};
-
 use crate::gkr::layer::{
     Layer,
     hal::{LinearLayerProver, SumcheckLayerProver, ZerocheckLayerProver},
 };
+use ff_ext::ExtensionField;
+use mpcs::PolynomialCommitmentScheme;
+use multilinear_extensions::mle::Point;
+use std::{fmt::Debug, sync::Arc};
 
 pub trait MultilinearPolynomial<E: ExtensionField> {
     fn num_vars(&self) -> usize;
     fn eval(&self, point: Point<E>) -> E;
 
     /// Get the length of evaluation data
     fn evaluations_len(&self) -> usize;
+
+    /// Debug utility: compute a signature value representing all boolean hypercube evaluations.
+    /// This function is very expensive because it traverses the whole boolean hypercube.
+    fn bh_signature(&self) -> E;
 }
 
 /// Defines basic types like field, pcs that are common among all devices