Skip to content

feat: integrate GPU prover, add basefold modules #987

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 39 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
7e2448b
gpu hal, ceno_gpu
Velaciela Jul 21, 2025
d11638d
gpu prover tests
Velaciela Jul 21, 2025
f94f976
pin deps to specific git rev: ff_ext, mpcs, multilinear_extensions, t…
Velaciela Jul 21, 2025
de4ed0c
update: ceno-gpu
Velaciela Jul 21, 2025
0b9ca83
gpu prover: batch_commit_e2e
Velaciela Jul 24, 2025
23db134
gpu prover: batch_open_e2e
Velaciela Jul 24, 2025
31f6d34
Merge branch 'master' into feat/integrate-gpu-prover
Velaciela Jul 24, 2025
bd92978
update: deps git revision
Velaciela Jul 29, 2025
fddede0
update: gpu prover
Velaciela Jul 29, 2025
498eda3
dev: backup
Velaciela Jul 30, 2025
49b33e7
bk
Velaciela Jul 31, 2025
e208bad
BasefoldCommitmentWithWitnessGpu
Velaciela Aug 1, 2025
3bd302c
bk
Velaciela Aug 1, 2025
83550f0
bk
Velaciela Aug 1, 2025
554b994
commit_traces
Velaciela Aug 1, 2025
8bb3ea3
transport_proving_key
Velaciela Aug 5, 2025
8ad2975
tracing
Velaciela Aug 7, 2025
65a5841
warmup + rerun
Velaciela Aug 7, 2025
4e3b97a
Merge branch 'master' into feat/integrate-gpu-prover
Velaciela Aug 8, 2025
cf3ea44
pin to ceno#990
Velaciela Aug 8, 2025
3a8c41c
fix: gpu prover after merge
Velaciela Aug 8, 2025
91ced96
gpu feature
Velaciela Aug 8, 2025
ce8848c
simplify ProtocolWitnessGeneratorProver
Velaciela Aug 11, 2025
b74f96b
inner cpu prover
Velaciela Aug 11, 2025
c306c7b
refactor: feature gpu
Velaciela Aug 11, 2025
e8b80b1
default_backend_config
Velaciela Aug 11, 2025
743a4e5
gkr/layer/gpu: use cpu
Velaciela Aug 11, 2025
c33b58b
update
Velaciela Aug 11, 2025
2441cfd
conditional gpu build
Velaciela Aug 12, 2025
6fb1f80
format
Velaciela Aug 12, 2025
e18bd0a
minor
Velaciela Aug 12, 2025
06900bc
gpu prove_tower_relation with error
Velaciela Aug 14, 2025
ebc4d2d
support arbitrary number of tower specs
Velaciela Aug 15, 2025
a0f271e
fmt
Velaciela Aug 15, 2025
e765f9a
gpu: build_tower_witness
Velaciela Aug 21, 2025
c455e8f
prove_tower_relation calls build_tower_witness
Velaciela Aug 21, 2025
461e728
build_tower_witness_gpu()
Velaciela Aug 21, 2025
808626f
full gpu pipeline: tower build and prove
Velaciela Aug 21, 2025
b68c2f1
trace timer
Velaciela Aug 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
301 changes: 258 additions & 43 deletions Cargo.lock

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ tracing-forest = { version = "0.1.6" }
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
uint = "0.8"

ceno_gpu = { path = "utils/cuda_hal", package = "cuda_hal" }

[profile.dev]
lto = "thin"
# We are running our tests with optimizations turned on to make them faster.
Expand All @@ -98,3 +100,10 @@ opt-level = 3

[profile.release]
lto = "thin"







24 changes: 24 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Makefile for conditional GPU builds
#
# Thin wrapper around build-scripts/conditional-patch.sh, which rewrites the
# workspace `ceno_gpu` dependency in Cargo.toml. Run these targets from the
# repository root. Recipe lines must start with a TAB character.

.PHONY: enable-gpu disable-gpu clean help

# Default target: print the available targets and typical usage.
help:
	@echo "Available targets:"
	@echo " enable-gpu - Switch to GPU mode (uses remote implementation, requires private repo access)"
	@echo " disable-gpu - Switch to CPU mode (uses local placeholder, default state)"
	@echo " clean - Clean build artifacts and reset to CPU mode"
	@echo ""
	@echo "Normal usage:"
	@echo " cargo build # CPU build (default, no private repo fetch)"
	@echo " make enable-gpu && cargo build # GPU build (requires private repo access)"

# Point the workspace `ceno_gpu` dependency at the remote GPU implementation.
enable-gpu:
	@./build-scripts/conditional-patch.sh enable-gpu

# Point the workspace `ceno_gpu` dependency back at the local placeholder.
disable-gpu:
	@./build-scripts/conditional-patch.sh disable-gpu

# Remove cargo build artifacts, then restore the default (CPU) dependency.
clean:
	@cargo clean
	@./build-scripts/conditional-patch.sh disable-gpu
	@echo "Cleaned build artifacts and reset to CPU mode"
49 changes: 49 additions & 0 deletions build-scripts/conditional-patch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/bash

# Switch the workspace `ceno_gpu` dependency between the local placeholder
# crate and the remote GPU implementation by rewriting one line of the
# workspace Cargo.toml in place.
#
# Usage: ./build-scripts/conditional-patch.sh [enable-gpu|disable-gpu]
#
# Exit status: 0 on success (including when the requested mode is already
# active), 1 on bad usage or when Cargo.toml cannot be patched.

set -euo pipefail

# Resolve Cargo.toml relative to this script (build-scripts/ sits directly
# under the repository root) so the result does not depend on the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WORKSPACE_CARGO="${SCRIPT_DIR}/../Cargo.toml"

# Workspace dependency declarations
LOCAL_DEP='ceno_gpu = { path = "utils/cuda_hal", package = "cuda_hal" }'
REMOTE_DEP='ceno_gpu = { git = "ssh://git@github.com/scroll-tech/ceno-gpu.git", package = "cuda_hal", branch = "dev/integrate-into-ceno-as-dep" }'

# switch_dep FROM TO
# Replace the dependency line FROM with TO inside $WORKSPACE_CARGO.
# Returns 0 without touching the file when TO is already present; exits 1
# when the file is missing or contains neither declaration, so a silent no-op
# is never reported as a successful switch.
switch_dep() {
    local from="$1"
    local to="$2"

    if [ ! -f "$WORKSPACE_CARGO" ]; then
        echo "Error: ${WORKSPACE_CARGO} not found" >&2
        exit 1
    fi

    # Fixed-string matches: the declarations contain regex metacharacters.
    if grep -qF -- "$to" "$WORKSPACE_CARGO"; then
        echo "Requested ceno_gpu dependency is already active; nothing to change."
        return 0
    fi

    if ! grep -qF -- "$from" "$WORKSPACE_CARGO"; then
        echo "Error: expected ceno_gpu dependency line not found in ${WORKSPACE_CARGO}" >&2
        exit 1
    fi

    # `-i.bak` (suffix attached) is accepted by both GNU and BSD/macOS sed, so
    # no per-OS branching is needed; the backup is removed once sed succeeds.
    # The literal presence of FROM was verified above, so the `.` wildcards in
    # the pattern cannot cause a wrong line to be rewritten unnoticed.
    sed -i.bak "s|${from}|${to}|g" "$WORKSPACE_CARGO"
    rm -f "${WORKSPACE_CARGO}.bak"
}

# `${1:-}` keeps `set -u` from aborting before the usage text can be shown.
case "${1:-}" in
    enable-gpu)
        echo "Switching to GPU mode (using remote implementation)..."
        switch_dep "$LOCAL_DEP" "$REMOTE_DEP"
        echo "✅ Switched to remote GPU implementation"
        echo "Now you can run: cargo build -p ceno_zkvm -F gpu"
        ;;
    disable-gpu)
        echo "Switching to CPU mode (using local placeholder)..."
        switch_dep "$REMOTE_DEP" "$LOCAL_DEP"
        echo "✅ Switched to local placeholder implementation"
        echo "Now you can run: cargo build -p ceno_zkvm --no-default-features"
        ;;
    *)
        echo "Usage: $0 [enable-gpu|disable-gpu]" >&2
        echo " enable-gpu - Switch to remote GPU implementation (requires private repo access)" >&2
        echo " disable-gpu - Switch to local placeholder (default, no private repo access)" >&2
        exit 1
        ;;
esac

echo "Done."
4 changes: 2 additions & 2 deletions ceno_cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ tikv-jemallocator = { version = "0.6", optional = true }
ceno_emul = { path = "../ceno_emul" }
ceno_host = { path = "../ceno_host" }
ceno_zkvm = { path = "../ceno_zkvm" }
ff_ext = { path = "../ff_ext" }
ff_ext = { git = "https://github.com/scroll-tech/ceno", package = "ff_ext", rev = "7c277a1" }
gkr_iop = { path = "../gkr_iop" }
mpcs = { path = "../mpcs" }
mpcs = { git = "https://github.com/scroll-tech/ceno", package = "mpcs", rev = "7c277a1" }

[build-dependencies]
vergen-git2 = { version = "1", features = ["build", "cargo", "rustc", "emit_and_set"] }
Expand Down
13 changes: 5 additions & 8 deletions ceno_cli/src/commands/common_args/ceno.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,20 @@ use ceno_host::{CenoStdin, memory_from_file};
use ceno_zkvm::{
e2e::*,
scheme::{
constants::MAX_NUM_VARIABLES, mock_prover::LkMultiplicityKey, verifier::ZKVMVerifier,
constants::MAX_NUM_VARIABLES, create_backend, create_prover,
mock_prover::LkMultiplicityKey, verifier::ZKVMVerifier,
},
};
use clap::Args;
use ff_ext::{BabyBearExt4, ExtensionField, GoldilocksExt2};
use gkr_iop::cpu::{CpuBackend, CpuProver};

use mpcs::{
Basefold, BasefoldRSParams, PolynomialCommitmentScheme, SecurityLevel, Whir, WhirDefaultSpec,
};
use serde::Serialize;
use std::{
fs::File,
path::{Path, PathBuf},
rc::Rc,
};

/// Ceno options
Expand Down Expand Up @@ -373,12 +373,9 @@ fn run_elf_inner<
platform.hints.len()
);

// TODO support GPU backend + prover
let backend: Rc<_> =
CpuBackend::<E, PCS>::new(options.max_num_variables, options.security_level).into();

let backend = create_backend(options.max_num_variables, options.security_level);
Ok(run_e2e_with_checkpoint::<E, PCS, _, _>(
CpuProver::new(backend.clone()),
create_prover(backend.clone()),
program,
platform,
&hints,
Expand Down
4 changes: 2 additions & 2 deletions ceno_emul/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ version.workspace = true
anyhow.workspace = true
ceno_rt = { path = "../ceno_rt" }
elf = "0.7"
ff_ext = { version = "0", path = "../ff_ext" }
ff_ext = { git = "https://github.com/scroll-tech/ceno", package = "ff_ext", rev = "7c277a1" }
itertools.workspace = true
multilinear_extensions = { version = "0", path = "../multilinear_extensions" }
multilinear_extensions = { git = "https://github.com/scroll-tech/ceno", package = "multilinear_extensions", rev = "7c277a1" }
num-bigint.workspace = true
num-derive.workspace = true
num-traits.workspace = true
Expand Down
14 changes: 9 additions & 5 deletions ceno_zkvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,23 @@ base64 = "0.22"
ceno-examples = { path = "../examples-builder" }
ceno_emul = { path = "../ceno_emul" }
ceno_host = { path = "../ceno_host" }
ceno_gpu = { workspace = true, optional = true }
either.workspace = true
ff_ext = { path = "../ff_ext" }
ff_ext = { git = "https://github.com/scroll-tech/ceno", package = "ff_ext", rev = "7c277a1" }
gkr_iop = { path = "../gkr_iop" }
keccakf = "0.1.2"
mpcs = { path = "../mpcs" }
multilinear_extensions = { version = "0", path = "../multilinear_extensions" }
mpcs = { git = "https://github.com/scroll-tech/ceno", package = "mpcs", rev = "7c277a1" }
multilinear_extensions = { git = "https://github.com/scroll-tech/ceno", package = "multilinear_extensions", rev = "7c277a1" }
p3.workspace = true
rand_chacha.workspace = true
rayon.workspace = true
serde.workspace = true
serde_json.workspace = true
sumcheck.workspace = true
transcript = { path = "../transcript" }
witness = { path = "../witness" }
transcript = { git = "https://github.com/scroll-tech/ceno", package = "transcript", rev = "7c277a1" }
witness = { git = "https://github.com/scroll-tech/ceno", package = "witness", rev = "7c277a1" }
once_cell = "1.21.3"
cudarc = { version = "0.13.0", features = ["driver", "cuda-version-from-build-system"], optional = true }

itertools.workspace = true
ndarray.workspace = true
Expand Down Expand Up @@ -70,6 +73,7 @@ glob = "0.3"
default = ["forbid_overflow"]
flamegraph = ["pprof2/flamegraph", "pprof2/criterion"]
forbid_overflow = []
gpu = ["gkr_iop/gpu", "dep:ceno_gpu", "dep:cudarc"]
jemalloc = ["dep:tikv-jemallocator", "dep:tikv-jemalloc-ctl"]
jemalloc-prof = ["jemalloc", "tikv-jemallocator?/profiling"]
nightly-features = [
Expand Down
15 changes: 9 additions & 6 deletions ceno_zkvm/benches/fibonacci.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
use std::{rc::Rc, time::Duration};
use std::time::Duration;

use ceno_emul::{Platform, Program};
use ceno_host::CenoStdin;
use ceno_zkvm::{
self,
e2e::{Checkpoint, Preset, run_e2e_with_checkpoint, setup_platform},
scheme::verifier::ZKVMVerifier,
scheme::{create_backend, create_prover, verifier::ZKVMVerifier},
};
mod alloc;
use criterion::*;

use ff_ext::GoldilocksExt2;
use gkr_iop::cpu::{CpuBackend, CpuProver};
use gkr_iop::cpu::default_backend_config;

use mpcs::BasefoldDefault;
use transcript::BasicTranscript;

Expand Down Expand Up @@ -40,14 +41,16 @@ fn setup() -> (Program, Platform) {

fn fibonacci_prove(c: &mut Criterion) {
let (program, platform) = setup();
let backend: Rc<_> = CpuBackend::<E, Pcs>::default().into();
let (max_num_variables, security_level) = default_backend_config();
let backend = create_backend::<E, Pcs>(max_num_variables, security_level);

for max_steps in [1usize << 20, 1usize << 21, 1usize << 22] {
// retrieve 1 << 20th fibonacci element >> max_steps
let mut hints = CenoStdin::default();
let _ = hints.write(&20);
// estimate proof size data first
let result = run_e2e_with_checkpoint::<E, Pcs, _, _>(
CpuProver::new(backend.clone()),
create_prover(backend.clone()),
program.clone(),
platform.clone(),
&Vec::from(&hints),
Expand Down Expand Up @@ -84,7 +87,7 @@ fn fibonacci_prove(c: &mut Criterion) {
let mut time = Duration::new(0, 0);
for _ in 0..iters {
let result = run_e2e_with_checkpoint::<E, Pcs, _, _>(
CpuProver::new(backend.clone()),
create_prover(backend.clone()),
program.clone(),
platform.clone(),
&Vec::from(&hints),
Expand Down
11 changes: 7 additions & 4 deletions ceno_zkvm/benches/fibonacci_witness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ use ceno_host::CenoStdin;
use ceno_zkvm::{
self,
e2e::{Checkpoint, Preset, run_e2e_with_checkpoint, setup_platform},
scheme::{create_backend, create_prover},
};
use std::{fs, path::PathBuf, rc::Rc, time::Duration};
use std::{fs, path::PathBuf, time::Duration};
mod alloc;
use criterion::*;

use ff_ext::GoldilocksExt2;
use gkr_iop::cpu::{CpuBackend, CpuProver};
use gkr_iop::cpu::default_backend_config;
use mpcs::BasefoldDefault;

criterion_group! {
Expand Down Expand Up @@ -38,7 +40,8 @@ fn setup() -> (Program, Platform) {

fn fibonacci_witness(c: &mut Criterion) {
let (program, platform) = setup();
let backend: Rc<_> = CpuBackend::<E, Pcs>::default().into();
let (max_num_variables, security_level) = default_backend_config();
let backend = create_backend::<E, Pcs>(max_num_variables, security_level);

let max_steps = usize::MAX;
let mut group = c.benchmark_group(format!("fib_wit_max_steps_{}", max_steps));
Expand All @@ -59,7 +62,7 @@ fn fibonacci_witness(c: &mut Criterion) {
let mut time = Duration::new(0, 0);
for _ in 0..iters {
let result = run_e2e_with_checkpoint::<E, Pcs, _, _>(
CpuProver::new(backend.clone()),
create_prover(backend.clone()),
program.clone(),
platform.clone(),
&Vec::from(&hints),
Expand Down
11 changes: 7 additions & 4 deletions ceno_zkvm/benches/is_prime.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
use std::{rc::Rc, time::Duration};
use std::time::Duration;

use ceno_emul::{Platform, Program};
use ceno_host::CenoStdin;
use ceno_zkvm::{
self,
e2e::{Checkpoint, Preset, run_e2e_with_checkpoint, setup_platform},
scheme::{create_backend, create_prover},
};
mod alloc;
use criterion::*;
use ff_ext::GoldilocksExt2;
use gkr_iop::cpu::{CpuBackend, CpuProver};
use gkr_iop::cpu::default_backend_config;
use mpcs::BasefoldDefault;

criterion_group! {
Expand All @@ -36,7 +37,9 @@ fn setup() -> (Program, Platform) {

fn is_prime_1(c: &mut Criterion) {
let (program, platform) = setup();
let backend: Rc<_> = CpuBackend::<E, Pcs>::default().into();

let (max_num_variables, security_level) = default_backend_config();
let backend = create_backend::<E, Pcs>(max_num_variables, security_level);

for n in [100u32, 10000u32, 50000u32] {
let max_steps = usize::MAX;
Expand All @@ -56,7 +59,7 @@ fn is_prime_1(c: &mut Criterion) {

for _ in 0..iters {
let result = run_e2e_with_checkpoint::<E, Pcs, _, _>(
CpuProver::new(backend.clone()),
create_prover(backend.clone()),
program.clone(),
platform.clone(),
&hints,
Expand Down
10 changes: 6 additions & 4 deletions ceno_zkvm/benches/keccak.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
use std::{rc::Rc, time::Duration};
use std::time::Duration;

use ceno_emul::{Platform, Program};
use ceno_host::CenoStdin;
use ceno_zkvm::{
self,
e2e::{Checkpoint, Preset, run_e2e_with_checkpoint, setup_platform},
scheme::{create_backend, create_prover},
};
mod alloc;
use criterion::*;

use ff_ext::GoldilocksExt2;
use gkr_iop::cpu::{CpuBackend, CpuProver};
use gkr_iop::cpu::default_backend_config;
use mpcs::BasefoldDefault;

criterion_group! {
Expand Down Expand Up @@ -38,7 +39,8 @@ fn setup() -> (Program, Platform) {

fn keccak_prove(c: &mut Criterion) {
let (program, platform) = setup();
let backend: Rc<_> = CpuBackend::<E, Pcs>::default().into();
let (max_num_variables, security_level) = default_backend_config();
let backend = create_backend::<E, Pcs>(max_num_variables, security_level);
// retrieve 1 << 20th keccak element >> max_steps
let mut hints = CenoStdin::default();
let _ = hints.write(&vec![1, 2, 3]);
Expand Down Expand Up @@ -80,7 +82,7 @@ fn keccak_prove(c: &mut Criterion) {
let mut time = Duration::new(0, 0);
for _ in 0..iters {
let result = run_e2e_with_checkpoint::<E, Pcs, _, _>(
CpuProver::new(backend.clone()),
create_prover(backend.clone()),
program.clone(),
platform.clone(),
&Vec::from(&hints),
Expand Down
10 changes: 6 additions & 4 deletions ceno_zkvm/benches/quadratic_sorting.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
use std::{rc::Rc, time::Duration};
use std::time::Duration;

use ceno_emul::{Platform, Program};
use ceno_host::CenoStdin;
use ceno_zkvm::{
self,
e2e::{Checkpoint, Preset, run_e2e_with_checkpoint, setup_platform},
scheme::{create_backend, create_prover},
};
mod alloc;
use criterion::*;
use ff_ext::GoldilocksExt2;
use gkr_iop::cpu::{CpuBackend, CpuProver};
use gkr_iop::cpu::default_backend_config;
use mpcs::BasefoldDefault;
use rand::{RngCore, SeedableRng};

Expand Down Expand Up @@ -38,7 +39,8 @@ fn setup() -> (Program, Platform) {

fn quadratic_sorting_1(c: &mut Criterion) {
let (program, platform) = setup();
let backend: Rc<_> = CpuBackend::<E, Pcs>::default().into();
let (max_num_variables, security_level) = default_backend_config();
let backend = create_backend::<E, Pcs>(max_num_variables, security_level);
let mut rng = rand::rngs::StdRng::seed_from_u64(42);

for n in [100, 500] {
Expand All @@ -58,7 +60,7 @@ fn quadratic_sorting_1(c: &mut Criterion) {
let mut time = Duration::new(0, 0);
for _ in 0..iters {
let result = run_e2e_with_checkpoint::<E, Pcs, _, _>(
CpuProver::new(backend.clone()),
create_prover(backend.clone()),
program.clone(),
platform.clone(),
&hints,
Expand Down
Loading