Skip to content

Commit 9588dd9

Browse files
committed
debug: illegal memory access
1 parent 3a4b0a3 commit 9588dd9

File tree

4 files changed

+52
-7
lines changed

4 files changed

+52
-7
lines changed

Cargo.lock

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ uint = "0.8"
8888

8989
ceno_gpu = { git = "ssh://git@github.com/scroll-tech/ceno-gpu.git", package = "cuda_hal", branch = "dev/integrate-into-ceno-as-dep" }
9090

91-
[patch."ssh://git@github.com/scroll-tech/ceno-gpu.git"]
92-
ceno_gpu = { path = "../ceno-gpu/cuda_hal", package = "cuda_hal" }
91+
# [patch."ssh://git@github.com/scroll-tech/ceno-gpu.git"]
92+
# ceno_gpu = { path = "../ceno-gpu/cuda_hal", package = "cuda_hal" }
9393

9494
[profile.dev]
9595
lto = "thin"

ceno_zkvm/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ serde_json.workspace = true
2929
sumcheck.workspace = true
3030
transcript = { git = "https://github.com/scroll-tech/ceno", package = "transcript", rev = "7adb306" }
3131
witness = { git = "https://github.com/scroll-tech/ceno", package = "witness", rev = "7adb306" }
32+
once_cell = "1.21.3"
33+
cudarc = { version = "0.13.0", features = ["driver", "cuda-version-from-build-system"] }
3234

3335
itertools.workspace = true
3436
ndarray.workspace = true

ceno_zkvm/src/scheme/gpu/mod.rs

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,26 @@ use transcript::{BasicTranscript, Transcript};
4343
use witness::next_pow2_instance_padding;
4444

4545
use ceno_gpu::gl64::CudaHalGL64;
46+
use cudarc::driver::{CudaDevice, DriverError};
47+
48+
use once_cell::sync::Lazy;
49+
use std::sync::Mutex;
50+
// static CUDA_HAL: Lazy<Mutex<CudaHalGL64>> = Lazy::new(|| {
51+
// Mutex::new(CudaHalGL64::new().unwrap())
52+
// });
53+
54+
static CUDA_DEVICE: Lazy<Result<Arc<CudaDevice>, DriverError>> = Lazy::new(|| {
55+
CudaDevice::new(0)
56+
});
57+
static CUDA_HAL: Lazy<Result<Arc<Mutex<CudaHalGL64>>, Box<dyn std::error::Error + Send + Sync>>> = Lazy::new(|| {
58+
let device = CUDA_DEVICE.as_ref().map_err(|e| format!("Device init failed: {:?}", e))?;
59+
device.bind_to_thread()?;
60+
61+
CudaHalGL64::new()
62+
.map(|hal| Arc::new(Mutex::new(hal)))
63+
.map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)
64+
});
65+
4666

4767
pub struct GpuTowerProver;
4868

@@ -295,7 +315,12 @@ impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> TraceCommitter<GpuBa
295315
// panic!("error: type conversion failed");
296316
// };
297317

298-
let cuda_hal = CudaHalGL64::new().unwrap();
318+
// let cuda_hal = CUDA_HAL.lock().unwrap(); // CudaHalGL64::new().unwrap();
319+
let device = CUDA_DEVICE.as_ref().map_err(|e| format!("Device not available: {:?}", e)).unwrap();
320+
device.bind_to_thread().unwrap();
321+
let hal_arc = CUDA_HAL.as_ref().map_err(|e| format!("HAL not available: {:?}", e)).unwrap();
322+
let cuda_hal = hal_arc.lock().unwrap();
323+
299324
let traces_gl64: Vec<witness::RowMajorMatrix<p3::goldilocks::Goldilocks>> =
300325
unsafe { std::mem::transmute(vec_traces.clone()) };
301326
let pcs_data = cuda_hal.basefold.batch_commit(traces_gl64).unwrap();
@@ -863,6 +888,10 @@ impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> MainSumcheckProver<G
863888
}
864889
}
865890

891+
use p3::field::extension::BinomialExtensionField;
892+
type GL64 = p3::goldilocks::Goldilocks;
893+
type EGL64 = BinomialExtensionField<GL64, 2>;
894+
866895
impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> OpeningProver<GpuBackend<E, PCS>>
867896
for GpuProver<GpuBackend<E, PCS>>
868897
{
@@ -880,9 +909,14 @@ impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> OpeningProver<GpuBac
880909
panic!("GPU backend only supports Goldilocks base field");
881910
}
882911

883-
use p3::field::extension::BinomialExtensionField;
884-
type EGL64 = BinomialExtensionField<p3::goldilocks::Goldilocks, 2>;
885-
let cuda_hal = CudaHalGL64::new().unwrap();
912+
// use p3::field::extension::BinomialExtensionField;
913+
// type GL64 = p3::goldilocks::Goldilocks;
914+
// type EGL64 = BinomialExtensionField<GL64, 2>;
915+
// let cuda_hal = CUDA_HAL.lock().unwrap(); //CudaHalGL64::new().unwrap();
916+
let device = CUDA_DEVICE.as_ref().map_err(|e| format!("Device not available: {:?}", e)).unwrap();
917+
device.bind_to_thread().unwrap();
918+
let hal_arc = CUDA_HAL.as_ref().map_err(|e| format!("HAL not available: {:?}", e)).unwrap();
919+
let cuda_hal = hal_arc.lock().unwrap();
886920

887921
let mut rounds = vec![];
888922
rounds.push((
@@ -913,13 +947,17 @@ impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> OpeningProver<GpuBac
913947
));
914948
}
915949

950+
951+
use ceno_gpu::gl64::buffer::BufferImpl;
952+
use ceno_gpu::BasefoldCommitmentWithWitness as BasefoldCommitmentWithWitnessGpu;
953+
916954
// Type conversions using unsafe transmute
917955
let pp_gl64: &mpcs::basefold::structure::BasefoldProverParams<EGL64, mpcs::BasefoldRSParams> =
918956
unsafe { std::mem::transmute(self.pp.as_ref().unwrap()) };
919957
let rounds_gl64: Vec<_> = rounds
920958
.iter()
921959
.map(|(commitment, point_eval_pairs)| {
922-
let commitment_gl64: &mpcs::BasefoldCommitmentWithWitness<EGL64> =
960+
let commitment_gl64: &BasefoldCommitmentWithWitnessGpu<GL64, BufferImpl<GL64>> =
923961
unsafe { std::mem::transmute(*commitment) };
924962
let point_eval_pairs_gl64: Vec<_> = point_eval_pairs
925963
.iter()

0 commit comments

Comments
 (0)