Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
ac49ac6
refactor GPU compact tower witness flow
hero78119 Apr 25, 2026
84a2631
Fix compact tower memory accounting
hero78119 Apr 26, 2026
12453f6
Optimize compact logup ones allocation
hero78119 Apr 26, 2026
7d60f01
update dep
hero78119 Apr 26, 2026
925de92
Merge branch 'master' into feat/prover_mle_zero_padding
hero78119 Apr 26, 2026
e9fbe9c
fix main mem estimation
hero78119 Apr 26, 2026
46e87bb
Merge branch 'master' of github.com:scroll-tech/ceno into feat/prover…
hero78119 Apr 26, 2026
b888fbb
Merge branch 'feat/prover_mle_zero_padding' of github.com:scroll-tech…
hero78119 Apr 26, 2026
5ecce04
fix mem estimator
hero78119 Apr 26, 2026
be14006
snapshot compact tower estimator state
hero78119 Apr 26, 2026
df88dec
rollback Cargo.toml, Cargo.lock change
hero78119 Apr 27, 2026
b57b692
fix memory estimation
hero78119 Apr 27, 2026
c50b793
verifier log
hero78119 Apr 27, 2026
89b8698
Pass tower input by value for GPU proving
hero78119 Apr 27, 2026
f210e1f
split tower layer by view
hero78119 Apr 27, 2026
99b7a94
Use dense tower build for compact GPU input
hero78119 Apr 27, 2026
f0d81b6
Pass logup shape to tower prove estimator
hero78119 Apr 27, 2026
917810c
Deduplicate borrowed tower input booking
hero78119 Apr 27, 2026
4fc8dae
fix logging
hero78119 Apr 27, 2026
ef9fa30
Check scheduler memory estimate in mem tracking
hero78119 Apr 27, 2026
011a898
Refine replay tower proof memory estimate
hero78119 Apr 27, 2026
f3ca1cf
clippy fix
hero78119 Apr 27, 2026
147f567
add missing synchronization, avoid race condition
hero78119 Apr 28, 2026
94fc7bf
Account ShardRam tower prove allocator overhead
hero78119 Apr 28, 2026
c9401d1
misc: clippy fix
hero78119 Apr 28, 2026
d14e66a
Fix GPU proof memory estimation
hero78119 Apr 28, 2026
ceced51
Fix GPU proof estimate row basis
hero78119 Apr 28, 2026
d1ab71a
Tune ShardRam tower proof estimate
hero78119 Apr 28, 2026
7c6e97c
Batch main constraints into single sumcheck
hero78119 Apr 29, 2026
505e258
Restore replay backing before batched main
hero78119 Apr 29, 2026
b2fba0f
Replay witness backing incrementally during PCS opening
hero78119 Apr 29, 2026
25d7f42
wip more log
hero78119 Apr 29, 2026
2128bf9
Improve GPU proof failure diagnostics
hero78119 Apr 29, 2026
d5513de
Compact ShardRAM main witness extraction
hero78119 Apr 29, 2026
2df2590
Log batched main MLE histograms
hero78119 Apr 29, 2026
b67c6b7
Fix batched main GPU verification
hero78119 Apr 30, 2026
a4d066f
Use legacy layout for batched main GPU sumcheck
hero78119 Apr 30, 2026
29ae6df
update gkr dependency
hero78119 Apr 30, 2026
7d1a9de
Merge branch 'master' of github.com:scroll-tech/ceno into feat/prover…
hero78119 Apr 30, 2026
7ebc0cf
Merge branch 'feat/prover_mle_zero_padding' into feat/batch_main_sumc…
hero78119 Apr 30, 2026
fb96061
perf(gpu): trim batched main sumcheck work
hero78119 May 1, 2026
be5a6f5
perf(gpu): use direct layout for batched main
hero78119 May 1, 2026
27ed865
Experiment staggered batched main sumcheck prover
hero78119 May 3, 2026
268025b
Use dedicated batched main sumcheck prover
hero78119 May 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 11 additions & 11 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ version = "0.1.0"
ceno_crypto_primitives = { git = "https://github.com/scroll-tech/ceno-patch.git", package = "ceno_crypto_primitives", branch = "main" }
ceno_syscall = { git = "https://github.com/scroll-tech/ceno-patch.git", package = "ceno_syscall", branch = "main" }

ff_ext = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "ff_ext", tag = "v1.0.0-alpha.24" }
mpcs = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "mpcs", tag = "v1.0.0-alpha.24" }
multilinear_extensions = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "multilinear_extensions", tag = "v1.0.0-alpha.24" }
p3 = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "p3", tag = "v1.0.0-alpha.24" }
poseidon = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "poseidon", tag = "v1.0.0-alpha.24" }
sp1-curves = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "sp1-curves", tag = "v1.0.0-alpha.24" }
sumcheck = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "sumcheck", tag = "v1.0.0-alpha.24" }
transcript = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "transcript", tag = "v1.0.0-alpha.24" }
whir = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "whir", tag = "v1.0.0-alpha.24" }
witness = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "witness", tag = "v1.0.0-alpha.24" }
ff_ext = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "ff_ext", tag = "v1.0.0-alpha.25" }
mpcs = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "mpcs", tag = "v1.0.0-alpha.25" }
multilinear_extensions = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "multilinear_extensions", tag = "v1.0.0-alpha.25" }
p3 = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "p3", tag = "v1.0.0-alpha.25" }
poseidon = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "poseidon", tag = "v1.0.0-alpha.25" }
sp1-curves = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "sp1-curves", tag = "v1.0.0-alpha.25" }
sumcheck = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "sumcheck", tag = "v1.0.0-alpha.25" }
transcript = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "transcript", tag = "v1.0.0-alpha.25" }
whir = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "whir", tag = "v1.0.0-alpha.25" }
witness = { git = "https://github.com/scroll-tech/gkr-backend.git", package = "witness", tag = "v1.0.0-alpha.25" }

anyhow = { version = "1.0", default-features = false }
bincode = "1"
Expand Down Expand Up @@ -129,7 +129,7 @@ lto = "thin"

#[patch."https://github.com/scroll-tech/ceno-gpu-mock.git"]
#ceno_gpu = { path = "../ceno-gpu/cuda_hal", package = "cuda_hal", default-features = false, features = ["bb31"] }

#
#[patch."https://github.com/scroll-tech/gkr-backend"]
#ff_ext = { path = "../gkr-backend/crates/ff_ext", package = "ff_ext" }
#mpcs = { path = "../gkr-backend/crates/mpcs", package = "mpcs" }
Expand Down
16 changes: 4 additions & 12 deletions ceno_zkvm/src/bin/e2e.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ use ceno_zkvm::print_allocated_bytes;
use ceno_zkvm::{
e2e::{
Checkpoint, FieldType, MultiProver, PcsKind, Preset, public_io_words_to_digest_words,
run_e2e_full_trace_verify, run_e2e_single_shard_debug_verify, run_e2e_with_checkpoint,
setup_platform, setup_platform_debug,
run_e2e_with_checkpoint, setup_platform, setup_platform_debug,
},
scheme::{
ZKVMProof, constants::MAX_NUM_VARIABLES, create_backend, create_prover, hal::ProverDevice,
Expand Down Expand Up @@ -352,17 +351,10 @@ fn run_inner<
fs::write(&vk_file, vk_bytes).unwrap();

if checkpoint > Checkpoint::PrepVerify {
// `run_e2e_with_checkpoint` already performs the real verification for the
// complete flow. Re-running it here without the emulation exit code causes
// a false "Unfinished execution" error to be logged.
let verifier = ZKVMVerifier::new(vk);
if target_shard_id.is_some() {
run_e2e_single_shard_debug_verify(
&verifier,
zkvm_proofs.first().cloned().expect("missing shard proof"),
None,
max_steps,
);
} else {
run_e2e_full_trace_verify(&verifier, zkvm_proofs.clone(), None, max_steps);
}
soundness_test(zkvm_proofs.first().cloned().unwrap(), &verifier);
}
}
Expand Down
31 changes: 25 additions & 6 deletions ceno_zkvm/src/e2e.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ use serde::Serialize;
use std::collections::{HashMap, HashSet};
use std::{
collections::{BTreeMap, BTreeSet},
io::Write,
marker::PhantomData,
ops::Range,
sync::Arc,
Expand Down Expand Up @@ -2193,9 +2194,18 @@ fn create_proofs_streaming<

let transcript = Transcript::new(b"riscv");
let start = std::time::Instant::now();
let zkvm_proof = prover
.create_proof(&shard_ctx, zkvm_witness, pi, transcript)
.expect("create_proof failed");
let zkvm_proof =
match prover.create_proof(&shard_ctx, zkvm_witness, pi, transcript) {
Ok(proof) => proof,
Err(err) => {
eprintln!(
"create_proof failed for shard {}: {err:?}",
shard_ctx.shard_id
);
let _ = std::io::stderr().flush();
std::process::exit(1);
}
};
tracing::debug!(
"{}th shard proof created in {:?}",
shard_ctx.shard_id,
Expand Down Expand Up @@ -2254,9 +2264,18 @@ fn create_proofs_streaming<

let transcript = Transcript::new(b"riscv");
let start = std::time::Instant::now();
let zkvm_proof = prover
.create_proof(&shard_ctx, zkvm_witness, pi, transcript)
.expect("create_proof failed");
let zkvm_proof =
match prover.create_proof(&shard_ctx, zkvm_witness, pi, transcript) {
Ok(proof) => proof,
Err(err) => {
eprintln!(
"create_proof failed for shard {}: {err:?}",
shard_ctx.shard_id
);
let _ = std::io::stderr().flush();
std::process::exit(1);
}
};
tracing::debug!(
"{}th shard proof created in {:?}",
shard_ctx.shard_id,
Expand Down
24 changes: 12 additions & 12 deletions ceno_zkvm/src/instructions/gpu/chips/keccak.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,7 @@ fn replay_keccak_witness_only_from_packed<E: ExtensionField>(
) -> Result<RowMajorMatrix<E::BaseField>, ZKVMError> {
use crate::precompiles::KECCAK_ROUNDS_CEIL_LOG2;

let num_padded_instances = num_instances.next_power_of_two().max(2);
let num_padded_rows = num_padded_instances * 32;
let num_rows = num_instances * 32;
let rotation = KECCAK_ROUNDS_CEIL_LOG2;

let col_map = info_span!("col_map").in_scope(|| extract_keccak_column_map(config, num_witin));
Expand All @@ -358,7 +357,7 @@ fn replay_keccak_witness_only_from_packed<E: ExtensionField>(
.witgen_keccak(
&col_map,
packed_instances,
num_padded_rows,
num_rows,
shard_offset,
fetch_base_pc,
fetch_num_slots,
Expand All @@ -372,17 +371,18 @@ fn replay_keccak_witness_only_from_packed<E: ExtensionField>(
let raw_witin = if crate::instructions::gpu::config::is_debug_compare_enabled()
|| !should_materialize_witness_on_gpu()
{
info_span!("transpose_d2h", rows = num_padded_rows, cols = num_witin).in_scope(|| {
let produced_rows = gpu_result.witness.num_rows;
info_span!("transpose_d2h", rows = produced_rows, cols = num_witin).in_scope(|| {
let mut rmm_buffer = hal
.alloc_elems_on_device(num_padded_rows * num_witin, false, None)
.alloc_elems_on_device(produced_rows * num_witin, false, None)
.map_err(|e| {
ZKVMError::InvalidWitness(format!("GPU alloc for transpose failed: {e}").into())
})?;
matrix_transpose::<CudaHalBB31, ff_ext::BabyBearExt4, _>(
&hal.inner,
&mut rmm_buffer,
&gpu_result.witness.device_buffer,
num_padded_rows,
produced_rows,
num_witin,
)
.map_err(|e| ZKVMError::InvalidWitness(format!("GPU transpose failed: {e}").into()))?;
Expand Down Expand Up @@ -445,8 +445,7 @@ fn gpu_assign_keccak_inner<E: ExtensionField>(
use crate::precompiles::KECCAK_ROUNDS_CEIL_LOG2;

let num_instances = step_indices.len();
let num_padded_instances = num_instances.next_power_of_two().max(2);
let num_padded_rows = num_padded_instances * 32; // 2^5 = 32 rows per instance
let num_rows = num_instances * 32; // 2^5 = 32 rows per instance
let rotation = KECCAK_ROUNDS_CEIL_LOG2; // = 5
let materialize_initial_witness = crate::instructions::gpu::config::is_debug_compare_enabled()
|| should_materialize_witness_on_initial_assign();
Expand Down Expand Up @@ -479,7 +478,7 @@ fn gpu_assign_keccak_inner<E: ExtensionField>(
.witgen_keccak(
&col_map,
&packed_instances,
num_padded_rows,
num_rows,
shard_ctx.current_shard_offset_cycle(),
fetch_base_pc,
fetch_num_slots,
Expand Down Expand Up @@ -565,17 +564,18 @@ fn gpu_assign_keccak_inner<E: ExtensionField>(
} else if crate::instructions::gpu::config::is_debug_compare_enabled()
|| !should_materialize_witness_on_gpu()
{
info_span!("transpose_d2h", rows = num_padded_rows, cols = num_witin).in_scope(|| {
let produced_rows = gpu_result.witness.num_rows;
info_span!("transpose_d2h", rows = produced_rows, cols = num_witin).in_scope(|| {
let mut rmm_buffer = hal
.alloc_elems_on_device(num_padded_rows * num_witin, false, None)
.alloc_elems_on_device(produced_rows * num_witin, false, None)
.map_err(|e| {
ZKVMError::InvalidWitness(format!("GPU alloc for transpose failed: {e}").into())
})?;
matrix_transpose::<CudaHalBB31, ff_ext::BabyBearExt4, _>(
&hal.inner,
&mut rmm_buffer,
&gpu_result.witness.device_buffer,
num_padded_rows,
produced_rows,
num_witin,
)
.map_err(|e| ZKVMError::InvalidWitness(format!("GPU transpose failed: {e}").into()))?;
Expand Down
8 changes: 4 additions & 4 deletions ceno_zkvm/src/instructions/gpu/chips/shard_ram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -439,11 +439,11 @@ pub(crate) fn try_gpu_assign_shard_ram<E: ExtensionField>(
{
let struct_data = tracing::info_span!(
"gpu_shard_ram_structural_transpose_d2h",
num_rows_padded,
rows = gpu_structural.num_rows,
num_structural_witin,
)
.in_scope(|| -> Result<_, ZKVMError> {
let wit_num_rows = num_rows_padded;
let wit_num_rows = gpu_structural.num_rows;
let struct_num_cols = num_structural_witin;
let mut struct_rmm_buf = hal
.witgen
Expand Down Expand Up @@ -684,11 +684,11 @@ pub(crate) fn try_gpu_assign_shard_ram_from_device<E: ExtensionField>(
{
let struct_data = tracing::info_span!(
"gpu_shard_ram_structural_transpose_d2h_from_device",
num_rows_padded,
rows = gpu_structural.num_rows,
num_structural_witin,
)
.in_scope(|| -> Result<_, ZKVMError> {
let wit_num_rows = num_rows_padded;
let wit_num_rows = gpu_structural.num_rows;
let struct_num_cols = num_structural_witin;
let mut struct_rmm_buf = hal
.witgen
Expand Down
5 changes: 3 additions & 2 deletions ceno_zkvm/src/instructions/gpu/dispatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ pub(crate) fn try_gpu_assign_instances<E: ExtensionField, I: Instruction<E>>(
let total_instances = step_indices.len();
if total_instances == 0 {
// Empty: just return empty matrices
let num_witin = num_witin.max(1);
let num_structural_witin = num_structural_witin.max(1);
let raw_witin = RowMajorMatrix::<E::BaseField>::new(0, num_witin, I::padding_strategy());
let raw_structural =
Expand Down Expand Up @@ -481,7 +482,7 @@ fn gpu_assign_instances_inner<E: ExtensionField, I: Instruction<E>>(
total_instances,
num_witin,
I::padding_strategy(),
)
)?
};
if materialize_initial_witness {
raw_witin.padding_by_strategy();
Expand Down Expand Up @@ -1484,7 +1485,7 @@ fn replay_gpu_witness_from_resident_raw<E: ExtensionField, I: Instruction<E>>(
total_instances,
replay.num_witin,
I::padding_strategy(),
);
)?;

// Keep replayed witness immutable after attaching the col-major device backing.
// Mutating/padding a RowMajorMatrix clears device metadata, but replay consumers
Expand Down
4 changes: 2 additions & 2 deletions ceno_zkvm/src/instructions/gpu/utils/d2h.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,9 @@ pub(crate) fn gpu_witness_to_rmm<E: ExtensionField>(
num_rows: usize,
num_cols: usize,
padding: InstancePaddingStrategy,
) -> RowMajorMatrix<E::BaseField> {
) -> Result<RowMajorMatrix<E::BaseField>, ZKVMError> {
let mut rmm = RowMajorMatrix::<E::BaseField>::new(num_rows, num_cols, padding);
// Keep the original col-major witness buffer as the source of truth for GPU commit.
rmm.set_device_backing(gpu_result.device_buffer, DeviceMatrixLayout::ColMajor);
rmm
Ok(rmm)
}
Loading
Loading